diff --git a/lib/routes/gsau/namespace.ts b/lib/routes/gsau/namespace.ts
new file mode 100644
index 00000000000000..bf10e38e3e9157
--- /dev/null
+++ b/lib/routes/gsau/namespace.ts
@@ -0,0 +1,7 @@
+import type { Namespace } from '@/types';
+
+/** Namespace metadata for the gsau (甘肃农业大学) routes. */
+export const namespace: Namespace = {
+    name: '甘肃农业大学',
+    url: 'www.gsau.edu.cn',
+};
diff --git a/lib/routes/gsau/news.ts b/lib/routes/gsau/news.ts
new file mode 100644
index 00000000000000..69fed9f9efd4b5
--- /dev/null
+++ b/lib/routes/gsau/news.ts
@@ -0,0 +1,174 @@
+import { DataItem, Route } from '@/types';
+import cache from '@/utils/cache';
+import got from '@/utils/got';
+import { parseDate } from '@/utils/parse-date';
+import { load } from 'cheerio';
+import { generateRssItemForUnsupportedLink } from './utils/content';
+import { isSubdomainOfGsau } from './utils/domain';
+
+/** Display metadata for one news column of the site. */
+type NewsCategory = {
+    title: string;
+    description: string;
+};
+
+/** Supported news columns, keyed by the slug used in `/xwzx/<slug>.htm`. */
+const NEWS_TYPES: Record<string, NewsCategory> = {
+    xxyw: {
+        title: '学校要闻',
+        description: '甘肃农业大学学校要闻',
+    },
+    xykx: {
+        title: '校园快讯',
+        description: '甘肃农业大学校园快讯',
+    },
+    tzgg: {
+        title: '通知公告',
+        description: '甘肃农业大学校内通知公告',
+    },
+    jzbg: {
+        title: '讲座报告',
+        description: '甘肃农业大学讲座报告信息',
+    },
+    jqgz: {
+        title: '近期关注',
+        description: '甘肃农业大学近期关注',
+    },
+    jyjx: {
+        title: '教育教学',
+        description: '甘肃农业大学教育教学新闻',
+    },
+    xsky: {
+        title: '学术科研',
+        description: '甘肃农业大学学术科研信息',
+    },
+    hzjl: {
+        title: '合作交流',
+        description: '甘肃农业大学合作交流信息',
+    },
+    mzgn: {
+        title: '每周甘农',
+        description: '甘肃农业大学周记总结',
+    },
+    mtnd: {
+        title: '媒体农大',
+        description: '相关对甘肃农业大学的媒体报道',
+    },
+};
+
+/**
+ * Build the feed for one news column of www.gsau.edu.cn.
+ *
+ * Fetches the column's list page, then loads every article through the cache to obtain its body.
+ * Links that fail `isSubdomainOfGsau` are not fetched; they become placeholder items instead.
+ *
+ * @param context - Request context; the `category` route parameter selects the column.
+ * @returns Feed metadata plus one item per list row that has a title, a link and a parseable date.
+ * @throws Error when `category` is not one of the keys of `NEWS_TYPES`.
+ */
+const handler: Route['handler'] = async (context) => {
+    const { category } = context.req.param();
+
+    // Reject unknown slugs up front: `category` is spliced into the request URL below, and
+    // `NEWS_TYPES[category].title` would otherwise fail with an opaque TypeError.
+    if (!Object.prototype.hasOwnProperty.call(NEWS_TYPES, category)) {
+        throw new Error(`Unknown news category: ${category}`);
+    }
+    const newsType = NEWS_TYPES[category];
+
+    const BASE_URL = `https://www.gsau.edu.cn/xwzx/${category}.htm`;
+
+    const { data: listResponse } = await got(BASE_URL);
+    const $ = load(listResponse);
+
+    // Select all list items containing news information
+    const ITEM_SELECTOR = '#warp > div.nyleft > div.infolist > ul > li';
+    const listItems = $(ITEM_SELECTOR);
+
+    // Extract title, link and date from each row; rows missing any of them are dropped.
+    const contentLinkList = listItems.toArray().flatMap((element) => {
+        const title = $(element).find('a').attr('title')?.trim();
+        const relativeLink = $(element).find('a').attr('href') || '';
+        const parsedDate = parseDate($(element).find('a > span').text().trim());
+
+        // `toISOString()` throws a RangeError on an Invalid Date, so test validity first;
+        // one malformed row must not take down the whole feed.
+        if (!title || !relativeLink || Number.isNaN(parsedDate.getTime())) {
+            return [];
+        }
+        return [{ title, date: parsedDate.toISOString(), link: new URL(relativeLink, BASE_URL).href }];
+    });
+
+    return {
+        title: newsType.title,
+        description: newsType.description,
+        link: BASE_URL,
+        image: 'https://www.gsau.edu.cn/images/foot_03.jpg',
+        item: (await Promise.all(
+            contentLinkList.map((item) =>
+                cache.tryGet(item.link, async () => {
+                    if (!isSubdomainOfGsau(item.link)) {
+                        return generateRssItemForUnsupportedLink(item.title, item.date, item.link);
+                    }
+                    const { data: contentResponse } = await got(item.link);
+                    const CONTENT_SELECTOR = '#warp > div.nyleft > form > div > div.infoarea > div';
+                    const contentNode = load(contentResponse)(CONTENT_SELECTOR);
+                    const html = contentNode.html() || '';
+                    return {
+                        title: item.title,
+                        pubDate: item.date,
+                        link: item.link,
+                        description: html,
+                        category: ['university'],
+                        guid: item.link,
+                        id: item.link,
+                        image: 'https://www.gsau.edu.cn/images/foot_03.jpg',
+                        // Same `{ html, text }` shape as the placeholder items.
+                        content: { html, text: contentNode.text() },
+                        updated: item.date,
+                        language: 'zh-cn',
+                    };
+                })
+            )
+        )) as DataItem[],
+        allowEmpty: true,
+        language: 'zh-cn',
+        feedLink: `https://rsshub.app/gsau/news/${category}`,
+        id: BASE_URL,
+    };
+};
+
+export const route: Route = {
+    path: '/news/:category',
+    name: '主页新闻',
+    parameters: {
+        category: '新闻栏目代码,取值可见描述中的列表。',
+    },
+    description: `
+| category | 标题 |
+| -------- | ---------- |
+| xxyw | 学校要闻 |
+| xykx | 校园快讯 |
+| tzgg | 通知公告 |
+| jzbg | 讲座报告 |
+| jqgz | 近期关注 |
+| jyjx | 教育教学 |
+| xsky | 学术科研 |
+| hzjl | 合作交流 |
+| mzgn | 每周甘农 |
+| mtnd | 媒体农大 |
+    `,
+    maintainers: ['PrinOrange'],
+    url: 'www.gsau.edu.cn/xwzx/xxyw.htm',
+    handler,
+    categories: ['university'],
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    example: '/gsau/news/tzgg',
+};
diff --git a/lib/routes/gsau/utils/content.ts b/lib/routes/gsau/utils/content.ts
new file mode 100644
index 00000000000000..73f5bb8a5dd2cb
--- /dev/null
+++ b/lib/routes/gsau/utils/content.ts
@@ -0,0 +1,59 @@
+import type { DataItem } from '@/types';
+
+/** Escape the characters that are significant in HTML text and in quoted attribute values. */
+const escapeHtml = (value: string): string =>
+    value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;').replace(/'/g, '&#39;');
+
+/**
+ * Build a placeholder feed item for an article that is hosted outside the official site.
+ *
+ * The body only tells the reader, in Chinese and in English, that the article is not parsed
+ * and points to the original link.
+ *
+ * @param title - Article title as shown in the list page.
+ * @param date - Publication date, used for both `pubDate` and `updated`.
+ * @param url - Absolute link to the article; also used as `guid` and `id`.
+ * @returns An item whose `description` and `content.html` hold the bilingual notice.
+ */
+export const generateRssItemForUnsupportedLink = (title: string, date: string, url: string): DataItem => {
+    // `title` and `url` are scraped values, so escape them before placing them in markup.
+    const safeTitle = escapeHtml(title);
+    const safeUrl = escapeHtml(url);
+    const htmlContent = `
+<div lang="zh-cn">
+    <p>
+        抱歉,本文章 ${safeTitle} 来源非甘肃农业大学官方网站,不支持解析。<br>
+        请通过链接查看原文:<a href="${safeUrl}">${safeUrl}</a>
+    </p>
+</div>
+<div lang="en">
+    <p>
+        Sorry, the provenance of article ${safeTitle} is not from official website of Gansu Agriculture University,
+        and it's not supported to parse.<br>
+        Please read the origin website by link: <a href="${safeUrl}">${safeUrl}</a>
+    </p>
+</div>
+`;
+    const textContent = `
+抱歉,本文章 ${title} 来源非甘肃农业大学官方网站,不支持解析。
+请通过链接查看原文:${url}
+Sorry, the provenance of article ${title} is not from official website of Gansu Agriculture University,
+and it's not supported to parse. Please read the origin website by link: ${url}
+`;
+    return {
+        title,
+        pubDate: date,
+        link: url,
+        description: htmlContent,
+        category: ['university'],
+        guid: url,
+        id: url,
+        image: 'https://www.gsau.edu.cn/images/foot_03.jpg',
+        content: {
+            text: textContent,
+            html: htmlContent,
+        },
+        updated: date,
+        language: 'zh-cn',
+    };
+};
diff --git a/lib/routes/gsau/utils/domain.ts b/lib/routes/gsau/utils/domain.ts
new file mode 100644
index 00000000000000..b49b7c80716cbc
--- /dev/null
+++ b/lib/routes/gsau/utils/domain.ts
@@ -0,0 +1,32 @@
+const OFFICIAL_DOMAIN = 'gsau.edu.cn';
+
+/** Matches an `http:`/`https:` prefix or any explicit `scheme://` prefix, case-insensitively. */
+const SCHEME_PREFIX = /^(?:https?:|[a-z][a-z\d+.-]*:\/\/)/i;
+
+/**
+ * Check whether a URL points at the official domain or one of its subdomains.
+ *
+ * The news lists may link to pages outside the official domain, and such pages may be
+ * protected by anti-crawler or login measures, so callers use this check to decide
+ * whether a link can be fetched and parsed.
+ *
+ * @param url - Absolute URL, or a bare host/path without a scheme (treated as https).
+ * @returns `true` only for http(s) URLs whose host is `gsau.edu.cn` or ends with `.gsau.edu.cn`;
+ *          `false` for everything else, including input that cannot be parsed.
+ */
+export const isSubdomainOfGsau = (url: string): boolean => {
+    try {
+        // Testing for a real scheme (rather than `startsWith('http')`) keeps scheme-less hosts
+        // such as `httpd.gsau.edu.cn` and upper-case schemes such as `HTTPS://` working.
+        const normalizedUrl = SCHEME_PREFIX.test(url) ? url : `https://${url}`;
+        const { protocol, hostname } = new URL(normalizedUrl);
+        if (protocol !== 'http:' && protocol !== 'https:') {
+            return false;
+        }
+        // A fully qualified host name may end with the root dot ("www.gsau.edu.cn.").
+        const host = hostname.replace(/\.$/, '');
+        return host === OFFICIAL_DOMAIN || host.endsWith(`.${OFFICIAL_DOMAIN}`);
+    } catch {
+        return false;
+    }
+};