Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add GSAU(甘肃农业大学) router of news index page. #18027

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions lib/routes/gsau/namespace.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import type { Namespace } from '@/types';

/**
 * Namespace metadata shared by every route under `lib/routes/gsau`:
 * the display name of the site and its host name (written without a
 * protocol prefix).
 */
export const namespace: Namespace = {
name: '甘肃农业大学',
url: 'www.gsau.edu.cn',
};
158 changes: 158 additions & 0 deletions lib/routes/gsau/news.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
import { DataItem, Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { parseDate } from '@/utils/parse-date';
import { load } from 'cheerio';
import { generateRssItemForUnsupportedLink } from './utils/content';
import { isSubdomainOfGsau } from './utils/domain';

/** Feed title and description shown for one news column. */
interface NewsCategory {
    title: string;
    description: string;
}

/**
 * One row per supported news column: `[slug, title, description]`.
 * The slug is the value accepted as the `:category` route parameter and is
 * also the file name of the list page (`/xwzx/<slug>.htm`).
 */
const NEWS_TYPE_ROWS: ReadonlyArray<readonly [string, string, string]> = [
    ['xxyw', '学校要闻', '甘肃农业大学学校要闻'],
    ['xykx', '校园快讯', '甘肃农业大学校园快讯'],
    ['tzgg', '通知公告', '甘肃农业大学校内通知公告'],
    ['jzbg', '讲座报告', '甘肃农业大学讲座报告信息'],
    ['jqgz', '近期关注', '甘肃农业大学近期关注'],
    ['jyjx', '教育教学', '甘肃农业大学教育教学新闻'],
    ['xsky', '学术科研', '甘肃农业大学学术科研信息'],
    ['hzjl', '合作交流', '甘肃农业大学合作交流信息'],
    ['mzgn', '每周甘农', '甘肃农业大学周记总结'],
    ['mtnd', '媒体农大', '相关对甘肃农业大学的媒体报道'],
];

/** Lookup table from category slug to its feed metadata. */
const NEWS_TYPES: Record<string, NewsCategory> = Object.fromEntries(
    NEWS_TYPE_ROWS.map(([slug, title, description]): [string, NewsCategory] => [slug, { title, description }])
);

/**
 * Build the feed for one column of the GSAU news centre.
 *
 * Reads the `category` route parameter, fetches the matching list page
 * (`/xwzx/<category>.htm`), and turns every usable list row into a feed item.
 * Rows that link outside the official domain are not fetched; they get a
 * placeholder item from `generateRssItemForUnsupportedLink` instead.
 *
 * @param context - Route context; only `context.req.param()` is read.
 * @returns Feed data for the requested column (an empty item list is allowed).
 * @throws Error `Invalid category` when the parameter is missing or is not a
 *         key of `NEWS_TYPES`.
 */
const handler: Route['handler'] = async (context) => {
    const { category } = context.req.param();

    // Own-property check: a bare `NEWS_TYPES[category]` lookup would also accept
    // inherited keys such as `constructor` or `toString`.
    if (!category || !Object.prototype.hasOwnProperty.call(NEWS_TYPES, category)) {
        throw new Error('Invalid category');
    }
    const newsType = NEWS_TYPES[category];

    const BASE_URL = `https://www.gsau.edu.cn/xwzx/${category}.htm`;
    const FEED_IMAGE = 'https://www.gsau.edu.cn/images/foot_03.jpg';

    const { data: listResponse } = await got(BASE_URL);
    const $ = load(listResponse);

    // Select all list items containing news information
    const ITEM_SELECTOR = '#warp > div.nyleft > div.infolist > ul > li';

    // Extract title, date and absolute link from each row. This step is purely
    // synchronous, so no Promise machinery is needed here.
    const contentLinkList: { title: string; date: string; link: string }[] = [];
    for (const element of $(ITEM_SELECTOR).toArray()) {
        const title = $(element).find('a').attr('title')?.trim();
        const relativeLink = $(element).find('a').attr('href') || '';
        const parsedDate = parseDate($(element).find('a > span').text().trim());

        // `toISOString()` throws on an invalid date; skip the row instead of
        // failing the whole feed.
        if (!title || !relativeLink || Number.isNaN(parsedDate.getTime())) {
            continue;
        }

        contentLinkList.push({
            title,
            date: parsedDate.toISOString(),
            link: new URL(relativeLink, BASE_URL).href,
        });
    }

    const items = await Promise.all(
        contentLinkList.map((entry) =>
            cache.tryGet(entry.link, async () => {
                // Pages outside the official domain are not fetched.
                if (!isSubdomainOfGsau(entry.link)) {
                    return generateRssItemForUnsupportedLink(entry.title, entry.date, entry.link);
                }

                const { data: contentResponse } = await got(entry.link);
                const CONTENT_SELECTOR = '#warp > div.nyleft > form > div > div.infoarea > div';
                const article = load(contentResponse)(CONTENT_SELECTOR);
                const html = article.html() || '';

                return {
                    title: entry.title,
                    pubDate: entry.date,
                    link: entry.link,
                    description: html,
                    category: ['university'],
                    guid: entry.link,
                    id: entry.link,
                    image: FEED_IMAGE,
                    // Same `{ html, text }` shape that
                    // `generateRssItemForUnsupportedLink` produces.
                    content: {
                        html,
                        text: article.text().trim(),
                    },
                    updated: entry.date,
                    language: 'zh-cn',
                };
            })
        )
    );

    return {
        title: newsType.title,
        description: newsType.description,
        link: BASE_URL,
        image: FEED_IMAGE,
        item: items as DataItem[],
        allowEmpty: true,
        language: 'zh-cn',
        feedLink: `https://rsshub.app/gsau/news/${category}`,
        id: BASE_URL,
    };
};

/**
 * Route definition for the GSAU news feeds.
 *
 * `:category` selects one of the news columns listed in the description
 * table; requests are served by `handler`.
 */
export const route: Route = {
    path: '/news/:category',
    name: '主页新闻',
    // Documents the single path parameter declared in `path`.
    parameters: {
        category: '新闻类型,见下表',
    },
    description: `
| 类型 | 标题 | 描述 |
| ------- | ---------- | ---------------------------- |
| xxyw | 学校要闻 | 甘肃农业大学学校要闻 |
| xykx | 校园快讯 | 甘肃农业大学校园快讯 |
| tzgg | 通知公告 | 甘肃农业大学校内通知公告 |
| jzbg | 讲座报告 | 甘肃农业大学讲座报告信息 |
| jqgz | 近期关注 | 甘肃农业大学近期关注 |
| jyjx | 教育教学 | 甘肃农业大学教育教学新闻 |
| xsky | 学术科研 | 甘肃农业大学学术科研信息 |
| hzjl | 合作交流 | 甘肃农业大学合作交流信息 |
| mzgn | 每周甘农 | 甘肃农业大学周记总结 |
| mtnd | 媒体农大 | 相关对甘肃农业大学的媒体报道 |
`,
    maintainers: ['PrinOrange'],
    url: 'https://www.gsau.edu.cn/xwzx/xxyw.htm',
    handler,
    categories: ['university'],
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    example: '/gsau/news/tzgg',
};
37 changes: 37 additions & 0 deletions lib/routes/gsau/utils/content.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import type { DataItem } from '@/types';

/** Replacement text for the characters that are special in HTML text and attribute values. */
const HTML_ESCAPES: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
};

/** Escape a string so it can be embedded in HTML text or a quoted attribute. */
const escapeHtml = (text: string): string => text.replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch] ?? ch);

/**
 * Build a placeholder feed item for an article that is hosted outside the
 * official site and therefore is not fetched or parsed.
 *
 * The item body is a bilingual notice that points the reader to the original
 * link. The title and URL come from a scraped page, so they are HTML-escaped
 * before being placed into the HTML body; the plain-text body and the item's
 * own `title` / `link` fields keep the raw values.
 *
 * @param title - Article title as shown in the news list.
 * @param date - Publication date string, used for both `pubDate` and `updated`.
 * @param url - Absolute URL of the external article.
 * @returns A feed item whose description/content is the notice.
 */
export const generateRssItemForUnsupportedLink = (title: string, date: string, url: string): DataItem => {
    const safeTitle = escapeHtml(title);
    const safeUrl = escapeHtml(url);

    const htmlContent = `
<p>
抱歉,本文章 <u>${safeTitle}</u> 来源非甘肃农业大学官方网站,不支持解析。<br/>
请通过链接查看原文:<a href="${safeUrl}">${safeUrl}</a>
</p>
<p>
Sorry, the provenance of article <u>${safeTitle}</u> is not from official website of Gansu Agriculture University,
and it's not supported to parse. <br/>
Please read the origin website by link: <a href="${safeUrl}">${safeUrl}</a>
</p>
`;
    const textContent = `
抱歉,本文章 ${title} 来源非甘肃农业大学官方网站,不支持解析。
请通过链接查看原文:${url}
Sorry, the provenance of article ${title} is not from official website of Gansu Agriculture University,
and it's not supported to parse. Please read the origin website by link: ${url}
`;
    return {
        title,
        pubDate: date,
        link: url,
        description: htmlContent,
        category: ['university'],
        guid: url,
        id: url,
        image: 'https://www.gsau.edu.cn/images/foot_03.jpg',
        content: {
            text: textContent,
            html: htmlContent,
        },
        updated: date,
        language: 'zh-cn',
    };
};
18 changes: 18 additions & 0 deletions lib/routes/gsau/utils/domain.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
const OFFICIAL_DOMAIN = 'gsau.edu.cn';

/**
 * Check whether a URL points at the official domain or one of its subdomains.
 *
 * The news list can contain links to outside sites, which may require a login
 * or use anti-crawler measures, so callers use this check to decide whether a
 * page should be fetched.
 *
 * @param url - Absolute URL, or a bare host/path without a scheme (in which
 *              case `https://` is assumed).
 * @returns `true` when the host is `gsau.edu.cn` or ends with `.gsau.edu.cn`;
 *          `false` otherwise, including when the input cannot be parsed.
 */
export const isSubdomainOfGsau = (url: string): boolean => {
    // URL schemes are case-insensitive, so `HTTP://…` must not get a second scheme prepended.
    const hasHttpScheme = /^https?:\/\//i.test(url);
    try {
        const { hostname } = new URL(hasHttpScheme ? url : `https://${url}`);
        // A fully-qualified host name may end with a dot (`www.gsau.edu.cn.`).
        const host = hostname.replace(/\.$/, '');
        return host === OFFICIAL_DOMAIN || host.endsWith(`.${OFFICIAL_DOMAIN}`);
    } catch {
        // Unparsable input is treated as "not official".
        return false;
    }
};
91 changes: 91 additions & 0 deletions lib/routes/xbmu/academic.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import { DataItem, Route } from '@/types';
PrinOrange marked this conversation as resolved.
Show resolved Hide resolved
import cache from '@/utils/cache';
import got from '@/utils/got';
import { parseDate } from '@/utils/parse-date';
import { load } from 'cheerio';

/** List page of the XBMU academic-information column; also the base for resolving item links. */
const BASE_URL = 'https://www.xbmu.edu.cn/xwzx/xsxx.htm';

/**
 * Build the XBMU academic-information feed.
 *
 * Fetches the list page, extracts date/title/link from every row, then
 * fetches each article (through the cache) to fill in its content.
 *
 * @returns Feed data; an empty item list is allowed.
 * @throws Error prefixed with `Error fetching academic information:` when any
 *         step fails.
 */
const handler: Route['handler'] = async () => {
    try {
        const FEED_IMAGE = 'http://210.26.0.114:9090/mdxg/img/weex/default_img.jpg';

        // Fetch the academic page
        const { data: listResponse } = await got(BASE_URL);
        const $ = load(listResponse);

        // Select all list items containing academic information
        const ITEM_SELECTOR = 'body > div.container.list-container.ny_mani > div > div.news_list > ul > li';

        // Row extraction is synchronous; only the article fetches below are async.
        const academicLinkList: { date: string | undefined; title: string; link: string }[] = [];
        for (const element of $(ITEM_SELECTOR).toArray()) {
            const relativeHref = $(element).find('a').attr('href') || '';
            if (!relativeHref) {
                // Without an href there is no article to fetch.
                continue;
            }

            // The code treats the text before `/` as the day and the text after it
            // as year-month (presumably `DD/YYYY-MM` — confirm against the live page).
            const rawDate = $(element).find('span').text().trim();
            const [day, yearMonth] = rawDate.split('/').map((s) => s.trim());
            const parsedDate = parseDate(`${yearMonth}-${day}`);

            academicLinkList.push({
                // An unparsable date would otherwise serialise as the string "Invalid Date".
                date: Number.isNaN(parsedDate.getTime()) ? undefined : parsedDate.toUTCString(),
                title: $(element).find('a').attr('title') || '学术信息',
                // Standard URL resolution handles `../` segments as well as absolute hrefs.
                link: new URL(relativeHref, BASE_URL).href,
            });
        }

        const items = await Promise.all(
            academicLinkList.map((entry) =>
                cache.tryGet(entry.link, async () => {
                    const CONTENT_SELECTOR = '#vsb_content > div';
                    const { data: contentResponse } = await got(entry.link);
                    const article = load(contentResponse)(CONTENT_SELECTOR);
                    const html = article.html() || '';
                    return {
                        title: entry.title,
                        pubDate: entry.date,
                        link: entry.link,
                        description: html,
                        category: ['university'],
                        guid: entry.link,
                        id: entry.link,
                        image: FEED_IMAGE,
                        // Structured content: rendered HTML plus its plain-text form.
                        content: {
                            html,
                            text: article.text().trim(),
                        },
                        updated: entry.date,
                        language: 'zh-cn',
                    };
                })
            )
        );

        return {
            title: '西北民族大学学术信息',
            description: '西北民族大学近日学术信息',
            link: BASE_URL,
            image: FEED_IMAGE,
            item: items as DataItem[],
            allowEmpty: true,
            language: 'zh-cn',
            feedLink: 'https://rsshub.app/xbmu/academic',
            id: 'https://rsshub.app/xbmu/academic',
        };
    } catch (error) {
        throw new Error(`Error fetching academic information: ${error}`);
    }
};

/** Feature flags for this route: every flag is off. */
const ACADEMIC_ROUTE_FEATURES = {
    requireConfig: false,
    requirePuppeteer: false,
    antiCrawler: false,
    supportBT: false,
    supportPodcast: false,
    supportScihub: false,
};

/**
 * Route definition for the XBMU academic-information feed.
 * The path takes no parameters; requests are served by `handler`.
 */
export const route: Route = {
    path: '/academic',
    name: '学术信息',
    maintainers: ['PrinOrange'],
    handler,
    categories: ['university'],
    features: ACADEMIC_ROUTE_FEATURES,
    example: '/xbmu/academic',
};
Loading
Loading