From 4f3f713a599eac18064827aacbba9d6a9119d0e9 Mon Sep 17 00:00:00 2001 From: Liueic Aicnal Date: Mon, 25 Nov 2024 20:36:40 +0800 Subject: [PATCH 01/10] New feature: Add new route of CNU --- lib/routes/cnu/jwc.ts | 84 +++++++++++++++++++++++++++++++++++++ lib/routes/cnu/namespace.ts | 7 ++++ 2 files changed, 91 insertions(+) create mode 100644 lib/routes/cnu/jwc.ts create mode 100644 lib/routes/cnu/namespace.ts diff --git a/lib/routes/cnu/jwc.ts b/lib/routes/cnu/jwc.ts new file mode 100644 index 00000000000000..0c7b2967c9cb6a --- /dev/null +++ b/lib/routes/cnu/jwc.ts @@ -0,0 +1,84 @@ +import { Route } from '@/types'; +import { parseDate } from '@/utils/parse-date'; +import got from '@/utils/got'; +import { load } from 'cheerio'; + +const BASE_URL = 'https://jwc.cnu.edu.cn/tzgg/index.htm'; + +export const route: Route = { + path: '/jwc', + categories: ['university'], + example: '/cnu/jwc', + radar: [ + { + source: [BASE_URL], + }, + ], + name: '首都师范大学教务处', + maintainers: ['Aicnal'], + handler, + url: BASE_URL, +}; + +async function handler() { + const response = await got({ method: 'get', url: BASE_URL }); + const $ = load(response.data); + + const list = $('li') + .map((i, e) => { + const element = $(e); + const rawTitle = element.find('a').text().trim(); + const dateRegex = /^(\d{1,2})\s+(\d{4})-(\d{1,2})/; + const match = rawTitle.match(dateRegex); + + if (!match) {return null;} + + const [, day, year, month] = match; + const pubDate = parseDate(`${year}-${month}-${day}`, 'YYYY-MM-DD'); + const title = rawTitle + .replace(dateRegex, '') + .trim() + .replaceAll(/(公众|教师|学生)/g, '') + .trim(); + const href = element.find('a').attr('href') ?? ''; + const link = href.startsWith('http') ? href : new URL(href, BASE_URL).href; + + return { title, link, pubDate }; + }) + .toArray() + .filter(Boolean); + + const items = await Promise.all( + list.map(async (item) => { + try { + const detailResponse = await got({ method: 'get', url: item.link }); + const content = load(detailResponse.data); + const paragraphs = content('body p') + .filter((_, el) => { + const text = content(el).text(); + return !/分享到:|版权所有|地址:|E-mail:|网站地图|ICP备|京公网安备/.test(text); + }) + .map((_, el) => content(el).html()?.trim()) + .toArray() + .join('

'); + + return { + ...item, + description: paragraphs || '暂无内容', + }; + } catch (error) { + return { + ...item, + description: `内容获取失败: ${error.message}`, + }; + } + }) + ); + + return { + title: '首都师范大学教务信息', + link: BASE_URL, + description: '首都师范大学教务处的最新通知公告', + item: items, + }; +} diff --git a/lib/routes/cnu/namespace.ts b/lib/routes/cnu/namespace.ts new file mode 100644 index 00000000000000..7271f7bfa6754a --- /dev/null +++ b/lib/routes/cnu/namespace.ts @@ -0,0 +1,7 @@ +import type { Namespace } from '@/types'; + +export const namespace: Namespace = { + name: '首都师范大学教务处', + url: 'jwc.cnu.edu.cn', + lang: 'zh-CN', +}; From 22ea2c7bed330d7032980ca16f4ca363a86f0018 Mon Sep 17 00:00:00 2001 From: Liueic Aicnal Date: Mon, 25 Nov 2024 20:42:51 +0800 Subject: [PATCH 02/10] Update: Remove img --- lib/routes/cnu/jwc.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/routes/cnu/jwc.ts b/lib/routes/cnu/jwc.ts index 0c7b2967c9cb6a..9b60695306471c 100644 --- a/lib/routes/cnu/jwc.ts +++ b/lib/routes/cnu/jwc.ts @@ -58,7 +58,10 @@ async function handler() { const text = content(el).text(); return !/分享到:|版权所有|地址:|E-mail:|网站地图|ICP备|京公网安备/.test(text); }) - .map((_, el) => content(el).html()?.trim()) + .map((_, el) => { + content(el).find('img').remove(); // 移除 标签 + return content(el).html()?.trim(); + }) .toArray() .join('

'); From bad1499eafdbfb6fc93ff9bd235b4ef0a5153462 Mon Sep 17 00:00:00 2001 From: Liueic Aicnal Date: Mon, 25 Nov 2024 21:03:08 +0800 Subject: [PATCH 03/10] Update: Using to.Array() before .map() --- lib/routes/cnu/jwc.ts | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/lib/routes/cnu/jwc.ts b/lib/routes/cnu/jwc.ts index 9b60695306471c..58bd227fa5a3f9 100644 --- a/lib/routes/cnu/jwc.ts +++ b/lib/routes/cnu/jwc.ts @@ -25,9 +25,12 @@ async function handler() { const $ = load(response.data); const list = $('li') - .map((i, e) => { - const element = $(e); - const rawTitle = element.find('a').text().trim(); + .toArray() + .map((element) => { + const item = $(element); + const rawTitle = item.find('a').text().trim(); + + // Extract date using regex const dateRegex = /^(\d{1,2})\s+(\d{4})-(\d{1,2})/; const match = rawTitle.match(dateRegex); @@ -35,34 +38,36 @@ async function handler() { const [, day, year, month] = match; const pubDate = parseDate(`${year}-${month}-${day}`, 'YYYY-MM-DD'); + + // Clean up title and construct link const title = rawTitle .replace(dateRegex, '') .trim() .replaceAll(/(公众|教师|学生)/g, '') .trim(); - const href = element.find('a').attr('href') ?? ''; + const href = item.find('a').attr('href') ?? ''; const link = href.startsWith('http') ? href : new URL(href, BASE_URL).href; return { title, link, pubDate }; }) - .toArray() - .filter(Boolean); + .filter((item) => item !== null); // Filter out null values const items = await Promise.all( list.map(async (item) => { try { const detailResponse = await got({ method: 'get', url: item.link }); const content = load(detailResponse.data); + const paragraphs = content('body p') - .filter((_, el) => { + .toArray() + .filter((el) => { const text = content(el).text(); return !/分享到:|版权所有|地址:|E-mail:|网站地图|ICP备|京公网安备/.test(text); }) - .map((_, el) => { - content(el).find('img').remove(); // 移除 标签 + .map((el) => { + content(el).find('img').remove(); // Remove tags return content(el).html()?.trim(); }) - .toArray() .join('

'); return { From c89845179bd58ed1b5296f2b65f661aa90dc0ab4 Mon Sep 17 00:00:00 2001 From: Liueic Aicnal Date: Mon, 25 Nov 2024 21:13:38 +0800 Subject: [PATCH 04/10] Update: Fix --- lib/routes/cnu/jwc.ts | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/lib/routes/cnu/jwc.ts b/lib/routes/cnu/jwc.ts index 58bd227fa5a3f9..8140dfc32ae96c 100644 --- a/lib/routes/cnu/jwc.ts +++ b/lib/routes/cnu/jwc.ts @@ -25,41 +25,38 @@ async function handler() { const $ = load(response.data); const list = $('li') - .toArray() - .map((element) => { - const item = $(element); - const rawTitle = item.find('a').text().trim(); - - // Extract date using regex + .toArray() // Convert to an array first + .map((e) => { + const element = $(e); + const rawTitle = element.find('a').text().trim(); const dateRegex = /^(\d{1,2})\s+(\d{4})-(\d{1,2})/; const match = rawTitle.match(dateRegex); - if (!match) {return null;} + if (!match) { + return null; + } const [, day, year, month] = match; const pubDate = parseDate(`${year}-${month}-${day}`, 'YYYY-MM-DD'); - - // Clean up title and construct link const title = rawTitle .replace(dateRegex, '') .trim() .replaceAll(/(公众|教师|学生)/g, '') .trim(); - const href = item.find('a').attr('href') ?? ''; + const href = element.find('a').attr('href') ?? ''; const link = href.startsWith('http') ? href : new URL(href, BASE_URL).href; return { title, link, pubDate }; }) - .filter((item) => item !== null); // Filter out null values + .filter(Boolean); const items = await Promise.all( list.map(async (item) => { try { const detailResponse = await got({ method: 'get', url: item.link }); const content = load(detailResponse.data); - const paragraphs = content('body p') - .toArray() + .toArray() // Convert to an array first .filter((el) => { const text = content(el).text(); return !/分享到:|版权所有|地址:|E-mail:|网站地图|ICP备|京公网安备/.test(text); From f9fab655f42db907bb3ba4d0aa949444e189a91d Mon Sep 17 00:00:00 2001 From: Liueic Aicnal Date: Tue, 26 Nov 2024 09:52:17 +0800 Subject: [PATCH 05/10] Update: Fix of first PR --- lib/routes/cnu/jwc.ts | 34 +++++++++++++--------------------- lib/routes/cnu/namespace.ts | 2 +- 2 files changed, 14 insertions(+), 22 deletions(-) diff --git a/lib/routes/cnu/jwc.ts b/lib/routes/cnu/jwc.ts index 8140dfc32ae96c..3de413e8db6721 100644 --- a/lib/routes/cnu/jwc.ts +++ b/lib/routes/cnu/jwc.ts @@ -2,6 +2,7 @@ import { Route } from '@/types'; import { parseDate } from '@/utils/parse-date'; import got from '@/utils/got'; import { load } from 'cheerio'; +import cache from '@/utils/cache'; const BASE_URL = 'https://jwc.cnu.edu.cn/tzgg/index.htm'; @@ -11,13 +12,13 @@ export const route: Route = { example: '/cnu/jwc', radar: [ { - source: [BASE_URL], + source: [new URL(BASE_URL).host], }, ], name: '首都师范大学教务处', maintainers: ['Aicnal'], handler, - url: BASE_URL, + url: new URL(BASE_URL).host + new URL(BASE_URL).pathname, // host + pathname }; async function handler() { @@ -51,33 +52,24 @@ async function handler() { .filter(Boolean); const items = await Promise.all( - list.map(async (item) => { - try { + list.map((item) => + cache.tryGet(item.link, async () => { + // Cache the detail page const detailResponse = await got({ method: 'get', url: item.link }); const content = load(detailResponse.data); - const paragraphs = content('body p') - .toArray() // Convert to an array first - .filter((el) => { - const text = content(el).text(); - return !/分享到:|版权所有|地址:|E-mail:|网站地图|ICP备|京公网安备/.test(text); - }) - .map((el) => { - content(el).find('img').remove(); // Remove tags - return content(el).html()?.trim(); - }) + const paragraphs = content( + 'body p:not(:contains("分享到:")):not(:contains("版权所有")):not(:contains("地址:")):not(:contains("E-mail:")):not(:contains("网站地图")):not(:contains("ICP备")):not(:contains("京公网安备"))' + ) + .toArray() + .map((el) => content(el).html()?.trim()) .join('

'); return { ...item, description: paragraphs || '暂无内容', }; - } catch (error) { - return { - ...item, - description: `内容获取失败: ${error.message}`, - }; - } - }) + }) + ) ); return { diff --git a/lib/routes/cnu/namespace.ts b/lib/routes/cnu/namespace.ts index 7271f7bfa6754a..5c5510dc83f1c7 100644 --- a/lib/routes/cnu/namespace.ts +++ b/lib/routes/cnu/namespace.ts @@ -1,7 +1,7 @@ import type { Namespace } from '@/types'; export const namespace: Namespace = { - name: '首都师范大学教务处', + name: '首都师范大学', url: 'jwc.cnu.edu.cn', lang: 'zh-CN', }; From b23c731f70a5f1179b22088f11601d689267df6a Mon Sep 17 00:00:00 2001 From: Liueic Aicnal Date: Tue, 26 Nov 2024 09:58:42 +0800 Subject: [PATCH 06/10] Update: namespace 's link --- lib/routes/cnu/namespace.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/cnu/namespace.ts b/lib/routes/cnu/namespace.ts index 5c5510dc83f1c7..3beaa9cadd58c9 100644 --- a/lib/routes/cnu/namespace.ts +++ b/lib/routes/cnu/namespace.ts @@ -2,6 +2,6 @@ import type { Namespace } from '@/types'; export const namespace: Namespace = { name: '首都师范大学', - url: 'jwc.cnu.edu.cn', + url: 'cnu.edu.cn', lang: 'zh-CN', }; From 9ece9f8a2f769b50adcf9c17305a2ee8d4e7831e Mon Sep 17 00:00:00 2001 From: Liueic Aicnal Date: Tue, 26 Nov 2024 23:18:12 +0800 Subject: [PATCH 07/10] Update: Simply select element --- lib/routes/cnu/jwc.ts | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/lib/routes/cnu/jwc.ts b/lib/routes/cnu/jwc.ts index 3de413e8db6721..0f846345be5a9a 100644 --- a/lib/routes/cnu/jwc.ts +++ b/lib/routes/cnu/jwc.ts @@ -15,7 +15,7 @@ export const route: Route = { source: [new URL(BASE_URL).host], }, ], - name: '首都师范大学教务处', + name: '教务处', // Name of the route maintainers: ['Aicnal'], handler, url: new URL(BASE_URL).host + new URL(BASE_URL).pathname, // host + pathname @@ -57,16 +57,11 @@ async function handler() { // Cache the detail page const detailResponse = await got({ method: 'get', url: item.link }); const content = load(detailResponse.data); - const paragraphs = content( - 'body p:not(:contains("分享到:")):not(:contains("版权所有")):not(:contains("地址:")):not(:contains("E-mail:")):not(:contains("网站地图")):not(:contains("ICP备")):not(:contains("京公网安备"))' - ) - .toArray() - .map((el) => content(el).html()?.trim()) - .join('

'); + const detailContent = content('.article02').html()?.trim(); return { ...item, - description: paragraphs || '暂无内容', + description: detailContent || '

暂无内容

', }; }) ) From 90e1116def6d753c3a3869689474e2c664136846 Mon Sep 17 00:00:00 2001 From: Liueic Aicnal Date: Tue, 26 Nov 2024 23:20:47 +0800 Subject: [PATCH 08/10] Update: Fix --- lib/routes/cnu/jwc.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/cnu/jwc.ts b/lib/routes/cnu/jwc.ts index 0f846345be5a9a..17af4da3bd0c78 100644 --- a/lib/routes/cnu/jwc.ts +++ b/lib/routes/cnu/jwc.ts @@ -61,7 +61,7 @@ async function handler() { return { ...item, - description: detailContent || '

暂无内容

', + description: detailContent || '

暂无内容

', // Fallback to prevent empty description }; }) ) From ab3b2eea7eadd261d51ae6e57475a62d7df71844 Mon Sep 17 00:00:00 2001 From: Liueic Aicnal Date: Tue, 26 Nov 2024 23:28:33 +0800 Subject: [PATCH 09/10] Update: Fix --- lib/routes/cnu/jwc.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/routes/cnu/jwc.ts b/lib/routes/cnu/jwc.ts index 17af4da3bd0c78..40a79eb98d6913 100644 --- a/lib/routes/cnu/jwc.ts +++ b/lib/routes/cnu/jwc.ts @@ -57,11 +57,14 @@ async function handler() { // Cache the detail page const detailResponse = await got({ method: 'get', url: item.link }); const content = load(detailResponse.data); - const detailContent = content('.article02').html()?.trim(); + const paragraphs = content('.article02') + .toArray() + .map((el) => content(el).html()?.trim()) + .join('

'); return { ...item, - description: detailContent || '

暂无内容

', // Fallback to prevent empty description + description: paragraphs || '暂无内容', }; }) ) From e928d7b23b92c58c614bc298e166ef87e7699cc4 Mon Sep 17 00:00:00 2001 From: Liueic Aicnal Date: Tue, 26 Nov 2024 23:32:46 +0800 Subject: [PATCH 10/10] Update: Fix --- lib/routes/cnu/jwc.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/cnu/jwc.ts b/lib/routes/cnu/jwc.ts index 40a79eb98d6913..2919174ad61a94 100644 --- a/lib/routes/cnu/jwc.ts +++ b/lib/routes/cnu/jwc.ts @@ -57,7 +57,7 @@ async function handler() { // Cache the detail page const detailResponse = await got({ method: 'get', url: item.link }); const content = load(detailResponse.data); - const paragraphs = content('.article02') + const paragraphs = content('.article02') // with `.article02` .toArray() .map((el) => content(el).html()?.trim()) .join('

');