From 5f256bf9f8ae2b13223859bf4052621bf097ea31 Mon Sep 17 00:00:00 2001 From: Ethan Shen <42264778+nczitzk@users.noreply.github.com> Date: Sun, 10 Nov 2024 02:51:14 +0800 Subject: [PATCH] fix(route): DeepLearning.AI The Batch with tags (#17514) --- lib/routes/deeplearning/namespace.ts | 4 +- .../deeplearning/templates/description.art | 21 ++ lib/routes/deeplearning/the-batch.ts | 296 ++++++++++++++++++ lib/routes/deeplearning/thebatch.ts | 74 ----- 4 files changed, 320 insertions(+), 75 deletions(-) create mode 100644 lib/routes/deeplearning/templates/description.art create mode 100644 lib/routes/deeplearning/the-batch.ts delete mode 100644 lib/routes/deeplearning/thebatch.ts diff --git a/lib/routes/deeplearning/namespace.ts b/lib/routes/deeplearning/namespace.ts index 82bbae5596d1f6..85f560dc8625c0 100644 --- a/lib/routes/deeplearning/namespace.ts +++ b/lib/routes/deeplearning/namespace.ts @@ -1,7 +1,9 @@ import type { Namespace } from '@/types'; export const namespace: Namespace = { - name: 'deeplearning.ai', + name: 'DeepLearning.AI', url: 'www.deeplearning.ai', + categories: ['programming'], + description: '', lang: 'en', }; diff --git a/lib/routes/deeplearning/templates/description.art b/lib/routes/deeplearning/templates/description.art new file mode 100644 index 00000000000000..249654e7e618a4 --- /dev/null +++ b/lib/routes/deeplearning/templates/description.art @@ -0,0 +1,21 @@ +{{ if images }} + {{ each images image }} + {{ if image?.src }} +
+ {{ image.alt }} +
+ {{ /if }} + {{ /each }} +{{ /if }} + +{{ if intro }} +
{{ intro }}
+{{ /if }} + +{{ if description }} + {{@ description }} +{{ /if }} \ No newline at end of file diff --git a/lib/routes/deeplearning/the-batch.ts b/lib/routes/deeplearning/the-batch.ts new file mode 100644 index 00000000000000..3456d03f822d2a --- /dev/null +++ b/lib/routes/deeplearning/the-batch.ts @@ -0,0 +1,296 @@ +import { Route } from '@/types'; +import { getCurrentPath } from '@/utils/helpers'; +const __dirname = getCurrentPath(import.meta.url); + +import cache from '@/utils/cache'; +import ofetch from '@/utils/ofetch'; +import { load } from 'cheerio'; +import { parseDate } from '@/utils/parse-date'; +import { art } from '@/utils/render'; +import path from 'node:path'; + +export const handler = async (ctx) => { + const { tag } = ctx.req.param(); + const limit = ctx.req.query('limit') ? Number.parseInt(ctx.req.query('limit'), 10) : 1; + + const rootUrl = 'https://www.deeplearning.ai'; + const currentUrl = new URL(`the-batch${tag ? `/tag/${tag.replace(/^tag\//, '').replace(/\/$/, '')}` : ''}/`, rootUrl).href; + + const response = await ofetch(currentUrl); + + const $ = load(response); + + const language = $('html').prop('lang'); + + const data = JSON.parse($('script#__NEXT_DATA__').text()); + + const nextBuildId = data.buildId; + const posts = data.props?.pageProps?.posts ?? []; + + let items = posts.slice(0, limit).map((item) => { + const title = item.title; + const description = art(path.join(__dirname, 'templates/description.art'), { + images: item.feature_image + ? [ + { + src: item.feature_image, + alt: item.feature_image_alt, + }, + ] + : undefined, + intro: item.excerpt ?? item.custom_excerpt, + }); + const image = item.feature_image; + const guid = `the-batch-${item.slug}`; + + return { + title, + description, + pubDate: parseDate(item.published_at), + link: new URL(`_next/data/${nextBuildId}/the-batch/${item.slug}.json`, rootUrl).href, + category: item.tags.map((t) => t.name), + guid, + id: guid, + content: { + html: description, + text: item.excerpt ?? item.custom_excerpt, + }, + image, + banner: image, + language, + }; + }); + + items = await Promise.all( + items.map((item) => + cache.tryGet(item.link, async () => { + const detailResponse = await ofetch(item.link); + + const post = detailResponse.pageProps?.cmsData?.post ?? undefined; + + if (!post) { + return item; + } + + const $$ = load(post.html); + + $$('a').each((_, ele) => { + if (ele.attribs.href?.includes('utm_campaign')) { + const url = new URL(ele.attribs.href); + url.searchParams.delete('utm_campaign'); + url.searchParams.delete('utm_source'); + url.searchParams.delete('utm_medium'); + url.searchParams.delete('_hsenc'); + ele.attribs.href = url.href; + } + }); + + const title = post.title; + const description = art(path.join(__dirname, 'templates/description.art'), { + images: post.feature_image + ? [ + { + src: post.feature_image, + alt: post.feature_image_alt, + }, + ] + : undefined, + intro: post.excerpt ?? post.custom_excerpt, + description: $$.html(), + }); + const guid = `the-batch-${post.slug}`; + const image = post.feature_image; + + item.title = title; + item.description = description; + item.pubDate = parseDate(post.published_at); + item.link = new URL(`the-batch/${post.slug}`, rootUrl).href; + item.category = post.tags.map((t) => t.name); + item.author = post.authors.map((a) => a.name).join('/'); + item.guid = guid; + item.id = guid; + item.content = { + html: description, + text: post.excerpt ?? post.custom_excerpt, + }; + item.image = image; + item.banner = image; + item.updated = parseDate(post.updated_at); + item.language = language; + + return item; + }) + ) + ); + + const image = new URL($('meta[property="og:image"]').prop('content'), rootUrl).href; + + return { + title: $('title').text(), + description: $('meta[property="og:description"]').prop('content'), + link: currentUrl, + item: items, + allowEmpty: true, + image, + author: $('meta[property="og:site_name"]').prop('content'), + language, + }; +}; + +export const route: Route = { + path: '/the-batch/:tag{.+}?', + name: 'The Batch', + url: 'www.deeplearning.ai', + maintainers: ['nczitzk', 'juvenn', 'TonyRL'], + handler, + example: '/deeplearning/the-batch', + parameters: { tag: 'Tag, Weekly Issues by default' }, + description: `:::tip + If you subscribe to [Data Points](https://www.deeplearning.ai/the-batch/tag/data-points/),where the URL is \`https://www.deeplearning.ai/the-batch/tag/data-points/\`, extract the part \`https://www.deeplearning.ai/the-batch/tag\` to the end, which is \`data-points\`, and use it as the parameter to fill in. Therefore, the route will be [\`/deeplearning/the-batch/data-points\`](https://rsshub.app/deeplearning/the-batch/data-points). + + ::: + + | Tag | ID | + | ---------------------------------------------------------------------- | -------------------------------------------------------------------- | + | [Weekly Issues](https://www.deeplearning.ai/the-batch/) | [](https://rsshub.app/deeplearning/the-batch) | + | [Andrew's Letters](https://www.deeplearning.ai/the-batch/tag/letters/) | [letters](https://rsshub.app/deeplearning/the-batch/letters) | + | [Data Points](https://www.deeplearning.ai/the-batch/tag/data-points/) | [data-points](https://rsshub.app/deeplearning/the-batch/data-points) | + | [ML Research](https://www.deeplearning.ai/the-batch/tag/research/) | [research](https://rsshub.app/deeplearning/the-batch/research) | + | [Business](https://www.deeplearning.ai/the-batch/tag/business/) | [business](https://rsshub.app/deeplearning/the-batch/business) | + | [Science](https://www.deeplearning.ai/the-batch/tag/science/) | [science](https://rsshub.app/deeplearning/the-batch/science) | + | [AI & Society](https://www.deeplearning.ai/the-batch/tag/ai-society/) | [ai-society](https://rsshub.app/deeplearning/the-batch/ai-society) | + | [Culture](https://www.deeplearning.ai/the-batch/tag/culture/) | [culture](https://rsshub.app/deeplearning/the-batch/culture) | + | [Hardware](https://www.deeplearning.ai/the-batch/tag/hardware/) | [hardware](https://rsshub.app/deeplearning/the-batch/hardware) | + | [AI Careers](https://www.deeplearning.ai/the-batch/tag/ai-careers/) | [ai-careers](https://rsshub.app/deeplearning/the-batch/ai-careers) | + + #### [Letters from Andrew Ng](https://www.deeplearning.ai/the-batch/tag/letters/) + + | Tag | ID | + | --------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------- | + | [All](https://www.deeplearning.ai/the-batch/tag/letters/) | [letters](https://rsshub.app/deeplearning/the-batch/letters) | + | [Personal Insights](https://www.deeplearning.ai/the-batch/tag/personal-insights/) | [personal-insights](https://rsshub.app/deeplearning/the-batch/personal-insights) | + | [Technical Insights](https://www.deeplearning.ai/the-batch/tag/technical-insights/) | [technical-insights](https://rsshub.app/deeplearning/the-batch/technical-insights) | + | [Business Insights](https://www.deeplearning.ai/the-batch/tag/business-insights/) | [business-insights](https://rsshub.app/deeplearning/the-batch/business-insights) | + | [Tech & Society](https://www.deeplearning.ai/the-batch/tag/tech-society/) | [tech-society](https://rsshub.app/deeplearning/the-batch/tech-society) | + | [DeepLearning.AI News](https://www.deeplearning.ai/the-batch/tag/deeplearning-ai-news/) | [deeplearning-ai-news](https://rsshub.app/deeplearning/the-batch/deeplearning-ai-news) | + | [AI Careers](https://www.deeplearning.ai/the-batch/tag/ai-careers/) | [ai-careers](https://rsshub.app/deeplearning/the-batch/ai-careers) | + | [Just For Fun](https://www.deeplearning.ai/the-batch/tag/just-for-fun/) | [just-for-fun](https://rsshub.app/deeplearning/the-batch/just-for-fun) | + | [Learning & Education](https://www.deeplearning.ai/the-batch/tag/learning-education/) | [learning-education](https://rsshub.app/deeplearning/the-batch/learning-education) | + `, + categories: ['programming'], + + features: { + requireConfig: false, + requirePuppeteer: false, + antiCrawler: false, + supportRadar: true, + supportBT: false, + supportPodcast: false, + supportScihub: false, + }, + radar: [ + { + source: ['www.deeplearning.ai/the-batch', 'www.deeplearning.ai/the-batch/tag/:tag/'], + target: (params) => { + const tag = params.tag; + + return `/the-batch${tag ? `/${tag}` : ''}`; + }, + }, + { + title: 'Weekly Issues', + source: ['www.deeplearning.ai/the-batch/'], + target: '/the-batch', + }, + { + title: "Andrew's Letters", + source: ['www.deeplearning.ai/the-batch/tag/letters/'], + target: '/the-batch/letters', + }, + { + title: 'Data Points', + source: ['www.deeplearning.ai/the-batch/tag/data-points/'], + target: '/the-batch/data-points', + }, + { + title: 'ML Research', + source: ['www.deeplearning.ai/the-batch/tag/research/'], + target: '/the-batch/research', + }, + { + title: 'Business', + source: ['www.deeplearning.ai/the-batch/tag/business/'], + target: '/the-batch/business', + }, + { + title: 'Science', + source: ['www.deeplearning.ai/the-batch/tag/science/'], + target: '/the-batch/science', + }, + { + title: 'AI & Society', + source: ['www.deeplearning.ai/the-batch/tag/ai-society/'], + target: '/the-batch/ai-society', + }, + { + title: 'Culture', + source: ['www.deeplearning.ai/the-batch/tag/culture/'], + target: '/the-batch/culture', + }, + { + title: 'Hardware', + source: ['www.deeplearning.ai/the-batch/tag/hardware/'], + target: '/the-batch/hardware', + }, + { + title: 'AI Careers', + source: ['www.deeplearning.ai/the-batch/tag/ai-careers/'], + target: '/the-batch/ai-careers', + }, + { + title: 'Letters from Andrew Ng - All', + source: ['www.deeplearning.ai/the-batch/tag/letters/'], + target: '/the-batch/letters', + }, + { + title: 'Letters from Andrew Ng - Personal Insights', + source: ['www.deeplearning.ai/the-batch/tag/personal-insights/'], + target: '/the-batch/personal-insights', + }, + { + title: 'Letters from Andrew Ng - Technical Insights', + source: ['www.deeplearning.ai/the-batch/tag/technical-insights/'], + target: '/the-batch/technical-insights', + }, + { + title: 'Letters from Andrew Ng - Business Insights', + source: ['www.deeplearning.ai/the-batch/tag/business-insights/'], + target: '/the-batch/business-insights', + }, + { + title: 'Letters from Andrew Ng - Tech & Society', + source: ['www.deeplearning.ai/the-batch/tag/tech-society/'], + target: '/the-batch/tech-society', + }, + { + title: 'Letters from Andrew Ng - DeepLearning.AI News', + source: ['www.deeplearning.ai/the-batch/tag/deeplearning-ai-news/'], + target: '/the-batch/deeplearning-ai-news', + }, + { + title: 'Letters from Andrew Ng - AI Careers', + source: ['www.deeplearning.ai/the-batch/tag/ai-careers/'], + target: '/the-batch/ai-careers', + }, + { + title: 'Letters from Andrew Ng - Just For Fun', + source: ['www.deeplearning.ai/the-batch/tag/just-for-fun/'], + target: '/the-batch/just-for-fun', + }, + { + title: 'Letters from Andrew Ng - Learning & Education', + source: ['www.deeplearning.ai/the-batch/tag/learning-education/'], + target: '/the-batch/learning-education', + }, + ], +}; diff --git a/lib/routes/deeplearning/thebatch.ts b/lib/routes/deeplearning/thebatch.ts deleted file mode 100644 index f7816a7e060cc1..00000000000000 --- a/lib/routes/deeplearning/thebatch.ts +++ /dev/null @@ -1,74 +0,0 @@ -import { Route } from '@/types'; -import cache from '@/utils/cache'; -import ofetch from '@/utils/ofetch'; -import * as cheerio from 'cheerio'; -import { parseDate } from '@/utils/parse-date'; - -export const route: Route = { - path: '/thebatch', - categories: ['programming'], - example: '/deeplearning/thebatch', - parameters: {}, - features: { - requireConfig: false, - requirePuppeteer: false, - antiCrawler: false, - supportBT: false, - supportPodcast: false, - supportScihub: false, - }, - radar: [ - { - source: ['www.deeplearning.ai/thebatch', 'www.deeplearning.ai/'], - }, - ], - name: 'TheBatch 周报', - maintainers: ['nczitzk', 'juvenn'], - handler, - url: 'www.deeplearning.ai/thebatch', -}; - -async function handler() { - const baseUrl = 'https://www.deeplearning.ai'; - const link = `${baseUrl}/the-batch/`; - const page = await ofetch(link); - const $ = cheerio.load(page); - const nextJs = $('script#__NEXT_DATA__').text(); - const nextBuildId = JSON.parse(nextJs).buildId; - - const listing = await ofetch(`${baseUrl}/_next/data/${nextBuildId}/the-batch.json`); - - const items = listing.pageProps.posts.map((item) => ({ - title: item.title, - link: `${link}${item.slug}`, - jsonUrl: `${baseUrl}/_next/data/${nextBuildId}/the-batch/${item.slug}.json`, - pubDate: parseDate(item.published_at), - })); - - return { - title: 'The Batch - a new weekly newsletter from deeplearning.ai', - link, - item: await Promise.all( - items.map((item) => - cache.tryGet(item.link, async () => { - const resp = await ofetch(item.jsonUrl); - const $ = cheerio.load(resp.pageProps.cmsData.post.html); - - $('a').each((_, ele) => { - if (ele.attribs.href?.includes('utm_campaign')) { - const url = new URL(ele.attribs.href); - url.searchParams.delete('utm_campaign'); - url.searchParams.delete('utm_source'); - url.searchParams.delete('utm_medium'); - url.searchParams.delete('_hsenc'); - ele.attribs.href = url.href; - } - }); - - item.description = $.html(); - return item; - }) - ) - ), - }; -}