From 5f256bf9f8ae2b13223859bf4052621bf097ea31 Mon Sep 17 00:00:00 2001
From: Ethan Shen <42264778+nczitzk@users.noreply.github.com>
Date: Sun, 10 Nov 2024 02:51:14 +0800
Subject: [PATCH] fix(route): DeepLearning.AI The Batch with tags (#17514)
---
lib/routes/deeplearning/namespace.ts | 4 +-
.../deeplearning/templates/description.art | 21 ++
lib/routes/deeplearning/the-batch.ts | 296 ++++++++++++++++++
lib/routes/deeplearning/thebatch.ts | 74 -----
4 files changed, 320 insertions(+), 75 deletions(-)
create mode 100644 lib/routes/deeplearning/templates/description.art
create mode 100644 lib/routes/deeplearning/the-batch.ts
delete mode 100644 lib/routes/deeplearning/thebatch.ts
diff --git a/lib/routes/deeplearning/namespace.ts b/lib/routes/deeplearning/namespace.ts
index 82bbae5596d1f6..85f560dc8625c0 100644
--- a/lib/routes/deeplearning/namespace.ts
+++ b/lib/routes/deeplearning/namespace.ts
@@ -1,7 +1,9 @@
import type { Namespace } from '@/types';
export const namespace: Namespace = {
- name: 'deeplearning.ai',
+ name: 'DeepLearning.AI',
url: 'www.deeplearning.ai',
+ categories: ['programming'],
+ description: '',
lang: 'en',
};
diff --git a/lib/routes/deeplearning/templates/description.art b/lib/routes/deeplearning/templates/description.art
new file mode 100644
index 00000000000000..249654e7e618a4
--- /dev/null
+++ b/lib/routes/deeplearning/templates/description.art
@@ -0,0 +1,21 @@
+{{ if images }}
+ {{ each images image }}
+ {{ if image?.src }}
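+            <figure>
+                <img
+                    {{ if image.alt }}
+                        alt="{{ image.alt }}"
+                    {{ /if }}
+                    src="{{ image.src }}">
+            </figure>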
+ {{ /if }}
+ {{ /each }}
+{{ /if }}
+
+{{ if intro }}
+    <blockquote>{{ intro }}</blockquote>
+{{ /if }}
+
+{{ if description }}
+ {{@ description }}
+{{ /if }}
\ No newline at end of file
diff --git a/lib/routes/deeplearning/the-batch.ts b/lib/routes/deeplearning/the-batch.ts
new file mode 100644
index 00000000000000..3456d03f822d2a
--- /dev/null
+++ b/lib/routes/deeplearning/the-batch.ts
@@ -0,0 +1,296 @@
+import { Route } from '@/types';
+import { getCurrentPath } from '@/utils/helpers';
+const __dirname = getCurrentPath(import.meta.url);
+
+import cache from '@/utils/cache';
+import ofetch from '@/utils/ofetch';
+import { load } from 'cheerio';
+import { parseDate } from '@/utils/parse-date';
+import { art } from '@/utils/render';
+import path from 'node:path';
+
+const rootUrl = 'https://www.deeplearning.ai';
+const defaultLimit = 1;
+
+/** Renders the description template from a post's cover image, excerpt and (optionally) its body HTML. */
+const renderDescription = (post, html?: string) =>
+    art(path.join(__dirname, 'templates/description.art'), {
+        images: post.feature_image
+            ? [
+                  {
+                      src: post.feature_image,
+                      alt: post.feature_image_alt,
+                  },
+              ]
+            : undefined,
+        intro: post.excerpt ?? post.custom_excerpt,
+        description: html,
+    });
+
+/**
+ * Maps a post object (listing entry or detail payload) to a feed item.
+ *
+ * @param post Post object carrying `title`, `slug`, `published_at` and the optional image/excerpt/tag/author fields.
+ * @param language Value of the listing page's `<html lang>` attribute, copied onto the item.
+ * @param html Cleaned article body; omitted when only listing data is available.
+ */
+const buildItem = (post, language, html?: string) => {
+    const description = renderDescription(post, html);
+    const guid = `the-batch-${post.slug}`;
+
+    return {
+        title: post.title,
+        description,
+        pubDate: parseDate(post.published_at),
+        // Always the public article URL, never the internal `_next/data` JSON endpoint.
+        link: new URL(`the-batch/${post.slug}`, rootUrl).href,
+        // Optional chaining: a payload without `tags`/`authors` must not abort the whole feed.
+        category: post.tags?.map((t) => t.name) ?? [],
+        author: post.authors?.map((a) => a.name).join('/'),
+        guid,
+        id: guid,
+        content: {
+            html: description,
+            text: post.excerpt ?? post.custom_excerpt,
+        },
+        image: post.feature_image,
+        banner: post.feature_image,
+        updated: post.updated_at ? parseDate(post.updated_at) : undefined,
+        language,
+    };
+};
+
+/** Removes tracking query parameters from links in the article body and returns the resulting HTML. */
+const stripTracking = (html) => {
+    const $$ = load(html);
+
+    $$('a').each((_, ele) => {
+        if (ele.attribs.href?.includes('utm_campaign')) {
+            // Resolve against the site root so a relative href cannot make `new URL` throw.
+            const url = new URL(ele.attribs.href, rootUrl);
+            for (const key of ['utm_campaign', 'utm_source', 'utm_medium', '_hsenc']) {
+                url.searchParams.delete(key);
+            }
+            ele.attribs.href = url.href;
+        }
+    });
+
+    return $$.html();
+};
+
+/**
+ * Builds the feed for The Batch front page or, when a `tag` route parameter is given, that tag's listing.
+ *
+ * Posts are read from the listing page's `__NEXT_DATA__` JSON. Each post's full body is then fetched from
+ * `_next/data/<buildId>/the-batch/<slug>.json`, and the finished item goes through `cache.tryGet` keyed by that URL.
+ *
+ * @param ctx Request context; the `tag` route parameter and the `limit` query parameter are read from it.
+ * @returns Feed metadata taken from the listing page's `<title>` and Open Graph tags, plus the items.
+ */
+export const handler = async (ctx) => {
+    const { tag } = ctx.req.param();
+
+    // A missing, non-numeric or non-positive `limit` falls back to the default instead of emptying the feed.
+    const requestedLimit = Number.parseInt(ctx.req.query('limit') ?? '', 10);
+    const limit = requestedLimit > 0 ? requestedLimit : defaultLimit;
+
+    const currentUrl = new URL(`the-batch${tag ? `/tag/${tag.replace(/^tag\//, '').replace(/\/$/, '')}` : ''}/`, rootUrl).href;
+
+    const response = await ofetch(currentUrl);
+    const $ = load(response);
+
+    const language = $('html').prop('lang');
+
+    const data = JSON.parse($('script#__NEXT_DATA__').text());
+
+    const nextBuildId = data.buildId;
+    const posts = data.props?.pageProps?.posts ?? [];
+
+    const items = await Promise.all(
+        posts.slice(0, limit).map((listPost) => {
+            const detailUrl = new URL(`_next/data/${nextBuildId}/the-batch/${listPost.slug}.json`, rootUrl).href;
+
+            return cache.tryGet(detailUrl, async () => {
+                const detailResponse = await ofetch(detailUrl);
+
+                const post = detailResponse.pageProps?.cmsData?.post;
+
+                // Without a detail payload, fall back to an excerpt-only item built from the listing data.
+                return post ? buildItem(post, language, stripTracking(post.html)) : buildItem(listPost, language);
+            });
+        })
+    );
+
+    // Leave the feed image unset when the page has no `og:image` (it would otherwise resolve to `<rootUrl>/undefined`).
+    const ogImage = $('meta[property="og:image"]').prop('content');
+    const image = ogImage ? new URL(ogImage, rootUrl).href : undefined;
+
+    return {
+        title: $('title').text(),
+        description: $('meta[property="og:description"]').prop('content'),
+        link: currentUrl,
+        item: items,
+        allowEmpty: true,
+        image,
+        author: $('meta[property="og:site_name"]').prop('content'),
+        language,
+    };
+};
+/** Route definition for The Batch: binds `handler` to `/the-batch/:tag{.+}?` (the tag segment is optional) and lists the radar rules mapping deeplearning.ai pages to this route's paths. */
+export const route: Route = {
+ path: '/the-batch/:tag{.+}?',
+ name: 'The Batch',
+ url: 'www.deeplearning.ai',
+ maintainers: ['nczitzk', 'juvenn', 'TonyRL'],
+ handler,
+ example: '/deeplearning/the-batch',
+ parameters: { tag: 'Tag, Weekly Issues by default' },
+ description: `:::tip
+ If you subscribe to [Data Points](https://www.deeplearning.ai/the-batch/tag/data-points/),where the URL is \`https://www.deeplearning.ai/the-batch/tag/data-points/\`, extract the part \`https://www.deeplearning.ai/the-batch/tag\` to the end, which is \`data-points\`, and use it as the parameter to fill in. Therefore, the route will be [\`/deeplearning/the-batch/data-points\`](https://rsshub.app/deeplearning/the-batch/data-points).
+
+ :::
+
+ | Tag | ID |
+ | ---------------------------------------------------------------------- | -------------------------------------------------------------------- |
+ | [Weekly Issues](https://www.deeplearning.ai/the-batch/) | [](https://rsshub.app/deeplearning/the-batch) |
+ | [Andrew's Letters](https://www.deeplearning.ai/the-batch/tag/letters/) | [letters](https://rsshub.app/deeplearning/the-batch/letters) |
+ | [Data Points](https://www.deeplearning.ai/the-batch/tag/data-points/) | [data-points](https://rsshub.app/deeplearning/the-batch/data-points) |
+ | [ML Research](https://www.deeplearning.ai/the-batch/tag/research/) | [research](https://rsshub.app/deeplearning/the-batch/research) |
+ | [Business](https://www.deeplearning.ai/the-batch/tag/business/) | [business](https://rsshub.app/deeplearning/the-batch/business) |
+ | [Science](https://www.deeplearning.ai/the-batch/tag/science/) | [science](https://rsshub.app/deeplearning/the-batch/science) |
+ | [AI & Society](https://www.deeplearning.ai/the-batch/tag/ai-society/) | [ai-society](https://rsshub.app/deeplearning/the-batch/ai-society) |
+ | [Culture](https://www.deeplearning.ai/the-batch/tag/culture/) | [culture](https://rsshub.app/deeplearning/the-batch/culture) |
+ | [Hardware](https://www.deeplearning.ai/the-batch/tag/hardware/) | [hardware](https://rsshub.app/deeplearning/the-batch/hardware) |
+ | [AI Careers](https://www.deeplearning.ai/the-batch/tag/ai-careers/) | [ai-careers](https://rsshub.app/deeplearning/the-batch/ai-careers) |
+
+ #### [Letters from Andrew Ng](https://www.deeplearning.ai/the-batch/tag/letters/)
+
+ | Tag | ID |
+ | --------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------- |
+ | [All](https://www.deeplearning.ai/the-batch/tag/letters/) | [letters](https://rsshub.app/deeplearning/the-batch/letters) |
+ | [Personal Insights](https://www.deeplearning.ai/the-batch/tag/personal-insights/) | [personal-insights](https://rsshub.app/deeplearning/the-batch/personal-insights) |
+ | [Technical Insights](https://www.deeplearning.ai/the-batch/tag/technical-insights/) | [technical-insights](https://rsshub.app/deeplearning/the-batch/technical-insights) |
+ | [Business Insights](https://www.deeplearning.ai/the-batch/tag/business-insights/) | [business-insights](https://rsshub.app/deeplearning/the-batch/business-insights) |
+ | [Tech & Society](https://www.deeplearning.ai/the-batch/tag/tech-society/) | [tech-society](https://rsshub.app/deeplearning/the-batch/tech-society) |
+ | [DeepLearning.AI News](https://www.deeplearning.ai/the-batch/tag/deeplearning-ai-news/) | [deeplearning-ai-news](https://rsshub.app/deeplearning/the-batch/deeplearning-ai-news) |
+ | [AI Careers](https://www.deeplearning.ai/the-batch/tag/ai-careers/) | [ai-careers](https://rsshub.app/deeplearning/the-batch/ai-careers) |
+ | [Just For Fun](https://www.deeplearning.ai/the-batch/tag/just-for-fun/) | [just-for-fun](https://rsshub.app/deeplearning/the-batch/just-for-fun) |
+ | [Learning & Education](https://www.deeplearning.ai/the-batch/tag/learning-education/) | [learning-education](https://rsshub.app/deeplearning/the-batch/learning-education) |
+ `,
+ categories: ['programming'],
+ // Only `supportRadar` is enabled below. NOTE(review): flag meanings presumably follow the project's Route `features` schema - confirm there.
+ features: {
+ requireConfig: false,
+ requirePuppeteer: false,
+ antiCrawler: false,
+ supportRadar: true,
+ supportBT: false,
+ supportPodcast: false,
+ supportScihub: false,
+ },
+ radar: [
+ {
+ source: ['www.deeplearning.ai/the-batch', 'www.deeplearning.ai/the-batch/tag/:tag/'],
+ target: (params) => {
+ const tag = params.tag;
+ // The first source pattern has no `:tag` segment; when no tag is present the target is the bare `/the-batch` route.
+ return `/the-batch${tag ? `/${tag}` : ''}`;
+ },
+ },
+ {
+ title: 'Weekly Issues',
+ source: ['www.deeplearning.ai/the-batch/'],
+ target: '/the-batch',
+ },
+ {
+ title: "Andrew's Letters",
+ source: ['www.deeplearning.ai/the-batch/tag/letters/'],
+ target: '/the-batch/letters',
+ },
+ {
+ title: 'Data Points',
+ source: ['www.deeplearning.ai/the-batch/tag/data-points/'],
+ target: '/the-batch/data-points',
+ },
+ {
+ title: 'ML Research',
+ source: ['www.deeplearning.ai/the-batch/tag/research/'],
+ target: '/the-batch/research',
+ },
+ {
+ title: 'Business',
+ source: ['www.deeplearning.ai/the-batch/tag/business/'],
+ target: '/the-batch/business',
+ },
+ {
+ title: 'Science',
+ source: ['www.deeplearning.ai/the-batch/tag/science/'],
+ target: '/the-batch/science',
+ },
+ {
+ title: 'AI & Society',
+ source: ['www.deeplearning.ai/the-batch/tag/ai-society/'],
+ target: '/the-batch/ai-society',
+ },
+ {
+ title: 'Culture',
+ source: ['www.deeplearning.ai/the-batch/tag/culture/'],
+ target: '/the-batch/culture',
+ },
+ {
+ title: 'Hardware',
+ source: ['www.deeplearning.ai/the-batch/tag/hardware/'],
+ target: '/the-batch/hardware',
+ },
+ {
+ title: 'AI Careers',
+ source: ['www.deeplearning.ai/the-batch/tag/ai-careers/'],
+ target: '/the-batch/ai-careers',
+ },
+ {
+ title: 'Letters from Andrew Ng - All', // same source and target as the "Andrew's Letters" entry above
+ source: ['www.deeplearning.ai/the-batch/tag/letters/'],
+ target: '/the-batch/letters',
+ },
+ {
+ title: 'Letters from Andrew Ng - Personal Insights',
+ source: ['www.deeplearning.ai/the-batch/tag/personal-insights/'],
+ target: '/the-batch/personal-insights',
+ },
+ {
+ title: 'Letters from Andrew Ng - Technical Insights',
+ source: ['www.deeplearning.ai/the-batch/tag/technical-insights/'],
+ target: '/the-batch/technical-insights',
+ },
+ {
+ title: 'Letters from Andrew Ng - Business Insights',
+ source: ['www.deeplearning.ai/the-batch/tag/business-insights/'],
+ target: '/the-batch/business-insights',
+ },
+ {
+ title: 'Letters from Andrew Ng - Tech & Society',
+ source: ['www.deeplearning.ai/the-batch/tag/tech-society/'],
+ target: '/the-batch/tech-society',
+ },
+ {
+ title: 'Letters from Andrew Ng - DeepLearning.AI News',
+ source: ['www.deeplearning.ai/the-batch/tag/deeplearning-ai-news/'],
+ target: '/the-batch/deeplearning-ai-news',
+ },
+ {
+ title: 'Letters from Andrew Ng - AI Careers', // same source and target as the "AI Careers" entry above
+ source: ['www.deeplearning.ai/the-batch/tag/ai-careers/'],
+ target: '/the-batch/ai-careers',
+ },
+ {
+ title: 'Letters from Andrew Ng - Just For Fun',
+ source: ['www.deeplearning.ai/the-batch/tag/just-for-fun/'],
+ target: '/the-batch/just-for-fun',
+ },
+ {
+ title: 'Letters from Andrew Ng - Learning & Education',
+ source: ['www.deeplearning.ai/the-batch/tag/learning-education/'],
+ target: '/the-batch/learning-education',
+ },
+ ],
+};
diff --git a/lib/routes/deeplearning/thebatch.ts b/lib/routes/deeplearning/thebatch.ts
deleted file mode 100644
index f7816a7e060cc1..00000000000000
--- a/lib/routes/deeplearning/thebatch.ts
+++ /dev/null
@@ -1,74 +0,0 @@
-import { Route } from '@/types';
-import cache from '@/utils/cache';
-import ofetch from '@/utils/ofetch';
-import * as cheerio from 'cheerio';
-import { parseDate } from '@/utils/parse-date';
-
-export const route: Route = {
- path: '/thebatch',
- categories: ['programming'],
- example: '/deeplearning/thebatch',
- parameters: {},
- features: {
- requireConfig: false,
- requirePuppeteer: false,
- antiCrawler: false,
- supportBT: false,
- supportPodcast: false,
- supportScihub: false,
- },
- radar: [
- {
- source: ['www.deeplearning.ai/thebatch', 'www.deeplearning.ai/'],
- },
- ],
- name: 'TheBatch 周报',
- maintainers: ['nczitzk', 'juvenn'],
- handler,
- url: 'www.deeplearning.ai/thebatch',
-};
-
-async function handler() {
- const baseUrl = 'https://www.deeplearning.ai';
- const link = `${baseUrl}/the-batch/`;
- const page = await ofetch(link);
- const $ = cheerio.load(page);
- const nextJs = $('script#__NEXT_DATA__').text();
- const nextBuildId = JSON.parse(nextJs).buildId;
-
- const listing = await ofetch(`${baseUrl}/_next/data/${nextBuildId}/the-batch.json`);
-
- const items = listing.pageProps.posts.map((item) => ({
- title: item.title,
- link: `${link}${item.slug}`,
- jsonUrl: `${baseUrl}/_next/data/${nextBuildId}/the-batch/${item.slug}.json`,
- pubDate: parseDate(item.published_at),
- }));
-
- return {
- title: 'The Batch - a new weekly newsletter from deeplearning.ai',
- link,
- item: await Promise.all(
- items.map((item) =>
- cache.tryGet(item.link, async () => {
- const resp = await ofetch(item.jsonUrl);
- const $ = cheerio.load(resp.pageProps.cmsData.post.html);
-
- $('a').each((_, ele) => {
- if (ele.attribs.href?.includes('utm_campaign')) {
- const url = new URL(ele.attribs.href);
- url.searchParams.delete('utm_campaign');
- url.searchParams.delete('utm_source');
- url.searchParams.delete('utm_medium');
- url.searchParams.delete('_hsenc');
- ele.attribs.href = url.href;
- }
- });
-
- item.description = $.html();
- return item;
- })
- )
- ),
- };
-}