Replace rss-parser and manual work with extractus
This commit is contained in:
parent
9d7275b69e
commit
10d017a511
3 changed files with 15 additions and 119 deletions
BIN
bun.lockb
BIN
bun.lockb
Binary file not shown.
|
|
@ -28,8 +28,8 @@
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@astrojs/tailwind": "^5.1.0",
|
"@astrojs/tailwind": "^5.1.0",
|
||||||
"astro": "4.15.6",
|
"@extractus/feed-extractor": "^7.1.3",
|
||||||
"rss-parser": "^3.13.0"
|
"astro": "4.15.6"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@biomejs/biome": "^1.8.1",
|
"@biomejs/biome": "^1.8.1",
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
import Parser from "rss-parser";
|
import { extract } from "@extractus/feed-extractor";
|
||||||
|
|
||||||
interface FeedItem {
|
interface FeedItem {
|
||||||
title: string;
|
title: string;
|
||||||
|
|
@ -9,10 +9,6 @@ interface FeedItem {
|
||||||
category: string;
|
category: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
const MAX_CONNECTIONS = Number.POSITIVE_INFINITY;
|
|
||||||
const DELAY_MS = 850;
|
|
||||||
const parser = new Parser();
|
|
||||||
|
|
||||||
function readFeedCategoriesFromEnv(): Record<string, string[]> {
|
function readFeedCategoriesFromEnv(): Record<string, string[]> {
|
||||||
if (import.meta.env.FEEDS) {
|
if (import.meta.env.FEEDS) {
|
||||||
return JSON.parse(import.meta.env.FEEDS);
|
return JSON.parse(import.meta.env.FEEDS);
|
||||||
|
|
@ -20,130 +16,30 @@ function readFeedCategoriesFromEnv(): Record<string, string[]> {
|
||||||
throw new Error("FEEDS environment variable is not set");
|
throw new Error("FEEDS environment variable is not set");
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getRawFeedContents(response: Response): Promise<unknown> {
|
|
||||||
const contentType = response.headers.get("content-type")?.split(";")[0];
|
|
||||||
if (!contentType) return {};
|
|
||||||
if (
|
|
||||||
[
|
|
||||||
"application/atom+xml",
|
|
||||||
"application/rss+xml",
|
|
||||||
"application/xml",
|
|
||||||
"text/xml",
|
|
||||||
"text/html",
|
|
||||||
].includes(contentType)
|
|
||||||
) {
|
|
||||||
return response.text();
|
|
||||||
}
|
|
||||||
if (["application/json", "application/feed+json"].includes(contentType)) {
|
|
||||||
return response.json();
|
|
||||||
}
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
interface RawFeedItem {
|
|
||||||
creator?: string;
|
|
||||||
title: string;
|
|
||||||
link: string;
|
|
||||||
pubDate: string;
|
|
||||||
"content:encoded"?: string;
|
|
||||||
"content:encodedSnippet"?: string;
|
|
||||||
"dc:creator"?: string;
|
|
||||||
comments?: string;
|
|
||||||
content: string;
|
|
||||||
contentSnippet: string;
|
|
||||||
guid: string;
|
|
||||||
categories: unknown[];
|
|
||||||
isoDate: string;
|
|
||||||
[other: string]: unknown;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface RawFeed {
|
|
||||||
items: RawFeedItem[];
|
|
||||||
feedUrl?: string;
|
|
||||||
image?: {
|
|
||||||
link: string;
|
|
||||||
url: string;
|
|
||||||
title: string;
|
|
||||||
width: string;
|
|
||||||
height: string;
|
|
||||||
};
|
|
||||||
pagenationLinks?: {
|
|
||||||
self: string;
|
|
||||||
next: string;
|
|
||||||
};
|
|
||||||
title: string;
|
|
||||||
description: string;
|
|
||||||
generator: string;
|
|
||||||
link: string;
|
|
||||||
language?: string;
|
|
||||||
lastBuildDate?: string;
|
|
||||||
[other: string]: unknown;
|
|
||||||
}
|
|
||||||
|
|
||||||
function getTitle(item: RawFeed | RawFeedItem): string {
|
|
||||||
const titleValues: (keyof RawFeed | keyof RawFeedItem)[] = [
|
|
||||||
"title",
|
|
||||||
"url",
|
|
||||||
"link",
|
|
||||||
];
|
|
||||||
const keys = Object.keys(item);
|
|
||||||
const titleProperty = titleValues.find(
|
|
||||||
(titleValue) => keys.includes(titleValue) && item[titleValue],
|
|
||||||
);
|
|
||||||
return titleProperty ? (item[titleProperty] as string) : "";
|
|
||||||
}
|
|
||||||
|
|
||||||
function getLink(item: RawFeed | RawFeedItem): string {
|
|
||||||
const linkValues: (keyof RawFeed | keyof RawFeedItem)[] = [
|
|
||||||
"link",
|
|
||||||
"url",
|
|
||||||
"guid",
|
|
||||||
"home_page_url",
|
|
||||||
];
|
|
||||||
const keys = Object.keys(item);
|
|
||||||
const linkProperty = linkValues.find((linkValue) => keys.includes(linkValue));
|
|
||||||
return linkProperty ? (item[linkProperty] as string) : "";
|
|
||||||
}
|
|
||||||
|
|
||||||
function getTimestamp(item: RawFeedItem): number {
|
|
||||||
const dateString =
|
|
||||||
item.pubDate || item.isoDate || item.date || item.date_published;
|
|
||||||
if (!dateString || typeof dateString !== "string") {
|
|
||||||
return Date.now();
|
|
||||||
}
|
|
||||||
const timestamp = new Date(dateString).getTime();
|
|
||||||
return Number.isNaN(timestamp) ? Date.now() : timestamp;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function parseFeedContents(
|
async function parseFeedContents(
|
||||||
feedUrl: string,
|
feedUrl: string,
|
||||||
category: string,
|
category: string,
|
||||||
): Promise<FeedItem[]> {
|
): Promise<FeedItem[]> {
|
||||||
console.log(`Fetching: ${feedUrl}...`);
|
console.log(`Fetching: ${feedUrl}...`);
|
||||||
const response = await fetch(feedUrl);
|
let items: FeedItem[] = [];
|
||||||
const body = await getRawFeedContents(response);
|
|
||||||
if (!body) {
|
|
||||||
throw new Error(`Failed to fetch feed: ${feedUrl}`);
|
|
||||||
}
|
|
||||||
try {
|
try {
|
||||||
const rawFeed = (
|
const result = await extract(feedUrl, {
|
||||||
typeof body === "string" ? await parser.parseString(body) : body
|
descriptionMaxLen: 1,
|
||||||
) as RawFeed;
|
useISODateFormat: false,
|
||||||
const feedName = getTitle(rawFeed);
|
});
|
||||||
const feedLink = getLink(rawFeed);
|
items = (result.entries ?? []).map((entry) => ({
|
||||||
const items: FeedItem[] = rawFeed.items.flatMap((item) => ({
|
feedName: result.title,
|
||||||
feedName,
|
feedLink: result.link,
|
||||||
feedLink,
|
|
||||||
category,
|
category,
|
||||||
title: item.title,
|
title: entry.title,
|
||||||
pubIsoDate: getTimestamp(item),
|
pubIsoDate: new Date(entry.published).getTime(),
|
||||||
link: item.link,
|
link: entry.link,
|
||||||
}));
|
}));
|
||||||
return items;
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error(`${feedUrl}\n${err}`);
|
console.error(`${feedUrl}\n${err}`);
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
|
return items;
|
||||||
}
|
}
|
||||||
|
|
||||||
export default async function getAllFeedItems(): Promise<{
|
export default async function getAllFeedItems(): Promise<{
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue