Files
astro-website/site/src/lib/ingest/youtube.ts
2026-02-10 02:34:25 -05:00

140 lines
4.3 KiB
TypeScript

import Parser from "rss-parser";
import type { ContentItem } from "../content/types";
/**
 * Intermediate, flattened view of a YouTube Data API video resource, used
 * while converting API responses into `ContentItem`s.
 */
type YoutubeApiVideo = {
  /** Video id as reported by the API. */
  id: string;
  /** Watch-page URL built from the video id. */
  url: string;
  /** Video title, copied from the API snippet. */
  title: string;
  /** Publish timestamp as an ISO 8601 string. */
  publishedAt: string;
  /** Plain-text, shortened description; absent when the video has none. */
  summary?: string;
  /** Thumbnail URL, when the API supplied one. */
  thumbnailUrl?: string;
  /** View count, when statistics were returned for the video. */
  views?: number;
};
/**
 * Turns a snippet of markup into single-spaced plain text.
 *
 * Every `<...>` run is replaced by a space, consecutive whitespace is
 * collapsed to one space, and the ends are trimmed. Falsy input yields "".
 * This is a regex-based cleanup, not an HTML parser: entities are left as-is
 * and a bare `<` followed later by `>` is treated as a tag.
 *
 * @param s Text that may contain HTML tags.
 * @returns The text without tags and without redundant whitespace.
 */
function stripHtml(s: string) {
  const raw = s ? s : "";
  const withoutTags = raw.replace(/<[^>]+>/g, " ");
  const singleSpaced = withoutTags.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
/**
 * Strips HTML from `s` and shortens the result to at most `n` characters.
 *
 * Text that already fits is returned unchanged. Longer text is cut to
 * `n - 1` characters, trailing whitespace is removed, and a single ellipsis
 * character is appended so readers can tell the text was shortened. The
 * returned string is never longer than `n`.
 *
 * @param s Raw text, possibly containing HTML tags.
 * @param n Maximum length of the returned string.
 * @returns The cleaned (and possibly shortened) text, or "" when there is
 *   no text or no room for any output.
 */
function truncate(s: string, n: number) {
  const text = stripHtml(s);
  if (!text) return "";
  if (text.length <= n) return text;
  // Not even the ellipsis fits; keep returning an empty string.
  if (n < 1) return "";
  // One slot is reserved for the ellipsis, hence `n - 1`.
  return `${text.slice(0, n - 1).trimEnd()}\u2026`;
}
/**
 * Downloads a channel's public video feed from youtube.com and converts its
 * entries into `ContentItem`s.
 *
 * @param channelId YouTube channel id; URL-encoded before use.
 * @param limit Maximum number of feed entries to consider (default 20).
 * @returns The normalized items produced by `normalizeYoutubeRssFeedItems`.
 */
export async function fetchYoutubeViaRss(channelId: string, limit = 20): Promise<ContentItem[]> {
  const channelParam = encodeURIComponent(channelId);
  const rss = new Parser();
  const { items } = await rss.parseURL(
    `https://www.youtube.com/feeds/videos.xml?channel_id=${channelParam}`,
  );
  return normalizeYoutubeRssFeedItems(items || [], limit);
}
/**
 * Performs a GET request and returns the parsed JSON body.
 *
 * The body is cast to `T` without runtime validation, so callers are
 * responsible for passing a type that matches the endpoint.
 *
 * @param url Fully built request URL, including the query string.
 * @returns The decoded JSON payload.
 * @throws Error when the response status is not in the 2xx range.
 */
async function youtubeApiGetJson<T>(url: string): Promise<T> {
  const response = await fetch(url);
  if (response.ok) {
    const payload = await response.json();
    return payload as T;
  }
  throw new Error(`YouTube API request failed: ${response.status} ${response.statusText}`);
}
/**
 * Converts parsed feed entries into `ContentItem`s.
 *
 * Only the first `limit` entries are considered; entries that end up without
 * a URL or a title are dropped afterwards, so fewer than `limit` items may be
 * returned.
 *
 * Field mapping per entry:
 * - `url` comes from `link`; `id` from `id`, falling back to the URL.
 * - `publishedAt` comes from `isoDate`, then `pubDate`. A missing or
 *   unparsable date becomes the Unix epoch instead of throwing, so a single
 *   malformed entry cannot abort the whole feed.
 * - `summary` is the first non-empty of `contentSnippet`, `summary`,
 *   `content`, `content:encoded`, reduced to at most 240 plain-text chars.
 * - `thumbnailUrl` comes from `enclosure.url` when present.
 *
 * @param items Feed entries (presumably rss-parser items, as passed by
 *   `fetchYoutubeViaRss`); a falsy value is treated as an empty list.
 * @param limit Maximum number of entries to read from the start of `items`.
 * @returns Normalized items in feed order.
 */
export function normalizeYoutubeRssFeedItems(items: any[], limit: number): ContentItem[] {
  const epochIso = new Date(0).toISOString();
  return (items || [])
    .slice(0, limit)
    .map((it) => {
      const url = it.link || "";
      const id = (it.id || url).toString();
      // `toISOString()` throws a RangeError on an invalid Date, so validate
      // the parsed value first and fall back to the epoch.
      const parsedDate = new Date((it.isoDate || it.pubDate || "").toString());
      const publishedAt = Number.isNaN(parsedDate.getTime())
        ? epochIso
        : parsedDate.toISOString();
      const summary = truncate(
        (it.contentSnippet || it.summary || it.content || it["content:encoded"] || "").toString(),
        240,
      );
      return {
        id,
        source: "youtube" as const,
        url,
        title: (it.title || "").toString(),
        summary: summary || undefined,
        publishedAt,
        thumbnailUrl: (it.enclosure?.url || undefined) as string | undefined,
      };
    })
    .filter((x) => x.url && x.title);
}
/**
 * Converts YouTube Data API video resources into `ContentItem`s.
 *
 * Each resource is first flattened into a `YoutubeApiVideo` (watch URL built
 * from the id, description reduced to at most 240 plain-text characters,
 * `high` thumbnail preferred over `default`, view count parsed to a number)
 * and then reshaped into a `ContentItem` whose `metrics` is set only when a
 * view count was available.
 *
 * @param items Video resources carrying `snippet` and optional `statistics`;
 *   a falsy value is treated as an empty list.
 * @returns One `ContentItem` per input resource, in input order.
 */
export function normalizeYoutubeApiVideos(
  items: Array<{
    id: string;
    snippet: {
      title: string;
      description?: string;
      publishedAt: string;
      thumbnails?: Record<string, { url: string }>;
    };
    statistics?: { viewCount?: string };
  }>,
): ContentItem[] {
  // Step 1: flatten one API resource into the intermediate shape.
  const toVideo = (resource: (typeof items)[number]): YoutubeApiVideo => {
    const watchUrl = `https://www.youtube.com/watch?v=${encodeURIComponent(resource.id)}`;
    const description = resource.snippet.description;
    const thumbs = resource.snippet.thumbnails;
    const rawViews = resource.statistics?.viewCount;
    return {
      id: resource.id,
      url: watchUrl,
      title: resource.snippet.title,
      summary: description ? truncate(description, 240) : undefined,
      publishedAt: new Date(resource.snippet.publishedAt).toISOString(),
      thumbnailUrl: thumbs?.high?.url || thumbs?.default?.url,
      views: rawViews ? Number(rawViews) : undefined,
    };
  };

  // Step 2: reshape the intermediate record into the shared content model.
  const toContentItem = (video: YoutubeApiVideo): ContentItem => {
    const metrics = video.views !== undefined ? { views: video.views } : undefined;
    return {
      id: video.id,
      source: "youtube",
      url: video.url,
      title: video.title,
      summary: video.summary,
      publishedAt: video.publishedAt,
      thumbnailUrl: video.thumbnailUrl,
      metrics,
    };
  };

  const flattened: YoutubeApiVideo[] = (items || []).map((resource) => toVideo(resource));
  return flattened.map<ContentItem>((video) => toContentItem(video));
}
/**
 * Fetches a channel's most recent videos through the YouTube Data API v3.
 *
 * Two requests are made: `search` (newest first, videos only) to discover
 * the video ids, then `videos` to load snippet and statistics for those ids.
 * The second response is converted with `normalizeYoutubeApiVideos`.
 *
 * @param channelId Channel whose uploads are listed.
 * @param apiKey Data API key, sent as the `key` query parameter.
 * @param limit Desired number of videos (default 20). Truncated to an
 *   integer and clamped to 0..50, the page size the API accepts; an
 *   effective limit of 0 returns an empty list without any request.
 * @returns Normalized content items; empty when the search finds no videos.
 * @throws Error when either API request returns a non-2xx status.
 */
export async function fetchYoutubeViaApi(
  channelId: string,
  apiKey: string,
  limit = 20,
): Promise<ContentItem[]> {
  // The API rejects maxResults outside 0..50 (and non-integers), so sanitize
  // the caller's value instead of letting the request fail.
  const pageSize = Math.min(Math.max(Math.trunc(limit) || 0, 0), 50);
  if (pageSize === 0) return [];

  // 1) Get latest video IDs from channel.
  const searchUrl =
    "https://www.googleapis.com/youtube/v3/search" +
    `?part=snippet&channelId=${encodeURIComponent(channelId)}` +
    `&maxResults=${encodeURIComponent(String(pageSize))}` +
    `&order=date&type=video&key=${encodeURIComponent(apiKey)}`;
  const search = await youtubeApiGetJson<{
    items: Array<{
      id: { videoId: string };
      snippet: { title: string; publishedAt: string; thumbnails?: any };
    }>;
  }>(searchUrl);
  const videoIds = (search.items || []).map((x) => x.id.videoId).filter(Boolean);
  if (videoIds.length === 0) return [];

  // 2) Fetch statistics.
  // NOTE(review): the videos.list docs describe maxResults as not supported
  // together with `id`; it is kept here to preserve the existing request.
  const videosUrl =
    "https://www.googleapis.com/youtube/v3/videos" +
    `?part=snippet,statistics&maxResults=${encodeURIComponent(String(videoIds.length))}` +
    `&id=${encodeURIComponent(videoIds.join(","))}` +
    `&key=${encodeURIComponent(apiKey)}`;
  const videos = await youtubeApiGetJson<{
    items: Array<{
      id: string;
      snippet: { title: string; publishedAt: string; thumbnails?: Record<string, { url: string }> };
      statistics?: { viewCount?: string };
    }>;
  }>(videosUrl);
  return normalizeYoutubeApiVideos(videos.items || []);
}