Files
astro-website/site/scripts/fetch-content.ts
Santhosh Janardhanan 5d07e57256
Some checks failed
ci / site (push) Has been cancelled
publish-image / publish (push) Has been cancelled
reduce bounce rate
2026-02-10 17:36:34 -05:00

219 lines
7.3 KiB
TypeScript

import "dotenv/config";
import { promises as fs } from "node:fs";
import path from "node:path";
import { getIngestConfigFromEnv } from "../src/lib/config";
import { createCacheFromEnv } from "../src/lib/cache";
import { cachedCompute } from "../src/lib/cache/memoize";
import type { ContentCache, ContentItem } from "../src/lib/content/types";
import { readInstagramEmbedPosts } from "../src/lib/ingest/instagram";
import { fetchPodcastRss } from "../src/lib/ingest/podcast";
import { fetchWordpressContent } from "../src/lib/ingest/wordpress";
import { fetchYoutubeViaApi, fetchYoutubeViaRss } from "../src/lib/ingest/youtube";
/**
 * Prints one line through `console.log`, tagged with the script name so the
 * output stays easy to scan in cron logs.
 *
 * @param msg - Text to print after the `[fetch-content]` tag.
 */
function log(msg: string): void {
  const line = "[fetch-content] " + msg;
  // eslint-disable-next-line no-console
  console.log(line);
}
/**
 * Writes `content` to `filePath` by writing a temporary sibling file first and
 * then renaming it onto the destination, so the destination is only ever
 * replaced by a fully written file.
 *
 * The temporary file name is unique per call, so overlapping runs do not write
 * into each other's temp file, and it is removed again if writing or renaming
 * fails.
 *
 * @param filePath - Destination path; missing parent directories are created.
 * @param content - Text to write, encoded as UTF-8.
 * @throws Re-throws any error raised while creating the directory, writing
 *   the temporary file, or renaming it.
 */
async function writeAtomic(filePath: string, content: string): Promise<void> {
  const uniqueSuffix = `${Date.now().toString(36)}${Math.random().toString(36).slice(2)}`;
  const tmpPath = `${filePath}.${uniqueSuffix}.tmp`;

  await fs.mkdir(path.dirname(filePath), { recursive: true });
  try {
    await fs.writeFile(tmpPath, content, "utf8");
    await fs.rename(tmpPath, filePath);
  } catch (err) {
    // Best-effort cleanup: the temp file may not exist (write failed early),
    // so a failing unlink is ignored and the original error is re-thrown.
    await fs.unlink(tmpPath).catch(() => undefined);
    throw err;
  }
}
/**
 * Returns a new array containing only the first occurrence of each item,
 * where two items count as the same when both `source` and `id` match.
 * Input order is preserved and the input array is not modified.
 *
 * @param items - Items to filter.
 * @returns The items with later duplicates removed.
 */
function dedupe(items: ContentItem[]): ContentItem[] {
  const seenKeys = new Set<string>();
  return items.filter((item) => {
    const key = `${item.source}:${item.id}`;
    if (seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });
}
/**
 * Converts the accepted ways of referring to a Spotify episode into a
 * canonical `https://open.spotify.com/episode/...` URL.
 *
 * Accepted inputs (after trimming whitespace):
 * - a canonical episode URL, returned as-is;
 * - an embed URL, whose `/embed/episode/` segment becomes `/episode/`;
 * - a `spotify:episode:<id>` URI;
 * - a bare value of 10-30 ASCII letters/digits, treated as an episode id.
 *
 * @param value - Raw reference to normalize.
 * @returns The canonical URL, or `undefined` when the input is empty or
 *   matches none of the accepted forms.
 */
function normalizeSpotifyEpisodeUrl(value: string): string | undefined {
  const episodeBase = "https://open.spotify.com/episode/";
  const input = (value || "").trim();

  if (input.length === 0) return undefined;
  if (input.startsWith(episodeBase)) return input;

  if (input.startsWith("https://open.spotify.com/embed/episode/")) {
    return input.replace("/embed/episode/", "/episode/");
  }

  if (input.startsWith("spotify:episode:")) {
    // Third colon-separated segment is the id; anything after a further colon is dropped.
    const [, , episodeId] = input.split(":");
    return episodeId ? `${episodeBase}${episodeId}` : undefined;
  }

  const looksLikeBareId = /^[A-Za-z0-9]{10,30}$/.test(input);
  return looksLikeBareId ? `${episodeBase}${input}` : undefined;
}
/**
 * Loads the optional podcast-to-Spotify override map from
 * `content/podcast-spotify-map.json` under the current working directory.
 *
 * The file must contain a top-level JSON object. Only entries whose value is a
 * string are returned, so callers can safely pass every value to string
 * helpers. A missing file, an unreadable or malformed file, and a non-object
 * document each produce a distinct log line and yield `undefined`; this
 * function never throws for those cases.
 *
 * @param logFn - Receives a one-line message when the map is missing,
 *   unusable, or contains entries that were skipped.
 * @returns The validated map, or `undefined` when no usable map is available.
 */
async function readPodcastSpotifyOverrideMap(
  logFn: (msg: string) => void,
): Promise<Record<string, string> | undefined> {
  const mapPath = path.join(process.cwd(), "content", "podcast-spotify-map.json");

  let parsed: unknown;
  try {
    parsed = JSON.parse(await fs.readFile(mapPath, "utf8"));
  } catch (err) {
    // Only ENOENT means "no override file"; anything else (bad JSON,
    // permissions, ...) is reported as such instead of as "not found".
    const isMissing =
      typeof err === "object" && err !== null && (err as { code?: unknown }).code === "ENOENT";
    logFn(
      isMissing
        ? "Podcast: spotify override map not found (content/podcast-spotify-map.json)"
        : `Podcast: spotify override map unreadable (content/podcast-spotify-map.json): ${String(err)}`,
    );
    return undefined;
  }

  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    logFn(
      "Podcast: spotify override map ignored (content/podcast-spotify-map.json is not a JSON object)",
    );
    return undefined;
  }

  const entries = Object.entries(parsed);
  const stringEntries = entries.filter(
    (entry): entry is [string, string] => typeof entry[1] === "string",
  );
  const skipped = entries.length - stringEntries.length;
  if (skipped > 0) {
    logFn(`Podcast: spotify override map skipped ${skipped} non-string entries`);
  }
  return Object.fromEntries(stringEntries);
}
/**
 * Fetches every configured content source and writes the combined result to
 * `content/cache/content.json` under the current working directory.
 *
 * Sources, in order: YouTube (API when a key is configured, otherwise RSS;
 * RSS is also the fallback when the API call fails), the podcast RSS feed
 * (with optional Spotify URL overrides), the Instagram embed list, and
 * WordPress. Podcast, Instagram and WordPress failures are logged and the run
 * continues; WordPress additionally falls back to the section stored in the
 * previous cache file.
 *
 * The key-value cache handle is always closed, including when the run fails.
 *
 * @throws Propagates failures from config/cache setup, from the YouTube RSS
 *   fetch, and from writing the output file.
 */
async function main(): Promise<void> {
  const cfg = getIngestConfigFromEnv(process.env);
  const generatedAt = new Date().toISOString();
  const all: ContentItem[] = [];
  const outPath = path.join(process.cwd(), "content", "cache", "content.json");

  const kv = await createCacheFromEnv(process.env, {
    namespace: "fast-website",
    log,
  });

  try {
    // Read the existing cache so the WordPress section can fall back to its
    // last-known-good value. A missing or unparsable file means "no previous cache".
    let existing: ContentCache | undefined;
    try {
      const raw = await fs.readFile(outPath, "utf8");
      existing = JSON.parse(raw) as ContentCache;
    } catch {
      existing = undefined;
    }

    // YouTube
    const youtubeLimit = 25;

    // Shared by the "no API key" path and the "API failed" fallback.
    // Not guarded on purpose: a failure here rejects main() before the output
    // file is rewritten, so the cache file on disk is left as it was.
    const loadYoutubeRss = async (channelId: string): Promise<void> => {
      const cacheKey = `youtube:rss:${channelId}:${youtubeLimit}`;
      const { value: items, cached } = await cachedCompute(kv, cacheKey, () =>
        fetchYoutubeViaRss(channelId, youtubeLimit),
      );
      log(`YouTube: RSS ${cached ? "cache" : "live"} (${items.length} items)`);
      log(`YouTube: RSS ok (${items.length} items)`);
      all.push(...items);
    };

    // Copy into consts so the narrowing below also holds inside closures.
    const youtubeChannelId = cfg.youtubeChannelId;
    const youtubeApiKey = cfg.youtubeApiKey;
    if (!youtubeChannelId) {
      log("YouTube: skipped (missing YOUTUBE_CHANNEL_ID)");
    } else if (youtubeApiKey) {
      try {
        const cacheKey = `youtube:api:${youtubeChannelId}:${youtubeLimit}`;
        const { value: items, cached } = await cachedCompute(kv, cacheKey, () =>
          fetchYoutubeViaApi(youtubeChannelId, youtubeApiKey, youtubeLimit),
        );
        log(`YouTube: API ${cached ? "cache" : "live"} (${items.length} items)`);
        log(`YouTube: API ok (${items.length} items)`);
        all.push(...items);
      } catch (e) {
        log(`YouTube: API failed (${String(e)}), falling back to RSS`);
        await loadYoutubeRss(youtubeChannelId);
      }
    } else {
      await loadYoutubeRss(youtubeChannelId);
    }

    // Podcast
    const podcastRssUrl = cfg.podcastRssUrl;
    if (!podcastRssUrl) {
      log("Podcast: skipped (missing PODCAST_RSS_URL)");
    } else {
      try {
        const cacheKey = `podcast:rss:${podcastRssUrl}:50`;
        const { value: items, cached } = await cachedCompute(kv, cacheKey, () =>
          fetchPodcastRss(podcastRssUrl, 50),
        );
        log(`Podcast: RSS ${cached ? "cache" : "live"} (${items.length} items)`);
        log(`Podcast: RSS ok (${items.length} items)`);

        const overrideMap = await readPodcastSpotifyOverrideMap(log);
        if (overrideMap) {
          let overridden = 0;
          for (const it of items) {
            if (it.source !== "podcast") continue;
            // The map may be keyed by item id or by the item's current URL.
            const mapped = overrideMap[it.id] || overrideMap[it.url];
            const nextUrl = mapped ? normalizeSpotifyEpisodeUrl(mapped) : undefined;
            if (!nextUrl) continue;
            it.url = nextUrl;
            overridden++;
          }
          if (overridden > 0) {
            log(`Podcast: applied Spotify URL overrides (${overridden} items)`);
          }
        }

        all.push(...items);
      } catch (e) {
        log(`Podcast: RSS failed (${String(e)})`);
      }
    }

    // Instagram (embed-first list)
    try {
      const filePath = path.isAbsolute(cfg.instagramPostUrlsFile)
        ? cfg.instagramPostUrlsFile
        : path.join(process.cwd(), cfg.instagramPostUrlsFile);
      const items = await readInstagramEmbedPosts(filePath);
      log(`Instagram: embed list ok (${items.length} items)`);
      all.push(...items);
    } catch (e) {
      log(`Instagram: embed list failed (${String(e)})`);
    }

    // WordPress (optional; powers /blog)
    let wordpress: ContentCache["wordpress"] = { posts: [], pages: [], categories: [] };
    const wordpressBaseUrl = cfg.wordpressBaseUrl;
    if (!wordpressBaseUrl) {
      log("WordPress: skipped (missing WORDPRESS_BASE_URL)");
      wordpress = existing?.wordpress || wordpress;
    } else {
      try {
        const cacheKey = `wp:content:${wordpressBaseUrl}`;
        const { value: wp, cached } = await cachedCompute(kv, cacheKey, () =>
          fetchWordpressContent({
            baseUrl: wordpressBaseUrl,
            username: cfg.wordpressUsername,
            appPassword: cfg.wordpressAppPassword,
          }),
        );
        log(
          `WordPress: wp-json ${cached ? "cache" : "live"} (${wp.posts.length} posts, ${wp.pages.length} pages, ${wp.categories.length} categories)`,
        );
        wordpress = wp;
        log(
          `WordPress: wp-json ok (${wp.posts.length} posts, ${wp.pages.length} pages, ${wp.categories.length} categories)`,
        );
      } catch (e) {
        log(`WordPress: wp-json failed (${String(e)})`);
        // Keep last-known-good WP content if present, otherwise fall back to empty.
        wordpress = existing?.wordpress || wordpress;
      }
    }

    const contentCache: ContentCache = {
      generatedAt,
      items: dedupe(all),
      wordpress,
    };

    await writeAtomic(outPath, JSON.stringify(contentCache, null, 2));
    log(`Wrote cache: ${outPath} (${contentCache.items.length} total items)`);
  } finally {
    // Close the cache handle on every path; previously a thrown error skipped this.
    await kv.close();
  }
}
// Script entry point: run the ingest and, if it rejects, log the failure and
// mark the process as failed via the exit code without forcing an exit.
void main().catch((err) => {
  const reason = String(err);
  log(`fatal: ${reason}`);
  process.exitCode = 1;
});