Files
astro-website/site/scripts/fetch-content.ts
2026-02-10 01:20:58 -05:00

167 lines
5.6 KiB
TypeScript

import "dotenv/config";
import { promises as fs } from "node:fs";
import path from "node:path";
import { getIngestConfigFromEnv } from "../src/lib/config";
import { createCacheFromEnv } from "../src/lib/cache";
import { cachedCompute } from "../src/lib/cache/memoize";
import type { ContentCache, ContentItem } from "../src/lib/content/types";
import { readInstagramEmbedPosts } from "../src/lib/ingest/instagram";
import { fetchPodcastRss } from "../src/lib/ingest/podcast";
import { fetchWordpressContent } from "../src/lib/ingest/wordpress";
import { fetchYoutubeViaApi, fetchYoutubeViaRss } from "../src/lib/ingest/youtube";
/**
 * Prints one line to stdout, tagged with the script name so the output is
 * easy to pick out of cron logs.
 *
 * @param msg - Text to print after the `[fetch-content]` tag.
 */
function log(msg: string): void {
  // eslint-disable-next-line no-console
  console.log("[fetch-content] " + msg);
}
/**
 * Writes `content` to `filePath` by first writing a sibling `<filePath>.tmp`
 * file and then renaming it over the destination, so a reader sees either the
 * old file or the complete new one (assuming the filesystem's rename is
 * atomic — true for same-directory renames on POSIX).
 *
 * Missing parent directories are created. If the write or the rename fails,
 * the temporary file is removed (best effort) and the original error is
 * rethrown, so a failed run does not leave a stray `.tmp` file behind.
 *
 * @param filePath - Destination file path.
 * @param content - UTF-8 text to write.
 * @throws Whatever `mkdir`, `writeFile` or `rename` rejected with.
 */
async function writeAtomic(filePath: string, content: string): Promise<void> {
  const tmpPath = `${filePath}.tmp`;
  await fs.mkdir(path.dirname(filePath), { recursive: true });
  try {
    await fs.writeFile(tmpPath, content, "utf8");
    await fs.rename(tmpPath, filePath);
  } catch (err) {
    // Cleanup must never mask the real failure, so its own errors are ignored.
    await fs.rm(tmpPath, { force: true }).catch(() => undefined);
    throw err;
  }
}
/**
 * Removes duplicate content items, where two items are duplicates when they
 * share the same `source` and `id`.
 *
 * The first occurrence of each `source:id` pair is kept and the relative order
 * of the surviving items is preserved. The input array is not modified.
 *
 * @param items - Items to filter.
 * @returns A new array containing the first item seen for each `source:id` key.
 */
function dedupe(items: ContentItem[]): ContentItem[] {
  // Map iteration follows insertion order, so values() yields first-seen order.
  const firstByKey = new Map<string, ContentItem>();
  items.forEach((item) => {
    const key = `${item.source}:${item.id}`;
    if (!firstByKey.has(key)) {
      firstByKey.set(key, item);
    }
  });
  return Array.from(firstByKey.values());
}
/**
 * Fetches content from every configured source and writes the combined result
 * to `content/cache/content.json` under the current working directory.
 *
 * Sources, in order:
 * - YouTube: skipped without a channel id; uses the API when an API key is
 *   configured and falls back to RSS if the API call throws; uses RSS directly
 *   otherwise. An RSS failure is NOT caught here: it rejects the returned
 *   promise before anything is written, leaving the existing cache file as-is.
 * - Podcast RSS: skipped without a feed URL; failures are logged and ignored.
 * - Instagram: read from the configured post-URL file; failures are logged
 *   and ignored.
 * - WordPress: when skipped or failing, the `wordpress` section of the
 *   previously written cache file is reused if there is one.
 *
 * Remote fetches go through `cachedCompute` with the key/value cache created
 * from the environment. That cache is closed in a `finally` block so it is
 * released even when the run fails part-way.
 *
 * @throws If config/cache setup, an uncaught source fetch, the final write, or
 *   closing the cache fails.
 */
async function main(): Promise<void> {
  const cfg = getIngestConfigFromEnv(process.env);
  const generatedAt = new Date().toISOString();
  const all: ContentItem[] = [];
  const outPath = path.join(process.cwd(), "content", "cache", "content.json");
  const kv = await createCacheFromEnv(process.env, {
    namespace: "fast-website",
    log,
  });

  try {
    // Read the existing cache so we can keep last-known-good sections if a source fails.
    // Any read/parse problem (including a missing file) is treated as "no previous cache".
    let existing: ContentCache | undefined;
    try {
      const raw = await fs.readFile(outPath, "utf8");
      existing = JSON.parse(raw) as ContentCache;
    } catch {
      existing = undefined;
    }

    // Item limits are part of the cache keys, so keep key and fetch call in sync.
    const youtubeLimit = 25;
    const podcastLimit = 50;

    // Narrowed copies let the fetch closures below avoid non-null assertions.
    const youtubeChannelId = cfg.youtubeChannelId;
    const youtubeApiKey = cfg.youtubeApiKey;
    const podcastRssUrl = cfg.podcastRssUrl;
    const wordpressBaseUrl = cfg.wordpressBaseUrl;

    // Shared by the "no API key" path and the "API failed" fallback path.
    // Errors are intentionally left to propagate to the caller.
    const collectYoutubeRss = async (channelId: string): Promise<void> => {
      const cacheKey = `youtube:rss:${channelId}:${youtubeLimit}`;
      const { value: items, cached } = await cachedCompute(kv, cacheKey, () =>
        fetchYoutubeViaRss(channelId, youtubeLimit),
      );
      log(`YouTube: RSS ${cached ? "cache" : "live"} (${items.length} items)`);
      log(`YouTube: RSS ok (${items.length} items)`);
      all.push(...items);
    };

    // YouTube
    if (!youtubeChannelId) {
      log("YouTube: skipped (missing YOUTUBE_CHANNEL_ID)");
    } else if (youtubeApiKey) {
      try {
        const cacheKey = `youtube:api:${youtubeChannelId}:${youtubeLimit}`;
        const { value: items, cached } = await cachedCompute(kv, cacheKey, () =>
          fetchYoutubeViaApi(youtubeChannelId, youtubeApiKey, youtubeLimit),
        );
        log(`YouTube: API ${cached ? "cache" : "live"} (${items.length} items)`);
        log(`YouTube: API ok (${items.length} items)`);
        all.push(...items);
      } catch (e) {
        log(`YouTube: API failed (${String(e)}), falling back to RSS`);
        await collectYoutubeRss(youtubeChannelId);
      }
    } else {
      await collectYoutubeRss(youtubeChannelId);
    }

    // Podcast
    if (!podcastRssUrl) {
      log("Podcast: skipped (missing PODCAST_RSS_URL)");
    } else {
      try {
        const cacheKey = `podcast:rss:${podcastRssUrl}:${podcastLimit}`;
        const { value: items, cached } = await cachedCompute(kv, cacheKey, () =>
          fetchPodcastRss(podcastRssUrl, podcastLimit),
        );
        log(`Podcast: RSS ${cached ? "cache" : "live"} (${items.length} items)`);
        log(`Podcast: RSS ok (${items.length} items)`);
        all.push(...items);
      } catch (e) {
        log(`Podcast: RSS failed (${String(e)})`);
      }
    }

    // Instagram (embed-first list)
    try {
      // Relative paths are resolved against the current working directory.
      const filePath = path.isAbsolute(cfg.instagramPostUrlsFile)
        ? cfg.instagramPostUrlsFile
        : path.join(process.cwd(), cfg.instagramPostUrlsFile);
      const items = await readInstagramEmbedPosts(filePath);
      log(`Instagram: embed list ok (${items.length} items)`);
      all.push(...items);
    } catch (e) {
      log(`Instagram: embed list failed (${String(e)})`);
    }

    // WordPress (optional; powers /blog)
    let wordpress: ContentCache["wordpress"] = { posts: [], pages: [], categories: [] };
    if (!wordpressBaseUrl) {
      log("WordPress: skipped (missing WORDPRESS_BASE_URL)");
      wordpress = existing?.wordpress || wordpress;
    } else {
      try {
        const cacheKey = `wp:content:${wordpressBaseUrl}`;
        const { value: wp, cached } = await cachedCompute(kv, cacheKey, () =>
          fetchWordpressContent({
            baseUrl: wordpressBaseUrl,
            username: cfg.wordpressUsername,
            appPassword: cfg.wordpressAppPassword,
          }),
        );
        log(
          `WordPress: wp-json ${cached ? "cache" : "live"} (${wp.posts.length} posts, ${wp.pages.length} pages, ${wp.categories.length} categories)`,
        );
        wordpress = wp;
        log(
          `WordPress: wp-json ok (${wp.posts.length} posts, ${wp.pages.length} pages, ${wp.categories.length} categories)`,
        );
      } catch (e) {
        log(`WordPress: wp-json failed (${String(e)})`);
        // Keep last-known-good WP content if present, otherwise fall back to empty.
        wordpress = existing?.wordpress || wordpress;
      }
    }

    const contentCache: ContentCache = {
      generatedAt,
      items: dedupe(all),
      wordpress,
    };
    await writeAtomic(outPath, JSON.stringify(contentCache, null, 2));
    log(`Wrote cache: ${outPath} (${contentCache.items.length} total items)`);
  } finally {
    // Always release the cache client, including when a source or the write threw.
    await kv.close();
  }
}
// Script entry point: run the ingest and, if it rejects, log the reason and
// mark the process as failed via the exit code (without forcing an exit).
main().then(undefined, (reason) => {
  log(`fatal: ${String(reason)}`);
  process.exitCode = 1;
});