// Listing metadata: 219 lines · 7.3 KiB · TypeScript
import "dotenv/config";
|
|
|
|
import { promises as fs } from "node:fs";
|
|
import path from "node:path";
|
|
|
|
import { getIngestConfigFromEnv } from "../src/lib/config";
|
|
import { createCacheFromEnv } from "../src/lib/cache";
|
|
import { cachedCompute } from "../src/lib/cache/memoize";
|
|
import type { ContentCache, ContentItem } from "../src/lib/content/types";
|
|
import { readInstagramEmbedPosts } from "../src/lib/ingest/instagram";
|
|
import { fetchPodcastRss } from "../src/lib/ingest/podcast";
|
|
import { fetchWordpressContent } from "../src/lib/ingest/wordpress";
|
|
import { fetchYoutubeViaApi, fetchYoutubeViaRss } from "../src/lib/ingest/youtube";
|
|
|
|
/**
 * Prints a single line to stdout, prefixed with `[fetch-content]`.
 *
 * Kept deliberately plain so the output stays readable in cron logs.
 *
 * @param msg - Text to print after the prefix.
 */
function log(msg: string) {
  const line = "[fetch-content] " + msg;
  // eslint-disable-next-line no-console
  console.log(line);
}
|
|
|
|
/**
 * Writes `content` to `filePath` by first writing a sibling `<filePath>.tmp`
 * file and then renaming it over the destination, so the destination is only
 * ever replaced by a fully written file.
 *
 * The parent directory is created when missing. If the write or the rename
 * fails, the temp file is removed (best effort) and the original error is
 * rethrown, so failed runs do not leave `.tmp` files behind.
 *
 * @param filePath - Destination path.
 * @param content - Text to write, encoded as UTF-8.
 * @throws Whatever `fs.mkdir`, `fs.writeFile` or `fs.rename` rejects with.
 */
async function writeAtomic(filePath: string, content: string) {
  const tmpPath = `${filePath}.tmp`;
  await fs.mkdir(path.dirname(filePath), { recursive: true });

  try {
    await fs.writeFile(tmpPath, content, "utf8");
    await fs.rename(tmpPath, filePath);
  } catch (err) {
    // Cleanup is best effort: the temp file may not exist (write never started)
    // and a cleanup failure must not hide the error that brought us here.
    await fs.unlink(tmpPath).catch(() => undefined);
    throw err;
  }
}
|
|
|
|
/**
 * Returns the items with duplicates removed, keeping the first occurrence of
 * each `(source, id)` pair and preserving input order.
 *
 * Ids are tracked per source rather than through a joined `"source:id"` string,
 * so values that themselves contain `:` cannot collide with one another.
 * The input array is not modified.
 *
 * @param items - Items to filter; may be empty.
 * @returns A new array holding the first item seen for every `(source, id)`.
 */
function dedupe(items: readonly ContentItem[]): ContentItem[] {
  const seenIdsBySource = new Map<string, Set<string>>();
  const out: ContentItem[] = [];

  for (const item of items) {
    // Coerce to string so values that print the same are treated as equal.
    const source = String(item.source);
    const id = String(item.id);

    let seenIds = seenIdsBySource.get(source);
    if (!seenIds) {
      seenIds = new Set<string>();
      seenIdsBySource.set(source, seenIds);
    }

    if (seenIds.has(id)) continue;
    seenIds.add(id);
    out.push(item);
  }

  return out;
}
|
|
|
|
/**
 * Converts a Spotify episode reference into an
 * `https://open.spotify.com/episode/...` URL.
 *
 * Recognised inputs (surrounding whitespace is ignored):
 * - `https://open.spotify.com/episode/...` — returned as is;
 * - `https://open.spotify.com/embed/episode/...` — `/embed/episode/` is
 *   rewritten to `/episode/`;
 * - `spotify:episode:<id>` — the id segment is placed in an episode URL;
 * - a bare id of 10–30 ASCII letters/digits.
 *
 * @param value - Episode URL, URI or id.
 * @returns The episode URL, or `undefined` when the input is empty, not a
 *   string, or in none of the recognised forms.
 */
function normalizeSpotifyEpisodeUrl(value: string): string | undefined {
  // Callers may pass values taken from parsed JSON, where the static type is
  // not a runtime guarantee; treat anything that is not a string as "no URL"
  // instead of throwing on `.trim()`.
  if (typeof value !== "string") return undefined;

  const v = value.trim();
  if (!v) return undefined;

  if (v.startsWith("https://open.spotify.com/episode/")) return v;

  if (v.startsWith("https://open.spotify.com/embed/episode/")) {
    return v.replace("/embed/episode/", "/episode/");
  }

  if (v.startsWith("spotify:episode:")) {
    // "spotify:episode:<id>" -> ["spotify", "episode", "<id>", ...]
    const id = v.split(":")[2];
    if (!id) return undefined;
    return `https://open.spotify.com/episode/${id}`;
  }

  // Bare episode id.
  if (/^[A-Za-z0-9]{10,30}$/.test(v)) {
    return `https://open.spotify.com/episode/${v}`;
  }

  return undefined;
}
|
|
|
|
/**
 * Loads the optional podcast → Spotify override map from
 * `content/podcast-spotify-map.json` (resolved against the current working
 * directory).
 *
 * The file must contain a JSON object. Only entries whose value is a string
 * are kept; other entries are dropped and reported through `logFn`.
 * This function never throws: every failure is logged and yields `undefined`.
 *
 * @param logFn - Receives one human-readable message per problem encountered.
 * @returns The key → value map, or `undefined` when the file is missing,
 *   unreadable, not valid JSON, or not a JSON object.
 */
async function readPodcastSpotifyOverrideMap(
  logFn: (msg: string) => void,
): Promise<Record<string, string> | undefined> {
  const mapPath = path.join(process.cwd(), "content", "podcast-spotify-map.json");

  let raw: string;
  try {
    raw = await fs.readFile(mapPath, "utf8");
  } catch (err) {
    const code = (err as { code?: unknown } | null)?.code;
    if (code === "ENOENT") {
      logFn("Podcast: spotify override map not found (content/podcast-spotify-map.json)");
    } else {
      // Permissions, path is a directory, etc. — not the same as "no map configured".
      logFn(`Podcast: spotify override map unreadable (${String(err)})`);
    }
    return undefined;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    logFn(`Podcast: spotify override map is not valid JSON (${String(err)})`);
    return undefined;
  }

  // `typeof [] === "object"`, so arrays have to be excluded explicitly.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    logFn("Podcast: spotify override map ignored (expected a JSON object)");
    return undefined;
  }

  const map: Record<string, string> = {};
  let dropped = 0;
  for (const [key, value] of Object.entries(parsed)) {
    if (typeof value === "string") {
      map[key] = value;
    } else {
      dropped++;
    }
  }
  if (dropped > 0) {
    logFn(`Podcast: spotify override map ignored ${dropped} non-string entries`);
  }

  return map;
}
|
|
|
|
/**
 * Runs one ingest pass: fetches every configured source, merges the results
 * and writes them to `content/cache/content.json` under the current working
 * directory.
 *
 * Per-source behaviour, as implemented below:
 * - YouTube: skipped without a channel id; uses the API when an API key is set
 *   and falls back to RSS if the API call fails; uses RSS directly otherwise.
 *   A failure of the RSS fetch is NOT caught here and aborts the run.
 * - Podcast: skipped without an RSS URL; failures are logged and ignored.
 *   Item URLs may be replaced through the Spotify override map.
 * - Instagram: read from the configured post-URL file; failures are logged and
 *   ignored.
 * - WordPress: on skip or failure the `wordpress` section of the previously
 *   written cache file is reused when present, otherwise an empty section.
 *
 * The cache client is closed on every exit path, including failures.
 *
 * @throws Propagates errors from config/cache setup, the YouTube RSS fetch,
 *   writing the output file, and closing the cache client.
 */
async function main() {
  const cfg = getIngestConfigFromEnv(process.env);
  const generatedAt = new Date().toISOString();

  const all: ContentItem[] = [];
  const outPath = path.join(process.cwd(), "content", "cache", "content.json");

  const kv = await createCacheFromEnv(process.env, {
    namespace: "fast-website",
    log,
  });

  try {
    // Read the existing cache so we can keep last-known-good sections if a source fails.
    let existing: ContentCache | undefined;
    try {
      const raw = await fs.readFile(outPath, "utf8");
      existing = JSON.parse(raw) as ContentCache;
    } catch {
      // Missing or unparsable file: proceed as if there were no previous cache.
      existing = undefined;
    }

    // YouTube
    const youtubeChannelId = cfg.youtubeChannelId;
    const youtubeApiKey = cfg.youtubeApiKey;
    if (!youtubeChannelId) {
      log("YouTube: skipped (missing YOUTUBE_CHANNEL_ID)");
    } else {
      // Shared by the "no API key" path and the "API failed" fallback.
      const ingestYoutubeRss = async () => {
        const cacheKey = `youtube:rss:${youtubeChannelId}:25`;
        const { value: items, cached } = await cachedCompute(kv, cacheKey, () =>
          fetchYoutubeViaRss(youtubeChannelId, 25),
        );
        log(`YouTube: RSS ${cached ? "cache" : "live"} (${items.length} items)`);
        log(`YouTube: RSS ok (${items.length} items)`);
        all.push(...items);
      };

      if (youtubeApiKey) {
        try {
          const cacheKey = `youtube:api:${youtubeChannelId}:25`;
          const { value: items, cached } = await cachedCompute(kv, cacheKey, () =>
            fetchYoutubeViaApi(youtubeChannelId, youtubeApiKey, 25),
          );
          log(`YouTube: API ${cached ? "cache" : "live"} (${items.length} items)`);
          log(`YouTube: API ok (${items.length} items)`);
          all.push(...items);
        } catch (e) {
          log(`YouTube: API failed (${String(e)}), falling back to RSS`);
          await ingestYoutubeRss();
        }
      } else {
        await ingestYoutubeRss();
      }
    }

    // Podcast
    const podcastRssUrl = cfg.podcastRssUrl;
    if (!podcastRssUrl) {
      log("Podcast: skipped (missing PODCAST_RSS_URL)");
    } else {
      try {
        const cacheKey = `podcast:rss:${podcastRssUrl}:50`;
        const { value: items, cached } = await cachedCompute(kv, cacheKey, () =>
          fetchPodcastRss(podcastRssUrl, 50),
        );
        log(`Podcast: RSS ${cached ? "cache" : "live"} (${items.length} items)`);
        log(`Podcast: RSS ok (${items.length} items)`);

        const overrideMap = await readPodcastSpotifyOverrideMap(log);
        if (overrideMap) {
          let overridden = 0;
          for (const it of items) {
            if (it.source !== "podcast") continue;
            // The map may be keyed by item id or by the item's current URL.
            const mapped = overrideMap[it.id] || overrideMap[it.url];
            const nextUrl = mapped ? normalizeSpotifyEpisodeUrl(mapped) : undefined;
            if (!nextUrl) continue;
            it.url = nextUrl;
            overridden++;
          }
          if (overridden > 0) {
            log(`Podcast: applied Spotify URL overrides (${overridden} items)`);
          }
        }

        all.push(...items);
      } catch (e) {
        log(`Podcast: RSS failed (${String(e)})`);
      }
    }

    // Instagram (embed-first list)
    try {
      const filePath = path.isAbsolute(cfg.instagramPostUrlsFile)
        ? cfg.instagramPostUrlsFile
        : path.join(process.cwd(), cfg.instagramPostUrlsFile);
      const items = await readInstagramEmbedPosts(filePath);
      log(`Instagram: embed list ok (${items.length} items)`);
      all.push(...items);
    } catch (e) {
      log(`Instagram: embed list failed (${String(e)})`);
    }

    // WordPress (optional; powers /blog)
    let wordpress: ContentCache["wordpress"] = { posts: [], pages: [], categories: [] };
    const wordpressBaseUrl = cfg.wordpressBaseUrl;
    if (!wordpressBaseUrl) {
      log("WordPress: skipped (missing WORDPRESS_BASE_URL)");
      wordpress = existing?.wordpress || wordpress;
    } else {
      try {
        const cacheKey = `wp:content:${wordpressBaseUrl}`;
        const { value: wp, cached } = await cachedCompute(kv, cacheKey, () =>
          fetchWordpressContent({
            baseUrl: wordpressBaseUrl,
            username: cfg.wordpressUsername,
            appPassword: cfg.wordpressAppPassword,
          }),
        );
        log(
          `WordPress: wp-json ${cached ? "cache" : "live"} (${wp.posts.length} posts, ${wp.pages.length} pages, ${wp.categories.length} categories)`,
        );
        wordpress = wp;
        log(
          `WordPress: wp-json ok (${wp.posts.length} posts, ${wp.pages.length} pages, ${wp.categories.length} categories)`,
        );
      } catch (e) {
        log(`WordPress: wp-json failed (${String(e)})`);
        // Keep last-known-good WP content if present, otherwise fall back to empty.
        wordpress = existing?.wordpress || wordpress;
      }
    }

    const contentCache: ContentCache = {
      generatedAt,
      items: dedupe(all),
      wordpress,
    };

    await writeAtomic(outPath, JSON.stringify(contentCache, null, 2));
    log(`Wrote cache: ${outPath} (${contentCache.items.length} total items)`);
  } finally {
    // Close the cache client on failure too. NOTE(review): presumably this
    // releases a connection that would otherwise keep the process alive after
    // a failed run — confirm against the cache implementation.
    await kv.close();
  }
}
|
|
|
|
// Start the run. A rejection escaping main() is logged and reported through the
// process exit code; the process is not terminated forcibly here.
main().catch((err) => {
  const message = "fatal: " + String(err);
  log(message);
  process.exitCode = 1;
});
|