Files
clawfort/backend/main.py
2026-02-13 00:49:22 -05:00

222 lines
7.2 KiB
Python

import logging
import os
from apscheduler.schedulers.background import BackgroundScheduler
from fastapi import Depends, FastAPI, Query, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from backend import config
from backend.database import get_db, init_db
from backend.models import NewsItem
from backend.news_service import scheduled_news_fetch
from backend.repository import (
archive_old_news,
delete_archived_news,
get_latest_news,
get_news_paginated,
get_translation,
normalize_language,
resolve_news_content,
resolve_summary_fields,
)
from backend.schemas import HealthResponse, NewsItemResponse, PaginatedNewsResponse
# Configure root logging at import time so every module logger shares the
# same timestamped "[LEVEL] name: message" format.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
logger = logging.getLogger(__name__)

app = FastAPI(title="ClawFort News API", version="0.1.0")

# Every route declared in this module is an unauthenticated GET, so any origin
# may call the API. Credentials are deliberately disabled: the CORS spec does
# not permit a wildcard origin together with credentials, and enabling both
# would effectively let any site issue cookie-bearing cross-origin requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Compress response bodies; responses smaller than 500 bytes are left as-is.
app.add_middleware(GZipMiddleware, minimum_size=500)
@app.middleware("http")
async def add_cache_headers(request: Request, call_next):
    """Attach caching and content-sniffing headers to outgoing responses.

    Cache-Control is only added to GET/HEAD responses whose status is below
    400, so error responses (404, 500, ...) and non-idempotent requests are
    never marked as publicly cacheable. Headers already present on the
    response are left untouched (``setdefault``).

    Policy by path:
      * ``/static/...``                  -> one week, immutable
      * ``/api/...``                     -> 60 s, stale-while-revalidate 120 s,
                                           plus ``Vary: Accept-Encoding``
      * ``/``, ``/terms``, ``/attribution`` -> 300 s, stale-while-revalidate 600 s

    ``X-Content-Type-Options: nosniff`` is added to every response.

    Args:
        request: The incoming HTTP request.
        call_next: Callable that forwards the request down the stack and
            returns the response.

    Returns:
        The downstream response with the extra headers applied.
    """
    response = await call_next(request)
    headers = response.headers

    # Never advertise errors or unsafe-method responses as cacheable: a shared
    # cache could otherwise keep serving a transient failure.
    cacheable = request.method in {"GET", "HEAD"} and response.status_code < 400

    if cacheable:
        path = request.url.path
        if path.startswith("/static/"):
            headers.setdefault("Cache-Control", "public, max-age=604800, immutable")
        elif path.startswith("/api/"):
            headers.setdefault(
                "Cache-Control", "public, max-age=60, stale-while-revalidate=120"
            )
            headers.setdefault("Vary", "Accept-Encoding")
        elif path in {"/", "/terms", "/attribution"}:
            headers.setdefault(
                "Cache-Control", "public, max-age=300, stale-while-revalidate=600"
            )

    headers.setdefault("X-Content-Type-Options", "nosniff")
    return response
# Static assets live in a "static" directory next to this module and are
# served under the /static URL prefix.
static_dir = os.path.join(os.path.dirname(__file__), "static")
app.mount(
    "/static",
    StaticFiles(directory=static_dir),
    name="static",
)

# Module-level scheduler instance; jobs are registered and the scheduler is
# started from the application startup hook.
scheduler = BackgroundScheduler()
def nightly_cleanup() -> None:
    """Run the archive and delete maintenance steps and log their results.

    Opens its own database session, calls ``archive_old_news`` with
    ``config.RETENTION_DAYS`` and then ``delete_archived_news`` with a 60-day
    window, and logs the two returned counts. The session is always closed,
    even if one of the steps raises.
    """
    # Imported at call time rather than at module import time.
    from backend.database import SessionLocal

    session = SessionLocal()
    try:
        archived_count = archive_old_news(session, config.RETENTION_DAYS)
        deleted_count = delete_archived_news(session, days_after_archive=60)
        logger.info(
            "Nightly cleanup: archived=%d, deleted=%d",
            archived_count,
            deleted_count,
        )
    finally:
        session.close()
@app.on_event("startup")
async def startup_event() -> None:
    """Prepare storage and start the background jobs when the app boots.

    Steps, in order: log an error if ``config.PERPLEXITY_API_KEY`` is empty,
    ensure the ``data`` directory exists (relative to the working directory),
    initialise the database, register the two scheduled jobs and start the
    scheduler.
    """
    api_key_present = bool(config.PERPLEXITY_API_KEY)
    if not api_key_present:
        logger.error("PERPLEXITY_API_KEY is not set — news fetching will fail")

    os.makedirs("data", exist_ok=True)
    init_db()
    logger.info("Database initialized")

    # (callable, trigger type, trigger arguments, job id)
    job_table = (
        (scheduled_news_fetch, "interval", {"hours": 1}, "news_fetch"),
        (nightly_cleanup, "cron", {"hour": 3, "minute": 0}, "nightly_cleanup"),
    )
    for job_func, trigger, trigger_args, job_id in job_table:
        scheduler.add_job(job_func, trigger, id=job_id, **trigger_args)

    scheduler.start()
    logger.info("Scheduler started: hourly news fetch + nightly cleanup")
@app.on_event("shutdown")
async def shutdown_event() -> None:
    """Stop the background scheduler when the application shuts down.

    The scheduler is stopped without waiting for running jobs to finish
    (``wait=False``). If the scheduler is not running, nothing is done:
    calling ``shutdown()`` on a stopped scheduler would raise and turn a
    clean application exit into an error.
    """
    if not scheduler.running:
        return
    scheduler.shutdown(wait=False)
    logger.info("Scheduler shut down")
def _build_news_response(db: Session, item: NewsItem, lang: str) -> NewsItemResponse:
    """Build the API representation of one news item in the requested language.

    For any language other than ``"en"`` a translation is looked up with
    ``get_translation``; the headline/summary and the structured summary
    fields are then resolved from the item and that (possibly missing)
    translation.

    Args:
        db: Active database session.
        item: The stored news item.
        lang: Already-normalized language code.

    Returns:
        The response model. Its ``language`` is ``lang`` when a translation
        was found, otherwise ``"en"``.
    """
    translation = get_translation(db, item.id, lang) if lang != "en" else None
    headline, summary = resolve_news_content(item, translation)
    tldr_points, summary_body, source_citation = resolve_summary_fields(item, translation)
    return NewsItemResponse(
        id=item.id,
        headline=headline,
        summary=summary,
        source_url=item.source_url,
        image_url=item.image_url,
        image_credit=item.image_credit,
        tldr_points=tldr_points,
        summary_body=summary_body,
        source_citation=source_citation,
        summary_image_url=item.summary_image_url,
        summary_image_credit=item.summary_image_credit,
        published_at=item.published_at,
        created_at=item.created_at,
        language=lang if translation is not None else "en",
    )


@app.get("/api/news", response_model=PaginatedNewsResponse)
def api_get_news(
    cursor: int | None = Query(None, description="Cursor for pagination (last item ID)"),
    limit: int = Query(10, ge=1, le=50),
    exclude_hero: int | None = Query(None, description="Hero item ID to exclude from feed"),
    language: str = Query("en", description="Language code: en, ta, ml"),
    db: Session = Depends(get_db),
) -> PaginatedNewsResponse:
    """Return one page of the news feed.

    Args:
        cursor: ID of the last item of the previous page, or ``None`` for the
            first page.
        limit: Maximum number of items to return (1-50).
        exclude_hero: Optional item ID to leave out of the feed.
        language: Requested language code; normalized with
            ``normalize_language`` before use.
        db: Request-scoped database session.

    Returns:
        The page of items, whether more items exist, and the cursor for the
        next page (``None`` when there is no further page).
    """
    lang = normalize_language(language)

    # Fetch one row more than requested: its presence tells us whether another
    # page exists without issuing a separate count query.
    items = get_news_paginated(db, cursor=cursor, limit=limit + 1, exclude_id=exclude_hero)
    has_more = len(items) > limit
    if has_more:
        items = items[:limit]
    next_cursor = items[-1].id if items and has_more else None

    return PaginatedNewsResponse(
        items=[_build_news_response(db, item, lang) for item in items],
        next_cursor=next_cursor,
        has_more=has_more,
    )


@app.get("/api/news/latest", response_model=NewsItemResponse | None)
def api_get_latest_news(
    language: str = Query("en", description="Language code: en, ta, ml"),
    db: Session = Depends(get_db),
) -> NewsItemResponse | None:
    """Return the item provided by ``get_latest_news``, or ``None`` if there is none.

    Args:
        language: Requested language code; normalized with
            ``normalize_language`` before use.
        db: Request-scoped database session.

    Returns:
        The item in the requested language when a translation exists,
        otherwise its English content; ``None`` when no item is available.
    """
    lang = normalize_language(language)
    item = get_latest_news(db)
    if not item:
        return None
    return _build_news_response(db, item, lang)
@app.get("/api/health", response_model=HealthResponse)
def api_health(db: Session = Depends(get_db)) -> HealthResponse:
    """Report service status together with the number of non-archived news items.

    Args:
        db: Request-scoped database session.

    Returns:
        A health payload with a fixed ``"ok"`` status, the API version string
        and the count of ``NewsItem`` rows whose ``archived`` flag is false.
    """
    active_items = db.query(NewsItem).filter(NewsItem.archived.is_(False))
    return HealthResponse(
        status="ok",
        version="0.1.0",
        news_count=active_items.count(),
    )
# The frontend HTML lives in a "frontend" directory that is a sibling of the
# directory containing this module.
_backend_dir = os.path.dirname(__file__)
frontend_dir = os.path.join(os.path.dirname(_backend_dir), "frontend")
@app.get("/")
async def serve_frontend() -> FileResponse:
    """Serve ``index.html`` from the frontend directory at the site root."""
    index_path = os.path.join(frontend_dir, "index.html")
    return FileResponse(index_path)
@app.get("/terms")
async def serve_terms() -> FileResponse:
    """Serve ``terms.html`` from the frontend directory."""
    terms_path = os.path.join(frontend_dir, "terms.html")
    return FileResponse(terms_path)
@app.get("/attribution")
async def serve_attribution() -> FileResponse:
    """Serve ``attribution.html`` from the frontend directory."""
    attribution_path = os.path.join(frontend_dir, "attribution.html")
    return FileResponse(attribution_path)
@app.get("/config")
async def serve_config() -> dict:
    """Expose client-side configuration as a JSON object.

    Returns:
        A dict with the Umami script URL and website ID, the supported
        language list from ``config``, and a fixed default language of
        ``"en"``.
    """
    payload: dict = {}
    payload["umami_script_url"] = config.UMAMI_SCRIPT_URL
    payload["umami_website_id"] = config.UMAMI_WEBSITE_ID
    payload["supported_languages"] = config.SUPPORTED_LANGUAGES
    payload["default_language"] = "en"
    return payload