Initial Commit
This commit is contained in:
0
backend/__init__.py
Normal file
0
backend/__init__.py
Normal file
BIN
backend/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
backend/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
backend/__pycache__/cli.cpython-313.pyc
Normal file
BIN
backend/__pycache__/cli.cpython-313.pyc
Normal file
Binary file not shown.
BIN
backend/__pycache__/config.cpython-313.pyc
Normal file
BIN
backend/__pycache__/config.cpython-313.pyc
Normal file
Binary file not shown.
BIN
backend/__pycache__/database.cpython-313.pyc
Normal file
BIN
backend/__pycache__/database.cpython-313.pyc
Normal file
Binary file not shown.
BIN
backend/__pycache__/main.cpython-313.pyc
Normal file
BIN
backend/__pycache__/main.cpython-313.pyc
Normal file
Binary file not shown.
BIN
backend/__pycache__/models.cpython-313.pyc
Normal file
BIN
backend/__pycache__/models.cpython-313.pyc
Normal file
Binary file not shown.
BIN
backend/__pycache__/news_service.cpython-313.pyc
Normal file
BIN
backend/__pycache__/news_service.cpython-313.pyc
Normal file
Binary file not shown.
BIN
backend/__pycache__/repository.cpython-313.pyc
Normal file
BIN
backend/__pycache__/repository.cpython-313.pyc
Normal file
Binary file not shown.
BIN
backend/__pycache__/schemas.cpython-313.pyc
Normal file
BIN
backend/__pycache__/schemas.cpython-313.pyc
Normal file
Binary file not shown.
71
backend/cli.py
Normal file
71
backend/cli.py
Normal file
@@ -0,0 +1,71 @@
|
||||
import argparse
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
from backend import config
|
||||
from backend.database import init_db
|
||||
from backend.news_service import process_and_store_news
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
    """Construct the top-level ClawFort CLI parser with all subcommands attached."""
    parser = argparse.ArgumentParser(prog="clawfort", description="ClawFort operations CLI")
    commands = parser.add_subparsers(dest="command", required=True)

    # force-fetch: one-off fetch run outside the hourly scheduler cadence.
    force_fetch = commands.add_parser(
        "force-fetch",
        help="Run one immediate news fetch cycle",
        description="Trigger one immediate news fetch run outside scheduler cadence.",
    )
    force_fetch.set_defaults(handler=handle_force_fetch)

    return parser
|
||||
|
||||
|
||||
def validate_runtime() -> None:
    """Fail fast when no LLM provider credentials are configured.

    Raises:
        RuntimeError: If neither PERPLEXITY_API_KEY nor OPENROUTER_API_KEY is set.
    """
    has_any_provider = bool(config.PERPLEXITY_API_KEY or config.OPENROUTER_API_KEY)
    if not has_any_provider:
        raise RuntimeError(
            "No provider API key configured. Set PERPLEXITY_API_KEY or OPENROUTER_API_KEY in the environment."
        )
|
||||
|
||||
|
||||
def handle_force_fetch(_: argparse.Namespace) -> int:
    """Run one immediate news fetch cycle outside the scheduler.

    Returns:
        Process exit code: 0 on success, 1 on any failure.
    """
    start = time.monotonic()

    try:
        # Fail fast if no provider API key is configured.
        validate_runtime()
        # Ensure the SQLite data directory and schema exist before fetching.
        os.makedirs("data", exist_ok=True)
        init_db()

        # Drive the async pipeline to completion from this synchronous CLI entry point.
        stored_count = asyncio.run(process_and_store_news())
        elapsed = time.monotonic() - start

        print(f"force-fetch succeeded: stored={stored_count} elapsed={elapsed:.1f}s")
        return 0
    except Exception as exc:
        # Broad catch is intentional at this CLI boundary: log the full
        # traceback, print a short operator-facing hint, return non-zero.
        logger.exception("force-fetch failed")
        print(f"force-fetch failed: {exc}", file=sys.stderr)
        print(
            "Check API keys, network connectivity, and provider status, then retry the command.",
            file=sys.stderr,
        )
        return 1
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
parser = build_parser()
|
||||
args = parser.parse_args(argv)
|
||||
handler = args.handler
|
||||
return handler(args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
23
backend/config.py
Normal file
23
backend/config.py
Normal file
@@ -0,0 +1,23 @@
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY", "")
|
||||
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
|
||||
IMAGE_QUALITY = int(os.getenv("IMAGE_QUALITY", "85"))
|
||||
RETENTION_DAYS = int(os.getenv("RETENTION_DAYS", "30"))
|
||||
UMAMI_SCRIPT_URL = os.getenv("UMAMI_SCRIPT_URL", "")
|
||||
UMAMI_WEBSITE_ID = os.getenv("UMAMI_WEBSITE_ID", "")
|
||||
|
||||
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
||||
PERPLEXITY_MODEL = "sonar"
|
||||
|
||||
OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
|
||||
OPENROUTER_MODEL = "google/gemini-2.0-flash-001"
|
||||
|
||||
SUPPORTED_LANGUAGES = ["en", "ta", "ml"]
|
||||
|
||||
STATIC_IMAGES_DIR = os.path.join(os.path.dirname(__file__), "static", "images")
|
||||
os.makedirs(STATIC_IMAGES_DIR, exist_ok=True)
|
||||
27
backend/database.py
Normal file
27
backend/database.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from collections.abc import Generator
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
|
||||
|
||||
DATABASE_URL = "sqlite:///./data/clawfort.db"
|
||||
|
||||
engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
|
||||
SessionLocal = sessionmaker(bind=engine, autocommit=False, autoflush=False)
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
|
||||
pass
|
||||
|
||||
|
||||
def get_db() -> Generator[Session, None, None]:
    """FastAPI dependency yielding a request-scoped SQLAlchemy session.

    The session is always closed once the request handler finishes,
    whether it returned normally or raised.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
||||
|
||||
|
||||
def init_db() -> None:
|
||||
from backend.models import NewsItem, NewsTranslation # noqa: F401
|
||||
|
||||
Base.metadata.create_all(bind=engine)
|
||||
16
backend/init_db.py
Normal file
16
backend/init_db.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from backend.database import init_db
|
||||
|
||||
|
||||
def main() -> None:
|
||||
os.makedirs("data", exist_ok=True)
|
||||
init_db()
|
||||
print("Database initialized successfully.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
173
backend/main.py
Normal file
173
backend/main.py
Normal file
@@ -0,0 +1,173 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
from apscheduler.schedulers.background import BackgroundScheduler
|
||||
from fastapi import Depends, FastAPI, Query
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from backend import config
|
||||
from backend.database import get_db, init_db
|
||||
from backend.models import NewsItem
|
||||
from backend.news_service import scheduled_news_fetch
|
||||
from backend.repository import (
|
||||
archive_old_news,
|
||||
delete_archived_news,
|
||||
get_latest_news,
|
||||
get_news_paginated,
|
||||
get_translation,
|
||||
normalize_language,
|
||||
resolve_news_content,
|
||||
)
|
||||
from backend.schemas import HealthResponse, NewsItemResponse, PaginatedNewsResponse
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
app = FastAPI(title="ClawFort News API", version="0.1.0")
|
||||
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
static_dir = os.path.join(os.path.dirname(__file__), "static")
|
||||
app.mount("/static", StaticFiles(directory=static_dir), name="static")
|
||||
|
||||
scheduler = BackgroundScheduler()
|
||||
|
||||
|
||||
def nightly_cleanup() -> None:
    """Archive stale news items and purge long-archived rows.

    Registered as a 3 AM cron job on the scheduler: first marks items older
    than RETENTION_DAYS as archived, then hard-deletes items that have been
    archived past the 60-day grace window.
    """
    # NOTE(review): local import — presumably to avoid an import cycle with
    # backend.database at module load time; confirm before moving to top level.
    from backend.database import SessionLocal

    db = SessionLocal()
    try:
        archived = archive_old_news(db, config.RETENTION_DAYS)
        deleted = delete_archived_news(db, days_after_archive=60)
        logger.info("Nightly cleanup: archived=%d, deleted=%d", archived, deleted)
    finally:
        # Always release the session, even if a cleanup step raises.
        db.close()
|
||||
|
||||
|
||||
@app.on_event("startup")
async def startup_event() -> None:
    """Initialize storage and start the background jobs.

    Creates the data directory and database schema, then registers the
    hourly news-fetch job and the nightly cleanup cron job.
    """
    # Consistency fix: mirror cli.validate_runtime — either provider key keeps
    # fetching alive, so only the absence of BOTH is a hard error. A missing
    # Perplexity key alone just means we rely on the OpenRouter fallback.
    if not config.PERPLEXITY_API_KEY and not config.OPENROUTER_API_KEY:
        logger.error("No provider API key configured — news fetching will fail")
    elif not config.PERPLEXITY_API_KEY:
        logger.warning("PERPLEXITY_API_KEY is not set — relying on OpenRouter fallback only")

    os.makedirs("data", exist_ok=True)
    init_db()
    logger.info("Database initialized")

    scheduler.add_job(scheduled_news_fetch, "interval", hours=1, id="news_fetch")
    scheduler.add_job(nightly_cleanup, "cron", hour=3, minute=0, id="nightly_cleanup")
    scheduler.start()
    logger.info("Scheduler started: hourly news fetch + nightly cleanup")
|
||||
|
||||
|
||||
@app.on_event("shutdown")
|
||||
async def shutdown_event() -> None:
|
||||
scheduler.shutdown(wait=False)
|
||||
logger.info("Scheduler shut down")
|
||||
|
||||
|
||||
@app.get("/api/news", response_model=PaginatedNewsResponse)
|
||||
def api_get_news(
|
||||
cursor: int | None = Query(None, description="Cursor for pagination (last item ID)"),
|
||||
limit: int = Query(10, ge=1, le=50),
|
||||
exclude_hero: int | None = Query(None, description="Hero item ID to exclude from feed"),
|
||||
language: str = Query("en", description="Language code: en, ta, ml"),
|
||||
db: Session = Depends(get_db),
|
||||
) -> PaginatedNewsResponse:
|
||||
lang = normalize_language(language)
|
||||
items = get_news_paginated(db, cursor=cursor, limit=limit + 1, exclude_id=exclude_hero)
|
||||
|
||||
has_more = len(items) > limit
|
||||
if has_more:
|
||||
items = items[:limit]
|
||||
|
||||
next_cursor = items[-1].id if items and has_more else None
|
||||
|
||||
response_items: list[NewsItemResponse] = []
|
||||
for item in items:
|
||||
translation = None
|
||||
if lang != "en":
|
||||
translation = get_translation(db, item.id, lang)
|
||||
headline, summary = resolve_news_content(item, translation)
|
||||
response_items.append(
|
||||
NewsItemResponse(
|
||||
id=item.id,
|
||||
headline=headline,
|
||||
summary=summary,
|
||||
source_url=item.source_url,
|
||||
image_url=item.image_url,
|
||||
image_credit=item.image_credit,
|
||||
published_at=item.published_at,
|
||||
created_at=item.created_at,
|
||||
language=lang if translation is not None else "en",
|
||||
)
|
||||
)
|
||||
|
||||
return PaginatedNewsResponse(
|
||||
items=response_items,
|
||||
next_cursor=next_cursor,
|
||||
has_more=has_more,
|
||||
)
|
||||
|
||||
|
||||
@app.get("/api/news/latest", response_model=NewsItemResponse | None)
|
||||
def api_get_latest_news(
|
||||
language: str = Query("en", description="Language code: en, ta, ml"),
|
||||
db: Session = Depends(get_db),
|
||||
) -> NewsItemResponse | None:
|
||||
lang = normalize_language(language)
|
||||
item = get_latest_news(db)
|
||||
if not item:
|
||||
return None
|
||||
|
||||
translation = None
|
||||
if lang != "en":
|
||||
translation = get_translation(db, item.id, lang)
|
||||
headline, summary = resolve_news_content(item, translation)
|
||||
return NewsItemResponse(
|
||||
id=item.id,
|
||||
headline=headline,
|
||||
summary=summary,
|
||||
source_url=item.source_url,
|
||||
image_url=item.image_url,
|
||||
image_credit=item.image_credit,
|
||||
published_at=item.published_at,
|
||||
created_at=item.created_at,
|
||||
language=lang if translation is not None else "en",
|
||||
)
|
||||
|
||||
|
||||
@app.get("/api/health", response_model=HealthResponse)
|
||||
def api_health(db: Session = Depends(get_db)) -> HealthResponse:
|
||||
count = db.query(NewsItem).filter(NewsItem.archived.is_(False)).count()
|
||||
return HealthResponse(status="ok", version="0.1.0", news_count=count)
|
||||
|
||||
|
||||
frontend_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "frontend")
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def serve_frontend() -> FileResponse:
|
||||
return FileResponse(os.path.join(frontend_dir, "index.html"))
|
||||
|
||||
|
||||
@app.get("/config")
|
||||
async def serve_config() -> dict:
|
||||
return {
|
||||
"umami_script_url": config.UMAMI_SCRIPT_URL,
|
||||
"umami_website_id": config.UMAMI_WEBSITE_ID,
|
||||
"supported_languages": config.SUPPORTED_LANGUAGES,
|
||||
"default_language": "en",
|
||||
}
|
||||
45
backend/models.py
Normal file
45
backend/models.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import datetime
|
||||
|
||||
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from backend.database import Base
|
||||
|
||||
|
||||
class NewsItem(Base):
|
||||
__tablename__ = "news_items"
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
||||
headline: Mapped[str] = mapped_column(String(500), nullable=False, index=True)
|
||||
summary: Mapped[str] = mapped_column(Text, nullable=False)
|
||||
source_url: Mapped[str | None] = mapped_column(String(2000), nullable=True)
|
||||
image_url: Mapped[str | None] = mapped_column(String(2000), nullable=True)
|
||||
image_credit: Mapped[str | None] = mapped_column(String(500), nullable=True)
|
||||
published_at: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime, nullable=False, default=datetime.datetime.utcnow
|
||||
)
|
||||
created_at: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime, nullable=False, default=datetime.datetime.utcnow
|
||||
)
|
||||
archived: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
translations: Mapped[list["NewsTranslation"]] = relationship(
|
||||
back_populates="news_item", cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
|
||||
class NewsTranslation(Base):
|
||||
__tablename__ = "news_translations"
|
||||
__table_args__ = (UniqueConstraint("news_item_id", "language", name="uq_news_item_language"),)
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
||||
news_item_id: Mapped[int] = mapped_column(
|
||||
ForeignKey("news_items.id"), nullable=False, index=True
|
||||
)
|
||||
language: Mapped[str] = mapped_column(String(5), nullable=False, index=True)
|
||||
headline: Mapped[str] = mapped_column(String(500), nullable=False)
|
||||
summary: Mapped[str] = mapped_column(Text, nullable=False)
|
||||
created_at: Mapped[datetime.datetime] = mapped_column(
|
||||
DateTime, nullable=False, default=datetime.datetime.utcnow
|
||||
)
|
||||
|
||||
news_item: Mapped[NewsItem] = relationship(back_populates="translations")
|
||||
299
backend/news_service.py
Normal file
299
backend/news_service.py
Normal file
@@ -0,0 +1,299 @@
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from io import BytesIO
|
||||
|
||||
import httpx
|
||||
from PIL import Image
|
||||
|
||||
from backend import config
|
||||
from backend.database import SessionLocal
|
||||
from backend.repository import (
|
||||
create_news,
|
||||
create_translation,
|
||||
headline_exists_within_24h,
|
||||
translation_exists,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
PLACEHOLDER_IMAGE_PATH = "/static/images/placeholder.png"
|
||||
|
||||
|
||||
async def call_perplexity_api(query: str) -> dict | None:
    """Fetch news items from the Perplexity chat-completions API.

    Args:
        query: User prompt describing the news to fetch.

    Returns:
        The decoded JSON response body.

    Raises:
        httpx.HTTPStatusError: If the API returns a non-2xx status.
    """
    headers = {
        "Authorization": f"Bearer {config.PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": config.PERPLEXITY_MODEL,
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are a news aggregator. Return a JSON array of news items. "
                    "Each item must have: headline, summary (2-3 sentences), source_url, "
                    "image_url (a relevant image URL if available), image_credit. "
                    "Return between 3 and 5 items. Respond ONLY with valid JSON array, no markdown."
                ),
            },
            {"role": "user", "content": query},
        ],
        "temperature": 0.3,
    }

    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.post(config.PERPLEXITY_API_URL, headers=headers, json=payload)
        # Bug fix: raise_for_status BEFORE .json(). Error responses are often
        # not JSON, and decoding them first raised a confusing decode error
        # that masked the real HTTP failure. Also decode the body only once.
        response.raise_for_status()
        body = response.json()
        cost_info = {
            "model": config.PERPLEXITY_MODEL,
            "status": response.status_code,
            "usage": body.get("usage", {}),
        }
        logger.info("Perplexity API cost: %s", json.dumps(cost_info))
        return body
|
||||
|
||||
|
||||
async def call_openrouter_api(query: str) -> dict | None:
    """Fetch news items from the OpenRouter chat-completions API (fallback provider).

    Args:
        query: User prompt describing the news to fetch.

    Returns:
        The decoded JSON response body, or None when no OpenRouter key is configured.

    Raises:
        httpx.HTTPStatusError: If the API returns a non-2xx status.
    """
    if not config.OPENROUTER_API_KEY:
        # The fallback provider is optional; skip quietly when unconfigured.
        return None

    headers = {
        "Authorization": f"Bearer {config.OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": config.OPENROUTER_MODEL,
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are a news aggregator. Return a JSON array of news items. "
                    "Each item must have: headline, summary (2-3 sentences), source_url, "
                    "image_url (a relevant image URL if available), image_credit. "
                    "Return between 3 and 5 items. Respond ONLY with valid JSON array, no markdown."
                ),
            },
            {"role": "user", "content": query},
        ],
        "temperature": 0.3,
    }

    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.post(config.OPENROUTER_API_URL, headers=headers, json=payload)
        # Bug fix: raise_for_status BEFORE .json(). Non-JSON error bodies
        # previously raised a decode error that hid the real HTTP failure.
        response.raise_for_status()
        body = response.json()
        cost_info = {
            "model": config.OPENROUTER_MODEL,
            "status": response.status_code,
            "usage": body.get("usage", {}),
        }
        logger.info("OpenRouter API cost: %s", json.dumps(cost_info))
        return body
|
||||
|
||||
|
||||
async def call_perplexity_translation_api(
|
||||
headline: str, summary: str, language: str
|
||||
) -> dict | None:
|
||||
headers = {
|
||||
"Authorization": f"Bearer {config.PERPLEXITY_API_KEY}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
payload = {
|
||||
"model": config.PERPLEXITY_MODEL,
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"Translate the given headline and summary to the target language. "
|
||||
"Return only valid JSON object with keys: headline, summary. "
|
||||
"No markdown, no extra text."
|
||||
),
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": json.dumps(
|
||||
{
|
||||
"target_language": language,
|
||||
"headline": headline,
|
||||
"summary": summary,
|
||||
}
|
||||
),
|
||||
},
|
||||
],
|
||||
"temperature": 0.1,
|
||||
}
|
||||
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
response = await client.post(config.PERPLEXITY_API_URL, headers=headers, json=payload)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
|
||||
def parse_translation_response(response: dict) -> dict | None:
|
||||
content = response.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
content = content.strip()
|
||||
if content.startswith("```"):
|
||||
content = content.split("\n", 1)[-1].rsplit("```", 1)[0]
|
||||
|
||||
try:
|
||||
parsed = json.loads(content)
|
||||
if isinstance(parsed, dict):
|
||||
headline = str(parsed.get("headline", "")).strip()
|
||||
summary = str(parsed.get("summary", "")).strip()
|
||||
if headline and summary:
|
||||
return {"headline": headline, "summary": summary}
|
||||
except json.JSONDecodeError:
|
||||
logger.error("Failed to parse translation response: %s", content[:200])
|
||||
return None
|
||||
|
||||
|
||||
async def generate_translations(headline: str, summary: str) -> dict[str, dict]:
|
||||
translations: dict[str, dict] = {}
|
||||
language_names = {"ta": "Tamil", "ml": "Malayalam"}
|
||||
|
||||
if not config.PERPLEXITY_API_KEY:
|
||||
return translations
|
||||
|
||||
for language_code, language_name in language_names.items():
|
||||
try:
|
||||
response = await call_perplexity_translation_api(headline, summary, language_name)
|
||||
if response:
|
||||
parsed = parse_translation_response(response)
|
||||
if parsed:
|
||||
translations[language_code] = parsed
|
||||
except Exception:
|
||||
logger.exception("Translation generation failed for %s", language_code)
|
||||
|
||||
return translations
|
||||
|
||||
|
||||
def parse_news_response(response: dict) -> list[dict]:
    """Extract the list of news items from a chat-completion response.

    Strips an optional markdown code fence around the model output and
    decodes it as JSON. Anything that is not a JSON array yields [].
    """
    raw = response.get("choices", [{}])[0].get("message", {}).get("content", "").strip()
    if raw.startswith("```"):
        # Drop the opening fence line and everything after the closing fence.
        raw = raw.split("\n", 1)[-1].rsplit("```", 1)[0]

    try:
        decoded = json.loads(raw)
    except json.JSONDecodeError:
        logger.error("Failed to parse news response: %s", raw[:200])
        return []
    return decoded if isinstance(decoded, list) else []
|
||||
|
||||
|
||||
async def download_and_optimize_image(image_url: str) -> str | None:
    """Download a remote image, normalize it, and save it under static/images.

    The image is capped at 1200px wide (aspect ratio preserved), converted to
    RGB when needed, and re-encoded as an optimized JPEG. The stored filename
    is the MD5 hex of the source URL, so re-fetching the same URL overwrites
    a single file instead of accumulating copies.

    Args:
        image_url: Remote image URL; may be empty.

    Returns:
        The public "/static/images/<md5>.jpg" path on success, or None when
        the URL is empty or the download/optimization fails.
    """
    if not image_url:
        return None

    try:
        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
            response = await client.get(image_url)
            response.raise_for_status()

        img = Image.open(BytesIO(response.content))

        # Downscale oversized images to a 1200px maximum width.
        if img.width > 1200:
            ratio = 1200 / img.width
            img = img.resize((1200, int(img.height * ratio)), Image.Resampling.LANCZOS)

        # JPEG cannot store alpha or palette data.
        if img.mode in ("RGBA", "P"):
            img = img.convert("RGB")

        # MD5 is used only as a stable cache key here, not for security.
        filename = hashlib.md5(image_url.encode()).hexdigest() + ".jpg"
        filepath = os.path.join(config.STATIC_IMAGES_DIR, filename)

        img.save(filepath, "JPEG", quality=config.IMAGE_QUALITY, optimize=True)
        # Bug fix: previously returned the literal "/static/images/(unknown)"
        # for every image instead of the saved file's actual path.
        return f"/static/images/{filename}"
    except Exception:
        # Best-effort: a bad image must never abort the whole fetch cycle;
        # callers substitute the placeholder when this returns None.
        logger.exception("Failed to download/optimize image: %s", image_url)
        return None
|
||||
|
||||
|
||||
async def fetch_news_with_retry(max_attempts: int = 3) -> list[dict]:
    """Fetch news items, retrying Perplexity with exponential backoff and
    falling back to OpenRouter once Perplexity is exhausted.

    Args:
        max_attempts: Number of Perplexity attempts before the fallback.

    Returns:
        Parsed news items, or [] when every provider fails.
    """
    query = "What are the latest AI news from the last hour? Include source URLs and image URLs."

    for attempt in range(max_attempts):
        try:
            response = await call_perplexity_api(query)
            if response:
                return parse_news_response(response)
        except Exception:
            # Include the traceback so provider failures are diagnosable.
            logger.warning(
                "Perplexity API attempt %d/%d failed", attempt + 1, max_attempts, exc_info=True
            )
            # Bug fix: don't sleep after the FINAL attempt — it only delayed
            # the OpenRouter fallback by the largest backoff interval.
            if attempt < max_attempts - 1:
                await asyncio.sleep(2**attempt)

    logger.warning("Perplexity API exhausted, trying OpenRouter fallback")
    try:
        response = await call_openrouter_api(query)
        if response:
            return parse_news_response(response)
    except Exception:
        logger.exception("OpenRouter fallback also failed")

    return []
|
||||
|
||||
|
||||
async def process_and_store_news() -> int:
    """Fetch the latest news, persist new items and their translations.

    Returns:
        The number of news items stored this cycle (0 when nothing was fetched).
    """
    items = await fetch_news_with_retry()
    if not items:
        logger.warning("No news items fetched this cycle")
        return 0

    db = SessionLocal()
    stored = 0
    try:
        for item in items:
            headline = item.get("headline", "").strip()
            summary = item.get("summary", "").strip()

            # Skip malformed provider items missing required fields.
            if not headline or not summary:
                continue

            # Dedupe: an identical headline within 24h is treated as a repeat.
            if headline_exists_within_24h(db, headline):
                logger.debug("Duplicate headline skipped: %s", headline[:80])
                continue

            # Mirror the remote image locally; fall back to the bundled placeholder.
            local_image = await download_and_optimize_image(item.get("image_url", ""))
            image_url = local_image or PLACEHOLDER_IMAGE_PATH

            # The item is committed first so translations can reference its id.
            created_news_item = create_news(
                db=db,
                headline=headline,
                summary=summary,
                source_url=item.get("source_url"),
                image_url=image_url,
                image_credit=item.get("image_credit"),
            )

            # Best-effort Tamil/Malayalam translations; failures are logged
            # inside generate_translations and simply omitted from the result.
            translations = await generate_translations(headline, summary)
            for language_code, payload in translations.items():
                if translation_exists(db, created_news_item.id, language_code):
                    continue
                create_translation(
                    db=db,
                    news_item_id=created_news_item.id,
                    language=language_code,
                    headline=payload["headline"],
                    summary=payload["summary"],
                )

            stored += 1

        logger.info("Stored %d new news items", stored)
    finally:
        # Always release the session, even if a mid-loop step raises.
        db.close()

    return stored
|
||||
|
||||
|
||||
def scheduled_news_fetch() -> None:
    """Synchronous APScheduler entry point for the hourly news fetch.

    Wraps the async pipeline in asyncio.run and logs the cycle duration.
    """
    started_at = time.monotonic()
    logger.info("Starting scheduled news fetch")
    stored = asyncio.run(process_and_store_news())
    logger.info(
        "Scheduled news fetch complete: %d items in %.1fs",
        stored,
        time.monotonic() - started_at,
    )
|
||||
171
backend/repository.py
Normal file
171
backend/repository.py
Normal file
@@ -0,0 +1,171 @@
|
||||
import datetime
|
||||
|
||||
from sqlalchemy import and_, desc
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from backend.models import NewsItem, NewsTranslation
|
||||
|
||||
SUPPORTED_LANGUAGES = {"en", "ta", "ml"}
|
||||
|
||||
|
||||
def create_news(
|
||||
db: Session,
|
||||
headline: str,
|
||||
summary: str,
|
||||
source_url: str | None = None,
|
||||
image_url: str | None = None,
|
||||
image_credit: str | None = None,
|
||||
published_at: datetime.datetime | None = None,
|
||||
) -> NewsItem:
|
||||
item = NewsItem(
|
||||
headline=headline,
|
||||
summary=summary,
|
||||
source_url=source_url,
|
||||
image_url=image_url,
|
||||
image_credit=image_credit,
|
||||
published_at=published_at or datetime.datetime.utcnow(),
|
||||
)
|
||||
db.add(item)
|
||||
db.commit()
|
||||
db.refresh(item)
|
||||
return item
|
||||
|
||||
|
||||
def get_recent_news(db: Session, limit: int = 10) -> list[NewsItem]:
|
||||
return (
|
||||
db.query(NewsItem)
|
||||
.filter(NewsItem.archived.is_(False))
|
||||
.order_by(desc(NewsItem.published_at))
|
||||
.limit(limit)
|
||||
.all()
|
||||
)
|
||||
|
||||
|
||||
def get_latest_news(db: Session) -> NewsItem | None:
|
||||
return (
|
||||
db.query(NewsItem)
|
||||
.filter(NewsItem.archived.is_(False))
|
||||
.order_by(desc(NewsItem.published_at))
|
||||
.first()
|
||||
)
|
||||
|
||||
|
||||
def create_translation(
|
||||
db: Session,
|
||||
news_item_id: int,
|
||||
language: str,
|
||||
headline: str,
|
||||
summary: str,
|
||||
) -> NewsTranslation:
|
||||
translation = NewsTranslation(
|
||||
news_item_id=news_item_id,
|
||||
language=language,
|
||||
headline=headline,
|
||||
summary=summary,
|
||||
)
|
||||
db.add(translation)
|
||||
db.commit()
|
||||
db.refresh(translation)
|
||||
return translation
|
||||
|
||||
|
||||
def get_translation(db: Session, news_item_id: int, language: str) -> NewsTranslation | None:
|
||||
return (
|
||||
db.query(NewsTranslation)
|
||||
.filter(
|
||||
and_(
|
||||
NewsTranslation.news_item_id == news_item_id,
|
||||
NewsTranslation.language == language,
|
||||
)
|
||||
)
|
||||
.first()
|
||||
)
|
||||
|
||||
|
||||
def translation_exists(db: Session, news_item_id: int, language: str) -> bool:
|
||||
return get_translation(db, news_item_id, language) is not None
|
||||
|
||||
|
||||
def get_translations_by_article(db: Session, news_item_id: int) -> list[NewsTranslation]:
|
||||
return (
|
||||
db.query(NewsTranslation)
|
||||
.filter(NewsTranslation.news_item_id == news_item_id)
|
||||
.order_by(NewsTranslation.language.asc())
|
||||
.all()
|
||||
)
|
||||
|
||||
|
||||
def resolve_news_content(item: NewsItem, translation: NewsTranslation | None) -> tuple[str, str]:
    """Pick the (headline, summary) pair to display.

    A translation wins when one exists; otherwise the English original
    from the news item itself is used.
    """
    source = item if translation is None else translation
    return source.headline, source.summary
|
||||
|
||||
|
||||
def normalize_language(language: str | None) -> str:
    """Map a raw language code onto a supported one, defaulting to English.

    Lower-cases the input; anything empty, None, or outside the supported
    set falls back to "en".
    """
    lowered = (language or "").lower()
    return lowered if lowered in SUPPORTED_LANGUAGES else "en"
|
||||
|
||||
|
||||
def get_news_paginated(
|
||||
db: Session, cursor: int | None = None, limit: int = 10, exclude_id: int | None = None
|
||||
) -> list[NewsItem]:
|
||||
query = db.query(NewsItem).filter(NewsItem.archived.is_(False))
|
||||
|
||||
if exclude_id is not None:
|
||||
query = query.filter(NewsItem.id != exclude_id)
|
||||
|
||||
if cursor is not None:
|
||||
query = query.filter(NewsItem.id < cursor)
|
||||
|
||||
return query.order_by(desc(NewsItem.id)).limit(limit).all()
|
||||
|
||||
|
||||
def headline_exists_within_24h(db: Session, headline: str) -> bool:
|
||||
cutoff = datetime.datetime.utcnow() - datetime.timedelta(hours=24)
|
||||
return (
|
||||
db.query(NewsItem)
|
||||
.filter(
|
||||
and_(
|
||||
NewsItem.headline == headline,
|
||||
NewsItem.created_at >= cutoff,
|
||||
)
|
||||
)
|
||||
.first()
|
||||
is not None
|
||||
)
|
||||
|
||||
|
||||
def archive_old_news(db: Session, retention_days: int = 30) -> int:
|
||||
cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=retention_days)
|
||||
count = (
|
||||
db.query(NewsItem)
|
||||
.filter(
|
||||
and_(
|
||||
NewsItem.created_at < cutoff,
|
||||
NewsItem.archived.is_(False),
|
||||
)
|
||||
)
|
||||
.update({"archived": True})
|
||||
)
|
||||
db.commit()
|
||||
return count
|
||||
|
||||
|
||||
def delete_archived_news(db: Session, days_after_archive: int = 60) -> int:
|
||||
cutoff = datetime.datetime.utcnow() - datetime.timedelta(days=days_after_archive)
|
||||
count = (
|
||||
db.query(NewsItem)
|
||||
.filter(
|
||||
and_(
|
||||
NewsItem.archived.is_(True),
|
||||
NewsItem.created_at < cutoff,
|
||||
)
|
||||
)
|
||||
.delete()
|
||||
)
|
||||
db.commit()
|
||||
return count
|
||||
40
backend/schemas.py
Normal file
40
backend/schemas.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class NewsItemResponse(BaseModel):
|
||||
id: int
|
||||
headline: str
|
||||
summary: str
|
||||
source_url: str | None = None
|
||||
image_url: str | None = None
|
||||
image_credit: str | None = None
|
||||
published_at: datetime.datetime
|
||||
created_at: datetime.datetime
|
||||
language: str
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class PaginatedNewsResponse(BaseModel):
|
||||
items: list[NewsItemResponse]
|
||||
next_cursor: int | None = None
|
||||
has_more: bool = False
|
||||
|
||||
|
||||
class NewsTranslationResponse(BaseModel):
|
||||
id: int
|
||||
news_item_id: int
|
||||
language: str
|
||||
headline: str
|
||||
summary: str
|
||||
created_at: datetime.datetime
|
||||
|
||||
model_config = {"from_attributes": True}
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
|
||||
status: str
|
||||
version: str
|
||||
news_count: int
|
||||
BIN
backend/static/images/48911d15e0f0b9cc0287d2c3608f6a0c.jpg
Normal file
BIN
backend/static/images/48911d15e0f0b9cc0287d2c3608f6a0c.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 182 KiB |
BIN
backend/static/images/placeholder.png
Normal file
BIN
backend/static/images/placeholder.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.5 KiB |
Reference in New Issue
Block a user