Initial Commit

This commit is contained in:
2026-02-12 16:50:29 -05:00
commit a1da041f14
74 changed files with 6140 additions and 0 deletions

299
backend/news_service.py Normal file
View File

@@ -0,0 +1,299 @@
import asyncio
import hashlib
import json
import logging
import os
import time
from io import BytesIO
import httpx
from PIL import Image
from backend import config
from backend.database import SessionLocal
from backend.repository import (
create_news,
create_translation,
headline_exists_within_24h,
translation_exists,
)
logger = logging.getLogger(__name__)
PLACEHOLDER_IMAGE_PATH = "/static/images/placeholder.png"
async def call_perplexity_api(query: str) -> dict | None:
headers = {
"Authorization": f"Bearer {config.PERPLEXITY_API_KEY}",
"Content-Type": "application/json",
}
payload = {
"model": config.PERPLEXITY_MODEL,
"messages": [
{
"role": "system",
"content": (
"You are a news aggregator. Return a JSON array of news items. "
"Each item must have: headline, summary (2-3 sentences), source_url, "
"image_url (a relevant image URL if available), image_credit. "
"Return between 3 and 5 items. Respond ONLY with valid JSON array, no markdown."
),
},
{"role": "user", "content": query},
],
"temperature": 0.3,
}
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(config.PERPLEXITY_API_URL, headers=headers, json=payload)
cost_info = {
"model": config.PERPLEXITY_MODEL,
"status": response.status_code,
"usage": response.json().get("usage", {}),
}
logger.info("Perplexity API cost: %s", json.dumps(cost_info))
response.raise_for_status()
return response.json()
async def call_openrouter_api(query: str) -> dict | None:
if not config.OPENROUTER_API_KEY:
return None
headers = {
"Authorization": f"Bearer {config.OPENROUTER_API_KEY}",
"Content-Type": "application/json",
}
payload = {
"model": config.OPENROUTER_MODEL,
"messages": [
{
"role": "system",
"content": (
"You are a news aggregator. Return a JSON array of news items. "
"Each item must have: headline, summary (2-3 sentences), source_url, "
"image_url (a relevant image URL if available), image_credit. "
"Return between 3 and 5 items. Respond ONLY with valid JSON array, no markdown."
),
},
{"role": "user", "content": query},
],
"temperature": 0.3,
}
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(config.OPENROUTER_API_URL, headers=headers, json=payload)
cost_info = {
"model": config.OPENROUTER_MODEL,
"status": response.status_code,
"usage": response.json().get("usage", {}),
}
logger.info("OpenRouter API cost: %s", json.dumps(cost_info))
response.raise_for_status()
return response.json()
async def call_perplexity_translation_api(
headline: str, summary: str, language: str
) -> dict | None:
headers = {
"Authorization": f"Bearer {config.PERPLEXITY_API_KEY}",
"Content-Type": "application/json",
}
payload = {
"model": config.PERPLEXITY_MODEL,
"messages": [
{
"role": "system",
"content": (
"Translate the given headline and summary to the target language. "
"Return only valid JSON object with keys: headline, summary. "
"No markdown, no extra text."
),
},
{
"role": "user",
"content": json.dumps(
{
"target_language": language,
"headline": headline,
"summary": summary,
}
),
},
],
"temperature": 0.1,
}
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(config.PERPLEXITY_API_URL, headers=headers, json=payload)
response.raise_for_status()
return response.json()
def parse_translation_response(response: dict) -> dict | None:
content = response.get("choices", [{}])[0].get("message", {}).get("content", "")
content = content.strip()
if content.startswith("```"):
content = content.split("\n", 1)[-1].rsplit("```", 1)[0]
try:
parsed = json.loads(content)
if isinstance(parsed, dict):
headline = str(parsed.get("headline", "")).strip()
summary = str(parsed.get("summary", "")).strip()
if headline and summary:
return {"headline": headline, "summary": summary}
except json.JSONDecodeError:
logger.error("Failed to parse translation response: %s", content[:200])
return None
async def generate_translations(headline: str, summary: str) -> dict[str, dict]:
translations: dict[str, dict] = {}
language_names = {"ta": "Tamil", "ml": "Malayalam"}
if not config.PERPLEXITY_API_KEY:
return translations
for language_code, language_name in language_names.items():
try:
response = await call_perplexity_translation_api(headline, summary, language_name)
if response:
parsed = parse_translation_response(response)
if parsed:
translations[language_code] = parsed
except Exception:
logger.exception("Translation generation failed for %s", language_code)
return translations
def parse_news_response(response: dict) -> list[dict]:
content = response.get("choices", [{}])[0].get("message", {}).get("content", "")
content = content.strip()
if content.startswith("```"):
content = content.split("\n", 1)[-1].rsplit("```", 1)[0]
try:
items = json.loads(content)
if isinstance(items, list):
return items
except json.JSONDecodeError:
logger.error("Failed to parse news response: %s", content[:200])
return []
async def download_and_optimize_image(image_url: str) -> str | None:
if not image_url:
return None
try:
async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
response = await client.get(image_url)
response.raise_for_status()
img = Image.open(BytesIO(response.content))
if img.width > 1200:
ratio = 1200 / img.width
new_height = int(img.height * ratio)
img = img.resize((1200, new_height), Image.Resampling.LANCZOS)
if img.mode in ("RGBA", "P"):
img = img.convert("RGB")
filename = hashlib.md5(image_url.encode()).hexdigest() + ".jpg"
filepath = os.path.join(config.STATIC_IMAGES_DIR, filename)
img.save(filepath, "JPEG", quality=config.IMAGE_QUALITY, optimize=True)
return f"/static/images/{filename}"
except Exception:
logger.exception("Failed to download/optimize image: %s", image_url)
return None
async def fetch_news_with_retry(max_attempts: int = 3) -> list[dict]:
query = "What are the latest AI news from the last hour? Include source URLs and image URLs."
for attempt in range(max_attempts):
try:
response = await call_perplexity_api(query)
if response:
return parse_news_response(response)
except Exception:
wait = 2**attempt
logger.warning("Perplexity API attempt %d failed, retrying in %ds", attempt + 1, wait)
await asyncio.sleep(wait)
logger.warning("Perplexity API exhausted, trying OpenRouter fallback")
try:
response = await call_openrouter_api(query)
if response:
return parse_news_response(response)
except Exception:
logger.exception("OpenRouter fallback also failed")
return []
async def process_and_store_news() -> int:
items = await fetch_news_with_retry()
if not items:
logger.warning("No news items fetched this cycle")
return 0
db = SessionLocal()
stored = 0
try:
for item in items:
headline = item.get("headline", "").strip()
summary = item.get("summary", "").strip()
if not headline or not summary:
continue
if headline_exists_within_24h(db, headline):
logger.debug("Duplicate headline skipped: %s", headline[:80])
continue
local_image = await download_and_optimize_image(item.get("image_url", ""))
image_url = local_image or PLACEHOLDER_IMAGE_PATH
created_news_item = create_news(
db=db,
headline=headline,
summary=summary,
source_url=item.get("source_url"),
image_url=image_url,
image_credit=item.get("image_credit"),
)
translations = await generate_translations(headline, summary)
for language_code, payload in translations.items():
if translation_exists(db, created_news_item.id, language_code):
continue
create_translation(
db=db,
news_item_id=created_news_item.id,
language=language_code,
headline=payload["headline"],
summary=payload["summary"],
)
stored += 1
logger.info("Stored %d new news items", stored)
finally:
db.close()
return stored
def scheduled_news_fetch() -> None:
start = time.monotonic()
logger.info("Starting scheduled news fetch")
count = asyncio.run(process_and_store_news())
elapsed = time.monotonic() - start
logger.info("Scheduled news fetch complete: %d items in %.1fs", count, elapsed)