Initial Commit
This commit is contained in:
299
backend/news_service.py
Normal file
299
backend/news_service.py
Normal file
@@ -0,0 +1,299 @@
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from io import BytesIO
|
||||
|
||||
import httpx
|
||||
from PIL import Image
|
||||
|
||||
from backend import config
|
||||
from backend.database import SessionLocal
|
||||
from backend.repository import (
|
||||
create_news,
|
||||
create_translation,
|
||||
headline_exists_within_24h,
|
||||
translation_exists,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
PLACEHOLDER_IMAGE_PATH = "/static/images/placeholder.png"
|
||||
|
||||
|
||||
async def call_perplexity_api(query: str) -> dict | None:
|
||||
headers = {
|
||||
"Authorization": f"Bearer {config.PERPLEXITY_API_KEY}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
payload = {
|
||||
"model": config.PERPLEXITY_MODEL,
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You are a news aggregator. Return a JSON array of news items. "
|
||||
"Each item must have: headline, summary (2-3 sentences), source_url, "
|
||||
"image_url (a relevant image URL if available), image_credit. "
|
||||
"Return between 3 and 5 items. Respond ONLY with valid JSON array, no markdown."
|
||||
),
|
||||
},
|
||||
{"role": "user", "content": query},
|
||||
],
|
||||
"temperature": 0.3,
|
||||
}
|
||||
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
response = await client.post(config.PERPLEXITY_API_URL, headers=headers, json=payload)
|
||||
cost_info = {
|
||||
"model": config.PERPLEXITY_MODEL,
|
||||
"status": response.status_code,
|
||||
"usage": response.json().get("usage", {}),
|
||||
}
|
||||
logger.info("Perplexity API cost: %s", json.dumps(cost_info))
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
|
||||
async def call_openrouter_api(query: str) -> dict | None:
|
||||
if not config.OPENROUTER_API_KEY:
|
||||
return None
|
||||
|
||||
headers = {
|
||||
"Authorization": f"Bearer {config.OPENROUTER_API_KEY}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
payload = {
|
||||
"model": config.OPENROUTER_MODEL,
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You are a news aggregator. Return a JSON array of news items. "
|
||||
"Each item must have: headline, summary (2-3 sentences), source_url, "
|
||||
"image_url (a relevant image URL if available), image_credit. "
|
||||
"Return between 3 and 5 items. Respond ONLY with valid JSON array, no markdown."
|
||||
),
|
||||
},
|
||||
{"role": "user", "content": query},
|
||||
],
|
||||
"temperature": 0.3,
|
||||
}
|
||||
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
response = await client.post(config.OPENROUTER_API_URL, headers=headers, json=payload)
|
||||
cost_info = {
|
||||
"model": config.OPENROUTER_MODEL,
|
||||
"status": response.status_code,
|
||||
"usage": response.json().get("usage", {}),
|
||||
}
|
||||
logger.info("OpenRouter API cost: %s", json.dumps(cost_info))
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
|
||||
async def call_perplexity_translation_api(
|
||||
headline: str, summary: str, language: str
|
||||
) -> dict | None:
|
||||
headers = {
|
||||
"Authorization": f"Bearer {config.PERPLEXITY_API_KEY}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
payload = {
|
||||
"model": config.PERPLEXITY_MODEL,
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"Translate the given headline and summary to the target language. "
|
||||
"Return only valid JSON object with keys: headline, summary. "
|
||||
"No markdown, no extra text."
|
||||
),
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": json.dumps(
|
||||
{
|
||||
"target_language": language,
|
||||
"headline": headline,
|
||||
"summary": summary,
|
||||
}
|
||||
),
|
||||
},
|
||||
],
|
||||
"temperature": 0.1,
|
||||
}
|
||||
|
||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||
response = await client.post(config.PERPLEXITY_API_URL, headers=headers, json=payload)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
|
||||
def parse_translation_response(response: dict) -> dict | None:
|
||||
content = response.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
content = content.strip()
|
||||
if content.startswith("```"):
|
||||
content = content.split("\n", 1)[-1].rsplit("```", 1)[0]
|
||||
|
||||
try:
|
||||
parsed = json.loads(content)
|
||||
if isinstance(parsed, dict):
|
||||
headline = str(parsed.get("headline", "")).strip()
|
||||
summary = str(parsed.get("summary", "")).strip()
|
||||
if headline and summary:
|
||||
return {"headline": headline, "summary": summary}
|
||||
except json.JSONDecodeError:
|
||||
logger.error("Failed to parse translation response: %s", content[:200])
|
||||
return None
|
||||
|
||||
|
||||
async def generate_translations(headline: str, summary: str) -> dict[str, dict]:
|
||||
translations: dict[str, dict] = {}
|
||||
language_names = {"ta": "Tamil", "ml": "Malayalam"}
|
||||
|
||||
if not config.PERPLEXITY_API_KEY:
|
||||
return translations
|
||||
|
||||
for language_code, language_name in language_names.items():
|
||||
try:
|
||||
response = await call_perplexity_translation_api(headline, summary, language_name)
|
||||
if response:
|
||||
parsed = parse_translation_response(response)
|
||||
if parsed:
|
||||
translations[language_code] = parsed
|
||||
except Exception:
|
||||
logger.exception("Translation generation failed for %s", language_code)
|
||||
|
||||
return translations
|
||||
|
||||
|
||||
def parse_news_response(response: dict) -> list[dict]:
|
||||
content = response.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
content = content.strip()
|
||||
if content.startswith("```"):
|
||||
content = content.split("\n", 1)[-1].rsplit("```", 1)[0]
|
||||
|
||||
try:
|
||||
items = json.loads(content)
|
||||
if isinstance(items, list):
|
||||
return items
|
||||
except json.JSONDecodeError:
|
||||
logger.error("Failed to parse news response: %s", content[:200])
|
||||
return []
|
||||
|
||||
|
||||
async def download_and_optimize_image(image_url: str) -> str | None:
|
||||
if not image_url:
|
||||
return None
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
|
||||
response = await client.get(image_url)
|
||||
response.raise_for_status()
|
||||
|
||||
img = Image.open(BytesIO(response.content))
|
||||
|
||||
if img.width > 1200:
|
||||
ratio = 1200 / img.width
|
||||
new_height = int(img.height * ratio)
|
||||
img = img.resize((1200, new_height), Image.Resampling.LANCZOS)
|
||||
|
||||
if img.mode in ("RGBA", "P"):
|
||||
img = img.convert("RGB")
|
||||
|
||||
filename = hashlib.md5(image_url.encode()).hexdigest() + ".jpg"
|
||||
filepath = os.path.join(config.STATIC_IMAGES_DIR, filename)
|
||||
|
||||
img.save(filepath, "JPEG", quality=config.IMAGE_QUALITY, optimize=True)
|
||||
return f"/static/images/{filename}"
|
||||
except Exception:
|
||||
logger.exception("Failed to download/optimize image: %s", image_url)
|
||||
return None
|
||||
|
||||
|
||||
async def fetch_news_with_retry(max_attempts: int = 3) -> list[dict]:
|
||||
query = "What are the latest AI news from the last hour? Include source URLs and image URLs."
|
||||
|
||||
for attempt in range(max_attempts):
|
||||
try:
|
||||
response = await call_perplexity_api(query)
|
||||
if response:
|
||||
return parse_news_response(response)
|
||||
except Exception:
|
||||
wait = 2**attempt
|
||||
logger.warning("Perplexity API attempt %d failed, retrying in %ds", attempt + 1, wait)
|
||||
await asyncio.sleep(wait)
|
||||
|
||||
logger.warning("Perplexity API exhausted, trying OpenRouter fallback")
|
||||
try:
|
||||
response = await call_openrouter_api(query)
|
||||
if response:
|
||||
return parse_news_response(response)
|
||||
except Exception:
|
||||
logger.exception("OpenRouter fallback also failed")
|
||||
|
||||
return []
|
||||
|
||||
|
||||
async def process_and_store_news() -> int:
|
||||
items = await fetch_news_with_retry()
|
||||
if not items:
|
||||
logger.warning("No news items fetched this cycle")
|
||||
return 0
|
||||
|
||||
db = SessionLocal()
|
||||
stored = 0
|
||||
try:
|
||||
for item in items:
|
||||
headline = item.get("headline", "").strip()
|
||||
summary = item.get("summary", "").strip()
|
||||
|
||||
if not headline or not summary:
|
||||
continue
|
||||
|
||||
if headline_exists_within_24h(db, headline):
|
||||
logger.debug("Duplicate headline skipped: %s", headline[:80])
|
||||
continue
|
||||
|
||||
local_image = await download_and_optimize_image(item.get("image_url", ""))
|
||||
image_url = local_image or PLACEHOLDER_IMAGE_PATH
|
||||
|
||||
created_news_item = create_news(
|
||||
db=db,
|
||||
headline=headline,
|
||||
summary=summary,
|
||||
source_url=item.get("source_url"),
|
||||
image_url=image_url,
|
||||
image_credit=item.get("image_credit"),
|
||||
)
|
||||
|
||||
translations = await generate_translations(headline, summary)
|
||||
for language_code, payload in translations.items():
|
||||
if translation_exists(db, created_news_item.id, language_code):
|
||||
continue
|
||||
create_translation(
|
||||
db=db,
|
||||
news_item_id=created_news_item.id,
|
||||
language=language_code,
|
||||
headline=payload["headline"],
|
||||
summary=payload["summary"],
|
||||
)
|
||||
|
||||
stored += 1
|
||||
|
||||
logger.info("Stored %d new news items", stored)
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
return stored
|
||||
|
||||
|
||||
def scheduled_news_fetch() -> None:
|
||||
start = time.monotonic()
|
||||
logger.info("Starting scheduled news fetch")
|
||||
count = asyncio.run(process_and_store_news())
|
||||
elapsed = time.monotonic() - start
|
||||
logger.info("Scheduled news fetch complete: %d items in %.1fs", count, elapsed)
|
||||
Reference in New Issue
Block a user