First deployment
Some checks failed
quality-gates / lint-and-test (push) Has been cancelled
quality-gates / security-scan (push) Has been cancelled

This commit is contained in:
2026-02-13 09:14:04 -05:00
parent 0e21e035f5
commit 679561bcdb
128 changed files with 3479 additions and 120 deletions

View File

@@ -25,6 +25,49 @@ logger = logging.getLogger(__name__)
# Image shown when no usable image could be resolved for a story.
PLACEHOLDER_IMAGE_PATH = "/static/images/placeholder.png"

# Generic remote fallbacks (placehold.co banners) per story category.
GENERIC_AI_FALLBACK_URL = "https://placehold.co/1200x630/0f172a/e2e8f0/png?text=AI+News"
GENERIC_FINANCE_FALLBACK_URL = "https://placehold.co/1200x630/0f172a/e2e8f0/png?text=Market+News"

# Lower-case keywords whose presence in a query marks the story as
# finance-related.
_FINANCE_TOPIC_TERMS = frozenset(
    (
        # general finance vocabulary
        "finance", "financial", "market", "markets",
        # equities
        "stock", "stocks", "share", "shares",
        "earnings", "investor", "investors",
        # exchanges and indices
        "nasdaq", "nyse", "dow", "s&p",
        # banking and company results
        "bank", "banking", "revenue", "profit",
        # deal / trading activity
        "trading", "ipo", "valuation",
    )
)

# Lower-case terms that disqualify an image (by URL or credit text) from
# being used on a finance story.
_FINANCE_IMAGE_BLOCKLIST = (
    # animals
    "cat", "dog", "pet", "lion", "tiger", "bird", "horse",
    # people / lifestyle imagery
    "portrait", "selfie", "wedding", "food",
    # scenery
    "nature-only",
)
async def call_perplexity_api(query: str) -> dict | None:
@@ -174,6 +217,43 @@ def parse_translation_response(response: dict) -> dict | None:
return None
def validate_translation_quality(
headline: str, summary: str, language_code: str
) -> tuple[bool, str | None]:
text = f"{headline} {summary}".strip()
if not headline or not summary:
return False, "empty-content"
if len(text) < 20:
return False, "too-short"
repeated_runs = re.search(r"(.)\1{6,}", text)
if repeated_runs:
return False, "repeated-sequence"
lines = [segment.strip() for segment in re.split(r"[.!?]\s+", text) if segment.strip()]
if lines:
unique_ratio = len(set(lines)) / len(lines)
if unique_ratio < 0.4:
return False, "low-unique-content"
if language_code == "ta":
script_hits = sum(1 for char in text if "\u0b80" <= char <= "\u0bff")
elif language_code == "ml":
script_hits = sum(1 for char in text if "\u0d00" <= char <= "\u0d7f")
else:
return True, None
alpha_hits = sum(1 for char in text if char.isalpha())
if alpha_hits == 0:
return False, "no-alpha-content"
script_ratio = script_hits / alpha_hits
if script_ratio < 0.35:
return False, "script-mismatch"
return True, None
async def generate_translations(
headline: str,
summary: str,
@@ -200,7 +280,20 @@ async def generate_translations(
if response:
parsed = parse_translation_response(response)
if parsed:
translations[language_code] = parsed
is_valid, reason = validate_translation_quality(
parsed["headline"],
parsed["summary"],
language_code,
)
if is_valid:
logger.info("Translation accepted for %s", language_code)
translations[language_code] = parsed
else:
logger.warning(
"Translation rejected for %s: %s",
language_code,
reason,
)
except Exception:
logger.exception("Translation generation failed for %s", language_code)
@@ -467,7 +560,7 @@ async def fetch_pixabay_image(query: str) -> tuple[str | None, str | None]:
except Exception:
logger.exception("Pixabay image retrieval failed")
return GENERIC_AI_FALLBACK_URL, "Generic AI fallback"
return None, None
async def fetch_unsplash_image(query: str) -> tuple[str | None, str | None]:
@@ -591,6 +684,15 @@ def get_enabled_providers() -> list[
async def fetch_royalty_free_image(query: str) -> tuple[str | None, str | None]:
"""Fetch royalty-free image using provider chain with fallback."""
def is_finance_story(text: str) -> bool:
    """Return True when ``text`` mentions a finance topic term as a whole word.

    Terms come from ``_FINANCE_TOPIC_TERMS``. Matching is case-insensitive
    and bounded by non-letters, with an optional plural "s", so "banks"
    and "S&P 500" match while "windows" (contains "dow") and "shared"
    (contains "share") do not.
    """
    lowered = (text or "").lower()
    if not lowered or not _FINANCE_TOPIC_TERMS:
        return False
    # sorted() only makes the generated pattern deterministic; alternation
    # order does not change which inputs match.
    alternatives = "|".join(re.escape(term) for term in sorted(_FINANCE_TOPIC_TERMS))
    pattern = rf"(?<![a-z])(?:{alternatives})s?(?![a-z])"
    return re.search(pattern, lowered) is not None
def is_finance_safe_image(image_url: str, credit: str | None) -> bool:
    """Return True unless the image URL or credit contains a blocklisted word.

    Terms come from ``_FINANCE_IMAGE_BLOCKLIST``. Matching is
    case-insensitive and bounded by non-letters, with an optional plural
    "s". A slug such as ".../cat-1234.jpg" or "two_dogs" is rejected,
    while a credit like "Photo by Catherine" or a path containing
    "education" is not rejected just because it embeds "cat".
    """
    haystack = f"{image_url or ''} {credit or ''}".lower()
    blocked = "|".join(re.escape(term) for term in _FINANCE_IMAGE_BLOCKLIST)
    if not blocked:
        # An empty blocklist blocks nothing.
        return True
    pattern = rf"(?<![a-z])(?:{blocked})s?(?![a-z])"
    return re.search(pattern, haystack) is None
# MCP endpoint takes highest priority if configured
if config.ROYALTY_IMAGE_MCP_ENDPOINT:
try:
@@ -610,15 +712,35 @@ async def fetch_royalty_free_image(query: str) -> tuple[str | None, str | None]:
# Extract keywords for better image search
refined_query = extract_image_keywords(query)
finance_story = is_finance_story(query)
query_variants = [refined_query]
if finance_story:
query_variants = [
f"{refined_query} stock market trading chart finance business",
refined_query,
]
# Try each enabled provider in order
for provider_name, fetch_fn in get_enabled_providers():
try:
image_url, credit = await fetch_fn(refined_query)
if image_url:
for query_variant in query_variants:
for provider_name, fetch_fn in get_enabled_providers():
try:
image_url, credit = await fetch_fn(query_variant)
if not image_url:
continue
if finance_story and not is_finance_safe_image(image_url, credit):
logger.info(
"Rejected non-finance-safe image from %s for query '%s': %s",
provider_name,
query_variant,
image_url,
)
continue
return image_url, credit
except Exception:
logger.exception("%s image retrieval failed", provider_name.capitalize())
except Exception:
logger.exception("%s image retrieval failed", provider_name.capitalize())
if finance_story:
return GENERIC_FINANCE_FALLBACK_URL, "Finance-safe fallback"
return None, None