First deployment
This commit is contained in:
@@ -25,6 +25,49 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
# Site-relative path of the placeholder image.
PLACEHOLDER_IMAGE_PATH = "/static/images/placeholder.png"

# Hosted 1200x630 placeholder images, labelled "AI News" and "Market News".
GENERIC_AI_FALLBACK_URL = (
    "https://placehold.co/1200x630/0f172a/e2e8f0/png?text=AI+News"
)
GENERIC_FINANCE_FALLBACK_URL = (
    "https://placehold.co/1200x630/0f172a/e2e8f0/png?text=Market+News"
)

# Lower-case keywords used to decide whether a story is finance-related.
_FINANCE_TOPIC_TERMS = frozenset(
    (
        # General finance vocabulary
        "finance", "financial", "market", "markets",
        # Equities
        "stock", "stocks", "share", "shares",
        "earnings", "investor", "investors",
        # Exchanges and indices
        "nasdaq", "nyse", "dow", "s&p",
        # Banking and company results
        "bank", "banking", "revenue", "profit",
        "trading", "ipo", "valuation",
    )
)

# Lower-case terms checked against an image's URL and credit line; a hit
# marks the image as unsuitable for a finance story.
_FINANCE_IMAGE_BLOCKLIST = (
    # Animals
    "cat", "dog", "pet", "lion", "tiger", "bird", "horse",
    # People and lifestyle
    "portrait", "selfie", "wedding", "food",
    # Scenery
    "nature-only",
)
|
||||
|
||||
|
||||
async def call_perplexity_api(query: str) -> dict | None:
|
||||
@@ -174,6 +217,43 @@ def parse_translation_response(response: dict) -> dict | None:
|
||||
return None
|
||||
|
||||
|
||||
def validate_translation_quality(
|
||||
headline: str, summary: str, language_code: str
|
||||
) -> tuple[bool, str | None]:
|
||||
text = f"{headline} {summary}".strip()
|
||||
if not headline or not summary:
|
||||
return False, "empty-content"
|
||||
if len(text) < 20:
|
||||
return False, "too-short"
|
||||
|
||||
repeated_runs = re.search(r"(.)\1{6,}", text)
|
||||
if repeated_runs:
|
||||
return False, "repeated-sequence"
|
||||
|
||||
lines = [segment.strip() for segment in re.split(r"[.!?]\s+", text) if segment.strip()]
|
||||
if lines:
|
||||
unique_ratio = len(set(lines)) / len(lines)
|
||||
if unique_ratio < 0.4:
|
||||
return False, "low-unique-content"
|
||||
|
||||
if language_code == "ta":
|
||||
script_hits = sum(1 for char in text if "\u0b80" <= char <= "\u0bff")
|
||||
elif language_code == "ml":
|
||||
script_hits = sum(1 for char in text if "\u0d00" <= char <= "\u0d7f")
|
||||
else:
|
||||
return True, None
|
||||
|
||||
alpha_hits = sum(1 for char in text if char.isalpha())
|
||||
if alpha_hits == 0:
|
||||
return False, "no-alpha-content"
|
||||
|
||||
script_ratio = script_hits / alpha_hits
|
||||
if script_ratio < 0.35:
|
||||
return False, "script-mismatch"
|
||||
|
||||
return True, None
|
||||
|
||||
|
||||
async def generate_translations(
|
||||
headline: str,
|
||||
summary: str,
|
||||
@@ -200,7 +280,20 @@ async def generate_translations(
|
||||
if response:
|
||||
parsed = parse_translation_response(response)
|
||||
if parsed:
|
||||
translations[language_code] = parsed
|
||||
is_valid, reason = validate_translation_quality(
|
||||
parsed["headline"],
|
||||
parsed["summary"],
|
||||
language_code,
|
||||
)
|
||||
if is_valid:
|
||||
logger.info("Translation accepted for %s", language_code)
|
||||
translations[language_code] = parsed
|
||||
else:
|
||||
logger.warning(
|
||||
"Translation rejected for %s: %s",
|
||||
language_code,
|
||||
reason,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception("Translation generation failed for %s", language_code)
|
||||
|
||||
@@ -467,7 +560,7 @@ async def fetch_pixabay_image(query: str) -> tuple[str | None, str | None]:
|
||||
except Exception:
|
||||
logger.exception("Pixabay image retrieval failed")
|
||||
|
||||
return GENERIC_AI_FALLBACK_URL, "Generic AI fallback"
|
||||
return None, None
|
||||
|
||||
|
||||
async def fetch_unsplash_image(query: str) -> tuple[str | None, str | None]:
|
||||
@@ -591,6 +684,15 @@ def get_enabled_providers() -> list[
|
||||
|
||||
async def fetch_royalty_free_image(query: str) -> tuple[str | None, str | None]:
|
||||
"""Fetch royalty-free image using provider chain with fallback."""
|
||||
|
||||
def is_finance_story(text: str) -> bool:
    """Return True when ``text`` mentions a finance keyword as a whole word.

    Matching is case-insensitive and accepts an optional trailing "s"
    (so "banks" matches "bank"). Whole-word matching avoids the false
    positives of a plain substring test, e.g. "dow" inside "download" or
    "window", "share" inside "shared", "market" inside "marketing".

    Args:
        text: Story text or search query; ``None`` or empty yields False.

    Returns:
        True if any term from ``_FINANCE_TOPIC_TERMS`` occurs as a word.
    """
    import re  # local import keeps this helper self-contained

    lowered = (text or "").lower()
    return any(
        re.search(rf"(?<!\w){re.escape(term)}s?(?!\w)", lowered)
        for term in _FINANCE_TOPIC_TERMS
    )
|
||||
|
||||
def is_finance_safe_image(image_url: str, credit: str | None) -> bool:
    """Return True unless the image URL or credit contains a blocklisted term.

    Terms must appear as whole words (optionally pluralised with "s"),
    bounded by non-alphanumeric characters such as "-", "_", "/" or spaces.
    A plain substring test would reject valid images whose URL or
    photographer credit merely contains a term, e.g. "cat" inside
    "Catherine" or "communication", "pet" inside "Peter" or "competition".

    Args:
        image_url: Candidate image URL; ``None`` or empty is treated as "".
        credit: Attribution text for the image, if any.

    Returns:
        False if any term from ``_FINANCE_IMAGE_BLOCKLIST`` is found.
    """
    import re
    from urllib.parse import unquote

    # Decode percent-escapes first so "%20cat%20" exposes "cat" as a word.
    haystack = f"{unquote(image_url or '')} {credit or ''}".lower()
    return not any(
        re.search(rf"(?<![a-z0-9]){re.escape(term)}s?(?![a-z0-9])", haystack)
        for term in _FINANCE_IMAGE_BLOCKLIST
    )
|
||||
|
||||
# MCP endpoint takes highest priority if configured
|
||||
if config.ROYALTY_IMAGE_MCP_ENDPOINT:
|
||||
try:
|
||||
@@ -610,15 +712,35 @@ async def fetch_royalty_free_image(query: str) -> tuple[str | None, str | None]:
|
||||
|
||||
# Extract keywords for better image search
|
||||
refined_query = extract_image_keywords(query)
|
||||
finance_story = is_finance_story(query)
|
||||
query_variants = [refined_query]
|
||||
if finance_story:
|
||||
query_variants = [
|
||||
f"{refined_query} stock market trading chart finance business",
|
||||
refined_query,
|
||||
]
|
||||
|
||||
# Try each enabled provider in order
|
||||
for provider_name, fetch_fn in get_enabled_providers():
|
||||
try:
|
||||
image_url, credit = await fetch_fn(refined_query)
|
||||
if image_url:
|
||||
for query_variant in query_variants:
|
||||
for provider_name, fetch_fn in get_enabled_providers():
|
||||
try:
|
||||
image_url, credit = await fetch_fn(query_variant)
|
||||
if not image_url:
|
||||
continue
|
||||
if finance_story and not is_finance_safe_image(image_url, credit):
|
||||
logger.info(
|
||||
"Rejected non-finance-safe image from %s for query '%s': %s",
|
||||
provider_name,
|
||||
query_variant,
|
||||
image_url,
|
||||
)
|
||||
continue
|
||||
return image_url, credit
|
||||
except Exception:
|
||||
logger.exception("%s image retrieval failed", provider_name.capitalize())
|
||||
except Exception:
|
||||
logger.exception("%s image retrieval failed", provider_name.capitalize())
|
||||
|
||||
if finance_story:
|
||||
return GENERIC_FINANCE_FALLBACK_URL, "Finance-safe fallback"
|
||||
|
||||
return None, None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user