First deployment
This commit is contained in:
141
backend/cli.py
141
backend/cli.py
@@ -14,6 +14,7 @@ from backend import config
|
||||
from backend.database import SessionLocal, init_db
|
||||
from backend.models import NewsItem
|
||||
from backend.news_service import (
|
||||
GENERIC_AI_FALLBACK_URL,
|
||||
download_and_optimize_image,
|
||||
extract_image_keywords,
|
||||
fetch_royalty_free_image,
|
||||
@@ -87,56 +88,105 @@ def build_contextual_query(headline: str, summary: str | None) -> str:
|
||||
return cleaned
|
||||
|
||||
|
||||
def resolve_article_id_from_permalink(value: str | None) -> int | None:
|
||||
if not value:
|
||||
return None
|
||||
if value.isdigit():
|
||||
return int(value)
|
||||
match = re.search(r"(?:\?|&)article=(\d+)", value)
|
||||
if match:
|
||||
return int(match.group(1))
|
||||
return None
|
||||
|
||||
|
||||
def is_unrelated_image_candidate(image_url: str | None, image_credit: str | None) -> bool:
|
||||
text = f"{image_url or ''} {image_credit or ''}".lower()
|
||||
blocked = (
|
||||
"cat",
|
||||
"dog",
|
||||
"pet",
|
||||
"animal",
|
||||
"wildlife",
|
||||
"lion",
|
||||
"tiger",
|
||||
"bird",
|
||||
"horse",
|
||||
)
|
||||
return any(term in text for term in blocked)
|
||||
|
||||
|
||||
async def refetch_image_for_item(
    item: NewsItem,
    max_attempts: int,
) -> tuple[str | None, str | None, str]:
    """Fetch a replacement image for one article, with retries and a fallback.

    Builds a contextual search query from the article's headline/summary and
    tries three query variants; each variant gets up to ``max_attempts``
    provider attempts with exponential backoff between failures. Candidates
    flagged as unrelated, or identical to the article's current image, are
    skipped without backoff (they consume an attempt).

    Returns ``(local_image_path, credit, decision)`` where ``decision`` is
    ``"provider"``, ``"fallback"``, or ``"none"``.
    """
    base_query = build_contextual_query(item.headline, item.summary)
    existing_image = item.summary_image_url
    # Variant order: angle/illustration twists first, plain query last.
    variants = (
        f"{base_query} alternative angle",
        f"{base_query} concept illustration",
        base_query,
    )

    for variant in variants:
        for attempt_index in range(max_attempts):
            try:
                candidate_url, candidate_credit = await fetch_royalty_free_image(variant)
                if not candidate_url:
                    raise RuntimeError("no-image-url")
                if is_unrelated_image_candidate(candidate_url, candidate_credit):
                    logger.info("Rejected unrelated image candidate: %s", candidate_url)
                    continue
                optimized = await download_and_optimize_image(candidate_url)
                if not optimized:
                    raise RuntimeError("image-download-or-optimize-failed")
                if existing_image and optimized == existing_image:
                    logger.info("Rejected duplicate image candidate for article=%s", item.id)
                    continue
                return optimized, candidate_credit, "provider"
            except Exception:
                # Swallow and back off; the last attempt of a variant moves
                # straight on to the next variant without sleeping.
                if attempt_index + 1 < max_attempts:
                    await asyncio.sleep(2**attempt_index)

    # Provider search exhausted: try the generic AI-themed fallback image,
    # but never return the image the article already has.
    fallback_image = await download_and_optimize_image(GENERIC_AI_FALLBACK_URL)
    if fallback_image and fallback_image != existing_image:
        return fallback_image, "AI-themed fallback", "fallback"
    return None, None, "none"
|
||||
|
||||
async def refetch_images_for_latest(
|
||||
limit: int,
|
||||
max_attempts: int,
|
||||
dry_run: bool,
|
||||
target_article_id: int | None = None,
|
||||
) -> tuple[int, int]:
|
||||
db = SessionLocal()
|
||||
processed = 0
|
||||
refreshed = 0
|
||||
|
||||
try:
|
||||
items = (
|
||||
db.query(NewsItem)
|
||||
.filter(NewsItem.archived.is_(False))
|
||||
.order_by(desc(NewsItem.published_at))
|
||||
.limit(limit)
|
||||
.all()
|
||||
)
|
||||
if target_article_id is not None:
|
||||
items = (
|
||||
db.query(NewsItem)
|
||||
.filter(NewsItem.archived.is_(False), NewsItem.id == target_article_id)
|
||||
.all()
|
||||
)
|
||||
else:
|
||||
items = (
|
||||
db.query(NewsItem)
|
||||
.filter(NewsItem.archived.is_(False))
|
||||
.order_by(desc(NewsItem.published_at))
|
||||
.limit(limit)
|
||||
.all()
|
||||
)
|
||||
|
||||
total = len(items)
|
||||
for idx, item in enumerate(items, start=1):
|
||||
processed += 1
|
||||
query = build_contextual_query(item.headline, item.summary)
|
||||
|
||||
image_url: str | None = None
|
||||
image_credit: str | None = None
|
||||
local_image: str | None = None
|
||||
|
||||
for attempt in range(max_attempts):
|
||||
try:
|
||||
image_url, image_credit = await fetch_royalty_free_image(query)
|
||||
if not image_url:
|
||||
raise RuntimeError("no-image-url")
|
||||
local_image = await download_and_optimize_image(image_url)
|
||||
if not local_image:
|
||||
raise RuntimeError("image-download-or-optimize-failed")
|
||||
break
|
||||
except Exception:
|
||||
if attempt == max_attempts - 1:
|
||||
logger.exception("Image refetch failed for item=%s after retries", item.id)
|
||||
image_url = None
|
||||
local_image = None
|
||||
break
|
||||
delay = 2**attempt
|
||||
logger.warning(
|
||||
"Refetch retry item=%s attempt=%d delay=%ds",
|
||||
item.id,
|
||||
attempt + 1,
|
||||
delay,
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
local_image, image_credit, decision = await refetch_image_for_item(
|
||||
item=item,
|
||||
max_attempts=max_attempts,
|
||||
)
|
||||
|
||||
if local_image:
|
||||
refreshed += 1
|
||||
@@ -152,6 +202,7 @@ async def refetch_images_for_latest(
|
||||
total=total,
|
||||
refreshed=refreshed,
|
||||
article_id=item.id,
|
||||
decision=decision,
|
||||
)
|
||||
|
||||
return processed, refreshed
|
||||
@@ -186,6 +237,12 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
help="Refetch and optimize latest article images",
|
||||
)
|
||||
refetch_parser.add_argument("--limit", type=positive_int, default=30)
|
||||
refetch_parser.add_argument(
|
||||
"--permalink",
|
||||
type=str,
|
||||
default="",
|
||||
help="Target one article by permalink (for example '/?article=123' or '123')",
|
||||
)
|
||||
refetch_parser.add_argument("--max-attempts", type=positive_int, default=4)
|
||||
refetch_parser.add_argument("--dry-run", action="store_true")
|
||||
refetch_parser.set_defaults(handler=handle_admin_refetch_images)
|
||||
@@ -280,11 +337,22 @@ def handle_admin_refetch_images(args: argparse.Namespace) -> int:
|
||||
start = time.monotonic()
|
||||
try:
|
||||
init_db()
|
||||
target_article_id = resolve_article_id_from_permalink(args.permalink)
|
||||
if args.permalink and target_article_id is None:
|
||||
print_result(
|
||||
"refetch-images",
|
||||
"blocked",
|
||||
reason="invalid-permalink",
|
||||
hint="use '/?article=<id>' or raw numeric id",
|
||||
)
|
||||
return 2
|
||||
|
||||
processed, refreshed = asyncio.run(
|
||||
refetch_images_for_latest(
|
||||
limit=min(args.limit, 30),
|
||||
max_attempts=args.max_attempts,
|
||||
dry_run=args.dry_run,
|
||||
target_article_id=target_article_id,
|
||||
)
|
||||
)
|
||||
elapsed = time.monotonic() - start
|
||||
@@ -293,6 +361,7 @@ def handle_admin_refetch_images(args: argparse.Namespace) -> int:
|
||||
"ok",
|
||||
processed=processed,
|
||||
refreshed=refreshed,
|
||||
target_article_id=target_article_id,
|
||||
dry_run=args.dry_run,
|
||||
elapsed=f"{elapsed:.1f}s",
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user