paper-insight-api / backfill_thumbnails.py
winbeau's picture
Backend snapshot from 760fa78
353b9f4
import asyncio
import sys
from pathlib import Path
# Add backend directory to python path so we can import app modules
sys.path.append(str(Path(__file__).parent))
from sqlmodel import select
from app.database import get_sync_session
from app.models import Paper
from app.services.pdf_renderer import generate_thumbnail
async def backfill_thumbnails():
print("Starting thumbnail backfill...")
session = get_sync_session()
try:
# Find papers without thumbnails
statement = select(Paper).where(Paper.thumbnail_url == None)
papers = session.exec(statement).all()
print(f"Found {len(papers)} papers needing thumbnails.")
for i, paper in enumerate(papers):
print(f"[{i+1}/{len(papers)}] Processing {paper.arxiv_id}...")
if not paper.pdf_url:
print(f" Skipping {paper.arxiv_id}: No PDF URL")
continue
thumbnail_url = await generate_thumbnail(paper.arxiv_id, paper.pdf_url)
if thumbnail_url:
paper.thumbnail_url = thumbnail_url
session.add(paper)
session.commit()
print(f" Generated: {thumbnail_url}")
else:
print(f" Failed to generate thumbnail for {paper.arxiv_id}")
except Exception as e:
print(f"Error during backfill: {e}")
finally:
session.close()
print("Backfill completed.")
if __name__ == "__main__":
asyncio.run(backfill_thumbnails())