Spaces:
Sleeping
Sleeping
| import asyncio | |
| import sys | |
| from pathlib import Path | |
| # Add backend directory to python path so we can import app modules | |
| sys.path.append(str(Path(__file__).parent)) | |
| from sqlmodel import select | |
| from app.database import get_sync_session | |
| from app.models import Paper | |
| from app.services.pdf_renderer import generate_thumbnail | |
| async def backfill_thumbnails(): | |
| print("Starting thumbnail backfill...") | |
| session = get_sync_session() | |
| try: | |
| # Find papers without thumbnails | |
| statement = select(Paper).where(Paper.thumbnail_url == None) | |
| papers = session.exec(statement).all() | |
| print(f"Found {len(papers)} papers needing thumbnails.") | |
| for i, paper in enumerate(papers): | |
| print(f"[{i+1}/{len(papers)}] Processing {paper.arxiv_id}...") | |
| if not paper.pdf_url: | |
| print(f" Skipping {paper.arxiv_id}: No PDF URL") | |
| continue | |
| thumbnail_url = await generate_thumbnail(paper.arxiv_id, paper.pdf_url) | |
| if thumbnail_url: | |
| paper.thumbnail_url = thumbnail_url | |
| session.add(paper) | |
| session.commit() | |
| print(f" Generated: {thumbnail_url}") | |
| else: | |
| print(f" Failed to generate thumbnail for {paper.arxiv_id}") | |
| except Exception as e: | |
| print(f"Error during backfill: {e}") | |
| finally: | |
| session.close() | |
| print("Backfill completed.") | |
| if __name__ == "__main__": | |
| asyncio.run(backfill_thumbnails()) | |