File size: 7,806 Bytes
38c016b 486eff6 38c016b 486eff6 38c016b 486eff6 38c016b 486eff6 38c016b 486eff6 38c016b 486eff6 38c016b 486eff6 38c016b 486eff6 38c016b 486eff6 38c016b 486eff6 38c016b 486eff6 38c016b 486eff6 38c016b 486eff6 38c016b 486eff6 38c016b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
"""
FastAPI backend for crossword puzzle generator with vector similarity search.
"""
import os
import logging
import time
from datetime import datetime
from contextlib import asynccontextmanager
from pathlib import Path
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
import uvicorn
from dotenv import load_dotenv
from src.routes.api import router as api_router
from src.services.thematic_word_service import ThematicWordService
# Load environment variables (python-dotenv) before anything reads os.environ
load_dotenv()

# Root logging configuration: each record shows the emitting file and line
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(filename)s:%(lineno)d - %(levelname)s - %(message)s',
    datefmt='%H:%M:%S',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Module-wide handle to the thematic word service. It stays None until the
# lifespan handler below assigns the constructed instance.
thematic_service = None
def _init_wordnet_generator(service):
    """Best-effort eager setup of the WordNet clue generator.

    Builds a ``WordNetClueGenerator`` using the service's ``cache_dir``
    (``"./cache"`` when no service is given), calls ``initialize()`` on it
    and stores it on ``service._wordnet_generator``.

    Any exception raised along the way, including a failed import of the
    generator module, is logged as a warning and swallowed so that it
    never aborts application startup.
    """
    logger.info("π§ Initializing WordNet clue generator...")
    try:
        wordnet_start = time.time()
        # Imported inside the try so an import failure is handled below
        # rather than propagating to the caller.
        from src.services.wordnet_clue_generator import WordNetClueGenerator

        cache_dir = service.cache_dir if service else "./cache"
        wordnet_generator = WordNetClueGenerator(cache_dir=str(cache_dir))
        wordnet_generator.initialize()
        # Store in thematic service for later use
        if service:
            service._wordnet_generator = wordnet_generator
        wordnet_time = time.time() - wordnet_start
        logger.info(f"β WordNet clue generator initialized in {wordnet_time:.2f}s")
    except Exception as e:
        logger.warning(f"β οΈ Failed to initialize WordNet clue generator during startup: {e}")
        logger.info("π WordNet clue generator will be initialized on first use")


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Initialize and cleanup application resources.

    Startup:
        1. Create the ``ThematicWordService`` and log its cache status
           (directory, existence, writability, per-cache-file state).
        2. Await ``initialize_async()`` and log which cache files exist
           afterwards.
        3. Try to set up the WordNet clue generator (failure is tolerated).
        4. Publish the service as the module-level ``thematic_service``
           and as ``app.state.thematic_service``.

    Shutdown:
        Only logs a message; the service needs no cleanup at present.

    Raises:
        ImportError, PermissionError, Exception: any failure while
            creating or initializing the thematic service is logged and
            re-raised so the application fails fast.
    """
    global thematic_service
    cache_types = ('vocabulary_cache', 'frequency_cache', 'embeddings_cache')

    # Startup
    logger.info("π Initializing Python backend with thematic word service...")

    # Initialize thematic service
    try:
        service_start = time.time()
        logger.info("π§ Creating ThematicWordService instance...")
        thematic_service = ThematicWordService()

        # Log cache configuration for debugging
        cache_status = thematic_service.get_cache_status()
        logger.info(f"π Cache directory: {cache_status['cache_directory']}")
        logger.info(f"π Cache directory exists: {os.path.exists(cache_status['cache_directory'])}")
        logger.info(f"βοΈ Cache directory writable: {os.access(cache_status['cache_directory'], os.W_OK)}")

        # Check for existing cache files
        cache_complete = cache_status['complete']
        logger.info(f"π¦ Existing cache complete: {cache_complete}")
        if not cache_complete:
            for cache_type in cache_types:
                cache_info = cache_status[cache_type]
                logger.info(f" {cache_type}: exists={cache_info['exists']}, path={cache_info['path']}")

        # Force eager initialization to create cache files
        logger.info("β‘ Starting thematic service initialization (creating cache files)...")
        await thematic_service.initialize_async()

        # Verify cache files were created
        cache_status_after = thematic_service.get_cache_status()
        logger.info(f"β Cache status after initialization: complete={cache_status_after['complete']}")
        for cache_type in cache_types:
            cache_info = cache_status_after[cache_type]
            if cache_info['exists']:
                logger.info(f" β {cache_type}: {cache_info.get('size_mb', 0):.1f}MB")
            else:
                logger.warning(f" β {cache_type}: NOT CREATED")

        init_time = time.time() - service_start
        logger.info(f"π Thematic service initialized in {init_time:.2f}s")

        # Initialize WordNet clue generator during startup
        _init_wordnet_generator(thematic_service)
    except ImportError as e:
        logger.error(f"β Missing dependencies for thematic service: {e}")
        logger.error("π‘ Install missing packages: pip install wordfreq sentence-transformers torch scikit-learn")
        raise  # Fail fast on missing dependencies
    except PermissionError as e:
        logger.error(f"β Permission error with cache directory: {e}")
        # thematic_service is a module global here, so it never shows up in
        # locals(); test the value itself. It is still None when the
        # constructor was the call that failed.
        cache_dir_hint = thematic_service.cache_dir if thematic_service is not None else 'unknown'
        logger.error(f"π‘ Check cache directory permissions: {cache_dir_hint}")
        raise  # Fail fast on permission issues
    except Exception as e:
        logger.error(f"β Failed to initialize thematic service: {e}")
        logger.error(f"π Error type: {type(e).__name__}")
        import traceback
        logger.error(f"π Full traceback: {traceback.format_exc()}")
        raise  # Fail fast instead of continuing without service

    # Make thematic service available to routes
    app.state.thematic_service = thematic_service

    yield

    # Shutdown
    logger.info("π Shutting down Python backend...")
    # Thematic service doesn't need cleanup, but we can add it if needed in the future
# Application object; startup and shutdown are delegated to `lifespan`
app = FastAPI(
    lifespan=lifespan,
    title="Crossword Puzzle Generator API",
    version="2.0.0",
    description="Python backend with AI-powered thematic word generation",
)
# CORS configuration
if os.getenv("NODE_ENV") == "production":
    # Production (HuggingFace Spaces): accept requests from any origin
    cors_origins = ["*"]
else:
    # Development: allow dev servers
    cors_origins = [
        "http://localhost:5173",  # Vite dev server
        "http://localhost:3000",  # Alternative dev server
        "http://localhost:7860",  # Local production test
    ]

# A wildcard origin must not be combined with credentialed requests: the
# CORS spec forbids it, and the FastAPI docs require explicit origins
# whenever allow_credentials is True. Credentials are therefore enabled
# only when the origin list is explicit.
cors_allow_credentials = "*" not in cors_origins

app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_credentials=cors_allow_credentials,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include API routes
app.include_router(api_router, prefix="/api")
# Registered before the SPA catch-all below: FastAPI matches routes in
# registration order, so a later "/health" would be swallowed by
# "/{full_path:path}" whenever the frontend is present.
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Returns a JSON object with fixed ``status`` and ``backend`` fields and
    a ``vector_search`` flag taken from the module-level thematic service.
    The flag is False while the service is still None.
    """
    return {
        "status": "healthy",
        "backend": "python",
        # Falls back to False if the service exposes no `is_initialized`
        # attribute. NOTE(review): confirm the attribute name against
        # ThematicWordService.
        "vector_search": bool(getattr(thematic_service, "is_initialized", False)),
    }


# Serve static files (frontend)
static_path = Path(__file__).parent / "public"
if static_path.exists():
    app.mount("/assets", StaticFiles(directory=static_path / "assets"), name="assets")

    @app.get("/")
    async def serve_frontend():
        """Serve the React frontend.

        Raises:
            HTTPException: 404 when ``index.html`` is missing.
        """
        index_path = static_path / "index.html"
        if not index_path.exists():
            raise HTTPException(status_code=404, detail="Frontend not found")
        return FileResponse(index_path)

    @app.get("/{full_path:path}")
    async def serve_spa_routes(full_path: str):
        """Serve React SPA routes.

        Any path that does not start with ``api/`` gets ``index.html``.

        Raises:
            HTTPException: 404 for unmatched ``api/`` paths or when
                ``index.html`` is missing.
        """
        # For any non-API route, serve the React app
        if not full_path.startswith("api/"):
            index_path = static_path / "index.html"
            if index_path.exists():
                return FileResponse(index_path)
        raise HTTPException(status_code=404, detail="Not found")
if __name__ == "__main__":
    # Direct-run entry point. With NODE_ENV=production the server binds all
    # interfaces with auto-reload off; otherwise it binds loopback only and
    # reloads on change. The port comes from PORT and defaults to 7860.
    port = int(os.getenv("PORT", 7860))
    in_production = os.getenv("NODE_ENV") == "production"
    if in_production:
        host = "0.0.0.0"
    else:
        host = "127.0.0.1"
    logger.info(f"π Starting Python backend on {host}:{port}")
    uvicorn.run("app:app", host=host, port=port, reload=not in_production)