File size: 7,806 Bytes
38c016b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486eff6
38c016b
 
 
 
486eff6
 
 
 
 
 
38c016b
 
486eff6
38c016b
486eff6
 
38c016b
 
 
 
486eff6
38c016b
 
 
486eff6
38c016b
486eff6
38c016b
 
486eff6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38c016b
486eff6
 
 
 
 
 
 
 
 
 
 
 
 
38c016b
 
486eff6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38c016b
486eff6
 
 
 
 
38c016b
486eff6
 
38c016b
 
 
 
 
486eff6
38c016b
 
 
 
486eff6
38c016b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
"""
FastAPI backend for crossword puzzle generator with vector similarity search.
"""

import os
import logging
import time
from datetime import datetime
from contextlib import asynccontextmanager
from pathlib import Path

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
import uvicorn
from dotenv import load_dotenv

from src.routes.api import router as api_router
from src.services.thematic_word_service import ThematicWordService

# Load environment variables
# Done before anything below calls os.getenv() so values supplied through
# python-dotenv are visible to the CORS and server configuration.
load_dotenv()

# Set up logging with filename and line numbers
# INFO level; the timestamp is time-of-day only (datefmt has no date part).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(filename)s:%(lineno)d - %(levelname)s - %(message)s',
    datefmt='%H:%M:%S'
)
logger = logging.getLogger(__name__)

# All services now use standard Python logging with filename/line numbers

# Global thematic service instance
# Stays None until lifespan() assigns it during application startup.
thematic_service = None

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Initialize and cleanup application resources.

    Startup (everything before ``yield``):
      1. Create the module-level ``ThematicWordService`` and log its cache
         status before and after ``initialize_async()``.
      2. Try to build a ``WordNetClueGenerator`` and attach it to the service
         as ``_wordnet_generator``. A failure here is logged as a warning and
         does not abort startup.
      3. Expose the service to routes as ``app.state.thematic_service``.

    Shutdown (after ``yield``) only logs a message.

    Args:
        app: The FastAPI application whose ``state`` receives the service.

    Raises:
        ImportError: A dependency of the thematic service is missing.
        PermissionError: The cache directory could not be accessed.
        Exception: Any other failure while initializing the thematic service.
            All three are logged and re-raised so the app fails fast.
    """
    global thematic_service

    cache_types = ('vocabulary_cache', 'frequency_cache', 'embeddings_cache')

    # Startup
    logger.info("πŸš€ Initializing Python backend with thematic word service...")

    # Initialize thematic service
    try:
        service_start = time.time()
        logger.info("πŸ”§ Creating ThematicWordService instance...")
        thematic_service = ThematicWordService()

        # Log cache configuration for debugging
        cache_status = thematic_service.get_cache_status()
        logger.info(f"πŸ“ Cache directory: {cache_status['cache_directory']}")
        logger.info(f"πŸ” Cache directory exists: {os.path.exists(cache_status['cache_directory'])}")
        logger.info(f"✏️ Cache directory writable: {os.access(cache_status['cache_directory'], os.W_OK)}")

        # Check for existing cache files
        cache_complete = cache_status['complete']
        logger.info(f"πŸ“¦ Existing cache complete: {cache_complete}")
        if not cache_complete:
            # Only list the individual files when something is missing.
            for cache_type in cache_types:
                cache_info = cache_status[cache_type]
                logger.info(f"  {cache_type}: exists={cache_info['exists']}, path={cache_info['path']}")

        # Force eager initialization to create cache files
        logger.info("⚑ Starting thematic service initialization (creating cache files)...")
        await thematic_service.initialize_async()

        # Verify cache files were created
        cache_status_after = thematic_service.get_cache_status()
        logger.info(f"βœ… Cache status after initialization: complete={cache_status_after['complete']}")
        for cache_type in cache_types:
            cache_info = cache_status_after[cache_type]
            if cache_info['exists']:
                logger.info(f"  βœ… {cache_type}: {cache_info.get('size_mb', 0):.1f}MB")
            else:
                logger.warning(f"  ❌ {cache_type}: NOT CREATED")

        init_time = time.time() - service_start
        logger.info(f"πŸŽ‰ Thematic service initialized in {init_time:.2f}s")

        # Initialize WordNet clue generator during startup
        logger.info("πŸ”§ Initializing WordNet clue generator...")
        try:
            wordnet_start = time.time()
            # Imported here so a missing/broken WordNet module is handled by
            # the best-effort handler below rather than failing startup.
            from src.services.wordnet_clue_generator import WordNetClueGenerator
            cache_dir = thematic_service.cache_dir if thematic_service else "./cache"
            wordnet_generator = WordNetClueGenerator(cache_dir=str(cache_dir))
            wordnet_generator.initialize()

            # Store in thematic service for later use
            if thematic_service:
                thematic_service._wordnet_generator = wordnet_generator

            wordnet_time = time.time() - wordnet_start
            logger.info(f"βœ… WordNet clue generator initialized in {wordnet_time:.2f}s")
        except Exception as e:
            # Deliberately non-fatal: startup continues without the generator.
            logger.warning(f"⚠️ Failed to initialize WordNet clue generator during startup: {e}")
            logger.info("πŸ“ WordNet clue generator will be initialized on first use")

    except ImportError as e:
        logger.error(f"❌ Missing dependencies for thematic service: {e}")
        logger.error("πŸ’‘ Install missing packages: pip install wordfreq sentence-transformers torch scikit-learn")
        raise  # Fail fast on missing dependencies
    except PermissionError as e:
        logger.error(f"❌ Permission error with cache directory: {e}")
        # thematic_service is a module global (never in locals()) and is still
        # None if the constructor itself raised, so read the attribute
        # defensively instead of probing locals().
        cache_dir_hint = getattr(thematic_service, "cache_dir", "unknown")
        logger.error(f"πŸ’‘ Check cache directory permissions: {cache_dir_hint}")
        raise  # Fail fast on permission issues
    except Exception as e:
        logger.error(f"❌ Failed to initialize thematic service: {e}")
        logger.error(f"πŸ” Error type: {type(e).__name__}")
        import traceback
        logger.error(f"πŸ“‹ Full traceback: {traceback.format_exc()}")
        raise  # Fail fast instead of continuing without service

    # Make thematic service available to routes
    app.state.thematic_service = thematic_service

    yield

    # Shutdown
    logger.info("πŸ›‘ Shutting down Python backend...")
    # Thematic service doesn't need cleanup, but we can add it if needed in the future

# Create FastAPI app
# The lifespan handler defined above is passed in here; it performs the
# thematic service initialization on startup and logs on shutdown.
app = FastAPI(
    title="Crossword Puzzle Generator API",
    description="Python backend with AI-powered thematic word generation",
    version="2.0.0",
    lifespan=lifespan
)

# CORS configuration
if os.getenv("NODE_ENV") == "production":
    # Production: every origin is allowed (wildcard), not just the same origin.
    cors_origins = ["*"]  # HuggingFace Spaces
else:
    # Development: allow dev servers
    cors_origins = [
        "http://localhost:5173",  # Vite dev server
        "http://localhost:3000",  # Alternative dev server
        "http://localhost:7860",  # Local production test
    ]

# A wildcard origin must not be combined with credentialed requests: browsers
# reject "Access-Control-Allow-Origin: *" on such requests, and the FastAPI
# CORS documentation requires explicit origins when allow_credentials is True.
# Credentials are therefore enabled only for the explicit development list.
app.add_middleware(
    CORSMiddleware,
    allow_origins=cors_origins,
    allow_credentials="*" not in cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include API routes
# All router endpoints are exposed under the /api prefix; the SPA catch-all
# route defined later in this file explicitly declines paths starting with
# "api/".
app.include_router(api_router, prefix="/api")

# Registered BEFORE the "/{full_path:path}" catch-all below: routes are matched
# in registration order, so declaring /health after the catch-all would make it
# return the frontend's index.html whenever the public/ directory exists.
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Returns:
        A dict with a fixed ``status``/``backend`` pair and ``vector_search``,
        which is True only when the module-level thematic service exists and
        reports a truthy ``is_initialized``.
    """
    # The module-level service is None until lifespan() has run. getattr keeps
    # the endpoint from raising if the attribute is missing.
    # NOTE(review): assumes ThematicWordService exposes ``is_initialized`` as
    # an attribute/property - confirm against the service class.
    service = thematic_service
    return {
        "status": "healthy",
        "backend": "python",
        "vector_search": bool(getattr(service, "is_initialized", False)),
    }


# Serve static files (frontend)
static_path = Path(__file__).parent / "public"
if static_path.exists():
    app.mount("/assets", StaticFiles(directory=static_path / "assets"), name="assets")

    @app.get("/")
    async def serve_frontend():
        """Serve the React frontend.

        Raises:
            HTTPException: 404 when public/index.html does not exist.
        """
        index_path = static_path / "index.html"
        if index_path.exists():
            return FileResponse(index_path)
        raise HTTPException(status_code=404, detail="Frontend not found")

    @app.get("/{full_path:path}")
    async def serve_spa_routes(full_path: str):
        """Serve React SPA routes.

        Any path that does not start with ``api/`` gets index.html so the
        client-side router can handle it.

        Raises:
            HTTPException: 404 for ``api/`` paths that reached this handler,
                or when index.html does not exist.
        """
        # For any non-API route, serve the React app
        if not full_path.startswith("api/"):
            index_path = static_path / "index.html"
            if index_path.exists():
                return FileResponse(index_path)
        raise HTTPException(status_code=404, detail="Not found")

if __name__ == "__main__":
    # Direct-run entry point: production binds all interfaces with reload off,
    # anything else binds loopback with auto-reload on.
    is_production = os.getenv("NODE_ENV") == "production"
    host = "0.0.0.0" if is_production else "127.0.0.1"
    port = int(os.getenv("PORT", 7860))

    logger.info(f"🐍 Starting Python backend on {host}:{port}")
    uvicorn.run("app:app", host=host, port=port, reload=not is_production)