import logging
import time

import nh3
from fastapi import FastAPI, Query
from huggingface_hub import login

from config import API_KEY, COLLECTION_NAME, HUGGING_FACE_API_KEY
from doc_searcher_v2 import DocSearcherV2

# Set up logging.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Log in to the Hugging Face Hub with the configured API key.
login(HUGGING_FACE_API_KEY)

app = FastAPI(
    title="Senatus Legal Search API",
    description="Semantička pretraga pravnih dokumenata sa Qwen3 reranker-om",
    version="2.0",
)

# Initialise the document searcher globally (it is loaded only once).
doc_searcher_v2 = DocSearcherV2(collection_name=COLLECTION_NAME)

# NOTE(review): not referenced by any endpoint visible in this file --
# confirm where (or whether) the API key is actually enforced.
ALLOWED_API_KEY = str(API_KEY)


# NOTE(review): recent FastAPI releases deprecate ``on_event`` in favour of
# lifespan handlers; kept as-is so other startup hooks keep working.
@app.on_event("startup")
async def warm_up():
    """Warm up the models when the application starts.

    Runs one throwaway semantic search and logs how long it took. A failure
    is logged together with its traceback but is deliberately not re-raised,
    so the application still starts when the warm-up query fails.
    """
    logger.info("🔥 Warming up models...")
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments.
    start = time.perf_counter()
    try:
        # Run a dummy query; the original comment states this loads all
        # models into memory.
        await doc_searcher_v2.search_semantic("test", qdrant_limit=5, top_k=1)
    except Exception as e:
        # Best-effort warm-up: record the traceback and carry on.
        logger.exception("❌ Warmup failed: %s", e)
    else:
        elapsed = time.perf_counter() - start
        logger.info("✅ Models ready! Warmup took %.1fs", elapsed)


@app.get("/")
async def root():
    """Root endpoint for checking that the application is running."""
    return {
        "status": "ok",
        "message": "Senatus API is running",
        "version": "2.0",
        "endpoints": {
            "search": "/api/v2/search",
            "docs": "/docs",
            "health": "/health",
        },
    }


@app.get("/health")
async def health():
    """Health check endpoint."""
    # NOTE(review): "models_loaded" is a constant and does not reflect
    # whether the startup warm-up actually succeeded.
    return {
        "status": "healthy",
        "models_loaded": True,
        "version": "2.0",
    }


@app.get("/api/v2/search")
async def v2_search(
    q: str = Query(..., description="Query tekst za pretragu"),
    limit: int = Query(
        3, ge=1, le=10, description="Broj rezultata (1-10, default: 3)"
    ),
    qdrant_limit: int = Query(
        20,
        ge=10,
        le=50,
        description="Broj chunk-ova iz Qdrant-a (10-50, default: 20)",
    ),
):
    """
    Semantic search over legal documents.

    - **q**: Text query
    - **limit**: Number of results to return (default: 3, max: 10)
    - **qdrant_limit**: Number of documents fetched from Qdrant before reranking (default: 20, max: 50)

    Returns:
        List of results with scores, sorted by relevance
    """
    # %r keeps the quoted look for ordinary text while escaping control
    # characters, so a crafted query cannot inject extra log lines.
    logger.info(
        "Search query: %r | limit=%s | qdrant_limit=%s", q, limit, qdrant_limit
    )
    start = time.perf_counter()

    # Input sanitisation: lower-case the query, then pass it through nh3's
    # HTML sanitizer.
    # NOTE(review): nh3.clean may entity-escape characters such as "&" or
    # "<", which would alter the text sent to the searcher -- confirm this
    # is intended for plain-text queries.
    sanitized_query = nh3.clean(q.lower())

    # Search with the validated parameters.
    data = await doc_searcher_v2.search_semantic(
        text=sanitized_query,
        qdrant_limit=qdrant_limit,
        top_k=limit,
    )

    elapsed = time.perf_counter() - start
    logger.info("Search completed in %.2fs | Results: %s", elapsed, len(data))

    return data