senatus-dev / app.py
senatus123's picture
Upload app.py with huggingface_hub
a4d342b verified
raw
history blame
2.81 kB
import nh3
from fastapi import FastAPI, Query
from doc_searcher_v2 import DocSearcherV2
from huggingface_hub import login
from config import HUGGING_FACE_API_KEY, COLLECTION_NAME, API_KEY
import time
import logging
# Set up logging: INFO level via basicConfig, plus a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Log in to the Hugging Face Hub with the configured key. This runs at import
# time, before the searcher below is constructed.
login(HUGGING_FACE_API_KEY)
# The FastAPI application object that the route decorators below attach to.
app = FastAPI(
    title="Senatus Legal Search API",
    description="Semantička pretraga pravnih dokumenata sa Qwen3 reranker-om",
    version="2.0"
)
# Initialize the DocSearcher globally (it is loaded only once).
doc_searcher_v2 = DocSearcherV2(collection_name=COLLECTION_NAME)
# API key from config, coerced to str.
# NOTE(review): no endpoint visible in this part of the file checks this value —
# confirm whether API-key enforcement is intended.
ALLOWED_API_KEY = str(API_KEY)
@app.on_event("startup")
async def warm_up():
    """Warm up the models when the application starts.

    Runs one throwaway semantic search so the models are loaded into memory
    before the first real request. Warm-up is best-effort: a failure is logged
    with its traceback and startup continues.
    """
    # NOTE(review): newer FastAPI versions favour lifespan handlers over
    # on_event; migrating would also require changing the FastAPI() call.
    logger.info("🔥 Warming up models...")
    # Monotonic clock: the measured duration is immune to system clock changes.
    start = time.perf_counter()
    try:
        # Run a dummy query so that all models get loaded into memory.
        await doc_searcher_v2.search_semantic("test", qdrant_limit=5, top_k=1)
    except Exception as exc:
        # Startup boundary: do not crash the app, but keep the full traceback
        # (logger.exception) so the cause of the failure is diagnosable.
        logger.exception("❌ Warmup failed: %s", exc)
    else:
        elapsed = time.perf_counter() - start
        logger.info("✅ Models ready! Warmup took %.1fs", elapsed)
@app.get("/")
async def root():
    """Root endpoint used to check that the application is running.

    Returns a fixed payload with the service status, a message, the version,
    and the paths of the main endpoints.
    """
    # Paths a client can call next, keyed by purpose.
    endpoint_paths = {
        "search": "/api/v2/search",
        "docs": "/docs",
        "health": "/health",
    }
    payload = {
        "status": "ok",
        "message": "Senatus API is running",
        "version": "2.0",
    }
    # Added last so the key order matches the documented response.
    payload["endpoints"] = endpoint_paths
    return payload
@app.get("/health")
async def health():
    """Health check endpoint.

    Returns a fixed payload; ``models_loaded`` is a constant here and is not
    derived from any runtime state.
    """
    models_ready = True
    return {"status": "healthy", "models_loaded": models_ready, "version": "2.0"}
@app.get("/api/v2/search")
async def v2_search(
    q: str = Query(..., description="Query tekst za pretragu"),
    limit: int = Query(3, ge=1, le=10, description="Broj rezultata (1-10, default: 3)"),
    qdrant_limit: int = Query(20, ge=10, le=50, description="Broj chunk-ova iz Qdrant-a (10-50, default: 20)")
):
    """
    Semantic search over legal documents.

    - **q**: Text query
    - **limit**: Number of results to return (default: 3, max: 10)
    - **qdrant_limit**: Number of documents fetched from Qdrant before reranking (default: 20, max: 50)

    Returns: List of results with scores, sorted by relevance

    Raises: HTTP 400 if the query is blank or nothing remains after sanitization
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    # %r escapes control characters, so a user-supplied query containing
    # newlines cannot forge additional log lines.
    logger.info("Search query: %r | limit=%d | qdrant_limit=%d", q, limit, qdrant_limit)
    # Monotonic clock: the measured duration is immune to system clock changes.
    start = time.perf_counter()

    # Input sanitization: lowercase first, then run the text through nh3.
    # NOTE(review): nh3.clean is an HTML sanitizer and presumably also escapes
    # characters such as "&" — confirm that is desired for search text.
    lowered = q.lower()
    sanitized = nh3.clean(lowered)
    if not sanitized.strip():
        # Blank input or markup-only input leaves nothing to search for;
        # reject it instead of running the searcher on an empty string.
        raise HTTPException(status_code=400, detail="Query is empty after sanitization")

    # Search with the caller-supplied parameters.
    data = await doc_searcher_v2.search_semantic(
        text=sanitized,
        qdrant_limit=qdrant_limit,
        top_k=limit,
    )

    elapsed = time.perf_counter() - start
    logger.info("Search completed in %.2fs | Results: %d", elapsed, len(data))
    return data