Spaces:
Sleeping
Sleeping
Added database stats tab
Browse files- src/api/main.py +48 -0
src/api/main.py
CHANGED
|
@@ -9,6 +9,13 @@ from pydantic import BaseModel, Field
|
|
| 9 |
|
| 10 |
from .logging_utils import get_logger
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
logger = get_logger("api")
|
| 14 |
|
|
@@ -240,6 +247,47 @@ def health():
|
|
| 240 |
"faiss_search_available": FAISS_AVAILABLE,
|
| 241 |
}
|
| 242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
@app.post("/search", response_model=SearchResponse)
|
| 245 |
def search(req: SearchRequest):
|
|
|
|
| 9 |
|
| 10 |
from .logging_utils import get_logger
|
| 11 |
|
| 12 |
+
from fastapi import FastAPI, Request, HTTPException
|
| 13 |
+
from sqlalchemy.orm import Session
|
| 14 |
+
from sqlalchemy import create_engine, func
|
| 15 |
+
import os
|
| 16 |
+
|
| 17 |
+
# Database model and engine used by the /stats endpoint
|
| 18 |
+
from .bm25_utils import Whole_Blogs, engine
|
| 19 |
|
| 20 |
logger = get_logger("api")
|
| 21 |
|
|
|
|
| 247 |
"faiss_search_available": FAISS_AVAILABLE,
|
| 248 |
}
|
| 249 |
|
| 250 |
+
@app.get("/stats")
def get_database_stats():
    """Return aggregate statistics about the blog table for EDA.

    Opens one ``Session`` on the module-level ``engine`` and runs three
    queries against ``Whole_Blogs``: a single aggregate query for the
    four counts, a grouped query for the most frequent locations, and a
    query for every row that has both a latitude and a longitude.

    Returns:
        A dict with the keys ``total_posts``, ``unique_locations``,
        ``unique_blogs``, ``unique_authors`` (counts), ``top_locations``
        (up to 20 ``{"location", "count"}`` entries, most frequent
        first) and ``coordinates`` (a list of ``{"lat", "lon"}``
        entries).

    Raises:
        HTTPException: with status 500 when anything inside the session
            block raises; the original exception is chained as the cause.
    """
    try:
        with Session(engine) as session:
            # One aggregate SELECT instead of four separate COUNT queries;
            # COUNT never yields NULL, so the values match the per-query
            # ``.scalar()`` results.
            (
                total_posts,
                unique_locations,
                unique_blogs,
                unique_authors,
            ) = session.query(
                func.count(Whole_Blogs.id),
                func.count(func.distinct(Whole_Blogs.location_name)),
                func.count(func.distinct(Whole_Blogs.blog_url)),
                func.count(func.distinct(Whole_Blogs.page_author)),
            ).one()

            # Top 20 locations by number of posts.
            # NOTE(review): rows whose location_name is NULL are not
            # filtered out, so a ``None`` location can appear here -
            # confirm whether that is intended.
            post_count = func.count(Whole_Blogs.id)
            top_locations = (
                session.query(
                    Whole_Blogs.location_name,
                    post_count.label('count'),
                )
                .group_by(Whole_Blogs.location_name)
                .order_by(post_count.desc())
                .limit(20)
                .all()
            )

            # All coordinates for mapping; only rows with both values set.
            # NOTE(review): this query is unbounded and returns one entry
            # per matching row - consider a limit if the table is large.
            coordinates = (
                session.query(
                    Whole_Blogs.latitude,
                    Whole_Blogs.longitude,
                )
                .filter(
                    Whole_Blogs.latitude.isnot(None),
                    Whole_Blogs.longitude.isnot(None),
                )
                .all()
            )

            return {
                "total_posts": total_posts,
                "unique_locations": unique_locations,
                "unique_blogs": unique_blogs,
                "unique_authors": unique_authors,
                "top_locations": [
                    {"location": loc, "count": cnt}
                    for loc, cnt in top_locations
                ],
                "coordinates": [
                    {"lat": lat, "lon": lon}
                    for lat, lon in coordinates
                ],
            }
    except Exception as e:
        logger.error(f"Database stats error: {e}")
        # NOTE(review): detail=str(e) sends the raw error text to the
        # client; kept as-is because a consumer may display it, but
        # consider a generic message if this endpoint is public.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
| 290 |
+
|
| 291 |
|
| 292 |
@app.post("/search", response_model=SearchResponse)
|
| 293 |
def search(req: SearchRequest):
|