dcorcoran committed on
Commit
191402b
·
1 Parent(s): 088378b

Added database stats tab

Browse files
Files changed (1) hide show
  1. src/api/main.py +48 -0
src/api/main.py CHANGED
@@ -9,6 +9,13 @@ from pydantic import BaseModel, Field
9
 
10
  from .logging_utils import get_logger
11
 
 
 
 
 
 
 
 
12
 
13
  logger = get_logger("api")
14
 
@@ -240,6 +247,47 @@ def health():
240
  "faiss_search_available": FAISS_AVAILABLE,
241
  }
242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
  @app.post("/search", response_model=SearchResponse)
245
  def search(req: SearchRequest):
 
9
 
10
  from .logging_utils import get_logger
11
 
12
+ from fastapi import FastAPI, Request, HTTPException
13
+ from sqlalchemy.orm import Session
14
+ from sqlalchemy import create_engine, func
15
+ import os
16
+
17
+ # Add database imports
18
+ from .bm25_utils import Whole_Blogs, engine
19
 
20
  logger = get_logger("api")
21
 
 
247
  "faiss_search_available": FAISS_AVAILABLE,
248
  }
249
 
250
@app.get("/stats")
def get_database_stats() -> dict:
    """Return aggregate statistics over the ``Whole_Blogs`` table for EDA.

    Returns:
        A JSON-serializable dict with the keys:

        * ``total_posts`` -- count of ``Whole_Blogs.id`` values.
        * ``unique_locations`` -- count of distinct ``location_name`` values.
        * ``unique_blogs`` -- count of distinct ``blog_url`` values.
        * ``unique_authors`` -- count of distinct ``page_author`` values.
        * ``top_locations`` -- up to 20 ``{"location", "count"}`` entries,
          ordered by descending post count.
        * ``coordinates`` -- one ``{"lat", "lon"}`` entry per row whose
          latitude and longitude are both non-NULL.

    Raises:
        HTTPException: status 500 if any query fails. The underlying error is
            logged with its traceback; it is not echoed to the client.
    """
    try:
        with Session(engine) as session:
            # One aggregate SELECT instead of four round trips; an aggregate
            # query without GROUP BY always yields exactly one row.
            (
                total_posts,
                unique_locations,
                unique_blogs,
                unique_authors,
            ) = session.query(
                func.count(Whole_Blogs.id),
                func.count(func.distinct(Whole_Blogs.location_name)),
                func.count(func.distinct(Whole_Blogs.blog_url)),
                func.count(func.distinct(Whole_Blogs.page_author)),
            ).one()

            # Top 20 locations by number of posts.
            post_count = func.count(Whole_Blogs.id)
            top_locations = (
                session.query(
                    Whole_Blogs.location_name,
                    post_count.label("count"),
                )
                .group_by(Whole_Blogs.location_name)
                .order_by(post_count.desc())
                .limit(20)
                .all()
            )

            # All coordinates for mapping; rows missing either value are
            # excluded.
            # NOTE(review): this result set is unbounded -- consider
            # pagination or aggregation if the table grows large.
            coordinates = (
                session.query(Whole_Blogs.latitude, Whole_Blogs.longitude)
                .filter(
                    Whole_Blogs.latitude.isnot(None),
                    Whole_Blogs.longitude.isnot(None),
                )
                .all()
            )

            return {
                "total_posts": total_posts,
                "unique_locations": unique_locations,
                "unique_blogs": unique_blogs,
                "unique_authors": unique_authors,
                "top_locations": [
                    {"location": loc, "count": cnt}
                    for loc, cnt in top_locations
                ],
                "coordinates": [
                    {"lat": lat, "lon": lon} for lat, lon in coordinates
                ],
            }
    except Exception as e:
        # Keep the full traceback in the server log, but do not leak
        # database/driver error text to API clients.
        logger.exception("Database stats error: %s", e)
        raise HTTPException(
            status_code=500,
            detail="Failed to compute database statistics",
        ) from e
290
+
291
 
292
  @app.post("/search", response_model=SearchResponse)
293
  def search(req: SearchRequest):