upk's picture
Fixes for routing and search
cd35f9e
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Optional
import os
import threading
from database import search_papers
from nlp_utils import get_keywords_from_abstract
from fastapi.staticfiles import StaticFiles
import data_pipeline
# Directory handed to the static-file mount, located relative to this file.
frontend_dir = os.path.join(os.path.dirname(__file__), "../frontend")

app = FastAPI()

# Fully permissive CORS: any origin, method and header.
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# combination the FastAPI CORS docs advise against -- confirm whether
# credentialed cross-origin requests are actually needed here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Kick off the data pipeline in a background thread when the app starts
@app.on_event("startup")
def startup_event():
    """Launch ``data_pipeline.populate_db`` on its own thread at startup.

    The call is not awaited or joined, so application startup does not wait
    for the pipeline to finish.
    """
    print("Running data pipeline check on startup...")
    pipeline_thread = threading.Thread(target=data_pipeline.populate_db)
    pipeline_thread.start()
# Request body accepted by the POST /api/search endpoint.
class SearchRequest(BaseModel):
    # Raw user input; interpreted as keywords or as an abstract depending on
    # query_type.
    query: str
    query_type: str # "keywords" or "abstract"
    # Passed through unchanged to search_papers(sort_by=...).
    sort_by: Optional[str] = "relevance"
# API routes (registered ahead of the static mount)
@app.post("/api/search")
def search(request: SearchRequest):
    """Look up papers for a keyword query or for an abstract.

    When ``request.query_type`` is ``"abstract"`` the keywords come from
    ``get_keywords_from_abstract``; for any other value the query text is
    split on commas if it contains one, otherwise on whitespace.

    Returns:
        A dict with ``"results"`` (whatever ``search_papers`` returns) and
        ``"extracted_keywords"`` (the extracted keywords for an abstract
        query, otherwise an empty list).

    Raises:
        HTTPException: 400 when the query is blank or yields no keywords.
    """
    query = request.query.strip()
    if not query:
        raise HTTPException(status_code=400, detail="Query cannot be empty")

    from_abstract = request.query_type == "abstract"
    extracted_keywords = []
    if from_abstract:
        # Keyword extraction is delegated to the NLP helper.
        extracted_keywords = get_keywords_from_abstract(query, top_n=8)
        keywords_to_search = extracted_keywords
    else:
        # A comma anywhere in the query makes it the only separator;
        # split(None) is the plain whitespace split.
        separator = "," if "," in query else None
        keywords_to_search = [
            token.strip() for token in query.split(separator) if token.strip()
        ]

    if not keywords_to_search:
        raise HTTPException(status_code=400, detail="No valid keywords found")

    return {
        "results": search_papers(keywords_to_search, sort_by=request.sort_by),
        # Still the initial empty list on the keyword path.
        "extracted_keywords": extracted_keywords,
    }
@app.get("/debug")
def debug_clembench():
    """Show how the field-splitting regex parses the clembench BibTeX entry.

    Reads ``anthology.bib`` from this file's directory, splits it into
    entries on ``"\\n@"`` and inspects the first entry containing the text
    ``"clembench: Using Game Play"``.

    Returns:
        ``{"error": "file not found"}`` when the bib file is missing,
        ``{"error": "entry not found"}`` when no entry matches, otherwise a
        dict with ``"raw_entry"`` (the entry text), ``"parsed_keys"`` (the
        lower-cased field names in order of appearance) and
        ``"abstract_val"`` (the text following the first ``abstract`` field,
        or the string ``"NO ABSTRACT KEY FOUND"``).
    """
    import re

    bib_file = os.path.join(os.path.dirname(__file__), "anthology.bib")
    if not os.path.exists(bib_file):
        return {"error": "file not found"}

    with open(bib_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Compiled once up front; the pattern does not depend on the entry.
    field_split_re = re.compile(r'\n\s*([a-zA-Z_]+)\s*=\s*')

    for entry in re.split(r'\n@', content):
        if "clembench: Using Game Play" not in entry:
            continue

        raw = entry.strip()
        if raw.endswith('}'):
            raw = raw[:-1]

        # Splitting on a pattern with one capture group yields
        # [preamble, key1, value1, key2, value2, ...]: keys sit at the odd
        # indices and each value directly follows its key.
        parts = field_split_re.split(raw)
        parsed_keys = [key.lower() for key in parts[1::2]]

        if 'abstract' in parsed_keys:
            abstract_val = parts[parsed_keys.index('abstract') * 2 + 2]
        else:
            abstract_val = "NO ABSTRACT KEY FOUND"

        return {
            "raw_entry": entry,
            "parsed_keys": parsed_keys,
            "abstract_val": abstract_val,
        }

    return {"error": "entry not found"}


# Serve static files from the root last: routes are matched in registration
# order and a mount at "/" matches every path, so any route added after this
# line would never be reached.
app.mount("/", StaticFiles(directory=frontend_dir, html=True), name="frontend")