Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| from typing import List, Optional | |
| import os | |
| import threading | |
| from database import search_papers | |
| from nlp_utils import get_keywords_from_abstract | |
| from fastapi.staticfiles import StaticFiles | |
| import data_pipeline | |
# Application instance; middleware and mounts below attach to it.
app = FastAPI()

# Path to the frontend directory, one level above this file's directory.
frontend_dir = os.path.join(os.path.dirname(__file__), "../frontend")

# CORS: any origin, method and header is accepted.  Credentials are disabled
# because wildcard values must not be combined with credentialed requests;
# list explicit origins/methods/headers before re-enabling allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Run data pipeline on startup in a separate thread
def startup_event():
    """Start ``data_pipeline.populate_db`` on a background thread.

    The thread is started but never joined, so this function returns without
    waiting for the pipeline to finish.

    NOTE(review): no startup-hook decorator is visible on this function in
    this view -- confirm how it is registered with the app.
    """
    print("Running data pipeline check on startup...")
    pipeline_thread = threading.Thread(target=data_pipeline.populate_db)
    pipeline_thread.start()
class SearchRequest(BaseModel):
    """Request payload consumed by ``search``."""

    # Raw user input: either a keyword list or a full abstract.
    query: str

    # "keywords" or "abstract"; any value other than "abstract" is handled
    # as keywords by ``search``.
    query_type: str

    # Passed through to ``search_papers`` as ``sort_by``.
    sort_by: Optional[str] = "relevance"
# API routes (defined before static mount)
def search(request: SearchRequest):
    """Look up papers for a keyword query or for keywords taken from an abstract.

    Args:
        request: Search payload; ``query`` is stripped before use.

    Returns:
        A dict with ``"results"`` (whatever ``search_papers`` returns) and
        ``"extracted_keywords"`` (the keywords pulled from the abstract, or an
        empty list for keyword queries).

    Raises:
        HTTPException: 400 when the query is blank or yields no keywords.

    NOTE(review): no route decorator is visible on this function in this
    view -- confirm how it is registered with the app.
    """
    text = request.query.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Query cannot be empty")

    from_abstract = request.query_type == "abstract"
    if from_abstract:
        # Keyword extraction is delegated to the NLP helper (KeyBERT,
        # according to the original comment).
        extracted_keywords = get_keywords_from_abstract(text, top_n=8)
        keywords_to_search = extracted_keywords
    else:
        extracted_keywords = []
        # A comma anywhere in the query switches to comma-separated terms;
        # otherwise split on runs of whitespace (separator None).
        separator = "," if "," in text else None
        keywords_to_search = [
            token.strip() for token in text.split(separator) if token.strip()
        ]

    if not keywords_to_search:
        raise HTTPException(status_code=400, detail="No valid keywords found")

    results = search_papers(keywords_to_search, sort_by=request.sort_by)
    return {
        "results": results,
        "extracted_keywords": extracted_keywords if from_abstract else [],
    }
# Serve static files from the root last
app.mount(
    "/",
    StaticFiles(directory=frontend_dir, html=True),
    name="frontend",
)
def debug_clembench():
    """Show how the field-splitting regex parses the "clembench" BibTeX entry.

    Reads ``anthology.bib`` from this file's directory, splits it into entries
    on newline-``@``, and inspects the first entry containing
    ``"clembench: Using Game Play"``.

    Returns:
        ``{"error": "file not found"}`` when the bib file is missing,
        ``{"error": "entry not found"}`` when no entry matches, otherwise a
        dict with the raw entry text, the lower-cased field names found by the
        regex, and the raw text captured for the ``abstract`` field (or a
        placeholder message when that field is absent).

    NOTE(review): no route decorator is visible here; if this is meant to be
    an endpoint, note that it is declared after the "/" static mount --
    confirm it is reachable.
    """
    import re

    bib_file = os.path.join(os.path.dirname(__file__), "anthology.bib")
    if not os.path.exists(bib_file):
        return {"error": "file not found"}

    with open(bib_file, 'r', encoding='utf-8') as handle:
        content = handle.read()

    wanted = (
        chunk
        for chunk in re.split(r'\n@', content)
        if "clembench: Using Game Play" in chunk
    )
    entry = next(wanted, None)
    if entry is None:
        return {"error": "entry not found"}

    # Let's see what our regex parser does
    field_split_re = re.compile(r'\n\s*([a-zA-Z_]+)\s*=\s*')
    raw = entry.strip()
    if raw.endswith('}'):
        # Drop the brace that closes the entry.
        raw = raw[:-1]

    # With one capture group the split yields
    # [header, key1, value1, key2, value2, ...]: keys sit at odd indices.
    parts = field_split_re.split(raw)
    parsed_keys = [name.lower() for name in parts[1:-1:2]]

    if 'abstract' in parsed_keys:
        # Key number j lives at parts[2j + 1], its value at parts[2j + 2].
        abstract_val = parts[parsed_keys.index('abstract') * 2 + 2]
    else:
        abstract_val = "NO ABSTRACT KEY FOUND"

    return {
        "raw_entry": entry,
        "parsed_keys": parsed_keys,
        "abstract_val": abstract_val,
    }