Spaces:

upk
/

Related-Work-Finder

Sleeping

App Files Files Community

Related-Work-Finder / backend /main.py

upk

Fixes for routing and search

cd35f9e about 1 month ago

raw

history blame contribute delete

3.19 kB

	from fastapi import FastAPI, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from pydantic import BaseModel
	from typing import List, Optional
	import os
	import threading

	from database import search_papers
	from nlp_utils import get_keywords_from_abstract
	from fastapi.staticfiles import StaticFiles
	import data_pipeline

	# Frontend assets are resolved relative to this module: "../frontend".
	frontend_dir = os.path.join(os.path.dirname(__file__), "../frontend")

	app = FastAPI()

	# NOTE(review): wildcard origins together with allow_credentials=True is a
	# very permissive CORS policy -- confirm this is intended. FastAPI's CORS
	# documentation advises listing origins explicitly when credentials are on.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_methods=["*"],
	    allow_headers=["*"],
	    allow_credentials=True,
	)

	# Kick off the data pipeline in its own thread when the app starts
	@app.on_event("startup")
	def startup_event():
	    """Start ``data_pipeline.populate_db`` on a new thread at startup.

	    The thread is started but never joined, so this handler returns
	    without waiting for the pipeline to finish.
	    """
	    print("Running data pipeline check on startup...")
	    pipeline_thread = threading.Thread(target=data_pipeline.populate_db)
	    pipeline_thread.start()

	# Request body accepted by POST /api/search.
	class SearchRequest(BaseModel):
	    # Raw user input: either a keyword list or a full abstract.
	    query: str
	    # The search handler only special-cases "abstract"; every other value
	    # is handled as a keyword list.
	    query_type: str # "keywords" or "abstract"
	    # Forwarded unchanged to search_papers(sort_by=...).
	    sort_by: Optional[str] = "relevance"

	# API routes (registered ahead of the static mount)
	@app.post("/api/search")
	def search(request: SearchRequest):
	    """Turn the request into a keyword list and look up matching papers.

	    For ``query_type == "abstract"`` the keywords come from
	    ``get_keywords_from_abstract``; for any other type the query is split
	    on commas when it contains one, otherwise on whitespace.

	    Returns:
	        A dict with ``"results"`` (whatever ``search_papers`` returns) and
	        ``"extracted_keywords"`` (the extracted keywords for abstract
	        queries, an empty list otherwise).

	    Raises:
	        HTTPException: status 400 when the stripped query is empty or when
	            no keywords remain after extraction/splitting.
	    """
	    query = request.query.strip()
	    if not query:
	        raise HTTPException(status_code=400, detail="Query cannot be empty")

	    is_abstract = request.query_type == "abstract"

	    if is_abstract:
	        # Keyword extraction is delegated to the NLP helper
	        # (KeyBERT-based according to the original author's note).
	        extracted_keywords = get_keywords_from_abstract(query, top_n=8)
	        keywords_to_search = extracted_keywords
	    else:
	        extracted_keywords = []
	        # A comma anywhere in the query selects comma-separated parsing;
	        # a separator of None makes str.split fall back to whitespace.
	        separator = "," if "," in query else None
	        keywords_to_search = [
	            token.strip() for token in query.split(separator) if token.strip()
	        ]

	    if not keywords_to_search:
	        raise HTTPException(status_code=400, detail="No valid keywords found")

	    results = search_papers(keywords_to_search, sort_by=request.sort_by)

	    return {
	        "results": results,
	        "extracted_keywords": extracted_keywords if is_abstract else [],
	    }

	@app.get("/debug")
	def debug_clembench():
	    """Show how the field-splitting regex parses the clembench BibTeX entry.

	    Reads ``anthology.bib`` from this module's directory, locates the first
	    entry containing "clembench: Using Game Play", and splits it into
	    fields with the regex below.
	    NOTE(review): the regex presumably mirrors the parser used by the data
	    pipeline -- verify, and keep the two in sync.

	    Returns:
	        On success, a dict with ``"raw_entry"`` (the unmodified entry
	        text), ``"parsed_keys"`` (lower-cased field names found) and
	        ``"abstract_val"`` (the text following the ``abstract`` key, or
	        "NO ABSTRACT KEY FOUND"). Otherwise ``{"error": ...}`` when the
	        file or the entry is missing.
	    """
	    import re

	    bib_file = os.path.join(os.path.dirname(__file__), "anthology.bib")
	    if not os.path.exists(bib_file):
	        return {"error": "file not found"}

	    with open(bib_file, 'r', encoding='utf-8') as f:
	        content = f.read()

	    # Compiled once up front instead of inside the loop.
	    field_split_re = re.compile(r'\n\s([a-zA-Z_]+)\s=\s*')

	    for entry in re.split(r'\n@', content):
	        if "clembench: Using Game Play" not in entry:
	            continue

	        raw = entry.strip()
	        # Drop the closing brace of the entry before splitting into fields.
	        if raw.endswith('}'):
	            raw = raw[:-1]

	        # With one capture group, split() yields
	        # [prefix, key1, value1, key2, value2, ...]: keys sit at odd indices.
	        parts = field_split_re.split(raw)
	        parsed_keys = [key.lower() for key in parts[1::2]]

	        if 'abstract' in parsed_keys:
	            # Key number j is at parts[2*j + 1]; its value follows at 2*j + 2.
	            abstract_val = parts[parsed_keys.index('abstract') * 2 + 2]
	        else:
	            abstract_val = "NO ABSTRACT KEY FOUND"

	        return {
	            "raw_entry": entry,
	            "parsed_keys": parsed_keys,
	            "abstract_val": abstract_val,
	        }

	    return {"error": "entry not found"}


	# Serve static files from the root. This mount must remain the LAST route
	# registration: routes are matched in registration order and a mount at "/"
	# matches every path, so any route added after it would never be reached.
	app.mount("/", StaticFiles(directory=frontend_dir, html=True), name="frontend")