| | from fastapi import FastAPI, HTTPException, Query |
| | from fastapi.middleware.cors import CORSMiddleware |
| | from pydantic import BaseModel |
| | import pandas as pd |
| | import numpy as np |
| | import faiss |
| | from sentence_transformers import SentenceTransformer |
| | import os |
| | import gdown |
| |
|
# FastAPI application instance; the title appears in the auto-generated
# OpenAPI / Swagger docs.
app = FastAPI(title="Research Paper Recommendation API")

# Allow browser clients from any origin to call the API.
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# permissive — confirm this is intended for production deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
| |
|
print("🔄 Loading dataset and FAISS index...")
def download_from_drive(file_id, output):
    """Download ``output`` from Google Drive by file id, unless it already exists.

    Args:
        file_id: Google Drive file identifier.
        output: local path the file is saved to (also the skip-check target).
    """
    # Guard clause: nothing to do when the artifact is already on disk.
    if os.path.exists(output):
        print(f"✅ {output} already exists, skipping download.")
        return
    print(f"📥 Downloading {output} from Google Drive...")
    gdown.download(f"https://drive.google.com/uc?id={file_id}", output, quiet=False)
| |
|
# Artifacts required at startup: dataset CSV, embedding matrix, FAISS index.
_DRIVE_ARTIFACTS = (
    ("1ME6Bb5WjVbIYr4-0iF0kUa35-82DTsUn", "papers_with_embeddings.csv"),
    ("1IwOkhBu-odM2GYmvZdS1Q5AHkf0pDV02", "embeddings.npy"),
    ("1MEB_4ZGunbxi65jW9UN0L8bnp9VUCH8s", "papers_index.faiss"),
)
for _file_id, _filename in _DRIVE_ARTIFACTS:
    download_from_drive(_file_id, _filename)
| |
|
# Load the startup artifacts; abort with a clear error if any are unreadable.
try:
    df = pd.read_csv("papers_with_embeddings.csv")
    embeddings = np.load("embeddings.npy")
    index = faiss.read_index("papers_index.faiss")
except Exception as load_err:
    raise RuntimeError(f" Failed to load files: {load_err}")

# SPECTER2 model used to embed queries into the same space as the FAISS index.
model = SentenceTransformer("allenai/specter2_base")

print(f" Loaded {len(df)} papers and FAISS index with {index.ntotal} vectors.")
| |
|
class Paper(BaseModel):
    """Response schema for a single research-paper record."""

    id: str  # dataset identifier, compared as a string throughout the API
    title: str
    authors: str
    update_date: str  # last-updated date as provided by the dataset
    abstract: str | None = None  # may be absent for some papers
    category_code: str | None = None  # optional subject-category code
| |
|
def get_recommendations(paper_id: str, top_k: int = 6):
    """Return up to ``top_k`` papers most similar to ``paper_id``.

    Embeds the paper's title + abstract with SPECTER, searches the FAISS
    index, and drops the query paper itself from the hits.

    Args:
        paper_id: dataset id of the query paper (compared as a string).
        top_k: maximum number of recommendations to return.

    Returns:
        List of record dicts with id/title/authors/update_date/abstract/similarity.

    Raises:
        HTTPException: 404 when ``paper_id`` is not in the dataset.
    """
    paper = df[df["id"].astype(str) == str(paper_id)]
    if paper.empty:
        raise HTTPException(status_code=404, detail="Paper not found")

    row = paper.iloc[0]
    # Guard against a missing abstract: a NaN would otherwise be embedded
    # literally as the string "nan".
    abstract = row["abstract"] if pd.notna(row["abstract"]) else ""
    text = f"{row['title']}. {abstract}"
    query_vec = model.encode([text], normalize_embeddings=True)

    # Fetch one extra hit so the query paper can be removed and still
    # leave top_k results.
    D, I = index.search(query_vec, top_k + 1)

    # FAISS pads with -1 when fewer than k vectors are available;
    # df.iloc[-1] would silently return the LAST row, so drop invalid
    # ids and keep the similarity scores aligned with the kept rows.
    valid = I[0] >= 0
    recs = df.iloc[I[0][valid]].copy()
    recs["similarity"] = D[0][valid]

    # Exclude the query paper itself from its own recommendations.
    recs = recs[recs["id"].astype(str) != str(paper_id)]

    return recs[["id", "title", "authors", "update_date", "abstract", "similarity"]].head(top_k).to_dict(orient="records")
| |
|
| |
|
def search_papers(query_text: str, top_k: int = 50):
    """Semantic search: return up to ``top_k`` papers matching ``query_text``.

    Args:
        query_text: free-text query; must be non-blank.
        top_k: maximum number of hits to return.

    Returns:
        List of record dicts with id/title/authors/update_date/abstract/similarity.

    Raises:
        HTTPException: 400 when the query is empty or whitespace-only.
    """
    if not query_text.strip():
        raise HTTPException(status_code=400, detail="Query cannot be empty.")

    query_vec = model.encode([query_text], normalize_embeddings=True)
    D, I = index.search(query_vec, top_k)

    # FAISS pads with -1 when fewer than top_k vectors are available;
    # df.iloc[-1] would silently return the LAST row, so drop invalid
    # ids and keep the similarity scores aligned with the kept rows.
    valid = I[0] >= 0
    recs = df.iloc[I[0][valid]].copy()
    recs["similarity"] = D[0][valid]

    return recs[["id", "title", "authors", "update_date", "abstract", "similarity"]].to_dict(orient="records")
| |
|
| |
|
@app.get("/")
def root():
    """Health-check endpoint confirming the service is up."""
    status_message = "SPECTER + FAISS Recommendation API is running 🚀"
    return {"message": status_message}
| |
|
@app.get("/paper/{paper_id}")
def get_paper(paper_id: str):
    """Return the full metadata row for a single paper.

    Raises:
        HTTPException: 404 when ``paper_id`` is not in the dataset.
    """
    paper = df[df["id"].astype(str) == str(paper_id)]
    if paper.empty:
        raise HTTPException(status_code=404, detail="Paper not found")
    row = paper.iloc[0]
    # NaN is not valid JSON; convert missing values to None before the
    # row is serialised into the response body.
    return row.where(pd.notna(row), None).to_dict()
| |
|
@app.get("/recommend/{paper_id}")
def recommend_papers(paper_id: str, top_k: int = 6):
    """Endpoint wrapper around ``get_recommendations``.

    Passes deliberate HTTP errors through unchanged and maps anything
    unexpected to a 500.
    """
    try:
        return get_recommendations(paper_id, top_k)
    except HTTPException:
        # e.g. the 404 for an unknown paper id — keep its status code.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Recommendation error: {exc}")
| |
|
@app.get("/search")
def search_endpoint(query: str = Query(..., description="Search text query"), top_k: int = 50):
    """
    Search for semantically similar papers using SPECTER embeddings.
    Example: /search?query=graph neural networks
    """
    try:
        results = search_papers(query, top_k)
        return results
    except HTTPException:
        # Preserve deliberate HTTP errors (e.g. the 400 raised for an
        # empty query) instead of converting them into a generic 500 —
        # consistent with recommend_papers.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Search error: {e}")
| |
|
| |
|