# Spaces:
# Runtime error
# Runtime error
"""FastAPI entry point for the Document Q&A service."""

import uuid
import hashlib
from pathlib import Path
from typing import Optional

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from pydantic import BaseModel

from config import config
from vector_store import vector_store
from agent import agent
from document import ingest_pdf, chunk_document
from arxiv_tool import arxiv_tool

# Make sure the data directories exist before any handler touches the disk.
config.ensure_directories()

app = FastAPI(title="Document Q&A API", version="1.0.0")

# Browsers enforce CORS: allow the deployed frontend plus local dev origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        config.FRONTEND_URL,
        "https://stochast.netlify.app",
        "http://localhost:3000",
        "http://localhost:8000",
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class QueryRequest(BaseModel):
    """Request payload for querying the indexed documents."""

    question: str
    # NOTE(review): accepted from clients but not visibly consumed in this
    # module — confirm whether the agent should receive it.
    auto_fetch_arxiv: bool = False
    # Optional paper name used to narrow retrieval to a single document.
    paper_filter: Optional[str] = None
class ArxivSearchRequest(BaseModel):
    """Request payload for an Arxiv search."""

    query: str
    # Cap on how many search hits to return (Arxiv-side limit).
    max_results: int = 5
async def get_stats():
    """Report vector-store statistics (delegates entirely to the store)."""
    return vector_store.get_stats()
async def get_papers():
    """List the papers currently held in the vector store."""
    paper_names = vector_store.get_papers()
    return {"papers": paper_names}
async def upload_paper(file: UploadFile = File(...)):
    """Accept a PDF upload, persist it under PAPERS_DIR, and index its chunks.

    Returns a dict with the generated paper_id, extracted title, number of
    chunks indexed, and page count.

    Raises HTTPException 400 for non-PDF uploads or PDFs that fail to parse.
    """
    # UploadFile.filename is Optional — guard against None before .lower().
    filename = file.filename or ""
    if not filename.lower().endswith('.pdf'):
        raise HTTPException(status_code=400, detail="Only PDF files are accepted")

    # Keep only the basename: the filename is untrusted client input, and a
    # crafted value like "../../x.pdf" must not escape PAPERS_DIR.
    safe_name = Path(filename).name
    paper_id = str(uuid.uuid4())[:8]
    filepath = config.PAPERS_DIR / f"{paper_id}_{safe_name}"

    content = await file.read()
    filepath.write_bytes(content)

    doc = ingest_pdf(filepath)
    if not doc:
        # Remove the saved file so failed uploads don't accumulate on disk.
        filepath.unlink(missing_ok=True)
        raise HTTPException(status_code=400, detail="Failed to process PDF")

    chunks = chunk_document(doc, paper_id)
    added = vector_store.add_chunks(chunks)
    return {
        "paper_id": paper_id,
        "paper_name": doc.title,
        "chunks_created": added,
        "pages": doc.page_count,
    }
async def query_documents(request: QueryRequest):
    """Answer a question via the RAG agent, optionally scoped to one paper."""
    # NOTE(review): request.auto_fetch_arxiv is currently ignored here —
    # confirm whether agent.query should receive it.
    result = agent.query(request.question, request.paper_filter)
    return {
        "answer": result.answer,
        "sources": result.sources,
        "arxiv_fetched": result.arxiv_fetched,
    }
async def clear_history():
    """Drop the agent's conversation history and acknowledge."""
    agent.clear_history()
    return {"status": "ok"}
async def delete_paper(paper_name: str):
    """Remove a paper's chunks from the vector store; 404 when unknown."""
    if not vector_store.delete_paper(paper_name):
        raise HTTPException(status_code=404, detail="Paper not found")
    return {"status": "deleted"}
async def get_paper_pdf(paper_name: str):
    """Serve the first stored PDF whose stem contains paper_name.

    Matching is case-insensitive substring match; 404 when nothing matches.
    """
    needle = paper_name.lower()
    for candidate in config.PAPERS_DIR.glob("*.pdf"):
        if needle in candidate.stem.lower():
            # Inline disposition so browsers render the PDF instead of downloading.
            return FileResponse(
                path=candidate,
                media_type="application/pdf",
                headers={"Content-Disposition": "inline"},
            )
    raise HTTPException(status_code=404, detail="PDF not found")
async def search_arxiv(request: ArxivSearchRequest):
    """Search Arxiv and return lightweight metadata for each hit."""
    hits = arxiv_tool.search(request.query, request.max_results)

    def _serialize(paper):
        # Flatten the tool's paper object into a JSON-friendly dict.
        return {
            "arxiv_id": paper.arxiv_id,
            "title": paper.title,
            "authors": paper.authors,
            "abstract": paper.abstract,
            "pdf_url": paper.pdf_url,
        }

    return {"papers": [_serialize(p) for p in hits]}
async def download_arxiv(arxiv_id: str):
    """Fetch a paper by Arxiv id, download its PDF, and index its chunks."""
    matches = arxiv_tool.search(arxiv_id, 1)
    if not matches:
        raise HTTPException(status_code=404, detail="Paper not found on Arxiv")

    pdf_path = arxiv_tool.download(matches[0])
    if not pdf_path:
        raise HTTPException(status_code=500, detail="Failed to download paper")

    doc = ingest_pdf(pdf_path)
    if not doc:
        raise HTTPException(status_code=500, detail="Failed to process paper")

    # Deterministic 8-char id derived from the arxiv id so repeat downloads
    # map to the same paper id (md5 as a fingerprint, not for security).
    paper_id = hashlib.md5(arxiv_id.encode()).hexdigest()[:8]
    chunks = chunk_document(doc, paper_id)
    vector_store.add_chunks(chunks)
    return {"paper_name": doc.title, "chunks_created": len(chunks)}
async def add_arxiv_to_chat(arxiv_id: str):
    """Download and index an Arxiv paper, returning a markdown blurb for chat."""
    matches = arxiv_tool.search(arxiv_id, 1)
    if not matches:
        raise HTTPException(status_code=404, detail="Paper not found on Arxiv")
    paper = matches[0]

    pdf_path = arxiv_tool.download(paper)
    if not pdf_path:
        raise HTTPException(status_code=500, detail="Failed to download paper")

    doc = ingest_pdf(pdf_path)
    if not doc:
        raise HTTPException(status_code=500, detail="Failed to process paper")

    # Same deterministic id scheme as the plain download endpoint.
    paper_id = hashlib.md5(arxiv_id.encode()).hexdigest()[:8]
    vector_store.add_chunks(chunk_document(doc, paper_id))

    # Markdown summary: title, up to three authors ("et al." beyond that),
    # and the first 500 characters of the abstract.
    author_list = ', '.join(paper.authors[:3])
    summary = f"**{paper.title}**\n\n**Authors:** {author_list}"
    if len(paper.authors) > 3:
        summary += " et al."
    summary += f"\n\n**Abstract:** {paper.abstract[:500]}..."

    return {
        "paper_name": doc.title,
        "summary": summary,
        "sources": [{"paper": doc.title, "section": "Abstract"}],
    }
async def root():
    """Health-check endpoint: confirms the API process is up."""
    return {"status": "ok", "message": "Document Q&A API is running"}