# stochastic / app.py
# Author: Sonu Prasad — initial commit (822c114)
import uuid
import hashlib
from pathlib import Path
from typing import Optional
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from pydantic import BaseModel
from config import config
from vector_store import vector_store
from agent import agent
from document import ingest_pdf, chunk_document
from arxiv_tool import arxiv_tool
# Make sure the on-disk directories the app writes to exist before serving
# (presumably the papers/index dirs — see config.ensure_directories).
config.ensure_directories()

app = FastAPI(title="Document Q&A API", version="1.0.0")

# CORS: allow the deployed frontend (configured + Netlify) and local dev servers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        config.FRONTEND_URL,
        "https://stochast.netlify.app",
        "http://localhost:3000",
        "http://localhost:8000"
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class QueryRequest(BaseModel):
    """Request body for POST /api/query."""

    question: str  # the user's natural-language question
    # NOTE(review): accepted by the schema but never forwarded to the agent
    # in /api/query — confirm whether this flag is still meant to do anything.
    auto_fetch_arxiv: bool = False
    paper_filter: Optional[str] = None  # restrict retrieval to one paper when set
class ArxivSearchRequest(BaseModel):
    """Request body for POST /api/arxiv/search."""

    query: str  # free-text arXiv search query
    max_results: int = 5  # upper bound on papers returned
@app.get("/api/stats")
async def get_stats():
    """Return vector-store statistics as reported by the store itself."""
    stats = vector_store.get_stats()
    return stats
@app.get("/api/papers")
async def get_papers():
    """List all papers currently indexed in the vector store."""
    names = vector_store.get_papers()
    return {"papers": names}
@app.post("/api/upload")
async def upload_paper(file: UploadFile = File(...)):
    """Persist an uploaded PDF, ingest it, and index its chunks.

    Returns the generated paper_id, the extracted title, the number of
    chunks the store accepted, and the page count.
    Raises HTTP 400 for non-PDF uploads or PDFs that fail ingestion.
    """
    # UploadFile.filename is Optional in Starlette — guard before .lower().
    if not file.filename or not file.filename.lower().endswith('.pdf'):
        raise HTTPException(status_code=400, detail="Only PDF files are accepted")
    # Keep only the basename: the client controls the filename, and using it
    # verbatim in a path would allow traversal outside PAPERS_DIR.
    safe_name = Path(file.filename).name
    paper_id = str(uuid.uuid4())[:8]
    filepath = config.PAPERS_DIR / f"{paper_id}_{safe_name}"
    content = await file.read()
    with open(filepath, 'wb') as f:
        f.write(content)
    doc = ingest_pdf(filepath)
    if not doc:
        # Remove the saved file so failed uploads don't accumulate on disk.
        filepath.unlink(missing_ok=True)
        raise HTTPException(status_code=400, detail="Failed to process PDF")
    chunks = chunk_document(doc, paper_id)
    added = vector_store.add_chunks(chunks)
    return {
        "paper_id": paper_id,
        "paper_name": doc.title,
        "chunks_created": added,
        "pages": doc.page_count
    }
@app.post("/api/query")
async def query_documents(request: QueryRequest):
    """Answer a question against the indexed documents via the agent.

    NOTE(review): request.auto_fetch_arxiv exists on the schema but is not
    forwarded to agent.query — confirm whether that is intentional.
    """
    result = agent.query(request.question, request.paper_filter)
    payload = {
        "answer": result.answer,
        "sources": result.sources,
        "arxiv_fetched": result.arxiv_fetched,
    }
    return payload
@app.post("/api/clear-history")
async def clear_history():
    """Reset the agent's conversation history."""
    agent.clear_history()
    result = {"status": "ok"}
    return result
@app.delete("/api/papers/{paper_name}")
async def delete_paper(paper_name: str):
    """Remove a paper from the vector store; 404 when the store reports no match."""
    if vector_store.delete_paper(paper_name):
        return {"status": "deleted"}
    raise HTTPException(status_code=404, detail="Paper not found")
@app.get("/api/papers/{paper_name}/pdf")
async def get_paper_pdf(paper_name: str):
    """Serve the first stored PDF whose stem contains paper_name (case-insensitive)."""
    needle = paper_name.lower()
    match = next(
        (pdf for pdf in config.PAPERS_DIR.glob("*.pdf") if needle in pdf.stem.lower()),
        None,
    )
    if match is None:
        raise HTTPException(status_code=404, detail="PDF not found")
    return FileResponse(
        path=match,
        media_type="application/pdf",
        headers={"Content-Disposition": "inline"}
    )
@app.post("/api/arxiv/search")
async def search_arxiv(request: ArxivSearchRequest):
    """Search arXiv and return lightweight metadata for each hit."""
    hits = arxiv_tool.search(request.query, request.max_results)

    def serialize(p):
        # Flatten an arXiv result into a JSON-friendly dict.
        return {
            "arxiv_id": p.arxiv_id,
            "title": p.title,
            "authors": p.authors,
            "abstract": p.abstract,
            "pdf_url": p.pdf_url,
        }

    return {"papers": [serialize(p) for p in hits]}
@app.post("/api/arxiv/download/{arxiv_id}")
async def download_arxiv(arxiv_id: str):
    """Fetch a paper from arXiv by id, ingest it, and index its chunks.

    Raises HTTP 404 when the id yields no arXiv result, HTTP 500 when
    download or PDF ingestion fails.
    """
    papers = arxiv_tool.search(arxiv_id, 1)
    if not papers:
        raise HTTPException(status_code=404, detail="Paper not found on Arxiv")
    paper = papers[0]
    filepath = arxiv_tool.download(paper)
    if not filepath:
        raise HTTPException(status_code=500, detail="Failed to download paper")
    doc = ingest_pdf(filepath)
    if not doc:
        raise HTTPException(status_code=500, detail="Failed to process paper")
    # Deterministic id (non-cryptographic use of md5): re-downloading the
    # same arXiv id maps to the same paper_id.
    paper_id = hashlib.md5(arxiv_id.encode()).hexdigest()[:8]
    chunks = chunk_document(doc, paper_id)
    # Report the count the store actually accepted, consistent with
    # /api/upload, instead of len(chunks) which ignores dedup/rejection.
    added = vector_store.add_chunks(chunks)
    return {"paper_name": doc.title, "chunks_created": added}
@app.post("/api/arxiv/add-to-chat/{arxiv_id}")
async def add_arxiv_to_chat(arxiv_id: str):
    """Fetch and index an arXiv paper, returning a chat-ready summary.

    Raises HTTP 404 when the id yields no arXiv result, HTTP 500 when
    download or PDF ingestion fails.
    """
    papers = arxiv_tool.search(arxiv_id, 1)
    if not papers:
        raise HTTPException(status_code=404, detail="Paper not found on Arxiv")
    paper = papers[0]
    filepath = arxiv_tool.download(paper)
    if not filepath:
        raise HTTPException(status_code=500, detail="Failed to download paper")
    doc = ingest_pdf(filepath)
    if not doc:
        raise HTTPException(status_code=500, detail="Failed to process paper")
    # Deterministic id (non-cryptographic use of md5): re-adding the same
    # arXiv id maps to the same paper_id.
    paper_id = hashlib.md5(arxiv_id.encode()).hexdigest()[:8]
    chunks = chunk_document(doc, paper_id)
    vector_store.add_chunks(chunks)
    summary = f"**{paper.title}**\n\n**Authors:** {', '.join(paper.authors[:3])}"
    if len(paper.authors) > 3:
        summary += " et al."
    # Only append an ellipsis when the abstract was actually truncated;
    # the previous code added "..." unconditionally.
    abstract = paper.abstract
    preview = abstract[:500] + ("..." if len(abstract) > 500 else "")
    summary += f"\n\n**Abstract:** {preview}"
    return {
        "paper_name": doc.title,
        "summary": summary,
        "sources": [{"paper": doc.title, "section": "Abstract"}]
    }
@app.get("/")
async def root():
    """Health-check endpoint confirming the API is up."""
    status = {"status": "ok", "message": "Document Q&A API is running"}
    return status