Spaces:
Running
Running
File size: 2,141 Bytes
beba6d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from fastapi import FastAPI, UploadFile, Form, File
from fastapi.responses import JSONResponse
from app.parser import extract_text_from_pdf
from app.chunker import chunk_text
from app.retriever import store_chunks_in_pinecone, query_chunks_from_pinecone
from app.groq_llm import query_groq_llm
import uuid
from dotenv import load_dotenv
import logging
# Load environment variables from a local .env file — presumably the
# Pinecone/Groq API keys used by the imported helpers; confirm against .env.
load_dotenv()
# Application instance targeted by the route decorators below.
app = FastAPI()
# Configure the root logger; handlers in this module log at INFO level.
logging.basicConfig(level=logging.INFO)
@app.post("/run")
async def run_query(file: UploadFile = File(...), question: str = Form(...)):
    """Answer *question* about the uploaded PDF via a RAG pipeline.

    Steps: read the upload, extract its text, chunk it, store the chunks
    in Pinecone under a fresh UUID, retrieve the chunks most relevant to
    the question, and ask the Groq LLM to answer using the top two chunks
    as context.

    Returns:
        dict with ``question``, ``context_used`` (top two chunks) and
        ``answer`` on success; a ``JSONResponse`` with an ``error`` key
        and status 400 when any stage yields nothing usable, or 500 on
        an unexpected failure.
    """
    try:
        logging.info("Received file and question: %s", question)
        file_bytes = await file.read()

        raw_text = extract_text_from_pdf(file_bytes)
        logging.info("Extracted %d characters of text", len(raw_text))
        if not raw_text.strip():
            return JSONResponse(
                content={"error": "No extractable text found in PDF."},
                status_code=400,
            )

        chunks = chunk_text(raw_text)
        logging.info("Generated %d chunks", len(chunks))
        if not chunks:
            return JSONResponse(
                content={"error": "Failed to generate any chunks from text."},
                status_code=400,
            )

        # Fresh UUID so this upload's vectors are identifiable in the index.
        file_id = str(uuid.uuid4())
        store_chunks_in_pinecone(chunks, file_id)
        logging.info("Stored chunks in Pinecone with file_id: %s", file_id)

        top_chunks = query_chunks_from_pinecone(question)
        logging.info("Retrieved %d top matching chunks", len(top_chunks))
        if not top_chunks:
            return JSONResponse(
                content={"error": "No relevant context found."},
                status_code=400,
            )

        # Only the two best matches are sent to the LLM to keep the prompt small.
        context = " ".join(top_chunks[:2])
        answer = query_groq_llm(context, question)
        return {
            "question": question,
            "context_used": top_chunks[:2],
            "answer": answer,
        }
    except Exception as e:
        # Top-level boundary: log the full traceback, return an opaque 500.
        logging.exception("Error during /run endpoint:")
        return JSONResponse(content={"error": str(e)}, status_code=500)
@app.get("/")
def read_root():
    """Health-check endpoint confirming the API is up and pointing at /docs."""
    # Original message was split across two lines by a paste-mangled,
    # mojibake-prefixed string literal; restored to a single clean line.
    return {"message": "LLM PDF QA API is running. Visit /docs to test."}
|