Spaces:
Running
Running
File size: 2,141 Bytes
beba6d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from fastapi import FastAPI, UploadFile, Form, File
from fastapi.responses import JSONResponse
from app.parser import extract_text_from_pdf
from app.chunker import chunk_text
from app.retriever import store_chunks_in_pinecone, query_chunks_from_pinecone
from app.groq_llm import query_groq_llm
import uuid
from dotenv import load_dotenv
import logging
# Load environment variables from a local .env file — presumably the
# Pinecone/Groq API keys used by the imported helpers; confirm against .env.
load_dotenv()
# Application instance targeted by the route decorators below.
app = FastAPI()
# Configure the root logger; handlers in this module log at INFO level.
logging.basicConfig(level=logging.INFO)
@app.post("/run")
async def run_query(file: UploadFile = File(...), question: str = Form(...)):
    """Answer *question* about the uploaded PDF via a RAG pipeline.

    Steps: read the upload, extract its text, chunk it, store the chunks
    in Pinecone under a fresh UUID, retrieve the chunks most relevant to
    the question, and ask the Groq LLM to answer using the top two chunks
    as context.

    Returns:
        dict with ``question``, ``context_used`` (top two chunks) and
        ``answer`` on success; a ``JSONResponse`` with an ``error`` key
        and status 400 when any stage yields nothing usable, or 500 on
        an unexpected failure.
    """
    try:
        logging.info("Received file and question: %s", question)
        file_bytes = await file.read()

        raw_text = extract_text_from_pdf(file_bytes)
        logging.info("Extracted %d characters of text", len(raw_text))
        if not raw_text.strip():
            return JSONResponse(
                content={"error": "No extractable text found in PDF."},
                status_code=400,
            )

        chunks = chunk_text(raw_text)
        logging.info("Generated %d chunks", len(chunks))
        if not chunks:
            return JSONResponse(
                content={"error": "Failed to generate any chunks from text."},
                status_code=400,
            )

        # Fresh UUID so this upload's vectors are identifiable in the index.
        file_id = str(uuid.uuid4())
        store_chunks_in_pinecone(chunks, file_id)
        logging.info("Stored chunks in Pinecone with file_id: %s", file_id)

        top_chunks = query_chunks_from_pinecone(question)
        logging.info("Retrieved %d top matching chunks", len(top_chunks))
        if not top_chunks:
            return JSONResponse(
                content={"error": "No relevant context found."},
                status_code=400,
            )

        # Only the two best matches are sent to the LLM to keep the prompt small.
        context = " ".join(top_chunks[:2])
        answer = query_groq_llm(context, question)
        return {
            "question": question,
            "context_used": top_chunks[:2],
            "answer": answer,
        }
    except Exception as e:
        # Top-level boundary: log the full traceback, return an opaque 500.
        logging.exception("Error during /run endpoint:")
        return JSONResponse(content={"error": str(e)}, status_code=500)
@app.get("/")
def read_root():
    """Health-check endpoint confirming the API is up and pointing at /docs."""
    # Original message was split across two lines by a paste-mangled,
    # mojibake-prefixed string literal; restored to a single clean line.
    return {"message": "LLM PDF QA API is running. Visit /docs to test."}
|