Spaces:

subashpoudel
/

Document-Interaction-FASTAPI

Build error

App Files Files Community

subashpoudel committed on Mar 12, 2025

Commit

fd1c9c4

verified ·

1 Parent(s): 709f873

Upload 3 files

Browse files

Files changed (3) hide show

app.py +31 -0
main.py +72 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import streamlit as st
+import requests
+# API URL
+API_URL = "http://127.0.0.1:8000"
+st.title("📄 AI Chatbot for PDF")
+# Upload PDF
+uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
+if uploaded_file:
+    files = {"file": uploaded_file.getvalue()}
+    response = requests.post(f"{API_URL}/upload-pdf/", files=files)
+    if response.status_code == 200:
+        st.success("PDF processed successfully!")
+    else:
+        st.error("Failed to process PDF.")
+# Chat UI
+query = st.text_input("Ask a question from the PDF")
+if st.button("Ask"):
+    if query:
+        payload = {"question": query}
+        response = requests.post(f"{API_URL}/chat/", json=payload)
+        if response.status_code == 200:
+            answer = response.json()["response"]
+            st.markdown(f"**Answer:**\n\n{answer}")
+        else:
+            st.error("Error retrieving answer.")

main.py ADDED Viewed

	@@ -0,0 +1,72 @@

+from fastapi import FastAPI, File, UploadFile, HTTPException
+from pydantic import BaseModel
+import fitz  # PyMuPDF
+import faiss
+from sentence_transformers import SentenceTransformer
+import numpy as np
+from phi.agent import Agent
+from phi.model.groq import Groq
+app = FastAPI()
+# Load the sentence-embedding model once at import time; both endpoints below reuse it.
+embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+# Process-wide, in-memory state for the most recently uploaded PDF (reset on every upload).
+pdf_text_chunks = []  # extracted text, one string per PDF page, in page order
+index = None  # FAISS index over the chunk embeddings; None until a PDF has been uploaded
+def agent_response(question, retrieved_text):
+    """Generate response using AI model based on retrieved text."""
+    agent = Agent(
+        model=Groq(id="llama-3.3-70b-versatile"),
+        markdown=True,
+        description="You are an AI assistant that provides the answer based on the provided document.",
+        instructions=[
+            f"First read the question carefully. The question is: **{question}**",
+            f"Then read the document provided to you as a text. The document is: \n**{retrieved_text}**\n",
+            "Finally answer the question based on the provided document only. Don't try to give random responses."
+        ]
+    )
+    response = agent.run(question + '\n' + retrieved_text).content
+    return response
+@app.post("/upload-pdf/")
+async def upload_pdf(file: UploadFile = File(...)):
+    """Extract text from PDF, create FAISS index."""
+    global pdf_text_chunks, index
+    pdf_text_chunks = []
+    # Read the uploaded file into memory
+    pdf_data = await file.read()
+    with fitz.open("pdf", pdf_data) as doc:
+        for page in doc:
+            pdf_text_chunks.append(page.get_text("text"))
+    # Embed the chunks
+    embeddings = embedding_model.encode(pdf_text_chunks, convert_to_numpy=True)
+    # Create FAISS index
+    index = faiss.IndexFlatL2(embeddings.shape[1])
+    index.add(embeddings)
+    return {"message": "PDF processed successfully!"}
+class QueryRequest(BaseModel):  # JSON request body accepted by POST /chat/
+    question: str  # the user's question; embedded and matched against the PDF chunks
+@app.post("/chat/")
+async def chat(request: QueryRequest):
+    """Retrieve the most relevant chunk and generate a response."""
+    global index, pdf_text_chunks
+    if index is None:
+        raise HTTPException(status_code=400, detail="No PDF uploaded yet.")
+    # Search for relevant text
+    query_embedding = embedding_model.encode([request.question], convert_to_numpy=True)
+    _, indices = index.search(query_embedding, 5)  # Get top 5 matches
+    retrieved_texts = [pdf_text_chunks[idx] for idx in indices[0]]
+    retrieved_text_combined = "\n\n".join(retrieved_texts)
+    response = agent_response(request.question, retrieved_text_combined)
+    return {"user": request.question, "response": response}

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+fastapi
+uvicorn
+# Required by FastAPI to parse multipart form uploads (UploadFile / File).
+python-multipart
+pymupdf
+faiss-cpu
+sentence-transformers
+# Imported directly by main.py.
+numpy
+phidata
+# Client library used by phi.model.groq.
+groq
+streamlit
+requests