subashpoudel committed on
Commit
fd1c9c4
·
verified ·
1 Parent(s): 709f873

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +31 -0
  2. main.py +72 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import requests

# API URL: base address of the FastAPI backend that serves /upload-pdf/ and /chat/.
API_URL = "http://127.0.0.1:8000"

# Request timeouts in seconds. Without a timeout, `requests` waits forever and
# the Streamlit page hangs if the backend stalls.
UPLOAD_TIMEOUT = 300
CHAT_TIMEOUT = 120

st.title("📄 AI Chatbot for PDF")

# Upload PDF
uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
if uploaded_file:
    # Streamlit re-executes this whole script on every widget interaction
    # (typing a question, clicking "Ask", ...). Remember which file has already
    # been sent so the backend does not re-embed the same PDF on every rerun.
    file_key = (
        uploaded_file.name,
        uploaded_file.size,
        getattr(uploaded_file, "file_id", None),
    )
    if st.session_state.get("processed_pdf") != file_key:
        # Send filename and content type explicitly so the backend receives a
        # well-formed multipart file part.
        files = {
            "file": (uploaded_file.name, uploaded_file.getvalue(), "application/pdf"),
        }
        try:
            response = requests.post(
                f"{API_URL}/upload-pdf/", files=files, timeout=UPLOAD_TIMEOUT
            )
        except requests.RequestException:
            # Backend unreachable or timed out: report it like any other failure
            # instead of showing a raw traceback.
            response = None

        if response is not None and response.status_code == 200:
            st.session_state["processed_pdf"] = file_key
            st.success("PDF processed successfully!")
        else:
            st.error("Failed to process PDF.")
    else:
        st.success("PDF processed successfully!")

# Chat UI
query = st.text_input("Ask a question from the PDF")
if st.button("Ask"):
    if query:
        payload = {"question": query}
        try:
            response = requests.post(
                f"{API_URL}/chat/", json=payload, timeout=CHAT_TIMEOUT
            )
        except requests.RequestException:
            response = None

        answer = None
        if response is not None and response.status_code == 200:
            try:
                answer = response.json()["response"]
            except (ValueError, KeyError, TypeError):
                # Body was not JSON or lacked the expected "response" field.
                answer = None
        elif response is not None and response.status_code == 400:
            # The backend answers 400 when it holds no PDF (e.g. after a
            # restart). Forget the cached upload so the next rerun re-sends it.
            st.session_state.pop("processed_pdf", None)

        if answer is not None:
            st.markdown(f"**Answer:**\n\n{answer}")
        else:
            st.error("Error retrieving answer.")
    else:
        st.warning("Please enter a question.")
main.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile, HTTPException
2
+ from pydantic import BaseModel
3
+ import fitz # PyMuPDF
4
+ import faiss
5
+ from sentence_transformers import SentenceTransformer
6
+ import numpy as np
7
+ from phi.agent import Agent
8
+ from phi.model.groq import Groq
9
+
10
# FastAPI application object; the route decorators below register on it.
app = FastAPI()

# Load embedding model once at import time; both endpoints call its encode().
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Global storage shared by the endpoints.
# pdf_text_chunks: one text string per page of the most recently uploaded PDF.
# index: FAISS index over the page embeddings; stays None until an upload succeeds.
pdf_text_chunks = []
index = None
18
+
19
def agent_response(question, retrieved_text):
    """Answer *question* with the LLM, grounded in *retrieved_text*.

    A fresh ``Agent`` backed by the Groq ``llama-3.3-70b-versatile`` model is
    built on each call. The question and the retrieved text are embedded in the
    agent's instructions and are also sent as the run message.

    Returns the ``content`` attribute of the agent's run result.
    """
    prompt_steps = [
        f"First read the question carefully. The question is: **{question}**",
        f"Then read the document provided to you as a text. The document is: \n**{retrieved_text}**\n",
        "Finally answer the question based on the provided document only. Don't try to give random responses.",
    ]
    assistant = Agent(
        model=Groq(id="llama-3.3-70b-versatile"),
        markdown=True,
        description="You are an AI assistant that provides the answer based on the provided document.",
        instructions=prompt_steps,
    )
    user_message = question + '\n' + retrieved_text
    return assistant.run(user_message).content
33
+
34
@app.post("/upload-pdf/")
async def upload_pdf(file: UploadFile = File(...)):
    """Extract text from the uploaded PDF and build a FAISS index over its pages.

    Each page with extractable text becomes one chunk. The chunks are embedded
    with ``embedding_model`` and stored in a flat L2 FAISS index. The module
    globals ``pdf_text_chunks`` and ``index`` are replaced only after every
    step has succeeded, so a failed upload leaves the previous document usable.

    Raises:
        HTTPException(400): the upload is empty, cannot be parsed as a PDF, or
            contains no extractable text.

    NOTE(review): FastAPI needs the ``python-multipart`` package to parse file
    uploads; it is not listed in requirements.txt - confirm it is installed.
    """
    global pdf_text_chunks, index

    # Read the uploaded file into memory
    pdf_data = await file.read()
    if not pdf_data:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    try:
        with fitz.open(stream=pdf_data, filetype="pdf") as doc:
            page_texts = [page.get_text("text") for page in doc]
    except Exception as exc:
        # PyMuPDF's exception types differ between versions; at this request
        # boundary any parse failure means the client sent an unusable file.
        raise HTTPException(
            status_code=400, detail="Could not read the uploaded file as a PDF."
        ) from exc

    # Pages without text (e.g. scanned images) would only add noise vectors.
    page_texts = [text for text in page_texts if text.strip()]
    if not page_texts:
        # Encoding an empty list yields an array with no second dimension, so
        # the index cannot be sized; reject the upload explicitly instead.
        raise HTTPException(
            status_code=400, detail="No extractable text found in the PDF."
        )

    # Embed the chunks
    embeddings = embedding_model.encode(page_texts, convert_to_numpy=True)

    # Create FAISS index
    new_index = faiss.IndexFlatL2(embeddings.shape[1])
    new_index.add(embeddings)

    # Publish both globals together, and only now, so /chat/ never sees chunks
    # from one document paired with the index of another.
    pdf_text_chunks = page_texts
    index = new_index

    return {"message": "PDF processed successfully!"}
54
+
55
class QueryRequest(BaseModel):
    """JSON request body accepted by the ``/chat/`` endpoint."""

    # The user's question about the uploaded PDF.
    question: str
57
+
58
@app.post("/chat/")
async def chat(request: QueryRequest):
    """Retrieve the most relevant chunks and generate a response.

    The question is embedded with ``embedding_model`` and up to five nearest
    page chunks are looked up in the FAISS ``index``. Their text is joined and
    passed to ``agent_response`` together with the question.

    Returns:
        dict with ``"user"`` (the question) and ``"response"`` (the answer).

    Raises:
        HTTPException(400): no PDF has been indexed yet.
    """
    if index is None or not pdf_text_chunks:
        raise HTTPException(status_code=400, detail="No PDF uploaded yet.")

    # Search for relevant text
    query_embedding = embedding_model.encode([request.question], convert_to_numpy=True)

    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, which as a Python index would silently pick the last page.
    chunk_count = len(pdf_text_chunks)
    top_k = min(5, chunk_count)  # Get top 5 matches at most
    _, indices = index.search(query_embedding, top_k)
    retrieved_texts = [
        pdf_text_chunks[idx] for idx in indices[0] if 0 <= idx < chunk_count
    ]
    retrieved_text_combined = "\n\n".join(retrieved_texts)

    response = agent_response(request.question, retrieved_text_combined)
    return {"user": request.question, "response": response}
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pymupdf
4
+ faiss-cpu
5
+ sentence-transformers
6
+ phidata
7
+ streamlit
8
+ requests