Spaces:
Build error
Build error
| from fastapi import FastAPI, File, UploadFile, HTTPException | |
| from pydantic import BaseModel | |
| import fitz # PyMuPDF | |
| import faiss | |
| from sentence_transformers import SentenceTransformer | |
| import numpy as np | |
| from phi.agent import Agent | |
| from phi.model.groq import Groq | |
# FastAPI application object for this service.
app = FastAPI()

# In-memory retrieval state, rebuilt by each PDF upload. Nothing is persisted,
# so it starts empty on every process start.
index = None          # FAISS index over the chunk embeddings; None until an upload
pdf_text_chunks = []  # text chunks, position-aligned with the vectors in `index`

# Sentence embedding model, loaded once at import time and shared by the
# upload and chat handlers.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
def agent_response(question, retrieved_text):
    """Answer ``question`` with the Groq-hosted model, using ``retrieved_text``.

    A fresh ``Agent`` is created on every call. The question and the document
    text are interpolated into the agent's instructions and are also sent
    together, newline-separated, as the run message.

    Args:
        question: The user's question.
        retrieved_text: Document text the answer should be based on.

    Returns:
        The ``content`` attribute of the agent's run result.
    """
    llm = Groq(id="llama-3.3-70b-versatile")
    prompt_steps = [
        f"First read the question carefully. The question is: **{question}**",
        f"Then read the document provided to you as a text. The document is: \n**{retrieved_text}**\n",
        "Finally answer the question based on the provided document only. Don't try to give random responses.",
    ]
    assistant = Agent(
        model=llm,
        markdown=True,
        description="You are an AI assistant that provides the answer based on the provided document.",
        instructions=prompt_steps,
    )

    message = question + "\n" + retrieved_text
    run_result = assistant.run(message)
    return run_result.content
# NOTE(review): no route decorator is visible on this coroutine in this view;
# confirm that it is registered on `app`.
async def upload_pdf(file: UploadFile = File(...)):
    """Extract per-page text from an uploaded PDF and build a FAISS index over it.

    Every page that yields non-blank text becomes one chunk. The module-level
    ``pdf_text_chunks`` and ``index`` are only replaced, together, once the
    whole upload has been processed, so a rejected or failed upload leaves the
    previously indexed document usable.

    Args:
        file: The uploaded PDF.

    Returns:
        ``{"message": "PDF processed successfully!"}`` on success.

    Raises:
        HTTPException: 400 if the upload is empty, cannot be opened as a PDF,
            or contains no extractable text.
    """
    global pdf_text_chunks, index

    # Read the uploaded file into memory.
    pdf_data = await file.read()
    if not pdf_data:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    try:
        with fitz.open(stream=pdf_data, filetype="pdf") as doc:
            page_texts = [page.get_text("text") for page in doc]
    except (RuntimeError, ValueError) as err:
        # PyMuPDF reports unreadable or non-PDF data through these exception
        # families; surface that as a client error instead of a 500.
        raise HTTPException(
            status_code=400,
            detail="Could not read the uploaded file as a PDF.",
        ) from err

    # Blank pages (e.g. scanned images without a text layer) carry nothing to
    # retrieve and would only occupy slots in the search results.
    chunks = [text for text in page_texts if text.strip()]
    if not chunks:
        # Without this guard, encoding an empty list yields an array with no
        # second dimension and the index construction below would crash.
        raise HTTPException(
            status_code=400,
            detail="The PDF contains no extractable text.",
        )

    # Embed the chunks; FAISS expects a C-contiguous float32 matrix.
    embeddings = embedding_model.encode(chunks, convert_to_numpy=True)
    embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

    # Create the FAISS index (exact L2 search over all chunk vectors).
    new_index = faiss.IndexFlatL2(embeddings.shape[1])
    new_index.add(embeddings)

    # Publish chunks and index together so they always describe the same PDF.
    pdf_text_chunks, index = chunks, new_index
    return {"message": "PDF processed successfully!"}
# Request body accepted by the chat handler.
class QueryRequest(BaseModel):
    question: str  # the user's question, answered from the uploaded PDF
# NOTE(review): no route decorator is visible on this coroutine in this view;
# confirm that it is registered on `app`.
async def chat(request: QueryRequest):
    """Retrieve the most relevant chunks and generate a response from them.

    The question is embedded, up to five nearest chunks are fetched from the
    FAISS index, joined with blank lines, and handed to ``agent_response``.

    Args:
        request: Body carrying the user's ``question``.

    Returns:
        A dict with the original question under ``"user"`` and the generated
        answer under ``"response"``.

    Raises:
        HTTPException: 400 if no PDF has been indexed yet.
    """
    if index is None:
        raise HTTPException(status_code=400, detail="No PDF uploaded yet.")

    # Never request more neighbours than the index holds: FAISS pads short
    # result lists with -1, and a -1 list index would silently select the
    # *last* chunk instead of failing.
    top_k = min(5, index.ntotal)
    if top_k == 0:
        raise HTTPException(status_code=400, detail="No PDF uploaded yet.")

    # Search for relevant text.
    query_embedding = embedding_model.encode([request.question], convert_to_numpy=True)
    _, indices = index.search(query_embedding, top_k)

    # Keep only real hits; -1 marks "no result" in FAISS output.
    retrieved_texts = [pdf_text_chunks[idx] for idx in indices[0] if idx >= 0]
    retrieved_text_combined = "\n\n".join(retrieved_texts)

    # NOTE(review): agent_response is a plain synchronous call, so this
    # coroutine does not yield to the event loop while the model is answering.
    response = agent_response(request.question, retrieved_text_combined)
    return {"user": request.question, "response": response}