Amaanali01's picture
Update app.py
2b6093a verified
import os
import gradio as gr
import faiss
import numpy as np
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq
# Groq API key.
# Read from the GROQ_API_KEY environment variable so the secret never lives in
# the source file. Falls back to an empty string when the variable is unset,
# in which case requests to Groq are expected to fail with an auth error.
# NOTE(review): a literal key used to be committed on this line; treat that key
# as leaked and revoke it in the Groq console.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")

# Sentence-embedding model used for both PDF chunks and user questions.
# Loaded once at import time and shared by process_pdf() and ask_question().
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Module-level state written by process_pdf() and read by ask_question():
#   chunks - list of text chunks from the most recently processed PDF
#   index  - FAISS index over the embeddings of `chunks` (None until a PDF
#            has been processed)
chunks = []
index = None
# PDF text extraction and processing
def process_pdf(file):
    """Extract the text of an uploaded PDF and build a FAISS index over it.

    The text of every page is concatenated, split into overlapping windows of
    words, embedded with the module-level ``model`` and stored in a fresh
    ``faiss.IndexFlatL2``. On success the module-level ``chunks`` and ``index``
    are replaced together; on any failure they are left exactly as they were,
    so a previously processed PDF stays queryable.

    Args:
        file: The value delivered by the Gradio file input. Either an object
            exposing the path as ``.name`` or a plain path string; ``None``
            when nothing was uploaded.

    Returns:
        A human-readable status message (success or error) for the UI.
    """
    global chunks, index

    # Nothing uploaded: report it plainly instead of surfacing an
    # AttributeError message from ``file.name`` below.
    if file is None:
        return "❌ No file uploaded. Please upload a PDF file first."

    # Word-window chunking parameters: each chunk holds up to `chunk_size`
    # words and shares `chunk_overlap` words with its predecessor.
    chunk_size = 300
    chunk_overlap = 50

    try:
        # NOTE(review): depending on the Gradio version/configuration the
        # upload may arrive as a plain path string rather than an object with
        # ``.name`` - accept both; confirm against the deployed Gradio version.
        pdf_path = getattr(file, "name", file)
        reader = PdfReader(pdf_path)
        # extract_text() may yield None for a page; treat that as empty text.
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
        if not text.strip():
            return "❌ No text found in the PDF. Please upload a different file."

        words = text.split()
        step = chunk_size - chunk_overlap
        new_chunks = []
        for start in range(0, len(words), step):
            new_chunks.append(" ".join(words[start:start + chunk_size]))
            # This window already reached the last word; any further window
            # would only repeat the tail of this one.
            if start + chunk_size >= len(words):
                break

        # FAISS operates on float32 matrices.
        embeddings = np.asarray(model.encode(new_chunks), dtype="float32")
        new_index = faiss.IndexFlatL2(embeddings.shape[1])
        new_index.add(embeddings)
    except Exception as e:
        return f"❌ Error processing the PDF: {str(e)}"

    # Publish chunks and index together, only after everything succeeded, so
    # ask_question() never sees chunks that do not match the index.
    chunks, index = new_chunks, new_index
    return f"βœ… Processed {len(chunks)} chunks from uploaded PDF. You can now ask questions."
# Ask a question
def ask_question(query):
    """Answer a question about the processed PDF using retrieved context.

    The question is embedded with the module-level ``model``, the nearest
    chunks (at most three) are looked up in the module-level FAISS ``index``,
    and those chunks are sent together with the question to the Groq chat
    completion API.

    Args:
        query: The user's question as entered in the UI.

    Returns:
        The model's answer, or a human-readable warning/error message when the
        question is blank, no PDF has been processed yet, or the Groq call
        fails.
    """
    # A blank question has nothing to retrieve or answer.
    if not query or not query.strip():
        return "⚠️ Please enter a question."
    if not chunks or index is None:
        return "⚠️ Please upload and process a PDF first."

    # Never request more neighbours than there are chunks: FAISS pads missing
    # results with -1, and -1 used as a list index would silently pick the
    # last chunk again.
    top_k = min(3, len(chunks))
    query_embedding = np.asarray(model.encode([query]), dtype="float32")
    _, indices = index.search(query_embedding, k=top_k)
    # Keep only positions that really refer to a stored chunk.
    context = "\n".join(chunks[i] for i in indices[0] if 0 <= i < len(chunks))

    prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"
    try:
        # Client construction is inside the try so that a missing or invalid
        # API key is reported through the same error message as a failed call.
        client = Groq(api_key=GROQ_API_KEY)
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-70b-8192",
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"❌ Error from Groq API: {str(e)}"
# πŸŽ›οΈ Gradio Interface
file_input = gr.File(label="πŸ“„ Upload PDF")
question_input = gr.Textbox(label="❓ Ask a Question about the PDF")
answer_output = gr.Textbox(label="πŸ“˜ Answer")
pdf_processor = gr.Interface(fn=process_pdf, inputs=file_input, outputs="text")
pdf_qa = gr.Interface(fn=ask_question, inputs=question_input, outputs=answer_output)
# Create a tabbed interface with "Upload PDF" and "Ask a Question" tabs
app = gr.TabbedInterface(
[pdf_processor, pdf_qa],
tab_names=["Upload PDF", "Ask a Question"]
)
# Launch the app
if __name__ == "__main__":
app.launch()