# sreejang's picture
# Update app.py
# 433f762 verified
import os
import gradio as gr
import faiss
import numpy as np
from groq import Groq
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
# -----------------------
# Load models
# -----------------------
# Sentence-embedding model; used to encode both document chunks and queries.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Groq chat client. The key comes from the GROQ_API_KEY environment variable
# (os.getenv yields None when it is unset).
# Previously tried alternative: api_key=userdata.get('RAG')
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
# -----------------------
# Global storage
# -----------------------
# FAISS index over the embeddings of the current document's chunks.
# Stays None until create_faiss_index() has run; answer_question() checks
# for None to detect that no PDF has been processed yet.
faiss_index = None
# Text chunks of the current document, in the same order as the vectors
# added to faiss_index, so a search hit's position maps back to its text.
document_chunks = []
# -----------------------
# Helper functions
# -----------------------
def read_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: The PDF source; passed unchanged to ``PdfReader``.

    Returns:
        The text of all pages concatenated in page order, with no separator
        between pages. A page that yields no text contributes nothing.
    """
    reader = PdfReader(file)
    # ``or ""`` tolerates a page whose extraction yields None/empty (e.g. an
    # image-only page), which would otherwise make concatenation raise
    # TypeError. ``join`` also avoids rebuilding the string once per page.
    return "".join(page.extract_text() or "" for page in reader.pages)
def chunk_text(text, chunk_size=500, overlap=50):
    """Split *text* into fixed-size character windows that overlap.

    Consecutive chunks share ``overlap`` characters, so a sentence cut at a
    chunk boundary still appears intact in one of the two neighbours.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by consecutive chunks; must be
            smaller than ``chunk_size``.

    Returns:
        A list of chunks in document order; empty when *text* is empty. The
        last chunk may be shorter than ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size`` (the window would never advance).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    step = chunk_size - overlap
    text_len = len(text)
    chunks = []
    start = 0
    while start < text_len:
        end = start + chunk_size
        chunks.append(text[start:end])
        # Once a chunk reaches the end of the text, stop: stepping back by
        # ``overlap`` would only emit a tail already contained in this chunk.
        if end >= text_len:
            break
        start += step
    return chunks
def create_faiss_index(chunks):
    """Embed *chunks* and make them the active searchable document.

    Builds a flat L2 FAISS index from the chunk embeddings, then replaces the
    module-level ``faiss_index`` and ``document_chunks`` with the new index
    and chunk list.

    Args:
        chunks: Non-empty list of text chunks to index.

    Raises:
        ValueError: If *chunks* is empty (there would be no embedding
            dimension to build an index from).
    """
    global faiss_index, document_chunks

    if not chunks:
        raise ValueError("Cannot build an index from an empty list of chunks.")

    vectors = np.asarray(embedding_model.encode(chunks), dtype="float32")
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)

    # Publish index and chunks together only after everything succeeded, so a
    # failure above cannot leave new chunks paired with an old index.
    faiss_index = index
    document_chunks = list(chunks)
def search_chunks(query, top_k=3):
    """Return the stored chunks most similar to *query*.

    Args:
        query: The search text; it is embedded with ``embedding_model``.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks from ``document_chunks``, in the order FAISS
        ranks them. Empty when no document has been indexed or ``top_k`` is
        not positive.
    """
    if faiss_index is None or not document_chunks or top_k <= 0:
        return []

    # Never ask for more neighbours than there are vectors: FAISS pads the
    # missing slots with index -1, and document_chunks[-1] would silently
    # return the last chunk again.
    k = min(top_k, len(document_chunks))
    query_vector = np.asarray(embedding_model.encode([query]), dtype="float32")
    _distances, indices = faiss_index.search(query_vector, k)

    total = len(document_chunks)
    # The range check is a second line of defence against -1 padding.
    return [document_chunks[i] for i in indices[0] if 0 <= i < total]
def ask_groq(context, question):
    """Ask the Groq chat model to answer *question* using *context*.

    Args:
        context: Text the model is told to base its answer on.
        question: The user's question.

    Returns:
        The message content of the first choice in the completion response.
    """
    # Single user message: instruction, then the text, then the question.
    user_message = (
        "\nUse the text below to answer the question.\n"
        "Text:\n"
        f"{context}\n"
        "Question:\n"
        f"{question}\n"
    )
    messages = [{"role": "user", "content": user_message}]
    completion = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# -----------------------
# Gradio functions
# -----------------------
def upload_pdf(file):
    """Gradio handler: read the uploaded PDF and index it for questions.

    Args:
        file: The value delivered by the ``gr.File`` component, or None when
            nothing has been uploaded.

    Returns:
        A status message for the UI.
    """
    if file is None:
        return "Please upload a PDF first."

    text = read_pdf(file)
    # A scanned or image-only PDF yields no text; indexing zero chunks would
    # crash, so report it to the user instead.
    if not text or not text.strip():
        return "No extractable text found in this PDF."

    chunks = chunk_text(text)
    create_faiss_index(chunks)
    return "PDF processed. You can now ask questions."
def answer_question(question):
    """Gradio handler: answer *question* from the indexed PDF.

    Retrieves the most relevant chunks, joins them with newlines as context,
    and asks the LLM.

    Args:
        question: The text from the question box.

    Returns:
        The model's answer, or a prompt to the user when no PDF has been
        processed or the question is blank.
    """
    if faiss_index is None:
        return "Please upload a PDF first."
    # Don't spend a retrieval and an LLM call on an empty question.
    if not question or not question.strip():
        return "Please enter a question."

    relevant_chunks = search_chunks(question)
    context = "\n".join(relevant_chunks)
    return ask_groq(context, question)
# -----------------------
# Gradio UI
# -----------------------
with gr.Blocks() as demo:
    gr.Markdown("# PDF Question Answer App")

    # Step 1: upload a PDF and build the index.
    pdf_file = gr.File(label="Upload PDF")
    upload_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status", lines=1)

    # Step 2: ask a question about the processed PDF.
    question = gr.Textbox(label="Your Question", lines=2, placeholder="Type your question here")
    ask_btn = gr.Button("Ask Question")
    answer = gr.Textbox(label="Answer", lines=10)

    # Wire the buttons to their handlers.
    upload_btn.click(fn=upload_pdf, inputs=pdf_file, outputs=status)
    ask_btn.click(fn=answer_question, inputs=question, outputs=answer)

demo.launch()