# PDF_QnA / app.py — Gradio app for question-answering over uploaded PDFs.
# (Hugging Face Spaces page header removed — "AamirMalik's picture /
#  Update app.py / 9c590ee verified" is web-UI residue, not valid Python.)
import os
import gradio as gr
import requests
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
# Load Groq API key from Hugging Face secrets (None if the secret is not set;
# requests would then be sent with "Bearer None" and rejected by the API)
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Load embedding model used to vectorize PDF chunks and queries
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Initialize FAISS DB (in-memory for each session); populated by load_and_embed_pdfs
vectorstore = None
def load_and_embed_pdfs(pdf_files):
    """Extract text from the uploaded PDFs, chunk it, and build a FAISS index.

    Args:
        pdf_files: list of Gradio file objects (each exposes a ``.name`` path).

    Returns:
        A status string shown in the UI's Status textbox.
    """
    global vectorstore
    all_text = ""
    for file in pdf_files:
        pdf = PdfReader(file.name)
        for page in pdf.pages:
            # extract_text() may return None for image-only pages; keep going.
            all_text += page.extract_text() or ""
    # Split into overlapping chunks so retrieved passages keep surrounding context.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_text(all_text)
    # Guard: FAISS.from_texts raises on an empty list (e.g. scanned-image PDFs
    # with no extractable text) — report it instead of crashing the handler.
    if not texts:
        return "No extractable text found in the uploaded PDF(s)."
    # Embed and store in FAISS (in-memory; replaces any previous index).
    vectorstore = FAISS.from_texts(texts, embedding_model)
    return "PDFs processed and indexed. You can now ask questions."
def groq_chat_completion(messages):
    """Send a chat-completion request to the Groq OpenAI-compatible API.

    Args:
        messages: list of ``{"role": ..., "content": ...}`` dicts.

    Returns:
        The assistant reply text (first choice).

    Raises:
        requests.HTTPError: on a non-2xx response (via raise_for_status).
        requests.Timeout: if the API does not answer within the timeout.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {GROQ_API_KEY}"
    }
    data = {
        "model": "llama3-70b-8192",  # Or llama-3.3-70b-versatile
        "messages": messages
    }
    # BUG FIX: the original call had no timeout — a stalled API connection
    # would hang the Gradio handler forever. 60 s covers long generations.
    response = requests.post(
        "https://api.groq.com/openai/v1/chat/completions",
        headers=headers,
        json=data,
        timeout=60,
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
def ask_question(user_query, chat_history):
    """Answer *user_query* from the indexed PDFs and append to the chat log.

    Args:
        user_query: the question typed by the user.
        chat_history: list of (user, assistant) tuples from the Chatbot widget.

    Returns:
        ``("", chat_history)`` — always a 2-tuple, matching the two Gradio
        outputs declared in the click binding ([query_input, chatbot]).
    """
    global vectorstore
    if vectorstore is None:
        # BUG FIX: the original returned a bare string here, which does not
        # match the two declared outputs and breaks the UI. Surface the
        # message in the chat instead.
        chat_history.append((user_query, "Please upload and process PDF documents first."))
        return "", chat_history
    # Retrieve the 3 most similar chunks to ground the answer.
    docs = vectorstore.similarity_search(user_query, k=3)
    context = "\n\n".join(doc.page_content for doc in docs)
    # Create chat message format: pin the model to the retrieved context.
    messages = [
        {"role": "system", "content": "Answer only based on the provided document context."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {user_query}"}
    ]
    try:
        answer = groq_chat_completion(messages)
    except Exception as e:
        # BUG FIX: same return-shape issue as above — show the error in chat
        # rather than returning a bare string to a 2-output binding.
        chat_history.append((user_query, f"Error: {str(e)}"))
        return "", chat_history
    chat_history.append((user_query, answer))
    return "", chat_history
# Build and launch the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("## 📄 Chat with your PDF (Powered by Groq + Llama3)")

    # --- upload / indexing controls -------------------------------------
    pdf_upload = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload PDF(s)")
    process_btn = gr.Button("Process PDFs")
    status_output = gr.Textbox(label="Status", interactive=False)

    # --- chat controls ---------------------------------------------------
    chatbot = gr.Chatbot(label="Ask Questions")
    query_input = gr.Textbox(label="Your Question")
    send_btn = gr.Button("Send")

    # Wire button clicks to their handlers.
    process_btn.click(load_and_embed_pdfs, inputs=pdf_upload, outputs=status_output)
    send_btn.click(fn=ask_question, inputs=[query_input, chatbot], outputs=[query_input, chatbot])

demo.launch()