# Resume_QA_bot / app.py
# tobinthomas's picture
# Update app.py
# d4e8eff verified
import os
import gradio as gr
import faiss
import numpy as np
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq
# ✅ API key from the environment (set via Hugging Face "Secrets")
# Read the Groq API key from the process environment and fail fast, at import
# time, when it is unset or empty, so the app never starts without credentials.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise ValueError("GROQ_API_KEY not found in environment variables. Set it in the Space 'Secrets' section.")

# Shared Groq client used for every chat-completion request.
client = Groq(api_key=GROQ_API_KEY)
# ✅ Global state
# Retrieval state; both names are rebound by process_pdf_and_query on each call.
text_chunks = list()  # chunk strings of the most recently processed PDF
faiss_index = None  # FAISS index over the chunk embeddings; None until a PDF is processed

# Sentence-embedding model, loaded once at import time and reused for both
# document chunks and questions.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# ✅ Helper functions
def extract_text_from_pdf(pdf_file_obj):
    """Return the concatenated text of all pages of a PDF.

    Args:
        pdf_file_obj: Whatever ``PdfReader`` accepts as its source; it is
            passed through unchanged.

    Returns:
        The text of each page that yields any text, in page order, with a
        newline appended after every such page. Pages whose
        ``extract_text()`` result is empty or ``None`` are skipped, so the
        result is ``""`` when no page yields text.
    """
    page_texts = []
    for page in PdfReader(pdf_file_obj).pages:
        extracted = page.extract_text()
        if extracted:
            page_texts.append(extracted + "\n")
    # Join once at the end instead of growing a string page by page.
    return "".join(page_texts)
def split_into_chunks(text, chunk_size=500):
    """Split *text* into consecutive chunks of at most *chunk_size* words.

    Words are the whitespace-delimited tokens produced by ``str.split()``;
    each chunk re-joins its words with single spaces, so the original
    spacing and line breaks are not preserved.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk. Must be at least 1.

    Returns:
        A list of chunk strings in document order. The last chunk may hold
        fewer than *chunk_size* words. Empty when *text* contains no words.

    Raises:
        ValueError: If *chunk_size* is less than 1.
    """
    if chunk_size < 1:
        # A negative step makes range() empty, which would silently discard
        # the whole text; a zero step fails with an unrelated-looking
        # range() error. Reject both up front with a clear message.
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
def process_pdf_and_query(pdf_file, question):
    """Index an uploaded PDF and answer a question about it with Groq.

    The PDF text is extracted, split into word chunks, embedded with
    ``embedding_model`` and stored in a fresh FAISS L2 index. The chunks
    nearest to the question are then sent, together with the question, to
    the Groq chat-completions API.

    Side effects:
        Rebinds the module-level ``text_chunks`` and ``faiss_index`` on
        every call.

    Args:
        pdf_file: The uploaded PDF as delivered by the Gradio ``File``
            input, or ``None`` when nothing was uploaded. It is forwarded
            unchanged to ``extract_text_from_pdf``.
        question: The user's question; ``None`` or blank is treated as
            "no question yet".

    Returns:
        The model's answer, or a short status message when no PDF was
        uploaded, the PDF contains no extractable text, or no question
        was entered.
    """
    global faiss_index, text_chunks

    if pdf_file is None:
        return "Please upload a PDF file."

    # Step 1: Process PDF
    raw_text = extract_text_from_pdf(pdf_file)
    text_chunks = split_into_chunks(raw_text)
    if not text_chunks:
        # No text could be extracted. Encoding an empty list yields an array
        # without a second axis, so reading ``shape[1]`` below would crash.
        faiss_index = None
        return "No extractable text was found in the PDF."

    # FAISS operates on float32 matrices; asarray avoids a copy when the
    # encoder already returns that dtype.
    embeddings = np.asarray(embedding_model.encode(text_chunks), dtype="float32")
    dimension = embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(embeddings)

    # Step 2: Search FAISS index
    if not (question or "").strip():
        return "PDF processed. Please enter a question."
    query_vector = np.asarray(embedding_model.encode([question]), dtype="float32")
    # Never request more neighbours than there are chunks: FAISS pads missing
    # results with label -1, which as a Python list index would select the
    # LAST chunk and duplicate it in the context.
    top_k = min(3, len(text_chunks))
    _distances, indices = faiss_index.search(query_vector, k=top_k)
    context = "\n\n".join(text_chunks[i] for i in indices[0] if i >= 0)

    # Step 3: Use Groq API to get response
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {"role": "system", "content": "You are an assistant that summarizes and answers questions from documents."},
            {"role": "user", "content": f"{context}\n\nQuestion: {question}"},
        ],
    )
    return response.choices[0].message.content
# ✅ Gradio interface
# Build the UI components first (in the same order as they appear on the
# page), then wire them to process_pdf_and_query.
pdf_input = gr.File(label="Upload PDF")
question_input = gr.Textbox(label="Ask a question about the PDF")
answer_output = gr.Textbox(label="Answer")

iface = gr.Interface(
    fn=process_pdf_and_query,
    inputs=[pdf_input, question_input],
    outputs=answer_output,
    title="Document Q&A with Groq",
    description="Upload a PDF and ask a question. Powered by Groq + LLaMA 3.",
)

# Start the Gradio server when this module is executed.
iface.launch()