# chippyjolly - "Update app.py" - commit eef82e0 (verified)
# Install required libraries (only run this once in Colab or terminal)
# !pip install gradio faiss-cpu sentence-transformers PyPDF2 groq
import os
import gradio as gr
import faiss
import numpy as np
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq
# Set up the Groq client. The API key is not hard-coded: it is read from the
# GROQ_API_KEY environment variable (os.getenv returns None when it is unset).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)
# PDF text extraction
def extract_text_from_pdf(file):
    """Return the text of all pages of a PDF, joined by newlines.

    Args:
        file: The PDF to read; it is handed to ``PdfReader`` unchanged.

    Returns:
        The text of every page, one page per newline-separated segment.
        Pages whose ``extract_text()`` result is empty (or otherwise falsy)
        are skipped, so the result is an empty string when no page yields
        any text.
    """
    reader = PdfReader(file)
    # Extract each page exactly once. Text extraction is the expensive step,
    # and filtering on a second extract_text() call would double the work.
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(page_text for page_text in page_texts if page_text)
# Split text into chunks
def split_into_chunks(text, chunk_size=500):
    """Split text into consecutive chunks of at most ``chunk_size`` words.

    Words are whatever ``str.split()`` produces, i.e. runs of non-whitespace
    characters; each chunk is those words re-joined with single spaces.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk. Must be at least 1.

    Returns:
        A list of chunk strings in document order. Empty or whitespace-only
        text yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # A zero step makes range() fail with an unrelated-looking message and a
    # negative step silently produces no chunks at all, so reject both early.
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
# Load sentence embedding model once at import time; process_pdf() and
# query_document() both encode text with this same instance.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# Global FAISS index and text chunk list. process_pdf() rebinds both and
# query_document() reads them; faiss_index stays None and text_chunks stays
# empty until a PDF has been processed.
faiss_index = None
text_chunks = []
# PDF processing
def process_pdf(file):
    """Extract, chunk, embed and index a PDF so it can be queried.

    Rebinds the module-level ``faiss_index`` and ``text_chunks``. Any
    previously indexed document is discarded up front, so a missing upload,
    a PDF without text, or an exception raised while processing can never
    leave queries answering from an older document.

    Args:
        file: The uploaded PDF, forwarded to ``extract_text_from_pdf``.
            ``None`` is treated as "nothing uploaded".

    Returns:
        A human-readable status message: the success message when the
        document was indexed, otherwise a warning explaining why not.
    """
    global faiss_index, text_chunks

    # Reset first so every early return (and every exception) leaves the two
    # globals in the consistent "no document loaded" state.
    faiss_index = None
    text_chunks = []

    if file is None:
        return "⚠️ Please upload a PDF file first."

    chunks = split_into_chunks(extract_text_from_pdf(file))
    if not chunks:
        # With no chunks there is no embedding dimension to size the index
        # from, so stop here instead of failing on embeddings.shape[1].
        return "⚠️ No extractable text was found in this PDF."

    # FAISS works on float32 matrices; asarray is a no-op when the encoder
    # already returns that dtype.
    embeddings = np.asarray(embedding_model.encode(chunks), dtype="float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # Publish index and chunks together, only once both are fully built.
    faiss_index, text_chunks = index, chunks
    return "✅ PDF processed and indexed successfully!"
# Query handling
def query_document(question, top_k=3):
    """Answer a question using the most relevant chunks of the indexed PDF.

    The question is embedded, the nearest chunks are looked up in the
    module-level ``faiss_index``, and those chunks plus the question are sent
    to the Groq chat model.

    Args:
        question: The user's question.
        top_k: Maximum number of chunks to retrieve as context.

    Returns:
        The model's answer text, or a warning string when the question is
        empty or no PDF has been processed yet.
    """
    if not question or not question.strip():
        return "⚠️ Please enter a question."
    # Compare against None explicitly rather than relying on the truthiness
    # of the index object.
    if faiss_index is None or not text_chunks:
        return "⚠️ Please upload and process a PDF first."

    # Never request more neighbours than there are chunks: FAISS pads missing
    # results with -1, and text_chunks[-1] would silently repeat the last
    # chunk in the context.
    k = min(top_k, len(text_chunks))
    query_vector = np.asarray(embedding_model.encode([question]), dtype="float32")
    _distances, indices = faiss_index.search(query_vector, k)
    # Keep the bounds filter as a second line of defence against padding.
    context = "\n\n".join(
        text_chunks[i] for i in indices[0] if 0 <= i < len(text_chunks)
    )

    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {"role": "system", "content": "You are an assistant that summarizes and analyzes documents."},
            {"role": "user", "content": f"{context}\n\nQuestion: {question}"},
        ],
    )
    return response.choices[0].message.content
# Gradio UI
# NOTE(review): the original indentation of this section was lost; the nesting
# below (both inputs side by side in one Row, outputs underneath) is a
# reconstruction - confirm against the intended layout.
with gr.Blocks() as app:
    gr.Markdown("## 🤖 Resume Q&A Assistant\nUpload a resume (PDF) and ask questions about its content.")
    with gr.Row():
        pdf_input = gr.File(label="Upload your PDF", file_types=[".pdf"])
        question_input = gr.Textbox(label="Ask a question about the resume")
    status_output = gr.Textbox(label="Status", interactive=False)
    answer_output = gr.Textbox(label="Answer", interactive=False)

    def run_app(pdf_file, question):
        """Index the uploaded PDF, then answer the question against it.

        Args:
            pdf_file: Value of the file input; None when nothing is uploaded.
            question: Text typed into the question box.

        Returns:
            A ``(status, answer)`` tuple matching the two output textboxes.
        """
        if pdf_file is None:
            # Without an upload there is nothing to read; report it in the
            # status box instead of failing inside PDF processing.
            return "⚠️ Please upload a PDF file first.", ""
        status = process_pdf(pdf_file)
        answer = query_document(question)
        return status, answer

    submit_btn = gr.Button("Submit")
    submit_btn.click(fn=run_app, inputs=[pdf_input, question_input], outputs=[status_output, answer_output])

app.launch()