"""Resume Q&A assistant: index a PDF with FAISS and answer questions via Groq.

Pipeline: extract text from an uploaded PDF, split it into fixed-size word
chunks, embed the chunks with a SentenceTransformer, store them in a FAISS
L2 index, then answer questions by retrieving the ``top_k`` most similar
chunks and sending them as context to a Groq-hosted LLM.  A Gradio Blocks
UI ties the steps together.
"""
# Install required libraries (only run this once in Colab or terminal)
# !pip install gradio faiss-cpu sentence-transformers PyPDF2 groq

import os

import faiss
import gradio as gr
import numpy as np
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# Groq client; the key comes from the environment so it is never hard-coded.
# NOTE(review): os.getenv returns None when unset — Groq() will then fail at
# request time; confirm the deployment always exports GROQ_API_KEY.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)


def extract_text_from_pdf(file) -> str:
    """Return the concatenated text of every page of *file* that has text.

    Args:
        file: Path or file-like object accepted by ``PyPDF2.PdfReader``.

    Returns:
        Page texts joined with newlines; image-only pages are skipped.
    """
    reader = PdfReader(file)
    # Call extract_text() once per page (the original called it twice:
    # once in the filter and again in the join).
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(text for text in page_texts if text)


def split_into_chunks(text: str, chunk_size: int = 500) -> list[str]:
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words."""
    words = text.split()
    return [
        " ".join(words[i:i + chunk_size])
        for i in range(0, len(words), chunk_size)
    ]


# Load the sentence embedding model once at import time (slow to construct).
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Global FAISS index and text chunk list, populated by process_pdf().
faiss_index = None
text_chunks: list[str] = []


def process_pdf(file) -> str:
    """Extract, chunk, embed and index *file*; return a status message.

    Side effects: rebinds the module-level ``faiss_index`` and ``text_chunks``.
    """
    global faiss_index, text_chunks
    # Guard: Gradio passes None when the user submits without uploading,
    # which previously crashed PdfReader with an opaque traceback.
    if file is None:
        return "⚠️ Please upload a PDF first."
    text = extract_text_from_pdf(file)
    text_chunks = split_into_chunks(text)
    embeddings = embedding_model.encode(text_chunks)
    faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
    faiss_index.add(np.array(embeddings))
    return "✅ PDF processed and indexed successfully!"


def query_document(question: str, top_k: int = 3) -> str:
    """Answer *question* from the *top_k* most similar indexed chunks.

    Args:
        question: Natural-language question about the indexed document.
        top_k: Number of nearest chunks to retrieve as LLM context.

    Returns:
        The model's answer, or a warning string if no PDF is indexed yet.
    """
    # Identity test instead of `not faiss_index`: truthiness of the
    # SWIG-wrapped faiss index object is not a reliable "is initialized" check.
    if faiss_index is None or not text_chunks:
        return "⚠️ Please upload and process a PDF first."
    query_vector = embedding_model.encode([question])
    # Distances are not needed for ranking here — search already sorts.
    _, indices = faiss_index.search(np.array(query_vector), top_k)
    context = "\n\n".join(text_chunks[i] for i in indices[0])
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {"role": "system", "content": "You are an assistant that summarizes and analyzes documents."},
            {"role": "user", "content": f"{context}\n\nQuestion: {question}"},
        ],
    )
    return response.choices[0].message.content


# Gradio UI
with gr.Blocks() as app:
    gr.Markdown("## 🤖 Resume Q&A Assistant\nUpload a resume (PDF) and ask questions about its content.")
    with gr.Row():
        pdf_input = gr.File(label="Upload your PDF", file_types=[".pdf"])
        question_input = gr.Textbox(label="Ask a question about the resume")
    status_output = gr.Textbox(label="Status", interactive=False)
    answer_output = gr.Textbox(label="Answer", interactive=False)

    def run_app(pdf_file, question):
        """Index the uploaded PDF, then answer *question*; returns (status, answer)."""
        status = process_pdf(pdf_file)
        answer = query_document(question)
        return status, answer

    submit_btn = gr.Button("Submit")
    submit_btn.click(
        fn=run_app,
        inputs=[pdf_input, question_input],
        outputs=[status_output, answer_output],
    )

# Guard the launch so importing this module (e.g. for testing) does not
# start the web server as a side effect.
if __name__ == "__main__":
    app.launch()