# Hugging Face Space: Resume Q&A Assistant (Gradio + FAISS + Groq)
# Install required libraries (only run this once in Colab or terminal):
# !pip install gradio faiss-cpu sentence-transformers PyPDF2 groq

import os

import faiss
import gradio as gr
import numpy as np
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
# Groq client setup.
# The API key is read from the GROQ_API_KEY environment variable (the old
# comment claimed it was hard-coded "explicitly" — it is not). If the
# variable is unset, Groq() receives None and the first API call will fail.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)
# PDF text extraction
def extract_text_from_pdf(file):
    """Extract all text from a PDF, skipping pages with no extractable text.

    Args:
        file: A path or binary file-like object accepted by ``PyPDF2.PdfReader``.

    Returns:
        str: The page texts joined with newlines (empty string if nothing
        could be extracted).
    """
    reader = PdfReader(file)
    # Call extract_text() exactly once per page; the original called it
    # twice (once in the filter, once in the join), doubling the parse work.
    parts = []
    for page in reader.pages:
        text = page.extract_text()
        if text:
            parts.append(text)
    return "\n".join(parts)
# Split text into chunks
def split_into_chunks(text, chunk_size=500):
    """Split *text* into chunks of at most ``chunk_size`` whitespace-separated words.

    Args:
        text: Input string; tokenized with ``str.split()``.
        chunk_size: Maximum number of words per chunk.

    Returns:
        list[str]: Chunk strings; empty list for empty/whitespace-only input.
    """
    words = text.split()
    chunks = []
    start = 0
    while start < len(words):
        chunks.append(" ".join(words[start:start + chunk_size]))
        start += chunk_size
    return chunks
# Sentence-embedding model, used for both document chunks and queries.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Module-level state shared between process_pdf() and query_document():
# the FAISS index over chunk embeddings and the parallel list of raw chunks.
faiss_index = None
text_chunks = []
# PDF processing
def process_pdf(file):
    """Extract, chunk, embed, and index the text of an uploaded PDF.

    Rebuilds the module-level ``faiss_index`` and ``text_chunks`` from
    scratch, replacing whatever was indexed before.

    Args:
        file: PDF path or file-like object accepted by PyPDF2.

    Returns:
        str: A human-readable status message.
    """
    global faiss_index, text_chunks
    text = extract_text_from_pdf(file)
    text_chunks = split_into_chunks(text)
    if not text_chunks:
        # Guard: encoding an empty chunk list would crash on
        # embeddings.shape[1] below (scanned/image-only PDFs hit this).
        faiss_index = None
        return "⚠️ No extractable text found in this PDF."
    embeddings = embedding_model.encode(text_chunks)
    faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
    # FAISS only accepts float32; be explicit rather than relying on the
    # encoder's default dtype.
    faiss_index.add(np.array(embeddings, dtype="float32"))
    return "✅ PDF processed and indexed successfully!"
# Query handling
def query_document(question, top_k=3):
    """Answer *question* using the most relevant indexed chunks plus Groq.

    Args:
        question: Natural-language question about the indexed document.
        top_k: Number of nearest chunks to retrieve as context.

    Returns:
        str: The model's answer, or a warning if no PDF is indexed.
    """
    # `is None` rather than truthiness: faiss index objects are not
    # guaranteed to support bool(), and None is the real sentinel here.
    if faiss_index is None or not text_chunks:
        return "⚠️ Please upload and process a PDF first."
    query_vector = embedding_model.encode([question])
    distances, indices = faiss_index.search(np.array(query_vector), top_k)
    # FAISS pads the result with -1 when fewer than top_k vectors are
    # indexed; text_chunks[-1] would silently return the wrong chunk,
    # so keep only valid indices.
    context = "\n\n".join(
        text_chunks[i] for i in indices[0] if 0 <= i < len(text_chunks)
    )
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {"role": "system", "content": "You are an assistant that summarizes and analyzes documents."},
            {"role": "user", "content": f"{context}\n\nQuestion: {question}"},
        ],
    )
    return response.choices[0].message.content
# Gradio UI: upload a PDF, ask a question, get status + answer.
with gr.Blocks() as app:
    gr.Markdown("## 🤖 Resume Q&A Assistant\nUpload a resume (PDF) and ask questions about its content.")
    with gr.Row():
        pdf_input = gr.File(label="Upload your PDF", file_types=[".pdf"])
        question_input = gr.Textbox(label="Ask a question about the resume")
    status_output = gr.Textbox(label="Status", interactive=False)
    answer_output = gr.Textbox(label="Answer", interactive=False)

    def run_app(pdf_file, question):
        """Process the uploaded PDF, then answer the question against it."""
        # Guard: gr.File yields None when nothing was uploaded;
        # process_pdf(None) would raise inside PdfReader otherwise.
        if pdf_file is None:
            return "⚠️ Please upload a PDF first.", ""
        # NOTE(review): the PDF is re-processed on every submit; acceptable
        # for small resumes, but a larger app should index once per upload.
        status = process_pdf(pdf_file)
        answer = query_document(question)
        return status, answer

    submit_btn = gr.Button("Submit")
    submit_btn.click(fn=run_app, inputs=[pdf_input, question_input], outputs=[status_output, answer_output])

app.launch()