"""Gradio app: question-answering over an uploaded PDF using Groq + LLaMA 3.

Pipeline: extract PDF text (PyMuPDF) -> fixed-size word chunking -> embed
(sentence-transformers) -> FAISS L2 similarity search -> answer generated by
a Groq chat completion grounded in the retrieved chunks.
"""

import os

import faiss
import fitz  # PyMuPDF
import gradio as gr
import numpy as np
from groq import Groq
from sentence_transformers import SentenceTransformer

# ✅ Load Groq API key securely from the environment (never hard-coded).
groq_api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)

# Load embedding model (MiniLM; produces fixed-size sentence vectors).
model = SentenceTransformer('all-MiniLM-L6-v2')

# Module-level state shared between the "process PDF" and "ask" handlers.
stored_chunks = []
stored_index = None

CHUNK_SIZE = 500  # words per retrieval chunk


def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*."""
    # Context manager closes the document — the original leaked the handle.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)


def handle_pdf(file_path):
    """Extract, chunk, embed and index a PDF; return a status message string.

    NOTE(review): assumes the Gradio File component delivers a filesystem
    path (gradio >= 4 "filepath" behavior) — confirm against the installed
    gradio version.
    """
    global stored_chunks, stored_index
    try:
        # Read text
        text = extract_text_from_pdf(file_path)

        # Simple chunking by CHUNK_SIZE words.
        words = text.split()
        chunks = [' '.join(words[i:i + CHUNK_SIZE])
                  for i in range(0, len(words), CHUNK_SIZE)]
        if not chunks:
            # Scanned/image-only PDFs yield no extractable text; without this
            # guard embeddings.shape[1] below raises a cryptic IndexError.
            return "❌ No extractable text found in this PDF."

        # Embed and build an exact (brute-force) L2 FAISS index.
        # FAISS expects contiguous float32 — make the dtype explicit.
        embeddings = np.asarray(model.encode(chunks), dtype=np.float32)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)

        # Store for later use by answer_query.
        stored_chunks = chunks
        stored_index = index
        return "✅ PDF successfully processed. Ready for questions."
    except Exception as e:
        return f"❌ Error during PDF processing: {str(e)}"


def answer_query(query):
    """Answer *query* using the most similar indexed chunks as LLM context."""
    if not stored_chunks or stored_index is None:
        return "❌ Please upload and process a PDF first."
    try:
        query_vec = np.asarray(model.encode(query), dtype=np.float32).reshape(1, -1)

        # Never request more neighbours than chunks exist: FAISS pads the
        # result with -1 when k > ntotal, and stored_chunks[-1] would then
        # silently pick the wrong chunk. Clamp k and filter the padding.
        k = min(3, len(stored_chunks))
        D, I = stored_index.search(query_vec, k=k)
        top_chunks = [stored_chunks[i] for i in I[0] if i >= 0]
        context = "\n\n".join(top_chunks)

        prompt = f"""Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query}\nAnswer:"""
        response = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"❌ Error during answering: {str(e)}"


# 🧠 Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 📄 PDF Q&A using Groq + LLaMA3")
    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        process_output = gr.Textbox(label="Processing Status")
    process_button = gr.Button("📥 Process PDF")
    process_button.click(fn=handle_pdf, inputs=[file_input], outputs=[process_output])

    gr.Markdown("## 💬 Ask a Question from the PDF")
    question_input = gr.Textbox(label="Your Question")
    ask_button = gr.Button("🤖 Ask")
    answer_output = gr.Textbox(label="Answer", lines=5)
    ask_button.click(fn=answer_query, inputs=[question_input], outputs=[answer_output])

demo.launch()