"""Gradio app that answers questions about an uploaded PDF.

The PDF text is split into overlapping word chunks, embedded with a
SentenceTransformer model and stored in a FAISS index. A question is
answered by retrieving the nearest chunks and sending them, together with
the question, to a Groq-hosted chat model.
"""

import os

import faiss
import gradio as gr
import numpy as np
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# 🔐 Groq API key, read from the environment so that no credential lives in
# source control. Any key that has ever been committed to the repository
# should be treated as leaked and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Chat model used for answering; override with the GROQ_MODEL env variable.
GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama3-70b-8192")

# 📏 Chunking and retrieval parameters.
CHUNK_SIZE = 300     # words per chunk
CHUNK_OVERLAP = 50   # words shared between consecutive chunks
TOP_K = 3            # number of chunks retrieved per question

# 📦 Load embedding model
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# 📂 Global storage for vector index and chunks
chunks = []
index = None


def _split_into_chunks(words, chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP):
    """Join *words* into overlapping chunks of at most *chunk_size* words.

    Consecutive chunks share *chunk_overlap* words. Iteration stops as soon
    as a chunk reaches the end of *words*, so no trailing chunk is produced
    that would be fully contained in the previous one.

    Raises:
        ValueError: if *chunk_overlap* is not smaller than *chunk_size*.
    """
    step = chunk_size - chunk_overlap
    if step <= 0:
        raise ValueError("chunk_overlap must be smaller than chunk_size")

    pieces = []
    for start in range(0, len(words), step):
        pieces.append(" ".join(words[start:start + chunk_size]))
        if start + chunk_size >= len(words):
            break
    return pieces


# 📄 PDF Text Extraction & Processing
def process_pdf(file):
    """Extract text from an uploaded PDF and build the search index.

    Args:
        file: The value Gradio passes for the upload: either an object with
            a ``name`` attribute holding the file path, or the path itself.
            ``None`` means nothing was uploaded.

    Returns:
        A status message for the UI. On success the module-level ``chunks``
        and ``index`` are replaced; on failure they are left untouched.
    """
    global chunks, index

    if file is None:
        return "❌ No file uploaded. Please upload a PDF."

    pdf_path = getattr(file, "name", file)

    try:
        reader = PdfReader(pdf_path)
        text = "\n".join(page.extract_text() or "" for page in reader.pages)

        if not text.strip():
            return "❌ No text found in the PDF. Please upload a different file."

        # 📏 Chunking
        new_chunks = _split_into_chunks(text.split())

        # 📊 Embeddings + FAISS (FAISS expects float32 vectors)
        embeddings = np.asarray(model.encode(new_chunks), dtype="float32")
        new_index = faiss.IndexFlatL2(embeddings.shape[1])
        new_index.add(embeddings)
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"❌ Error processing the PDF: {str(e)}"

    # Publish both together only after everything succeeded, so the chunk
    # list and the index can never get out of sync.
    chunks, index = new_chunks, new_index
    return f"✅ Processed {len(chunks)} chunks from uploaded PDF. You can now ask questions."


# ❓ Ask a Question
def ask_question(query):
    """Answer *query* using the most relevant chunks of the processed PDF.

    Args:
        query: The user's question as entered in the UI.

    Returns:
        The model's answer, or a warning/error message when no PDF has been
        processed, the question is blank, the API key is missing, or the
        Groq request fails.
    """
    if not chunks or index is None:
        return "⚠️ Please upload and process a PDF first."

    if not query or not query.strip():
        return "⚠️ Please enter a question."

    if not GROQ_API_KEY:
        return "❌ GROQ_API_KEY is not set. Export it in the environment and restart the app."

    query_embedding = np.asarray(model.encode([query]), dtype="float32")

    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with label -1, which would index the last chunk by accident.
    k = min(TOP_K, len(chunks))
    _, indices = index.search(query_embedding, k=k)
    context = "\n".join(chunks[i] for i in indices[0] if i >= 0)

    prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"

    try:
        # Use Groq API for question answering. The client is created inside
        # the try so that construction errors are reported like request errors.
        client = Groq(api_key=GROQ_API_KEY)
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=GROQ_MODEL,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"❌ Error from Groq API: {str(e)}"


# 🎛️ Gradio Interface
file_input = gr.File(label="📄 Upload PDF")
question_input = gr.Textbox(label="❓ Ask a Question about the PDF")
answer_output = gr.Textbox(label="📘 Answer")

pdf_processor = gr.Interface(fn=process_pdf, inputs=file_input, outputs="text")
pdf_qa = gr.Interface(fn=ask_question, inputs=question_input, outputs=answer_output)

# Create a tabbed interface with "Upload PDF" and "Ask a Question" tabs
app = gr.TabbedInterface(
    [pdf_processor, pdf_qa],
    tab_names=["Upload PDF", "Ask a Question"],
)

# Launch the app
if __name__ == "__main__":
    app.launch()