"""RAG PDF chatbot: index an uploaded PDF with FAISS and answer questions with a Groq LLM."""

import os

import faiss
import gradio as gr
import numpy as np
from groq import Groq
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer

# -----------------------------
# Load Groq Client
# -----------------------------
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY not found in environment variables")

client = Groq(api_key=GROQ_API_KEY)

# -----------------------------
# Load Embedding Model (Open Source)
# -----------------------------
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Tunables
CHUNK_SIZE = 500  # characters per chunk
TOP_K = 3  # maximum number of chunks retrieved per question
LLM_MODEL = "llama-3.3-70b-versatile"

# Global storage: `documents[i]` is the text of the vector stored at row i of `index`.
documents = []
index = None


# -----------------------------
# PDF Processing
# -----------------------------
def process_pdf(file):
    """Extract text from an uploaded PDF, chunk it, embed it and build the FAISS index.

    Args:
        file: Value delivered by the ``gr.File`` input. Its ``.name`` attribute
            is used as the path when present; otherwise the value itself is
            passed to ``PdfReader``.

    Returns:
        A status message for the UI.

    Side effects:
        On success, replaces the module-level ``documents`` and ``index``.
        On failure (no file, no extractable text) the previous state is left
        untouched.
    """
    global documents, index

    if file is None:
        return "Please upload a PDF file first."

    # The original code read `file.name`; fall back to the value itself in
    # case a plain path is handed over instead of a file-like wrapper.
    pdf_path = getattr(file, "name", file)
    reader = PdfReader(pdf_path)

    # `or ""` guards against pages for which no text could be extracted.
    text = "".join(page.extract_text() or "" for page in reader.pages)
    if not text.strip():
        # Without text there are no chunks, and the embedding matrix would
        # have no second dimension to size the index with.
        return "No extractable text found in this PDF."

    # Chunking: fixed-size, non-overlapping character windows.
    chunks = [text[i:i + CHUNK_SIZE] for i in range(0, len(text), CHUNK_SIZE)]

    # Create embeddings (FAISS requires float32).
    embeddings = np.asarray(embedding_model.encode(chunks), dtype="float32")

    # Create FAISS index
    new_index = faiss.IndexFlatL2(embeddings.shape[1])
    new_index.add(embeddings)

    # Publish both together so `documents` and `index` never disagree.
    documents, index = chunks, new_index

    return "✅ PDF processed and indexed successfully!"


# -----------------------------
# Question Answering
# -----------------------------
def answer_question(question):
    """Answer ``question`` using the chunks most similar to it as LLM context.

    Args:
        question: The user's question text.

    Returns:
        The LLM's answer, or a guidance message when no PDF has been indexed
        yet or the question is blank.
    """
    if index is None:
        return "Please upload and process a PDF first."

    if not question or not question.strip():
        return "Please enter a question."

    # Embed question
    question_embedding = np.asarray(
        embedding_model.encode([question]), dtype="float32"
    )

    # Search in FAISS. Never ask for more neighbours than there are chunks:
    # missing neighbours come back as -1, which would index `documents` from
    # the end and silently duplicate the last chunk.
    k = min(TOP_K, len(documents))
    _, indices = index.search(question_embedding, k)

    retrieved = [documents[i] for i in indices[0] if 0 <= i < len(documents)]
    context = "\n\n".join(retrieved)

    prompt = (
        "Answer the question based only on the context below.\n\n"
        f"Context:\n{context}\n\n"
        f"Question:\n{question}\n\n"
        "Answer:\n"
    )

    # Send to Groq LLM
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=LLM_MODEL,
    )

    return chat_completion.choices[0].message.content


# -----------------------------
# Gradio UI
# -----------------------------
with gr.Blocks() as app:
    gr.Markdown("# 📚 RAG PDF Chatbot (Groq + FAISS)")

    file_input = gr.File(label="Upload PDF")
    upload_button = gr.Button("Process PDF")
    upload_output = gr.Textbox()

    question_input = gr.Textbox(label="Ask a Question")
    answer_button = gr.Button("Get Answer")
    answer_output = gr.Textbox()

    upload_button.click(process_pdf, inputs=file_input, outputs=upload_output)
    answer_button.click(answer_question, inputs=question_input, outputs=answer_output)

app.launch()