"""Gradio app: upload a PDF, ask a question, get an answer from a Groq LLM.

The PDF text is split into word chunks, embedded with a SentenceTransformer
model and indexed in FAISS; the chunks nearest to the question are sent to
the Groq chat API as context.
"""

import os

import faiss
import gradio as gr
import numpy as np
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# ✅ API Key from environment (set via Hugging Face "Secrets")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError(
        "GROQ_API_KEY not found in environment variables. "
        "Set it in the Space 'Secrets' section."
    )

client = Groq(api_key=GROQ_API_KEY)

# ✅ Configuration
# The chat model can be overridden with the GROQ_MODEL environment variable
# so a deployment can switch models without a code change.
GROQ_MODEL = os.getenv("GROQ_MODEL") or "llama3-8b-8192"
# Maximum number of chunks retrieved as context for a question.
TOP_K = 3

# ✅ Global state
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
faiss_index = None  # index of the most recently processed PDF, or None
text_chunks = []  # chunks of the most recently processed PDF


# ✅ Helper functions
def _resolve_pdf_source(pdf_file):
    """Return something ``PdfReader`` can open for a Gradio upload value.

    A path string (or path-like) is returned unchanged. Otherwise, if the
    object exposes an existing file path through ``.name`` (as a temp-file
    wrapper does), that path is returned so the file is read from its start.
    Anything else is passed through untouched.
    """
    if isinstance(pdf_file, (str, os.PathLike)):
        return pdf_file
    path = getattr(pdf_file, "name", None)
    if isinstance(path, str) and os.path.isfile(path):
        return path
    return pdf_file


def extract_text_from_pdf(pdf_file_obj):
    """Return the text of every page of a PDF, one newline after each page.

    Args:
        pdf_file_obj: A path or file-like object accepted by ``PdfReader``.

    Returns:
        The concatenated page texts; pages for which ``extract_text()``
        yields nothing are skipped. Empty string if no page has text.
    """
    reader = PdfReader(pdf_file_obj)
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(f"{page_text}\n" for page_text in page_texts if page_text)


def split_into_chunks(text, chunk_size=500):
    """Split *text* into consecutive chunks of at most *chunk_size* words.

    Args:
        text: The text to split; words are separated on any whitespace.
        chunk_size: Maximum number of words per chunk (must be >= 1).

    Returns:
        A list of chunk strings with words re-joined by single spaces.
        Empty list when *text* contains no words.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]


def process_pdf_and_query(pdf_file, question):
    """Index the uploaded PDF and answer *question* from its content.

    Side effects: rebinds the module globals ``faiss_index`` and
    ``text_chunks`` to describe the PDF that was just processed.

    Args:
        pdf_file: The value delivered by the ``gr.File`` input, or ``None``
            when nothing was uploaded.
        question: The user's question; may be empty or ``None``.

    Returns:
        The model's answer, or a short status message when there is no
        upload, no extractable text, or no question.
    """
    global faiss_index, text_chunks

    if pdf_file is None:
        return "Please upload a PDF file."

    # Step 1: Process PDF
    raw_text = extract_text_from_pdf(_resolve_pdf_source(pdf_file))
    chunks = split_into_chunks(raw_text)
    if not chunks:
        # Encoding an empty list yields an array without a second axis, so
        # there is nothing to index (e.g. a scanned, image-only PDF).
        faiss_index, text_chunks = None, []
        return "No extractable text was found in the PDF."

    # FAISS operates on float32 matrices.
    embeddings = np.asarray(embedding_model.encode(chunks), dtype="float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    faiss_index, text_chunks = index, chunks

    # Step 2: Search FAISS index
    if not question or not question.strip():
        return "PDF processed. Please enter a question."

    query_vector = np.asarray(
        embedding_model.encode([question]), dtype="float32"
    )
    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, which would otherwise index the last chunk.
    k = min(TOP_K, len(chunks))
    _distances, indices = index.search(query_vector, k=k)
    context = "\n\n".join(chunks[i] for i in indices[0] if i >= 0)

    # Step 3: Use Groq API to get response
    response = client.chat.completions.create(
        model=GROQ_MODEL,
        messages=[
            {
                "role": "system",
                "content": "You are an assistant that summarizes and answers questions from documents.",
            },
            {"role": "user", "content": f"{context}\n\nQuestion: {question}"},
        ],
    )
    return response.choices[0].message.content


# ✅ Gradio interface
iface = gr.Interface(
    fn=process_pdf_and_query,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Textbox(label="Ask a question about the PDF"),
    ],
    outputs=gr.Textbox(label="Answer"),
    title="Document Q&A with Groq",
    description="Upload a PDF and ask a question. Powered by Groq + LLaMA 3.",
)

iface.launch()