Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import faiss | |
| import numpy as np | |
| from PyPDF2 import PdfReader | |
| from sentence_transformers import SentenceTransformer | |
| from groq import Groq | |
# API key comes from the environment (set via Hugging Face "Secrets").
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError(
        "GROQ_API_KEY not found in environment variables. "
        "Set it in the Space 'Secrets' section."
    )
client = Groq(api_key=GROQ_API_KEY)

# Module-level state.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
faiss_index = None  # assigned by process_pdf_and_query
text_chunks = []  # assigned by process_pdf_and_query
| # Helper functions | |
def extract_text_from_pdf(pdf_file_obj):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file_obj: The PDF source, handed unchanged to ``PdfReader``.

    Returns:
        A single string in which each page's extracted text is followed by
        a newline, in page order. Pages whose ``extract_text()`` result is
        empty or ``None`` contribute nothing.
    """
    page_texts = (page.extract_text() for page in PdfReader(pdf_file_obj).pages)
    return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
def split_into_chunks(text, chunk_size=500):
    """Split text into consecutive chunks of whitespace-separated words.

    Args:
        text: The text to split. Any run of whitespace separates words.
        chunk_size: Maximum number of words per chunk. Must be positive.

    Returns:
        A list of strings, each holding up to ``chunk_size`` words joined by
        single spaces. The last chunk may be shorter. Text with no words
        yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # A negative step would make range() empty and silently drop the whole
    # text; a zero step raises an unrelated-looking range() error. Reject both
    # up front with a message that names the actual problem.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")

    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
def process_pdf_and_query(pdf_file, question):
    """Index an uploaded PDF and answer a question about it via Groq.

    The PDF is re-extracted, re-chunked and re-indexed on every call; the
    resulting index and chunks are stored in the module-level ``faiss_index``
    and ``text_chunks``.

    Args:
        pdf_file: The uploaded PDF as delivered by the ``gr.File`` input, or
            ``None`` when nothing was uploaded. Passed to
            ``extract_text_from_pdf``.
        question: The user's question. ``None``, empty or whitespace-only
            means "index only".

    Returns:
        The model's answer, or a short status message when there is no PDF,
        no extractable text, or no question.
    """
    global faiss_index, text_chunks

    # Nothing uploaded: report it instead of failing inside the PDF reader.
    if pdf_file is None:
        return "Please upload a PDF."

    # Step 1: Process PDF
    raw_text = extract_text_from_pdf(pdf_file)
    text_chunks = split_into_chunks(raw_text)
    if not text_chunks:
        # With no chunks there is nothing to embed, and reading the embedding
        # dimension below would fail, so stop with an explanatory message.
        faiss_index = None
        return "No extractable text was found in the PDF."

    embeddings = np.asarray(embedding_model.encode(text_chunks), dtype="float32")
    faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
    faiss_index.add(embeddings)

    # Step 2: Search FAISS index
    if not question or not question.strip():
        return "PDF processed. Please enter a question."

    query_vector = np.asarray(embedding_model.encode([question]), dtype="float32")
    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with index -1, which would silently select text_chunks[-1].
    top_k = min(3, len(text_chunks))
    _distances, indices = faiss_index.search(query_vector, k=top_k)
    context = "\n\n".join(text_chunks[i] for i in indices[0] if i >= 0)

    # Step 3: Use Groq API to get response
    # NOTE(review): confirm this model id is still served by Groq.
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[
            {
                "role": "system",
                "content": "You are an assistant that summarizes and answers questions from documents.",
            },
            {"role": "user", "content": f"{context}\n\nQuestion: {question}"},
        ],
    )
    return response.choices[0].message.content
# Gradio interface: a PDF upload and a question box in, one answer box out.
iface = gr.Interface(
    process_pdf_and_query,
    [
        gr.File(label="Upload PDF"),
        gr.Textbox(label="Ask a question about the PDF"),
    ],
    gr.Textbox(label="Answer"),
    title="Document Q&A with Groq",
    description="Upload a PDF and ask a question. Powered by Groq + LLaMA 3.",
)

iface.launch()