import os

import fitz  # PyMuPDF
import faiss
import numpy as np
import gradio as gr
from groq import Groq
from sentence_transformers import SentenceTransformer

# Groq API key is injected via the environment (e.g. Hugging Face Spaces secrets).
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Sentence embedding model; all-MiniLM-L6-v2 emits 384-dim vectors, which must
# match the FAISS index dimensionality below.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

EMBEDDING_DIM = 384  # output dimension of all-MiniLM-L6-v2


# === PDF → Text extraction ===
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*."""
    text = ""
    with fitz.open(pdf_path) as doc:
        for page in doc:
            text += page.get_text()
    return text


# === Chunking text ===
def chunk_text(text, chunk_size=500):
    """Split *text* into sentence-aligned chunks of roughly *chunk_size* chars.

    Sentences (split on ". ") are greedily packed into a chunk until adding the
    next sentence would exceed *chunk_size*; a single over-long sentence still
    becomes its own chunk.
    """
    sentences = text.split(". ")
    chunks, current = [], ""
    for sentence in sentences:
        if len(current) + len(sentence) < chunk_size:
            current += sentence + ". "
        else:
            chunks.append(current.strip())
            current = sentence + ". "
    if current:
        chunks.append(current.strip())
    return chunks


# === Vector store (FAISS) ===
class VectorStore:
    """Minimal in-memory FAISS L2 index mapping vectors back to text chunks."""

    def __init__(self):
        self.index = faiss.IndexFlatL2(EMBEDDING_DIM)
        self.chunks = []  # chunks[i] is the source text behind index vector i

    def add(self, embeddings, texts):
        """Add parallel sequences of embeddings and their source texts."""
        # FAISS requires contiguous float32 input.
        self.index.add(np.asarray(embeddings, dtype="float32"))
        self.chunks.extend(texts)

    def search(self, query, top_k=5):
        """Return up to *top_k* stored chunks most similar to *query*."""
        if not self.chunks:
            return []
        vec = embedding_model.encode([query])
        # Never request more neighbours than stored vectors: FAISS pads the
        # missing slots with index -1, which Python would wrap to chunks[-1].
        k = min(top_k, len(self.chunks))
        _, I = self.index.search(np.asarray(vec, dtype="float32"), k)
        # Defensively drop any -1 padding that still appears.
        return [self.chunks[i] for i in I[0] if i >= 0]


vs = VectorStore()

system_prompt = "You are a study supervisor helping students understand their uploaded documents."


# === Ask LLaMA 3 using Groq ===
def ask_llama3(system_prompt, user_prompt):
    """Send a system/user prompt pair to LLaMA 3 via Groq; return the reply text."""
    try:
        result = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ]
        )
        return result.choices[0].message.content
    except Exception as e:
        # Surface API failures in the UI instead of crashing the app.
        return f"❌ Groq API Error: {e}"


# === PDF upload handler ===
def upload_pdf(pdf_file):
    """Gradio handler: extract, chunk, embed, and index an uploaded PDF."""
    try:
        text = extract_text_from_pdf(pdf_file.name)
        # Guard scanned/image-only PDFs: chunking empty text would otherwise
        # index a junk "." chunk.
        if not text.strip():
            return "❌ PDF Processing Error: no extractable text found in the document."
        chunks = chunk_text(text)
        embeddings = embedding_model.encode(chunks)
        vs.add(embeddings, chunks)
        return "✅ Document uploaded and processed!"
    except Exception as e:
        return f"❌ PDF Processing Error: {e}"


# === QA handler ===
def ask_question(question):
    """Gradio handler: answer *question* from the retrieved document context."""
    if not vs.chunks:
        return "⚠️ Please upload and process a PDF document first."
    try:
        docs = vs.search(question)
        context = "\n".join(docs)
        prompt = f"Use the context below to answer the question.\n\nContext:\n{context}\n\nQuestion: {question}"
        return ask_llama3(system_prompt, prompt)
    except Exception as e:
        return f"❌ Question Answering Error: {e}"


# === Gradio UI ===
with gr.Blocks() as demo:
    gr.Markdown("## 📚 RAG PDF QA using LLaMA3 via Groq API")
    with gr.Row():
        pdf_file = gr.File(label="Upload PDF Document")
        upload_button = gr.Button("Process PDF")
    with gr.Row():
        question = gr.Textbox(label="Ask a question from the document")
        ask_button = gr.Button("Ask")
    answer = gr.Textbox(label="Answer", lines=6)
    upload_button.click(upload_pdf, inputs=pdf_file, outputs=answer)
    ask_button.click(ask_question, inputs=question, outputs=answer)

demo.launch()