"""PDF-based RAG Q&A app.

Pipeline: extract text from an uploaded PDF, split it into fixed-size
chunks, embed the chunks with sentence-transformers, index them in FAISS,
and answer user questions via the Groq chat-completions API. Served
through a Gradio Blocks UI.
"""

import os

import faiss
import gradio as gr
import numpy as np
import requests
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer

# SECURITY: the API key must come from the environment only. A literal key
# was previously hard-coded here as the fallback; it has been removed and
# must be rotated, since it was committed to source control.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
GROQ_MODEL = "llama3-8b-8192"

# Loaded once at import time; reused for both document and query embeddings.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")


def extract_text_from_pdf(file):
    """Return the concatenated text of every page in *file* (path or file-like).

    Pages with no extractable text (e.g. scanned images) contribute an
    empty string — ``extract_text()`` may return ``None`` for them.
    """
    reader = PdfReader(file)
    return "\n".join(page.extract_text() or "" for page in reader.pages)


def embed_document(text, chunk_size=500):
    """Chunk *text*, embed each chunk, and build an L2 flat FAISS index.

    Parameters
    ----------
    text : str
        Full document text.
    chunk_size : int
        Number of characters per chunk (fixed-size, non-overlapping).

    Returns
    -------
    (list[str], faiss.IndexFlatL2)
        The chunks and the index over their embeddings, in the same order.
    """
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    embeddings = embedding_model.encode(chunks)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(np.array(embeddings))
    return chunks, index


def query_groq(prompt):
    """Send *prompt* to the Groq chat-completions API and return the reply.

    On network, API, or parsing failure this returns a human-readable
    error string (prefixed with ❌) instead of raising, so the Gradio UI
    can display the problem directly.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are a helpful and knowledgeable AI assistant. A user has uploaded a document. "
                    "Your task is to analyze the content of the document and provide accurate, clear, and concise answers to any questions "
                    "the user asks based on that document. If the answer is not found in the document, politely state that the information is not available in the provided file."
                ),
            },
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.3,
    }
    try:
        # Bound the request so a stalled connection cannot hang the UI forever.
        response = requests.post(url, headers=headers, json=payload, timeout=60)
    except requests.RequestException as e:
        return f"❌ Request failed: {e}"
    try:
        data = response.json()
        if 'choices' in data:
            return data['choices'][0]['message']['content']
        elif 'error' in data:
            return f"❌ API Error: {data['error']['message']}"
        else:
            return "❌ Unexpected API response:\n" + str(data)
    except Exception as e:
        return f"❌ Failed to parse response: {e}\nRaw: {response.text}"


# Module-level state for the single active document. NOTE(review): this is
# shared across all Gradio sessions — fine for a single-user demo, but
# concurrent users would overwrite each other's document.
doc_chunks = []
doc_index = None


def handle_upload(file):
    """Extract, chunk, and index the uploaded PDF; update module state.

    Returns a status string for the UI. Guards against a missing file and
    against PDFs with no extractable text (embedding an empty chunk list
    would crash downstream).
    """
    global doc_chunks, doc_index
    if file is None:
        return "⚠️ Please select a PDF file first."
    text = extract_text_from_pdf(file.name)
    if not text.strip():
        return "⚠️ No extractable text found in this PDF."
    doc_chunks, doc_index = embed_document(text)
    return "✅ Document processed. You may now ask questions."


def answer_question(question):
    """Retrieve the chunks most similar to *question* and ask Groq to answer."""
    if not doc_chunks or doc_index is None:
        return "⚠️ Please upload a document first."
    query_embedding = embedding_model.encode([question])
    # Never request more neighbours than there are chunks: FAISS pads
    # missing results with -1, which Python's negative indexing would
    # otherwise turn into a silent duplicate of the LAST chunk.
    k = min(5, len(doc_chunks))
    D, I = doc_index.search(np.array(query_embedding), k=k)
    context = "\n\n".join(doc_chunks[i] for i in I[0] if i >= 0)
    prompt = (
        f"The user asked: '{question}'\n\n"
        f"Use the following document content to answer:\n{context}"
    )
    return query_groq(prompt)


with gr.Blocks() as demo:
    gr.Markdown("## 📄 RAG App with Groq API (PDF-Based Q&A)")
    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_btn = gr.Button("Process Document")
    upload_status = gr.Textbox(label="Status", interactive=False)
    question = gr.Textbox(label="Ask a question about the document")
    answer = gr.Textbox(label="Answer", lines=5)
    upload_btn.click(fn=handle_upload, inputs=file_input, outputs=upload_status)
    question.submit(fn=answer_question, inputs=question, outputs=answer)

# Guard the launch so importing this module (e.g. for tests) does not
# start the web server as a side effect.
if __name__ == "__main__":
    demo.launch()