import os

import gradio as gr
import faiss
import numpy as np
from groq import Groq
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer

# -----------------------
# Load models
# -----------------------
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

groq_client = Groq(
    api_key=os.environ.get("GROQ_API_KEY")
    # api_key=userdata.get('RAG')
)

# -----------------------
# Global storage
# -----------------------
# Both globals are replaced together by create_faiss_index(); row i of the
# index corresponds to document_chunks[i].
faiss_index = None
document_chunks = []


# -----------------------
# Helper functions
# -----------------------
def read_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: Passed unchanged to ``PdfReader``.

    Returns:
        A single string; empty if no page yields any text.
    """
    reader = PdfReader(file)
    # `or ""` keeps the join safe if a page yields no text (None or "").
    return "".join(page.extract_text() or "" for page in reader.pages)


def chunk_text(text, chunk_size=500, overlap=50):
    """Split *text* into fixed-size character windows that overlap.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk.
        overlap: Number of characters shared by consecutive chunks.

    Returns:
        A list of chunks; empty when *text* is empty.

    Raises:
        ValueError: If ``overlap`` is not smaller than ``chunk_size``, because
            the window would never advance.
    """
    step = chunk_size - overlap
    if step <= 0:
        raise ValueError("overlap must be smaller than chunk_size")

    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        if end >= len(text):
            # This chunk already reaches the end of the text; another window
            # would only repeat its tail.
            break
        start += step
    return chunks


def create_faiss_index(chunks):
    """Embed *chunks* and store them in a fresh FAISS L2 index.

    Replaces the module-level ``faiss_index`` and ``document_chunks``.

    Args:
        chunks: Non-empty list of text chunks.

    Raises:
        ValueError: If *chunks* is empty (there would be no embedding
            dimension to size the index with).
    """
    global faiss_index, document_chunks

    if not chunks:
        raise ValueError("cannot build an index from zero chunks")

    embeddings = np.array(embedding_model.encode(chunks)).astype("float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # Publish only once the index is fully built, so a failure above leaves
    # the previously loaded document usable.
    faiss_index = index
    document_chunks = chunks


def search_chunks(query, top_k=3):
    """Return up to *top_k* stored chunks ranked by the index for *query*.

    Args:
        query: The text to embed and search for.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of chunk strings in the order the index returned them; empty
        when nothing has been indexed yet or ``top_k`` is not positive.
    """
    if faiss_index is None or not document_chunks:
        return []

    # Never ask for more neighbours than there are chunks.
    k = min(top_k, len(document_chunks))
    if k <= 0:
        return []

    query_embedding = np.array(embedding_model.encode([query])).astype("float32")
    _distances, indices = faiss_index.search(query_embedding, k)

    # Skip negative labels: indexing document_chunks with -1 would silently
    # return the last chunk instead of "no result".
    return [document_chunks[idx] for idx in indices[0] if idx >= 0]


def ask_groq(context, question):
    """Send *context* and *question* to the Groq chat model and return its reply.

    Args:
        context: Text the model is told to use when answering.
        question: The user's question.

    Returns:
        The message content of the first completion choice.
    """
    prompt = f"""
Use the text below to answer the question.

Text:
{context}

Question:
{question}
"""

    response = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "user", "content": prompt}
        ],
    )
    return response.choices[0].message.content


# -----------------------
# Gradio functions
# -----------------------
def upload_pdf(file):
    """Read, chunk and index the uploaded PDF; return a status message.

    Args:
        file: Value supplied by the ``gr.File`` component, or ``None`` when
            nothing was uploaded.

    Returns:
        A human-readable status string for the "Status" textbox.
    """
    if file is None:
        return "Please upload a PDF first."

    text = read_pdf(file)
    if not text.strip():
        # Nothing to embed; report it rather than failing in the index build.
        return "No extractable text was found in this PDF."

    create_faiss_index(chunk_text(text))
    return "PDF processed. You can now ask questions."


def answer_question(question):
    """Answer *question* from the indexed PDF.

    Args:
        question: The text typed into the "Your Question" textbox.

    Returns:
        The model's answer, or a prompt to upload a PDF / enter a question.
    """
    if faiss_index is None:
        return "Please upload a PDF first."
    if not question or not question.strip():
        # Avoid an API call for a blank question.
        return "Please enter a question."

    relevant_chunks = search_chunks(question)
    context = "\n".join(relevant_chunks)
    return ask_groq(context, question)


# -----------------------
# Gradio UI
# -----------------------
with gr.Blocks() as demo:
    gr.Markdown("# PDF Question Answer App")

    pdf_file = gr.File(label="Upload PDF")
    upload_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status", lines=1)

    question = gr.Textbox(
        label="Your Question",
        lines=2,
        placeholder="Type your question here",
    )
    ask_btn = gr.Button("Ask Question")
    answer = gr.Textbox(
        label="Answer",
        lines=10,
    )

    upload_btn.click(upload_pdf, inputs=pdf_file, outputs=status)
    ask_btn.click(answer_question, inputs=question, outputs=answer)

demo.launch()