import os
import tempfile

import numpy as np
import faiss
import gradio as gr
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq

# ===============================
# Groq API
# ===============================
GROQ_API_KEY = os.getenv("Rag_key")
if not GROQ_API_KEY:
    raise ValueError("Groq API key not found. Add Rag_key in HuggingFace Secrets.")

client = Groq(api_key=GROQ_API_KEY)

# ===============================
# Embedding Model (Open Source)
# ===============================
embedder = SentenceTransformer("all-MiniLM-L6-v2")


# ===============================
# PDF Reader
# ===============================
def read_pdf(pdf_path):
    """Extract and concatenate the text of every page of the PDF at *pdf_path*."""
    reader = PdfReader(pdf_path)
    # extract_text() may return None for image-only pages; substitute "".
    return "".join(page.extract_text() or "" for page in reader.pages)


# ===============================
# Text Chunking
# ===============================
def chunk_text(text, chunk_size=400, overlap=50):
    """Split *text* into character chunks of *chunk_size*, each overlapping
    the previous one by *overlap* characters.

    Raises:
        ValueError: if overlap >= chunk_size (the scan would never advance).
    """
    # BUG FIX: with overlap >= chunk_size the original looped forever.
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        start = end - overlap
    return chunks


# ===============================
# FAISS Vector Store
# ===============================
def create_faiss(chunks):
    """Embed *chunks* and build a flat L2 FAISS index over the embeddings.

    Returns:
        (index, embeddings): the FAISS index and the float32 embedding matrix.
    """
    # FAISS requires float32 input; coerce explicitly rather than relying on
    # the encoder's default dtype.
    embeddings = np.asarray(embedder.encode(chunks), dtype="float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings


def retrieve_chunks(chunks, index, question, k=3):
    """Return up to *k* chunks most similar to *question* (nearest-first)."""
    # BUG FIX: cap k at the corpus size. FAISS pads missing neighbours with -1,
    # and chunks[-1] would silently (and wrongly) return the last chunk.
    k = min(k, len(chunks))
    q_embedding = np.asarray(embedder.encode([question]), dtype="float32")
    _, indices = index.search(q_embedding, k)
    return [chunks[i] for i in indices[0] if i >= 0]


# ===============================
# Groq LLM Call
# ===============================
def ask_llm(context, question):
    """Ask the Groq LLM *question* grounded only in *context*; return the reply text."""
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {
                "role": "system",
                "content": "Answer ONLY from the provided context. Reply in Urdu."
            },
            {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion:\n{question}"
            }
        ]
    )
    return response.choices[0].message.content


# ===============================
# Main RAG Pipeline
# ===============================
def rag_pipeline(file, question):
    """Full RAG flow: PDF -> text -> chunks -> FAISS retrieval -> LLM answer.

    Any failure is returned to the UI as an Urdu error message rather than
    raised, since this is the Gradio callback boundary.
    """
    tmp_path = None  # temp file we created (if any), for cleanup in finally
    try:
        if file is None or not question.strip():
            return "براہ کرم PDF اپلوڈ کریں اور سوال لکھیں۔"

        # Gradio may hand us either a filepath string (NamedString) or a
        # file-like object, depending on version/config.
        if isinstance(file, str):
            pdf_path = file
        else:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                tmp.write(file.read())
                tmp_path = tmp.name
            pdf_path = tmp_path

        text = read_pdf(pdf_path)
        if not text.strip():
            return "PDF سے متن حاصل نہیں ہو سکا۔"

        chunks = chunk_text(text)
        index, _ = create_faiss(chunks)
        relevant = retrieve_chunks(chunks, index, question)
        context = "\n".join(relevant)

        return ask_llm(context, question)
    except Exception as e:
        # Top-level boundary: surface the error to the UI instead of crashing.
        return f"⚠️ خرابی: {str(e)}"
    finally:
        # BUG FIX: the original leaked the temporary copy of the uploaded PDF.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass


# ===============================
# Gradio UI
# ===============================
ui = gr.Interface(
    fn=rag_pipeline,
    inputs=[
        gr.File(label="📄 PDF اپلوڈ کریں"),
        gr.Textbox(label="❓ سوال", placeholder="PDF سے سوال پوچھیں")
    ],
    outputs=gr.Textbox(label="📌 جواب"),
    title="Jehan Zada RAG App"
)

# Guard the launch so importing this module (e.g. from tests or another app)
# does not start a server.
if __name__ == "__main__":
    ui.launch()