# PDF question-answering (RAG) Space: Groq LLM + FAISS retrieval + Gradio UI.
| import os | |
| import tempfile | |
| import numpy as np | |
| import faiss | |
| import gradio as gr | |
| from pypdf import PdfReader | |
| from sentence_transformers import SentenceTransformer | |
| from groq import Groq | |
# ===============================
# Groq API
# ===============================
# Read the Groq API key from the environment (HuggingFace Spaces secret named "Rag_key").
GROQ_API_KEY = os.getenv("Rag_key")
# Fail fast at import time so the Space shows a clear configuration error
# instead of failing on every request.
if not GROQ_API_KEY:
    raise ValueError("Groq API key not found. Add Rag_key in HuggingFace Secrets.")
# Shared Groq client used by ask_llm().
client = Groq(api_key=GROQ_API_KEY)
# ===============================
# Embedding Model (Open Source)
# ===============================
# Sentence-transformers MiniLM model, used for both chunk and query embeddings
# (the same model must encode both sides for FAISS distances to be meaningful).
embedder = SentenceTransformer("all-MiniLM-L6-v2")
| # =============================== | |
| # PDF Reader | |
| # =============================== | |
def read_pdf(pdf_path):
    """Return the concatenated extractable text of every page of the PDF at *pdf_path*.

    Pages with no extractable text (e.g. scanned images) contribute an empty string.
    """
    pages = PdfReader(pdf_path).pages
    return "".join(page.extract_text() or "" for page in pages)
| # =============================== | |
| # Text Chunking | |
| # =============================== | |
def chunk_text(text, chunk_size=400, overlap=50):
    """Split *text* into fixed-size character chunks with overlap.

    Args:
        text: The full document text.
        chunk_size: Maximum number of characters per chunk (must be > 0).
        overlap: Number of characters shared between consecutive chunks.

    Returns:
        A list of chunk strings; an empty list for empty input.

    Raises:
        ValueError: If chunk_size <= 0, or overlap >= chunk_size — either
            combination would make the scan fail to advance (infinite loop
            in the original implementation).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap >= chunk_size:
        # FIX: previously `start = end - overlap` never advanced in this case,
        # hanging the request in an infinite loop.
        raise ValueError("overlap must be smaller than chunk_size")
    step = chunk_size - overlap  # how far the window advances each iteration
    chunks = []
    start = 0
    while start < len(text):
        chunks.append(text[start:start + chunk_size])
        start += step
    return chunks
| # =============================== | |
| # FAISS Vector Store | |
| # =============================== | |
def create_faiss(chunks):
    """Embed *chunks* and build a flat L2 FAISS index over the vectors.

    Returns:
        (index, embeddings): the populated FAISS index and the raw
        embedding matrix produced by the shared `embedder`.
    """
    embeddings = embedder.encode(chunks)
    vectors = np.array(embeddings)
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index, embeddings
def retrieve_chunks(chunks, index, question, k=3):
    """Return the *k* chunks whose embeddings are nearest to *question*."""
    query_vec = np.array(embedder.encode([question]))
    _, hits = index.search(query_vec, k)
    return [chunks[idx] for idx in hits[0]]
| # =============================== | |
| # Groq LLM Call | |
| # =============================== | |
def ask_llm(context, question):
    """Ask the Groq-hosted LLM to answer *question* using only *context*, in Urdu."""
    system_msg = {
        "role": "system",
        "content": "Answer ONLY from the provided context. Reply in Urdu."
    }
    user_msg = {
        "role": "user",
        "content": f"Context:\n{context}\n\nQuestion:\n{question}"
    }
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[system_msg, user_msg]
    )
    return completion.choices[0].message.content
| # =============================== | |
| # Main RAG Pipeline (FIXED) | |
| # =============================== | |
def rag_pipeline(file, question):
    """Full RAG pipeline: PDF -> text -> chunks -> FAISS retrieval -> Groq answer.

    Args:
        file: The uploaded PDF — either a filesystem path (Gradio NamedString)
            or a file-like object with .read().
        question: The user's question about the PDF.

    Returns:
        The LLM's answer string, or a user-facing Urdu error message.
        Never raises: all failures are reported as a string (Gradio-friendly).
    """
    tmp_path = None
    try:
        if file is None or not question.strip():
            return "براہ کرم PDF اپلوڈ کریں اور سوال لکھیں۔"
        # Gradio may hand us a path string (NamedString) or a file-like object.
        if isinstance(file, str):
            pdf_path = file
        else:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                tmp.write(file.read())
                tmp_path = tmp.name
            pdf_path = tmp_path
        text = read_pdf(pdf_path)
        if not text.strip():
            return "PDF سے متن حاصل نہیں ہو سکا۔"
        chunks = chunk_text(text)
        index, _ = create_faiss(chunks)
        relevant = retrieve_chunks(chunks, index, question)
        context = "\n".join(relevant)
        return ask_llm(context, question)
    except Exception as e:
        # Surface the error in the UI rather than crashing the Space.
        return f"⚠️ خرابی: {str(e)}"
    finally:
        # FIX: delete=False previously leaked one temp file per request.
        if tmp_path:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
# ===============================
# Gradio UI
# ===============================
# Components are named first so the Interface wiring reads declaratively.
pdf_input = gr.File(label="📄 PDF اپلوڈ کریں")
question_input = gr.Textbox(label="❓ سوال", placeholder="PDF سے سوال پوچھیں")
answer_output = gr.Textbox(label="📌 جواب")

ui = gr.Interface(
    fn=rag_pipeline,
    inputs=[pdf_input, question_input],
    outputs=answer_output,
    title="Jehan Zada RAG App"
)
ui.launch()