import os
import tempfile
import numpy as np
import faiss
import gradio as gr
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq
# ===============================
# Groq API
# ===============================
# Read the Groq API key from the environment; it must be stored under the
# secret name "Rag_key" (HuggingFace Spaces exposes secrets as env vars).
GROQ_API_KEY = os.getenv("Rag_key")
if not GROQ_API_KEY:
    # Fail fast at import time rather than on the first request.
    raise ValueError("Groq API key not found. Add Rag_key in HuggingFace Secrets.")
client = Groq(api_key=GROQ_API_KEY)
# ===============================
# Embedding Model (Open Source)
# ===============================
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# ===============================
# PDF Reader
# ===============================
def read_pdf(pdf_path):
    """Extract and concatenate the text of every page of a PDF.

    Pages with no extractable text (scanned images, etc.) contribute
    an empty string rather than raising.
    """
    pages = PdfReader(pdf_path).pages
    return "".join(page.extract_text() or "" for page in pages)
# ===============================
# Text Chunking
# ===============================
def chunk_text(text, chunk_size=400, overlap=50):
    """Split *text* into overlapping fixed-size chunks.

    Args:
        text: The string to split.
        chunk_size: Maximum length of each chunk.
        overlap: Number of characters shared between consecutive chunks.

    Returns:
        A list of chunk strings covering all of *text* (empty list for
        empty input).

    Raises:
        ValueError: If ``overlap >= chunk_size`` — the original code
            would loop forever in that case.
    """
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        # FIX: stop once the text is fully consumed. The original always
        # stepped back by `overlap`, which emitted a redundant duplicate
        # tail chunk whenever the text ended within `overlap` of a chunk
        # boundary (e.g. len(text) == chunk_size produced two chunks).
        if end >= len(text):
            break
        start = end - overlap
    return chunks
# ===============================
# FAISS Vector Store
# ===============================
def create_faiss(chunks):
    """Embed *chunks* and build a flat L2 FAISS index over the vectors.

    Returns:
        (index, embeddings) — the populated index and the raw vectors,
        so callers can reuse the embeddings without re-encoding.
    """
    vectors = embedder.encode(chunks)
    flat_index = faiss.IndexFlatL2(vectors.shape[1])
    flat_index.add(np.array(vectors))
    return flat_index, vectors
def retrieve_chunks(chunks, index, question, k=3):
    """Return up to *k* chunks most similar to *question*, best first.

    Args:
        chunks: Chunk strings in the same order they were indexed.
        index: A FAISS index built over the chunk embeddings.
        question: The query string.
        k: Maximum number of chunks to retrieve.

    Returns:
        A list of at most *k* chunk strings.
    """
    q_embedding = embedder.encode([question])
    distances, indices = index.search(np.array(q_embedding), k)
    # FIX: FAISS pads the result with index -1 when k exceeds the number
    # of stored vectors; the original `chunks[i]` silently returned
    # chunks[-1] (the LAST chunk) for each padded slot. Filter them out.
    return [chunks[i] for i in indices[0] if i != -1]
# ===============================
# Groq LLM Call
# ===============================
def ask_llm(context, question):
    """Ask the Groq-hosted LLM to answer *question* using only *context*.

    The system prompt constrains the model to the supplied context and
    requests a reply in Urdu.
    """
    system_msg = {
        "role": "system",
        "content": "Answer ONLY from the provided context. Reply in Urdu."
    }
    user_msg = {
        "role": "user",
        "content": f"Context:\n{context}\n\nQuestion:\n{question}"
    }
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[system_msg, user_msg],
    )
    return completion.choices[0].message.content
# ===============================
# Main RAG Pipeline (FIXED)
# ===============================
def rag_pipeline(file, question):
    """End-to-end RAG pipeline: read PDF, chunk, index, retrieve, answer.

    Args:
        file: Either a filesystem path string (what gradio passes for an
            uploaded file) or a file-like object exposing ``.read()``.
        question: The user's question about the PDF.

    Returns:
        The LLM's answer string, or an Urdu error message on any failure.
    """
    tmp_path = None
    try:
        if file is None or not question.strip():
            return "براہ کرم PDF اپلوڈ کریں اور سوال لکھیں۔"
        # gradio may hand us a path string (NamedString) or a file object.
        if isinstance(file, str):
            pdf_path = file
        else:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                tmp.write(file.read())
                tmp_path = tmp.name
            pdf_path = tmp_path
        text = read_pdf(pdf_path)
        if not text.strip():
            return "PDF سے متن حاصل نہیں ہو سکا۔"
        chunks = chunk_text(text)
        index, _ = create_faiss(chunks)
        relevant = retrieve_chunks(chunks, index, question)
        context = "\n".join(relevant)
        return ask_llm(context, question)
    except Exception as e:
        # Top-level boundary: surface any failure to the UI instead of crashing.
        return f"⚠️ خرابی: {str(e)}"
    finally:
        # FIX: the original leaked the temp file (delete=False, never removed),
        # accumulating one file per upload for the life of the Space.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
# ===============================
# Gradio UI
# ===============================
# Wire the pipeline into a simple two-input, one-output Gradio interface.
ui = gr.Interface(
    fn=rag_pipeline,
    inputs=[
        gr.File(label="📄 PDF اپلوڈ کریں"),
        gr.Textbox(label="❓ سوال", placeholder="PDF سے سوال پوچھیں"),
    ],
    outputs=gr.Textbox(label="📌 جواب"),
    title="Jehan Zada RAG App",
)

ui.launch()