File size: 3,568 Bytes
a5e1526
 
37e528f
19780c5
a5e1526
19780c5
6ed9247
19780c5
a5e1526
19780c5
 
 
a5e1526
 
19780c5
a5e1526
 
e84548e
19780c5
 
 
 
a5e1526
19780c5
 
 
 
 
a5e1526
 
19780c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ed9247
b34ce15
a5e1526
19780c5
 
 
 
 
 
 
 
 
 
a5e1526
19780c5
e84548e
19780c5
 
 
 
e84548e
 
 
 
19780c5
 
 
 
 
 
 
293ec96
19780c5
 
e84548e
a5e1526
e84548e
19780c5
 
 
a5e1526
e84548e
6ed9247
e84548e
19780c5
b34ce15
19780c5
37e528f
19780c5
 
 
 
 
 
 
 
f094fac
19780c5
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import os
import tempfile
import numpy as np
import faiss
import gradio as gr
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq

# ===============================
# Groq API
# ===============================
# The key is read from the environment variable "Rag_key"
# (configured as a HuggingFace Spaces secret — see the error message below).
GROQ_API_KEY = os.getenv("Rag_key")
if not GROQ_API_KEY:
    # Fail fast at import time: every request needs the LLM client.
    raise ValueError("Groq API key not found. Add Rag_key in HuggingFace Secrets.")

client = Groq(api_key=GROQ_API_KEY)

# ===============================
# Embedding Model (Open Source)
# ===============================
# Loaded once at import time; downloads the weights on first run.
# Used for both document chunks and user questions so both live
# in the same vector space.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# ===============================
# PDF Reader
# ===============================
def read_pdf(pdf_path):
    """Extract and concatenate the text of every page of a PDF.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        The text of all pages joined into one string. Pages with no
        extractable text (e.g. scanned images) contribute nothing.
    """
    reader = PdfReader(pdf_path)
    # extract_text() can return None for image-only pages; substitute "".
    # "".join avoids the quadratic cost of repeated string concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)

# ===============================
# Text Chunking
# ===============================
def chunk_text(text, chunk_size=400, overlap=50):
    """Split *text* into chunks of at most ``chunk_size`` characters,
    consecutive chunks overlapping by ``overlap`` characters.

    Args:
        text: The string to split. An empty string yields [].
        chunk_size: Maximum characters per chunk; must be positive.
        overlap: Characters shared between consecutive chunks; must be
            smaller than ``chunk_size``.

    Returns:
        A list of string chunks covering the whole input.

    Raises:
        ValueError: If the parameters would make no forward progress
            (the original code looped forever in that case).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap >= chunk_size:
        # start = end - overlap would never advance -> infinite loop.
        raise ValueError("overlap must be smaller than chunk_size")

    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        if end >= len(text):
            # Without this break, a final chunk fully contained in the
            # previous one could be appended (redundant retrieval noise).
            break
        start = end - overlap
    return chunks

# ===============================
# FAISS Vector Store
# ===============================
def create_faiss(chunks):
    """Embed every chunk and build a flat L2 FAISS index over them.

    Args:
        chunks: List of text chunks to embed.

    Returns:
        Tuple of (faiss index, embedding matrix of shape (n_chunks, dim)).
    """
    vectors = embedder.encode(chunks)
    # Exact (non-approximate) nearest-neighbour search on L2 distance.
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(np.array(vectors))
    return index, vectors

def retrieve_chunks(chunks, index, question, k=3):
    """Return the *k* chunks whose embeddings are closest to the question.

    Args:
        chunks: The chunk list the index was built from (same order).
        index: FAISS index produced by ``create_faiss``.
        question: The user's query string.
        k: Number of nearest chunks to return.

    Returns:
        List of the k most relevant chunk strings.
    """
    query_vec = np.array(embedder.encode([question]))
    _, hit_ids = index.search(query_vec, k)
    return [chunks[idx] for idx in hit_ids[0]]

# ===============================
# Groq LLM Call
# ===============================
def ask_llm(context, question):
    """Ask the Groq-hosted LLM to answer *question* using only *context*.

    Args:
        context: Retrieved chunk text joined into one string.
        question: The user's question.

    Returns:
        The model's answer (instructed to reply in Urdu, grounded in
        the supplied context only).
    """
    system_msg = {
        "role": "system",
        "content": "Answer ONLY from the provided context. Reply in Urdu."
    }
    user_msg = {
        "role": "user",
        "content": f"Context:\n{context}\n\nQuestion:\n{question}"
    }
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[system_msg, user_msg]
    )
    return completion.choices[0].message.content

# ===============================
# Main RAG Pipeline (FIXED)
# ===============================
def rag_pipeline(file, question):
    """End-to-end RAG pipeline: PDF -> chunks -> retrieval -> LLM answer.

    Args:
        file: Uploaded PDF — either a filesystem path (str, as Gradio's
            NamedString) or a file-like object with ``.read()``.
        question: The user's question about the PDF.

    Returns:
        The LLM's answer, or a user-facing Urdu error message. Never
        raises: all failures are reported as strings for the UI.
    """
    try:
        if file is None or not question.strip():
            return "براہ کرم PDF اپلوڈ کریں اور سوال لکھیں۔"

        tmp_path = None
        if isinstance(file, str):
            # Gradio passes the upload as a path-like NamedString.
            pdf_path = file
        else:
            # File-like object: spill to a temp file so pypdf gets a path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                tmp.write(file.read())
                tmp_path = pdf_path = tmp.name

        try:
            text = read_pdf(pdf_path)
        finally:
            # FIX: the original leaked one temp file per upload
            # (delete=False with no cleanup).
            if tmp_path is not None:
                os.remove(tmp_path)

        if not text.strip():
            return "PDF سے متن حاصل نہیں ہو سکا۔"

        chunks = chunk_text(text)
        index, _ = create_faiss(chunks)
        relevant = retrieve_chunks(chunks, index, question)
        context = "\n".join(relevant)

        return ask_llm(context, question)

    except Exception as e:
        # Deliberate best-effort boundary: surface any failure in the UI.
        return f"⚠️ خرابی: {str(e)}"

# ===============================
# Gradio UI
# ===============================
# Two inputs (PDF upload + question) feed rag_pipeline; one textbox output.
pdf_input = gr.File(label="📄 PDF اپلوڈ کریں")
question_input = gr.Textbox(label="❓ سوال", placeholder="PDF سے سوال پوچھیں")
answer_output = gr.Textbox(label="📌 جواب")

ui = gr.Interface(
    fn=rag_pipeline,
    inputs=[pdf_input, question_input],
    outputs=answer_output,
    title="Jehan Zada RAG App"
)

ui.launch()