import gradio as gr
import os
import requests
import numpy as np
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# ---------------- CONFIG ----------------
# API key is read from the environment variable "smartdoc_rag_chatbot";
# it must be added to the HF (presumably Hugging Face Space) secrets.
# os.environ.get returns None when the variable is not set.
GROQ_API_KEY = os.environ.get("smartdoc_rag_chatbot")
# Chat-completions endpoint that ask_question posts to.
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
# Model identifier sent in the "model" field of the request body.
MODEL_NAME = "llama-3.1-8b-instant"
# Sentence-embedding model shared by indexing (load_pdfs) and querying
# (retrieve_context); instantiated once at import time.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# Module-level index, rebuilt by load_pdfs:
#   chunks           - list of dicts with keys "content", "page", "doc"
#   chunk_embeddings - result of embedder.encode() over the chunk contents,
#                      in the same order as `chunks`
chunks = []
chunk_embeddings = []
# ---------------- PDF LOADING ----------------
def load_pdfs(pdf_files):
    """Extract text from the uploaded PDFs and rebuild the global chunk index.

    Each page's text is split into fixed-size character chunks; every chunk
    remembers its 1-based page number and a "Document N" label (N follows the
    upload order). The chunks are then embedded with the module-level
    ``embedder``.

    Args:
        pdf_files: Sequence of uploaded PDFs as accepted by ``PdfReader``
            (may be ``None`` or empty when nothing was uploaded).

    Returns:
        A human-readable status string for the UI.
    """
    global chunks, chunk_embeddings
    if not pdf_files:
        return "❌ Please upload at least one PDF."

    chunk_size = 500  # characters per chunk
    new_chunks = []
    for doc_id, pdf in enumerate(pdf_files):
        reader = PdfReader(pdf)
        doc_label = f"Document {doc_id + 1}"
        for page_num, page in enumerate(reader.pages, start=1):
            text = page.extract_text()
            # Pages without a text layer (e.g. scanned images) yield nothing.
            if not text or not text.strip():
                continue
            for start in range(0, len(text), chunk_size):
                piece = text[start:start + chunk_size]
                # Whitespace-only chunks carry no retrievable information.
                if piece.strip():
                    new_chunks.append({
                        "content": piece,
                        "page": page_num,
                        "doc": doc_label,
                    })

    if not new_chunks:
        # Nothing to embed: clear the index instead of handing the encoder an
        # empty list, and tell the user why no questions can be answered.
        chunks = []
        chunk_embeddings = []
        return "⚠️ No extractable text found in the uploaded PDF(s)."

    new_embeddings = embedder.encode([c["content"] for c in new_chunks])

    # Publish chunks and embeddings together, only after embedding succeeded,
    # so the two globals never describe different document sets.
    chunks = new_chunks
    chunk_embeddings = new_embeddings
    return f"✅ Loaded {len(pdf_files)} PDF(s) with {len(chunks)} chunks."
# ---------------- RETRIEVAL ----------------
def retrieve_context(query, k=3):
    """Return the text of the ``k`` chunks most similar to ``query``.

    Similarity is the cosine similarity between the query embedding and the
    embeddings stored in the module-level ``chunk_embeddings``.

    Args:
        query: The user's question.
        k: Number of chunks to retrieve; must be at least 1. If fewer than
            ``k`` chunks are indexed, all of them are used.

    Returns:
        A ``(context, source)`` tuple. ``context`` is the selected chunk
        texts joined by newlines, ordered from least to most similar.
        ``source`` is the chunk dict of the single best match.

    Raises:
        ValueError: If ``k`` is smaller than 1 or no chunks are indexed.
    """
    # A slice of [-0:] would select *every* chunk, so reject k < 1 up front.
    if k < 1:
        raise ValueError("k must be a positive integer")
    if not chunks:
        raise ValueError("no documents have been indexed; load PDFs first")

    query_embedding = embedder.encode([query])
    similarities = cosine_similarity(query_embedding, chunk_embeddings)[0]
    # argsort is ascending, so the last k indices are the best matches and
    # the very last one is the most similar chunk.
    top_k = np.argsort(similarities)[-k:]
    selected = [chunks[i] for i in top_k]
    context = "\n".join(c["content"] for c in selected)
    source = selected[-1]
    return context, source
# ---------------- GROQ CALL ----------------
def ask_question(question):
    """Answer ``question`` from the loaded PDFs via the Groq chat API.

    The most relevant chunks are retrieved with ``retrieve_context``, placed
    into a prompt that restricts the model to that context, and sent to the
    chat-completions endpoint at ``GROQ_URL``.

    Args:
        question: The user's question text.

    Returns:
        The model's answer followed by a source line (document label and
        page of the best-matching chunk), or a user-facing warning/error
        message when the question is empty, no PDFs are loaded, the API key
        is missing, or the request fails.
    """
    if not question or not question.strip():
        return "⚠️ Please enter a question."
    if not chunks:
        return "⚠️ Please load PDFs first."
    if not GROQ_API_KEY:
        return "❌ Groq API key is not configured."

    context, source = retrieve_context(question)
    prompt = (
        "\n"
        "You are SmartDoc RAG Chatbot.\n"
        "Answer the question using ONLY the context below.\n"
        "Context:\n"
        f"{context}\n"
        "Question:\n"
        f"{question}\n"
    )
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.2,
    }

    try:
        # Without a timeout a stalled connection would block the UI forever.
        response = requests.post(
            GROQ_URL, headers=headers, json=payload, timeout=60
        )
        # Surface HTTP errors (bad key, rate limit, ...) instead of failing
        # later with a KeyError on the missing "choices" field.
        response.raise_for_status()
        answer = response.json()["choices"][0]["message"]["content"]
    except requests.RequestException as exc:
        return f"❌ Request to Groq failed: {exc}"
    except (ValueError, KeyError, IndexError, TypeError):
        return "❌ Received an unexpected response from Groq."

    return f"{answer}\n📄 Source: {source['doc']} — Page {source['page']}"
# ---------------- UI ----------------
# Custom stylesheet handed to gr.Blocks via its `css` argument: sets a
# gradient page background, centers h1/h3 headings, and adjusts the font of
# textbox textareas and primary buttons.
css = """
body {
background: linear-gradient(120deg, #e0f2ff, #f8fbff);
}
h1, h3 {
text-align: center;
}
.gr-textbox textarea {
font-size: 15px;
}
.gr-button-primary {
font-weight: bold;
}
"""
# Build the two-column interface: upload/status on the left, Q&A on the right.
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="cyan",
        neutral_hue="slate",
        font=["Inter", "sans-serif"],
    ),
    css=css,
) as demo:
    gr.Markdown("""
# 📄 SmartDoc RAG Chatbot
### Retrieval‑Augmented AI for Document Question Answering
Upload PDFs and ask questions based **only** on their content.
""")

    with gr.Row():
        # LEFT PANEL: document upload and load status
        with gr.Column(scale=1):
            pdf_files = gr.File(
                file_types=[".pdf"],
                file_count="multiple",
                label="📂 Upload PDF Documents",
            )
            load_btn = gr.Button("📥 Load Documents", variant="primary")
            status = gr.Textbox(label="Status", interactive=False)

        # RIGHT PANEL: question input row above the answer box
        with gr.Column(scale=2):
            with gr.Row():
                question = gr.Textbox(
                    placeholder="Type your question here…",
                    lines=1,
                    scale=8,
                )
                send_btn = gr.Button("➤", scale=1)
            answer = gr.Textbox(label="Answer", lines=8)

    # EVENTS
    load_btn.click(load_pdfs, inputs=pdf_files, outputs=status)

    # Clicking the send button and pressing Enter in the textbox behave the
    # same: answer the question, then clear the input field.
    for register in (send_btn.click, question.submit):
        register(
            ask_question,
            inputs=question,
            outputs=answer,
        ).then(lambda: "", None, question)

demo.launch()