ShahbazAhmad-Lab committed on
Commit
cb54310
·
verified ·
1 Parent(s): 1c02aa9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +350 -0
app.py ADDED
@@ -0,0 +1,350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================
2
+ # RAG Chatbot — Hugging Face Spaces
3
+ # Upload PDFs and ask questions!
4
+ # ============================================================
5
+
6
+ import os, warnings
7
+ warnings.filterwarnings("ignore")
8
+
9
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
10
+ from langchain_community.document_loaders import PyPDFLoader
11
+ from langchain_community.vectorstores import FAISS
12
+ from langchain_huggingface import HuggingFaceEmbeddings
13
+ from langchain_groq import ChatGroq
14
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
15
+ from langchain_core.messages import HumanMessage, AIMessage
16
+ from langchain_core.output_parsers import StrOutputParser
17
+ from langchain_core.runnables import RunnablePassthrough, RunnableLambda
18
+ import gradio as gr
19
+
20
# Groq credential, read from the environment (configured as a secret on the
# hosting Space). Falls back to an empty string when the variable is unset.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
22
+
23
+ # ── Load PDFs ─────────────────────────────────────────────────
24
def load_pdfs(files):
    """Load every page of the given PDFs and tag each page with its file name.

    Args:
        files: Iterable of uploaded files. Each item is either an object that
            exposes its path through a ``name`` attribute or a plain path
            string. ``None`` is treated as an empty upload.

    Returns:
        A ``(all_docs, names)`` tuple: the page documents of every PDF that
        loaded successfully, and the base names of those PDFs in input order.
        A file that fails to load is reported on stdout and skipped.
    """
    all_docs = []
    names = []
    for file in files or []:
        # Accept both upload objects (path stored in ``.name``) and bare paths.
        path = getattr(file, "name", file)
        try:
            base = os.path.basename(path)
            docs = PyPDFLoader(path).load()
            for doc in docs:
                # Keep only the bare file name; it is what gets shown when
                # the retrieved context cites its source.
                doc.metadata["source"] = base
            all_docs.extend(docs)
            names.append(base)
            print(f" βœ… {base} β€” {len(docs)} pages")
        except Exception as e:
            # Best effort: one unreadable PDF must not abort the whole batch,
            # but the log has to say which file was the problem.
            print(f" ❌ Error ({path}): {e}")
    return all_docs, names
39
+
40
+ # ── Build RAG ─────────────────────────────────────────────────
41
def build_rag(all_docs):
    """Build the retrieval-augmented question-answering chain.

    The documents are split into overlapping chunks, embedded with the
    ``all-MiniLM-L6-v2`` model on CPU, indexed in FAISS, and connected to a
    Groq-hosted chat model through a prompt that restricts answers to the
    retrieved context.

    Args:
        all_docs: Page documents, e.g. as returned by ``load_pdfs``.

    Returns:
        A ``(chain, n_chunks)`` tuple. ``chain.invoke`` takes a dict with the
        keys ``"question"`` and ``"chat_history"``; its output is passed
        through ``StrOutputParser``. ``n_chunks`` is the number of indexed
        chunks.

    Raises:
        ValueError: If ``GROQ_API_KEY`` is empty, or if splitting produced no
            chunks (nothing to index).
    """
    # Check the key first so a missing secret is reported before any
    # embedding work is done.
    if not GROQ_API_KEY:
        raise ValueError(
            "GROQ_API_KEY is not set; add it to the environment / Space secrets."
        )

    chunks = RecursiveCharacterTextSplitter(
        chunk_size=600,
        chunk_overlap=100,
        separators=["\n\n", "\n", ". ", " ", ""],
    ).split_documents(all_docs)
    print(f" βœ‚οΈ {len(chunks)} chunks")

    # An empty chunk list cannot be indexed; say so explicitly instead of
    # letting the vector store fail with an unrelated-looking error.
    if not chunks:
        raise ValueError(
            "No text could be extracted from the documents, nothing to index."
        )

    emb = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )
    vs = FAISS.from_documents(chunks, emb)

    llm = ChatGroq(
        groq_api_key=GROQ_API_KEY,
        model_name="llama-3.3-70b-versatile",
        temperature=0.3,
        max_tokens=1500,
    )

    # Fetch the 4 most similar chunks for each question.
    retriever = vs.as_retriever(search_kwargs={"k": 4})

    prompt = ChatPromptTemplate.from_messages([
        ("system", """You are an expert AI assistant.
Answer using ONLY the context below.
Always mention the source document.
If answer not found, say: I don't have that information in the provided documents.

Context:
{context}"""),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ])

    def fmt(docs):
        """Render retrieved chunks as one context string with source headers."""
        # The stored page number is shown as page + 1 (human-friendly).
        return "\n\n---\n\n".join(
            f"[Source: {d.metadata.get('source','?')} | Page {d.metadata.get('page',0)+1}]:\n{d.page_content}"
            for d in docs
        )

    # Add a "context" key (retrieved + formatted chunks) to the input dict,
    # then run prompt -> model -> plain-string output.
    chain = (
        RunnablePassthrough.assign(
            context=RunnableLambda(
                lambda x: fmt(retriever.invoke(x["question"]))
            )
        )
        | prompt | llm | StrOutputParser()
    )
    return chain, len(chunks)
92
+
93
+ # ── Global State ──────────────────────────────────────────────
94
# State shared by the Gradio callbacks below.
# rag_chain:  the active chain; stays None until process_files builds one.
# ui_history: conversation messages handed to the chain as "chat_history".
# NOTE(review): these are module-level, so presumably every visitor of the
# app shares them; confirm that is intended.
rag_chain: object = None
ui_history: list = []
96
+
97
+ # ── Gradio Functions ──────────────────────────────────────────
98
def process_files(files):
    """Gradio callback: index the uploaded PDFs and describe the outcome.

    Args:
        files: Value of the file-upload component (may be empty or ``None``).

    Returns:
        A ``(status_markdown, badge_text)`` pair. The badge text is an empty
        string whenever nothing was indexed. On success the module-level
        ``rag_chain`` is replaced by the newly built chain.
    """
    global rag_chain

    # Guard: nothing was selected.
    if not files:
        return "⚠️ Koi file select nahi ki!", ""

    print(f"\nπŸ“ Processing {len(files)} file(s)...")
    docs, names = load_pdfs(files)

    # Guard: none of the PDFs yielded any page.
    if not docs:
        return "❌ PDFs se content extract nahi hua!", ""

    try:
        chain, n_chunks = build_rag(docs)
        rag_chain = chain

        total_chars = sum(len(page.page_content) for page in docs)
        bullet_list = "\n".join(f"β€’ {name}" for name in names)
        status = "\n\n".join([
            f"βœ… **{len(names)} file(s) loaded!**",
            bullet_list,
            f"πŸ“Š {len(docs)} pages | {n_chunks} chunks | {total_chars:,} chars",
            f"πŸ’¬ **Ab sawal poochho!**",
        ])
        return status, f"{len(names)} docs"
    except Exception as e:
        # UI boundary: show the failure in the status panel instead of raising.
        return f"❌ Error: {e}", ""
121
+
122
+
123
def chat_fn(msg, history):
    """Gradio callback: answer one user message with the current RAG chain.

    Args:
        msg: Text typed by the user. ``None``, empty or whitespace-only input
            is ignored.
        history: Chatbot value as a list of ``{"role", "content"}`` dicts. It
            is extended in place and returned. ``None`` is treated as an empty
            conversation.

    Returns:
        ``("", history)``. The empty string is written back to the input box,
        which clears it.
    """
    global rag_chain, ui_history

    # Upper bound on the messages remembered for the model. The whole list is
    # sent with every question, so without a cap each turn makes the request
    # larger. An even number keeps question/answer pairs together.
    max_history_messages = 20

    if history is None:
        history = []
    if not msg or not msg.strip():
        return "", history

    if rag_chain is None:
        # No documents have been processed yet.
        history.append({
            "role": "assistant",
            "content": "⚠️ Pehle PDF upload karo aur Process karo!"
        })
        return "", history

    try:
        ans = rag_chain.invoke({
            "question": msg,
            "chat_history": ui_history
        })
        # Only successful exchanges are remembered for the model.
        ui_history.append(HumanMessage(content=msg))
        ui_history.append(AIMessage(content=ans))
        # Drop the oldest messages beyond the cap (no-op while under it).
        del ui_history[:-max_history_messages]
    except Exception as e:
        # UI boundary: show the failure in the chat instead of raising.
        ans = f"❌ Error: {str(e)}"
        print(f"ERROR: {e}")

    history.append({"role": "user", "content": msg})
    history.append({"role": "assistant", "content": ans})
    return "", history
147
+
148
+
149
def clear_fn():
    """Gradio callback: forget the conversation.

    Rebinds the module-level ``ui_history`` to a new empty list and returns a
    separate empty list, which is used as the new chatbot value.
    """
    global ui_history
    ui_history = list()
    return list()
153
+
154
+ # ── CSS ───────────────────────────────────────────────────────
155
# Custom stylesheet passed to gr.Blocks(css=...): dark page background, gradient
# title, pill badges, and restyled inputs, buttons and example cells.
css = """
@import url('https://fonts.googleapis.com/css2?family=Syne:wght@700;800&family=DM+Sans:wght@300;400;500&display=swap');

* { box-sizing: border-box; }

body, .gradio-container {
    font-family: 'DM Sans', sans-serif !important;
    background: #0a0a0f !important;
    color: #e8e6f0 !important;
}
.gradio-container {
    max-width: 960px !important;
    margin: 0 auto !important;
}
.app-title {
    font-family: 'Syne', sans-serif !important;
    font-size: 2.4rem !important;
    font-weight: 800 !important;
    background: linear-gradient(135deg, #a78bfa, #60a5fa, #34d399) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
    text-align: center !important;
    padding: 32px 0 8px !important;
}
.badge {
    display: inline-flex; align-items: center; gap: 5px;
    background: rgba(139,92,246,0.1);
    border: 1px solid rgba(139,92,246,0.25);
    border-radius: 20px; padding: 4px 12px;
    font-size: 0.72rem; color: #a78bfa; font-weight: 500; margin: 3px;
}
.section-label {
    font-family: 'Syne', sans-serif !important;
    font-size: 0.7rem !important; font-weight: 700 !important;
    letter-spacing: 2.5px !important; text-transform: uppercase !important;
    color: #a78bfa !important; margin: 20px 0 12px !important;
}
textarea, input[type=text] {
    background: #0d0d14 !important;
    border: 1px solid #1f1f2e !important;
    border-radius: 10px !important;
    color: #e8e6f0 !important;
    font-family: 'DM Sans', sans-serif !important;
    font-size: 0.9rem !important;
    transition: border-color 0.2s, box-shadow 0.2s !important;
    scrollbar-width: thin !important;
    scrollbar-color: #2d2d45 transparent !important;
}
textarea:focus, input[type=text]:focus {
    border-color: #a78bfa !important;
    box-shadow: 0 0 0 3px rgba(139,92,246,0.12) !important;
    outline: none !important;
}
textarea::-webkit-scrollbar { width: 4px !important; }
textarea::-webkit-scrollbar-thumb {
    background: #2d2d45 !important; border-radius: 10px !important;
}
textarea::-webkit-scrollbar-thumb:hover { background: #a78bfa !important; }
button.primary {
    background: linear-gradient(135deg, #7c3aed, #4f46e5) !important;
    border: none !important; border-radius: 10px !important;
    color: white !important; font-family: 'Syne', sans-serif !important;
    font-weight: 600 !important;
    box-shadow: 0 4px 15px rgba(124,58,237,0.3) !important;
    transition: all 0.2s ease !important;
}
button.primary:hover {
    transform: translateY(-1px) !important;
    box-shadow: 0 6px 20px rgba(124,58,237,0.4) !important;
}
button.secondary {
    background: #13131a !important;
    border: 1px solid #2d2d45 !important;
    border-radius: 10px !important;
    color: #9ca3af !important; transition: all 0.2s !important;
}
button.secondary:hover {
    border-color: #a78bfa !important; color: #a78bfa !important;
}
label span { color: #6b7280 !important; font-size: 0.8rem !important; }
.examples-table td, .examples td {
    background: #13131a !important;
    border: 1px solid #1f1f2e !important;
    border-radius: 8px !important; color: #9ca3af !important;
    font-size: 0.8rem !important; cursor: pointer !important;
    transition: all 0.2s !important;
}
.examples-table td:hover, .examples td:hover {
    background: #1e1e30 !important;
    color: #a78bfa !important; border-color: #a78bfa !important;
}
"""
248
+
249
+ # ── UI ────────────────────────────────────────────────────────
250
# Page definition. Components are created in display order: header, upload
# row, chat area, input row, example questions, footer. The callbacks are
# attached at the end.
with gr.Blocks(
    css=css,
    title="RAG Intelligence",
    theme=gr.themes.Base(primary_hue="violet", neutral_hue="slate"),
) as demo:

    # Header: title, tagline and technology badges.
    gr.HTML("""
    <div class="app-title">⚑ RAG Intelligence</div>
    <div style="text-align:center; color:#6b7280; margin-bottom:16px;">
    Multi-Document AI Β· FAISS Β· Groq LLaMA 3.3
    </div>
    <div style="text-align:center; margin-bottom:24px;">
    <span class="badge">🧠 HuggingFace</span>
    <span class="badge">⚑ Groq LLM</span>
    <span class="badge">πŸ” FAISS</span>
    <span class="badge">πŸ“„ Multi-PDF</span>
    </div>
    """)

    gr.HTML('<div class="section-label">πŸ“₯ &nbsp; Upload Your PDFs</div>')

    # Upload row: file picker and process button on the left, status on the right.
    with gr.Row():
        with gr.Column(scale=3):
            file_input = gr.File(
                label="PDF files select karo (multiple ho sakti hain)",
                file_types=[".pdf"],
                file_count="multiple",
            )
            process_btn = gr.Button("βš™οΈ Process Documents", variant="primary")
        with gr.Column(scale=2):
            status_out = gr.Markdown("πŸ“‹ **Status:** Waiting for documents...")
            badge_out = gr.Markdown("**0 docs loaded**")

    gr.HTML('<hr style="border:none;border-top:1px solid #1a1a28;margin:20px 0;">')
    gr.HTML('<div class="section-label">πŸ’¬ &nbsp; Chat With Documents</div>')

    # Conversation view; uses the role/content "messages" format that
    # chat_fn appends to.
    chatbot = gr.Chatbot(
        label="",
        height=480,
        type="messages",
        show_label=False,
        placeholder="<div style='text-align:center;color:#374151;padding:40px;'>Load documents first, then ask anything! ✦</div>",
    )

    # Input row: question box plus Send / Clear buttons.
    with gr.Row():
        msg_box = gr.Textbox(
            placeholder="✦ Apne documents ke baare mein kuch bhi poochho...",
            label="",
            lines=2,
            max_lines=5,
            scale=5,
            show_label=False,
            container=False,
        )
        with gr.Column(scale=1, min_width=110):
            send_btn = gr.Button("Send ➀", variant="primary")
            clear_btn = gr.Button("Clear πŸ—‘", variant="secondary")

    # Example questions, wired to the question box.
    gr.Examples(
        examples=[
            "Is document ka summary do",
            "Main topics kya hain?",
            "Important points bullet mein batao",
            "Koi definition explain karo",
            "Key concepts list karo",
        ],
        inputs=msg_box,
        label="✦ Quick Questions",
    )

    # Footer.
    gr.HTML("""
    <div style="text-align:center;padding:20px 0 8px;
    color:#2d2d45;font-size:0.72rem;letter-spacing:1.5px;">
    RAG INTELLIGENCE Β· FAISS Β· GROQ Β· HUGGINGFACE
    </div>
    """)

    # Events
    process_btn.click(
        fn=process_files,
        inputs=[file_input],
        outputs=[status_out, badge_out],
    )
    # The Send button and submitting the textbox both run the same chat handler.
    for register in (send_btn.click, msg_box.submit):
        register(
            fn=chat_fn,
            inputs=[msg_box, chatbot],
            outputs=[msg_box, chatbot],
        )
    clear_btn.click(fn=clear_fn, outputs=[chatbot])

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()