sofzcc committed on
Commit
456f6e2
·
verified ·
1 Parent(s): 2acce8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -22
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import glob
3
  import yaml
 
4
  from typing import List, Tuple
5
 
6
  import faiss
@@ -37,7 +38,7 @@ def get_default_config():
37
  "index_directory": "./index",
38
  },
39
  "models": {
40
- "embedding": "all-MiniLM-L6-v2",
41
  "qa": "deepset/roberta-base-squad2",
42
  },
43
  "chunking": {
@@ -237,6 +238,8 @@ class RAGIndex:
237
  print("⚠️ No documents found in knowledge base")
238
  print(f" Please add .txt, .md, .pdf, or .docx files to: {KB_DIR}")
239
  self.index = None
 
 
240
  return
241
 
242
  all_chunks: List[str] = []
@@ -251,6 +254,8 @@ class RAGIndex:
251
  if not all_chunks:
252
  print("⚠️ No valid chunks created from documents")
253
  self.index = None
 
 
254
  return
255
 
256
  print(f"Created {len(all_chunks)} chunks from {len(docs)} documents")
@@ -299,7 +304,10 @@ class RAGIndex:
299
  try:
300
  q_emb = self.embedder.encode([query], convert_to_numpy=True)
301
  faiss.normalize_L2(q_emb)
302
- scores, idxs = self.index.search(q_emb, min(top_k, len(self.chunks)))
 
 
 
303
 
304
  results: List[Tuple[str, str, float]] = []
305
  for score, idx in zip(scores[0], idxs[0]):
@@ -325,7 +333,7 @@ class RAGIndex:
325
  if not question or not question.strip():
326
  return "Please ask a question."
327
 
328
- if self.index is None:
329
  return (
330
  f"📚 Knowledge base is empty.\n\n"
331
  f"Please add documents to: `{KB_DIR}`\n"
@@ -390,24 +398,77 @@ print("=" * 50)
390
 
391
 
392
  # -----------------------------
393
- # GRADIO CHAT
394
  # -----------------------------
395
 
396
  def rag_respond(message, history):
397
- """Handle chat messages"""
398
  if not message or not str(message).strip():
399
- return "Please enter a question."
400
-
401
- return rag_index.answer(str(message))
402
 
403
-
 
 
 
404
 
405
- # Build interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
  description = WELCOME_MSG
407
- if not rag_index.initialized or rag_index.index is None:
408
  description += (
409
- f"\n\n⚠️ **Note:** Knowledge base is empty. "
410
- f"Add documents to `{KB_DIR}` and restart."
411
  )
412
 
413
  examples = [
@@ -415,27 +476,85 @@ examples = [
415
  for qa in CONFIG.get("quick_actions", [])
416
  if qa.get("query")
417
  ]
418
- if not examples and rag_index.initialized and rag_index.index is not None:
419
  examples = [
420
  "What is this document about?",
421
  "Can you summarize the main points?",
422
  "What are the key findings?",
423
  ]
424
 
425
- chat = gr.ChatInterface(
426
- fn=rag_respond,
427
- title=CONFIG["client"]["name"],
428
- description=description,
429
- examples=examples if examples else None,
430
- )
431
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
 
434
 
435
  if __name__ == "__main__":
436
  port = int(os.environ.get("PORT", 7860))
437
- chat.launch(
438
  server_name="0.0.0.0",
439
  server_port=port,
440
  share=False,
441
- )
 
1
  import os
2
  import glob
3
  import yaml
4
+ import shutil
5
  from typing import List, Tuple
6
 
7
  import faiss
 
38
  "index_directory": "./index",
39
  },
40
  "models": {
41
+ "embedding": "sentence-transformers/all-MiniLM-L6-v2",
42
  "qa": "deepset/roberta-base-squad2",
43
  },
44
  "chunking": {
 
238
  print("⚠️ No documents found in knowledge base")
239
  print(f" Please add .txt, .md, .pdf, or .docx files to: {KB_DIR}")
240
  self.index = None
241
+ self.chunks = []
242
+ self.chunk_sources = []
243
  return
244
 
245
  all_chunks: List[str] = []
 
254
  if not all_chunks:
255
  print("⚠️ No valid chunks created from documents")
256
  self.index = None
257
+ self.chunks = []
258
+ self.chunk_sources = []
259
  return
260
 
261
  print(f"Created {len(all_chunks)} chunks from {len(docs)} documents")
 
304
  try:
305
  q_emb = self.embedder.encode([query], convert_to_numpy=True)
306
  faiss.normalize_L2(q_emb)
307
+ k = min(top_k, len(self.chunks)) if self.chunks else 0
308
+ if k == 0:
309
+ return []
310
+ scores, idxs = self.index.search(q_emb, k)
311
 
312
  results: List[Tuple[str, str, float]] = []
313
  for score, idx in zip(scores[0], idxs[0]):
 
333
  if not question or not question.strip():
334
  return "Please ask a question."
335
 
336
+ if self.index is None or not self.chunks:
337
  return (
338
  f"📚 Knowledge base is empty.\n\n"
339
  f"Please add documents to: `{KB_DIR}`\n"
 
398
 
399
 
400
  # -----------------------------
401
+ # GRADIO APP (BLOCKS)
402
  # -----------------------------
403
 
404
def rag_respond(message, history):
    """Handle one chat turn for the chatbot UI.

    Args:
        message: Raw text from the input box; coerced with ``str()``.
        history: Chat history as a sequence of ``[user, bot]`` pairs, or
            ``None`` when the chatbot holds no value yet.

    Returns:
        A ``(textbox_value, history)`` tuple: an empty string (clears the
        textbox) and the history list, with the new exchange appended when
        the message was non-blank.
    """
    # Work on a copy so the caller's list is never mutated, and treat a
    # missing history as an empty conversation instead of failing on
    # ``None + [...]``.
    turns = list(history) if history else []

    # Blank input: nothing to answer, just clear the textbox.
    if not message or not str(message).strip():
        return "", turns

    user_msg = str(message)
    bot_reply = rag_index.answer(user_msg)
    turns.append([user_msg, bot_reply])
    return "", turns
413
 
414
+
415
def upload_to_kb(files):
    """Copy uploaded files into the knowledge-base directory (``KB_DIR``).

    Args:
        files: A single upload or a list/tuple of uploads. Each item is
            either an object whose ``name`` attribute is the temp-file path
            or a plain path string.

    Returns:
        A human-readable status string for the UI: the list of saved file
        names (plus any that could not be saved), or a short message when
        nothing was uploaded or nothing could be saved.
    """
    if not files:
        return "No files uploaded."

    if not isinstance(files, (list, tuple)):
        files = [files]

    os.makedirs(KB_DIR, exist_ok=True)
    saved_files = []
    failed_files = []

    for f in files:
        # Upload objects expose the temp path as `.name`; otherwise the
        # item itself is taken to be the path.
        src_path = getattr(f, "name", None) or str(f)
        filename = os.path.basename(src_path)

        # Only regular files can be copied; log the skip so that the
        # "Check logs" status below actually has something to point at.
        if not os.path.isfile(src_path):
            print(f"Error saving file {filename}: source not found: {src_path}")
            failed_files.append(filename)
            continue

        # NOTE: an existing file with the same name in KB_DIR is overwritten.
        dest_path = os.path.join(KB_DIR, filename)
        try:
            shutil.copy(src_path, dest_path)
        except OSError as e:  # includes shutil.SameFileError
            print(f"Error saving file {filename}: {e}")
            failed_files.append(filename)
        else:
            saved_files.append(filename)

    if not saved_files:
        return "No files could be saved. Check logs."

    status = (
        f"✅ Saved {len(saved_files)} file(s) to knowledge base:\n- "
        + "\n- ".join(saved_files)
        + "\n\nClick **Rebuild index** to include them in search."
    )
    if failed_files:
        status += (
            f"\n\n⚠️ Could not save {len(failed_files)} file(s):\n- "
            + "\n- ".join(failed_files)
        )
    return status
450
+
451
+
452
def rebuild_index():
    """Trigger an index rebuild from the UI and report the outcome.

    Returns:
        A status string: a failure message if the rebuild raised, a warning
        if the index ended up empty, or a success message with the number
        of chunks in the index.
    """
    # NOTE(review): this relies on `_build_or_load_index` re-reading the
    # knowledge base; if it prefers an index already stored on disk, newly
    # uploaded files may not be picked up -- confirm against RAGIndex.
    try:
        rag_index._build_or_load_index()
    except Exception as e:  # UI boundary: surface the error as a status
        print(f"Error rebuilding index: {e}")
        return f"❌ Index rebuild failed: {e}"

    if rag_index.index is None or not rag_index.chunks:
        return (
            "⚠️ Index rebuild finished, but no documents or chunks were found.\n"
            f"Add files to `{KB_DIR}` and try again."
        )
    return (
        f"✅ Index rebuilt successfully.\n"
        f"Chunks in index: {len(rag_index.chunks)}"
    )
464
+
465
+
466
+ # Description + examples
467
  description = WELCOME_MSG
468
+ if not rag_index.initialized or rag_index.index is None or not rag_index.chunks:
469
  description += (
470
+ f"\n\n⚠️ **Note:** Knowledge base is currently empty or index is not built.\n"
471
+ f"Upload documents in the **Knowledge Base** tab and click **Rebuild index**."
472
  )
473
 
474
  examples = [
 
476
  for qa in CONFIG.get("quick_actions", [])
477
  if qa.get("query")
478
  ]
479
+ if not examples and rag_index.initialized and rag_index.index is not None and rag_index.chunks:
480
  examples = [
481
  "What is this document about?",
482
  "Can you summarize the main points?",
483
  "What are the key findings?",
484
  ]
485
 
 
 
 
 
 
 
486
 
487
# Two-tab Gradio app: "Chat" talks to the RAG index, "Knowledge Base"
# uploads documents and rebuilds the index.
with gr.Blocks(title=CONFIG["client"]["name"]) as demo:
    gr.Markdown(f"# {CONFIG['client']['name']}")
    gr.Markdown(description)

    with gr.Tab("Chat"):
        chatbot = gr.Chatbot(label="RAG Chat")
        with gr.Row():
            txt = gr.Textbox(
                show_label=False,
                placeholder="Ask a question about your documents...",
                lines=2,
            )
        with gr.Row():
            send_btn = gr.Button("Send")
            clear_btn = gr.Button("Clear")

        # One button per example question, if any are configured.
        if examples:
            gr.Markdown("### Example questions")
            with gr.Row():
                example_btns = [gr.Button(ex) for ex in examples]

            def use_example(example, history):
                """Send a clicked example as a chat message.

                Returns the chat history with the example and its answer
                appended; a missing history (``None``) is treated as empty.
                """
                bot_reply = rag_index.answer(example)
                return list(history or []) + [[example, bot_reply]]

            for btn, ex in zip(example_btns, examples):
                # gr.State carries the button's own question text into the
                # handler, so each button sends its own example.
                btn.click(
                    use_example,
                    inputs=[gr.State(ex), chatbot],
                    outputs=chatbot,
                )

        # Chat logic wiring: Enter and "Send" both submit; "Clear" resets
        # the conversation and the textbox.
        txt.submit(rag_respond, [txt, chatbot], [txt, chatbot])
        send_btn.click(rag_respond, [txt, chatbot], [txt, chatbot])
        clear_btn.click(lambda: ([], ""), None, [chatbot, txt])

    with gr.Tab("Knowledge Base"):
        # Built from single-line literals so the rendered markdown does not
        # depend on how this source file is indented.
        gr.Markdown(
            "### Manage Knowledge Base\n"
            "\n"
            "- Supported formats: `.txt`, `.md`, `.pdf`, `.docx`, `.doc`\n"
            f"- Files are stored in: `{KB_DIR}`\n"
            "- After uploading, click **Rebuild index** so the assistant "
            "can use the new content.\n"
        )
        kb_upload = gr.File(
            label="Upload documents",
            file_count="multiple",
        )
        kb_status = gr.Textbox(
            label="Status",
            lines=6,
            interactive=False,
        )
        rebuild_btn = gr.Button("Rebuild index")

        # Both actions report their outcome in the same status box.
        kb_upload.change(upload_to_kb, inputs=kb_upload, outputs=kb_status)
        rebuild_btn.click(rebuild_index, inputs=None, outputs=kb_status)
552
 
553
 
554
if __name__ == "__main__":
    # Listen on all interfaces; the port is taken from $PORT when set and
    # falls back to 7860 otherwise. No public share link is created.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": int(os.getenv("PORT", 7860)),
        "share": False,
    }
    demo.launch(**launch_options)