Parth211 committed on
Commit
7d7f0b0
·
verified ·
1 Parent(s): af8347e
Files changed (1) hide show
  1. app.py +147 -44
app.py CHANGED
@@ -385,25 +385,21 @@ def upload_file(file_obj):
385
  # initialize_database(file_path, progress)
386
  return list_file_path
387
 
 
388
 
389
-
390
-
391
-
392
-
393
- ###################################
394
  def demo():
395
  with gr.Blocks(theme="base") as demo:
396
  vector_db = gr.State()
397
  qa_chain = gr.State()
398
  collection_name = gr.State()
399
- history = gr.State()
400
 
401
  gr.Markdown(
402
  """<center><h2>PDF-based chatbot</center></h2>
403
  <h3>Ask any questions about your PDF documents</h3>""")
404
  gr.Markdown(
405
  """<b>Note:</b> This AI assistant, using Langchain and open-source LLMs, performs retrieval-augmented generation (RAG) from your PDF documents. \
406
- The user interface explicitely shows multiple steps to help understand the RAG workflow.
407
  This chatbot takes past questions into account when generating answers (via conversational memory), and includes document references for clarity purposes.<br>
408
  <br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate a reply.
409
  """)
@@ -411,16 +407,15 @@ def demo():
411
  with gr.Tab("Step 1 - Upload PDF"):
412
  with gr.Row():
413
  document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
414
- # upload_btn = gr.UploadButton("Loading document...", height=100, file_count="multiple", file_types=["pdf"], scale=1)
415
 
416
  with gr.Tab("Step 2 - Process document"):
417
  with gr.Row():
418
- db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database")
419
  with gr.Accordion("Advanced options - Document text splitter", open=False):
420
  with gr.Row():
421
- slider_chunk_size = gr.Slider(minimum = 100, maximum = 1000, value=600, step=20, label="Chunk size", info="Chunk size", interactive=True)
422
  with gr.Row():
423
- slider_chunk_overlap = gr.Slider(minimum = 10, maximum = 200, value=40, step=10, label="Chunk overlap", info="Chunk overlap", interactive=True)
424
  with gr.Row():
425
  db_progress = gr.Textbox(label="Vector database initialization", value="None")
426
  with gr.Row():
@@ -428,17 +423,16 @@ def demo():
428
 
429
  with gr.Tab("Step 3 - Initialize QA chain"):
430
  with gr.Row():
431
- llm_btn = gr.Radio(list_llm_simple, \
432
- label="LLM models", value = list_llm_simple[0], type="index", info="Choose your LLM model")
433
  with gr.Accordion("Advanced options - LLM model", open=False):
434
  with gr.Row():
435
- slider_temperature = gr.Slider(minimum = 0.01, maximum = 1.0, value=0.7, step=0.1, label="Temperature", info="Model temperature", interactive=True)
436
  with gr.Row():
437
- slider_maxtokens = gr.Slider(minimum = 224, maximum = 4096, value=1024, step=32, label="Max Tokens", info="Model max tokens", interactive=True)
438
  with gr.Row():
439
- slider_topk = gr.Slider(minimum = 1, maximum = 10, value=3, step=1, label="top-k samples", info="Model top-k samples", interactive=True)
440
  with gr.Row():
441
- llm_progress = gr.Textbox(value="None",label="QA chain initialization")
442
  with gr.Row():
443
  qachain_btn = gr.Button("Initialize Question Answering chain")
444
 
@@ -462,42 +456,151 @@ def demo():
462
  with gr.Row("Metrics"):
463
  metrics_output = gr.Textbox(lines=10, label="Evaluation Metrics")
464
 
465
-
466
  # Preprocessing events
467
- #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
468
- db_btn.click(initialize_database, \
469
- inputs=[document, slider_chunk_size, slider_chunk_overlap], \
470
- outputs=[vector_db, collection_name, db_progress])
471
- qachain_btn.click(initialize_LLM, \
472
- inputs=[llm_btn, slider_temperature, slider_maxtokens, slider_topk, vector_db], \
473
- outputs=[qa_chain, llm_progress]).then(lambda:[None,"",0,"",0,"",0], \
474
- inputs=None, \
475
- outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
476
- queue=False)
477
 
478
  # Chatbot events
479
- msg.submit(interact, inputs=[gr.State(),qa_chain, msg, history], outputs=[
480
- gr.State(), chatbot, history, response_source1, response_source1_page,
481
- response_source2, response_source2_page, response_source3, response_source3_page,
482
- None, None, None, metrics_output
483
- ],queue=False)
484
 
 
 
 
485
 
486
- submit_btn.click(conversation, \
487
- inputs=[qa_chain, msg, chatbot], \
488
- outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
489
- queue=False)
490
- clear_btn.click(lambda:[None,"",0,"",0,"",0], \
491
- inputs=None, \
492
- outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
493
- queue=False)
494
 
 
495
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
497
 
 
 
 
 
 
 
 
 
 
 
 
 
498
 
499
- demo.queue().launch(debug=True)
 
 
 
 
 
500
 
501
 
502
- if __name__ == "__main__":
503
- demo()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  # initialize_database(file_path, progress)
386
  return list_file_path
387
 
388
+ ####################################
389
 
 
 
 
 
 
390
  def demo():
391
  with gr.Blocks(theme="base") as demo:
392
  vector_db = gr.State()
393
  qa_chain = gr.State()
394
  collection_name = gr.State()
395
+ history = gr.State([]) # Initialize history as an empty list
396
 
397
  gr.Markdown(
398
  """<center><h2>PDF-based chatbot</center></h2>
399
  <h3>Ask any questions about your PDF documents</h3>""")
400
  gr.Markdown(
401
  """<b>Note:</b> This AI assistant, using Langchain and open-source LLMs, performs retrieval-augmented generation (RAG) from your PDF documents. \
402
+ The user interface explicitly shows multiple steps to help understand the RAG workflow.
403
  This chatbot takes past questions into account when generating answers (via conversational memory), and includes document references for clarity purposes.<br>
404
  <br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate a reply.
405
  """)
 
407
  with gr.Tab("Step 1 - Upload PDF"):
408
  with gr.Row():
409
  document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
 
410
 
411
  with gr.Tab("Step 2 - Process document"):
412
  with gr.Row():
413
+ db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value="ChromaDB", type="index", info="Choose your vector database")
414
  with gr.Accordion("Advanced options - Document text splitter", open=False):
415
  with gr.Row():
416
+ slider_chunk_size = gr.Slider(minimum=100, maximum=1000, value=600, step=20, label="Chunk size", info="Chunk size", interactive=True)
417
  with gr.Row():
418
+ slider_chunk_overlap = gr.Slider(minimum=10, maximum=200, value=40, step=10, label="Chunk overlap", info="Chunk overlap", interactive=True)
419
  with gr.Row():
420
  db_progress = gr.Textbox(label="Vector database initialization", value="None")
421
  with gr.Row():
 
423
 
424
  with gr.Tab("Step 3 - Initialize QA chain"):
425
  with gr.Row():
426
+ llm_btn = gr.Radio(list_llm_simple, label="LLM models", value=list_llm_simple[0], type="index", info="Choose your LLM model")
 
427
  with gr.Accordion("Advanced options - LLM model", open=False):
428
  with gr.Row():
429
+ slider_temperature = gr.Slider(minimum=0.01, maximum=1.0, value=0.7, step=0.1, label="Temperature", info="Model temperature", interactive=True)
430
  with gr.Row():
431
+ slider_maxtokens = gr.Slider(minimum=224, maximum=4096, value=1024, step=32, label="Max Tokens", info="Model max tokens", interactive=True)
432
  with gr.Row():
433
+ slider_topk = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top-k samples", info="Model top-k samples", interactive=True)
434
  with gr.Row():
435
+ llm_progress = gr.Textbox(value="None", label="QA chain initialization")
436
  with gr.Row():
437
  qachain_btn = gr.Button("Initialize Question Answering chain")
438
 
 
456
  with gr.Row("Metrics"):
457
  metrics_output = gr.Textbox(lines=10, label="Evaluation Metrics")
458
 
 
459
  # Preprocessing events
460
+ db_btn.click(initialize_database,
461
+ inputs=[document, slider_chunk_size, slider_chunk_overlap],
462
+ outputs=[vector_db, collection_name, db_progress])
463
+
464
+ qachain_btn.click(initialize_LLM,
465
+ inputs=[llm_btn, slider_temperature, slider_maxtokens, slider_topk, vector_db],
466
+ outputs=[qa_chain, llm_progress]).then(lambda: [None, "", 0, "", 0, "", 0],
467
+ inputs=None,
468
+ outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
469
+ queue=False)
470
 
471
  # Chatbot events
472
+ msg.submit(interact, inputs=[qa_chain, msg, history], outputs=[
473
+ qa_chain, chatbot, history, doc_source1, source1_page,
474
+ doc_source2, source2_page, doc_source3, source3_page,
475
+ metrics_output
476
+ ])
477
 
478
+ submit_btn.click(interact,
479
+ inputs=[qa_chain, msg, history],
480
+ outputs=[qa_chain, chatbot, history, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page, metrics_output])
481
 
482
+ clear_btn.click(lambda: [None, "", 0, "", 0, "", 0],
483
+ inputs=None,
484
+ outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
485
+ queue=False)
 
 
 
 
486
 
487
+ demo.queue().launch(debug=True)
488
 
489
+ if __name__ == "__main__":
490
+ demo()
491
+
492
+
493
+
494
+
495
+
496
+ ###################################
497
+ # def demo():
498
+ # with gr.Blocks(theme="base") as demo:
499
+ # vector_db = gr.State()
500
+ # qa_chain = gr.State()
501
+ # collection_name = gr.State()
502
+ # history = gr.State()
503
 
504
+ # gr.Markdown(
505
+ # """<center><h2>PDF-based chatbot</center></h2>
506
+ # <h3>Ask any questions about your PDF documents</h3>""")
507
+ # gr.Markdown(
508
+ # """<b>Note:</b> This AI assistant, using Langchain and open-source LLMs, performs retrieval-augmented generation (RAG) from your PDF documents. \
509
+ # The user interface explicitely shows multiple steps to help understand the RAG workflow.
510
+ # This chatbot takes past questions into account when generating answers (via conversational memory), and includes document references for clarity purposes.<br>
511
+ # <br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate a reply.
512
+ # """)
513
+
514
+ # with gr.Tab("Step 1 - Upload PDF"):
515
+ # with gr.Row():
516
+ # document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
517
+ # # upload_btn = gr.UploadButton("Loading document...", height=100, file_count="multiple", file_types=["pdf"], scale=1)
518
+
519
+ # with gr.Tab("Step 2 - Process document"):
520
+ # with gr.Row():
521
+ # db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database")
522
+ # with gr.Accordion("Advanced options - Document text splitter", open=False):
523
+ # with gr.Row():
524
+ # slider_chunk_size = gr.Slider(minimum = 100, maximum = 1000, value=600, step=20, label="Chunk size", info="Chunk size", interactive=True)
525
+ # with gr.Row():
526
+ # slider_chunk_overlap = gr.Slider(minimum = 10, maximum = 200, value=40, step=10, label="Chunk overlap", info="Chunk overlap", interactive=True)
527
+ # with gr.Row():
528
+ # db_progress = gr.Textbox(label="Vector database initialization", value="None")
529
+ # with gr.Row():
530
+ # db_btn = gr.Button("Generate vector database")
531
+
532
+ # with gr.Tab("Step 3 - Initialize QA chain"):
533
+ # with gr.Row():
534
+ # llm_btn = gr.Radio(list_llm_simple, \
535
+ # label="LLM models", value = list_llm_simple[0], type="index", info="Choose your LLM model")
536
+ # with gr.Accordion("Advanced options - LLM model", open=False):
537
+ # with gr.Row():
538
+ # slider_temperature = gr.Slider(minimum = 0.01, maximum = 1.0, value=0.7, step=0.1, label="Temperature", info="Model temperature", interactive=True)
539
+ # with gr.Row():
540
+ # slider_maxtokens = gr.Slider(minimum = 224, maximum = 4096, value=1024, step=32, label="Max Tokens", info="Model max tokens", interactive=True)
541
+ # with gr.Row():
542
+ # slider_topk = gr.Slider(minimum = 1, maximum = 10, value=3, step=1, label="top-k samples", info="Model top-k samples", interactive=True)
543
+ # with gr.Row():
544
+ # llm_progress = gr.Textbox(value="None",label="QA chain initialization")
545
+ # with gr.Row():
546
+ # qachain_btn = gr.Button("Initialize Question Answering chain")
547
+
548
+ # with gr.Tab("Step 4 - Chatbot"):
549
+ # chatbot = gr.Chatbot(height=300)
550
+ # with gr.Accordion("Advanced - Document references", open=False):
551
+ # with gr.Row():
552
+ # doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
553
+ # source1_page = gr.Number(label="Page", scale=1)
554
+ # with gr.Row():
555
+ # doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
556
+ # source2_page = gr.Number(label="Page", scale=1)
557
+ # with gr.Row():
558
+ # doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
559
+ # source3_page = gr.Number(label="Page", scale=1)
560
+ # with gr.Row():
561
+ # msg = gr.Textbox(placeholder="Type message (e.g. 'What is this document about?')", container=True)
562
+ # with gr.Row():
563
+ # submit_btn = gr.Button("Submit message")
564
+ # clear_btn = gr.ClearButton([msg, chatbot], value="Clear conversation")
565
+ # with gr.Row("Metrics"):
566
+ # metrics_output = gr.Textbox(lines=10, label="Evaluation Metrics")
567
 
568
+
569
+ # # Preprocessing events
570
+ # #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
571
+ # db_btn.click(initialize_database, \
572
+ # inputs=[document, slider_chunk_size, slider_chunk_overlap], \
573
+ # outputs=[vector_db, collection_name, db_progress])
574
+ # qachain_btn.click(initialize_LLM, \
575
+ # inputs=[llm_btn, slider_temperature, slider_maxtokens, slider_topk, vector_db], \
576
+ # outputs=[qa_chain, llm_progress]).then(lambda:[None,"",0,"",0,"",0], \
577
+ # inputs=None, \
578
+ # outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
579
+ # queue=False)
580
 
581
+ # Chatbot events
582
+ # msg.submit(interact, inputs=[gr.State(),qa_chain, msg, history], outputs=[
583
+ # gr.State(), chatbot, history, response_source1, response_source1_page,
584
+ # response_source2, response_source2_page, response_source3, response_source3_page,
585
+ # None, None, None, metrics_output
586
+ # ],queue=False)
587
 
588
 
589
+ # submit_btn.click(conversation, \
590
+ # inputs=[qa_chain, msg, chatbot], \
591
+ # outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
592
+ # queue=False)
593
+ # clear_btn.click(lambda:[None,"",0,"",0,"",0], \
594
+ # inputs=None, \
595
+ # outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
596
+ # queue=False)
597
+
598
+
599
+
600
+
601
+
602
+ # demo.queue().launch(debug=True)
603
+
604
+
605
+ # if __name__ == "__main__":
606
+ # demo()