Spaces:

AutomaticReimbursementTool
/

demo

Sleeping

App Files Files Community

ankur-bohra committed on Aug 2, 2023

Commit

2fa693e

1 Parent(s): c0ab6fd

Fix flagging and possible race conditions

Browse files

Files changed (1) hide show

app.py +90 -77

app.py CHANGED Viewed

@@ -1,10 +1,11 @@
 import base64
 import os
 from io import BytesIO
 from pathlib import Path
-from langchain.schema.output_parser import OutputParserException
 import gradio as gr
 from PIL import Image
 import categories
@@ -40,7 +41,6 @@ hf_writer_incorrect = gr.HuggingFaceDatasetSaver(
 #         global example_paths, current_file_path
 #         if current_file_path not in example_paths:
 #             return function(*args, **kwargs)
 def display_file(input_file):
@@ -76,63 +76,72 @@ def clear_inputs():
     return gr.File.update(value=None)
-def submit(input_file, old_text):
     if not input_file:
         gr.Error("Please upload a file to continue!")
         return gr.Textbox.update()
-    print("-"*5)
-    print("New input")
     # Send change to preprocessed image or to extracted text
     if input_file.name.endswith(".pdf"):
         text = process_pdf(Path(input_file.name), extract_only=True)
     else:
         text = process_image(Path(input_file.name), extract_only=True)
-    print("Extracted text")
     return text
-def categorize_extracted_text(extracted_text):
-    category = categories.categorize_text(extracted_text)
-    print("Recognized category:", category)
-    # gr.Info(f"Recognized category: {category}")
     return category
-def parse_from_category(category, extracted_text):
-    if not category:
-        print("Updated with no category:", category)
-        return (
-            gr.Chatbot.update(None),
-            gr.JSON.update(None),
-            gr.Button.update(interactive=False),
-            gr.Button.update(interactive=False),
-        )
-    else:
-        print("Updated with actual category:", category)
     category = Category[category]
-    print("Parsing text from", category)
     chain = categories.category_modules[category].chain
     formatted_prompt = chain.prompt.format_prompt(
-        text=extracted_text,
         format_instructions=chain.output_parser.get_format_instructions(),
     )
     result = chain.generate(
         input_list=[
             {
-                "text": extracted_text,
                 "format_instructions": chain.output_parser.get_format_instructions(),
             }
         ]
     )
-    question = f""
-    if len(formatted_prompt.messages) > 1:
-        question += f"**System:**\n{formatted_prompt.messages[0].content}"
-    question += f"\n\n**Human:**\n{formatted_prompt.messages[1].content}"
-    print("\tConstructed prompt")
     answer = result.generations[0][0].text
-    print("\tProcessed text")
     try:
-        information = chain.output_parser.parse_with_prompt(answer, formatted_prompt)
         information = information.json() if information else {}
     except OutputParserException as e:
         information = {
@@ -140,25 +149,22 @@ def parse_from_category(category, extracted_text):
             "details": str(e),
             "output": e.llm_output,
         }
-    return (
-        gr.Chatbot.update([[question, answer]]),
-        gr.JSON.update(information),
-        gr.Button.update(interactive=True),
-        gr.Button.update(interactive=True),
-    )
-def dynamic_auto_flag(flag_method):
-    def modified_flag_method(share_result, *args, **kwargs):
-        if share_result:
-            flag_method(*args, **kwargs)
-    return modified_flag_method
-# def save_example_and_submit(input_file):
-#     example_paths.append(input_file.name)
-#     submit(input_file, "")
 with gr.Blocks(title="Automatic Reimbursement Tool Demo") as page:
@@ -261,35 +267,14 @@ with gr.Blocks(title="Automatic Reimbursement Tool Demo") as page:
                     flag_irrelevant_button = gr.Button(
                         "Flag as irrelevant", variant="stop", interactive=True
                     )
             show_intermediate.change(
                 show_intermediate_outputs, show_intermediate, [intermediate_outputs]
             )
-            clear.click(clear_inputs, None, [input_file])
-            submit_button.click(
-                submit,
-                [input_file, extracted_text],
-                [extracted_text],
-            )
-            # submit_button.click(
-            #     lambda input_file, category, chatbot, information: (print("File supplied, resetting") or (
-            #         gr.Dropdown.update(Category.ACCOMODATION),
-            #         gr.Chatbot.update(None),
-            #         gr.Textbox.update(None),
-            #     )) if input_file else (print("File not supplied, keeping") or print(category, chatbot, information)),
-            #     [input_file, category, chatbot, information],
-            #     [category, chatbot, information],
-            # )
-            extracted_text.change(
-                categorize_extracted_text,
-                [extracted_text],
-                [category],
-            )
-            category.change(
-                parse_from_category,
-                [category, extracted_text],
-                [chatbot, information, flag_incorrect_button, flag_irrelevant_button],
             )
             hf_writer_normal.setup(
@@ -297,11 +282,37 @@ with gr.Blocks(title="Automatic Reimbursement Tool Demo") as page:
                 flagging_dir="flagged",
             )
             flag_method = gr.flagging.FlagMethod(
-                hf_writer_normal, "", "", visual_feedback=True
             )
-            information.change(
-                dynamic_auto_flag(flag_method),
-                inputs=[
                     share_result,
                     input_file,
                     extracted_text,
@@ -310,9 +321,8 @@ with gr.Blocks(title="Automatic Reimbursement Tool Demo") as page:
                     information,
                     contact,
                 ],
-                outputs=None,
                 preprocess=False,
-                queue=False,
             )
             hf_writer_incorrect.setup(
@@ -373,5 +383,8 @@ with gr.Blocks(title="Automatic Reimbursement Tool Demo") as page:
                 queue=False,
             )
 page.launch(show_api=True, show_error=True, debug=True)

 import base64
 import os
+import re
 from io import BytesIO
 from pathlib import Path
 import gradio as gr
+from langchain.schema.output_parser import OutputParserException
 from PIL import Image
 import categories
 #         global example_paths, current_file_path
 #         if current_file_path not in example_paths:
 #             return function(*args, **kwargs)
 def display_file(input_file):
     return gr.File.update(value=None)
def clear_outputs(input_file):
    """Reset the downstream outputs before a newly submitted file is processed.

    This handler is wired to four output components (extracted text,
    category, chatbot, information), so it must always return four values.

    Args:
        input_file: The uploaded file, or a falsy value when nothing is uploaded.

    Returns:
        A 4-tuple. With a file present every entry is ``None`` (clear the
        component); without one every entry is a no-op update so whatever
        is currently displayed is kept.
    """
    if input_file:
        return None, None, None, None
    # Falling off the end would hand Gradio a single None for four outputs;
    # return one "leave unchanged" update per output instead.
    return gr.update(), gr.update(), gr.update(), gr.update()
def extract_text(input_file):
    """Extract the raw text from the uploaded file.

    Args:
        input_file: Uploaded file object whose ``.name`` is the path on
            disk; falsy when nothing has been uploaded.

    Returns:
        The result of ``process_pdf`` for ``.pdf`` uploads, otherwise the
        result of ``process_image``, both called with ``extract_only=True``.

    Raises:
        gr.Error: If no file has been uploaded.
    """
    if not input_file:
        # gr.Error is only shown to the user when it is raised; merely
        # constructing it (as before) silently did nothing.
        raise gr.Error("Please upload a file to continue!")

    file_path = Path(input_file.name)
    # Compare the extension case-insensitively so "SCAN.PDF" is not sent
    # down the image pipeline.
    if input_file.name.lower().endswith(".pdf"):
        return process_pdf(file_path, extract_only=True)
    return process_image(file_path, extract_only=True)
def categorize_text(text):
    """Return the category that ``categories.categorize_text`` assigns to ``text``."""
    return categories.categorize_text(text)
def query(category, text):
    """Build the prompt for ``category`` and stream the model's reply to the chatbot.

    This is a generator yielding two chatbot updates:
    1. the constructed prompt paired with a "Generating..." placeholder;
    2. the same prompt paired with the generated answer.

    Args:
        category: Name of a ``Category`` member; selects which chain is used.
        text: The extracted text to insert into the prompt.

    Yields:
        ``gr.Chatbot.update`` values holding a single ``[question, answer]`` pair.
    """
    category = Category[category]
    chain = categories.category_modules[category].chain
    format_instructions = chain.output_parser.get_format_instructions()
    formatted_prompt = chain.prompt.format_prompt(
        text=text,
        format_instructions=format_instructions,
    )

    # Render the prompt as markdown. With two or more messages the first is
    # shown as the system message and the second as the human message; a
    # single-message prompt is shown as the human message alone (previously
    # that case indexed messages[1] and raised IndexError).
    messages = formatted_prompt.messages
    sections = []
    if len(messages) > 1:
        sections.append(f"**System:**\n{messages[0].content}")
        sections.append(f"**Human:**\n{messages[1].content}")
    elif messages:
        sections.append(f"**Human:**\n{messages[0].content}")
    question = "\n\n".join(sections)

    yield gr.Chatbot.update([[question, "Generating..."]])

    result = chain.generate(
        input_list=[
            {
                "text": text,
                "format_instructions": format_instructions,
            }
        ]
    )
    answer = result.generations[0][0].text
    yield gr.Chatbot.update([[question, answer]])
+PARSING_REGEXP = r"\*\*System:\*\*\n([\s\S]+)\n\n\*\*Human:\*\*\n([\s\S]+)"
+def parse(category, chatbot):
+    """Takes the chatbot prompt and response and updates the extracted information"""
+    global PARSING_REGEXP
+    answer = chatbot[0][1]
+    category = Category[category]
+    chain = categories.category_modules[category].chain
+    yield {"status": "Parsing response..."}
     try:
+        information = chain.output_parser.parse(answer)
         information = information.json() if information else {}
     except OutputParserException as e:
         information = {
             "details": str(e),
             "output": e.llm_output,
         }
+    yield information
def activate_flags():
    """Return a pair of button updates that make both flag buttons interactive."""
    first_button = gr.Button.update(interactive=True)
    second_button = gr.Button.update(interactive=True)
    return first_button, second_button
def deactivate_flags():
    """Return a pair of button updates that make both flag buttons non-interactive."""
    first_button = gr.Button.update(interactive=False)
    second_button = gr.Button.update(interactive=False)
    return first_button, second_button
def flag_if_shared(flag_method):
    """Wrap ``flag_method`` so it is only invoked when sharing is enabled.

    The returned handler takes the share flag first and the Gradio request
    second; the request and all remaining arguments are forwarded to
    ``flag_method``. When the share flag is falsy nothing is called and
    ``None`` is returned.
    """

    def proxy(share_result, request: gr.Request, *args, **kwargs):
        if not share_result:
            return None
        return flag_method(request, *args, **kwargs)

    return proxy
 with gr.Blocks(title="Automatic Reimbursement Tool Demo") as page:
                     flag_irrelevant_button = gr.Button(
                         "Flag as irrelevant", variant="stop", interactive=True
                     )
             show_intermediate.change(
                 show_intermediate_outputs, show_intermediate, [intermediate_outputs]
             )
+            clear.click(clear_inputs, None, [input_file]).then(
+                deactivate_flags,
+                None,
+                [flag_incorrect_button, flag_irrelevant_button],
             )
             hf_writer_normal.setup(
                 flagging_dir="flagged",
             )
             flag_method = gr.flagging.FlagMethod(
+                hf_writer_normal, "", "", visual_feedback=False
             )
+            submit_button.click(
+                clear_outputs,
+                [input_file],
+                [extracted_text, category, chatbot, information],
+            ).then(
+                extract_text,
+                [input_file],
+                [extracted_text],
+            ).then(
+                categorize_text,
+                [extracted_text],
+                [category],
+            ).then(
+                query,
+                [category, extracted_text],
+                [chatbot],
+                queue=True,
+            ).then(
+                parse,
+                [category, chatbot],
+                [information],
+            ).then(
+                activate_flags,
+                None,
+                [flag_incorrect_button, flag_irrelevant_button],
+            ).then(
+                flag_if_shared(flag_method),
+                [
                     share_result,
                     input_file,
                     extracted_text,
                     information,
                     contact,
                 ],
+                None,
                 preprocess=False,
             )
             hf_writer_incorrect.setup(
                 queue=False,
             )
+page.queue(
+    concurrency_count=1,
+    max_size=1,
+)
 page.launch(show_api=True, show_error=True, debug=True)