Spaces:

Cartinoe5930
/

LLMAgora

Sleeping

App Files Files Community

Cartinoe5930 committed on Sep 26, 2023

Commit

a71589c

1 Parent(s): 01f28a4

Update app.py

Browse files

Files changed (1) hide show

app.py +227 -33

app.py CHANGED Viewed

@@ -1,77 +1,271 @@
 import gradio as gr
-def inference(model_list, cot):
     if len(model_list) != 3:
         raise gr.Error("Please choose just '3' models! Neither more nor less!")
-    if cot:
-        b = "CoT was also used."
-    else:
-        b = ""
-    a = f"Hello, {model_list[0]}, {model_list[1]}, and {model_list[2]}!! {b}"
     return {
         output_msg: gr.update(visible=True),
         output_col: gr.update(visible=True),
-        model1_output1: a
     }
 TITLE = """<h1 align="center">LLM Agora 🗣️🏦</h1>"""
 INTRODUCTION_TEXT = """
 The **LLM Agora** 🗣️🏦 aims to improve the quality of open-source LMs' responses through debate & revision introduced in [Improving Factuality and Reasoning in Language Models through Multiagent Debate](https://arxiv.org/abs/2305.14325).
 Do you know that? 🤔 **LLMs can also improve their responses by debating with other LLMs**! 😮 We applied this concept to several open-source LMs to verify that the open-source model, not the proprietary one, can sufficiently improve the response through discussion. 🤗
-For more details, please refer to the GitHub Repository below.
 You can use LLM Agora with your own questions if the response of open-source LM is not satisfactory and you want to improve the quality!
-The Math, GSM8K, and MMLU Tabs show the results of the experiment, and for inference, please use the 'Inference' tab.
-Please check the more specific information in [GitHub Repository](https://github.com/gauss5930/LLM-Agora)!
 """
 RESPONSE_TEXT = """<h1 align="center">🤗 Here are the responses to each model!! 🤗</h1>"""
-with gr.Blocks() as demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT)
     with gr.Column():
         with gr.Tab("Inference"):
-            with gr.Row():
                 with gr.Column():
-                    model_list = gr.CheckboxGroup(["Llama2", "Alpaca", "Vicuna", "Koala", "Falcon", "Baize", "WizardLM", "Orca", "phi-1.5"], label="Model Selection", info="Choose 3 LMs to participate in LLM Agora.", type="value")
                     cot = gr.Checkbox(label="CoT", info="Do you want to use CoT for inference?")
-                with gr.Column():
                     API_KEY = gr.Textbox(label="OpenAI API Key", value="", info="Please fill in your OpenAI API token.", placeholder="sk..", type="password")
-                    auth_token = gr.Textbox(label="Huggingface Authentication Token", value="", info="Please fill in your HuggingFace Authentication token.", placeholder="hf..", type="password")
-            with gr.Column():
-                question = gr.Textbox(value="", info="Please type your question!", placeholder="")
-                output = gr.Textbox()
             submit = gr.Button("Submit")
             with gr.Row(visible=False) as output_msg:
                 gr.HTML(RESPONSE_TEXT)
-            with gr.Row(visible=False) as output_col:
-                with gr.Column():
                     model1_output1 = gr.Textbox(label="1️⃣ model's initial response")
-                    model1_output2 = gr.Textbox(label="1️⃣ model's revised response")
-                    model1_output3 = gr.Textbox(label="1️⃣ model's final response")
-                with gr.Column():
                     model2_output1 = gr.Textbox(label="2️⃣ model's initial response")
                     model2_output2 = gr.Textbox(label="2️⃣ model's revised response")
                     model2_output3 = gr.Textbox(label="2️⃣ model's final response")
-                with gr.Column():
-                    model2_output1 = gr.Textbox(label="3️⃣ model's initial response")
-                    model2_output2 = gr.Textbox(label="3️⃣ model's revised response")
-                    model2_output3 = gr.Textbox(label="3️⃣ model's final response")
         with gr.Tab("Math"):
-            output_math = gr.Textbox()
         with gr.Tab("GSM8K"):
-            output_gsm = gr.Textbox()
         with gr.Tab("MMLU"):
-            output_mmlu = gr.Textbox()
-    submit.click(inference, [model_list], [output_msg, output_col, model1_output1])
-demo.launch(debug=True)

+import json
+import os
+import time
 import gradio as gr
+import requests
+from model_inference import Inference
+# Hugging Face access token, read from the process environment.
+HF_TOKEN = os.environ.get("HF_TOKEN")
+if not HF_TOKEN:
+    # Fail fast with an actionable message; otherwise the header concatenation
+    # below dies with an opaque "can only concatenate str" TypeError.
+    raise RuntimeError("The HF_TOKEN environment variable must be set to call the inference endpoints.")
+# Keys of src/inference_endpoint.json, one per model that can be warmed up.
+model_list = ["llama", "llama-chat", "vicuna", "falcon", "falcon-instruct", "orca", "wizardlm"]
+with open("src/inference_endpoint.json", "r") as f:
+    inference_endpoint = json.load(f)
+# Each endpoint's "Authorization" header is completed by appending the token
+# (presumably the JSON stores only a "Bearer " prefix -- confirm against the file).
+for model_name in model_list:
+    inference_endpoint[model_name]["headers"]["Authorization"] += HF_TOKEN
+def warmup(model_list=model_list, model_inference_endpoints=inference_endpoint):
+    """Ping every model endpoint, wait five minutes, then reveal the inference controls.
+
+    Args:
+        model_list: Keys of ``model_inference_endpoints`` to ping.
+        model_inference_endpoints: Mapping of model key to a dict holding that
+            endpoint's ``"API_URL"`` and request ``"headers"``.
+
+    Returns:
+        dict: Gradio component -> ``gr.update(...)`` that shows the model
+        options, the question box and the welcome banner and hides the
+        warm-up button.
+    """
+    for model_name in model_list:
+        # Look the endpoint up by model key first, then read its fields.
+        # (Subscripting the model name itself with "API_URL" raised TypeError.)
+        endpoint = model_inference_endpoints[model_name]
+        try:
+            # The reply is not used; the request only has to reach the endpoint.
+            # The timeout keeps one unresponsive endpoint from blocking the loop forever.
+            requests.post(endpoint["API_URL"], headers=endpoint["headers"], json={"inputs": "Hello."}, timeout=30)
+        except requests.RequestException as error:
+            # Report the failure and keep pinging the remaining endpoints.
+            print(f"Warm-up request for '{model_name}' failed: {error}")
+    # Fixed five-minute wait announced to the user in INTRODUCTION_TEXT.
+    time.sleep(300)
+    return {
+        options: gr.update(visible=True),
+        inputbox: gr.update(visible=True),
+        warmup_button: gr.update(visible=False),
+        welcome_message: gr.update(visible=True)
+    }
+def inference(model_list, API_KEY, cot):
+    """Run the debate for the three selected models and fill the response boxes.
+
+    Args:
+        model_list: Names ticked in the model checkbox group; exactly three are required.
+        API_KEY: OpenAI API key, forwarded to ``Inference``.
+        cot: Whether the CoT box is ticked, forwarded to ``Inference``.
+
+    Returns:
+        dict: Gradio component -> new value or visibility update.
+
+    Raises:
+        gr.Error: If the number of selected models is not three.
+    """
+    # NOTE(review): the question typed by the user is not a parameter here --
+    # confirm how Inference obtains it.
     if len(model_list) != 3:
         raise gr.Error("Please choose just '3' models! Neither more nor less!")
+    debate = Inference(model_list, API_KEY, cot)
+    updates = {
+        output_msg: gr.update(visible=True),
+        output_col: gr.update(visible=True),
+    }
+    # One row of boxes per debate round: initial, revised, final.
+    round_boxes = (
+        (model1_output1, model2_output1, model3_output1),
+        (model1_output2, model2_output2, model3_output2),
+        (model1_output3, model2_output3, model3_output3),
+    )
+    # A summary follows the first two rounds only.
+    summary_boxes = (summarization_text1, summarization_text2)
+    for round_index, boxes in enumerate(round_boxes):
+        for box, model_name in zip(boxes, model_list):
+            updates[box] = debate["agent_response"][model_name][round_index]
+        if round_index < len(summary_boxes):
+            updates[summary_boxes[round_index]] = debate["summarization"][round_index]
+    return updates
+def load_responses():
+    """Read the stored benchmark result files under ``result/``.
+
+    Returns:
+        tuple: ``(math_responses, gsm_responses)`` parsed from
+        ``math_result.json`` and ``gsm_result.json``.
+    """
+    def read_json(path):
+        with open(path, "r") as handle:
+            return json.load(handle)
+    math_responses = read_json("result/Math/math_result.json")
+    # The CoT result files are parsed as well, but their contents are not returned yet.
+    _math_cot_responses = read_json("result/Math/math_result_cot.json")
+    gsm_responses = read_json("result/GSM8K/gsm_result.json")
+    _gsm_cot_responses = read_json("result/GSM8K/gsm_result_cot.json")
+    # TODO: load the MMLU result files and return them as a third element.
+    return math_responses, gsm_responses
+def load_questions(math, gsm, mmlu=None, limit=100):
+    """Build the numbered question labels for the benchmark dropdowns.
+
+    Args:
+        math: Sequence of Math records, each a mapping with a ``"question"`` string.
+        gsm: Sequence of GSM8K records with the same layout.
+        mmlu: Reserved for the MMLU records; currently ignored. Optional so the
+            two-argument call used at module level works.
+        limit: Maximum number of questions taken from each benchmark. Defaults
+            to 100, the count that used to be hard-coded.
+
+    Returns:
+        tuple: ``(math_questions, gsm_questions)``, two lists of strings of the
+        form ``"<n>. <question>"`` numbered from 1.
+    """
+    def numbered(records):
+        # Slicing (rather than indexing 0..99) tolerates fewer than `limit` records.
+        return [f"{number}. " + record["question"] for number, record in enumerate(records[:limit], start=1)]
+    # TODO: also return the numbered MMLU questions once the MMLU data is loaded.
+    return numbered(math), numbered(gsm)
+# Load the stored benchmark results once at import time.
+# TODO: also unpack mmlu_result once load_responses() returns the MMLU data.
+math_result, gsm_result = load_responses()
+# MMLU questions are not loaded yet, so None is passed explicitly for that slot.
+math_questions, gsm_questions = load_questions(math_result, gsm_result, None)
+# Page heading, rendered as raw HTML at the top of the app.
 TITLE = """<h1 align="center">LLM Agora 🗣️🏦</h1>"""
+# Markdown shown under the heading: what LLM Agora is and how to use the Inference tab.
 INTRODUCTION_TEXT = """
 The **LLM Agora** 🗣️🏦 aims to improve the quality of open-source LMs' responses through debate & revision introduced in [Improving Factuality and Reasoning in Language Models through Multiagent Debate](https://arxiv.org/abs/2305.14325).
 Do you know that? 🤔 **LLMs can also improve their responses by debating with other LLMs**! 😮 We applied this concept to several open-source LMs to verify that the open-source model, not the proprietary one, can sufficiently improve the response through discussion. 🤗
+For more details, please refer to the [GitHub Repository](https://github.com/gauss5930/LLM-Agora).
 You can use LLM Agora with your own questions if the response of open-source LM is not satisfactory and you want to improve the quality!
+The Math, GSM8K, and MMLU Tabs show the results of the experiment(Llama2, WizardLM2, Orca2), and for inference, please use the 'Inference' tab.
+Here's how to use LLM Agora!
+1. Before start, click the 'Warm-up LLM Agora 🔥' button and wait until 'LLM Agora Ready!!' appears. (Go grab a coffee☕ since it takes 5 minutes!)
+2. Choose just 3 models! Neither more nor less!
+3. Check the CoT box if you want to utilize the Chain-of-Thought while inferencing.
+4. Please fill in your OpenAI API KEY, it will be used to use ChatGPT to summarize the responses.
+5. Type your question to Question box and click the 'Submit' button! If you do so, LLM Agora will show you improved answers! 🤗 (It will spend roughly a minute! Please wait for an answer!)
+For more detailed information, please check '※ Specific information about LLM Agora' at the bottom of the page.
 """
+# Banner that warmup() makes visible once the warm-up has finished.
+WELCOME_TEXT = """<h1 align="center">🤗🔥 Welcome to LLM Agora 🔥🤗</h1>"""
+# Heading placed in the output_msg row, which inference() makes visible.
 RESPONSE_TEXT = """<h1 align="center">🤗 Here are the responses to each model!! 🤗</h1>"""
+# Markdown rendered inside the "Specific information about LLM Agora" accordion:
+# model sizes, agent/round limits, the repository link and the paper citation.
+SPECIFIC_INFORMATION = """
+This is the specific information about LLM Agora!
+**Model size**
+|Model name|Model size|
+|---|---|
+|Llama2|13B|
+|Llama2-Chat|13B|
+|Vicuna|13B|
+|Falcon|7B|
+|Falcon-Instruct|7B|
+|WizardLM|13B|
+|Orca|13B|
+**Agent numbers & Debate rounds**
+- We limit the number of agents and debate rounds because of limitation of resources. As a result, we decided to use 3 agents and 2 rounds of debate!
+**GitHub Repository**
+- If you want to see more specific information, please check the [GitHub Repository](https://github.com/gauss5930/LLM-Agora) of LLM Agora!
+**Citation**
+```
+@article{du2023improving,
+  title={Improving Factuality and Reasoning in Language Models through Multiagent Debate},
+  author={Du, Yilun and Li, Shuang and Torralba, Antonio and Tenenbaum, Joshua B and Mordatch, Igor},
+  journal={arXiv preprint arXiv:2305.14325},
+  year={2023}
+}
+```
+"""
+# Custom CSS for the page. No stylesheet is defined in this file, so default to
+# None (Gradio's own default) instead of failing with a NameError.
+# TODO: replace with real rules for the elem_id values used on the response rows.
+block_css = None
+# Top-level layout: heading and introduction, then one column holding the tabs.
+with gr.Blocks(css=block_css) as demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT)
     with gr.Column():
         with gr.Tab("Inference"):
+            # Only the warm-up button is visible at first; warmup() reveals the
+            # model options, the question box and the welcome banner.
+            warmup_button = gr.Button("Warm-up LLM Agora 🔥", visible=True)
+            welcome_message = gr.HTML(WELCOME_TEXT, visible=False)
+            with gr.Row(visible=False) as options:
                 with gr.Column():
+                    model_list = gr.CheckboxGroup(["Llama2🦙", "Llama2-Chat🦙", "Vicuna🦙", "Falcon🦅", "Falcon-Instruct🦅", "WizardLM🧙‍♂️", "Orca🐬"], label="Model Selection", info="Choose 3 LMs to participate in LLM Agora.", type="value")
                     cot = gr.Checkbox(label="CoT", info="Do you want to use CoT for inference?")
+                # Left unnamed: `inputbox` must refer to the question column below,
+                # and binding it here too was immediately overwritten.
+                with gr.Column():
                     API_KEY = gr.Textbox(label="OpenAI API Key", value="", info="Please fill in your OpenAI API token.", placeholder="sk..", type="password")
+            with gr.Column(visible=False) as inputbox:
+                question = gr.Textbox(label="Question", value="", info="Please type your question!", placeholder="")
             submit = gr.Button("Submit")
             with gr.Row(visible=False) as output_msg:
                 gr.HTML(RESPONSE_TEXT)
+            # Hidden until inference() returns: one row per debate round, with a
+            # summary box after the first and second rounds.
+            with gr.Column(visible=False) as output_col:
+                with gr.Row(elem_id="model1_response"):
                     model1_output1 = gr.Textbox(label="1️⃣ model's initial response")
                     model2_output1 = gr.Textbox(label="2️⃣ model's initial response")
+                    model3_output1 = gr.Textbox(label="3️⃣ model's initial response")
+                # `label` was misspelled `lebel`, so this box never got its caption.
+                summarization_text1 = gr.Textbox(label="Summarization 1")
+                with gr.Row(elem_id="model2_response"):
+                    model1_output2 = gr.Textbox(label="1️⃣ model's revised response")
                     model2_output2 = gr.Textbox(label="2️⃣ model's revised response")
+                    model3_output2 = gr.Textbox(label="3️⃣ model's revised response")
+                summarization_text2 = gr.Textbox(label="Summarization 2")
+                with gr.Row(elem_id="model3_response"):
+                    model1_output3 = gr.Textbox(label="1️⃣ model's final response")
                     model2_output3 = gr.Textbox(label="2️⃣ model's final response")
+                    model3_output3 = gr.Textbox(label="3️⃣ model's final response")
+            # The "Specific information" accordion is not repeated inside this tab:
+            # the copy placed after the tabs is already shown at the bottom of the page.
         with gr.Tab("Math"):
+            # Viewer for the stored Math results (Llama2, WizardLM, Orca).
+            # TODO: no event handler fills the boxes from the selected question yet.
+            math_cot = gr.Checkbox(label="CoT", info="If you want to see CoT result, please check the box.")
+            # `every` was dropped: it only applies when the component value is a callable.
+            math_question_list = gr.Dropdown(math_questions, label="Math Question")
+            with gr.Column():
+                with gr.Row(elem_id="model1_response"):
+                    math_model1_output1 = gr.Textbox(label="Llama2🦙's initial response")
+                    math_model2_output1 = gr.Textbox(label="WizardLM🧙‍♂️'s initial response")
+                    math_model3_output1 = gr.Textbox(label="Orca🐬's initial response")
+                # `label` was misspelled `lebel`, so this box never got its caption.
+                math_summarization_text1 = gr.Textbox(label="Summarization 1")
+                with gr.Row(elem_id="model2_response"):
+                    math_model1_output2 = gr.Textbox(label="Llama2🦙's revised response")
+                    math_model2_output2 = gr.Textbox(label="WizardLM🧙‍♂️'s revised response")
+                    math_model3_output2 = gr.Textbox(label="Orca🐬's revised response")
+                math_summarization_text2 = gr.Textbox(label="Summarization 2")
+                with gr.Row(elem_id="model3_response"):
+                    math_model1_output3 = gr.Textbox(label="Llama2🦙's final response")
+                    math_model2_output3 = gr.Textbox(label="WizardLM🧙‍♂️'s final response")
+                    math_model3_output3 = gr.Textbox(label="Orca🐬's final response")
+            gr.HTML("""<h1 align="center"> The result of Math </h1>""")
+            gr.Image(value="result/Math/math_result.png")
         with gr.Tab("GSM8K"):
+            # Viewer for the stored GSM8K results (Llama2, WizardLM, Orca).
+            # TODO: no event handler fills the boxes from the selected question yet.
+            gsm_cot = gr.Checkbox(label="CoT", info="If you want to see CoT result, please check the box.")
+            # The label was copied from the Math tab; this dropdown lists GSM8K questions.
+            gsm_question_list = gr.Dropdown(gsm_questions, label="GSM8K Question")
+            with gr.Column():
+                with gr.Row(elem_id="model1_response"):
+                    gsm_model1_output1 = gr.Textbox(label="Llama2🦙's initial response")
+                    gsm_model2_output1 = gr.Textbox(label="WizardLM🧙‍♂️'s initial response")
+                    gsm_model3_output1 = gr.Textbox(label="Orca🐬's initial response")
+                # `label` was misspelled `lebel`, so this box never got its caption.
+                gsm_summarization_text1 = gr.Textbox(label="Summarization 1")
+                with gr.Row(elem_id="model2_response"):
+                    gsm_model1_output2 = gr.Textbox(label="Llama2🦙's revised response")
+                    gsm_model2_output2 = gr.Textbox(label="WizardLM🧙‍♂️'s revised response")
+                    gsm_model3_output2 = gr.Textbox(label="Orca🐬's revised response")
+                gsm_summarization_text2 = gr.Textbox(label="Summarization 2")
+                with gr.Row(elem_id="model3_response"):
+                    gsm_model1_output3 = gr.Textbox(label="Llama2🦙's final response")
+                    gsm_model2_output3 = gr.Textbox(label="WizardLM🧙‍♂️'s final response")
+                    gsm_model3_output3 = gr.Textbox(label="Orca🐬's final response")
+            gr.HTML("""<h1 align="center"> The result of GSM8K </h1>""")
+            gr.Image(value="result/GSM8K/gsm_result.png")
         with gr.Tab("MMLU"):
+            # Viewer for the stored MMLU results (Llama2, WizardLM, Orca).
+            mmlu_cot = gr.Checkbox(label="CoT", info="If you want to see CoT result, please check the box.")
+            # TODO: add the MMLU question dropdown once the MMLU questions are loaded.
+            with gr.Column():
+                with gr.Row(elem_id="model1_response"):
+                    mmlu_model1_output1 = gr.Textbox(label="Llama2🦙's initial response")
+                    mmlu_model2_output1 = gr.Textbox(label="WizardLM🧙‍♂️'s initial response")
+                    mmlu_model3_output1 = gr.Textbox(label="Orca🐬's initial response")
+                # `label` was misspelled `lebel`, so this box never got its caption.
+                mmlu_summarization_text1 = gr.Textbox(label="Summarization 1")
+                with gr.Row(elem_id="model2_response"):
+                    mmlu_model1_output2 = gr.Textbox(label="Llama2🦙's revised response")
+                    mmlu_model2_output2 = gr.Textbox(label="WizardLM🧙‍♂️'s revised response")
+                    mmlu_model3_output2 = gr.Textbox(label="Orca🐬's revised response")
+                mmlu_summarization_text2 = gr.Textbox(label="Summarization 2")
+                with gr.Row(elem_id="model3_response"):
+                    mmlu_model1_output3 = gr.Textbox(label="Llama2🦙's final response")
+                    mmlu_model2_output3 = gr.Textbox(label="WizardLM🧙‍♂️'s final response")
+                    mmlu_model3_output3 = gr.Textbox(label="Orca🐬's final response")
+            gr.HTML("""<h1 align="center"> The result of MMLU </h1>""")
+            gr.Image(value="result/MMLU/mmlu_result.png")
+        # Placed after the tabs inside the outer column, so it sits below the tab area.
+        with gr.Accordion("※ Specific information about LLM Agora", open=False):
+            gr.Markdown(SPECIFIC_INFORMATION)
+    # The warm-up button passes no inputs; warmup() returns visibility updates
+    # for the four listed components.
+    warmup_button.click(warmup, [], [options, inputbox, warmup_button, welcome_message])
+    # NOTE(review): the `question` textbox is not among the inputs, so the typed
+    # question is never handed to inference() -- confirm how Inference obtains it.
+    submit.click(inference, [model_list, API_KEY, cot], [output_msg, output_col, model1_output1, model2_output1, model3_output1, summarization_text1, model1_output2, model2_output2, model3_output2, summarization_text2, model1_output3, model2_output3, model3_output3])
+demo.launch()