Spaces:
Sleeping
Sleeping
Commit
·
4417df5
1
Parent(s):
3b59357
Update app.py
Browse files
app.py
CHANGED
|
@@ -66,15 +66,16 @@ def warmup(model_list=model_list, model_inference_endpoints=inference_endpoint):
|
|
| 66 |
return {
|
| 67 |
options: gr.update(visible=True),
|
| 68 |
inputbox: gr.update(visible=True),
|
|
|
|
| 69 |
warmup_button: gr.update(visible=False),
|
| 70 |
welcome_message: gr.update(visible=True)
|
| 71 |
}
|
| 72 |
|
| 73 |
-
def inference(model_list, API_KEY, cot):
|
| 74 |
if len(model_list) != 3:
|
| 75 |
raise gr.Error("Please choose just '3' models! Neither more nor less!")
|
| 76 |
|
| 77 |
-
model_response = Inference(model_list, API_KEY, cot)
|
| 78 |
|
| 79 |
return {
|
| 80 |
output_msg: gr.update(visible=True),
|
|
@@ -137,20 +138,22 @@ TITLE = """<h1 align="center">LLM Agora 🗣️🏦</h1>"""
|
|
| 137 |
|
| 138 |
INTRODUCTION_TEXT = """
|
| 139 |
The **LLM Agora** 🗣️🏦 aims to improve the quality of open-source LMs' responses through debate & revision introduced in [Improving Factuality and Reasoning in Language Models through Multiagent Debate](https://arxiv.org/abs/2305.14325).
|
|
|
|
| 140 |
|
| 141 |
Did you know? 🤔 **LLMs can also improve their responses by debating with other LLMs**! 😮 We applied this concept to several open-source LMs to verify that the open-source model, not the proprietary one, can sufficiently improve the response through discussion. 🤗
|
| 142 |
For more details, please refer to the [GitHub Repository](https://github.com/gauss5930/LLM-Agora).
|
|
|
|
| 143 |
|
| 144 |
You can use LLM Agora with your own questions if the response of open-source LM is not satisfactory and you want to improve the quality!
|
| 145 |
The Math, GSM8K, and MMLU Tabs show the results of the experiment (Llama2, WizardLM2, Orca2), and for inference, please use the 'Inference' tab.
|
| 146 |
|
| 147 |
Here's how to use LLM Agora!
|
| 148 |
|
| 149 |
-
1. Before
|
| 150 |
2. Choose just 3 models! Neither more nor less!
|
| 151 |
3. Check the CoT box if you want to utilize the Chain-of-Thought while inferencing.
|
| 152 |
4. Please fill in your OpenAI API key; it will be used to call ChatGPT to summarize the responses.
|
| 153 |
-
5. Type your question
|
| 154 |
|
| 155 |
For more detailed information, please check '※ Specific information about LLM Agora' at the bottom of the page.
|
| 156 |
"""
|
|
@@ -164,6 +167,8 @@ This is the specific information about LLM Agora!
|
|
| 164 |
|
| 165 |
**Model size**
|
| 166 |
|
|
|
|
|
|
|
| 167 |
|Model name|Model size|
|
| 168 |
|---|---|
|
| 169 |
|Llama2|13B|
|
|
@@ -176,7 +181,7 @@ This is the specific information about LLM Agora!
|
|
| 176 |
|
| 177 |
**Agent numbers & Debate rounds**
|
| 178 |
|
| 179 |
-
- We limit the number of agents and debate rounds because of limitation of resources. As a result, we decided to use 3 agents and 2 rounds of debate!
|
| 180 |
|
| 181 |
**GitHub Repository**
|
| 182 |
|
|
@@ -209,7 +214,7 @@ with gr.Blocks() as demo:
|
|
| 209 |
API_KEY = gr.Textbox(label="OpenAI API Key", value="", info="Please fill in your OpenAI API token.", placeholder="sk..", type="password")
|
| 210 |
with gr.Column(visible=False) as inputbox:
|
| 211 |
question = gr.Textbox(label="Question", value="", info="Please type your question!", placeholder="")
|
| 212 |
-
submit = gr.Button("Submit")
|
| 213 |
|
| 214 |
with gr.Row(visible=False) as output_msg:
|
| 215 |
gr.HTML(RESPONSE_TEXT)
|
|
@@ -292,10 +297,22 @@ with gr.Blocks() as demo:
|
|
| 292 |
gr.HTML("""<h1 align="center"> The result of GSM8K </h1>""")
|
| 293 |
# gr.Image(value="result/GSM8K/gsm_result.png")
|
| 294 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
|
| 296 |
with gr.Tab("MMLU"):
|
| 297 |
mmlu_cot = gr.Checkbox(label="CoT", info="If you want to see CoT result, please check the box.")
|
| 298 |
# mmlu_question_list = gr.Dropdown(mmlu_questions, value=callable(mmlu_questions), label="MMLU Question", every=0.1)
|
|
|
|
| 299 |
with gr.Column():
|
| 300 |
with gr.Row(elem_id="model1_response"):
|
| 301 |
mmlu_model1_output1 = gr.Textbox(label="Llama2🦙's 1️⃣st response")
|
|
@@ -315,10 +332,21 @@ with gr.Blocks() as demo:
|
|
| 315 |
gr.HTML("""<h1 align="center"> The result of MMLU </h1>""")
|
| 316 |
# gr.Image(value="result/MMLU/mmlu_result.png")
|
| 317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
with gr.Accordion("※ Specific information about LLM Agora", open=False):
|
| 319 |
gr.Markdown(SPECIFIC_INFORMATION)
|
| 320 |
|
| 321 |
-
warmup_button.click(warmup, [], [options, inputbox, warmup_button, welcome_message])
|
| 322 |
-
submit.click(inference, [model_list, API_KEY, cot], [output_msg, output_col, model1_output1, model2_output1, model3_output1, summarization_text1, model1_output2, model2_output2, model3_output2, summarization_text2, model1_output3, model2_output3, model3_output3])
|
| 323 |
|
| 324 |
demo.launch()
|
|
|
|
| 66 |
return {
|
| 67 |
options: gr.update(visible=True),
|
| 68 |
inputbox: gr.update(visible=True),
|
| 69 |
+
submit: gr.update(visible=True),
|
| 70 |
warmup_button: gr.update(visible=False),
|
| 71 |
welcome_message: gr.update(visible=True)
|
| 72 |
}
|
| 73 |
|
| 74 |
+
def inference(model_list, question, API_KEY, cot):
|
| 75 |
if len(model_list) != 3:
|
| 76 |
raise gr.Error("Please choose just '3' models! Neither more nor less!")
|
| 77 |
|
| 78 |
+
model_response = Inference(model_list, question, API_KEY, cot)
|
| 79 |
|
| 80 |
return {
|
| 81 |
output_msg: gr.update(visible=True),
|
|
|
|
| 138 |
|
| 139 |
INTRODUCTION_TEXT = """
|
| 140 |
The **LLM Agora** 🗣️🏦 aims to improve the quality of open-source LMs' responses through debate & revision introduced in [Improving Factuality and Reasoning in Language Models through Multiagent Debate](https://arxiv.org/abs/2305.14325).
|
| 141 |
+
Thank you to the authors of this paper for suggesting a great idea!
|
| 142 |
|
| 143 |
Did you know? 🤔 **LLMs can also improve their responses by debating with other LLMs**! 😮 We applied this concept to several open-source LMs to verify that the open-source model, not the proprietary one, can sufficiently improve the response through discussion. 🤗
|
| 144 |
For more details, please refer to the [GitHub Repository](https://github.com/gauss5930/LLM-Agora).
|
| 145 |
+
You can also check the results in this Space!
|
| 146 |
|
| 147 |
You can use LLM Agora with your own questions if the response of open-source LM is not satisfactory and you want to improve the quality!
|
| 148 |
The Math, GSM8K, and MMLU Tabs show the results of the experiment (Llama2, WizardLM2, Orca2), and for inference, please use the 'Inference' tab.
|
| 149 |
|
| 150 |
Here's how to use LLM Agora!
|
| 151 |
|
| 152 |
+
1. Before starting, click the 'Warm-up LLM Agora 🔥' button and wait until 'LLM Agora Ready!!' appears. (We suggest going to grab a coffee ☕ since it takes about 5 minutes!)
|
| 153 |
2. Choose just 3 models! Neither more nor less!
|
| 154 |
3. Check the CoT box if you want to utilize the Chain-of-Thought while inferencing.
|
| 155 |
4. Please fill in your OpenAI API key; it will be used to call ChatGPT to summarize the responses.
|
| 156 |
+
5. Type your question in the Question box and click the 'Submit' button! If you do so, LLM Agora will show you improved answers! 🤗 (It will take roughly a minute! Please wait for an answer!)
|
| 157 |
|
| 158 |
For more detailed information, please check '※ Specific information about LLM Agora' at the bottom of the page.
|
| 159 |
"""
|
|
|
|
| 167 |
|
| 168 |
**Model size**
|
| 169 |
|
| 170 |
+
Except for Falcon, all the other models are based on Llama2.
|
| 171 |
+
|
| 172 |
|Model name|Model size|
|
| 173 |
|---|---|
|
| 174 |
|Llama2|13B|
|
|
|
|
| 181 |
|
| 182 |
**Agent numbers & Debate rounds**
|
| 183 |
|
| 184 |
+
- We limit the number of agents and debate rounds because of the limitation of resources. As a result, we decided to use 3 agents and 2 rounds of debate!
|
| 185 |
|
| 186 |
**GitHub Repository**
|
| 187 |
|
|
|
|
| 214 |
API_KEY = gr.Textbox(label="OpenAI API Key", value="", info="Please fill in your OpenAI API token.", placeholder="sk..", type="password")
|
| 215 |
with gr.Column(visible=False) as inputbox:
|
| 216 |
question = gr.Textbox(label="Question", value="", info="Please type your question!", placeholder="")
|
| 217 |
+
submit = gr.Button("Submit", visible=False)
|
| 218 |
|
| 219 |
with gr.Row(visible=False) as output_msg:
|
| 220 |
gr.HTML(RESPONSE_TEXT)
|
|
|
|
| 297 |
gr.HTML("""<h1 align="center"> The result of GSM8K </h1>""")
|
| 298 |
# gr.Image(value="result/GSM8K/gsm_result.png")
|
| 299 |
|
| 300 |
+
gsm_cot.select(
|
| 301 |
+
gsm_display_question_answer,
|
| 302 |
+
[gsm_question_list, gsm_cot],
|
| 303 |
+
[gsm_model1_output1, gsm_model2_output1, gsm_model3_output1, gsm_summarization_text1, gsm_model1_output2, gsm_model2_output2, gsm_model3_output2, gsm_summarization_text2, gsm_model1_output3, gsm_model2_output3, gsm_model3_output3]
|
| 304 |
+
)
|
| 305 |
+
gsm_question_list.change(
|
| 306 |
+
gsm_display_question_answer,
|
| 307 |
+
[gsm_question_list, gsm_cot],
|
| 308 |
+
[gsm_model1_output1, gsm_model2_output1, gsm_model3_output1, gsm_summarization_text1, gsm_model1_output2, gsm_model2_output2, gsm_model3_output2, gsm_summarization_text2, gsm_model1_output3, gsm_model2_output3, gsm_model3_output3]
|
| 309 |
+
)
|
| 310 |
+
|
| 311 |
|
| 312 |
with gr.Tab("MMLU"):
|
| 313 |
mmlu_cot = gr.Checkbox(label="CoT", info="If you want to see CoT result, please check the box.")
|
| 314 |
# mmlu_question_list = gr.Dropdown(mmlu_questions, value=callable(mmlu_questions), label="MMLU Question", every=0.1)
|
| 315 |
+
|
| 316 |
with gr.Column():
|
| 317 |
with gr.Row(elem_id="model1_response"):
|
| 318 |
mmlu_model1_output1 = gr.Textbox(label="Llama2🦙's 1️⃣st response")
|
|
|
|
| 332 |
gr.HTML("""<h1 align="center"> The result of MMLU </h1>""")
|
| 333 |
# gr.Image(value="result/MMLU/mmlu_result.png")
|
| 334 |
|
| 335 |
+
mmlu_cot.select(
|
| 336 |
+
mmlu_display_question_answer,
|
| 337 |
+
[mmlu_question_list, mmlu_cot],
|
| 338 |
+
[mmlu_model1_output1, mmlu_model2_output1, mmlu_model3_output1, mmlu_summarization_text1, mmlu_model1_output2, mmlu_model2_output2, mmlu_model3_output2, mmlu_summarization_text2, mmlu_model1_output3, mmlu_model2_output3, mmlu_model3_output3]
|
| 339 |
+
)
|
| 340 |
+
mmlu_question_list.change(
|
| 341 |
+
mmlu_display_question_answer,
|
| 342 |
+
[mmlu_question_list, mmlu_cot],
|
| 343 |
+
[mmlu_model1_output1, mmlu_model2_output1, mmlu_model3_output1, mmlu_summarization_text1, mmlu_model1_output2, mmlu_model2_output2, mmlu_model3_output2, mmlu_summarization_text2, mmlu_model1_output3, mmlu_model2_output3, mmlu_model3_output3]
|
| 344 |
+
)
|
| 345 |
+
|
| 346 |
with gr.Accordion("※ Specific information about LLM Agora", open=False):
|
| 347 |
gr.Markdown(SPECIFIC_INFORMATION)
|
| 348 |
|
| 349 |
+
warmup_button.click(warmup, [], [options, inputbox, submit, warmup_button, welcome_message])
|
| 350 |
+
submit.click(inference, [model_list, question, API_KEY, cot], [output_msg, output_col, model1_output1, model2_output1, model3_output1, summarization_text1, model1_output2, model2_output2, model3_output2, summarization_text2, model1_output3, model2_output3, model3_output3])
|
| 351 |
|
| 352 |
demo.launch()
|