Spaces:
Running
Running
Updates to prepare for Apriel 1.6 release
Browse files
app.py
CHANGED
|
@@ -30,7 +30,12 @@ BUTTON_WIDTH = 160
|
|
| 30 |
DEFAULT_OPT_OUT_VALUE = DEBUG_MODE
|
| 31 |
|
| 32 |
# If DEBUG_MODEL is True, use an alternative model (without reasoning) for testing
|
| 33 |
-
DEFAULT_MODEL_NAME = "Apriel-1.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
BUTTON_ENABLED = gr.update(interactive=True)
|
| 36 |
BUTTON_DISABLED = gr.update(interactive=False)
|
|
@@ -126,6 +131,9 @@ def run_chat_inference(history, message, state):
|
|
| 126 |
error = None
|
| 127 |
model_name = model_config.get('MODEL_NAME')
|
| 128 |
temperature = model_config.get('TEMPERATURE', DEFAULT_MODEL_TEMPERATURE)
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
# Reinitialize the OpenAI client with a random endpoint from the list
|
| 131 |
setup_model(model_config.get('MODEL_KEY'))
|
|
@@ -435,19 +443,19 @@ def run_chat_inference(history, message, state):
|
|
| 435 |
output += reasoning_content + content
|
| 436 |
|
| 437 |
if is_reasoning:
|
| 438 |
-
parts = output.split(
|
| 439 |
|
| 440 |
if len(parts) > 1:
|
| 441 |
-
if parts[1].endswith(
|
| 442 |
-
parts[1] = parts[1].replace(
|
| 443 |
-
if parts[1].endswith("
|
| 444 |
-
parts[1] = parts[1].replace("
|
| 445 |
-
if parts[1].endswith("
|
| 446 |
-
parts[1] = parts[1].replace("
|
| 447 |
-
if parts[1].endswith("
|
| 448 |
-
parts[1] = parts[1].replace("
|
| 449 |
-
if parts[1].endswith("
|
| 450 |
-
parts[1] = parts[1].replace("
|
| 451 |
|
| 452 |
history[-1 if not completion_started else -2] = gr.ChatMessage(
|
| 453 |
role="assistant",
|
|
@@ -528,14 +536,11 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
|
|
| 528 |
}}
|
| 529 |
</style>
|
| 530 |
""", elem_classes="css-styles")
|
| 531 |
-
|
| 532 |
-
with gr.
|
| 533 |
-
gr.
|
| 534 |
-
""
|
| 535 |
-
|
| 536 |
-
"""
|
| 537 |
-
, elem_classes="banner-message"
|
| 538 |
-
)
|
| 539 |
with gr.Row(variant="panel", elem_classes="responsive-row"):
|
| 540 |
with gr.Column(scale=1, min_width=400, elem_classes="model-dropdown-container"):
|
| 541 |
model_dropdown = gr.Dropdown(
|
|
|
|
| 30 |
DEFAULT_OPT_OUT_VALUE = DEBUG_MODE
|
| 31 |
|
| 32 |
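# If DEBUG_MODEL is True, use an alternative model (without reasoning) for testing.
# NOTE(review): both branches of the conditional below currently name the same model, so DEBUG_MODEL has no effect here — confirm the intended debug model.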
|
| 33 |
+
DEFAULT_MODEL_NAME = "Apriel-1.6-15B-Thinker" if not DEBUG_MODEL else "Apriel-1.6-15B-Thinker"
|
| 34 |
+
|
| 35 |
+
SHOW_BANNER = False
|
| 36 |
+
BANNER_MARKDOWN = """
|
| 37 |
+
<span class="banner-message-text">ℹ️ This app has been updated to use the recommended temperature of 0.6. We had set it to 0.8 earlier and expect 0.6 to be better. Please provide feedback using the model link.</span>
|
| 38 |
+
"""
|
| 39 |
|
| 40 |
BUTTON_ENABLED = gr.update(interactive=True)
|
| 41 |
BUTTON_DISABLED = gr.update(interactive=False)
|
|
|
|
| 131 |
error = None
|
| 132 |
model_name = model_config.get('MODEL_NAME')
|
| 133 |
temperature = model_config.get('TEMPERATURE', DEFAULT_MODEL_TEMPERATURE)
|
| 134 |
+
output_tag_start = model_config.get('OUTPUT_TAG_START', "[BEGIN FINAL RESPONSE]")
|
| 135 |
+
output_tag_end = model_config.get('OUTPUT_TAG_END', "[END FINAL RESPONSE]")
|
| 136 |
+
output_stop_token = model_config.get('OUTPUT_STOP_TOKEN', "<|end|>")
|
| 137 |
|
| 138 |
# Reinitialize the OpenAI client with a random endpoint from the list
|
| 139 |
setup_model(model_config.get('MODEL_KEY'))
|
|
|
|
| 443 |
output += reasoning_content + content
|
| 444 |
|
| 445 |
if is_reasoning:
|
| 446 |
+
parts = output.split(output_tag_start)
|
| 447 |
|
| 448 |
if len(parts) > 1:
|
| 449 |
+
if parts[1].endswith(output_tag_end):
|
| 450 |
+
parts[1] = parts[1].replace(output_tag_end, "")
|
| 451 |
+
if parts[1].endswith(f"{output_tag_end}\n{output_stop_token}"):
|
| 452 |
+
parts[1] = parts[1].replace(f"{output_tag_end}\n{output_stop_token}", "")
|
| 453 |
+
if parts[1].endswith(f"{output_tag_end}\n{output_stop_token}\n"):
|
| 454 |
+
parts[1] = parts[1].replace(f"{output_tag_end}\n{output_stop_token}\n", "")
|
| 455 |
+
if parts[1].endswith(f"{output_stop_token}"):
|
| 456 |
+
parts[1] = parts[1].replace(f"{output_stop_token}", "")
|
| 457 |
+
if parts[1].endswith(f"{output_stop_token}\n"):
|
| 458 |
+
parts[1] = parts[1].replace(f"{output_stop_token}\n", "")
|
| 459 |
|
| 460 |
history[-1 if not completion_started else -2] = gr.ChatMessage(
|
| 461 |
role="assistant",
|
|
|
|
| 536 |
}}
|
| 537 |
</style>
|
| 538 |
""", elem_classes="css-styles")
|
| 539 |
+
if SHOW_BANNER:
|
| 540 |
+
with gr.Row(variant="compact", elem_classes=["responsive-row", "no-padding"], ):
|
| 541 |
+
with gr.Column():
|
| 542 |
+
gr.Markdown(BANNER_MARKDOWN, elem_classes="banner-message")
|
| 543 |
+
|
|
|
|
|
|
|
|
|
|
| 544 |
with gr.Row(variant="panel", elem_classes="responsive-row"):
|
| 545 |
with gr.Column(scale=1, min_width=400, elem_classes="model-dropdown-container"):
|
| 546 |
model_dropdown = gr.Dropdown(
|
utils.py
CHANGED
|
@@ -12,6 +12,20 @@ DEBUG_MODE = False or os.environ.get("DEBUG_MODE") == "True"
|
|
| 12 |
DEBUG_MODEL = False or os.environ.get("DEBUG_MODEL") == "True"
|
| 13 |
|
| 14 |
models_config = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
"Apriel-1.5-15B-thinker": {
|
| 16 |
"MODEL_DISPLAY_NAME": "Apriel-1.5-15B-thinker",
|
| 17 |
"MODEL_HF_URL": "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker",
|
|
@@ -21,7 +35,10 @@ models_config = {
|
|
| 21 |
"AUTH_TOKEN": os.environ.get("AUTH_TOKEN"),
|
| 22 |
"REASONING": True,
|
| 23 |
"MULTIMODAL": True,
|
| 24 |
-
"TEMPERATURE": 0.6
|
|
|
|
|
|
|
|
|
|
| 25 |
},
|
| 26 |
# "Apriel-Nemotron-15b-Thinker": {
|
| 27 |
# "MODEL_DISPLAY_NAME": "Apriel-Nemotron-15b-Thinker",
|
|
|
|
| 12 |
DEBUG_MODEL = False or os.environ.get("DEBUG_MODEL") == "True"
|
| 13 |
|
| 14 |
models_config = {
|
| 15 |
+
# "Apriel-1.6-15B-Thinker": {
|
| 16 |
+
# "MODEL_DISPLAY_NAME": "Apriel-1.6-15B-Thinker",
|
| 17 |
+
# "MODEL_HF_URL": "https://huggingface.co/ServiceNow-AI/Apriel-1.6-15b-Thinker",
|
| 18 |
+
# "MODEL_NAME": os.environ.get("MODEL_NAME_APRIEL_1_6_15B"),
|
| 19 |
+
# "VLLM_API_URL": os.environ.get("VLLM_API_URL_APRIEL_1_6_15B"),
|
| 20 |
+
# "VLLM_API_URL_LIST": os.environ.get("VLLM_API_URL_LIST_APRIEL_1_6_15B"),
|
| 21 |
+
# "AUTH_TOKEN": os.environ.get("AUTH_TOKEN"),
|
| 22 |
+
# "REASONING": True,
|
| 23 |
+
# "MULTIMODAL": True,
|
| 24 |
+
# "TEMPERATURE": 0.6,
|
| 25 |
+
# "OUTPUT_TAG_START": "[BEGIN FINAL RESPONSE]",
|
| 26 |
+
# "OUTPUT_TAG_END": "",
|
| 27 |
+
# "OUTPUT_STOP_TOKEN": "<|end|>"
|
| 28 |
+
# },
|
| 29 |
"Apriel-1.5-15B-thinker": {
|
| 30 |
"MODEL_DISPLAY_NAME": "Apriel-1.5-15B-thinker",
|
| 31 |
"MODEL_HF_URL": "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker",
|
|
|
|
| 35 |
"AUTH_TOKEN": os.environ.get("AUTH_TOKEN"),
|
| 36 |
"REASONING": True,
|
| 37 |
"MULTIMODAL": True,
|
| 38 |
+
"TEMPERATURE": 0.6,
|
| 39 |
+
"OUTPUT_TAG_START": "[BEGIN FINAL RESPONSE]",
|
| 40 |
+
"OUTPUT_TAG_END": "[END FINAL RESPONSE]",
|
| 41 |
+
"OUTPUT_STOP_TOKEN": "<|end|>"
|
| 42 |
},
|
| 43 |
# "Apriel-Nemotron-15b-Thinker": {
|
| 44 |
# "MODEL_DISPLAY_NAME": "Apriel-Nemotron-15b-Thinker",
|