Spaces:

ServiceNow-AI
/

Apriel-Chat

Running

App Files Files Community

bradnow committed 5 days ago

Commit

4296635

1 Parent(s): ed565d2

Updates to prepare for Apriel 1.6 release

Browse files

Files changed (2) hide show

app.py +25 -20
utils.py +18 -1

app.py CHANGED Viewed

@@ -30,7 +30,12 @@ BUTTON_WIDTH = 160
 DEFAULT_OPT_OUT_VALUE = DEBUG_MODE
 # If DEBUG_MODEL is True, use an alternative model (without reasoning) for testing
-DEFAULT_MODEL_NAME = "Apriel-1.5-15B-thinker" if not DEBUG_MODEL else "Apriel-1.5-15B-thinker"  # "Apriel-5b"
 BUTTON_ENABLED = gr.update(interactive=True)
 BUTTON_DISABLED = gr.update(interactive=False)
@@ -126,6 +131,9 @@ def run_chat_inference(history, message, state):
     error = None
     model_name = model_config.get('MODEL_NAME')
     temperature = model_config.get('TEMPERATURE', DEFAULT_MODEL_TEMPERATURE)
     # Reinitialize the OpenAI client with a random endpoint from the list
     setup_model(model_config.get('MODEL_KEY'))
@@ -435,19 +443,19 @@ def run_chat_inference(history, message, state):
             output += reasoning_content + content
             if is_reasoning:
-                parts = output.split("[BEGIN FINAL RESPONSE]")
                 if len(parts) > 1:
-                    if parts[1].endswith("[END FINAL RESPONSE]"):
-                        parts[1] = parts[1].replace("[END FINAL RESPONSE]", "")
-                    if parts[1].endswith("[END FINAL RESPONSE]\n<|end|>"):
-                        parts[1] = parts[1].replace("[END FINAL RESPONSE]\n<|end|>", "")
-                    if parts[1].endswith("[END FINAL RESPONSE]\n<|end|>\n"):
-                        parts[1] = parts[1].replace("[END FINAL RESPONSE]\n<|end|>\n", "")
-                    if parts[1].endswith("<|end|>"):
-                        parts[1] = parts[1].replace("<|end|>", "")
-                    if parts[1].endswith("<|end|>\n"):
-                        parts[1] = parts[1].replace("<|end|>\n", "")
                 history[-1 if not completion_started else -2] = gr.ChatMessage(
                     role="assistant",
@@ -528,14 +536,11 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
             }}
         </style>
         """, elem_classes="css-styles")
-    with gr.Row(variant="compact", elem_classes=["responsive-row", "no-padding"], ):
-        with gr.Column():
-            gr.Markdown(
-                """
-                <span class="banner-message-text">ℹ️ This app has been updated to use the recommended temperature of 0.6. We had set it to 0.8 earlier and expect 0.6 to be better. Please provide feedback using the model link.</span>
-                """
-                , elem_classes="banner-message"
-            )
     with gr.Row(variant="panel", elem_classes="responsive-row"):
         with gr.Column(scale=1, min_width=400, elem_classes="model-dropdown-container"):
             model_dropdown = gr.Dropdown(

 DEFAULT_OPT_OUT_VALUE = DEBUG_MODE
 # If DEBUG_MODEL is True, use an alternative model (without reasoning) for testing
+DEFAULT_MODEL_NAME = "Apriel-1.6-15B-Thinker" if not DEBUG_MODEL else "Apriel-1.6-15B-Thinker"
+SHOW_BANNER = False
+BANNER_MARKDOWN = """
+                    <span class="banner-message-text">ℹ️ This app has been updated to use the recommended temperature of 0.6. We had set it to 0.8 earlier and expect 0.6 to be better. Please provide feedback using the model link.</span>
+                    """
 BUTTON_ENABLED = gr.update(interactive=True)
 BUTTON_DISABLED = gr.update(interactive=False)
     error = None
     model_name = model_config.get('MODEL_NAME')
     temperature = model_config.get('TEMPERATURE', DEFAULT_MODEL_TEMPERATURE)
+    output_tag_start = model_config.get('OUTPUT_TAG_START', "[BEGIN FINAL RESPONSE]")
+    output_tag_end = model_config.get('OUTPUT_TAG_END', "[END FINAL RESPONSE]")
+    output_stop_token = model_config.get('OUTPUT_STOP_TOKEN', "<|end|>")
     # Reinitialize the OpenAI client with a random endpoint from the list
     setup_model(model_config.get('MODEL_KEY'))
             output += reasoning_content + content
             if is_reasoning:
+                parts = output.split(output_tag_start)
                 if len(parts) > 1:
+                    if parts[1].endswith(output_tag_end):
+                        parts[1] = parts[1].replace(output_tag_end, "")
+                    if parts[1].endswith(f"{output_tag_end}\n{output_stop_token}"):
+                        parts[1] = parts[1].replace(f"{output_tag_end}\n{output_stop_token}", "")
+                    if parts[1].endswith(f"{output_tag_end}\n{output_stop_token}\n"):
+                        parts[1] = parts[1].replace(f"{output_tag_end}\n{output_stop_token}\n", "")
+                    if parts[1].endswith(f"{output_stop_token}"):
+                        parts[1] = parts[1].replace(f"{output_stop_token}", "")
+                    if parts[1].endswith(f"{output_stop_token}\n"):
+                        parts[1] = parts[1].replace(f"{output_stop_token}\n", "")
                 history[-1 if not completion_started else -2] = gr.ChatMessage(
                     role="assistant",
             }}
         </style>
         """, elem_classes="css-styles")
+    if SHOW_BANNER:
+        with gr.Row(variant="compact", elem_classes=["responsive-row", "no-padding"], ):
+            with gr.Column():
+                gr.Markdown(BANNER_MARKDOWN, elem_classes="banner-message")
     with gr.Row(variant="panel", elem_classes="responsive-row"):
         with gr.Column(scale=1, min_width=400, elem_classes="model-dropdown-container"):
             model_dropdown = gr.Dropdown(

utils.py CHANGED Viewed

@@ -12,6 +12,20 @@ DEBUG_MODE = False or os.environ.get("DEBUG_MODE") == "True"
 DEBUG_MODEL = False or os.environ.get("DEBUG_MODEL") == "True"
 models_config = {
     "Apriel-1.5-15B-thinker": {
         "MODEL_DISPLAY_NAME": "Apriel-1.5-15B-thinker",
         "MODEL_HF_URL": "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker",
@@ -21,7 +35,10 @@ models_config = {
         "AUTH_TOKEN": os.environ.get("AUTH_TOKEN"),
         "REASONING": True,
         "MULTIMODAL": True,
-        "TEMPERATURE": 0.6
     },
     # "Apriel-Nemotron-15b-Thinker": {
     #     "MODEL_DISPLAY_NAME": "Apriel-Nemotron-15b-Thinker",

 DEBUG_MODEL = False or os.environ.get("DEBUG_MODEL") == "True"
 models_config = {
+    # "Apriel-1.6-15B-Thinker": {
+    #     "MODEL_DISPLAY_NAME": "Apriel-1.6-15B-Thinker",
+    #     "MODEL_HF_URL": "https://huggingface.co/ServiceNow-AI/Apriel-1.6-15b-Thinker",
+    #     "MODEL_NAME": os.environ.get("MODEL_NAME_APRIEL_1_6_15B"),
+    #     "VLLM_API_URL": os.environ.get("VLLM_API_URL_APRIEL_1_6_15B"),
+    #     "VLLM_API_URL_LIST": os.environ.get("VLLM_API_URL_LIST_APRIEL_1_6_15B"),
+    #     "AUTH_TOKEN": os.environ.get("AUTH_TOKEN"),
+    #     "REASONING": True,
+    #     "MULTIMODAL": True,
+    #     "TEMPERATURE": 0.6,
+    #     "OUTPUT_TAG_START": "[BEGIN FINAL RESPONSE]",
+    #     "OUTPUT_TAG_END": "",
+    #     "OUTPUT_STOP_TOKEN": "<|end|>"
+    # },
     "Apriel-1.5-15B-thinker": {
         "MODEL_DISPLAY_NAME": "Apriel-1.5-15B-thinker",
         "MODEL_HF_URL": "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker",
         "AUTH_TOKEN": os.environ.get("AUTH_TOKEN"),
         "REASONING": True,
         "MULTIMODAL": True,
+        "TEMPERATURE": 0.6,
+        "OUTPUT_TAG_START": "[BEGIN FINAL RESPONSE]",
+        "OUTPUT_TAG_END": "[END FINAL RESPONSE]",
+        "OUTPUT_STOP_TOKEN": "<|end|>"
     },
     # "Apriel-Nemotron-15b-Thinker": {
     #     "MODEL_DISPLAY_NAME": "Apriel-Nemotron-15b-Thinker",