bradnow committed on
Commit
4296635
·
1 Parent(s): ed565d2

Updates to prepare for Apriel 1.6 release

Browse files
Files changed (2) hide show
  1. app.py +25 -20
  2. utils.py +18 -1
app.py CHANGED
@@ -30,7 +30,12 @@ BUTTON_WIDTH = 160
30
  DEFAULT_OPT_OUT_VALUE = DEBUG_MODE
31
 
32
  # If DEBUG_MODEL is True, use an alternative model (without reasoning) for testing
33
- DEFAULT_MODEL_NAME = "Apriel-1.5-15B-thinker" if not DEBUG_MODEL else "Apriel-1.5-15B-thinker" # "Apriel-5b"
 
 
 
 
 
34
 
35
  BUTTON_ENABLED = gr.update(interactive=True)
36
  BUTTON_DISABLED = gr.update(interactive=False)
@@ -126,6 +131,9 @@ def run_chat_inference(history, message, state):
126
  error = None
127
  model_name = model_config.get('MODEL_NAME')
128
  temperature = model_config.get('TEMPERATURE', DEFAULT_MODEL_TEMPERATURE)
 
 
 
129
 
130
  # Reinitialize the OpenAI client with a random endpoint from the list
131
  setup_model(model_config.get('MODEL_KEY'))
@@ -435,19 +443,19 @@ def run_chat_inference(history, message, state):
435
  output += reasoning_content + content
436
 
437
  if is_reasoning:
438
- parts = output.split("[BEGIN FINAL RESPONSE]")
439
 
440
  if len(parts) > 1:
441
- if parts[1].endswith("[END FINAL RESPONSE]"):
442
- parts[1] = parts[1].replace("[END FINAL RESPONSE]", "")
443
- if parts[1].endswith("[END FINAL RESPONSE]\n<|end|>"):
444
- parts[1] = parts[1].replace("[END FINAL RESPONSE]\n<|end|>", "")
445
- if parts[1].endswith("[END FINAL RESPONSE]\n<|end|>\n"):
446
- parts[1] = parts[1].replace("[END FINAL RESPONSE]\n<|end|>\n", "")
447
- if parts[1].endswith("<|end|>"):
448
- parts[1] = parts[1].replace("<|end|>", "")
449
- if parts[1].endswith("<|end|>\n"):
450
- parts[1] = parts[1].replace("<|end|>\n", "")
451
 
452
  history[-1 if not completion_started else -2] = gr.ChatMessage(
453
  role="assistant",
@@ -528,14 +536,11 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
528
  }}
529
  </style>
530
  """, elem_classes="css-styles")
531
- with gr.Row(variant="compact", elem_classes=["responsive-row", "no-padding"], ):
532
- with gr.Column():
533
- gr.Markdown(
534
- """
535
- <span class="banner-message-text">ℹ️ This app has been updated to use the recommended temperature of 0.6. We had set it to 0.8 earlier and expect 0.6 to be better. Please provide feedback using the model link.</span>
536
- """
537
- , elem_classes="banner-message"
538
- )
539
  with gr.Row(variant="panel", elem_classes="responsive-row"):
540
  with gr.Column(scale=1, min_width=400, elem_classes="model-dropdown-container"):
541
  model_dropdown = gr.Dropdown(
 
30
  DEFAULT_OPT_OUT_VALUE = DEBUG_MODE
31
 
32
  # If DEBUG_MODEL is True, use an alternative model (without reasoning) for testing
33
+ DEFAULT_MODEL_NAME = "Apriel-1.6-15B-Thinker" if not DEBUG_MODEL else "Apriel-1.6-15B-Thinker"
34
+
35
+ SHOW_BANNER = False
36
+ BANNER_MARKDOWN = """
37
+ <span class="banner-message-text">ℹ️ This app has been updated to use the recommended temperature of 0.6. We had set it to 0.8 earlier and expect 0.6 to be better. Please provide feedback using the model link.</span>
38
+ """
39
 
40
  BUTTON_ENABLED = gr.update(interactive=True)
41
  BUTTON_DISABLED = gr.update(interactive=False)
 
131
  error = None
132
  model_name = model_config.get('MODEL_NAME')
133
  temperature = model_config.get('TEMPERATURE', DEFAULT_MODEL_TEMPERATURE)
134
+ output_tag_start = model_config.get('OUTPUT_TAG_START', "[BEGIN FINAL RESPONSE]")
135
+ output_tag_end = model_config.get('OUTPUT_TAG_END', "[END FINAL RESPONSE]")
136
+ output_stop_token = model_config.get('OUTPUT_STOP_TOKEN', "<|end|>")
137
 
138
  # Reinitialize the OpenAI client with a random endpoint from the list
139
  setup_model(model_config.get('MODEL_KEY'))
 
443
  output += reasoning_content + content
444
 
445
  if is_reasoning:
446
+ parts = output.split(output_tag_start)
447
 
448
  if len(parts) > 1:
449
+ if parts[1].endswith(output_tag_end):
450
+ parts[1] = parts[1].replace(output_tag_end, "")
451
+ if parts[1].endswith(f"{output_tag_end}\n{output_stop_token}"):
452
+ parts[1] = parts[1].replace(f"{output_tag_end}\n{output_stop_token}", "")
453
+ if parts[1].endswith(f"{output_tag_end}\n{output_stop_token}\n"):
454
+ parts[1] = parts[1].replace(f"{output_tag_end}\n{output_stop_token}\n", "")
455
+ if parts[1].endswith(f"{output_stop_token}"):
456
+ parts[1] = parts[1].replace(f"{output_stop_token}", "")
457
+ if parts[1].endswith(f"{output_stop_token}\n"):
458
+ parts[1] = parts[1].replace(f"{output_stop_token}\n", "")
459
 
460
  history[-1 if not completion_started else -2] = gr.ChatMessage(
461
  role="assistant",
 
536
  }}
537
  </style>
538
  """, elem_classes="css-styles")
539
+ if SHOW_BANNER:
540
+ with gr.Row(variant="compact", elem_classes=["responsive-row", "no-padding"], ):
541
+ with gr.Column():
542
+ gr.Markdown(BANNER_MARKDOWN, elem_classes="banner-message")
543
+
 
 
 
544
  with gr.Row(variant="panel", elem_classes="responsive-row"):
545
  with gr.Column(scale=1, min_width=400, elem_classes="model-dropdown-container"):
546
  model_dropdown = gr.Dropdown(
utils.py CHANGED
@@ -12,6 +12,20 @@ DEBUG_MODE = False or os.environ.get("DEBUG_MODE") == "True"
12
  DEBUG_MODEL = False or os.environ.get("DEBUG_MODEL") == "True"
13
 
14
  models_config = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  "Apriel-1.5-15B-thinker": {
16
  "MODEL_DISPLAY_NAME": "Apriel-1.5-15B-thinker",
17
  "MODEL_HF_URL": "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker",
@@ -21,7 +35,10 @@ models_config = {
21
  "AUTH_TOKEN": os.environ.get("AUTH_TOKEN"),
22
  "REASONING": True,
23
  "MULTIMODAL": True,
24
- "TEMPERATURE": 0.6
 
 
 
25
  },
26
  # "Apriel-Nemotron-15b-Thinker": {
27
  # "MODEL_DISPLAY_NAME": "Apriel-Nemotron-15b-Thinker",
 
12
  DEBUG_MODEL = False or os.environ.get("DEBUG_MODEL") == "True"
13
 
14
  models_config = {
15
+ # "Apriel-1.6-15B-Thinker": {
16
+ # "MODEL_DISPLAY_NAME": "Apriel-1.6-15B-Thinker",
17
+ # "MODEL_HF_URL": "https://huggingface.co/ServiceNow-AI/Apriel-1.6-15b-Thinker",
18
+ # "MODEL_NAME": os.environ.get("MODEL_NAME_APRIEL_1_6_15B"),
19
+ # "VLLM_API_URL": os.environ.get("VLLM_API_URL_APRIEL_1_6_15B"),
20
+ # "VLLM_API_URL_LIST": os.environ.get("VLLM_API_URL_LIST_APRIEL_1_6_15B"),
21
+ # "AUTH_TOKEN": os.environ.get("AUTH_TOKEN"),
22
+ # "REASONING": True,
23
+ # "MULTIMODAL": True,
24
+ # "TEMPERATURE": 0.6,
25
+ # "OUTPUT_TAG_START": "[BEGIN FINAL RESPONSE]",
26
+ # "OUTPUT_TAG_END": "",
27
+ # "OUTPUT_STOP_TOKEN": "<|end|>"
28
+ # },
29
  "Apriel-1.5-15B-thinker": {
30
  "MODEL_DISPLAY_NAME": "Apriel-1.5-15B-thinker",
31
  "MODEL_HF_URL": "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker",
 
35
  "AUTH_TOKEN": os.environ.get("AUTH_TOKEN"),
36
  "REASONING": True,
37
  "MULTIMODAL": True,
38
+ "TEMPERATURE": 0.6,
39
+ "OUTPUT_TAG_START": "[BEGIN FINAL RESPONSE]",
40
+ "OUTPUT_TAG_END": "[END FINAL RESPONSE]",
41
+ "OUTPUT_STOP_TOKEN": "<|end|>"
42
  },
43
  # "Apriel-Nemotron-15b-Thinker": {
44
  # "MODEL_DISPLAY_NAME": "Apriel-Nemotron-15b-Thinker",