phi-mini-instruct-distilled-qwq-bespoke-qwq

Sleeping

Ayushnangia committed on Apr 6, 2025

Commit

4911f6b

verified ·

1 Parent(s): 4fdc0d3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -52,8 +52,7 @@ def respond(message, history, max_tokens, temperature, top_p):
     # Generate the assistant response tokens
     output_ids = model.generate(
         input_ids,
-        max_new_tokens=5000,
         temperature=temperature,
         top_p=top_p,
         do_sample=True,
@@ -81,6 +80,7 @@ def respond(message, history, max_tokens, temperature, top_p):
 demo = gr.ChatInterface(
     fn=respond,
     additional_inputs=[
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
     ],

     # Generate the assistant response tokens
     output_ids = model.generate(
         input_ids,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
         do_sample=True,
 demo = gr.ChatInterface(
     fn=respond,
     additional_inputs=[
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
     ],