Update app.py
app.py (CHANGED):

```diff
@@ -77,8 +77,7 @@ def respond(
     temperature,
     lp_start,
     lp_decay,
-    mirostat_eta,
-    mirostat_tau,
+    min_p,
     frequency_penalty,
     presence_penalty,
     max_tokens
@@ -101,10 +100,10 @@ def respond(
         temperature=temperature,
         stream=True,
         stop=["<|im_end|>"],
-
-        mirostat_tau=mirostat_tau,
-        mirostat_eta=mirostat_eta,
+        min_p=min_p,
         max_tokens=max_tokens,
+        # Disable top-p pruning
+        top_k=100000000,
         frequency_penalty=frequency_penalty,
         presence_penalty=presence_penalty,
         logits_processor=lambda ids, logits: custom_lp_logits_processor(ids, logits, lp_start, lp_decay, len(convo))
@@ -132,8 +131,7 @@ demo = gr.ChatInterface(
         gr.Slider(minimum=0.1, maximum=4.0, value=0.8, step=0.1, label="Temperature", info="How chaotic should the model be?"),
         gr.Slider(minimum=0, maximum=512, value=32, step=1, label="Length penalty start", info='When should the model start being more likely to shut up?'),
         gr.Slider(minimum=0.5, maximum=1.5, value=1.02, step=0.01, label="Length penalty decay factor", info='How fast should that stop likelihood increase?'),
-        gr.Slider(minimum=0.0, maximum=
-        gr.Slider(minimum=0.0, maximum=10.0, value=3.0, step=0.5, label="Mirostat tau", info="Lower number keeps hallucinations to a minimum"),
+        gr.Slider(minimum=0.0, maximum=10.0, value=3.0, step=0.5, label="Min_p", info="Lower values make it more random (ratio between lowest-probability and highest-probability tokens)"),
         gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Frequency penalty", info='"Don\'repeat yourself"'),
         gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Presence penalty", info='"Use lots of diverse words"'),
         gr.Slider(minimum=1, maximum=1024, value=1024, step=1, label="Max new tokens", info="How many words can the model generate at most?"),
```
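For context on the swapped-in sampler: min-p keeps a token only if its probability is at least `min_p` times the probability of the most likely token, which is the "ratio between lowest-probability and highest-probability tokens" the new slider's info text refers to; lower values keep more candidates and so make output more random. A minimal sketch of that filtering rule follows (illustration only: the Space itself just passes `min_p` through to the backend, and `min_p` is conventionally a value in [0, 1]):

```python
# Illustration of the min-p rule; not code from app.py.
import numpy as np

def min_p_filter(logits: np.ndarray, min_p: float) -> np.ndarray:
    """Mask out tokens whose probability is below min_p * p(top token)."""
    probs = np.exp(logits - logits.max())   # numerically stable softmax
    probs /= probs.sum()
    keep = probs >= min_p * probs.max()     # threshold scales with the top token
    return np.where(keep, logits, -np.inf)  # masked tokens can never be sampled
```

Because the threshold scales with the top token, the candidate set adapts to the distribution: confident distributions keep few tokens, flat ones keep many. Setting `top_k=100000000` in the commit makes the fixed top-k cutoff a no-op, so nothing else prunes the candidates.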
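The length-penalty sliders feed `custom_lp_logits_processor`, whose body is not part of this diff. A plausible reconstruction from the slider descriptions (start nudging the model to stop after `lp_start` generated tokens, with `lp_decay` setting how quickly the nudge grows) is sketched below; the EOS token id and the meaning of the `len(convo)` argument are assumptions, not facts from the commit:

```python
# Hypothetical sketch of custom_lp_logits_processor; the real implementation
# is not shown in this commit.
import numpy as np

EOS_TOKEN_ID = 2  # assumption: the model's end-of-sequence token id

def custom_lp_logits_processor(ids, logits, lp_start, lp_decay, prompt_len):
    # prompt_len stands in for len(convo): assumed to measure the prompt
    # portion, so the difference counts freshly generated tokens.
    overshoot = len(ids) - prompt_len - lp_start
    if overshoot > 0:
        # Scale the EOS probability by lp_decay**overshoot: in logit space
        # that is an additive shift. lp_decay > 1 pushes the model to stop
        # sooner; lp_decay < 1 (the slider allows 0.5) delays stopping.
        logits[EOS_TOKEN_ID] += overshoot * np.log(lp_decay)
    return logits
```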