Update app.py
app.py CHANGED
```diff
@@ -24,9 +24,10 @@ try:
         model=model,
         tokenizer=tokenizer,
         torch_dtype=torch.float16,
-        max_length=128, # Limit response length
         num_return_sequences=1, # Only generate one response
-        do_sample=
+        do_sample=True, # Enable sampling since we're using temperature and top_p
+        truncation=True, # Explicitly enable truncation
+        max_new_tokens=128 # Use only max_new_tokens
     )
 
 except Exception as e:
```
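This first hunk swaps `max_length=128` for `max_new_tokens=128` and turns sampling on explicitly. Note that `max_length` counts the prompt tokens as well, so a 128-token budget could be consumed by the prompt alone, while `max_new_tokens` caps only the generated continuation. A minimal sketch of the resulting setup, assuming a `transformers` text-generation pipeline and a hypothetical model id (the actual model loading in app.py is outside this diff):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Hypothetical model id for illustration; app.py's real model is not shown in this diff.
model_id = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)

# Pipeline configuration as it stands after this change; the task name
# "text-generation" and the variable name `chatbot` are assumptions --
# only the keyword arguments below appear in the diff.
chatbot = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    num_return_sequences=1,  # Only generate one response
    do_sample=True,          # Enable sampling since temperature and top_p are used
    truncation=True,         # Explicitly enable truncation
    max_new_tokens=128,      # Cap on newly generated tokens (replaces max_length=128)
)
```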
```diff
@@ -55,6 +56,8 @@ def respond(
         temperature=temperature,
         top_p=top_p,
     )[0]["generated_text"]
+        prompt,
+        do_sample=True,
     #Extract the bot's reply (adjust if your model format is different)
     bot_response = response.split("Assistant:")[-1].strip()
     yield bot_response
```
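In this second hunk the two added lines (`prompt,` and `do_sample=True,`) land after the closing parenthesis of the generation call, where Python would reject them; presumably they belong inside the call. A minimal sketch of the likely intended `respond()` body, reusing the `chatbot` pipeline from above, with a hypothetical signature and prompt format (the real ones are outside this diff):

```python
def respond(message, history, temperature=0.7, top_p=0.9):
    # Hypothetical prompt format; the real construction is outside this diff.
    # The "Assistant:" marker matches the split() below.
    prompt = f"User: {message}\nAssistant:"

    response = chatbot(      # `chatbot` is the pipeline built earlier in app.py
        prompt,              # the added positional argument
        do_sample=True,      # the added keyword argument
        temperature=temperature,
        top_p=top_p,
    )[0]["generated_text"]

    # Extract the bot's reply (adjust if your model format is different)
    bot_response = response.split("Assistant:")[-1].strip()
    yield bot_response
```

Passing `do_sample=True` here is redundant with the pipeline-level setting, but harmless; per-call keyword arguments simply override the construction-time defaults.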