bobpopboom committed on
Commit
2876cd4
·
verified ·
1 Parent(s): b7c5b78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -24,9 +24,10 @@ try:
24
  model=model,
25
  tokenizer=tokenizer,
26
  torch_dtype=torch.float16,
27
- max_length=128, # Limit response length
28
  num_return_sequences=1, # Only generate one response
29
- do_sample=False # Use greedy decoding for speed
 
 
30
  )
31
 
32
  except Exception as e:
@@ -55,6 +56,8 @@ def respond(
55
  temperature=temperature,
56
  top_p=top_p,
57
  )[0]["generated_text"]
 
 
58
  #Extract the bot's reply (adjust if your model format is different)
59
  bot_response = response.split("Assistant:")[-1].strip()
60
  yield bot_response
 
24
  model=model,
25
  tokenizer=tokenizer,
26
  torch_dtype=torch.float16,
 
27
  num_return_sequences=1, # Only generate one response
28
+ do_sample=True, # Enable sampling since we're using temperature and top_p
29
+ truncation=True, # Explicitly enable truncation
30
+ max_new_tokens=128 # Use only max_new_tokens
31
  )
32
 
33
  except Exception as e:
 
56
  temperature=temperature,
57
  top_p=top_p,
58
  )[0]["generated_text"]
59
+ prompt,
60
+ do_sample=True,
61
  #Extract the bot's reply (adjust if your model format is different)
62
  bot_response = response.split("Assistant:")[-1].strip()
63
  yield bot_response