Spaces:

redael
/

udc

Sleeping

redael committed on Jul 16, 2024

Commit

6f1cdd7

verified ·

1 Parent(s): 6765159

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ model.to(device)
 if device.type == 'cuda':
     model = model.half()  # Use FP16 precision
-def generate_response(prompt, model, tokenizer, max_length=100, num_beams=1, temperature=0.7, top_p=0.9, repetition_penalty=1.0):
     # Prepare the prompt
     prompt = f"User: {prompt}\nAssistant:"
     inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)
@@ -26,7 +26,7 @@ def generate_response(prompt, model, tokenizer, max_length=100, num_beams=1, tem
         num_beams=num_beams,  # Use a lower number of beams
         temperature=temperature,
         top_p=top_p,
-        repetition_penalty=repetition_penalty,
         early_stopping=True
     )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -50,7 +50,7 @@ def respond(message, history: list[tuple[str, str]]):
     conversation += f"User: {message}\nAssistant:"
     # Fixed values for generation parameters
-    max_tokens = 100  # Reduce max tokens if possible
     temperature = 0.7
     top_p = 0.9

 if device.type == 'cuda':
     model = model.half()  # Use FP16 precision
+def generate_response(prompt, model, tokenizer, max_length=100, num_beams=1, temperature=0.7, top_p=0.9, repetition_penalty=2.0):
     # Prepare the prompt
     prompt = f"User: {prompt}\nAssistant:"
     inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)
         num_beams=num_beams,  # Use a lower number of beams
         temperature=temperature,
         top_p=top_p,
+        repetition_penalty=repetition_penalty,  # Increased repetition penalty
         early_stopping=True
     )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     conversation += f"User: {message}\nAssistant:"
     # Fixed values for generation parameters
+    max_tokens = 100  # Adjusted max tokens
     temperature = 0.7
     top_p = 0.9