Update app.py
Browse files
app.py
CHANGED
|
@@ -19,13 +19,17 @@ if text:
|
|
| 19 |
|
| 20 |
# Generate text
|
| 21 |
output = model.generate(
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# Decode generated text
|
| 31 |
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
|
|
|
| 19 |
|
| 20 |
# Generate text
|
| 21 |
output = model.generate(
|
| 22 |
+
input_ids=encoded_input,
|
| 23 |
+
max_length=100, # Max sequence length in tokens (for decoder-only models this includes the prompt)
|
| 24 |
+
num_return_sequences=1, # Number of sequences to generate
|
| 25 |
+
no_repeat_ngram_size=2, # Avoid repeating n-grams of length 2
|
| 26 |
+
top_k=50, # Limits the sampling pool to top_k tokens
|
| 27 |
+
top_p=0.95, # Cumulative probability threshold for nucleus sampling
|
| 28 |
+
temperature=0.7, # Controls the randomness of predictions
|
| 29 |
+
do_sample=True, # Enable sampling
|
| 30 |
+
attention_mask=encoded_input.new_ones(encoded_input.shape),
|
| 31 |
+
pad_token_id=tokenizer.eos_token_id # Use the end-of-sequence token as padding
|
| 32 |
+
)
|
| 33 |
|
| 34 |
# Decode generated text
|
| 35 |
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
|