Reduced tokens
app.py CHANGED
@@ -4,7 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import re
 
 # Load model and tokenizer
-print("Loading
+print("Loading gemma...")
 tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
 model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it")
 
@@ -107,7 +107,7 @@ def respond(message, history):
     with torch.no_grad():
         chat_history_ids = model.generate(
             input_ids,
-            max_length=
+            max_length=250,
             temperature=0.9,
             top_p=0.9,
             repetition_penalty=1.2,
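For context, a minimal, self-contained sketch of how the generation call above might be wired up. Only fragments of app.py appear in this diff, so the prompt construction, the `do_sample` and `pad_token_id` settings, and the decode step are assumptions, not the app's actual code. Note that in transformers, `max_length=250` caps the total sequence (prompt plus reply); `max_new_tokens` would cap only the reply, which is why lowering it shortens responses and reduces generation time.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it")

def respond(message, history):
    # Hypothetical prompt handling: the real app.py logic is not shown in the diff,
    # and a chat model like gemma-2b-it would normally use its chat template here.
    input_ids = tokenizer(message, return_tensors="pt").input_ids
    with torch.no_grad():
        chat_history_ids = model.generate(
            input_ids,
            max_length=250,           # total cap (prompt + new tokens), per this commit
            temperature=0.9,          # sampling temperature
            top_p=0.9,                # nucleus sampling
            repetition_penalty=1.2,   # discourage repeated tokens
            do_sample=True,           # assumed: temperature/top_p only apply when sampling
            pad_token_id=tokenizer.eos_token_id,  # assumed: silences the missing-pad warning
        )
    # Strip the prompt tokens so only the model's reply is returned
    reply_ids = chat_history_ids[0, input_ids.shape[-1]:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)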