Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,10 +21,22 @@ MODEL_ID = "google/gemma-4-31B-it-assistant"
|
|
| 21 |
print("Loading tokenizer...")
|
| 22 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
print("Loading model...")
|
| 25 |
model = AutoModelForCausalLM.from_pretrained(
|
| 26 |
MODEL_ID,
|
| 27 |
-
|
| 28 |
device_map="auto",
|
| 29 |
)
|
| 30 |
model.eval()
|
|
@@ -73,11 +85,6 @@ demo = gr.ChatInterface(
|
|
| 73 |
fn=chat,
|
| 74 |
title="Gemma 4 Assistant",
|
| 75 |
description="google/gemma-4-31B-it-assistant — streaming enabled",
|
| 76 |
-
examples=[
|
| 77 |
-
"Explain quantum computing in simple terms",
|
| 78 |
-
"Write a Python function to reverse a string",
|
| 79 |
-
"What is photosynthesis?",
|
| 80 |
-
],
|
| 81 |
)
|
| 82 |
|
| 83 |
demo.launch()
|
|
|
|
| 21 |
print("Loading tokenizer...")
|
| 22 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
| 23 |
|
| 24 |
+
# Manually set Gemma chat template since tokenizer doesn't include one
|
| 25 |
+
tokenizer.chat_template = (
|
| 26 |
+
"{% for message in messages %}"
|
| 27 |
+
"{% if message['role'] == 'user' %}"
|
| 28 |
+
"user\n{{ message['content'] }}\n"
|
| 29 |
+
"{% elif message['role'] == 'assistant' %}"
|
| 30 |
+
"model\n{{ message['content'] }}\n"
|
| 31 |
+
"{% endif %}"
|
| 32 |
+
"{% endfor %}"
|
| 33 |
+
"{% if add_generation_prompt %}model\n{% endif %}"
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
print("Loading model...")
|
| 37 |
model = AutoModelForCausalLM.from_pretrained(
|
| 38 |
MODEL_ID,
|
| 39 |
+
dtype=torch.bfloat16,
|
| 40 |
device_map="auto",
|
| 41 |
)
|
| 42 |
model.eval()
|
|
|
|
| 85 |
fn=chat,
|
| 86 |
title="Gemma 4 Assistant",
|
| 87 |
description="google/gemma-4-31B-it-assistant — streaming enabled",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
)
|
| 89 |
|
| 90 |
demo.launch()
|