Spaces:
Running
Running
Removes `use_cache` from the model kwargs
Browse files
model.py
CHANGED
@@ -89,8 +89,7 @@ class SmolLM3Model:
         model_kwargs = {
             "torch_dtype": self.torch_dtype,
             "device_map": self.device_map,
-            "trust_remote_code": True
-            "use_cache": False # Disable KV cache for training
+            "trust_remote_code": True
         }
 
         # Only add flash attention if the model supports it