Spaces: Running
hardcode mixed precision off for inference
Browse files
Changed files:
- app.py (+5, -11)
- estimate_train_vram.py (+1, -0)
app.py
CHANGED
|
@@ -150,8 +150,7 @@ def build_interface(estimate_vram_fn):
|
|
| 150 |
return app
|
| 151 |
|
| 152 |
|
| 153 |
-
def estimate_vram(gradio_params):
|
| 154 |
-
print(gradio_params)
|
| 155 |
model_config = ModelConfig(**filter_params_for_dataclass(ModelConfig, gradio_params))
|
| 156 |
training_config = TrainingConfig(**filter_params_for_dataclass(TrainingConfig, gradio_params))
|
| 157 |
|
|
@@ -159,22 +158,17 @@ def estimate_vram(gradio_params):
|
|
| 159 |
# Update model config
|
| 160 |
if not gradio_params["repo_id"]:
|
| 161 |
return "No model selected!"
|
| 162 |
-
|
| 163 |
-
# if gradio_params["cache_dir"]:
|
| 164 |
-
# config = scrape_config_from_hub(gradio_params["repo_id"])
|
| 165 |
-
# model_config.overwrite_with_hf_config(config)
|
| 166 |
-
cache_dir="cache/"
|
| 167 |
# By default, scrape config.json from hub
|
| 168 |
-
#else:
|
| 169 |
config = download_config_from_hub(gradio_params["repo_id"], cache_dir)# gradio_params["cache_dir"])
|
| 170 |
model_config.overwrite_with_hf_config(config.to_dict())
|
| 171 |
|
| 172 |
if training_config.train:
|
| 173 |
total_vram_dict = training_vram_required(model_config, training_config)
|
| 174 |
-
output_str = f"Total {total_vram_dict['total']}GB = {total_vram_dict['model']}GB (model) + {total_vram_dict['gradients']}GB (gradients) + {total_vram_dict['optimizer']}GB (optimizer) + {total_vram_dict['activations']}GB activations"
|
| 175 |
else: # inference
|
| 176 |
total_vram_dict = inference_vram_required(model_config, training_config)
|
| 177 |
-
output_str = f"Total {total_vram_dict['total']}GB = {total_vram_dict['model']}GB (model) + {total_vram_dict['kv_cache']}GB (KV cache) + {total_vram_dict['activations']}GB activations"
|
| 178 |
return output_str
|
| 179 |
|
| 180 |
if __name__ == "__main__":
|
|
@@ -184,7 +178,7 @@ if __name__ == "__main__":
|
|
| 184 |
# Launch gradio interface
|
| 185 |
if not args.no_app:
|
| 186 |
import gradio as gr
|
| 187 |
-
estimate_vram_fn = partial(estimate_vram)
|
| 188 |
interface = build_interface(estimate_vram_fn)
|
| 189 |
interface.launch()
|
| 190 |
# Command line interface
|
|
|
|
| 150 |
return app
|
| 151 |
|
| 152 |
|
| 153 |
+
def estimate_vram(cache_dir, gradio_params):
|
|
|
|
| 154 |
model_config = ModelConfig(**filter_params_for_dataclass(ModelConfig, gradio_params))
|
| 155 |
training_config = TrainingConfig(**filter_params_for_dataclass(TrainingConfig, gradio_params))
|
| 156 |
|
|
|
|
| 158 |
# Update model config
|
| 159 |
if not gradio_params["repo_id"]:
|
| 160 |
return "No model selected!"
|
| 161 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
# By default, scrape config.json from hub
|
|
|
|
| 163 |
config = download_config_from_hub(gradio_params["repo_id"], cache_dir)# gradio_params["cache_dir"])
|
| 164 |
model_config.overwrite_with_hf_config(config.to_dict())
|
| 165 |
|
| 166 |
if training_config.train:
|
| 167 |
total_vram_dict = training_vram_required(model_config, training_config)
|
| 168 |
+
output_str = f"Total {total_vram_dict['total']}GB = {total_vram_dict['model']}GB (model) + {total_vram_dict['gradients']}GB (gradients) + {total_vram_dict['optimizer']}GB (optimizer) + {total_vram_dict['activations']}GB (activations)"
|
| 169 |
else: # inference
|
| 170 |
total_vram_dict = inference_vram_required(model_config, training_config)
|
| 171 |
+
output_str = f"Total {total_vram_dict['total']}GB = {total_vram_dict['model']}GB (model) + {total_vram_dict['kv_cache']}GB (KV cache) + {total_vram_dict['activations']}GB (activations)"
|
| 172 |
return output_str
|
| 173 |
|
| 174 |
if __name__ == "__main__":
|
|
|
|
| 178 |
# Launch gradio interface
|
| 179 |
if not args.no_app:
|
| 180 |
import gradio as gr
|
| 181 |
+
estimate_vram_fn = partial(estimate_vram, args.cache_dir)
|
| 182 |
interface = build_interface(estimate_vram_fn)
|
| 183 |
interface.launch()
|
| 184 |
# Command line interface
|
estimate_train_vram.py
CHANGED
|
@@ -58,6 +58,7 @@ def training_vram_required(model_config, training_config):
|
|
| 58 |
|
| 59 |
|
| 60 |
def inference_vram_required(model_config, training_config):
|
|
|
|
| 61 |
# Total inference VRAM = model size + KV cache size + activations + additional overhead
|
| 62 |
model_vram = model_memory(parameters=model_config.model_size,
|
| 63 |
precision=model_config.precision,
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
def inference_vram_required(model_config, training_config):
|
| 61 |
+
model_config.mixed_precision = False
|
| 62 |
# Total inference VRAM = model size + KV cache size + activations + additional overhead
|
| 63 |
model_vram = model_memory(parameters=model_config.model_size,
|
| 64 |
precision=model_config.precision,
|