Marintosti committed on
Commit
86b7fb0
·
verified ·
1 Parent(s): ff010d3

ci: deploy 7be149f

Browse files
Files changed (1) hide show
  1. configs/deployment_config.yaml +3 -1
configs/deployment_config.yaml CHANGED
@@ -6,7 +6,9 @@ vllm:
6
  tensor_parallel_size: 1
7
  max_model_len: 2048
8
  gpu_memory_utilization: 0.9
9
- dtype: "bfloat16"
 
 
10
 
11
  inference:
12
  temperature: 0.3
 
6
  tensor_parallel_size: 1
7
  max_model_len: 2048
8
  gpu_memory_utilization: 0.9
9
+ # T4 (compute capability 7.5) ne supporte pas bfloat16. Float16 OK sur T4 et A10/L4.
10
+ # Pour passer en bfloat16 (précision légèrement meilleure), il faut un GPU de compute capability >= 8.0, c.-à-d. Ampere ou plus récent (A10, A100, L4, H100).
11
+ dtype: "float16"
12
 
13
  inference:
14
  temperature: 0.3