{
  "method": "PTQ-INT8",
  "original_model": "/projects/extern/kisski/kisski-narges-llm-interactive/dir.project/Biomni-R0-32B-Preview",
  "quantized_model": "/projects/extern/kisski/kisski-narges-llm-interactive/dir.project/Biomni-R0-32B-PTQ-INT8",
  "calibration_samples": 120,
  "quantization_scheme": "INT8 weights and activations",
  "backend": "optimum-quanto",
  "model_size_estimate": "~8-10 GB (vs ~60 GB original)",
  "note": "Quantization may be applied at serving time by vLLM"
}