{
  "model_name": "bnb_8bit_conservative_stem_optimized",
  "source_model": "luca-deandrea/MNLP_M3_mcqa_model",
  "quantization_method": "BitsAndBytes_8bit",
  "timestamp": "2025-06-10 23:30:05",
  "processing_time_seconds": 31.926149606704712,
  "quantization_config": {
    "load_in_8bit": true,
    "llm_int8_threshold": 8.0,
    "llm_int8_skip_modules": [
      "lm_head"
    ],
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false
  },
  "model_stats": {
    "size_mb": 732.8867282867432,
    "validation_score": 0.6,
    "compression_ratio": "~2x compared to FP16"
  },
  "stem_optimizations": [
    "conservative_8bit_threshold_8.0",
    "lm_head_layer_kept_in_fp16",
    "deterministic_generation_config",
    "optimized_for_stem_reasoning",
    "cache_enabled_for_performance"
  ],
  "usage_instructions": {
    "library": "transformers",
    "loading": "AutoModelForCausalLM.from_pretrained('bnb_8bit_conservative_model')",
    "recommended_for": "MNLP_STEM_mcqa_evals",
    "notes": "Model ready for lighteval evaluation"
  },
  "performance_characteristics": {
    "accuracy": "High (conservative quantization)",
    "speed": "Good (8-bit quantization)",
    "memory": "Reduced (~50% of original)",
    "best_for": "STEM reasoning tasks requiring high accuracy"
  }
}
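
For reference, the quantization_config block above maps directly onto a transformers BitsAndBytesConfig. The following is a minimal loading sketch, assuming the quantized checkpoint is available locally under the path named in usage_instructions.loading ("bnb_8bit_conservative_model"); the prompt is purely illustrative, and running it requires the transformers, accelerate, and bitsandbytes packages plus a CUDA-capable GPU.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Assumed local path, taken from usage_instructions.loading; adjust to your checkout.
MODEL_PATH = "bnb_8bit_conservative_model"

# Mirror the quantization_config block above: conservative int8 outlier
# threshold of 8.0, with lm_head skipped (kept in higher precision).
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_threshold=8.0,
    llm_int8_skip_modules=["lm_head"],
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=bnb_config,
    device_map="auto",
)

# Greedy decoding (do_sample=False), matching the
# "deterministic_generation_config" optimization listed above.
prompt = "Question: What is the SI unit of force?\nAnswer:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=16, do_sample=False)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Passing quantization_config at load time re-applies the same 8-bit scheme recorded in this file, which is what yields the ~2x size reduction over FP16 noted in model_stats.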