{ "model_name": "bnb_8bit_conservative_stem_optimized", "source_model": "luca-deandrea/MNLP_M3_mcqa_model", "quantization_method": "BitsAndBytes_8bit", "timestamp": "2025-06-10 23:30:05", "processing_time_seconds": 31.926149606704712, "quantization_config": { "load_in_8bit": true, "llm_int8_threshold": 8.0, "llm_int8_skip_modules": [ "lm_head" ], "llm_int8_enable_fp32_cpu_offload": false, "llm_int8_has_fp16_weight": false }, "model_stats": { "size_mb": 732.8867282867432, "validation_score": 0.6, "compression_ratio": "~2x compared to FP16" }, "stem_optimizations": [ "conservative_8bit_threshold_8.0", "lm_head_layer_kept_in_fp16", "deterministic_generation_config", "optimized_for_stem_reasoning", "cache_enabled_for_performance" ], "usage_instructions": { "library": "transformers", "loading": "AutoModelForCausalLM.from_pretrained('bnb_8bit_conservative_stem_optimized')", "recommended_for": "MNLP_STEM_mcqa_evals", "notes": "Model ready for lighteval evaluation" }, "performance_characteristics": { "accuracy": "High (conservative quantization)", "speed": "Good (8-bit quantization)", "memory": "Reduced (~50% of original)", "best_for": "STEM reasoning tasks requiring high accuracy" } }