{ "task": "2.3 - Library-Based Quantization with bitsandbytes", "model_source": "ranjan56cse/gpt2-large-agnews-classifier", "int8_metrics": { "model_name": "INT8 (bitsandbytes)", "quantization_method": "bitsandbytes", "num_samples_evaluated": 2000, "accuracy": 0.9505, "precision": 0.9503862443765202, "recall": 0.9505, "f1_score": 0.9504235034869967, "confusion_matrix": [ [ 494, 4, 11, 2 ], [ 1, 521, 2, 2 ], [ 12, 0, 409, 28 ], [ 11, 2, 24, 477 ] ], "per_class_metrics": { "World": { "precision": 0.9536679536679536, "recall": 0.9667318982387475, "f1": 0.9601554907677357 }, "Sports": { "precision": 0.9886148007590133, "recall": 0.9904942965779467, "f1": 0.9895536562203229 }, "Business": { "precision": 0.9170403587443946, "recall": 0.910913140311804, "f1": 0.9139664804469274 }, "Sci/Tech": { "precision": 0.93713163064833, "recall": 0.9280155642023347, "f1": 0.9325513196480938 } }, "memory_mb": 801.35498046875, "avg_latency_ms": 75.78567934036255, "std_latency_ms": 31.118263723559952 }, "nf4_metrics": { "model_name": "NF4 (bitsandbytes)", "quantization_method": "bitsandbytes", "num_samples_evaluated": 2000, "accuracy": 0.9495, "precision": 0.9493676564802229, "recall": 0.9495, "f1_score": 0.9494144946434884, "confusion_matrix": [ [ 494, 4, 12, 1 ], [ 1, 521, 2, 2 ], [ 12, 0, 407, 30 ], [ 11, 2, 24, 477 ] ], "per_class_metrics": { "World": { "precision": 0.9536679536679536, "recall": 0.9667318982387475, "f1": 0.9601554907677357 }, "Sports": { "precision": 0.9886148007590133, "recall": 0.9904942965779467, "f1": 0.9895536562203229 }, "Business": { "precision": 0.9146067415730337, "recall": 0.9064587973273942, "f1": 0.9105145413870246 }, "Sci/Tech": { "precision": 0.9352941176470588, "recall": 0.9280155642023347, "f1": 0.931640625 } }, "memory_mb": 463.8560791015625, "avg_latency_ms": 43.911072731018066, "std_latency_ms": 1.4153050965884408 }, "comparison": { "accuracy_diff": 0.0010000000000000009, "memory_reduction_pct": 42.11602967386172 } }