HyperlinksSpace
/

TinyModel1

Text Classification

text-embeddings-inference

Model card Files Files and versions

TinyModel1 / eval_report.json

anriltine's picture

Deploy TinyModel1 from GitHub Actions

b717780 verified about 1 month ago

History Blame Contribute Delete

4.48 kB

	{
	"reproducibility": {
	"seed": 42,
	"dataset": "fancyzhx/ag_news",
	"dataset_config": null,
	"train_split": "train",
	"eval_split": "test",
	"text_column": "text",
	"label_column": "label",
	"max_train_samples": 3000,
	"max_eval_samples": 600,
	"note": "Train and eval rows are the first `max_train_samples` and `max_eval_samples` rows, respectively, of each split after shuffle(seed); see texts/eval-reproducibility.md."
	},
	"metrics": {
	"accuracy": 0.538333,
	"macro_f1": 0.455368,
	"weighted_f1": 0.452694,
	"per_class_f1": {
	"World": 0.536585,
	"Sports": 0.730964,
	"Business": 0.0,
	"Sci/Tech": 0.553922
	},
	"confusion_matrix": [
	[
	66,
	67,
	0,
	15
	],
	[
	1,
	144,
	0,
	2
	],
	[
	12,
	15,
	0,
	125
	],
	[
	19,
	21,
	0,
	113
	]
	],
	"confusion_matrix_axis": "rows=true class, columns=predicted class",
	"label_order": [
	"World",
	"Sports",
	"Business",
	"Sci/Tech"
	]
	},
	"dataset_quality": {
	"class_distribution": {
	"train": {
	"counts_by_label": {
	"World": 771,
	"Sports": 742,
	"Business": 691,
	"Sci/Tech": 796
	},
	"proportions_by_label": {
	"World": 0.257,
	"Sports": 0.247333,
	"Business": 0.230333,
	"Sci/Tech": 0.265333
	},
	"total": 3000
	},
	"eval": {
	"counts_by_label": {
	"World": 148,
	"Sports": 147,
	"Business": 152,
	"Sci/Tech": 153
	},
	"proportions_by_label": {
	"World": 0.246667,
	"Sports": 0.245,
	"Business": 0.253333,
	"Sci/Tech": 0.255
	},
	"total": 600
	}
	}
	},
	"error_analysis": {
	"top_confusions": [
	{
	"true_label": "Business",
	"predicted_label": "Sci/Tech",
	"count": 125
	},
	{
	"true_label": "World",
	"predicted_label": "Sports",
	"count": 67
	},
	{
	"true_label": "Sci/Tech",
	"predicted_label": "Sports",
	"count": 21
	},
	{
	"true_label": "Sci/Tech",
	"predicted_label": "World",
	"count": 19
	},
	{
	"true_label": "World",
	"predicted_label": "Sci/Tech",
	"count": 15
	},
	{
	"true_label": "Business",
	"predicted_label": "Sports",
	"count": 15
	},
	{
	"true_label": "Business",
	"predicted_label": "World",
	"count": 12
	},
	{
	"true_label": "Sports",
	"predicted_label": "Sci/Tech",
	"count": 2
	},
	{
	"true_label": "Sports",
	"predicted_label": "World",
	"count": 1
	}
	]
	},
	"calibration": {
	"max_prob_histogram": {
	"num_bins": 10,
	"bins": [
	{
	"bin_low": 0.0,
	"bin_high": 0.1,
	"count": 0
	},
	{
	"bin_low": 0.1,
	"bin_high": 0.2,
	"count": 0
	},
	{
	"bin_low": 0.2,
	"bin_high": 0.3,
	"count": 1
	},
	{
	"bin_low": 0.3,
	"bin_high": 0.4,
	"count": 27
	},
	{
	"bin_low": 0.4,
	"bin_high": 0.5,
	"count": 156
	},
	{
	"bin_low": 0.5,
	"bin_high": 0.6,
	"count": 237
	},
	{
	"bin_low": 0.6,
	"bin_high": 0.7,
	"count": 171
	},
	{
	"bin_low": 0.7,
	"bin_high": 0.8,
	"count": 8
	},
	{
	"bin_low": 0.8,
	"bin_high": 0.9,
	"count": 0
	},
	{
	"bin_low": 0.9,
	"bin_high": 1.0,
	"count": 0
	}
	],
	"note": "Each eval example contributes one max softmax probability (winner class)."
	}
	},
	"routing": {
	"fallback_behavior": "At inference, if the maximum softmax probability is below `min_confidence`, treat the prediction as low-confidence: route to human review, a secondary model, or a safe default class\u2014choose per product.",
	"min_confidence": null,
	"comment": "`min_confidence` is not set by training; a typical starting range for routing is 0.5\u20130.7. Tune it on a validation set using `calibration.max_prob_histogram` and `error_analysis.top_confusions`."
	}
	}