qwen3p5_base_llmsurgeon / summary.json
semran1's picture
Upload folder using huggingface_hub
7eef335 verified
Raw
History Blame Contribute Delete
5.29 kB
{
"config": {
"local_samples_dir": "data_samples/redmod_hf_streaming",
"merge_web": false,
"classifier": "distilbert",
"seed": 0,
"val_fraction": 0.2,
"target_model": "./qwen3p5base",
"generator": "hf",
"num_prompts": 300,
"max_new_tokens": 512,
"gen_temperature": 0.8,
"top_p": 0.9,
"gen_seed": 0,
"hf_revision": null,
"hf_model_name": "distilbert/distilbert-base-uncased",
"hf_epochs": 3,
"hf_batch_size": 64,
"hf_lr": 2e-05,
"hf_weight_decay": 0.01,
"hf_max_length": 256,
"hf_pretrained_dir": null,
"bootstrap": false,
"n_boot": 300,
"prompts_style": "unconditional"
},
"categories": [
"cci4_extras_stem",
"dclm_pro",
"diverse_qa_filtered",
"extras",
"megamathweb_pro",
"nemotron_synth_code",
"retrieve_pile",
"sft_code_proc",
"sft_math_proc",
"textbooks_expanded"
],
"val_metrics": {
"val_acc": 0.9223,
"temperature": 1.1051709180756477,
"model_name": "distilbert/distilbert-base-uncased"
},
"confusion_matrix": [
[
0.976,
0.007,
0.0,
0.007,
0.0,
0.0,
0.01,
0.0,
0.0,
0.0
],
[
0.003,
0.802,
0.094,
0.01,
0.001,
0.0,
0.09,
0.0,
0.0,
0.0
],
[
0.0,
0.11,
0.785,
0.01,
0.01,
0.0,
0.085,
0.0,
0.0,
0.0
],
[
0.013,
0.015,
0.01,
0.93,
0.013,
0.001,
0.018,
0.0,
0.0,
0.0
],
[
0.001,
0.003,
0.0,
0.009,
0.978,
0.0,
0.007,
0.0,
0.0,
0.002
],
[
0.0,
0.0,
0.0,
0.0,
0.001,
0.999,
0.0,
0.0,
0.0,
0.0
],
[
0.007,
0.126,
0.088,
0.009,
0.008,
0.0,
0.76,
0.0,
0.0,
0.002
],
[
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0,
0.0
],
[
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
1.0,
0.0
],
[
0.0,
0.002,
0.0,
0.0,
0.002,
0.0,
0.003,
0.0,
0.0,
0.993
]
],
"pbar": [
0.23522692918777466,
0.04656211659312248,
0.07114483416080475,
0.37367069721221924,
0.12033222615718842,
0.003941215109080076,
0.13968943059444427,
0.0024817727971822023,
0.0025195805355906487,
0.004431016743183136
],
"priors": {
"point": [
0.23441256603714983,
0.013787808032268919,
0.06585231548553169,
0.3964950990921098,
0.11575067513032279,
0.0035482863873162896,
0.16124749129042545,
0.0024817908881231148,
0.00251959862653156,
0.0039043690302204646
],
"mean": [
0.23441256603714983,
0.013787808032268919,
0.06585231548553169,
0.3964950990921098,
0.11575067513032279,
0.0035482863873162896,
0.16124749129042545,
0.0024817908881231148,
0.00251959862653156,
0.0039043690302204646
],
"ci_lo": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0
],
"ci_hi": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0
]
},
"unknown": {
"mode": "threshold",
"metric": "maxprob",
"threshold": 0.9,
"mean_probability": 0.1387745887041092,
"ci_lo": 0.1387745887041092,
"ci_hi": 0.1387745887041092
},
"categories_with_unknown": [
"cci4_extras_stem",
"dclm_pro",
"diverse_qa_filtered",
"extras",
"megamathweb_pro",
"nemotron_synth_code",
"retrieve_pile",
"sft_code_proc",
"sft_math_proc",
"textbooks_expanded",
"Unknown"
],
"priors_with_unknown": {
"point": [
0.20188205859826952,
0.011874410643459586,
0.056713687488813794,
0.3414716547924072,
0.09968742279688929,
0.003055874403312082,
0.13887043700702723,
0.0021373813783742237,
0.0021699423634352045,
0.003362541823902558,
0.1387745887041092
],
"mean": [
0.20188205859826952,
0.011874410643459586,
0.056713687488813794,
0.3414716547924072,
0.09968742279688929,
0.003055874403312082,
0.13887043700702723,
0.0021373813783742237,
0.0021699423634352045,
0.003362541823902558,
0.1387745887041092
],
"ci_lo": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.1387745887041092
],
"ci_hi": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.1387745887041092
]
},
"pbar_with_unknown": [
0.2025834172964096,
0.04010047763586044,
0.06127173826098442,
0.32181471586227417,
0.10363317281007767,
0.003394274739548564,
0.12030409276485443,
0.0021373657509684563,
0.0021699268836528063,
0.0038161042612046003,
0.1387745887041092
]
}