| { | |
| "config": { | |
| "local_samples_dir": "data_samples/redmod_hf_streaming", | |
| "merge_web": false, | |
| "classifier": "distilbert", | |
| "seed": 0, | |
| "val_fraction": 0.2, | |
| "target_model": "./qwen3p5base", | |
| "generator": "hf", | |
| "num_prompts": 300, | |
| "max_new_tokens": 512, | |
| "gen_temperature": 0.8, | |
| "top_p": 0.9, | |
| "gen_seed": 0, | |
| "hf_revision": null, | |
| "hf_model_name": "distilbert/distilbert-base-uncased", | |
| "hf_epochs": 3, | |
| "hf_batch_size": 64, | |
| "hf_lr": 2e-05, | |
| "hf_weight_decay": 0.01, | |
| "hf_max_length": 256, | |
| "hf_pretrained_dir": null, | |
| "bootstrap": false, | |
| "n_boot": 300, | |
| "prompts_style": "unconditional" | |
| }, | |
| "categories": [ | |
| "cci4_extras_stem", | |
| "dclm_pro", | |
| "diverse_qa_filtered", | |
| "extras", | |
| "megamathweb_pro", | |
| "nemotron_synth_code", | |
| "retrieve_pile", | |
| "sft_code_proc", | |
| "sft_math_proc", | |
| "textbooks_expanded" | |
| ], | |
| "val_metrics": { | |
| "val_acc": 0.9223, | |
| "temperature": 1.1051709180756477, | |
| "model_name": "distilbert/distilbert-base-uncased" | |
| }, | |
| "confusion_matrix": [ | |
| [ | |
| 0.976, | |
| 0.007, | |
| 0.0, | |
| 0.007, | |
| 0.0, | |
| 0.0, | |
| 0.01, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| [ | |
| 0.003, | |
| 0.802, | |
| 0.094, | |
| 0.01, | |
| 0.001, | |
| 0.0, | |
| 0.09, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| [ | |
| 0.0, | |
| 0.11, | |
| 0.785, | |
| 0.01, | |
| 0.01, | |
| 0.0, | |
| 0.085, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| [ | |
| 0.013, | |
| 0.015, | |
| 0.01, | |
| 0.93, | |
| 0.013, | |
| 0.001, | |
| 0.018, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| [ | |
| 0.001, | |
| 0.003, | |
| 0.0, | |
| 0.009, | |
| 0.978, | |
| 0.0, | |
| 0.007, | |
| 0.0, | |
| 0.0, | |
| 0.002 | |
| ], | |
| [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.001, | |
| 0.999, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| [ | |
| 0.007, | |
| 0.126, | |
| 0.088, | |
| 0.009, | |
| 0.008, | |
| 0.0, | |
| 0.76, | |
| 0.0, | |
| 0.0, | |
| 0.002 | |
| ], | |
| [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 1.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 1.0, | |
| 0.0 | |
| ], | |
| [ | |
| 0.0, | |
| 0.002, | |
| 0.0, | |
| 0.0, | |
| 0.002, | |
| 0.0, | |
| 0.003, | |
| 0.0, | |
| 0.0, | |
| 0.993 | |
| ] | |
| ], | |
| "pbar": [ | |
| 0.23522692918777466, | |
| 0.04656211659312248, | |
| 0.07114483416080475, | |
| 0.37367069721221924, | |
| 0.12033222615718842, | |
| 0.003941215109080076, | |
| 0.13968943059444427, | |
| 0.0024817727971822023, | |
| 0.0025195805355906487, | |
| 0.004431016743183136 | |
| ], | |
| "priors": { | |
| "point": [ | |
| 0.23441256603714983, | |
| 0.013787808032268919, | |
| 0.06585231548553169, | |
| 0.3964950990921098, | |
| 0.11575067513032279, | |
| 0.0035482863873162896, | |
| 0.16124749129042545, | |
| 0.0024817908881231148, | |
| 0.00251959862653156, | |
| 0.0039043690302204646 | |
| ], | |
| "mean": [ | |
| 0.23441256603714983, | |
| 0.013787808032268919, | |
| 0.06585231548553169, | |
| 0.3964950990921098, | |
| 0.11575067513032279, | |
| 0.0035482863873162896, | |
| 0.16124749129042545, | |
| 0.0024817908881231148, | |
| 0.00251959862653156, | |
| 0.0039043690302204646 | |
| ], | |
| "ci_lo": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "ci_hi": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ] | |
| }, | |
| "unknown": { | |
| "mode": "threshold", | |
| "metric": "maxprob", | |
| "threshold": 0.9, | |
| "mean_probability": 0.1387745887041092, | |
| "ci_lo": 0.1387745887041092, | |
| "ci_hi": 0.1387745887041092 | |
| }, | |
| "categories_with_unknown": [ | |
| "cci4_extras_stem", | |
| "dclm_pro", | |
| "diverse_qa_filtered", | |
| "extras", | |
| "megamathweb_pro", | |
| "nemotron_synth_code", | |
| "retrieve_pile", | |
| "sft_code_proc", | |
| "sft_math_proc", | |
| "textbooks_expanded", | |
| "Unknown" | |
| ], | |
| "priors_with_unknown": { | |
| "point": [ | |
| 0.20188205859826952, | |
| 0.011874410643459586, | |
| 0.056713687488813794, | |
| 0.3414716547924072, | |
| 0.09968742279688929, | |
| 0.003055874403312082, | |
| 0.13887043700702723, | |
| 0.0021373813783742237, | |
| 0.0021699423634352045, | |
| 0.003362541823902558, | |
| 0.1387745887041092 | |
| ], | |
| "mean": [ | |
| 0.20188205859826952, | |
| 0.011874410643459586, | |
| 0.056713687488813794, | |
| 0.3414716547924072, | |
| 0.09968742279688929, | |
| 0.003055874403312082, | |
| 0.13887043700702723, | |
| 0.0021373813783742237, | |
| 0.0021699423634352045, | |
| 0.003362541823902558, | |
| 0.1387745887041092 | |
| ], | |
| "ci_lo": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.1387745887041092 | |
| ], | |
| "ci_hi": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.1387745887041092 | |
| ] | |
| }, | |
| "pbar_with_unknown": [ | |
| 0.2025834172964096, | |
| 0.04010047763586044, | |
| 0.06127173826098442, | |
| 0.32181471586227417, | |
| 0.10363317281007767, | |
| 0.003394274739548564, | |
| 0.12030409276485443, | |
| 0.0021373657509684563, | |
| 0.0021699268836528063, | |
| 0.0038161042612046003, | |
| 0.1387745887041092 | |
| ] | |
| } |