Buckets:

McClain's picture
download
raw
6.4 kB
{
"version": "rejection_sampling_v2",
"description": "Rejection-sampling redo at per-model sweep-optimal temperature, scored with the analysis2 strict-QC pipeline (BLAST against curated oriDB + AMRFinder + repeats + two-stage filter), matching the main 8-prompt evaluation. Same 2-prompt protocol as v1 baselines/rejection_sampling/ (ATG + cfg.default_query GFP cassette), 10K samples per cell. Replaces an earlier manifest that carried plasmidkit-based numbers; plasmidkit over-counts ORIs by treating overlapping ColE1/pBR322/f1 motifs as separate features.",
"generated_utc": "2026-05-01T14:04:38.229463Z",
"cells": {
"Base_t1": {
"model": "Base",
"model_id": "UCL-CSSB/PlasmidGPT",
"temperature": 1.0,
"top_p": 0.9,
"max_tokens": 256,
"seed": 118556232,
"prompts": [
"ATG",
"<cfg.default_query: GFP cassette, 880 bp>"
],
"n_samples": 10000,
"samples_per_prompt": 5000,
"gen_time_sec": 107.0,
"sha256_outputs": "1a326eef8578e653fdf3ac81d33b0eb54512b16e9237d7d9b4e261ba56f72112",
"sha256_outputs_in_metadata": "1a326eef8578e653fdf3ac81d33b0eb54512b16e9237d7d9b4e261ba56f72112",
"sha256_outputs_match": true,
"sha256_full_column": "3a4b856bde117e94a4097873e0b0beaf29405db605905a8c5bf03e5db9b3bfa4",
"sha256_v1_outputs": "363e89d716c87e8ad59c16c00b9be4294637c211312b7a4e3999392c8b775cb3",
"first_5_ids": [
"seq_0",
"seq_1",
"seq_2",
"seq_3",
"seq_4"
],
"first_5_lengths": [
31,
8,
282,
88,
129
],
"strict_qc_analysis2": {
"n_sequences": 10000,
"n_passed": 715,
"pass_rate_pct": 7.15,
"sha256_qc_passed": "9d2929bb15a82f81f0ac3f7c70613004c219719925f29a494b02e70ce335da2d",
"sha256_qc_summary": "228387ab775c1deb8abefcc1cad8c75b89271fa58363ea7e5f0ceba6ac9b1795",
"thresholds": {
"ori_low_id": 85.0,
"ori_low_cov": 80.0,
"amr_low_id": 85.0,
"amr_low_cov": 80.0,
"ori_strict_id": 99.0,
"ori_strict_cov": 99.0,
"amr_strict_id": 100.0,
"amr_strict_cov": 100.0,
"repeat_max_len": 50
},
"pipeline": "analysis2: qc_oriv_arg2 + repeats2 + filter_qc_two_stage2",
"regenerated_utc": "2026-04-30T21:53:51.439679Z"
}
},
"SFT_t1": {
"model": "SFT",
"model_id": "UCL-CSSB/PlasmidGPT-SFT",
"temperature": 1.0,
"top_p": 0.9,
"max_tokens": 256,
"seed": 1660713596,
"prompts": [
"ATG",
"<cfg.default_query: GFP cassette, 880 bp>"
],
"n_samples": 10000,
"samples_per_prompt": 5000,
"gen_time_sec": 107.0,
"sha256_outputs": "fa586c04962e45521053673bca06e74db439d9587e2c61c726c9487c37f6f4c6",
"sha256_outputs_in_metadata": "fa586c04962e45521053673bca06e74db439d9587e2c61c726c9487c37f6f4c6",
"sha256_outputs_match": true,
"sha256_full_column": "54a0eea63e5c398203791601f60f9b1e038ab8fb94427429842bbf948599e4f7",
"sha256_v1_outputs": "0921fb93c60b2fac84a29de309ca6e504fa98f7e701de9e30ffa576dcb11be22",
"first_5_ids": [
"seq_0",
"seq_1",
"seq_2",
"seq_3",
"seq_4"
],
"first_5_lengths": [
30,
10,
20,
82,
35
],
"strict_qc_analysis2": {
"n_sequences": 10000,
"n_passed": 715,
"pass_rate_pct": 7.15,
"sha256_qc_passed": "64f1ae354bbd272cb590cf416a50bcf93358a9044ccb171ea7a2b0a20b5f73bb",
"sha256_qc_summary": "03169ce198e3d181f5b20a7401191bfe06395e62704370c38c0c4c8ae7809108",
"thresholds": {
"ori_low_id": 85.0,
"ori_low_cov": 80.0,
"amr_low_id": 85.0,
"amr_low_cov": 80.0,
"ori_strict_id": 99.0,
"ori_strict_cov": 99.0,
"amr_strict_id": 100.0,
"amr_strict_cov": 100.0,
"repeat_max_len": 50
},
"pipeline": "analysis2: qc_oriv_arg2 + repeats2 + filter_qc_two_stage2",
"regenerated_utc": "2026-04-30T22:03:06.455317Z"
}
},
"GRPO_t1.15": {
"model": "GRPO",
"model_id": "UCL-CSSB/PlasmidGPT-GRPO",
"temperature": 1.15,
"top_p": 0.9,
"max_tokens": 256,
"seed": 627311195,
"prompts": [
"ATG",
"<cfg.default_query: GFP cassette, 880 bp>"
],
"n_samples": 10000,
"samples_per_prompt": 5000,
"gen_time_sec": 194.6,
"sha256_outputs": "8a8738b1485a20433c3d48d143e879fe4db80137b9feda6dcacb16e7a63c17b1",
"sha256_outputs_in_metadata": "8a8738b1485a20433c3d48d143e879fe4db80137b9feda6dcacb16e7a63c17b1",
"sha256_outputs_match": true,
"sha256_full_column": "2e338ee5bdc6d0067108bfbb7eefbbaf1e6a48b2996a7878c2b1dbe8470ede5c",
"sha256_v1_outputs": "404d3cb55215ad70f17fe1cb735e6a1f3bc67908e21a8bc980df7a576eb6d25b",
"first_5_ids": [
"seq_0",
"seq_1",
"seq_2",
"seq_3",
"seq_4"
],
"first_5_lengths": [
6561,
6542,
6654,
6549,
6615
],
"strict_qc_analysis2": {
"n_sequences": 10000,
"n_passed": 8847,
"pass_rate_pct": 88.47,
"sha256_qc_passed": "d92ed6349b5b043a33dcadcd71b689c22de0bf7959215ac9ec40973f103bafa8",
"sha256_qc_summary": "d0115974159aa37c9fb7308cbb6d9523c803206e68001f6e12c1a5ad9c76d350",
"thresholds": {
"ori_low_id": 85.0,
"ori_low_cov": 80.0,
"amr_low_id": 85.0,
"amr_low_cov": 80.0,
"ori_strict_id": 99.0,
"ori_strict_cov": 99.0,
"amr_strict_id": 100.0,
"amr_strict_cov": 100.0,
"repeat_max_len": 50
},
"pipeline": "analysis2: qc_oriv_arg2 + repeats2 + filter_qc_two_stage2",
"regenerated_utc": "2026-04-30T22:13:14.040489Z"
}
}
},
"v1_outputs_shas": {
"Base": "363e89d716c87e8ad59c16c00b9be4294637c211312b7a4e3999392c8b775cb3",
"SFT": "0921fb93c60b2fac84a29de309ca6e504fa98f7e701de9e30ffa576dcb11be22",
"GRPO": "404d3cb55215ad70f17fe1cb735e6a1f3bc67908e21a8bc980df7a576eb6d25b"
},
"cross_check": {
"all_v2_distinct": true,
"all_v2_distinct_from_v1": true,
"all_metadata_sha256_match_file": true
}
}

Xet Storage Details

Size:
6.4 kB
·
Xet hash:
491aa54dd294f910478b9d98adc29e2994180274ccbccf8650e9fa0404949ce4

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.