Buckets:
| { | |
| "version": "rejection_sampling_v2", | |
| "description": "Rejection-sampling redo at per-model sweep-optimal temperature, scored with the analysis2 strict-QC pipeline (BLAST against curated oriDB + AMRFinder + repeats + two-stage filter), matching the main 8-prompt evaluation. Same 2-prompt protocol as v1 baselines/rejection_sampling/ (ATG + cfg.default_query GFP cassette), 10K samples per cell. Replaces an earlier manifest that carried plasmidkit-based numbers; plasmidkit over-counts ORIs by treating overlapping ColE1/pBR322/f1 motifs as separate features.", | |
| "generated_utc": "2026-05-01T14:04:38.229463Z", | |
| "cells": { | |
| "Base_t1": { | |
| "model": "Base", | |
| "model_id": "UCL-CSSB/PlasmidGPT", | |
| "temperature": 1.0, | |
| "top_p": 0.9, | |
| "max_tokens": 256, | |
| "seed": 118556232, | |
| "prompts": [ | |
| "ATG", | |
| "<cfg.default_query: GFP cassette, 880 bp>" | |
| ], | |
| "n_samples": 10000, | |
| "samples_per_prompt": 5000, | |
| "gen_time_sec": 107.0, | |
| "sha256_outputs": "1a326eef8578e653fdf3ac81d33b0eb54512b16e9237d7d9b4e261ba56f72112", | |
| "sha256_outputs_in_metadata": "1a326eef8578e653fdf3ac81d33b0eb54512b16e9237d7d9b4e261ba56f72112", | |
| "sha256_outputs_match": true, | |
| "sha256_full_column": "3a4b856bde117e94a4097873e0b0beaf29405db605905a8c5bf03e5db9b3bfa4", | |
| "sha256_v1_outputs": "363e89d716c87e8ad59c16c00b9be4294637c211312b7a4e3999392c8b775cb3", | |
| "first_5_ids": [ | |
| "seq_0", | |
| "seq_1", | |
| "seq_2", | |
| "seq_3", | |
| "seq_4" | |
| ], | |
| "first_5_lengths": [ | |
| 31, | |
| 8, | |
| 282, | |
| 88, | |
| 129 | |
| ], | |
| "strict_qc_analysis2": { | |
| "n_sequences": 10000, | |
| "n_passed": 715, | |
| "pass_rate_pct": 7.15, | |
| "sha256_qc_passed": "9d2929bb15a82f81f0ac3f7c70613004c219719925f29a494b02e70ce335da2d", | |
| "sha256_qc_summary": "228387ab775c1deb8abefcc1cad8c75b89271fa58363ea7e5f0ceba6ac9b1795", | |
| "thresholds": { | |
| "ori_low_id": 85.0, | |
| "ori_low_cov": 80.0, | |
| "amr_low_id": 85.0, | |
| "amr_low_cov": 80.0, | |
| "ori_strict_id": 99.0, | |
| "ori_strict_cov": 99.0, | |
| "amr_strict_id": 100.0, | |
| "amr_strict_cov": 100.0, | |
| "repeat_max_len": 50 | |
| }, | |
| "pipeline": "analysis2: qc_oriv_arg2 + repeats2 + filter_qc_two_stage2", | |
| "regenerated_utc": "2026-04-30T21:53:51.439679Z" | |
| } | |
| }, | |
| "SFT_t1": { | |
| "model": "SFT", | |
| "model_id": "UCL-CSSB/PlasmidGPT-SFT", | |
| "temperature": 1.0, | |
| "top_p": 0.9, | |
| "max_tokens": 256, | |
| "seed": 1660713596, | |
| "prompts": [ | |
| "ATG", | |
| "<cfg.default_query: GFP cassette, 880 bp>" | |
| ], | |
| "n_samples": 10000, | |
| "samples_per_prompt": 5000, | |
| "gen_time_sec": 107.0, | |
| "sha256_outputs": "fa586c04962e45521053673bca06e74db439d9587e2c61c726c9487c37f6f4c6", | |
| "sha256_outputs_in_metadata": "fa586c04962e45521053673bca06e74db439d9587e2c61c726c9487c37f6f4c6", | |
| "sha256_outputs_match": true, | |
| "sha256_full_column": "54a0eea63e5c398203791601f60f9b1e038ab8fb94427429842bbf948599e4f7", | |
| "sha256_v1_outputs": "0921fb93c60b2fac84a29de309ca6e504fa98f7e701de9e30ffa576dcb11be22", | |
| "first_5_ids": [ | |
| "seq_0", | |
| "seq_1", | |
| "seq_2", | |
| "seq_3", | |
| "seq_4" | |
| ], | |
| "first_5_lengths": [ | |
| 30, | |
| 10, | |
| 20, | |
| 82, | |
| 35 | |
| ], | |
| "strict_qc_analysis2": { | |
| "n_sequences": 10000, | |
| "n_passed": 715, | |
| "pass_rate_pct": 7.15, | |
| "sha256_qc_passed": "64f1ae354bbd272cb590cf416a50bcf93358a9044ccb171ea7a2b0a20b5f73bb", | |
| "sha256_qc_summary": "03169ce198e3d181f5b20a7401191bfe06395e62704370c38c0c4c8ae7809108", | |
| "thresholds": { | |
| "ori_low_id": 85.0, | |
| "ori_low_cov": 80.0, | |
| "amr_low_id": 85.0, | |
| "amr_low_cov": 80.0, | |
| "ori_strict_id": 99.0, | |
| "ori_strict_cov": 99.0, | |
| "amr_strict_id": 100.0, | |
| "amr_strict_cov": 100.0, | |
| "repeat_max_len": 50 | |
| }, | |
| "pipeline": "analysis2: qc_oriv_arg2 + repeats2 + filter_qc_two_stage2", | |
| "regenerated_utc": "2026-04-30T22:03:06.455317Z" | |
| } | |
| }, | |
| "GRPO_t1.15": { | |
| "model": "GRPO", | |
| "model_id": "UCL-CSSB/PlasmidGPT-GRPO", | |
| "temperature": 1.15, | |
| "top_p": 0.9, | |
| "max_tokens": 256, | |
| "seed": 627311195, | |
| "prompts": [ | |
| "ATG", | |
| "<cfg.default_query: GFP cassette, 880 bp>" | |
| ], | |
| "n_samples": 10000, | |
| "samples_per_prompt": 5000, | |
| "gen_time_sec": 194.6, | |
| "sha256_outputs": "8a8738b1485a20433c3d48d143e879fe4db80137b9feda6dcacb16e7a63c17b1", | |
| "sha256_outputs_in_metadata": "8a8738b1485a20433c3d48d143e879fe4db80137b9feda6dcacb16e7a63c17b1", | |
| "sha256_outputs_match": true, | |
| "sha256_full_column": "2e338ee5bdc6d0067108bfbb7eefbbaf1e6a48b2996a7878c2b1dbe8470ede5c", | |
| "sha256_v1_outputs": "404d3cb55215ad70f17fe1cb735e6a1f3bc67908e21a8bc980df7a576eb6d25b", | |
| "first_5_ids": [ | |
| "seq_0", | |
| "seq_1", | |
| "seq_2", | |
| "seq_3", | |
| "seq_4" | |
| ], | |
| "first_5_lengths": [ | |
| 6561, | |
| 6542, | |
| 6654, | |
| 6549, | |
| 6615 | |
| ], | |
| "strict_qc_analysis2": { | |
| "n_sequences": 10000, | |
| "n_passed": 8847, | |
| "pass_rate_pct": 88.47, | |
| "sha256_qc_passed": "d92ed6349b5b043a33dcadcd71b689c22de0bf7959215ac9ec40973f103bafa8", | |
| "sha256_qc_summary": "d0115974159aa37c9fb7308cbb6d9523c803206e68001f6e12c1a5ad9c76d350", | |
| "thresholds": { | |
| "ori_low_id": 85.0, | |
| "ori_low_cov": 80.0, | |
| "amr_low_id": 85.0, | |
| "amr_low_cov": 80.0, | |
| "ori_strict_id": 99.0, | |
| "ori_strict_cov": 99.0, | |
| "amr_strict_id": 100.0, | |
| "amr_strict_cov": 100.0, | |
| "repeat_max_len": 50 | |
| }, | |
| "pipeline": "analysis2: qc_oriv_arg2 + repeats2 + filter_qc_two_stage2", | |
| "regenerated_utc": "2026-04-30T22:13:14.040489Z" | |
| } | |
| } | |
| }, | |
| "v1_outputs_shas": { | |
| "Base": "363e89d716c87e8ad59c16c00b9be4294637c211312b7a4e3999392c8b775cb3", | |
| "SFT": "0921fb93c60b2fac84a29de309ca6e504fa98f7e701de9e30ffa576dcb11be22", | |
| "GRPO": "404d3cb55215ad70f17fe1cb735e6a1f3bc67908e21a8bc980df7a576eb6d25b" | |
| }, | |
| "cross_check": { | |
| "all_v2_distinct": true, | |
| "all_v2_distinct_from_v1": true, | |
| "all_metadata_sha256_match_file": true | |
| } | |
| } |
Xet Storage Details
- Size:
- 6.4 kB
- Xet hash:
- 491aa54dd294f910478b9d98adc29e2994180274ccbccf8650e9fa0404949ce4
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.