Aethon-N1-Base-Open-Structure / bundle /integration_report.json
OkeyMeta's picture
Update Open Structure release to v26 with HH-RLHF and religion data
a1ee4bd verified
{
"name": "Aethon N1 Base",
"family": "Aethon Native",
"tokenizer": "Aethon Native Concept Codec (ANCC)",
"size_unit": "Structural Capacity (SC)",
"capacity": {
"sc": 112897,
"concept_count": 27344,
"explicit_edge_count": 81899,
"abstraction_count": 3654,
"revision_count": 0,
"raw_unit_count": 0
},
"lanes": {
"identity": {
"lane": "identity",
"sources": [
"data/native/identity/aethon_identity_v1.jsonl",
"data/identity/aethon_identity.jsonl",
"data/identity/aethon_identity_test.jsonl"
],
"direct_facts": 0,
"rows": 23,
"docs_accepted": 23,
"docs_skipped": 0,
"learned_edges": 28,
"derived_edges": 0,
"shard_count": 3,
"merge_edges": 28,
"merge_contradictions": 0,
"merge_raw_units": 0,
"completed_shards": 3,
"total_rows_planned": 23,
"captured_units": 23
},
"reasoning": {
"lane": "reasoning",
"sources": [
"data/native/reasoning/aethon_hle_transfer_v1.jsonl",
"data/native/reasoning/aethon_agi_transfer_v2.jsonl",
"data/native/reasoning/aethon_agi_transfer_v1.jsonl",
"data/base/aethon_curated_reasoning_base_v7.jsonl",
"data/base/aethon_curated_reasoning_base_v6.jsonl",
"data/base/aethon_curated_reasoning_base_v5.jsonl",
"data/base/aethon_curated_reasoning_base_v4.jsonl",
"data/base/aethon_curated_reasoning_base_v3.jsonl",
"data/base/aethon_curated_reasoning_base_v2.jsonl",
"data/base/aethon_memory_native_v1.jsonl",
"data/base/aethon_curated_reasoning_base_v8.jsonl",
"data/base/aethon_clean_fact_base_v1.jsonl",
"data/base/aethon_clean_probe_v1.jsonl",
"data/base/aethon_gold_probe_v1.jsonl",
"data/base/aethon_multilingual_base_mix.jsonl",
"data/base/aethon_v2_expanded.jsonl",
"data/base/test_base_curriculum.jsonl",
"data/base/shards/aethon_base_0001.jsonl",
"data/base/shards/aethon_base_0002.jsonl",
"data/base/shards/aethon_base_0003.jsonl",
"data/base/shards/aethon_base_0004.jsonl",
"data/base/shards/aethon_base_0005.jsonl",
"data/base/shards/aethon_base_0006.jsonl",
"data/base/shards/aethon_base_0007.jsonl",
"data/base/shards/aethon_base_0008.jsonl",
"data/chat/aethon_logic_curriculum_v1.jsonl",
"data/chat/aethon_exact_answer_chat.jsonl",
"data/chat/aethon_exact_control_v1.jsonl",
"data/chat/clean/anthropic_hh_rlhf_clean.jsonl",
"data/reasoning_corpora/clean/advanced_python_cot.jsonl",
"data/reasoning_corpora/clean/aethon_bootstrap_mix.jsonl",
"data/reasoning_corpora/clean/am_deepseek_r1_distilled.jsonl",
"data/reasoning_corpora/clean/openmath_instruct_1.jsonl",
"data/reasoning_corpora/clean/bespoke_stratos_17k.jsonl",
"data/reasoning_corpora/clean/open_thoughts_114k.jsonl",
"data/reasoning_corpora/clean/r1_distill_sft.jsonl",
"data/groundedness/aethon_groundedness.jsonl",
"data/groundedness/aethon_groundedness_verifier.jsonl",
"data/groundedness/aethon_retrieval_eval.jsonl",
"data/groundedness/aethon_retrieval_verifier.jsonl"
],
"direct_facts": 0,
"rows": 297584,
"docs_accepted": 127774,
"docs_skipped": 0,
"learned_edges": 45253,
"derived_edges": 0,
"shard_count": 85,
"merge_edges": 45253,
"merge_contradictions": 0,
"merge_raw_units": 0,
"completed_shards": 85,
"total_rows_planned": 297584,
"captured_units": 127774
},
"math": {
"lane": "math",
"sources": [
"data/native/math/aethon_math_native_v1.jsonl",
"data/chat/aethon_column_addition_v1.jsonl",
"data/chat/aethon_digit_math_chat_v1.jsonl",
"data/chat/aethon_digit_math_micro_v1.jsonl",
"data/chat/aethon_gold_math_chat_v1.jsonl",
"data/chat/aethon_math_exact_short_v1.jsonl",
"data/chat/aethon_systematic_math_v1.jsonl"
],
"direct_facts": 0,
"rows": 10530,
"docs_accepted": 4,
"docs_skipped": 0,
"learned_edges": 13,
"derived_edges": 0,
"shard_count": 8,
"merge_edges": 13,
"merge_contradictions": 0,
"merge_raw_units": 0,
"completed_shards": 8,
"total_rows_planned": 10530,
"captured_units": 4
},
"code": {
"lane": "code",
"sources": [
"data/native/code/aethon_code_native_v1.jsonl",
"data/code/advanced_python_exec.jsonl",
"data/tool_use/clean/bitagent_tool_calling.jsonl",
"data/tool_use/clean/toolace.jsonl"
],
"direct_facts": 0,
"rows": 10337,
"docs_accepted": 9520,
"docs_skipped": 0,
"learned_edges": 3455,
"derived_edges": 0,
"shard_count": 4,
"merge_edges": 3455,
"merge_contradictions": 0,
"merge_raw_units": 0,
"completed_shards": 4,
"total_rows_planned": 10337,
"captured_units": 9515
},
"story": {
"lane": "story",
"sources": [
"data/native/story/aethon_story_native_v1.jsonl",
"data/chat/aethon_gold_chat_v1.jsonl",
"data/chat/aethon_gold_chat_v3.jsonl",
"data/chat/aethon_reasoning_chat_bootstrap.jsonl",
"data/chat/aethon_copy_probe_v1.jsonl",
"data/chat/aethon_speakback_chat.jsonl",
"data/chat/aethon_truthful_chat_bootstrap.jsonl",
"data/chat/clean/anthropic_hh_rlhf_clean.jsonl",
"data/chat/clean/aethon_religion_chat_v1.jsonl"
],
"direct_facts": 0,
"rows": 176490,
"docs_accepted": 5,
"docs_skipped": 0,
"learned_edges": 2,
"derived_edges": 0,
"shard_count": 42,
"merge_edges": 2,
"merge_contradictions": 0,
"merge_raw_units": 0,
"completed_shards": 42,
"total_rows_planned": 176490,
"captured_units": 5
},
"world": {
"lane": "world",
"sources": [
"data/native/world/aethon_religion_world_v1.jsonl",
"data/world_knowledge/clean/structured_wikipedia_en.jsonl",
"data/world_knowledge/clean/fineweb_edu_sample.jsonl",
"data/world_knowledge/clean/cosmopedia_web.jsonl",
"data/world_knowledge/clean/masakhanews_yor.jsonl",
"data/world_knowledge/clean/masakhanews_ibo.jsonl",
"data/world_knowledge/clean/masakhanews_pcm.jsonl",
"data/world_knowledge/clean/masakhanews_hau.jsonl",
"data/base/aethon_multilingual_base_mix.jsonl",
"data/chat/clean/anthropic_hh_rlhf_clean.jsonl",
"data/chat/clean/aethon_religion_chat_v1.jsonl",
"data/chat/clean/helpsteer2_clean.jsonl",
"data/chat/clean/oasst1_clean.jsonl",
"data/chat/clean/ultrachat_200k_clean.jsonl"
],
"direct_facts": 0,
"rows": 252980,
"docs_accepted": 23428,
"docs_skipped": 0,
"learned_edges": 33158,
"derived_edges": 0,
"shard_count": 60,
"merge_edges": 33158,
"merge_contradictions": 0,
"merge_raw_units": 0,
"completed_shards": 60,
"total_rows_planned": 252980,
"captured_units": 23428
}
},
"digest": {
"processed_units": 160749,
"total_units": 160749,
"learned_edges": 81909,
"derived_edges": 2462,
"rule_count": 0
}
}