File size: 4,858 Bytes
ed6bec6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
# generator/run_generator.py
import json
from typing import List, Dict, Any
from .config import OUTPUT_JSONL, DEFAULT_SAMPLE_COUNT
from .dictionary_access import load_all_dictionaries
# Existing semantic templates
from .templates_basic import generate_emotional_expression_samples
from .templates_actions import generate_action_object_samples
from .templates_context import generate_context_rich_samples
from .templates_context_advanced import generate_advanced_context_samples
# NEW protocol-level templates
from .templates_identity import generate_identity_samples
from .templates_intent import generate_intent_samples
from .templates_behavior import generate_behavior_samples
from .templates_memory import generate_memory_samples
from .templates_thought import generate_thought_samples
from .templates_safety_response import generate_safety_response_samples
# Envelope + training builder
from .training_builder import build_training_sample, save_samples
# Each entry pairs a raw-sample generator with its share divisor: the four
# semantic templates each produce DEFAULT_SAMPLE_COUNT // 6 samples, the six
# protocol-level templates DEFAULT_SAMPLE_COUNT // 12 each (so together they
# target roughly (4/6 + 6/12) * DEFAULT_SAMPLE_COUNT samples overall).
_GENERATOR_SPECS = [
    (generate_emotional_expression_samples, 6),
    (generate_action_object_samples, 6),
    (generate_context_rich_samples, 6),
    (generate_advanced_context_samples, 6),
    (generate_identity_samples, 12),
    (generate_intent_samples, 12),
    (generate_behavior_samples, 12),
    (generate_memory_samples, 12),
    (generate_thought_samples, 12),
    (generate_safety_response_samples, 12),
]


def _to_training_sample(sample: Dict[str, Any]) -> Any:
    """Wrap one raw template sample in the Glyphic training envelope.

    Reads the optional fields a template may have emitted, substituting the
    protocol defaults for anything missing, and delegates to
    ``build_training_sample``.

    Args:
        sample: A raw sample dict produced by one of the template generators.
            Recognized keys: "input", "glyphic", "output", "identity",
            "emotion", "sensory", "social", "intent", "behavior", "memory",
            "thought_chain" — all optional.

    Returns:
        Whatever ``build_training_sample`` returns (the envelope format is
        defined in ``training_builder``).
    """
    # Defaults mirror the protocol's neutral/assistive baseline so that
    # templates only need to emit the fields they actually vary.
    intent_dict = sample.get("intent", {
        "goal": "assist",
        "urgency": "1",
        "focus": "support",
    })
    behavior_dict = sample.get("behavior", {
        "tone": "warm",
        "pacing": "steady",
        "depth": "medium",
        "style": "natural",
        "clarity": "high",
    })
    return build_training_sample(
        user_text=sample.get("input", ""),
        identity=sample.get("identity", "A helpful, aligned Glyphic agent."),
        emotion=sample.get("emotion", "neutral"),
        sensory=sample.get("sensory", "none"),
        social=sample.get("social", "alone"),
        intent=intent_dict,
        behavior=behavior_dict,
        memory_summary=sample.get("memory", ""),
        thought_chain=sample.get("thought_chain", ""),
        glyphic_output=sample.get("glyphic", ""),
        realized_output=sample.get("output", ""),
    )


def main() -> None:
    """Generate the full Glyphic training dataset and write it to disk.

    Pipeline:
        1. Load all dictionaries once and run every registered template
           generator (semantic + protocol-level) against them.
        2. Convert each raw sample to a Glyphic training sample via
           ``_to_training_sample``.
        3. Save the merged dataset to ``OUTPUT_JSONL``.
    """
    dictionaries = load_all_dictionaries()

    # 1. Generate raw samples from every template, in registration order.
    raw_samples: List[Dict[str, Any]] = []
    for generate, divisor in _GENERATOR_SPECS:
        raw_samples.extend(
            generate(
                dictionaries=dictionaries,
                count=DEFAULT_SAMPLE_COUNT // divisor,
                source_language="en",
            )
        )

    # 2. Convert to Glyphic training samples.
    training_samples = [_to_training_sample(sample) for sample in raw_samples]

    # 3. Save dataset.
    save_samples(str(OUTPUT_JSONL), training_samples)
    print(f"Wrote {len(training_samples)} Glyphic training samples to {OUTPUT_JSONL}")


if __name__ == "__main__":
    main()
|