|
|
|
|
|
|
|
|
import json |
|
|
from typing import List, Dict, Any |
|
|
|
|
|
from .config import OUTPUT_JSONL, DEFAULT_SAMPLE_COUNT |
|
|
from .dictionary_access import load_all_dictionaries |
|
|
|
|
|
|
|
|
from .templates_basic import generate_emotional_expression_samples |
|
|
from .templates_actions import generate_action_object_samples |
|
|
from .templates_context import generate_context_rich_samples |
|
|
from .templates_context_advanced import generate_advanced_context_samples |
|
|
|
|
|
|
|
|
from .templates_identity import generate_identity_samples |
|
|
from .templates_intent import generate_intent_samples |
|
|
from .templates_behavior import generate_behavior_samples |
|
|
from .templates_memory import generate_memory_samples |
|
|
from .templates_thought import generate_thought_samples |
|
|
from .templates_safety_response import generate_safety_response_samples |
|
|
|
|
|
|
|
|
from .training_builder import build_training_sample, save_samples |
|
|
|
|
|
|
|
|
def _generate_raw_samples(dictionaries: Any) -> List[Dict[str, Any]]:
    """Run every template generator once and return their samples in order.

    Each generator is called with the shared ``dictionaries``, a count of
    ``DEFAULT_SAMPLE_COUNT // divisor`` and ``source_language="en"``.

    Args:
        dictionaries: The value returned by ``load_all_dictionaries()``;
            it is passed through to the generators untouched.

    Returns:
        One flat list holding the output of all generators, concatenated
        in the order the generators are listed below.
    """
    # (generator, divisor) pairs. The first four generators each get
    # DEFAULT_SAMPLE_COUNT // 6 samples, the remaining six get
    # DEFAULT_SAMPLE_COUNT // 12. The order is kept stable so the output
    # file layout does not change.
    generation_plan = (
        (generate_emotional_expression_samples, 6),
        (generate_action_object_samples, 6),
        (generate_context_rich_samples, 6),
        (generate_advanced_context_samples, 6),
        (generate_identity_samples, 12),
        (generate_intent_samples, 12),
        (generate_behavior_samples, 12),
        (generate_memory_samples, 12),
        (generate_thought_samples, 12),
        (generate_safety_response_samples, 12),
    )

    raw_samples: List[Dict[str, Any]] = []
    for generate, divisor in generation_plan:
        raw_samples.extend(
            generate(
                dictionaries=dictionaries,
                count=DEFAULT_SAMPLE_COUNT // divisor,
                source_language="en",
            )
        )
    return raw_samples


def _to_training_sample(sample: Dict[str, Any]) -> Any:
    """Convert one raw template sample into a training sample.

    Keys missing from ``sample`` fall back to the defaults spelled out
    below; the result is whatever ``build_training_sample`` returns.

    Args:
        sample: A mapping produced by one of the template generators.
            Only ``.get`` is used on it, so any key may be absent.

    Returns:
        The object built by ``build_training_sample``.
    """
    # The default dicts are created inside the call on purpose: every
    # sample gets its own fresh dict, never one shared with another sample.
    return build_training_sample(
        user_text=sample.get("input", ""),
        identity=sample.get("identity", "A helpful, aligned Glyphic agent."),
        emotion=sample.get("emotion", "neutral"),
        sensory=sample.get("sensory", "none"),
        social=sample.get("social", "alone"),
        intent=sample.get("intent", {
            "goal": "assist",
            "urgency": "1",
            "focus": "support",
        }),
        behavior=sample.get("behavior", {
            "tone": "warm",
            "pacing": "steady",
            "depth": "medium",
            "style": "natural",
            "clarity": "high",
        }),
        memory_summary=sample.get("memory", ""),
        thought_chain=sample.get("thought_chain", ""),
        glyphic_output=sample.get("glyphic", ""),
        realized_output=sample.get("output", ""),
    )


def main() -> None:
    """Generate all template samples, convert them and write them to disk.

    Loads the dictionaries, runs every template generator, turns each raw
    sample into a training sample and saves the result to ``OUTPUT_JSONL``
    via ``save_samples``. A one-line summary is printed at the end.
    """
    dictionaries = load_all_dictionaries()

    raw_samples = _generate_raw_samples(dictionaries)
    training_samples = [_to_training_sample(sample) for sample in raw_samples]

    # save_samples is handed the path as a plain string.
    save_samples(str(OUTPUT_JSONL), training_samples)

    print(f"Wrote {len(training_samples)} Glyphic training samples to {OUTPUT_JSONL}")
|
|
|
|
|
|
|
|
# Script entry point: build the dataset only when this module is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|
|
|
|
|
|