# generator/run_generator.py
import json
from typing import List, Dict, Any
from .config import OUTPUT_JSONL, DEFAULT_SAMPLE_COUNT
from .dictionary_access import load_all_dictionaries
# Existing semantic templates
from .templates_basic import generate_emotional_expression_samples
from .templates_actions import generate_action_object_samples
from .templates_context import generate_context_rich_samples
from .templates_context_advanced import generate_advanced_context_samples
# NEW protocol-level templates
from .templates_identity import generate_identity_samples
from .templates_intent import generate_intent_samples
from .templates_behavior import generate_behavior_samples
from .templates_memory import generate_memory_samples
from .templates_thought import generate_thought_samples
from .templates_safety_response import generate_safety_response_samples
# Envelope + training builder
from .training_builder import build_training_sample, save_samples
def main():
    """Generate the full Glyphic training dataset and write it to JSONL.

    Pipeline:
      1. Generate semantic samples (4 templates, each DEFAULT_SAMPLE_COUNT // 6).
      2. Generate protocol samples (6 templates, each DEFAULT_SAMPLE_COUNT // 12).
      3. Wrap every raw sample in the Glyphic training envelope via
         ``build_training_sample`` (see ``_to_training_sample``).
      4. Save all samples to ``OUTPUT_JSONL``.
    """
    dictionaries = load_all_dictionaries()

    # Generator groups with their sample-budget divisor. Order matters:
    # it fixes the order of samples in the output file (semantic first,
    # then protocol), matching the original explicit call sequence.
    semantic_generators = (
        generate_emotional_expression_samples,
        generate_action_object_samples,
        generate_context_rich_samples,
        generate_advanced_context_samples,
    )
    protocol_generators = (
        generate_identity_samples,
        generate_intent_samples,
        generate_behavior_samples,
        generate_memory_samples,
        generate_thought_samples,
        generate_safety_response_samples,
    )

    raw_samples: List[Dict[str, Any]] = []
    for generators, divisor in ((semantic_generators, 6), (protocol_generators, 12)):
        for generate in generators:
            raw_samples.extend(
                generate(
                    dictionaries=dictionaries,
                    count=DEFAULT_SAMPLE_COUNT // divisor,
                    source_language="en",
                )
            )

    # Convert raw template output into full training samples.
    training_samples = [_to_training_sample(sample) for sample in raw_samples]

    save_samples(str(OUTPUT_JSONL), training_samples)
    print(f"Wrote {len(training_samples)} Glyphic training samples to {OUTPUT_JSONL}")


def _to_training_sample(sample: Dict[str, Any]) -> Any:
    """Map one raw template sample onto the Glyphic training envelope.

    Missing fields fall back to neutral protocol defaults so that templates
    which only emit semantic content (input / glyphic / output) still produce
    a complete envelope.

    Return type is whatever ``build_training_sample`` produces — presumably a
    dict, but not visible from here; TODO confirm in training_builder.
    """
    # Defaults for protocol fields a template may omit.
    default_intent = {
        "goal": "assist",
        "urgency": "1",
        "focus": "support",
    }
    default_behavior = {
        "tone": "warm",
        "pacing": "steady",
        "depth": "medium",
        "style": "natural",
        "clarity": "high",
    }
    return build_training_sample(
        user_text=sample.get("input", ""),
        identity=sample.get("identity", "A helpful, aligned Glyphic agent."),
        emotion=sample.get("emotion", "neutral"),
        sensory=sample.get("sensory", "none"),
        social=sample.get("social", "alone"),
        intent=sample.get("intent", default_intent),
        behavior=sample.get("behavior", default_behavior),
        memory_summary=sample.get("memory", ""),
        thought_chain=sample.get("thought_chain", ""),
        glyphic_output=sample.get("glyphic", ""),
        realized_output=sample.get("output", ""),
    )
# Script entry point: run the generator when executed directly
# (e.g. ``python -m generator.run_generator``), not on import.
if __name__ == "__main__":
    main()