# generator/templates_context.py

import random
from typing import List, Dict, Any

from .meaning_model import StructuredMeaning, Context
from .encoder import encode_term


def generate_context_rich_samples(
    dictionaries: Dict[str, Any],
    count: int,
    source_language: str = "en",
) -> List[Dict[str, Any]]:
    """
    Generate random samples that combine an actor, an action and an object
    with optional time, place and activity context.

    Args:
        dictionaries: Mapping that is read for the keys "actions", "objects",
            "actors" (all required, non-empty) and "context_time",
            "context_place", "context_activity" (optional). Every entry is
            read through ``entry.get("id")``. The same mapping is passed on
            to ``encode_term``.
        count: Number of samples to generate.
        source_language: Value stored under ``meta["source_language"]`` of
            each structured meaning.

    Returns:
        A list of ``count`` dicts with the keys "input_text",
        "glyphic_output_human", "glyphic_output_compact",
        "glyphic_output_tokens" and "structured_meaning".

    Raises:
        ValueError: If the actions, objects or actors dictionary is missing
            or empty.
    """
    actions = dictionaries.get("actions", [])
    objects = dictionaries.get("objects", [])
    actors = dictionaries.get("actors", [])

    if not actions or not objects or not actors:
        raise ValueError("Missing required dictionaries: actions, objects, actors.")

    times = dictionaries.get("context_time", [])
    places = dictionaries.get("context_place", [])
    activities = dictionaries.get("context_activity", [])

    results = []

    for _ in range(count):
        # The order of the random draws is kept stable so that a seeded
        # `random` module keeps producing the same sequence of samples.
        action = random.choice(actions)
        obj = random.choice(objects)
        actor = random.choice(actors)
        time_ctx = random.choice(times) if times else None
        place_ctx = random.choice(places) if places else None
        activity_ctx = random.choice(activities) if activities else None

        action_id = action.get("id", "action.unknown")
        object_id = obj.get("id", "object.unknown")
        actor_id = actor.get("id", "actor.unknown")

        # Context is optional: a missing dictionary, an empty entry or an
        # entry without an "id" all leave the corresponding id unset.
        time_id = time_ctx.get("id") if time_ctx else None
        place_id = place_ctx.get("id") if place_ctx else None
        activity_id = activity_ctx.get("id") if activity_ctx else None

        # Natural-language side of the sample.
        input_text = f"{actor_id} {action_id} the {object_id}"
        if place_id:
            input_text += f" in the {place_id}"
        if activity_id:
            input_text += f" while {activity_id}"
        if time_id:
            input_text += f" ({time_id})"
        input_text += "."

        meaning = StructuredMeaning(
            actor=actor_id,
            action=action_id,
            object=object_id,
            modifiers=[],
            emotion=None,
            context=Context(
                time=time_id,
                place=place_id,
                activity=activity_id,
            ),
            intent="intent.describe_event",
            meta={
                "source_language": source_language,
                "confidence": 0.95,
            },
        )

        actor_enc = encode_term(actor_id, dictionaries)
        action_enc = encode_term(action_id, dictionaries)
        object_enc = encode_term(object_id, dictionaries)
        time_enc = encode_term(time_id, dictionaries) if time_id else None
        place_enc = encode_term(place_id, dictionaries) if place_id else None
        activity_enc = encode_term(activity_id, dictionaries) if activity_id else None

        # Human-readable form.
        glyphic_human = (
            f"ACTOR[{actor_enc['human']}] "
            f"ACTION[{action_enc['human']}] "
            f"OBJECT[{object_enc['human']}]"
        )
        if time_enc:
            glyphic_human += f" CONTEXT.TIME[{time_enc['human']}]"
        if place_enc:
            glyphic_human += f" CONTEXT.PLACE[{place_enc['human']}]"
        if activity_enc:
            glyphic_human += f" CONTEXT.ACTIVITY[{activity_enc['human']}]"
        glyphic_human += " INTENT.describe"

        # Compact form.
        glyphic_compact = (
            f"ACT{{{actor_enc['compact']}}} "
            f"ACTN{{{action_enc['compact']}}} "
            f"OBJ{{{object_enc['compact']}}}"
        )
        if time_enc:
            glyphic_compact += f" CTX{{TIME:{time_enc['compact']}}}"
        if place_enc:
            glyphic_compact += f" CTX{{PLACE:{place_enc['compact']}}}"
        if activity_enc:
            glyphic_compact += f" CTX{{ACT:{activity_enc['compact']}}}"
        glyphic_compact += " INT{DESC}"

        # Token form. Previously the context branches appended a bare space,
        # so the context never reached the token stream and the string ended
        # in trailing whitespace. The context tokens are now appended in the
        # same time/place/activity order used by the other two forms.
        # f-strings are used so non-string token values format as before.
        token_parts = [
            f"{actor_enc['tokens']}",
            f"{action_enc['tokens']}",
            f"{object_enc['tokens']}",
        ]
        for context_enc in (time_enc, place_enc, activity_enc):
            if context_enc:
                token_parts.append(f"{context_enc['tokens']}")
        # NOTE(review): the human and compact forms end with an intent
        # marker, but no token-form intent marker is defined in this module,
        # so none is emitted here. Confirm the expected token and append it.
        glyphic_tokens = " ".join(token_parts)

        results.append(
            {
                "input_text": input_text,
                "glyphic_output_human": glyphic_human,
                "glyphic_output_compact": glyphic_compact,
                "glyphic_output_tokens": glyphic_tokens,
                "structured_meaning": meaning.to_dict(),
            }
        )

    return results