# okto_version: "1.2"
#
# Example configuration: an INFERENCE block with its own generation params
# and a nested CONTROL block of IF rules, plus a separate GUARD block.
# Each statement is on its own line so that the version header above is the
# only thing on the leading "#" line.

PROJECT "InferenceAdvancedExample"
DESCRIPTION "Demonstrates advanced INFERENCE block with nested CONTROL"

# Accelerator, minimum memory and numeric precision settings.
ENV {
    accelerator: "gpu"
    min_memory: "8GB"
    precision: "fp16"
}

# Training data: a single JSONL file declared as chat-type data.
DATASET {
    train: "examples/datasets/demo_train.jsonl"
    format: "jsonl"
    type: "chat"
}

MODEL {
    name: "inference-advanced-model"
    base: "oktoseek/base-mini"
    device: "cuda"
}

# NOTE(review): device is declared both here and in MODEL with the same
# value ("cuda"); keep the two in sync if either changes.
TRAIN {
    epochs: 5
    batch_size: 32
    device: "cuda"
}

BEHAVIOR {
    mode: "chat"
    personality: "friendly"
    verbosity: "high"
    language: "en"
    prompt_style: "User: {input}\nAssistant:"
}

# INFERENCE.format uses the same template string as BEHAVIOR.prompt_style.
INFERENCE {
    mode: "chat"
    format: "User: {input}\nAssistant:"
    exit_command: "/exit"

    # Generation parameters.
    params {
        max_length: 200
        temperature: 0.7
        top_p: 0.9
        top_k: 40
        beams: 2
        do_sample: true
        repetition_penalty: 1.1
    }

    # Conditional rules; each IF pairs one condition with the action(s)
    # listed inside its braces.
    CONTROL {
        IF confidence < 0.3 {
            RETRY
            LOG "Low confidence, retrying"
        }

        IF repetition > 3 {
            REGENERATE
            LOG "High repetition detected"
        }

        IF hallucination_score > 0.5 {
            REPLACE WITH "I'm not certain about that. Could you rephrase?"
        }

        IF toxic == true {
            REPLACE WITH "I cannot assist with that request."
        }
    }
}

# Guard rules declared independently of INFERENCE.CONTROL.
GUARD {
    prevent {
        hallucination
        toxicity
        bias
    }

    detect_using: ["classifier", "embedding"]

    on_violation {
        REPLACE
        with_message: "Sorry, I cannot process that request."
    }
}

EXPORT {
    format: ["okm"]
    path: "export/"
}