# okto_version: "1.2"
# Project identity: a name plus a one-line summary of what this example shows
# (an INFERENCE block that contains a nested CONTROL section).
PROJECT "InferenceAdvancedExample"
DESCRIPTION "Demonstrates advanced INFERENCE block with nested CONTROL"

# Environment settings for this project: a GPU accelerator, an 8GB memory
# minimum, and fp16 precision.
# NOTE(review): whether min_memory refers to host RAM or GPU memory is not
# visible in this file - confirm against the OktoScript spec.
ENV {
  accelerator: "gpu"
  min_memory: "8GB"
  precision: "fp16"
}

# Dataset declaration: the training split is a JSONL file of type "chat".
# Only a train split is declared in this block.
DATASET {
  train: "examples/datasets/demo_train.jsonl"
  format: "jsonl"
  type: "chat"
}

# Model declaration: the model name, the base model reference it starts from,
# and the device setting ("cuda").
MODEL {
  name: "inference-advanced-model"
  base: "oktoseek/base-mini"
  device: "cuda"
}

# Training settings: 5 epochs with a batch size of 32.
# device repeats the value set in MODEL.device; keep the two in sync when
# changing the target hardware.
TRAIN {
  epochs: 5
  batch_size: 32
  device: "cuda"
}

# Behavior profile: chat mode, friendly personality, high verbosity, English.
# prompt_style is the same template string as INFERENCE.format; {input} looks
# like a placeholder for the user's message - TODO confirm substitution rules.
BEHAVIOR {
  mode: "chat"
  personality: "friendly"
  verbosity: "high"
  language: "en"
  prompt_style: "User: {input}\nAssistant:"
}

# Inference configuration: chat mode, a prompt template, an exit command,
# generation parameters, and a nested CONTROL section with conditional rules.
INFERENCE {
  mode: "chat"
  # Same template string as BEHAVIOR.prompt_style.
  format: "User: {input}\nAssistant:"
  # Presumably typing this ends an interactive session - confirm.
  exit_command: "/exit"
  
  # Generation parameters.
  # NOTE(review): beams: 2 is set together with do_sample: true, i.e. beam
  # search and sampling are both requested - confirm this is intended.
  params {
    max_length: 200
    temperature: 0.7
    top_p: 0.9
    top_k: 40
    beams: 2
    do_sample: true
    repetition_penalty: 1.1
  }
  
  # Conditional rules. Each IF compares a named signal against a threshold and
  # lists the actions to take. How the signals (confidence, repetition,
  # hallucination_score, toxic) are computed is not shown in this file.
  CONTROL {
    # Low confidence (< 0.3): retry and log.
    # NOTE(review): RETRY is listed before LOG - confirm the LOG action still
    # runs after a RETRY.
    IF confidence < 0.3 {
      RETRY
      LOG "Low confidence, retrying"
    }
    
    # Repetition above 3: regenerate and log.
    IF repetition > 3 {
      REGENERATE
      LOG "High repetition detected"
    }
    
    # Hallucination score above 0.5: replace the output with a fixed message.
    IF hallucination_score > 0.5 {
      REPLACE WITH "I'm not certain about that. Could you rephrase?"
    }
    
    # Toxic flag set: replace the output with a fixed refusal message.
    IF toxic == true {
      REPLACE WITH "I cannot assist with that request."
    }
  }
}

# Guard configuration: the categories to prevent, the detection methods, and
# the action taken on a violation.
# NOTE(review): hallucination and toxicity are also handled by REPLACE rules in
# INFERENCE's CONTROL section with different messages; which one takes
# precedence is not visible in this file - confirm.
GUARD {
  # Categories to prevent, listed as bare identifiers.
  prevent {
    hallucination
    toxicity
    bias
  }
  
  # Two detection methods are listed.
  detect_using: ["classifier", "embedding"]
  
  # On violation: REPLACE, using the message given in with_message.
  on_violation {
    REPLACE
    with_message: "Sorry, I cannot process that request."
  }
}

# Export settings: a list of output formats (only "okm" here) and the
# destination path "export/".
EXPORT {
  format: ["okm"]
  path: "export/"
}