File size: 1,560 Bytes
5fc8c9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# okto_version: "1.2"
# Project header: the project name and a one-line description of this example,
# which showcases an INFERENCE block containing a nested CONTROL section.
PROJECT "InferenceAdvancedExample"
DESCRIPTION "Demonstrates advanced INFERENCE block with nested CONTROL"

# Runtime environment: GPU accelerator, a minimum of 8GB memory, fp16 precision.
ENV {
  accelerator: "gpu"
  min_memory: "8GB"
  precision: "fp16"
}

# Training data: a JSONL file declared as chat-type data.
DATASET {
  train: "examples/datasets/demo_train.jsonl"
  format: "jsonl"
  type: "chat"
}

# Model definition: the output model name, the base model it starts from,
# and the device it is placed on.
MODEL {
  name: "inference-advanced-model"
  base: "oktoseek/base-mini"
  device: "cuda"
}

# Training run: 5 epochs at batch size 32 on the same device as MODEL.device.
TRAIN {
  epochs: 5
  batch_size: 32
  device: "cuda"
}

# Assistant behavior: chat mode, friendly personality, high verbosity, English.
# prompt_style is the same template string used by INFERENCE.format.
BEHAVIOR {
  mode: "chat"
  personality: "friendly"
  verbosity: "high"
  language: "en"
  prompt_style: "User: {input}\nAssistant:"
}

# Inference settings: chat mode, the prompt template, the command that ends a
# session, generation parameters, and CONTROL rules applied to generated output.
INFERENCE {

  mode: "chat"

  # Same template string as BEHAVIOR.prompt_style.
  format: "User: {input}\nAssistant:"

  exit_command: "/exit"
  
  # Generation parameters.
  # NOTE(review): beams: 2 is set together with do_sample: true; presumably the
  # runtime combines beam search with sampling -- confirm this is supported.
  params {

    max_length: 200

    temperature: 0.7

    top_p: 0.9

    top_k: 40

    beams: 2

    do_sample: true

    repetition_penalty: 1.1
  }
  
  # Conditional rules, one IF per condition.
  # NOTE(review): in the first two rules LOG is listed after RETRY / REGENERATE;
  # confirm the LOG statement still runs once the action has been triggered.
  CONTROL {
    # confidence below 0.3: retry and log.
    IF confidence < 0.3 {
      RETRY
      LOG "Low confidence, retrying"
    }
    
    # repetition above 3: regenerate and log.
    IF repetition > 3 {
      REGENERATE
      LOG "High repetition detected"
    }
    
    # hallucination_score above 0.5: substitute a fixed clarification request.
    IF hallucination_score > 0.5 {
      REPLACE WITH "I'm not certain about that. Could you rephrase?"
    }
    
    # toxic flag set: substitute a fixed refusal.
    IF toxic == true {
      REPLACE WITH "I cannot assist with that request."
    }
  }
}

# Guard rules: the categories to prevent, the detection methods to use, and the
# action taken on a violation.
# NOTE(review): INFERENCE.CONTROL also replaces output for hallucination and
# toxicity with different messages -- confirm which one takes precedence.
GUARD {
  # Categories of output to prevent.
  prevent {
    hallucination
    toxicity
    bias
  }

  
  # Detection methods, given as a list.
  detect_using: ["classifier", "embedding"]
  
  # On violation: REPLACE action, with with_message as the accompanying text.
  on_violation {
    REPLACE

    with_message: "Sorry, I cannot process that request."
  }
}

# Export settings: a list of output formats (only "okm" here) and the target path.
EXPORT {
  format: ["okm"]
  path: "export/"
}