p7inc3 committed on
Commit
04db24b
·
verified ·
1 Parent(s): a354d82

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +10 -17
handler.py CHANGED
@@ -89,34 +89,31 @@ class EndpointHandler:
89
  print("[INFO] Model loaded successfully")
90
 
91
  # ============================================
92
- # Threshold Config (tunable)
93
  # ============================================
94
  self.config = {
95
- "HIGH_ATTACK": 0.75, # attack type softmax threshold
96
- "MEDIUM_ATTACK": 0.55, # attack type softmax threshold
97
- "HIGH_CONF": 0.85, # binary head dangerous threshold
98
- "LOW_CONF": 0.30, # binary head safe threshold
99
- "EXTREME_CONF": 0.95 # override for medium attack scores
100
  }
101
 
102
  # =====================================================
103
- # Decision Logic — FIXED
104
  # =====================================================
105
  def decide(self, danger_prob, fine_score, family_score):
106
  """
107
  Binary head (danger_prob) is the authority.
108
  Attack type scores only matter if binary head is already suspicious.
109
- This prevents false positives where softmax picks a random attack class
110
- on benign input.
111
  """
112
  cfg = self.config
113
 
114
- # 1. Definite safe zone — binary head is confident it's safe
115
  if danger_prob <= cfg["LOW_CONF"]:
116
  return False
117
 
118
  # 2. Danger zone — binary head must be confident it's dangerous
119
- # BEFORE we even look at attack type scores
120
  if danger_prob >= cfg["HIGH_CONF"]:
121
  # Strong attack type confidence → dangerous
122
  if fine_score >= cfg["HIGH_ATTACK"] or family_score >= cfg["HIGH_ATTACK"]:
@@ -127,8 +124,7 @@ class EndpointHandler:
127
  if danger_prob >= cfg["EXTREME_CONF"]:
128
  return True
129
 
130
- # 3. Gray zone (LOW_CONF < danger_prob < HIGH_CONF) → always safe
131
- # This catches miscalibrated binary head outputs
132
  return False
133
 
134
  # =====================================================
@@ -185,7 +181,6 @@ class EndpointHandler:
185
  else:
186
  attack_type = "none"
187
  attack_family = "none"
188
- # Zero out misleading scores when safe
189
  fine_score = 0.0
190
  family_score = 0.0
191
 
@@ -204,15 +199,13 @@ class EndpointHandler:
204
  ]
205
 
206
  # ================================
207
- # Response — FIXED confidence reporting
208
  # ================================
209
  return {
210
  "status": "DANGEROUS" if is_dangerous else "SAFE",
211
 
212
- # Raw binary head probability — always meaningful
213
  "binary_confidence": round(danger_prob, 4),
214
 
215
- # Decision confidence: how confident we are in the final status
216
  "confidence": round(
217
  danger_prob if is_dangerous else (1 - danger_prob),
218
  4
 
89
  print("[INFO] Model loaded successfully")
90
 
91
  # ============================================
92
+ # Threshold Config — TIGHTENED
93
  # ============================================
94
  self.config = {
95
+ "HIGH_ATTACK": 0.80, # ← RAISED from 0.75
96
+ "MEDIUM_ATTACK": 0.55,
97
+ "HIGH_CONF": 0.85,
98
+ "LOW_CONF": 0.30,
99
+ "EXTREME_CONF": 0.95
100
  }
101
 
102
  # =====================================================
103
+ # Decision Logic
104
  # =====================================================
105
  def decide(self, danger_prob, fine_score, family_score):
106
  """
107
  Binary head (danger_prob) is the authority.
108
  Attack type scores only matter if binary head is already suspicious.
 
 
109
  """
110
  cfg = self.config
111
 
112
+ # 1. Definite safe zone
113
  if danger_prob <= cfg["LOW_CONF"]:
114
  return False
115
 
116
  # 2. Danger zone — binary head must be confident it's dangerous
 
117
  if danger_prob >= cfg["HIGH_CONF"]:
118
  # Strong attack type confidence → dangerous
119
  if fine_score >= cfg["HIGH_ATTACK"] or family_score >= cfg["HIGH_ATTACK"]:
 
124
  if danger_prob >= cfg["EXTREME_CONF"]:
125
  return True
126
 
127
+ # 3. Gray zone — always safe
 
128
  return False
129
 
130
  # =====================================================
 
181
  else:
182
  attack_type = "none"
183
  attack_family = "none"
 
184
  fine_score = 0.0
185
  family_score = 0.0
186
 
 
199
  ]
200
 
201
  # ================================
202
+ # Response
203
  # ================================
204
  return {
205
  "status": "DANGEROUS" if is_dangerous else "SAFE",
206
 
 
207
  "binary_confidence": round(danger_prob, 4),
208
 
 
209
  "confidence": round(
210
  danger_prob if is_dangerous else (1 - danger_prob),
211
  4