Denisijcu committed on
Commit
4ad96d9
·
verified ·
1 Parent(s): cdb0c31

Upload 2 files

Browse files
Files changed (2) hide show
  1. attack.py +198 -0
  2. session.py +58 -0
attack.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from app.api.schemas.telemetry import TelemetryPayload
3
+ import numpy as np
4
+
5
# Module-level router; the /simulate/ghosting endpoint in this file registers on it.
router = APIRouter()
6
+
7
+
8
+ def _compute_entropy(intervals: np.ndarray, bins: int = 20) -> float:
9
+ """
10
+ Shannon entropy normalizada — valor entre 0.0 y 1.0.
11
+
12
+ ANTES (bug de Gemini): entropy_score = np.std(intervals)
13
+ → Devolvía ~120ms (el std en milisegundos) → score de 129.74
14
+ → Cualquier std > 10ms daba verdict HUMAN → bypass trivial
15
+
16
+ AHORA: Shannon entropy del histograma de distribución, normalizada.
17
+ Humans: 0.50–0.85 (Goldilocks zone)
18
+ Bots constantes: ~0.0
19
+ Bots con ruido puro (demasiado uniforme): ~1.0
20
+ """
21
+ if len(intervals) < 3:
22
+ return 0.0
23
+ hist, _ = np.histogram(intervals, bins=bins, density=True)
24
+ hist = hist + 1e-10 # evitar log(0)
25
+ raw_entropy = -np.sum(hist * np.log2(hist)) * (intervals.max() - intervals.min()) / bins
26
+ # Normalizar contra entropía máxima teórica
27
+ max_entropy = np.log2(bins)
28
+ return float(np.clip(raw_entropy / max_entropy, 0.0, 1.0))
29
+
30
+
31
+ def _compute_cv(intervals: np.ndarray) -> float:
32
+ """Coeficiente de variación — mide irregularidad orgánica."""
33
+ mean = np.mean(intervals)
34
+ if mean == 0:
35
+ return 0.0
36
+ return float(np.clip(np.std(intervals) / mean, 0.0, 3.0))
37
+
38
+
39
+ def _correction_burst_ratio(events: list) -> float:
40
+ """
41
+ Qué fracción de correcciones vienen en bursts de 3+.
42
+ Humanos corrigen en ráfagas (darse cuenta de una palabra entera mal).
43
+ Bots corrigen uniformemente o no corrigen.
44
+ """
45
+ corrections = [getattr(e, 'is_correction', False) or getattr(e, 'key', '') == 'Backspace'
46
+ for e in events]
47
+ if not any(corrections):
48
+ return 0.0
49
+
50
+ burst = streak = 0
51
+ for c in corrections:
52
+ if c:
53
+ streak += 1
54
+ else:
55
+ if streak >= 3:
56
+ burst += streak
57
+ streak = 0
58
+ if streak >= 3:
59
+ burst += streak
60
+
61
+ total_corrections = sum(corrections)
62
+ return float(burst / max(total_corrections, 1))
63
+
64
+
65
def _score_ghosting(intervals: np.ndarray, events: list) -> tuple[float, dict]:
    """
    Weighted multi-signal "humanness" score.

    Four sub-scores are combined with fixed weights
    (entropy 0.35, cv 0.30, mean 0.15, corrections 0.20):

      * entropy     - 1.0 inside [0.50, 0.85], 0.5 inside [0.35, 0.95],
                      otherwise 0.05 (too regular or too noisy).
      * cv          - 1.0 / 0.6 / 0.3 for cv above 0.50 / 0.30 / 0.15,
                      otherwise 0.05 (metronome-like timing).
      * mean        - 1.0 for a mean interval in [60, 500], 0.5 in
                      [40, 700], otherwise 0.1.
      * corrections - 0.05 when there are no corrections at all,
                      0.5 + 0.5 * burst_ratio when the correction rate is
                      in [0.02, 0.15], otherwise 0.2.

    Args:
        intervals: Positive inter-key intervals.
        events: Keystroke event objects the intervals were derived from.

    Returns:
        (final_score, breakdown): final_score is clipped to [0.0, 1.0];
        breakdown holds the rounded raw metrics and sub-scores.
    """
    entropy = _compute_entropy(intervals)
    cv = _compute_cv(intervals)
    burst = _correction_burst_ratio(events)
    mean_ms = float(np.mean(intervals))

    # Signal 1: entropy must sit in the middle band; both extremes are penalized.
    s_entropy = (
        1.0 if 0.50 <= entropy <= 0.85
        else 0.5 if 0.35 <= entropy <= 0.95
        else 0.05
    )

    # Signal 2: coefficient of variation; first floor exceeded wins.
    cv_floors = ((0.50, 1.0), (0.30, 0.6), (0.15, 0.3))
    s_cv = next((points for floor, points in cv_floors if cv > floor), 0.05)

    # Signal 3: mean inter-key latency inside the accepted range.
    s_mean = (
        1.0 if 60 <= mean_ms <= 500
        else 0.5 if 40 <= mean_ms <= 700
        else 0.1
    )

    # Signal 4: correction rate, with a bonus for bursty corrections.
    # Zero corrections is penalized as a bot tell.
    correction_events = [
        ev for ev in events
        if getattr(ev, 'is_correction', False)
        or getattr(ev, 'key', '') == 'Backspace'
    ]
    corr_rate = len(correction_events) / max(len(events), 1)

    if corr_rate == 0.0:
        s_corrections = 0.05
    elif 0.02 <= corr_rate <= 0.15:
        s_corrections = 0.5 + burst * 0.5
    else:
        s_corrections = 0.2  # rate outside the accepted range

    # Weighted sum, then clip to the unit interval.
    w_entropy, w_cv, w_mean, w_corrections = 0.35, 0.30, 0.15, 0.20
    weighted = (s_entropy * w_entropy +
                s_cv * w_cv +
                s_mean * w_mean +
                s_corrections * w_corrections)
    final_score = float(np.clip(weighted, 0.0, 1.0))

    return final_score, {
        "entropy": round(entropy, 4),
        "cv": round(cv, 4),
        "mean_ikl_ms": round(mean_ms, 2),
        "corr_rate": round(corr_rate, 4),
        "burst_ratio": round(burst, 4),
        "s_entropy": round(s_entropy, 3),
        "s_cv": round(s_cv, 3),
        "s_mean": round(s_mean, 3),
        "s_corrections": round(s_corrections, 3),
    }
138
+
139
+
140
@router.post("/simulate/ghosting")
async def simulate_ghosting(payload: TelemetryPayload):
    """
    Ghosting attack detector.

    Derives inter-key intervals from the event timestamps and classifies
    the session with the multi-signal score from ``_score_ghosting``
    (entropy + CV + mean interval + corrections), which is bounded to
    [0.0, 1.0].

    Timestamps are read from ``timestamp_ms`` when present, otherwise from
    ``timestamp``. A value of 0 is a valid timestamp; only ``None`` or a
    missing attribute counts as absent.

    Returns:
        A dict with ``session_id``, ``entropy_score`` (kept for backward
        compatibility), ``score``, ``verdict`` and ``signal_breakdown``.
        With fewer than 15 events the verdict is "INCONCLUSIVE" and a
        ``reason`` field is included. Otherwise the verdict is "HUMAN"
        (score >= 0.65), "SUSPECT" (score >= 0.40) or "BOT".

    Raises:
        HTTPException(400): fewer than 2 events, fewer than 2 readable
            timestamps, or fewer than 2 strictly positive intervals.
    """
    events = payload.events
    if not events or len(events) < 2:
        raise HTTPException(status_code=400, detail="Minimum 2 events required")

    # Extract timestamps; supports both schemas (timestamp_ms and timestamp).
    # Explicit None checks: `a or b` would discard a legitimate 0 timestamp.
    timestamps = []
    for e in events:
        ts = getattr(e, 'timestamp_ms', None)
        if ts is None:
            ts = getattr(e, 'timestamp', None)
        if ts is not None:
            timestamps.append(float(ts))

    if len(timestamps) < 2:
        raise HTTPException(status_code=400, detail="Could not extract timestamps from events")

    intervals = np.diff(np.array(timestamps))
    intervals = intervals[intervals > 0]  # drop zero/negative (impossible) intervals

    if len(intervals) < 2:
        raise HTTPException(status_code=400, detail="Not enough valid intervals")

    # Minimum number of keystrokes for a reliable analysis.
    if len(events) < 15:
        return {
            "session_id": payload.session_id,
            "entropy_score": 0.0,
            "score": 0.0,
            "verdict": "INCONCLUSIVE",
            "reason": f"Need at least 15 keystrokes, got {len(events)}",
            "signal_breakdown": {},
        }

    final_score, breakdown = _score_ghosting(intervals, events)

    # Thresholds are stated by the original author to match engine.py
    # (not visible here; verify against that module).
    if final_score >= 0.65:
        verdict = "HUMAN"
    elif final_score >= 0.40:
        verdict = "SUSPECT"
    else:
        verdict = "BOT"

    return {
        "session_id": payload.session_id,
        "entropy_score": breakdown["entropy"],  # field kept for compatibility
        "score": round(final_score, 4),         # the actual normalized score
        "verdict": verdict,
        "signal_breakdown": breakdown,
    }
session.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ from fastapi import APIRouter, HTTPException
3
+ from qdrant_client import QdrantClient
4
+ from app.api.schemas.telemetry import TelemetryPayload
5
+ from app.core.engine import DECI_Engine
6
+
7
# Module-level singletons shared by the endpoints in this file.
router = APIRouter()
engine = DECI_Engine()

# Connection to the Vault (deci_vault is the service name in your docker-compose).
# If constructing the client raises, `vault` is set to None and the /analyze
# endpoint skips persistence.
# NOTE(review): confirm whether QdrantClient() actually connects at
# construction time; if it is lazy, this guard will not catch an unreachable server.
try:
    vault = QdrantClient(host="localhost", port=6333)  # Use "deci_vault" when running inside Docker
except Exception:
    vault = None
15
+
16
@router.post("/analyze")
async def analyze_session(payload: TelemetryPayload):
    """
    Analyze a session and, if it is judged human, store its signature in
    the "cognitive_dna" vault collection.

    The stored vector has 128 dimensions; only the first four are filled
    (score, entropy, cv, burst) and the rest stay at 0.0.

    Returns:
        A dict with ``session_id``, ``analysis`` (the engine result) and
        ``vault_synced``, which is True only when a point was actually
        written to the vault during this request.

    Raises:
        HTTPException(400): the payload contains no events.
        HTTPException(500): any unexpected failure in the engine or vault;
            the detail carries the error message.
    """
    # Validate outside the try so the 400 is not swallowed by the generic
    # handler below and re-reported as a 500.
    if not payload.events:
        raise HTTPException(status_code=400, detail="No telemetry events provided")

    try:
        result = engine.process_session(payload.events)

        # --- Persistence ---
        vault_synced = False
        if result.get("is_human") and vault:
            # Build the 128-dimensional vector.
            vector = [0.0] * 128
            # Map the key metrics from the engine result.
            vector[0] = result["score"]
            vector[1] = result["breakdown"]["entropy"]
            vector[2] = result["breakdown"]["cv"]
            vector[3] = result["breakdown"].get("burst", 0.0)

            vault.upsert(
                collection_name="cognitive_dna",
                points=[{
                    "id": str(uuid.uuid4()),
                    "vector": vector,
                    "payload": {
                        # NOTE(review): user is hard-coded; presumably should
                        # come from the request or auth context.
                        "user": "Denis",
                        "session_id": payload.session_id,
                        "verdict": result["verdict"]
                    }
                }]
            )
            vault_synced = True

        return {
            "session_id": payload.session_id,
            "analysis": result,
            # Report what actually happened, not just the verdict: when the
            # vault is unavailable nothing is stored.
            "vault_synced": vault_synced
        }

    except HTTPException:
        # Deliberate HTTP errors keep their own status code.
        raise
    except Exception as e:
        print(f"🚨 [SESSION_ERROR]: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e