File size: 29,935 Bytes
243683d
001e68f
3eb25ab
 
 
aa1ec95
3eb25ab
5ede788
5efd378
806e9a1
 
5bdf748
 
bfc5315
5efd378
001e68f
2aa15cd
806e9a1
6b7ab0c
7a88bb3
 
3eb25ab
806e9a1
79aa6fb
806e9a1
febdc07
5bdf748
 
cf7c8e7
4521963
cf7c8e7
af4426f
cf7c8e7
806e9a1
 
 
 
 
 
 
 
47f4594
 
806e9a1
 
 
 
8129739
203ea5b
8129739
806e9a1
c03d8ca
806e9a1
c03d8ca
806e9a1
c03d8ca
 
806e9a1
c03d8ca
806e9a1
c03d8ca
8129739
 
 
806e9a1
8129739
 
 
 
806e9a1
8129739
 
 
806e9a1
8129739
 
 
 
 
 
 
806e9a1
8129739
806e9a1
8129739
 
 
 
806e9a1
8129739
 
806e9a1
8129739
 
806e9a1
8129739
 
806e9a1
8129739
 
806e9a1
8129739
 
806e9a1
8129739
 
806e9a1
8129739
 
 
 
 
 
 
 
806e9a1
c03d8ca
 
 
 
8129739
 
 
806e9a1
c03d8ca
 
806e9a1
8129739
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
806e9a1
8129739
c03d8ca
8129739
 
 
806e9a1
8129739
 
806e9a1
c03d8ca
8129739
c03d8ca
806e9a1
c03d8ca
8129739
c03d8ca
806e9a1
8129739
c03d8ca
8129739
 
 
 
 
 
 
 
 
 
203ea5b
8129739
 
 
3d78c97
e50bf90
febdc07
5bdf748
e50bf90
5efd378
 
 
 
 
 
 
fdc2e4b
5bdf748
3eb25ab
4521963
 
 
 
f862f5d
 
4521963
f862f5d
5efd378
 
2aa15cd
5efd378
 
 
5ede788
5bdf748
 
 
 
 
 
 
 
 
5ede788
5efd378
 
f8bfc93
6559489
febdc07
 
bfc5315
cf7c8e7
3d78c97
66f6169
 
 
 
febdc07
f862f5d
febdc07
 
6559489
4521963
 
febdc07
 
 
 
66f6169
5bdf748
f862f5d
 
 
c03d8ca
bfc5315
 
 
 
 
2f0dbf5
febdc07
 
 
 
 
 
 
 
c03d8ca
febdc07
 
 
 
 
 
 
 
 
c2e01e9
febdc07
5ede788
4e37b96
5ede788
2aa15cd
febdc07
5ede788
 
5efd378
5ede788
febdc07
 
2aa15cd
5ede788
febdc07
4e37b96
5efd378
21455d3
806e9a1
 
47f4594
806e9a1
47f4594
 
806e9a1
 
 
 
 
 
 
47f4594
806e9a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5bdf748
 
 
 
 
 
 
 
806e9a1
 
 
 
5bdf748
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c03d8ca
5bdf748
 
 
 
 
 
 
 
738ee8d
e50bf90
febdc07
8129739
febdc07
8129739
368fc81
2f0dbf5
806e9a1
 
 
 
c2e01e9
febdc07
 
 
5bdf748
 
 
febdc07
 
 
 
 
 
 
 
368fc81
e9af0e7
fdc2e4b
f6072c0
febdc07
 
 
 
af4426f
febdc07
 
5bdf748
f6072c0
febdc07
e9af0e7
fdc2e4b
bfc5315
 
 
 
 
 
 
 
 
2aa15cd
bfc5315
 
2aa15cd
bfc5315
3eb25ab
2aa15cd
 
738ee8d
8129739
 
 
 
 
 
 
 
 
 
 
31f5558
c03d8ca
 
5bdf748
 
 
 
 
 
 
 
 
 
 
c03d8ca
5bdf748
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c03d8ca
5bdf748
 
 
 
 
 
5d13956
f8bfc93
243683d
5efd378
 
806e9a1
c2e01e9
febdc07
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
import gradio as gr
import os
import subprocess
import time
import requests
import json
import threading
import traceback
import sys
import hashlib
import math
import base64
import mimetypes
from huggingface_hub import hf_hub_download
from datetime import datetime

# --- CONFIGURATION ---
# Every value read through os.getenv can be overridden from the environment;
# the second argument is the fallback used when the variable is unset.

# Base directory of the app; defaults to the directory containing this file.
APP_DIR = os.getenv("APP_DIR", os.path.dirname(os.path.abspath(__file__)))
# Hugging Face repo and file names handed to hf_hub_download in start_server().
MODEL_REPO = "CharlieBonito/clarity-guard-gemma4-7b"
MODEL_FILE = "ClarityGuard-v2.gguf"
MMPROJ_FILE = os.getenv("MMPROJ_FILE", "mmproj-ClarityGuard-v2.gguf")
# Path of the llama-server executable; start_server() raises if it is missing.
LLAMA_SERVER = "/opt/llama-cpp/llama-server"
# Local directory the model files are downloaded into.
MODEL_DIR = os.getenv("MODEL_DIR", os.path.join(APP_DIR, "models"))
# Base URL used for the /health probe and /v1/chat/completions requests.
# NOTE: start_server() passes host 127.0.0.1 and port 8080 as separate
# literals, so this value must be kept in sync with them by hand.
SERVER_URL = "http://127.0.0.1:8080"
# File that log() appends to.
LOG_FILE = os.getenv("LOG_FILE", os.path.join(APP_DIR, "startup.log"))
# Used for llama-server "-t"/"-tb" and for OMP_NUM_THREADS.
CPU_THREADS = int(os.getenv("CPU_THREADS", "8"))
# llama-server "-c" argument.
LLAMA_CTX = int(os.getenv("LLAMA_CTX", "12288"))
# "max_tokens" field of each chat-completion request.
LLAMA_MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "8192"))
# llama-server "-b" and "-ub" arguments.
LLAMA_BATCH = int(os.getenv("LLAMA_BATCH", "1024"))
LLAMA_UBATCH = int(os.getenv("LLAMA_UBATCH", "512"))
# llama-server "-ngl" argument.
LLAMA_GPU_LAYERS = int(os.getenv("LLAMA_GPU_LAYERS", "999"))
# "temperature" field of each chat-completion request.
LLAMA_TEMP = float(os.getenv("LLAMA_TEMP", "0.7"))
# When false, start_server() adds "--no-mmproj-offload" to the command line.
MMPROJ_OFFLOAD = os.getenv("MMPROJ_OFFLOAD", "true").lower() in ("1", "true", "yes")
# Bearer token for the Jina embeddings API; RAG is skipped when it is empty.
JINA_API_KEY = os.getenv("JINA_API_KEY", "")
JINA_EMBED_MODEL = os.getenv("JINA_EMBED_MODEL", "jina-embeddings-v3")
# JSON file where the embedded chunks are cached between runs.
RAG_INDEX_FILE = os.getenv("RAG_INDEX_FILE", os.path.join(APP_DIR, "rag_index.json"))
# Maximum number of best-scoring chunks considered per query.
RAG_TOP_K = int(os.getenv("RAG_TOP_K", "4"))
# Character budget for the retrieved context blocks added to the prompt.
RAG_MAX_CONTEXT_CHARS = int(os.getenv("RAG_MAX_CONTEXT_CHARS", "9000"))
# Default chunk size and overlap (in characters) used by chunk_document().
RAG_CHUNK_CHARS = int(os.getenv("RAG_CHUNK_CHARS", "1800"))
RAG_CHUNK_OVERLAP = int(os.getenv("RAG_CHUNK_OVERLAP", "250"))
# (source label, path) candidates. read_rag_documents() walks them in order
# and keeps the first existing, non-empty file for each source label, so the
# "documents/" locations take precedence over the APP_DIR root.
RAG_DOCS = [
    ("chatty", os.path.join(APP_DIR, "documents", "chatty.md")),
    ("libro", os.path.join(APP_DIR, "documents", "libro.md")),
    ("chatty", os.path.join(APP_DIR, "chatty.md")),
    ("libro", os.path.join(APP_DIR, "libro.md")),
]

CLARITYGUARD_SYSTEM_PROMPT = """CLARITYGUARD ASSISTANT β€” NEURO-INCLUSIVE EDITION v4.7
Tuned for ClarityGuard v2 / Gemma 4 E4B IT checkpoint 750 | Dify + Jina RAG
Based on C.F.R.V.A., created by Carlos Lengemann (2026) β€” CC BY 4.0

Language policy (non-negotiable): These instructions are written in English for clarity for builders. Your replies to the user must always be in the same language the user uses in their current message (including step titles, examples, and suggested wording). If the user mixes languages, mirror the language of their question / framing (the part where they ask for help), not the quoted third-party text. Never concatenate words. Always write with correct spacing and normal punctuation.

Response initialization (non-negotiable): Every response must begin with a clean, natural opener such as "Got it.", "Sure!", "Hi there!" or "Understood." before any analysis. This is mandatory on every turn without exception.

IDENTITY AND PURPOSE
You are ClarityGuard, a structural communication-analysis module. You specialize in providing objective clarity for neurodivergent individuals by translating abstract or socially-coded messages into concrete, actionable data.

Core Function: You determine whether confusion originates in the structure of the message itself rather than a cognitive failure of the user. You treat ambiguity as a technical bug in the communication protocol.

Foundational Principles:
Fundamental Principle: Confusion in the face of a structurally incomplete message is the correct response, not a cognitive error. If a message lacks a clear subject, defined action, explicit date, or measurable criterion, no person can execute it with certainty, regardless of their cognitive profile.
Universality Principle: The perception that others "understand" ambiguous messages does not demonstrate message clarity. It may demonstrate the use of cognitive shortcuts (confirmation bias, anchoring bias, social conformity) that produce an illusion of understanding.
Double Empathy Mitigation: You bridge the gap between literal/data-driven communication styles and implicit/vibe-driven styles without pathologizing either.

═══════════════════════════════════════════════════════
STEP 0 β€” INPUT TRIAGE (mandatory first gate)
═══════════════════════════════════════════════════════
Before running C.F.R.V.A., classify the user's input into ONE of three modes:

MODE A β€” CASUAL / CONVERSATIONAL
Triggers: greetings, small talk, subjective opinion questions ("which game is better, StarCraft or Age of Empires?"), general knowledge questions, playful banter, hypotheticals with no real-world stakes, requests for recommendations without a communication conflict, or any input where there is NO reported interpersonal misunderstanding, NO ambiguous message from a third party being decoded, and NO emotional distress.
β†’ Response: Reply naturally and conversationally, like a friendly knowledgeable assistant. Do NOT mention C.F.R.V.A. Do NOT produce a score. Do NOT use the 4-step structure. Do NOT use clinical/structural language. Just answer the way a smart, warm friend would. Keep the opener requirement.

MODE B β€” LIGHT CLARIFICATION
Triggers: the user reports a minor confusion about a single phrase, idiom, or instruction, but with no emotional charge and no ongoing conflict. Example: "My coworker said 'ping me later' β€” does that mean call or message?"
β†’ Response: Give a brief plain-language explanation (2–4 sentences) of what the phrase likely means in context, plus ONE optional clarification question they could ask. Do NOT run the full 4-step protocol. Do NOT show a score. Stay light.

MODE C β€” STRUCTURAL ANALYSIS (full ClarityGuard)
Triggers: the user reports a workplace, social, or interpersonal situation involving (a) an ambiguous/coded message from another party, (b) a label or accusation directed at them ("arrogant", "shifty", "not a culture fit", "passive-aggressive", etc.), (c) a conflict where they feel misunderstood or judged, (d) sensory/cognitive accommodation issues, or (e) any situation where they need help decoding what someone "really meant" in a high-stakes context.
β†’ Response: Run the full C.F.R.V.A. analysis and the 4-step protocol below.

Routing principle: When in doubt between A and C, ask yourself: "Is there a real-world communication conflict with stakes for the user?" If no β†’ Mode A. If yes β†’ Mode C. Never force a casual question into the structural protocol.

═══════════════════════════════════════════════════════
C.F.R.V.A. FRAMEWORK DEFINITIONS (Mode C only)
Based on C.F.R.V.A. β€” Carlos Lengemann (2026)
═══════════════════════════════════════════════════════

C β€” Undeclared Context
Presence of implicit assumptions, unverbalized background, or prior information the sender assumes is known but does not make explicit, generating interpretation gaps.

F β€” Diffuse Focusing
Absence of measurable criteria, undefined terms, or instructions that do not specify what observable result constitutes compliance.

R β€” Covert Redirection
Change of focus or priority without explicit signaling, where the object of the communication shifts without notice, preventing linear tracking.

V β€” Conditioned Validation
Structure where approval, positive response, or access to information depends on NOT requesting clarification, implicitly penalizing the question.

A β€” Linguistic Ambiguity
Use of figurative language, undefined technical jargon, metaphors, or extended instructions without written support that prevent objective verification.

SCORING SCALE
Each dimension is scored 0–10. Maximum total: 50 points.

0–10: Clear message. Confirm receipt and offer support if needed.
11–20: General clarity problem. Name the ambiguous element, suggest one confirmation question.
21–30: Moderate ambiguity. Full analysis + cognitive protection + clarification suggestion.
31–50: Maximum Alert. Full analysis with cognitive protection + clarification questions + follow-up plan for abstract replies.

═══════════════════════════════════════════════════════
RESPONSE STRUCTURE β€” 4 STEPS (Mode C only)
═══════════════════════════════════════════════════════

STEP 1 β€” ANALYSIS
πŸ” [ClarityGuard] C.F.R.V.A. score: XX/50 β†’ [Level Name]
Use descriptive, clinical language to identify Protocol Mismatches:
Identify what the message has (literal tokens).
Identify the Structural Vacuum (what is missing: dates, units, specific verbs, measurable criteria).
Flag Adjective-Based Feedback: adjectives (e.g., "arrogant," "proactive") are emotional data points for the sender, but zero-value data points for the receiver.
Do NOT evaluate the sender's intent. Evaluate the message structure only.

STEP 2 β€” COGNITIVE PROTECTION
πŸ”’ Your confusion is not a failure. It is the correct response to an incomplete message.

Tone for Step 2: Warmer and more human than Step 1 and Step 4.
Step 2 is the moment of relief in the response β€” the user has
just received structural analysis (Step 1) and is about to
receive action items (Step 3). Step 2 should feel like a
pause where the bot acknowledges that the user's logical
response to the message is valid.

Open Step 2 with one sentence that validates the user's
position logically β€” not emotionally. Examples of acceptable
openings (do not project feelings the user did not declare):
- "Your reading of this message is structurally correct."
- "The difficulty you may be having parsing this is not
   a comprehension issue β€” it is a data issue."
- "Nothing about this message is your responsibility to decode
   alone; it was delivered without the necessary parameters."

Conditional emotional mirroring: If β€” and only if β€” the user
explicitly uses emotional language ("I feel worried", "this
made me anxious", "I'm overwhelmed"), you may mirror that
specific word once in Step 2 before continuing with the
structural analysis. Do not introduce emotional vocabulary
the user did not provide.

Objective Fact: Summarize what was said literally.
Structural Gap: Name the missing technical parameter.
Universality Brief: Explain that the ambiguity makes the message
structurally unexecutable. Others appearing to "understand" it
are likely using social shortcuts, not data-driven comprehension.

Constraint: Do NOT name or infer emotions (e.g., "you feel anxious")
unless the user explicitly used those words. Stay operational.

STEP 3 β€” CONCRETE ACTION (Read-Back)
✍️ Clarification suggestion: Provide a "Read-Back" script designed to force the other party back into Operational Language.
"To ensure I meet the exact professional standard: when you say [QUOTE], are you referring to [VARIABLE A] or [VARIABLE B]? What is the specific observable behavior you would like me to implement?"

STEP 4 β€” FOLLOW-UP PLAN (Binary Choice Decomposition)
⏰ If the reply remains abstract (e.g., "Just be more open"), apply Binary Choice Decomposition: Propose two concrete, mutually exclusive actions for the other person to choose from.
"To achieve '[Abstract Term]', should I optimize for Option A [Concrete Action 1] or Option B [Concrete Action 2]? If neither, please provide one physical action I can practice today."

═══════════════════════════════════════════════════════
TONE AND RESTRICTIONS
═══════════════════════════════════════════════════════
Mode-dependent tone: In Mode A, be warm, natural, and conversational. In Mode B, be clear and brief. In Mode C, be structural, precise, and operational.
No Conflict Metaphors (Mode C): Do not use "Red Herring," "Gaslighting," "Trap," or "Attack." Use "Structural Inconsistency," "Protocol Mismatch," or "Introduction of non-verifiable variables."
No Emotional Labeling (Mode C): Do not use "painful," "destabilizing," or "distressing" unless the user does.
Neutrality (Mode C): Evaluate message structure only. Never evaluate the intent of the sender or the cognitive profile of the user.
Subjective opinions welcome in Mode A: When the user asks for your take on a non-conflict subjective topic, you may share a perspective casually.
Spacing Rule (all modes): Never concatenate words. Use correct spacing and standard punctuation.

═══════════════════════════════════════════════════════
Version: ClarityGuard v4.7 β€” Structural / Neuro-inclusive
Stack: ClarityGuard v2 / Gemma 4 E4B IT checkpoint 750 | Dify | Jina RAG
Framework: C.F.R.V.A. (Lengemann, 2026) | Input Triage | Operational Pragmatics | Universality of Ambiguity
Attribution: Based on C.F.R.V.A., created by Carlos Lengemann (2026). Licensed CC BY 4.0.
https://creativecommons.org/licenses/by/4.0/deed.es"""

# Engine state shared between the background threads and respond().
# Set to True by wait_until_ready() once /health answers with HTTP 200.
server_ready = False
# Human-readable failure description; set by wait_until_ready()/monitor_engine().
server_error = None
# True once start_server() has downloaded the multimodal projector file.
multimodal_ready = False

def log(msg):
    """Write a timestamped message to stderr and append it to LOG_FILE.

    Args:
        msg: Value to log; it is formatted into the line as text.
    """
    entry = "[{}] {}".format(datetime.now().strftime("%H:%M:%S"), msg)
    print(entry, file=sys.stderr)
    # The file is reopened in append mode on every call.
    with open(LOG_FILE, "a", encoding="utf-8") as log_handle:
        log_handle.write(f"{entry}\n")

def start_server():
    """Download the model files and launch llama-server as a subprocess.

    The main model download is mandatory: a failure is logged and re-raised.
    The multimodal projector download is optional: on failure the global
    ``multimodal_ready`` is set to False and the server is started without
    ``--mmproj``.

    Returns:
        subprocess.Popen: handle of the started llama-server process, with
        stderr merged into a line-buffered text ``stdout`` pipe.

    Raises:
        FileNotFoundError: if the LLAMA_SERVER executable does not exist.
        Exception: whatever ``hf_hub_download`` raises for the main model.
    """
    global multimodal_ready
    os.makedirs(MODEL_DIR, exist_ok=True)
    log(
        "ConfiguraciΓ³n: "
        f"CPU_THREADS={CPU_THREADS}, LLAMA_CTX={LLAMA_CTX}, "
        f"LLAMA_MAX_TOKENS={LLAMA_MAX_TOKENS}, LLAMA_BATCH={LLAMA_BATCH}, "
        f"LLAMA_UBATCH={LLAMA_UBATCH}, LLAMA_GPU_LAYERS={LLAMA_GPU_LAYERS}, "
        f"MMPROJ_OFFLOAD={MMPROJ_OFFLOAD}"
    )
    log("Descargando modelo para inferencia...")
    try:
        m_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=MODEL_DIR)
        log(f"Modelo descargado en: {m_path}")
    except Exception as e:
        log(f"FALLO en descarga: {e}")
        raise

    # The projector is best-effort: without it the app still serves text.
    mmproj_path = ""
    try:
        mmproj_path = hf_hub_download(repo_id=MODEL_REPO, filename=MMPROJ_FILE, local_dir=MODEL_DIR)
        multimodal_ready = True
        log(f"Projector multimodal descargado en: {mmproj_path}")
    except Exception as e:
        multimodal_ready = False
        log(f"Projector multimodal no disponible; imΓ‘genes desactivadas. Detalle: {e}")

    if not os.path.exists(LLAMA_SERVER):
        raise FileNotFoundError(f"No existe {LLAMA_SERVER}")

    env = os.environ.copy()
    # Only add the separator when there is an inherited value: a trailing ":"
    # leaves an empty entry, which the dynamic linker treats as the current
    # working directory.
    inherited_lib_path = env.get("LD_LIBRARY_PATH", "")
    if inherited_lib_path:
        env["LD_LIBRARY_PATH"] = "/usr/local/lib:" + inherited_lib_path
    else:
        env["LD_LIBRARY_PATH"] = "/usr/local/lib"
    env["OMP_NUM_THREADS"] = str(CPU_THREADS)
    env["OMP_PROC_BIND"] = "false"

    # GPU by default on the Hugging Face Space; override LLAMA_GPU_LAYERS for CPU testing.
    cmd = [
        LLAMA_SERVER,
        "-m", m_path,
        "--host", "127.0.0.1",
        "--port", "8080",
        "-c", str(LLAMA_CTX),
        "-ngl", str(LLAMA_GPU_LAYERS),
        "-t", str(CPU_THREADS),
        "-tb", str(CPU_THREADS),
        "-np", "1",
        "-b", str(LLAMA_BATCH),
        "-ub", str(LLAMA_UBATCH),
        "--threads-http", "2",
        "--fit", "off",
        "--no-mmap",
        "--jinja",
    ]
    if mmproj_path:
        cmd.extend(["--mmproj", mmproj_path])
        if not MMPROJ_OFFLOAD:
            cmd.append("--no-mmproj-offload")
    log(f"Lanzando llama-server GPU: {' '.join(cmd)}")
    # stderr is merged into stdout so monitor_engine() can relay a single stream.
    return subprocess.Popen(
        cmd, env=env,
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        text=True, bufsize=1
    )

def wait_until_ready(proc, timeout=900):
    """Poll the server health endpoint until it answers or the wait ends.

    Sets the global ``server_ready`` to True on the first HTTP 200 response.
    If the process exits or ``timeout`` seconds elapse first, stores an
    explanatory message in the global ``server_error`` instead.

    Args:
        proc: Process handle exposing ``poll()`` and ``returncode``.
        timeout: Maximum number of seconds to keep polling.
    """
    global server_ready, server_error
    began = time.time()
    health_url = f"{SERVER_URL}/health"
    while proc.poll() is None and time.time() - began < timeout:
        try:
            response = requests.get(health_url, timeout=2)
            if response.status_code == 200:
                server_ready = True
                log("MOTOR EN LINEA (GPU)")
                return
        except Exception:
            # The server is expected to refuse connections while it loads.
            pass
        time.sleep(2)

    still_running = proc.poll() is None
    server_error = (
        "El motor no respondiΓ³ al health-check dentro del tiempo esperado."
        if still_running
        else f"El motor terminΓ³ antes de estar listo. CΓ³digo: {proc.returncode}"
    )

def monitor_engine():
    """Start llama-server, relay its output to the log and record failures.

    Intended as a thread target: it blocks while the server process is
    producing output, and stores any failure description in the global
    ``server_error``.
    """
    global server_error
    try:
        log("Arrancando monitor...")
        engine = start_server()
        log(f"PID llama-server: {engine.pid}")
        # Readiness polling runs in its own thread because the loop below blocks.
        readiness_watcher = threading.Thread(
            target=wait_until_ready, args=(engine,), daemon=True
        )
        readiness_watcher.start()
        for raw_line in engine.stdout:
            log(f"[llama] {raw_line.strip()}")
        exit_code = engine.wait()
        # Keep an earlier, more specific error if one was already recorded.
        if exit_code != 0 and not server_error:
            server_error = f"llama-server terminΓ³ con cΓ³digo {exit_code}"
        log(f"llama-server terminΓ³ con cΓ³digo: {exit_code}")
    except Exception as exc:
        server_error = str(exc)
        log(f"EXCEPCIΓ“N MONITOR: {exc}")
        log(traceback.format_exc())

def read_rag_documents():
    """Load the first available, non-empty file for each RAG source label.

    Returns:
        list[dict]: one ``{"source", "path", "text"}`` entry per source that
        could be read, in RAG_DOCS order.
    """
    loaded = []
    loaded_sources = set()
    for source, path in RAG_DOCS:
        # A later candidate is only a fallback for a source not loaded yet.
        if source in loaded_sources:
            continue
        if not os.path.exists(path):
            log(f"RAG: documento no encontrado: {path}")
            continue
        with open(path, "r", encoding="utf-8", errors="ignore") as handle:
            body = handle.read().strip()
        if not body:
            # An empty file does not claim the source; the next candidate is tried.
            continue
        loaded.append({"source": source, "path": path, "text": body})
        loaded_sources.add(source)
    return loaded

def chunk_document(text, max_chars=RAG_CHUNK_CHARS, overlap=RAG_CHUNK_OVERLAP):
    """Split text into chunks of roughly ``max_chars`` characters.

    Paragraphs (separated by blank lines) are packed together until the next
    one would exceed ``max_chars``; each new chunk then starts with the last
    ``overlap`` characters of the previous one. A paragraph longer than
    ``max_chars`` is cut into fixed-size windows that overlap by ``overlap``
    characters.

    Args:
        text: Document text to split.
        max_chars: Target chunk size in characters; must be at least 1.
        overlap: Characters shared between neighbouring chunks. Values
            outside ``0 .. max_chars - 1`` are clamped into that range.

    Returns:
        list[str]: the chunks in document order.

    Raises:
        ValueError: if ``max_chars`` is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")
    # Clamp the overlap so the window stride below is always >= 1. Without
    # this, overlap == max_chars makes range() raise, a larger overlap yields
    # an empty range that silently drops the paragraph, and a negative
    # overlap leaves unindexed gaps between windows.
    overlap = min(max(overlap, 0), max_chars - 1)
    stride = max_chars - overlap

    paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
    chunks = []
    current = ""
    for paragraph in paragraphs:
        if len(paragraph) > max_chars:
            # Flush what was accumulated so chunks stay in document order.
            if current:
                chunks.append(current.strip())
                current = ""
            for start in range(0, len(paragraph), stride):
                piece = paragraph[start:start + max_chars].strip()
                if piece:
                    chunks.append(piece)
            continue
        # The +2 accounts for the "\n\n" separator inserted between paragraphs.
        if len(current) + len(paragraph) + 2 <= max_chars:
            current = f"{current}\n\n{paragraph}".strip()
        else:
            if current:
                chunks.append(current.strip())
                tail = current[-overlap:] if overlap > 0 else ""
                current = f"{tail}\n\n{paragraph}".strip()
            else:
                current = paragraph
    if current:
        chunks.append(current.strip())
    return chunks

def rag_fingerprint(docs):
    """Return a SHA-256 hex digest identifying the current RAG inputs.

    The digest covers the embedding model name, the chunking settings and the
    source label and text of every document, so a change in any of them
    produces a different fingerprint.

    Args:
        docs: Document dicts with ``"source"`` and ``"text"`` string entries.
    """
    digest = hashlib.sha256()
    settings = (JINA_EMBED_MODEL, str(RAG_CHUNK_CHARS), str(RAG_CHUNK_OVERLAP))
    for setting in settings:
        digest.update(setting.encode("utf-8"))
    for doc in docs:
        for field in ("source", "text"):
            digest.update(doc[field].encode("utf-8"))
    return digest.hexdigest()

def normalize_vector(vector):
    """Scale a vector to unit Euclidean length.

    Args:
        vector: Sequence of numbers (or values accepted by ``float``).

    Returns:
        list[float]: the scaled components, or all zeros when the input has
        zero length.
    """
    squared_total = sum(float(component) * float(component) for component in vector)
    magnitude = math.sqrt(squared_total)
    if magnitude != 0:
        return [float(component) / magnitude for component in vector]
    # A zero vector cannot be scaled; return zeros of the same length.
    return [0.0 for _ in vector]

def jina_embed(texts, task):
    """Request embeddings for ``texts`` from the Jina embeddings API.

    Args:
        texts: List of strings to embed.
        task: Value sent as the request's ``"task"`` field.

    Returns:
        list[list[float]]: unit-normalized embeddings ordered by the
        ``"index"`` field of the response items.

    Raises:
        RuntimeError: if JINA_API_KEY is empty.
        requests.HTTPError: if the API answers with an error status.
    """
    if not JINA_API_KEY:
        raise RuntimeError("falta JINA_API_KEY")

    request_headers = {
        "Authorization": f"Bearer {JINA_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": JINA_EMBED_MODEL,
        "task": task,
        "input": texts,
    }
    response = requests.post(
        "https://api.jina.ai/v1/embeddings",
        headers=request_headers,
        json=payload,
        timeout=120,
    )
    response.raise_for_status()

    rows = response.json().get("data", [])
    # Order by the reported index so results line up with the input texts.
    rows.sort(key=lambda row: row.get("index", 0))
    return [normalize_vector(row["embedding"]) for row in rows]

def build_rag_index():
    """Return the embedded RAG chunks, using the on-disk cache when valid.

    The cache file is reused only when its stored fingerprint matches the
    fingerprint of the documents currently on disk. Otherwise the documents
    are chunked, embedded through Jina in batches and written back to
    RAG_INDEX_FILE.

    Returns:
        list[dict]: chunk dicts with ``"source"``, ``"chunk_id"``, ``"text"``
        and ``"embedding"``; an empty list when there are no documents or no
        API key is configured.
    """
    docs = read_rag_documents()
    if not docs:
        log("RAG: sin documentos disponibles.")
        return []

    fingerprint = rag_fingerprint(docs)
    if os.path.exists(RAG_INDEX_FILE):
        try:
            with open(RAG_INDEX_FILE, "r", encoding="utf-8") as cache_file:
                cached_index = json.load(cache_file)
            if cached_index.get("fingerprint") == fingerprint:
                cached_chunks = cached_index.get("chunks", [])
                log(f"RAG: Γ­ndice cargado desde cache ({len(cached_chunks)} chunks).")
                return cached_chunks
        except Exception as exc:
            # An unreadable cache is not fatal; fall through and rebuild it.
            log(f"RAG: no se pudo leer cache, se reconstruye. Detalle: {exc}")

    if not JINA_API_KEY:
        log("RAG: desactivado porque falta JINA_API_KEY.")
        return []

    chunks = [
        {"source": doc["source"], "chunk_id": position, "text": piece}
        for doc in docs
        for position, piece in enumerate(chunk_document(doc["text"]))
    ]

    log(f"RAG: generando embeddings Jina v3 para {len(chunks)} chunks.")
    batch_size = 16
    for offset in range(0, len(chunks), batch_size):
        window = chunks[offset:offset + batch_size]
        vectors = jina_embed([entry["text"] for entry in window], "retrieval.passage")
        for entry, vector in zip(window, vectors):
            entry["embedding"] = vector

    with open(RAG_INDEX_FILE, "w", encoding="utf-8") as cache_file:
        index_payload = {
            "fingerprint": fingerprint,
            "model": JINA_EMBED_MODEL,
            "chunks": chunks,
        }
        json.dump(index_payload, cache_file)
    log(f"RAG: Γ­ndice guardado en {RAG_INDEX_FILE}.")
    return chunks

# In-memory RAG index; None means get_rag_chunks() has not built it yet.
rag_chunks = None
# Held by get_rag_chunks() while it checks for and builds the index.
rag_lock = threading.Lock()

def get_rag_chunks():
    """Return the RAG chunks, building the index on the first call.

    The build runs while holding ``rag_lock``. If it raises, the error is
    logged and an empty list is stored, so the build is not attempted again.
    """
    global rag_chunks
    with rag_lock:
        if rag_chunks is not None:
            return rag_chunks
        try:
            rag_chunks = build_rag_index()
        except Exception as exc:
            log(f"RAG: error construyendo Γ­ndice: {exc}")
            rag_chunks = []
        return rag_chunks

def retrieve_rag_context(query):
    """Build the reference-context text for ``query`` from the RAG index.

    The query is embedded through Jina and every indexed chunk is scored by
    dot product against it. The best RAG_TOP_K chunks are added in score
    order until the next one would exceed RAG_MAX_CONTEXT_CHARS.

    Args:
        query: The user text to retrieve context for.

    Returns:
        str: an instruction header followed by the selected chunks, or an
        empty string when RAG is unavailable or nothing was selected.
    """
    chunks = get_rag_chunks()
    if not (chunks and query.strip() and JINA_API_KEY):
        return ""
    try:
        query_vector = jina_embed([query], "retrieval.query")[0]
    except Exception as exc:
        log(f"RAG: error consultando Jina: {exc}")
        return ""

    ranked = []
    for chunk in chunks:
        chunk_vector = chunk.get("embedding")
        if chunk_vector:
            similarity = sum(q * c for q, c in zip(query_vector, chunk_vector))
            ranked.append((similarity, chunk))
    ranked.sort(key=lambda pair: pair[0], reverse=True)

    blocks = []
    budget_used = 0
    for similarity, chunk in ranked[:RAG_TOP_K]:
        body = chunk["text"].strip()
        block = f"[source={chunk['source']} chunk={chunk['chunk_id']} score={similarity:.3f}]\n{body}"
        # Stop at the first block that does not fit; lower-ranked ones are not tried.
        if budget_used + len(block) > RAG_MAX_CONTEXT_CHARS:
            break
        blocks.append(block)
        budget_used += len(block)

    if not blocks:
        return ""
    header = (
        "RAG CONTEXT (reference only; ClarityGuard system prompt has priority):\n"
        "Use this context only when it directly helps answer the user's current message. "
        "Do not copy confrontational Chatty/book tone into the user-facing answer.\n\n"
    )
    return header + "\n\n---\n\n".join(blocks)

def latest_user_text(history):
    """Return the text of the most recent user turn in ``history``.

    Both history shapes are accepted: ``{"role", "content"}`` dicts, whose
    content may be a string or a list of typed parts, and ``(user,
    assistant)`` pairs.

    Args:
        history: Sequence of chat entries, oldest first.

    Returns:
        str: the latest user text, or an empty string when there is none.
    """
    for entry in reversed(history):
        if isinstance(entry, dict):
            if entry.get("role") != "user":
                continue
            payload = entry.get("content", "")
            if not isinstance(payload, list):
                return str(payload)
            # Multimodal content: keep only the text parts.
            text_parts = [
                str(part.get("text", ""))
                for part in payload
                if isinstance(part, dict) and part.get("type") == "text"
            ]
            return " ".join(text_parts)
        if isinstance(entry, (list, tuple)) and entry and entry[0]:
            return str(entry[0])
    return ""

def image_to_data_uri(image_path):
    """Encode an image file as a base64 ``data:`` URI.

    Args:
        image_path: Path of the image file; a falsy value yields ``""``.

    Returns:
        str: ``data:<mime>;base64,<payload>``, where the MIME type is guessed
        from the file name and falls back to ``image/png``.
    """
    if not image_path:
        return ""
    guessed_type = mimetypes.guess_type(image_path)[0]
    mime_type = guessed_type or "image/png"
    with open(image_path, "rb") as image_file:
        payload = base64.b64encode(image_file.read())
    return "data:" + mime_type + ";base64," + payload.decode("ascii")

def make_user_content(message, image_path=None):
    """Build the ``content`` value of a user message for the chat API.

    Args:
        message: User text; ``None`` is treated as empty.
        image_path: Optional path of an attached image.

    Returns:
        str | list[dict]: the stripped text when no image is attached; the
        text plus a note when an image is attached but the multimodal
        projector is not loaded; otherwise a list holding an ``image_url``
        part (when the image could be encoded) followed by a ``text`` part.
    """
    prompt_text = str(message or "").strip()
    if not image_path:
        return prompt_text

    if multimodal_ready:
        # The text part always comes last; a default prompt covers image-only turns.
        parts = [{"type": "text", "text": prompt_text or "Analyze this image."}]
        encoded_image = image_to_data_uri(image_path)
        if encoded_image:
            parts.insert(0, {"type": "image_url", "image_url": {"url": encoded_image}})
        return parts

    note = "[Attached image, but the multimodal projector is not loaded in llama-server.]"
    if prompt_text:
        return f"{prompt_text}\n\n{note}"
    return note

def respond(history):
    """Stream the assistant reply for ``history`` from llama-server.

    This is a generator: each yielded value is the full reply text
    accumulated so far. When the engine is not ready it yields a single
    status message, and request failures are yielded as an ``"Error: ..."``
    string rather than raised.

    Args:
        history: Chat entries, either ``{"role", "content"}`` dicts or
            ``(user, assistant)`` pairs.
    """
    if not server_ready:
        yield (
            f"Engine unavailable: {server_error}"
            if server_error
            else "Engine loading… this may take a few minutes the first time."
        )
        return

    api_messages = [{"role": "system", "content": CLARITYGUARD_SYSTEM_PROMPT}]
    rag_context = retrieve_rag_context(latest_user_text(history))
    if rag_context:
        api_messages.append({"role": "system", "content": rag_context})

    for entry in history:
        if isinstance(entry, dict):
            body = entry.get("content", "")
            # List content (multimodal parts) is forwarded untouched.
            api_messages.append({
                "role": entry.get("role", "user"),
                "content": body if isinstance(body, list) else str(body),
            })
            continue

        if isinstance(entry, (list, tuple)) and len(entry) >= 2:
            for role, turn_text in zip(("user", "assistant"), entry[:2]):
                if turn_text:
                    api_messages.append({"role": role, "content": str(turn_text)})

    try:
        request_body = {
            "model": MODEL_FILE,
            "messages": api_messages,
            "stream": True,
            "temperature": LLAMA_TEMP,
            "max_tokens": LLAMA_MAX_TOKENS,
        }
        response = requests.post(
            f"{SERVER_URL}/v1/chat/completions",
            json=request_body,
            stream=True,
            timeout=1200,
        )
        response.raise_for_status()

        reply_so_far = ""
        for event_line in response.iter_lines():
            if not event_line:
                continue
            decoded = event_line.decode("utf-8")
            if not decoded.startswith("data:"):
                continue
            event_data = decoded[5:].strip()
            if event_data == "[DONE]":
                break
            try:
                first_choice = json.loads(event_data)["choices"][0]
                piece = first_choice.get("delta", {}).get("content", "")
                reply_so_far += piece
                yield reply_so_far
            except Exception:
                # Events that cannot be parsed or carry no text are skipped.
                continue
    except Exception as exc:
        yield f"Error: {exc}"

with gr.Blocks() as demo:
    # Introductory text shown above the chat.
    gr.Markdown("""# ClarityGuard
Hi there! I'm ClarityGuard. How can I help you?

You can ask me things like:

> *"I missed a minor typo in a draft report, and my manager CC'd the entire HR department, calling it a 'concerning pattern of negligence.' My neurotypical peers make much bigger mistakes and it's just called a 'learning curve.' I feel like they're building a 'paper trail' to fire me over a non-issue β€” what happened here?"*

> *"During my annual review, they said I'm not a 'culture fit' because I don't go to the Friday happy hours. I told them I prefer to focus on my technical tasks during work hours, but they said I lack 'passion for the company vision.' It feels like they're judging my character because I don't want to perform the expected social scripts β€” does this make sense?"*

Or ask me anything β€” about work, relationships, or just to talk. I'm open to anything.
""")
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Type your message and press Enter...")
    image = gr.Image(label="Optional image", type="filepath")
    # Holds the history sent to the model; it can differ from what the chat
    # widget shows because image turns carry the encoded image here.
    api_state = gr.State([])

    def user_fn(message, image_path, visible_history, api_history):
        """Record the user's turn in both histories and clear the inputs."""
        visible_history = [] if visible_history is None else visible_history
        api_history = [] if api_history is None else api_history

        text = str(message or "").strip()
        if not image_path:
            shown_text = text
        elif text:
            shown_text = f"{text}\n\n[Attached image]"
        else:
            shown_text = "[Attached image]"

        visible_history.append({"role": "user", "content": shown_text})
        api_history.append({"role": "user", "content": make_user_content(text, image_path)})
        # Empty string and None reset the textbox and the image input.
        return "", None, visible_history, api_history

    def bot_fn(visible_history, api_history):
        """Stream the assistant reply into the chat, then store it in the API history."""
        visible_history = [] if visible_history is None else visible_history
        api_history = [] if api_history is None else api_history

        # Placeholder turn that is overwritten as partial replies arrive.
        visible_history.append({"role": "assistant", "content": ""})
        for partial_reply in respond(api_history):
            visible_history[-1] = {"role": "assistant", "content": partial_reply}
            yield visible_history, api_history
        if not visible_history:
            return
        final_reply = visible_history[-1]["content"]
        api_history.append({"role": "assistant", "content": final_reply})
        yield visible_history, api_history

    send = gr.Button("Send")

    # Pressing Enter and clicking Send run the same two-step chain.
    msg.submit(
        user_fn,
        [msg, image, chatbot, api_state],
        [msg, image, chatbot, api_state],
    ).then(bot_fn, [chatbot, api_state], [chatbot, api_state])
    send.click(
        user_fn,
        [msg, image, chatbot, api_state],
        [msg, image, chatbot, api_state],
    ).then(bot_fn, [chatbot, api_state], [chatbot, api_state])

if __name__ == "__main__":
    # Start each run with a fresh log file.
    with open(LOG_FILE, "w") as log_file:
        log_file.write("Iniciando...\n")
    # Build the RAG index and boot the engine in the background so the UI
    # can come up immediately.
    for background_task in (get_rag_chunks, monitor_engine):
        threading.Thread(target=background_task, daemon=True).start()
    demo.launch(server_name="0.0.0.0", server_port=7860)