import gradio as gr
import os
import subprocess
import time
import requests
import json
import threading
import traceback
import sys
import hashlib
import math
import base64
import mimetypes
from huggingface_hub import hf_hub_download
from datetime import datetime
# --- CONFIGURATION ---
# Settings read through os.getenv() can be overridden from the environment;
# the second argument is the fallback used when the variable is not set.

# Directory of this script; default base for the model, log and RAG paths below.
APP_DIR = os.getenv("APP_DIR", os.path.dirname(os.path.abspath(__file__)))
# Hugging Face Hub repository and file names downloaded by start_server().
MODEL_REPO = "CharlieBonito/clarity-guard-gemma4-7b"
MODEL_FILE = "ClarityGuard-v2.gguf"
MMPROJ_FILE = os.getenv("MMPROJ_FILE", "mmproj-ClarityGuard-v2.gguf")
# llama-server executable and the directory the model files are downloaded into.
LLAMA_SERVER = "/opt/llama-cpp/llama-server"
MODEL_DIR = os.getenv("MODEL_DIR", os.path.join(APP_DIR, "models"))
# Base URL used for the /health poll and the chat-completions requests.
SERVER_URL = "http://127.0.0.1:8080"
# File that log() appends to.
LOG_FILE = os.getenv("LOG_FILE", os.path.join(APP_DIR, "startup.log"))
# Passed to llama-server as -t / -tb and exported as OMP_NUM_THREADS.
CPU_THREADS = int(os.getenv("CPU_THREADS", "8"))
# Passed to llama-server as -c.
LLAMA_CTX = int(os.getenv("LLAMA_CTX", "12288"))
# Sent as "max_tokens" with every chat-completions request.
LLAMA_MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "8192"))
# Passed to llama-server as -b and -ub respectively.
LLAMA_BATCH = int(os.getenv("LLAMA_BATCH", "1024"))
LLAMA_UBATCH = int(os.getenv("LLAMA_UBATCH", "512"))
# Passed to llama-server as -ngl.
LLAMA_GPU_LAYERS = int(os.getenv("LLAMA_GPU_LAYERS", "999"))
# Sent as "temperature" with every chat-completions request.
LLAMA_TEMP = float(os.getenv("LLAMA_TEMP", "0.7"))
# When false, start_server() adds --no-mmproj-offload to the command line.
MMPROJ_OFFLOAD = os.getenv("MMPROJ_OFFLOAD", "true").lower() in ("1", "true", "yes")
# Jina embeddings API credentials and model name; an empty key disables RAG.
JINA_API_KEY = os.getenv("JINA_API_KEY", "")
JINA_EMBED_MODEL = os.getenv("JINA_EMBED_MODEL", "jina-embeddings-v3")
# JSON file where the embedded chunks are cached between runs.
RAG_INDEX_FILE = os.getenv("RAG_INDEX_FILE", os.path.join(APP_DIR, "rag_index.json"))
# Retrieval limits: number of best-scoring chunks considered and total size budget.
RAG_TOP_K = int(os.getenv("RAG_TOP_K", "4"))
RAG_MAX_CONTEXT_CHARS = int(os.getenv("RAG_MAX_CONTEXT_CHARS", "9000"))
# Default chunk size and overlap (in characters) used by chunk_document().
RAG_CHUNK_CHARS = int(os.getenv("RAG_CHUNK_CHARS", "1800"))
RAG_CHUNK_OVERLAP = int(os.getenv("RAG_CHUNK_OVERLAP", "250"))
# (source label, path) candidates. read_rag_documents() keeps the first usable
# path per label, so the "documents/" location takes precedence over APP_DIR.
RAG_DOCS = [
    ("chatty", os.path.join(APP_DIR, "documents", "chatty.md")),
    ("libro", os.path.join(APP_DIR, "documents", "libro.md")),
    ("chatty", os.path.join(APP_DIR, "chatty.md")),
    ("libro", os.path.join(APP_DIR, "libro.md")),
]
# System prompt placed first in the message list of every chat-completions
# request built by respond(). The text is runtime data: edit it only to change
# the assistant's behaviour.
# NOTE(review): the literal contains characters that look mis-encoded (for
# example "β" where a dash, arrow or rule character was probably intended) --
# confirm the file encoding before shipping.
CLARITYGUARD_SYSTEM_PROMPT = """CLARITYGUARD ASSISTANT β NEURO-INCLUSIVE EDITION v4.7
Tuned for ClarityGuard v2 / Gemma 4 E4B IT checkpoint 750 | Dify + Jina RAG
Based on C.F.R.V.A., created by Carlos Lengemann (2026) β CC BY 4.0
Language policy (non-negotiable): These instructions are written in English for clarity for builders. Your replies to the user must always be in the same language the user uses in their current message (including step titles, examples, and suggested wording). If the user mixes languages, mirror the language of their question / framing (the part where they ask for help), not the quoted third-party text. Never concatenate words. Always write with correct spacing and normal punctuation.
Response initialization (non-negotiable): Every response must begin with a clean, natural opener such as "Got it.", "Sure!", "Hi there!" or "Understood." before any analysis. This is mandatory on every turn without exception.
IDENTITY AND PURPOSE
You are ClarityGuard, a structural communication-analysis module. You specialize in providing objective clarity for neurodivergent individuals by translating abstract or socially-coded messages into concrete, actionable data.
Core Function: You determine whether confusion originates in the structure of the message itself rather than a cognitive failure of the user. You treat ambiguity as a technical bug in the communication protocol.
Foundational Principles:
Fundamental Principle: Confusion in the face of a structurally incomplete message is the correct response, not a cognitive error. If a message lacks a clear subject, defined action, explicit date, or measurable criterion, no person can execute it with certainty, regardless of their cognitive profile.
Universality Principle: The perception that others "understand" ambiguous messages does not demonstrate message clarity. It may demonstrate the use of cognitive shortcuts (confirmation bias, anchoring bias, social conformity) that produce an illusion of understanding.
Double Empathy Mitigation: You bridge the gap between literal/data-driven communication styles and implicit/vibe-driven styles without pathologizing either.
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
STEP 0 β INPUT TRIAGE (mandatory first gate)
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
Before running C.F.R.V.A., classify the user's input into ONE of three modes:
MODE A β CASUAL / CONVERSATIONAL
Triggers: greetings, small talk, subjective opinion questions ("which game is better, StarCraft or Age of Empires?"), general knowledge questions, playful banter, hypotheticals with no real-world stakes, requests for recommendations without a communication conflict, or any input where there is NO reported interpersonal misunderstanding, NO ambiguous message from a third party being decoded, and NO emotional distress.
β Response: Reply naturally and conversationally, like a friendly knowledgeable assistant. Do NOT mention C.F.R.V.A. Do NOT produce a score. Do NOT use the 4-step structure. Do NOT use clinical/structural language. Just answer the way a smart, warm friend would. Keep the opener requirement.
MODE B β LIGHT CLARIFICATION
Triggers: the user reports a minor confusion about a single phrase, idiom, or instruction, but with no emotional charge and no ongoing conflict. Example: "My coworker said 'ping me later' β does that mean call or message?"
β Response: Give a brief plain-language explanation (2β4 sentences) of what the phrase likely means in context, plus ONE optional clarification question they could ask. Do NOT run the full 4-step protocol. Do NOT show a score. Stay light.
MODE C β STRUCTURAL ANALYSIS (full ClarityGuard)
Triggers: the user reports a workplace, social, or interpersonal situation involving (a) an ambiguous/coded message from another party, (b) a label or accusation directed at them ("arrogant", "shifty", "not a culture fit", "passive-aggressive", etc.), (c) a conflict where they feel misunderstood or judged, (d) sensory/cognitive accommodation issues, or (e) any situation where they need help decoding what someone "really meant" in a high-stakes context.
β Response: Run the full C.F.R.V.A. analysis and the 4-step protocol below.
Routing principle: When in doubt between A and C, ask yourself: "Is there a real-world communication conflict with stakes for the user?" If no β Mode A. If yes β Mode C. Never force a casual question into the structural protocol.
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
C.F.R.V.A. FRAMEWORK DEFINITIONS (Mode C only)
Based on C.F.R.V.A. β Carlos Lengemann (2026)
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
C β Undeclared Context
Presence of implicit assumptions, unverbalized background, or prior information the sender assumes is known but does not make explicit, generating interpretation gaps.
F β Diffuse Focusing
Absence of measurable criteria, undefined terms, or instructions that do not specify what observable result constitutes compliance.
R β Covert Redirection
Change of focus or priority without explicit signaling, where the object of the communication shifts without notice, preventing linear tracking.
V β Conditioned Validation
Structure where approval, positive response, or access to information depends on NOT requesting clarification, implicitly penalizing the question.
A β Linguistic Ambiguity
Use of figurative language, undefined technical jargon, metaphors, or extended instructions without written support that prevent objective verification.
SCORING SCALE
Each dimension is scored 0β10. Maximum total: 50 points.
0β10: Clear message. Confirm receipt and offer support if needed.
11β20: General clarity problem. Name the ambiguous element, suggest one confirmation question.
21β30: Moderate ambiguity. Full analysis + cognitive protection + clarification suggestion.
31β50: Maximum Alert. Full analysis with cognitive protection + clarification questions + follow-up plan for abstract replies.
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
RESPONSE STRUCTURE β 4 STEPS (Mode C only)
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
STEP 1 β ANALYSIS
π [ClarityGuard] C.F.R.V.A. score: XX/50 β [Level Name]
Use descriptive, clinical language to identify Protocol Mismatches:
Identify what the message has (literal tokens).
Identify the Structural Vacuum (what is missing: dates, units, specific verbs, measurable criteria).
Flag Adjective-Based Feedback: adjectives (e.g., "arrogant," "proactive") are emotional data points for the sender, but zero-value data points for the receiver.
Do NOT evaluate the sender's intent. Evaluate the message structure only.
STEP 2 β COGNITIVE PROTECTION
π Your confusion is not a failure. It is the correct response to an incomplete message.
Tone for Step 2: Warmer and more human than Step 1 and Step 4.
Step 2 is the moment of relief in the response β the user has
just received structural analysis (Step 1) and is about to
receive action items (Step 3). Step 2 should feel like a
pause where the bot acknowledges that the user's logical
response to the message is valid.
Open Step 2 with one sentence that validates the user's
position logically β not emotionally. Examples of acceptable
openings (do not project feelings the user did not declare):
- "Your reading of this message is structurally correct."
- "The difficulty you may be having parsing this is not
a comprehension issue β it is a data issue."
- "Nothing about this message is your responsibility to decode
alone; it was delivered without the necessary parameters."
Conditional emotional mirroring: If β and only if β the user
explicitly uses emotional language ("I feel worried", "this
made me anxious", "I'm overwhelmed"), you may mirror that
specific word once in Step 2 before continuing with the
structural analysis. Do not introduce emotional vocabulary
the user did not provide.
Objective Fact: Summarize what was said literally.
Structural Gap: Name the missing technical parameter.
Universality Brief: Explain that the ambiguity makes the message
structurally unexecutable. Others appearing to "understand" it
are likely using social shortcuts, not data-driven comprehension.
Constraint: Do NOT name or infer emotions (e.g., "you feel anxious")
unless the user explicitly used those words. Stay operational.
STEP 3 β CONCRETE ACTION (Read-Back)
βοΈ Clarification suggestion: Provide a "Read-Back" script designed to force the other party back into Operational Language.
"To ensure I meet the exact professional standard: when you say [QUOTE], are you referring to [VARIABLE A] or [VARIABLE B]? What is the specific observable behavior you would like me to implement?"
STEP 4 β FOLLOW-UP PLAN (Binary Choice Decomposition)
β° If the reply remains abstract (e.g., "Just be more open"), apply Binary Choice Decomposition: Propose two concrete, mutually exclusive actions for the other person to choose from.
"To achieve '[Abstract Term]', should I optimize for Option A [Concrete Action 1] or Option B [Concrete Action 2]? If neither, please provide one physical action I can practice today."
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
TONE AND RESTRICTIONS
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
Mode-dependent tone: In Mode A, be warm, natural, and conversational. In Mode B, be clear and brief. In Mode C, be structural, precise, and operational.
No Conflict Metaphors (Mode C): Do not use "Red Herring," "Gaslighting," "Trap," or "Attack." Use "Structural Inconsistency," "Protocol Mismatch," or "Introduction of non-verifiable variables."
No Emotional Labeling (Mode C): Do not use "painful," "destabilizing," or "distressing" unless the user does.
Neutrality (Mode C): Evaluate message structure only. Never evaluate the intent of the sender or the cognitive profile of the user.
Subjective opinions welcome in Mode A: When the user asks for your take on a non-conflict subjective topic, you may share a perspective casually.
Spacing Rule (all modes): Never concatenate words. Use correct spacing and standard punctuation.
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
Version: ClarityGuard v4.7 β Structural / Neuro-inclusive
Stack: ClarityGuard v2 / Gemma 4 E4B IT checkpoint 750 | Dify | Jina RAG
Framework: C.F.R.V.A. (Lengemann, 2026) | Input Triage | Operational Pragmatics | Universality of Ambiguity
Attribution: Based on C.F.R.V.A., created by Carlos Lengemann (2026). Licensed CC BY 4.0.
https://creativecommons.org/licenses/by/4.0/deed.es"""
# Shared engine state: written by the background threads started at launch and
# read by respond() / make_user_content() when a chat request comes in.
server_ready = False  # set to True by wait_until_ready() once /health answers 200
server_error = None  # description of the failure shown to the user, or None
multimodal_ready = False  # True once start_server() downloaded the projector file
def log(msg):
    """Emit a timestamped line on stderr and append the same line to LOG_FILE.

    Args:
        msg: Value to record; it is formatted into the line after an
            ``[HH:MM:SS]`` prefix taken from the local clock.
    """
    entry = "[{}] {}".format(datetime.now().strftime("%H:%M:%S"), msg)
    print(entry, file=sys.stderr)
    with open(LOG_FILE, "a", encoding="utf-8") as handle:
        handle.write(f"{entry}\n")
def start_server():
    """Download the model files and launch llama-server as a child process.

    The main GGUF model is mandatory. The multimodal projector is optional:
    when its download fails the failure is logged, ``multimodal_ready`` is set
    to False and the server is started without ``--mmproj``.

    Returns:
        subprocess.Popen: The running llama-server process. stderr is merged
        into stdout, which is exposed as a line-buffered text pipe.

    Raises:
        FileNotFoundError: If the LLAMA_SERVER executable does not exist.
        Exception: Whatever ``hf_hub_download`` raised for the main model
            (logged first, then re-raised).
    """
    global multimodal_ready
    os.makedirs(MODEL_DIR, exist_ok=True)
    log(
        "ConfiguraciΓ³n: "
        f"CPU_THREADS={CPU_THREADS}, LLAMA_CTX={LLAMA_CTX}, "
        f"LLAMA_MAX_TOKENS={LLAMA_MAX_TOKENS}, LLAMA_BATCH={LLAMA_BATCH}, "
        f"LLAMA_UBATCH={LLAMA_UBATCH}, LLAMA_GPU_LAYERS={LLAMA_GPU_LAYERS}, "
        f"MMPROJ_OFFLOAD={MMPROJ_OFFLOAD}"
    )
    log("Descargando modelo para inferencia...")
    try:
        m_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=MODEL_DIR)
        log(f"Modelo descargado en: {m_path}")
    except Exception as e:
        log(f"FALLO en descarga: {e}")
        raise

    # The projector is best-effort: without it the app still works, text-only.
    mmproj_path = ""
    try:
        mmproj_path = hf_hub_download(repo_id=MODEL_REPO, filename=MMPROJ_FILE, local_dir=MODEL_DIR)
        multimodal_ready = True
        log(f"Projector multimodal descargado en: {mmproj_path}")
    except Exception as e:
        multimodal_ready = False
        log(f"Projector multimodal no disponible; imΓ‘genes desactivadas. Detalle: {e}")

    if not os.path.exists(LLAMA_SERVER):
        raise FileNotFoundError(f"No existe {LLAMA_SERVER}")

    env = os.environ.copy()
    # Prepend /usr/local/lib without leaving an empty path entry: a trailing
    # ":" is a zero-length directory name, which the dynamic loader resolves
    # to the current working directory.
    inherited_lib_path = env.get("LD_LIBRARY_PATH", "")
    if inherited_lib_path:
        env["LD_LIBRARY_PATH"] = f"/usr/local/lib:{inherited_lib_path}"
    else:
        env["LD_LIBRARY_PATH"] = "/usr/local/lib"
    env["OMP_NUM_THREADS"] = str(CPU_THREADS)
    env["OMP_PROC_BIND"] = "false"

    # GPU by default on the Hugging Face Space; override LLAMA_GPU_LAYERS for CPU testing.
    cmd = [
        LLAMA_SERVER,
        "-m", m_path,
        "--host", "127.0.0.1",
        "--port", "8080",
        "-c", str(LLAMA_CTX),
        "-ngl", str(LLAMA_GPU_LAYERS),
        "-t", str(CPU_THREADS),
        "-tb", str(CPU_THREADS),
        "-np", "1",
        "-b", str(LLAMA_BATCH),
        "-ub", str(LLAMA_UBATCH),
        "--threads-http", "2",
        "--fit", "off",
        "--no-mmap",
        "--jinja",
    ]
    if mmproj_path:
        cmd.extend(["--mmproj", mmproj_path])
        if not MMPROJ_OFFLOAD:
            cmd.append("--no-mmproj-offload")
    log(f"Lanzando llama-server GPU: {' '.join(cmd)}")
    return subprocess.Popen(
        cmd, env=env,
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        text=True, bufsize=1
    )
def wait_until_ready(proc, timeout=900):
    """Poll the engine's /health endpoint until it is up, dead, or out of time.

    On the first HTTP 200 answer ``server_ready`` is set to True and the
    function returns. Otherwise ``server_error`` is filled in with the reason:
    either the timeout elapsed or the process exited first.

    Args:
        proc: The llama-server process handle (``poll()`` and ``returncode``
            are used).
        timeout: Maximum number of seconds to keep polling.
    """
    global server_ready, server_error
    began = time.time()
    while True:
        if proc.poll() is not None or time.time() - began >= timeout:
            break
        try:
            reply = requests.get(f"{SERVER_URL}/health", timeout=2)
            if reply.status_code == 200:
                server_ready = True
                log("MOTOR EN LINEA (GPU)")
                return
        except Exception:
            # Failures are expected while the server is still starting; retry.
            pass
        time.sleep(2)

    if proc.poll() is not None:
        server_error = f"El motor terminΓ³ antes de estar listo. CΓ³digo: {proc.returncode}"
    else:
        server_error = "El motor no respondiΓ³ al health-check dentro del tiempo esperado."
def monitor_engine():
    """Start llama-server, mirror its output into the log and record its exit.

    A daemon thread running ``wait_until_ready`` is started alongside. This
    function then blocks reading the child's output until the pipe closes.
    Any exception is stored in ``server_error`` and logged with its traceback.
    """
    global server_error
    try:
        log("Arrancando monitor...")
        engine = start_server()
        log(f"PID llama-server: {engine.pid}")
        readiness_watcher = threading.Thread(
            target=wait_until_ready, args=(engine,), daemon=True
        )
        readiness_watcher.start()
        for output_line in engine.stdout:
            log(f"[llama] {output_line.strip()}")
        exit_code = engine.wait()
        # Keep an earlier, more specific error if one was already recorded.
        if exit_code != 0 and not server_error:
            server_error = f"llama-server terminΓ³ con cΓ³digo {exit_code}"
        log(f"llama-server terminΓ³ con cΓ³digo: {exit_code}")
    except Exception as exc:
        server_error = str(exc)
        log(f"EXCEPCIΓN MONITOR: {exc}")
        log(traceback.format_exc())
def read_rag_documents():
    """Load the first readable, non-empty file for each source label in RAG_DOCS.

    Candidates are tried in list order. Missing files are logged and skipped,
    and a label that already produced a document is not read again.

    Returns:
        list[dict]: One ``{"source", "path", "text"}`` entry per label, in the
        order the labels were first satisfied.
    """
    by_source = {}
    for source, path in RAG_DOCS:
        if source in by_source:
            continue
        if not os.path.exists(path):
            log(f"RAG: documento no encontrado: {path}")
            continue
        with open(path, "r", encoding="utf-8", errors="ignore") as handle:
            body = handle.read().strip()
        if not body:
            # An empty file does not claim the label; a later candidate may.
            continue
        by_source[source] = {"source": source, "path": path, "text": body}
    return list(by_source.values())
def chunk_document(text, max_chars=RAG_CHUNK_CHARS, overlap=RAG_CHUNK_OVERLAP):
    """Split *text* into chunks of roughly *max_chars* characters.

    Paragraphs (separated by blank lines) are packed together while they fit.
    When a chunk is closed, its last *overlap* characters are carried into the
    next one. A single paragraph longer than *max_chars* is cut into sliding
    windows of *max_chars* characters that advance by ``max_chars - overlap``.

    Args:
        text: Document text.
        max_chars: Target chunk size in characters; must be at least 1.
        overlap: Characters shared between consecutive chunks. A value outside
            ``[0, max_chars)`` is treated as 0, because it would otherwise give
            the window a zero or negative step (an error, or a silently
            dropped paragraph) or leave gaps between windows.

    Returns:
        list[str]: The non-empty, stripped chunks in document order.

    Raises:
        ValueError: If *max_chars* is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be at least 1")
    if not 0 <= overlap < max_chars:
        overlap = 0
    stride = max_chars - overlap

    paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
    chunks = []
    current = ""
    for paragraph in paragraphs:
        if len(paragraph) > max_chars:
            # Oversized paragraph: flush what is pending, then window it.
            if current:
                chunks.append(current.strip())
                current = ""
            for start in range(0, len(paragraph), stride):
                piece = paragraph[start:start + max_chars].strip()
                if piece:
                    chunks.append(piece)
                # This window already reached the end of the paragraph; any
                # further window would only repeat part of it.
                if start + max_chars >= len(paragraph):
                    break
            continue
        if len(current) + len(paragraph) + 2 <= max_chars:
            current = f"{current}\n\n{paragraph}".strip()
        elif current:
            chunks.append(current.strip())
            tail = current[-overlap:] if overlap > 0 else ""
            current = f"{tail}\n\n{paragraph}".strip()
        else:
            current = paragraph
    if current:
        chunks.append(current.strip())
    return chunks
def rag_fingerprint(docs):
    """Return a SHA-256 hex digest identifying the index inputs.

    The digest covers the embedding model name, the chunk size and overlap
    settings, and the source label and text of every document, in order.

    Args:
        docs: Iterable of dicts with string ``"source"`` and ``"text"`` values.

    Returns:
        str: Hexadecimal digest.
    """
    digest = hashlib.sha256()
    settings = (JINA_EMBED_MODEL, str(RAG_CHUNK_CHARS), str(RAG_CHUNK_OVERLAP))
    for value in settings:
        digest.update(value.encode("utf-8"))
    for doc in docs:
        for field in ("source", "text"):
            digest.update(doc[field].encode("utf-8"))
    return digest.hexdigest()
def normalize_vector(vector):
    """Scale *vector* to unit Euclidean length.

    Args:
        vector: Sequence of values convertible with ``float()``.

    Returns:
        list[float]: The components divided by the vector's norm, or a list of
        zeros of the same length when the norm is zero.
    """
    magnitude = math.sqrt(sum(float(c) * float(c) for c in vector))
    if magnitude != 0:
        return [float(c) / magnitude for c in vector]
    # A zero vector has no direction; return zeros instead of dividing by zero.
    return [0.0 for _ in vector]
def jina_embed(texts, task):
    """Request embeddings for *texts* from the Jina API and normalise them.

    Args:
        texts: List of strings sent as the request's ``"input"``.
        task: Value sent as the request's ``"task"`` field.

    Returns:
        list[list[float]]: Unit-length embeddings, ordered by the ``"index"``
        field of the returned items.

    Raises:
        RuntimeError: If JINA_API_KEY is empty.
        requests.HTTPError: If the API answers with an error status.
    """
    if not JINA_API_KEY:
        raise RuntimeError("falta JINA_API_KEY")

    request_headers = {
        "Authorization": f"Bearer {JINA_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": JINA_EMBED_MODEL,
        "task": task,
        "input": texts,
    }
    response = requests.post(
        "https://api.jina.ai/v1/embeddings",
        headers=request_headers,
        json=payload,
        timeout=120,
    )
    response.raise_for_status()
    rows = sorted(
        response.json().get("data", []),
        key=lambda row: row.get("index", 0),
    )
    return [normalize_vector(row["embedding"]) for row in rows]
def build_rag_index():
    """Return the embedded RAG chunks, from the cache file or freshly built.

    The cache in RAG_INDEX_FILE is reused only when its fingerprint matches the
    current documents and settings. Otherwise the documents are chunked,
    embedded through Jina in batches of 16, and the result is written back to
    the cache file.

    Returns:
        list[dict]: Chunks with ``"source"``, ``"chunk_id"``, ``"text"`` and
        ``"embedding"`` keys. Empty when there are no documents, or when a
        rebuild is needed but JINA_API_KEY is missing.

    Raises:
        Exception: Errors from ``jina_embed`` propagate to the caller.
    """
    docs = read_rag_documents()
    if not docs:
        log("RAG: sin documentos disponibles.")
        return []

    fingerprint = rag_fingerprint(docs)
    if os.path.exists(RAG_INDEX_FILE):
        try:
            with open(RAG_INDEX_FILE, "r", encoding="utf-8") as f:
                cached = json.load(f)
            if cached.get("fingerprint") == fingerprint:
                chunks = cached.get("chunks", [])
                log(f"RAG: Γndice cargado desde cache ({len(chunks)} chunks).")
                return chunks
        except Exception as e:
            # A corrupt or unreadable cache is not fatal; rebuild below.
            log(f"RAG: no se pudo leer cache, se reconstruye. Detalle: {e}")

    if not JINA_API_KEY:
        log("RAG: desactivado porque falta JINA_API_KEY.")
        return []

    chunks = []
    for doc in docs:
        for idx, text in enumerate(chunk_document(doc["text"])):
            chunks.append({
                "source": doc["source"],
                "chunk_id": idx,
                "text": text,
            })

    log(f"RAG: generando embeddings Jina v3 para {len(chunks)} chunks.")
    batch_size = 16
    for start in range(0, len(chunks), batch_size):
        batch = chunks[start:start + batch_size]
        embeddings = jina_embed([item["text"] for item in batch], "retrieval.passage")
        for item, embedding in zip(batch, embeddings):
            item["embedding"] = embedding

    # Persisting the index is an optimisation only. If the file cannot be
    # written (for example on a read-only filesystem), keep the embeddings
    # that were just computed instead of failing the whole build.
    try:
        with open(RAG_INDEX_FILE, "w", encoding="utf-8") as f:
            json.dump({
                "fingerprint": fingerprint,
                "model": JINA_EMBED_MODEL,
                "chunks": chunks,
            }, f)
    except OSError as e:
        log(f"RAG: no se pudo guardar cache en {RAG_INDEX_FILE}; se usa solo en memoria. Detalle: {e}")
    else:
        log(f"RAG: Γndice guardado en {RAG_INDEX_FILE}.")
    return chunks
# Index built on first use: None means "not built yet", [] means "unavailable".
rag_chunks = None
# Serialises the first build so that concurrent callers wait for one result.
rag_lock = threading.Lock()


def get_rag_chunks():
    """Return the RAG chunks, building the index on the first call.

    A failed build is logged and remembered as an empty list, so it is not
    retried on later calls.

    Returns:
        list: The chunks produced by ``build_rag_index``, or ``[]`` when the
        build failed.
    """
    global rag_chunks
    with rag_lock:
        if rag_chunks is not None:
            return rag_chunks
        try:
            rag_chunks = build_rag_index()
        except Exception as exc:
            log(f"RAG: error construyendo Γndice: {exc}")
            rag_chunks = []
        return rag_chunks
def retrieve_rag_context(query):
    """Build the RAG context message for *query*.

    The query is embedded through Jina and scored against every stored chunk
    embedding with a dot product. The RAG_TOP_K best chunks are then added in
    score order until one would exceed RAG_MAX_CONTEXT_CHARS.

    Args:
        query: The user's latest message text.

    Returns:
        str: A header followed by the selected chunks, or ``""`` when RAG is
        unavailable, the query is blank, the embedding call fails, or no chunk
        fits the budget.
    """
    chunks = get_rag_chunks()
    if not chunks or not query.strip() or not JINA_API_KEY:
        return ""
    try:
        query_vec = jina_embed([query], "retrieval.query")[0]
    except Exception as exc:
        log(f"RAG: error consultando Jina: {exc}")
        return ""

    # Chunks without an embedding cannot be scored and are left out.
    ranked = sorted(
        (
            (sum(q * c for q, c in zip(query_vec, chunk.get("embedding"))), chunk)
            for chunk in chunks
            if chunk.get("embedding")
        ),
        key=lambda pair: pair[0],
        reverse=True,
    )

    blocks = []
    remaining = RAG_MAX_CONTEXT_CHARS
    for score, chunk in ranked[:RAG_TOP_K]:
        body = chunk["text"].strip()
        block = f"[source={chunk['source']} chunk={chunk['chunk_id']} score={score:.3f}]\n{body}"
        if len(block) > remaining:
            break
        blocks.append(block)
        remaining -= len(block)

    if not blocks:
        return ""
    header = (
        "RAG CONTEXT (reference only; ClarityGuard system prompt has priority):\n"
        "Use this context only when it directly helps answer the user's current message. "
        "Do not copy confrontational Chatty/book tone into the user-facing answer.\n\n"
    )
    return header + "\n\n---\n\n".join(blocks)
def latest_user_text(history):
    """Return the text of the most recent user turn in *history*.

    Two history shapes are understood: dicts with ``"role"`` / ``"content"``
    keys, and ``(user, assistant)`` pairs. For list-valued content, only the
    ``"text"`` parts are used, joined with single spaces.

    Args:
        history: Sequence of conversation entries, oldest first.

    Returns:
        str: The latest user text, or ``""`` when none is found.
    """
    for entry in reversed(history):
        if isinstance(entry, dict):
            if entry.get("role") != "user":
                continue
            body = entry.get("content", "")
            if not isinstance(body, list):
                return str(body)
            text_parts = [
                str(part.get("text", ""))
                for part in body
                if isinstance(part, dict) and part.get("type") == "text"
            ]
            return " ".join(text_parts)
        if isinstance(entry, (list, tuple)) and entry and entry[0]:
            return str(entry[0])
    return ""
def image_to_data_uri(image_path):
    """Encode the file at *image_path* as a base64 ``data:`` URI.

    Args:
        image_path: Path of the image file; a falsy value yields ``""``.

    Returns:
        str: ``data:<mime>;base64,<payload>``. The MIME type is guessed from
        the file name and falls back to ``image/png`` when it cannot be
        guessed.
    """
    if not image_path:
        return ""
    mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
    with open(image_path, "rb") as stream:
        payload = base64.b64encode(stream.read())
    return "data:" + mime_type + ";base64," + payload.decode("ascii")
def make_user_content(message, image_path=None):
    """Build the ``content`` value of a user message for the chat API.

    Args:
        message: The user's text; ``None`` is treated as empty.
        image_path: Optional path of an attached image.

    Returns:
        str | list: The stripped text when there is no image. With an image
        but no multimodal projector, the text followed by a note saying the
        image could not be used. Otherwise a list holding an ``image_url``
        part (when the image could be encoded) and a ``text`` part.
    """
    text = str(message or "").strip()
    if not image_path:
        return text

    if multimodal_ready:
        parts = []
        uri = image_to_data_uri(image_path)
        if uri:
            parts.append({"type": "image_url", "image_url": {"url": uri}})
        # The text part is always present; a default prompt covers image-only turns.
        parts.append({"type": "text", "text": text or "Analyze this image."})
        return parts

    note = "[Attached image, but the multimodal projector is not loaded in llama-server.]"
    if text:
        return f"{text}\n\n{note}"
    return note
def respond(history):
    """Stream the assistant's reply for *history* from llama-server.

    The request starts with the ClarityGuard system prompt, optionally followed
    by a second system message holding RAG context for the latest user text,
    and then the conversation itself. Both dict-style entries and
    ``(user, assistant)`` pairs are accepted in *history*.

    Args:
        history: Conversation so far, oldest entry first.

    Yields:
        str: The reply text accumulated so far, once per received content
        delta. When the engine is not ready, or the request fails, a single
        status or error message is yielded instead.
    """
    if not server_ready:
        if server_error:
            yield f"Engine unavailable: {server_error}"
            return
        yield "Engine loading⦠this may take a few minutes the first time."
        return

    api_messages = [{"role": "system", "content": CLARITYGUARD_SYSTEM_PROMPT}]
    rag_context = retrieve_rag_context(latest_user_text(history))
    if rag_context:
        api_messages.append({"role": "system", "content": rag_context})

    for m in history:
        if isinstance(m, dict):
            content = m.get("content", "")
            # Multi-part content (image + text) is forwarded untouched.
            if not isinstance(content, list):
                content = str(content)
            api_messages.append({"role": m.get("role", "user"), "content": content})
            continue
        if isinstance(m, (list, tuple)) and len(m) >= 2:
            user_msg, assistant_msg = m[0], m[1]
            if user_msg:
                api_messages.append({"role": "user", "content": str(user_msg)})
            if assistant_msg:
                api_messages.append({"role": "assistant", "content": str(assistant_msg)})

    try:
        # With stream=True the connection stays open until the body is fully
        # read or the response is closed. The with-block guarantees it is
        # closed even when this generator is abandoned or fails mid-stream.
        with requests.post(
            f"{SERVER_URL}/v1/chat/completions",
            json={
                "model": MODEL_FILE,
                "messages": api_messages,
                "stream": True,
                "temperature": LLAMA_TEMP,
                "max_tokens": LLAMA_MAX_TOKENS,
            },
            stream=True, timeout=1200
        ) as r:
            r.raise_for_status()
            full_text = ""
            for line in r.iter_lines():
                if not line:
                    continue
                raw = line.decode("utf-8")
                if not raw.startswith("data:"):
                    continue
                chunk = raw[5:].strip()
                if chunk == "[DONE]":
                    break
                try:
                    delta = json.loads(chunk)["choices"][0].get("delta", {}).get("content", "")
                except (ValueError, LookupError, AttributeError, TypeError):
                    # Malformed or unexpected event: skip it, keep streaming.
                    continue
                # Events without text (null or empty content) add nothing to show.
                if not isinstance(delta, str) or not delta:
                    continue
                full_text += delta
                yield full_text
    except Exception as e:
        yield f"Error: {e}"
# Gradio UI: an intro text, the chat window, a text box, an optional image
# input and a Send button. Two histories are kept side by side: the one shown
# in the Chatbot (plain text) and the one sent to the model (api_state), whose
# user entries may carry image parts.
with gr.Blocks() as demo:
    gr.Markdown("""# ClarityGuard
Hi there! I'm ClarityGuard. How can I help you?
You can ask me things like:
> *"I missed a minor typo in a draft report, and my manager CC'd the entire HR department, calling it a 'concerning pattern of negligence.' My neurotypical peers make much bigger mistakes and it's just called a 'learning curve.' I feel like they're building a 'paper trail' to fire me over a non-issue β what happened here?"*
> *"During my annual review, they said I'm not a 'culture fit' because I don't go to the Friday happy hours. I told them I prefer to focus on my technical tasks during work hours, but they said I lack 'passion for the company vision.' It feels like they're judging my character because I don't want to perform the expected social scripts β does this make sense?"*
Or ask me anything β about work, relationships, or just to talk. I'm open to anything.
""")
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Type your message and press Enter...")
    image = gr.Image(label="Optional image", type="filepath")
    # Conversation in the form passed to respond().
    api_state = gr.State([])

    def user_fn(message, image_path, visible_history, api_history):
        """Append the user's turn to both histories and clear the inputs.

        Returns the new textbox value (empty), the new image value (None) and
        the two updated histories, matching the outputs wired below.
        """
        if visible_history is None:
            visible_history = []
        if api_history is None:
            api_history = []
        text = str(message or "").strip()
        visible_text = text
        if image_path:
            # The chat window shows a placeholder instead of the image data.
            visible_text = f"{text}\n\n[Attached image]" if text else "[Attached image]"
        visible_history.append({"role": "user", "content": visible_text})
        api_history.append({"role": "user", "content": make_user_content(text, image_path)})
        return "", None, visible_history, api_history

    def bot_fn(visible_history, api_history):
        """Stream the assistant's reply into the chat window.

        Yields the two histories after every chunk from respond(). Once the
        stream ends, the final reply is also recorded in the API history.
        """
        if visible_history is None:
            visible_history = []
        if api_history is None:
            api_history = []
        # Placeholder entry that each streamed chunk overwrites.
        visible_history.append({"role": "assistant", "content": ""})
        for chunk in respond(api_history):
            visible_history[-1] = {"role": "assistant", "content": chunk}
            yield visible_history, api_history
        if visible_history:
            api_history.append({"role": "assistant", "content": visible_history[-1]["content"]})
        yield visible_history, api_history

    send = gr.Button("Send")
    # Enter in the textbox and the Send button run the same two-step chain:
    # user_fn records the turn, then bot_fn streams the answer.
    msg.submit(user_fn, [msg, image, chatbot, api_state], [msg, image, chatbot, api_state]).then(
        bot_fn, [chatbot, api_state], [chatbot, api_state]
    )
    send.click(user_fn, [msg, image, chatbot, api_state], [msg, image, chatbot, api_state]).then(
        bot_fn, [chatbot, api_state], [chatbot, api_state]
    )
if __name__ == "__main__":
    # Start each run with a fresh log file. The encoding is given explicitly
    # so it matches the UTF-8 appends made by log().
    with open(LOG_FILE, "w", encoding="utf-8") as f:
        f.write("Iniciando...\n")
    # Build the RAG index and bring up llama-server in the background, so the
    # web UI becomes reachable immediately.
    threading.Thread(target=get_rag_chunks, daemon=True).start()
    threading.Thread(target=monitor_engine, daemon=True).start()
    demo.launch(server_name="0.0.0.0", server_port=7860)