# CharlieBonito
# Set Space GPU defaults for ClarityGuard v2
# cf7c8e7
import gradio as gr
import os
import subprocess
import time
import requests
import json
import threading
import traceback
import sys
import hashlib
import math
import base64
import mimetypes
from huggingface_hub import hf_hub_download
from datetime import datetime
# --- CONFIGURATION ---
# Settings read through os.getenv can be overridden with an environment
# variable of the same name; the second argument is the fallback value.

# Base directory used to resolve the model folder, log file, RAG index and documents.
APP_DIR = os.getenv("APP_DIR", os.path.dirname(os.path.abspath(__file__)))
# Hugging Face repository and file names handed to hf_hub_download in start_server().
MODEL_REPO = "CharlieBonito/clarity-guard-gemma4-7b"
MODEL_FILE = "ClarityGuard-v2.gguf"
# Multimodal projector file; start_server() treats a failed download as "images disabled".
MMPROJ_FILE = os.getenv("MMPROJ_FILE", "mmproj-ClarityGuard-v2.gguf")
# Path of the llama-server executable launched by start_server().
LLAMA_SERVER = "/opt/llama-cpp/llama-server"
MODEL_DIR = os.getenv("MODEL_DIR", os.path.join(APP_DIR, "models"))
# Must match the "--host" / "--port" values hard-coded in start_server().
SERVER_URL = "http://127.0.0.1:8080"
LOG_FILE = os.getenv("LOG_FILE", os.path.join(APP_DIR, "startup.log"))
# Passed to llama-server as "-t" / "-tb" and exported as OMP_NUM_THREADS.
CPU_THREADS = int(os.getenv("CPU_THREADS", "8"))
# Passed to llama-server as "-c".
LLAMA_CTX = int(os.getenv("LLAMA_CTX", "12288"))
# Sent as "max_tokens" with every chat completion request.
LLAMA_MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "8192"))
# Passed to llama-server as "-b" and "-ub" respectively.
LLAMA_BATCH = int(os.getenv("LLAMA_BATCH", "1024"))
LLAMA_UBATCH = int(os.getenv("LLAMA_UBATCH", "512"))
# Passed to llama-server as "-ngl".
LLAMA_GPU_LAYERS = int(os.getenv("LLAMA_GPU_LAYERS", "999"))
# Sent as "temperature" with every chat completion request.
LLAMA_TEMP = float(os.getenv("LLAMA_TEMP", "0.7"))
# When false, start_server() adds "--no-mmproj-offload" to the command line.
MMPROJ_OFFLOAD = os.getenv("MMPROJ_OFFLOAD", "true").lower() in ("1", "true", "yes")
# Bearer token for the Jina embeddings API; RAG is skipped when it is empty.
JINA_API_KEY = os.getenv("JINA_API_KEY", "")
JINA_EMBED_MODEL = os.getenv("JINA_EMBED_MODEL", "jina-embeddings-v3")
# JSON cache holding the chunk texts, their embeddings and a fingerprint.
RAG_INDEX_FILE = os.getenv("RAG_INDEX_FILE", os.path.join(APP_DIR, "rag_index.json"))
# Number of best-scoring chunks considered per query.
RAG_TOP_K = int(os.getenv("RAG_TOP_K", "4"))
# Character budget for the retrieved blocks added to the prompt.
RAG_MAX_CONTEXT_CHARS = int(os.getenv("RAG_MAX_CONTEXT_CHARS", "9000"))
# Default chunk size and overlap (in characters) used by chunk_document().
RAG_CHUNK_CHARS = int(os.getenv("RAG_CHUNK_CHARS", "1800"))
RAG_CHUNK_OVERLAP = int(os.getenv("RAG_CHUNK_OVERLAP", "250"))
# (source, path) pairs. read_rag_documents() keeps the first existing,
# non-empty file per source, so the later entries with a repeated source
# name act as fallback locations.
RAG_DOCS = [
    ("chatty", os.path.join(APP_DIR, "documents", "chatty.md")),
    ("libro", os.path.join(APP_DIR, "documents", "libro.md")),
    ("chatty", os.path.join(APP_DIR, "chatty.md")),
    ("libro", os.path.join(APP_DIR, "libro.md")),
]
CLARITYGUARD_SYSTEM_PROMPT = """CLARITYGUARD ASSISTANT — NEURO-INCLUSIVE EDITION v4.7
Tuned for ClarityGuard v2 / Gemma 4 E4B IT checkpoint 750 | Dify + Jina RAG
Based on C.F.R.V.A., created by Carlos Lengemann (2026) — CC BY 4.0
Language policy (non-negotiable): These instructions are written in English for clarity for builders. Your replies to the user must always be in the same language the user uses in their current message (including step titles, examples, and suggested wording). If the user mixes languages, mirror the language of their question / framing (the part where they ask for help), not the quoted third-party text. Never concatenate words. Always write with correct spacing and normal punctuation.
Response initialization (non-negotiable): Every response must begin with a clean, natural opener such as "Got it.", "Sure!", "Hi there!" or "Understood." before any analysis. This is mandatory on every turn without exception.
IDENTITY AND PURPOSE
You are ClarityGuard, a structural communication-analysis module. You specialize in providing objective clarity for neurodivergent individuals by translating abstract or socially-coded messages into concrete, actionable data.
Core Function: You determine whether confusion originates in the structure of the message itself rather than a cognitive failure of the user. You treat ambiguity as a technical bug in the communication protocol.
Foundational Principles:
Fundamental Principle: Confusion in the face of a structurally incomplete message is the correct response, not a cognitive error. If a message lacks a clear subject, defined action, explicit date, or measurable criterion, no person can execute it with certainty, regardless of their cognitive profile.
Universality Principle: The perception that others "understand" ambiguous messages does not demonstrate message clarity. It may demonstrate the use of cognitive shortcuts (confirmation bias, anchoring bias, social conformity) that produce an illusion of understanding.
Double Empathy Mitigation: You bridge the gap between literal/data-driven communication styles and implicit/vibe-driven styles without pathologizing either.
═══════════════════════════════════════════════════════
STEP 0 — INPUT TRIAGE (mandatory first gate)
═══════════════════════════════════════════════════════
Before running C.F.R.V.A., classify the user's input into ONE of three modes:
MODE A — CASUAL / CONVERSATIONAL
Triggers: greetings, small talk, subjective opinion questions ("which game is better, StarCraft or Age of Empires?"), general knowledge questions, playful banter, hypotheticals with no real-world stakes, requests for recommendations without a communication conflict, or any input where there is NO reported interpersonal misunderstanding, NO ambiguous message from a third party being decoded, and NO emotional distress.
→ Response: Reply naturally and conversationally, like a friendly knowledgeable assistant. Do NOT mention C.F.R.V.A. Do NOT produce a score. Do NOT use the 4-step structure. Do NOT use clinical/structural language. Just answer the way a smart, warm friend would. Keep the opener requirement.
MODE B — LIGHT CLARIFICATION
Triggers: the user reports a minor confusion about a single phrase, idiom, or instruction, but with no emotional charge and no ongoing conflict. Example: "My coworker said 'ping me later' — does that mean call or message?"
→ Response: Give a brief plain-language explanation (2–4 sentences) of what the phrase likely means in context, plus ONE optional clarification question they could ask. Do NOT run the full 4-step protocol. Do NOT show a score. Stay light.
MODE C — STRUCTURAL ANALYSIS (full ClarityGuard)
Triggers: the user reports a workplace, social, or interpersonal situation involving (a) an ambiguous/coded message from another party, (b) a label or accusation directed at them ("arrogant", "shifty", "not a culture fit", "passive-aggressive", etc.), (c) a conflict where they feel misunderstood or judged, (d) sensory/cognitive accommodation issues, or (e) any situation where they need help decoding what someone "really meant" in a high-stakes context.
→ Response: Run the full C.F.R.V.A. analysis and the 4-step protocol below.
Routing principle: When in doubt between A and C, ask yourself: "Is there a real-world communication conflict with stakes for the user?" If no → Mode A. If yes → Mode C. Never force a casual question into the structural protocol.
═══════════════════════════════════════════════════════
C.F.R.V.A. FRAMEWORK DEFINITIONS (Mode C only)
Based on C.F.R.V.A. — Carlos Lengemann (2026)
═══════════════════════════════════════════════════════
C — Undeclared Context
Presence of implicit assumptions, unverbalized background, or prior information the sender assumes is known but does not make explicit, generating interpretation gaps.
F — Diffuse Focusing
Absence of measurable criteria, undefined terms, or instructions that do not specify what observable result constitutes compliance.
R — Covert Redirection
Change of focus or priority without explicit signaling, where the object of the communication shifts without notice, preventing linear tracking.
V — Conditioned Validation
Structure where approval, positive response, or access to information depends on NOT requesting clarification, implicitly penalizing the question.
A — Linguistic Ambiguity
Use of figurative language, undefined technical jargon, metaphors, or extended instructions without written support that prevent objective verification.
SCORING SCALE
Each dimension is scored 0–10. Maximum total: 50 points.
0–10: Clear message. Confirm receipt and offer support if needed.
11–20: General clarity problem. Name the ambiguous element, suggest one confirmation question.
21–30: Moderate ambiguity. Full analysis + cognitive protection + clarification suggestion.
31–50: Maximum Alert. Full analysis with cognitive protection + clarification questions + follow-up plan for abstract replies.
═══════════════════════════════════════════════════════
RESPONSE STRUCTURE — 4 STEPS (Mode C only)
═══════════════════════════════════════════════════════
STEP 1 — ANALYSIS
🔍 [ClarityGuard] C.F.R.V.A. score: XX/50 → [Level Name]
Use descriptive, clinical language to identify Protocol Mismatches:
Identify what the message has (literal tokens).
Identify the Structural Vacuum (what is missing: dates, units, specific verbs, measurable criteria).
Flag Adjective-Based Feedback: adjectives (e.g., "arrogant," "proactive") are emotional data points for the sender, but zero-value data points for the receiver.
Do NOT evaluate the sender's intent. Evaluate the message structure only.
STEP 2 — COGNITIVE PROTECTION
🔒 Your confusion is not a failure. It is the correct response to an incomplete message.
Tone for Step 2: Warmer and more human than Step 1 and Step 4.
Step 2 is the moment of relief in the response — the user has
just received structural analysis (Step 1) and is about to
receive action items (Step 3). Step 2 should feel like a
pause where the bot acknowledges that the user's logical
response to the message is valid.
Open Step 2 with one sentence that validates the user's
position logically — not emotionally. Examples of acceptable
openings (do not project feelings the user did not declare):
- "Your reading of this message is structurally correct."
- "The difficulty you may be having parsing this is not
a comprehension issue — it is a data issue."
- "Nothing about this message is your responsibility to decode
alone; it was delivered without the necessary parameters."
Conditional emotional mirroring: If — and only if — the user
explicitly uses emotional language ("I feel worried", "this
made me anxious", "I'm overwhelmed"), you may mirror that
specific word once in Step 2 before continuing with the
structural analysis. Do not introduce emotional vocabulary
the user did not provide.
Objective Fact: Summarize what was said literally.
Structural Gap: Name the missing technical parameter.
Universality Brief: Explain that the ambiguity makes the message
structurally unexecutable. Others appearing to "understand" it
are likely using social shortcuts, not data-driven comprehension.
Constraint: Do NOT name or infer emotions (e.g., "you feel anxious")
unless the user explicitly used those words. Stay operational.
STEP 3 — CONCRETE ACTION (Read-Back)
✍️ Clarification suggestion: Provide a "Read-Back" script designed to force the other party back into Operational Language.
"To ensure I meet the exact professional standard: when you say [QUOTE], are you referring to [VARIABLE A] or [VARIABLE B]? What is the specific observable behavior you would like me to implement?"
STEP 4 — FOLLOW-UP PLAN (Binary Choice Decomposition)
⏰ If the reply remains abstract (e.g., "Just be more open"), apply Binary Choice Decomposition: Propose two concrete, mutually exclusive actions for the other person to choose from.
"To achieve '[Abstract Term]', should I optimize for Option A [Concrete Action 1] or Option B [Concrete Action 2]? If neither, please provide one physical action I can practice today."
═══════════════════════════════════════════════════════
TONE AND RESTRICTIONS
═══════════════════════════════════════════════════════
Mode-dependent tone: In Mode A, be warm, natural, and conversational. In Mode B, be clear and brief. In Mode C, be structural, precise, and operational.
No Conflict Metaphors (Mode C): Do not use "Red Herring," "Gaslighting," "Trap," or "Attack." Use "Structural Inconsistency," "Protocol Mismatch," or "Introduction of non-verifiable variables."
No Emotional Labeling (Mode C): Do not use "painful," "destabilizing," or "distressing" unless the user does.
Neutrality (Mode C): Evaluate message structure only. Never evaluate the intent of the sender or the cognitive profile of the user.
Subjective opinions welcome in Mode A: When the user asks for your take on a non-conflict subjective topic, you may share a perspective casually.
Spacing Rule (all modes): Never concatenate words. Use correct spacing and standard punctuation.
═══════════════════════════════════════════════════════
Version: ClarityGuard v4.7 — Structural / Neuro-inclusive
Stack: ClarityGuard v2 / Gemma 4 E4B IT checkpoint 750 | Dify | Jina RAG
Framework: C.F.R.V.A. (Lengemann, 2026) | Input Triage | Operational Pragmatics | Universality of Ambiguity
Attribution: Based on C.F.R.V.A., created by Carlos Lengemann (2026). Licensed CC BY 4.0.
https://creativecommons.org/licenses/by/4.0/deed.es"""
# Engine state shared between the background threads and respond().
# Set to True by wait_until_ready() once GET /health answers with HTTP 200.
server_ready = False
# Failure description (str) or None; respond() shows it while the engine is not ready.
server_error = None
# Set by start_server(): True only when the multimodal projector file was downloaded.
multimodal_ready = False
def log(msg):
    """Write a timestamped message to stderr and append it to LOG_FILE.

    Each entry has the form ``[HH:MM:SS] <msg>`` and ends with a newline in
    the log file, which is opened in append mode as UTF-8 on every call.
    """
    clock = datetime.now().strftime("%H:%M:%S")
    entry = "[{}] {}".format(clock, msg)
    print(entry, file=sys.stderr)
    with open(LOG_FILE, "a", encoding="utf-8") as handle:
        handle.write(f"{entry}\n")
def start_server():
    """Download the model artifacts and launch llama-server as a child process.

    The main GGUF model is mandatory. The multimodal projector is optional:
    when its download fails, the failure is logged, the module-level
    ``multimodal_ready`` flag is set to False and the server is started
    without ``--mmproj``.

    Returns:
        subprocess.Popen: the started server. stdout and stderr are merged
        into one line-buffered text pipe decoded as UTF-8.

    Raises:
        FileNotFoundError: if the llama-server binary does not exist.
        Exception: whatever ``hf_hub_download`` raises for the main model
            (logged, then re-raised).
    """
    global multimodal_ready

    log(
        "Configuración: "
        f"CPU_THREADS={CPU_THREADS}, LLAMA_CTX={LLAMA_CTX}, "
        f"LLAMA_MAX_TOKENS={LLAMA_MAX_TOKENS}, LLAMA_BATCH={LLAMA_BATCH}, "
        f"LLAMA_UBATCH={LLAMA_UBATCH}, LLAMA_GPU_LAYERS={LLAMA_GPU_LAYERS}, "
        f"MMPROJ_OFFLOAD={MMPROJ_OFFLOAD}"
    )

    # Fail fast: without the binary there is no point in downloading the model.
    if not os.path.exists(LLAMA_SERVER):
        raise FileNotFoundError(f"No existe {LLAMA_SERVER}")

    os.makedirs(MODEL_DIR, exist_ok=True)
    log("Descargando modelo para inferencia...")
    try:
        m_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=MODEL_DIR)
        log(f"Modelo descargado en: {m_path}")
    except Exception as e:
        log(f"FALLO en descarga: {e}")
        raise

    # The projector is best-effort; text-only chat still works without it.
    mmproj_path = ""
    try:
        mmproj_path = hf_hub_download(repo_id=MODEL_REPO, filename=MMPROJ_FILE, local_dir=MODEL_DIR)
        multimodal_ready = True
        log(f"Projector multimodal descargado en: {mmproj_path}")
    except Exception as e:
        multimodal_ready = False
        log(f"Projector multimodal no disponible; imágenes desactivadas. Detalle: {e}")

    env = os.environ.copy()
    # Only append the inherited value when there is one: a trailing ":" is an
    # empty entry, which the dynamic loader treats as the current directory.
    inherited_lib_path = env.get("LD_LIBRARY_PATH", "")
    env["LD_LIBRARY_PATH"] = os.pathsep.join(
        part for part in ("/usr/local/lib", inherited_lib_path) if part
    )
    env["OMP_NUM_THREADS"] = str(CPU_THREADS)
    env["OMP_PROC_BIND"] = "false"

    # GPU by default on the Hugging Face Space; override LLAMA_GPU_LAYERS for CPU testing.
    cmd = [
        LLAMA_SERVER,
        "-m", m_path,
        "--host", "127.0.0.1",
        "--port", "8080",
        "-c", str(LLAMA_CTX),
        "-ngl", str(LLAMA_GPU_LAYERS),
        "-t", str(CPU_THREADS),
        "-tb", str(CPU_THREADS),
        "-np", "1",
        "-b", str(LLAMA_BATCH),
        "-ub", str(LLAMA_UBATCH),
        "--threads-http", "2",
        "--fit", "off",
        "--no-mmap",
        "--jinja",
    ]
    if mmproj_path:
        cmd.extend(["--mmproj", mmproj_path])
        if not MMPROJ_OFFLOAD:
            cmd.append("--no-mmproj-offload")

    log(f"Lanzando llama-server GPU: {' '.join(cmd)}")
    return subprocess.Popen(
        cmd,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        # Decode leniently so an undecodable byte in the server output cannot
        # raise while the caller iterates over the pipe.
        encoding="utf-8",
        errors="replace",
        bufsize=1,
    )
def wait_until_ready(proc, timeout=900):
    """Poll the llama-server health endpoint until it answers or ``proc`` stops.

    Sets the module-level ``server_ready`` flag when ``GET /health`` returns
    HTTP 200. Otherwise, once the process has exited or ``timeout`` seconds
    have elapsed, stores an explanation in ``server_error``.

    Args:
        proc: process handle exposing ``poll()`` and ``returncode``.
        timeout: maximum number of seconds to keep polling.
    """
    global server_ready, server_error

    began = time.time()
    while True:
        if proc.poll() is not None:
            break
        if not (time.time() - began < timeout):
            break
        try:
            health = requests.get(f"{SERVER_URL}/health", timeout=2)
            if health.status_code == 200:
                server_ready = True
                log("MOTOR EN LINEA (GPU)")
                return
        except Exception:
            # The server refuses connections while it is still starting.
            pass
        time.sleep(2)

    if proc.poll() is not None:
        server_error = f"El motor terminó antes de estar listo. Código: {proc.returncode}"
    else:
        server_error = "El motor no respondió al health-check dentro del tiempo esperado."
def monitor_engine():
    """Start llama-server, mirror its output into the log and track its exit.

    Intended to run in a background thread. It launches the server, starts
    the readiness poller in another daemon thread, copies every output line
    to the log and, when the process ends, marks the engine as unavailable.
    Any exception is recorded in ``server_error`` and logged with its
    traceback instead of propagating.
    """
    global server_ready, server_error
    try:
        log("Arrancando monitor...")
        proc = start_server()
        log(f"PID llama-server: {proc.pid}")
        threading.Thread(target=wait_until_ready, args=(proc,), daemon=True).start()
        # Blocks until the server closes its output, i.e. until it exits.
        for line in proc.stdout:
            log(f"[llama] {line.strip()}")
        ret = proc.wait()
        # The process is gone, so requests can no longer be served, even if
        # the health check had succeeded earlier.
        server_ready = False
        if not server_error:
            # Recorded for a clean exit (code 0) too; otherwise respond()
            # would keep reporting that the engine is still loading.
            server_error = f"llama-server terminó con código {ret}"
        log(f"llama-server terminó con código: {ret}")
    except Exception as e:
        server_ready = False
        server_error = str(e)
        log(f"EXCEPCIÓN MONITOR: {e}")
        log(traceback.format_exc())
def read_rag_documents():
    """Load the RAG source documents listed in RAG_DOCS.

    Entries are tried in order and only the first existing, non-empty file
    is kept for each source name. Missing files are logged and skipped.

    Returns:
        list[dict]: one ``{"source", "path", "text"}`` dict per loaded
        source, in the order the sources were first loaded.
    """
    loaded = {}
    for source, path in RAG_DOCS:
        if source in loaded:
            continue
        if not os.path.exists(path):
            log(f"RAG: documento no encontrado: {path}")
            continue
        with open(path, "r", encoding="utf-8", errors="ignore") as handle:
            body = handle.read().strip()
        if not body:
            # An empty file does not claim the source; a later entry may.
            continue
        loaded[source] = {"source": source, "path": path, "text": body}
    return list(loaded.values())
def chunk_document(text, max_chars=None, overlap=None):
    """Split ``text`` into overlapping chunks built from its paragraphs.

    Paragraphs (separated by blank lines) are packed together while they fit
    in ``max_chars``. A paragraph longer than ``max_chars`` is cut into
    sliding windows of ``max_chars`` characters that advance by
    ``max_chars - overlap``. When a packed chunk is closed, its last
    ``overlap`` characters are carried into the next chunk, so a packed
    chunk can exceed ``max_chars`` by up to ``overlap + 2`` characters.

    Args:
        text: document text.
        max_chars: target chunk size; defaults to RAG_CHUNK_CHARS.
        overlap: characters shared between neighbouring chunks; defaults to
            RAG_CHUNK_OVERLAP. Values outside ``0 .. max_chars - 1`` are
            clamped into that range so the window always moves forward.

    Returns:
        list[str]: the chunks, in document order.

    Raises:
        ValueError: if ``max_chars`` is smaller than 1.
    """
    if max_chars is None:
        max_chars = RAG_CHUNK_CHARS
    if overlap is None:
        overlap = RAG_CHUNK_OVERLAP
    if max_chars < 1:
        raise ValueError("max_chars must be at least 1")
    # An overlap >= max_chars would give a zero or negative window step
    # (ValueError, or a silently dropped paragraph); a negative overlap
    # would skip text between windows.
    overlap = max(0, min(overlap, max_chars - 1))
    step = max_chars - overlap

    paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
    chunks = []
    current = ""
    for paragraph in paragraphs:
        if len(paragraph) > max_chars:
            if current:
                chunks.append(current.strip())
                current = ""
            for start in range(0, len(paragraph), step):
                window = paragraph[start:start + max_chars].strip()
                if window:
                    chunks.append(window)
                # Stop once a window reaches the end; any further window
                # would only repeat text already covered by this one.
                if start + max_chars >= len(paragraph):
                    break
            continue
        if len(current) + len(paragraph) + 2 <= max_chars:
            current = f"{current}\n\n{paragraph}".strip()
        elif current:
            chunks.append(current.strip())
            tail = current[-overlap:] if overlap > 0 else ""
            current = f"{tail}\n\n{paragraph}".strip()
        else:
            current = paragraph
    if current:
        chunks.append(current.strip())
    return chunks
def rag_fingerprint(docs):
    """Return a SHA-256 hex digest identifying the current RAG inputs.

    The digest covers the embedding model name, the chunk size and overlap
    settings, and the source name and text of every document, in order.
    """
    digest = hashlib.sha256()
    settings = (JINA_EMBED_MODEL, str(RAG_CHUNK_CHARS), str(RAG_CHUNK_OVERLAP))
    for value in settings:
        digest.update(value.encode("utf-8"))
    for doc in docs:
        for field in ("source", "text"):
            digest.update(doc[field].encode("utf-8"))
    return digest.hexdigest()
def normalize_vector(vector):
    """Scale ``vector`` to unit Euclidean length and return it as floats.

    A vector whose norm is zero is returned as a list of ``0.0`` values of
    the same length.
    """
    magnitude = math.sqrt(sum(c * c for c in map(float, vector)))
    if magnitude == 0:
        return [0.0 for _ in vector]
    return [c / magnitude for c in map(float, vector)]
def jina_embed(texts, task):
    """Request embeddings for ``texts`` from the Jina embeddings API.

    Args:
        texts: list of strings to embed.
        task: value sent as the request's ``task`` field (this file uses
            ``"retrieval.passage"`` and ``"retrieval.query"``).

    Returns:
        list[list[float]]: one unit-length vector per input text, ordered by
        the ``index`` field of the response items.

    Raises:
        RuntimeError: if JINA_API_KEY is empty, or if the response does not
            contain exactly one embedding per input text.
        requests.HTTPError: if the API answers with an error status.
    """
    if not JINA_API_KEY:
        raise RuntimeError("falta JINA_API_KEY")
    r = requests.post(
        "https://api.jina.ai/v1/embeddings",
        headers={
            "Authorization": f"Bearer {JINA_API_KEY}",
            "Content-Type": "application/json",
        },
        json={
            "model": JINA_EMBED_MODEL,
            "task": task,
            "input": texts,
        },
        timeout=120,
    )
    r.raise_for_status()
    data = r.json().get("data", [])
    # Callers pair the result with their inputs via zip(); a short response
    # would silently leave some inputs without an embedding.
    if len(data) != len(texts):
        raise RuntimeError(
            f"Jina devolvió {len(data)} embeddings para {len(texts)} textos"
        )
    ordered = sorted(data, key=lambda item: item.get("index", 0))
    return [normalize_vector(item["embedding"]) for item in ordered]
def build_rag_index():
    """Return the embedded RAG chunks, loading them from cache when possible.

    The cache file is reused only when its stored fingerprint matches the
    current documents and settings. Otherwise every document is chunked,
    embedded through Jina in batches and the result is written back to the
    cache file.

    Returns:
        list[dict]: chunk dicts with ``source``, ``chunk_id``, ``text`` and
        ``embedding`` keys; an empty list when there are no documents or no
        API key is configured.

    Raises:
        Exception: errors from ``jina_embed`` propagate to the caller.
    """
    docs = read_rag_documents()
    if not docs:
        log("RAG: sin documentos disponibles.")
        return []

    fingerprint = rag_fingerprint(docs)
    if os.path.exists(RAG_INDEX_FILE):
        try:
            with open(RAG_INDEX_FILE, "r", encoding="utf-8") as f:
                cached = json.load(f)
            if cached.get("fingerprint") == fingerprint:
                chunks = cached.get("chunks", [])
                log(f"RAG: índice cargado desde cache ({len(chunks)} chunks).")
                return chunks
        except Exception as e:
            # An unreadable or malformed cache is not fatal; rebuild it.
            log(f"RAG: no se pudo leer cache, se reconstruye. Detalle: {e}")

    if not JINA_API_KEY:
        log("RAG: desactivado porque falta JINA_API_KEY.")
        return []

    chunks = [
        {"source": doc["source"], "chunk_id": idx, "text": text}
        for doc in docs
        for idx, text in enumerate(chunk_document(doc["text"]))
    ]
    log(f"RAG: generando embeddings Jina v3 para {len(chunks)} chunks.")
    batch_size = 16
    for start in range(0, len(chunks), batch_size):
        batch = chunks[start:start + batch_size]
        embeddings = jina_embed([item["text"] for item in batch], "retrieval.passage")
        for item, embedding in zip(batch, embeddings):
            item["embedding"] = embedding

    # Persisting the cache is an optimization only: a write failure must not
    # discard the index that was just built.
    try:
        with open(RAG_INDEX_FILE, "w", encoding="utf-8") as f:
            json.dump({
                "fingerprint": fingerprint,
                "model": JINA_EMBED_MODEL,
                "chunks": chunks,
            }, f)
        log(f"RAG: índice guardado en {RAG_INDEX_FILE}.")
    except OSError as e:
        log(f"RAG: no se pudo guardar el índice en {RAG_INDEX_FILE}; se usa solo en memoria. Detalle: {e}")
    return chunks
# Serializes index construction; held for the whole build.
rag_lock = threading.Lock()
# None until the first build attempt; afterwards a (possibly empty) list.
rag_chunks = None


def get_rag_chunks():
    """Return the RAG chunks, building the index on first use.

    The build runs at most once: if it fails, the error is logged and an
    empty list is stored, so later calls return that empty list.
    """
    global rag_chunks
    with rag_lock:
        if rag_chunks is not None:
            return rag_chunks
        try:
            rag_chunks = build_rag_index()
        except Exception as e:
            log(f"RAG: error construyendo índice: {e}")
            rag_chunks = []
        return rag_chunks
def retrieve_rag_context(query):
    """Build the RAG context text for ``query``.

    The query is embedded through Jina, every indexed chunk is scored with a
    dot product against it, and the best RAG_TOP_K chunks are added in score
    order until the next one would exceed RAG_MAX_CONTEXT_CHARS.

    Returns:
        str: a header followed by the selected chunks, or ``""`` when RAG is
        unavailable, the query is blank, the embedding request fails or no
        chunk fits the budget.
    """
    chunks = get_rag_chunks()
    if not (chunks and query.strip() and JINA_API_KEY):
        return ""

    try:
        query_vec = jina_embed([query], "retrieval.query")[0]
    except Exception as e:
        log(f"RAG: error consultando Jina: {e}")
        return ""

    # Chunks without an embedding cannot be scored and are left out.
    embedded = [chunk for chunk in chunks if chunk.get("embedding")]
    ranked = sorted(
        (
            (sum(q * c for q, c in zip(query_vec, chunk["embedding"])), chunk)
            for chunk in embedded
        ),
        key=lambda pair: pair[0],
        reverse=True,
    )

    blocks = []
    remaining = RAG_MAX_CONTEXT_CHARS
    for score, chunk in ranked[:RAG_TOP_K]:
        text = chunk["text"].strip()
        block = f"[source={chunk['source']} chunk={chunk['chunk_id']} score={score:.3f}]\n{text}"
        if len(block) > remaining:
            break
        blocks.append(block)
        remaining -= len(block)

    if not blocks:
        return ""

    header = (
        "RAG CONTEXT (reference only; ClarityGuard system prompt has priority):\n"
        "Use this context only when it directly helps answer the user's current message. "
        "Do not copy confrontational Chatty/book tone into the user-facing answer.\n\n"
    )
    return header + "\n\n---\n\n".join(blocks)
def latest_user_text(history):
    """Return the text of the most recent user message in ``history``.

    Two entry shapes are understood: message dicts with ``role`` and
    ``content`` keys (content may be a string or a list of typed parts, of
    which only the ``"text"`` parts are joined with spaces), and
    ``(user, assistant)`` pairs. Returns ``""`` when no user text is found.
    """
    for entry in reversed(history):
        if isinstance(entry, dict):
            if entry.get("role") != "user":
                continue
            body = entry.get("content", "")
            if not isinstance(body, list):
                return str(body)
            texts = [
                str(part.get("text", ""))
                for part in body
                if isinstance(part, dict) and part.get("type") == "text"
            ]
            return " ".join(texts)
        if isinstance(entry, (list, tuple)) and entry and entry[0]:
            return str(entry[0])
    return ""
def image_to_data_uri(image_path):
    """Encode the file at ``image_path`` as a base64 ``data:`` URI.

    The MIME type is guessed from the file name and falls back to
    ``image/png``. A falsy path yields ``""``.
    """
    if not image_path:
        return ""
    guessed = mimetypes.guess_type(image_path)[0]
    mime_type = guessed if guessed else "image/png"
    with open(image_path, "rb") as handle:
        payload = handle.read()
    encoded = base64.b64encode(payload).decode("ascii")
    return "data:" + mime_type + ";base64," + encoded
def make_user_content(message, image_path=None):
    """Build the ``content`` value of a user message for the chat API.

    Returns the stripped text when there is no image. With an image but no
    loaded multimodal projector, returns the text followed by a note saying
    the image could not be used. Otherwise returns a list of content parts:
    the image as a data URI (when one could be produced) followed by the
    text, or a default instruction when the text is empty.
    """
    text = str(message or "").strip()
    if not image_path:
        return text

    if not multimodal_ready:
        note = "[Attached image, but the multimodal projector is not loaded in llama-server.]"
        if text:
            return f"{text}\n\n{note}"
        return note

    data_uri = image_to_data_uri(image_path)
    image_parts = [{"type": "image_url", "image_url": {"url": data_uri}}] if data_uri else []
    text_part = {"type": "text", "text": text or "Analyze this image."}
    return image_parts + [text_part]
def respond(history):
    """Stream the assistant reply for ``history`` from llama-server.

    This is a generator. While the engine is not ready it yields a single
    status message. Otherwise it sends the system prompt, the optional RAG
    context and the conversation to ``/v1/chat/completions`` and yields the
    accumulated reply text each time new content arrives.

    Args:
        history: list of message dicts (``role`` / ``content``) or
            ``(user, assistant)`` pairs.

    Yields:
        str: the reply text so far, or an ``"Error: ..."`` message if the
        request fails.
    """
    if not server_ready:
        if server_error:
            yield f"Engine unavailable: {server_error}"
            return
        yield "Engine loading… this may take a few minutes the first time."
        return

    api_messages = [{"role": "system", "content": CLARITYGUARD_SYSTEM_PROMPT}]
    rag_context = retrieve_rag_context(latest_user_text(history))
    if rag_context:
        api_messages.append({"role": "system", "content": rag_context})

    for m in history:
        if isinstance(m, dict):
            content = m.get("content", "")
            api_messages.append({
                "role": m.get("role", "user"),
                # Multimodal messages carry a list of parts; keep it as is.
                "content": content if isinstance(content, list) else str(content),
            })
        elif isinstance(m, (list, tuple)) and len(m) >= 2:
            user_msg, assistant_msg = m[0], m[1]
            if user_msg:
                api_messages.append({"role": "user", "content": str(user_msg)})
            if assistant_msg:
                api_messages.append({"role": "assistant", "content": str(assistant_msg)})

    try:
        # The context manager releases the streaming connection even when
        # the consumer stops iterating before the stream ends.
        with requests.post(
            f"{SERVER_URL}/v1/chat/completions",
            json={
                "model": MODEL_FILE,
                "messages": api_messages,
                "stream": True,
                "temperature": LLAMA_TEMP,
                "max_tokens": LLAMA_MAX_TOKENS,
            },
            stream=True,
            timeout=1200,
        ) as r:
            r.raise_for_status()
            full_text = ""
            for line in r.iter_lines():
                if not line:
                    continue
                raw = line.decode("utf-8")
                if not raw.startswith("data:"):
                    continue
                chunk = raw[5:].strip()
                if chunk == "[DONE]":
                    break
                try:
                    delta = json.loads(chunk)["choices"][0].get("delta", {}).get("content")
                except (ValueError, LookupError, AttributeError, TypeError):
                    # Malformed or unexpected event; skip it and keep streaming.
                    continue
                # Some events carry no text (content missing or null).
                if not isinstance(delta, str) or not delta:
                    continue
                full_text += delta
                yield full_text
    except Exception as e:
        yield f"Error: {e}"
# Gradio UI: a chat window, a text box, an optional image input and a Send button.
with gr.Blocks() as demo:
    gr.Markdown("""# ClarityGuard
Hi there! I'm ClarityGuard. How can I help you?
You can ask me things like:
> *"I missed a minor typo in a draft report, and my manager CC'd the entire HR department, calling it a 'concerning pattern of negligence.' My neurotypical peers make much bigger mistakes and it's just called a 'learning curve.' I feel like they're building a 'paper trail' to fire me over a non-issue — what happened here?"*
> *"During my annual review, they said I'm not a 'culture fit' because I don't go to the Friday happy hours. I told them I prefer to focus on my technical tasks during work hours, but they said I lack 'passion for the company vision.' It feels like they're judging my character because I don't want to perform the expected social scripts — does this make sense?"*
Or ask me anything — about work, relationships, or just to talk. I'm open to anything.
""")
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Type your message and press Enter...")
    image = gr.Image(label="Optional image", type="filepath")
    # Messages as sent to the model; the chatbot shows a placeholder for images.
    api_state = gr.State([])

    def user_fn(message, image_path, visible_history, api_history):
        """Record the user's turn in both histories and clear the inputs."""
        visible_history = [] if visible_history is None else visible_history
        api_history = [] if api_history is None else api_history
        text = str(message or "").strip()
        if not image_path:
            visible_text = text
        elif text:
            visible_text = f"{text}\n\n[Attached image]"
        else:
            visible_text = "[Attached image]"
        visible_history.append({"role": "user", "content": visible_text})
        api_history.append({"role": "user", "content": make_user_content(text, image_path)})
        return "", None, visible_history, api_history

    def bot_fn(visible_history, api_history):
        """Stream the assistant reply into the chat, then store it in the API history."""
        visible_history = [] if visible_history is None else visible_history
        api_history = [] if api_history is None else api_history
        visible_history.append({"role": "assistant", "content": ""})
        for partial in respond(api_history):
            visible_history[-1] = {"role": "assistant", "content": partial}
            yield visible_history, api_history
        if visible_history:
            final_reply = visible_history[-1]["content"]
            api_history.append({"role": "assistant", "content": final_reply})
        yield visible_history, api_history

    send = gr.Button("Send")

    # Pressing Enter and clicking Send run the same two-step chain.
    turn_io = [msg, image, chatbot, api_state]
    reply_io = [chatbot, api_state]
    for trigger in (msg.submit, send.click):
        trigger(user_fn, turn_io, turn_io).then(bot_fn, reply_io, reply_io)
if __name__ == "__main__":
    # Start each run with a fresh log file, using the same UTF-8 encoding
    # that log() uses when it appends to it.
    with open(LOG_FILE, "w", encoding="utf-8") as f:
        f.write("Iniciando...\n")
    # Build the RAG index and start the engine in the background so the web
    # UI is reachable while they are still loading.
    threading.Thread(target=get_rag_chunks, daemon=True).start()
    threading.Thread(target=monitor_engine, daemon=True).start()
    demo.launch(server_name="0.0.0.0", server_port=7860)