Spaces:

CharlieBonito
/

ClarityGuardAgent

Sleeping

App Files Files Community

CharlieBonito committed on Apr 24

Commit

fdc2e4b

verified ·

1 Parent(s): 5d13956

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -38

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import requests
 import json
 import threading
-# --- CONFIGURACIÓN MÍNIMA ---
 MODEL_REPO = "CharlieBonito/clarity-guard-gemma4-7b"
 MODEL_FILE = "Checkpoint-375-Ollama-Clean-7.5B-Q4_K_M.gguf"
 MMPROJ_FILE = "mmproj-Checkpoint-375-Ollama-Clean-BF16.gguf"
@@ -16,83 +16,81 @@ SERVER_URL = "http://127.0.0.1:8080"
 server_ready = False
-def download_models():
     from huggingface_hub import hf_hub_download
     os.makedirs(MODEL_DIR, exist_ok=True)
-    # Descarga mínima para probar el flujo
     m_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=MODEL_DIR)
     mm_path = hf_hub_download(repo_id=MODEL_REPO, filename=MMPROJ_FILE, local_dir=MODEL_DIR)
-    return m_path, mm_path
-def start_server():
-    m_path, mm_path = download_models()
     env = os.environ.copy()
     env["LD_LIBRARY_PATH"] = f"/usr/local/lib:/usr/local/cuda/lib64:{env.get('LD_LIBRARY_PATH', '')}"
-    # Reducimos el contexto al mínimo (1024) para que la L4 arranque instantáneamente
     cmd = [
         LLAMA_SERVER, "-m", m_path, "--mmproj", mm_path,
         "--host", "127.0.0.1", "--port", "8080",
-        "-c", "1024", "-ngl", "99", "--no-mmap"
     ]
-    subprocess.Popen(cmd, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-def wait_for_server():
     global server_ready
     start_server()
-    for _ in range(60):
         try:
             if requests.get(f"{SERVER_URL}/health", timeout=1).status_code == 200:
                 server_ready = True
-                print("✅ Motor listo.")
                 break
         except: pass
-        time.sleep(2)
 def respond(history):
     if not server_ready:
-        yield "⏳ Cargando... dale un momento a la L4."
         return
-    # Gradio 6: history es una lista de {"role": "user", "content": "texto"}
-    # El prompt de sistema se inyecta aquí para la prueba
-    api_messages = [{"role": "system", "content": "Eres un asistente breve. Di hola y contesta rápido."}]
-    api_messages.extend(history)
     try:
-        response = requests.post(
             f"{SERVER_URL}/v1/chat/completions",
-            json={"messages": api_messages, "stream": True, "temperature": 0.1},
-            stream=True, timeout=30
         )
         full_text = ""
-        for line in response.iter_lines():
             if line:
                 chunk = line.decode("utf-8")[6:]
                 if chunk.strip() == "[DONE]": break
-                data = json.loads(chunk)
-                delta = data["choices"][0].get("delta", {}).get("content", "")
-                full_text += delta
-                yield full_text
     except Exception as e:
-        yield f"❌ Error de API: {e}"
-# --- INTERFAZ GRADIO 6 (SINTAXIS CORRECTA) ---
 with gr.Blocks() as demo:
-    gr.Markdown("# ⚡ ClarityGuard Mini-Test")
-    # Importante: No le pases dicts al content si es solo texto
-    chatbot = gr.Chatbot(height=400)
-    msg = gr.Textbox(placeholder="Escribe 'Hola'...")
     def user_fn(message, history):
-        # FIX: En Gradio 6 para texto plano, el content DEBE ser un string
-        # Si envías {'text': message} lanzará el ValueError que viste
-        history.append({"role": "user", "content": message})
         return "", history
     def bot_fn(history):
-        # El asistente empieza con contenido vacío (string)
         history.append({"role": "assistant", "content": ""})
         for chunk in respond(history[:-1]):
             history[-1]["content"] = chunk
             yield history
@@ -102,5 +100,5 @@ with gr.Blocks() as demo:
     )
 if __name__ == "__main__":
-    threading.Thread(target=wait_for_server, daemon=True).start()
     demo.launch(server_name="0.0.0.0", server_port=7860)

 import json
 import threading
+# --- CONFIGURACIÓN ---
 MODEL_REPO = "CharlieBonito/clarity-guard-gemma4-7b"
 MODEL_FILE = "Checkpoint-375-Ollama-Clean-7.5B-Q4_K_M.gguf"
 MMPROJ_FILE = "mmproj-Checkpoint-375-Ollama-Clean-BF16.gguf"
 server_ready = False
+def start_server():
     from huggingface_hub import hf_hub_download
     os.makedirs(MODEL_DIR, exist_ok=True)
     m_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=MODEL_DIR)
     mm_path = hf_hub_download(repo_id=MODEL_REPO, filename=MMPROJ_FILE, local_dir=MODEL_DIR)
     env = os.environ.copy()
     env["LD_LIBRARY_PATH"] = f"/usr/local/lib:/usr/local/cuda/lib64:{env.get('LD_LIBRARY_PATH', '')}"
+    # Contexto mínimo y sin logs para que no sature la terminal
     cmd = [
         LLAMA_SERVER, "-m", m_path, "--mmproj", mm_path,
         "--host", "127.0.0.1", "--port", "8080",
+        "-c", "1024", "-ngl", "99"
     ]
+    subprocess.Popen(cmd, env=env)
+def health_check():
     global server_ready
     start_server()
+    while True:
         try:
             if requests.get(f"{SERVER_URL}/health", timeout=1).status_code == 200:
                 server_ready = True
+                print("[DEBUG] ✅ MOTOR LISTO")
                 break
         except: pass
+        time.sleep(5)
 def respond(history):
     if not server_ready:
+        yield "Got it. El motor de ClarityGuard sigue cargando en la GPU L4... (Intenta de nuevo en 30s)"
         return
+    # Convertir historial de Gradio 6 a formato OpenAI simple
+    messages = [{"role": "system", "content": "Eres ClarityGuard. Saluda y sé breve."}]
+    for m in history:
+        messages.append({"role": m["role"], "content": str(m["content"])})
     try:
+        r = requests.post(
             f"{SERVER_URL}/v1/chat/completions",
+            json={"messages": messages, "stream": True, "temperature": 0.1},
+            stream=True, timeout=60
         )
         full_text = ""
+        for line in r.iter_lines():
             if line:
                 chunk = line.decode("utf-8")[6:]
                 if chunk.strip() == "[DONE]": break
+                try:
+                    data = json.loads(chunk)
+                    full_text += data["choices"][0].get("delta", {}).get("content", "")
+                    yield full_text
+                except: continue
     except Exception as e:
+        yield f"❌ Error de conexión: {e}"
+# --- INTERFAZ GRADIO 6 (MÁXIMA SIMPLICIDAD) ---
 with gr.Blocks() as demo:
+    gr.Markdown("# 🔍 ClarityGuard | Test L4")
+    chatbot = gr.Chatbot(height=400)
+    msg = gr.Textbox(placeholder="Escribe 'Hola' y presiona Enter...")
     def user_fn(message, history):
+        print(f"[DEBUG] Usuario dijo: {message}")
+        if history is None: history = []
+        # Gradio 6: El contenido debe ser un string puro para evitar el ValueError
+        history.append({"role": "user", "content": str(message)})
         return "", history
     def bot_fn(history):
+        print(f"[DEBUG] Generando respuesta...")
         history.append({"role": "assistant", "content": ""})
+        # history[:-1] envía todo el historial menos el mensaje vacío del asistente
         for chunk in respond(history[:-1]):
             history[-1]["content"] = chunk
             yield history
     )
 if __name__ == "__main__":
+    threading.Thread(target=health_check, daemon=True).start()
     demo.launch(server_name="0.0.0.0", server_port=7860)