CharlieBonito committed on
Commit
fdc2e4b
·
verified ·
1 Parent(s): 5d13956

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -38
app.py CHANGED
@@ -6,7 +6,7 @@ import requests
6
  import json
7
  import threading
8
 
9
- # --- CONFIGURACIÓN MÍNIMA ---
10
  MODEL_REPO = "CharlieBonito/clarity-guard-gemma4-7b"
11
  MODEL_FILE = "Checkpoint-375-Ollama-Clean-7.5B-Q4_K_M.gguf"
12
  MMPROJ_FILE = "mmproj-Checkpoint-375-Ollama-Clean-BF16.gguf"
@@ -16,83 +16,81 @@ SERVER_URL = "http://127.0.0.1:8080"
16
 
17
  server_ready = False
18
 
def download_models():
    """Fetch the two model files from the Hugging Face Hub into MODEL_DIR.

    MODEL_DIR is created if it does not exist yet. MODEL_FILE is downloaded
    first, then MMPROJ_FILE, both from MODEL_REPO.

    Returns:
        A ``(m_path, mm_path)`` tuple holding the values returned by
        ``hf_hub_download`` for MODEL_FILE and MMPROJ_FILE, in that order.
    """
    from huggingface_hub import hf_hub_download

    os.makedirs(MODEL_DIR, exist_ok=True)
    # The generator is consumed left to right, so the download order is
    # MODEL_FILE followed by MMPROJ_FILE.
    model_path, projector_path = (
        hf_hub_download(repo_id=MODEL_REPO, filename=file_name, local_dir=MODEL_DIR)
        for file_name in (MODEL_FILE, MMPROJ_FILE)
    )
    return model_path, projector_path
26
-
def start_server():
    """Download the model files and launch llama-server as a child process.

    The server is started with ``subprocess.Popen`` and not waited on, so this
    function returns as soon as the process has been spawned. The child's
    stdout and stderr are discarded.
    """
    m_path, mm_path = download_models()

    env = os.environ.copy()
    # Prepend the library directories to any inherited LD_LIBRARY_PATH.
    # An empty inherited value is skipped: a trailing ":" would add an empty
    # entry, which the dynamic loader interprets as the current directory.
    library_dirs = ["/usr/local/lib", "/usr/local/cuda/lib64"]
    inherited = env.get("LD_LIBRARY_PATH", "")
    if inherited:
        library_dirs.append(inherited)
    env["LD_LIBRARY_PATH"] = ":".join(library_dirs)

    # Context is kept at the minimum (1024) so the server starts quickly on
    # the L4 GPU.
    # NOTE(review): -ngl 99 presumably offloads all layers to the GPU and
    # --no-mmap disables memory-mapped loading; confirm against llama-server docs.
    cmd = [
        LLAMA_SERVER, "-m", m_path, "--mmproj", mm_path,
        "--host", "127.0.0.1", "--port", "8080",
        "-c", "1024", "-ngl", "99", "--no-mmap",
    ]
    subprocess.Popen(cmd, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
39
 
def wait_for_server():
    """Start llama-server and poll its health endpoint until it reports ready.

    Calls ``start_server()`` and then requests ``{SERVER_URL}/health`` up to
    60 times, sleeping 2 seconds after each unsuccessful attempt. On the first
    HTTP 200 the module-level ``server_ready`` flag is set to True and polling
    stops. If no attempt succeeds, a failure message is printed and the flag
    stays False.
    """
    global server_ready
    start_server()
    for _ in range(60):
        try:
            if requests.get(f"{SERVER_URL}/health", timeout=1).status_code == 200:
                server_ready = True
                print("✅ Motor listo.")
                break
        except requests.RequestException:
            # The server is not accepting connections yet; retry after the
            # sleep. Only request failures are ignored, so unrelated errors
            # are no longer hidden.
            pass
        time.sleep(2)
    else:
        # Reached only when the loop finished without a successful check.
        print("❌ El motor no respondió tras 60 intentos.")
51
 
def respond(history):
    """Stream the assistant's reply from the local llama-server.

    Yields the accumulated reply text after every streamed chunk, so each
    yielded value supersedes the previous one. While ``server_ready`` is
    False a single "loading" notice is yielded instead. Any failure while
    talking to the server is yielded as an error message rather than raised.

    Args:
        history: List of ``{"role": ..., "content": ...}`` message dicts. It
            is sent unchanged after the system prompt.

    Yields:
        str: The reply text received so far, or a status/error message.
    """
    if not server_ready:
        yield " Cargando... dale un momento a la L4."
        return

    # The system prompt is injected here; the chat history follows it as-is.
    api_messages = [{"role": "system", "content": "Eres un asistente breve. Di hola y contesta rápido."}]
    api_messages.extend(history)

    try:
        # The context manager closes the streamed connection even when the
        # consumer stops iterating early.
        with requests.post(
            f"{SERVER_URL}/v1/chat/completions",
            json={"messages": api_messages, "stream": True, "temperature": 0.1},
            stream=True, timeout=30,
        ) as response:
            # Report HTTP errors instead of trying to parse an error body.
            response.raise_for_status()
            full_text = ""
            for line in response.iter_lines():
                if not line:
                    continue
                text = line.decode("utf-8")
                # Only "data:" lines carry payloads; skip anything else
                # rather than slicing a fixed number of characters.
                if not text.startswith("data:"):
                    continue
                payload = text[len("data:"):].strip()
                if payload == "[DONE]":
                    break
                choices = json.loads(payload).get("choices") or []
                if not choices:
                    continue
                # "delta" or "content" may be missing or null; treat both as
                # empty text so a null does not abort the stream.
                delta = (choices[0].get("delta") or {}).get("content") or ""
                full_text += delta
                yield full_text
    except Exception as e:
        yield f"❌ Error de API: {e}"
79
 
80
- # --- INTERFAZ GRADIO 6 (SINTAXIS CORRECTA) ---
81
  with gr.Blocks() as demo:
82
- gr.Markdown("# ClarityGuard Mini-Test")
83
- # Importante: No le pases dicts al content si es solo texto
84
- chatbot = gr.Chatbot(height=400)
85
- msg = gr.Textbox(placeholder="Escribe 'Hola'...")
86
 
def user_fn(message, history):
    """Append the user's message to the chat history and clear the textbox.

    Args:
        message: Text entered by the user; stored as the message content.
        history: List of ``{"role", "content"}`` dicts, modified in place.
            ``None`` is treated as an empty history.

    Returns:
        A ``("", history)`` tuple: the empty string is the new textbox value
        and ``history`` is the updated conversation.
    """
    if history is None:
        history = []
    # For plain text the content must be a string, not a dict such as
    # {'text': message}.
    history.append({"role": "user", "content": message})
    return "", history
92
 
def bot_fn(history):
    """Stream the assistant's reply into the last entry of ``history``.

    An assistant message with empty string content is appended first. Every
    value produced by ``respond`` then overwrites that message's content, and
    the same ``history`` list is yielded after each update.
    """
    history.append({"role": "assistant", "content": ""})
    # respond() gets everything except the empty assistant placeholder.
    reply_stream = respond(history[:-1])
    for text_so_far in reply_stream:
        history[-1]["content"] = text_so_far
        yield history
@@ -102,5 +100,5 @@ with gr.Blocks() as demo:
102
  )
103
 
if __name__ == "__main__":
    # Bring the model server up in a daemon thread so the web UI can start
    # serving immediately; the thread does not keep the process alive.
    _startup_thread = threading.Thread(target=wait_for_server, daemon=True)
    _startup_thread.start()
    demo.launch(server_name="0.0.0.0", server_port=7860)
 
6
  import json
7
  import threading
8
 
9
+ # --- CONFIGURACIÓN ---
10
  MODEL_REPO = "CharlieBonito/clarity-guard-gemma4-7b"
11
  MODEL_FILE = "Checkpoint-375-Ollama-Clean-7.5B-Q4_K_M.gguf"
12
  MMPROJ_FILE = "mmproj-Checkpoint-375-Ollama-Clean-BF16.gguf"
 
16
 
17
  server_ready = False
18
 
def start_server():
    """Download the model files and launch llama-server as a child process.

    MODEL_FILE and MMPROJ_FILE are fetched from MODEL_REPO into MODEL_DIR
    (created if missing). The server is then started with
    ``subprocess.Popen`` and not waited on, so this function returns as soon
    as the process has been spawned.
    """
    from huggingface_hub import hf_hub_download

    os.makedirs(MODEL_DIR, exist_ok=True)
    m_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir=MODEL_DIR)
    mm_path = hf_hub_download(repo_id=MODEL_REPO, filename=MMPROJ_FILE, local_dir=MODEL_DIR)

    env = os.environ.copy()
    # Prepend the library directories to any inherited LD_LIBRARY_PATH.
    # An empty inherited value is skipped: a trailing ":" would add an empty
    # entry, which the dynamic loader interprets as the current directory.
    library_dirs = ["/usr/local/lib", "/usr/local/cuda/lib64"]
    inherited = env.get("LD_LIBRARY_PATH", "")
    if inherited:
        library_dirs.append(inherited)
    env["LD_LIBRARY_PATH"] = ":".join(library_dirs)

    # Minimal context size (1024).
    # NOTE(review): -ngl 99 presumably offloads all layers to the GPU;
    # confirm against llama-server docs.
    cmd = [
        LLAMA_SERVER, "-m", m_path, "--mmproj", mm_path,
        "--host", "127.0.0.1", "--port", "8080",
        "-c", "1024", "-ngl", "99",
    ]
    # stdout/stderr are not redirected, so the server's output goes to this
    # process's own stdout/stderr.
    subprocess.Popen(cmd, env=env)
35
 
def health_check():
    """Start llama-server and poll its health endpoint until it reports ready.

    Calls ``start_server()`` and then requests ``{SERVER_URL}/health`` in an
    unbounded loop, sleeping 5 seconds after each unsuccessful attempt. On the
    first HTTP 200 the module-level ``server_ready`` flag is set to True and
    the loop ends.
    """
    global server_ready
    start_server()
    while True:
        try:
            if requests.get(f"{SERVER_URL}/health", timeout=1).status_code == 200:
                server_ready = True
                print("[DEBUG] MOTOR LISTO")
                break
        except requests.RequestException:
            # The server is not accepting connections yet; retry after the
            # sleep. Only request failures are ignored, so unrelated errors
            # are no longer hidden.
            pass
        time.sleep(5)
47
 
def respond(history):
    """Stream the assistant's reply from the local llama-server.

    Yields the accumulated reply text after every streamed chunk, so each
    yielded value supersedes the previous one. While ``server_ready`` is
    False a single "still loading" notice is yielded instead. Connection and
    HTTP failures are yielded as an error message rather than raised;
    individual malformed chunks are skipped.

    Args:
        history: Iterable of message dicts with ``"role"`` and ``"content"``
            keys. Each content value is converted with ``str()`` before
            being sent.

    Yields:
        str: The reply text received so far, or a status/error message.
    """
    if not server_ready:
        yield "Got it. El motor de ClarityGuard sigue cargando en la GPU L4... (Intenta de nuevo en 30s)"
        return

    # Convert the chat history into plain role/content messages, preceded by
    # the system prompt.
    messages = [{"role": "system", "content": "Eres ClarityGuard. Saluda y breve."}]
    messages.extend(
        {"role": m["role"], "content": str(m["content"])} for m in history
    )

    try:
        # The context manager closes the streamed connection even when the
        # consumer stops iterating early.
        with requests.post(
            f"{SERVER_URL}/v1/chat/completions",
            json={"messages": messages, "stream": True, "temperature": 0.1},
            stream=True, timeout=60,
        ) as r:
            # Report HTTP errors instead of silently skipping an error body.
            r.raise_for_status()
            full_text = ""
            for line in r.iter_lines():
                if not line:
                    continue
                text = line.decode("utf-8")
                # Only "data:" lines carry payloads; skip anything else
                # rather than slicing a fixed number of characters.
                if not text.startswith("data:"):
                    continue
                payload = text[len("data:"):].strip()
                if payload == "[DONE]":
                    break
                try:
                    data = json.loads(payload)
                    full_text += data["choices"][0].get("delta", {}).get("content", "")
                except (ValueError, LookupError, TypeError, AttributeError):
                    # Malformed or content-less chunk (bad JSON, no choices,
                    # null delta/content): skip it and keep streaming.
                    continue
                # The yield sits outside the try block so that closing the
                # generator (GeneratorExit) is never swallowed.
                yield full_text
    except Exception as e:
        yield f"❌ Error de conexión: {e}"
76
 
77
+ # --- INTERFAZ GRADIO 6 (MÁXIMA SIMPLICIDAD) ---
78
  with gr.Blocks() as demo:
79
+ gr.Markdown("# 🔍 ClarityGuard | Test L4")
80
+ chatbot = gr.Chatbot(height=400)
81
+ msg = gr.Textbox(placeholder="Escribe 'Hola' y presiona Enter...")
 
82
 
def user_fn(message, history):
    """Log the user's message, add it to the chat history and clear the textbox.

    Args:
        message: Value entered by the user; stored as ``str(message)``.
        history: List of ``{"role", "content"}`` dicts, modified in place.
            ``None`` is replaced by a new empty list.

    Returns:
        A ``("", history)`` tuple: the empty string is the new textbox value
        and ``history`` is the updated conversation.
    """
    print(f"[DEBUG] Usuario dijo: {message}")
    conversation = [] if history is None else history
    # The content is stored as a plain string to avoid the ValueError noted
    # for Gradio 6.
    user_turn = {"role": "user", "content": str(message)}
    conversation.append(user_turn)
    return "", conversation
89
 
def bot_fn(history):
    """Stream the assistant's reply into the last entry of ``history``.

    After logging a debug line, an assistant message with empty string
    content is appended. Every value produced by ``respond`` then overwrites
    that message's content, and the same ``history`` list is yielded after
    each update.
    """
    print("[DEBUG] Generando respuesta...")
    history.append({"role": "assistant", "content": ""})
    # Send the whole history except the empty assistant placeholder.
    prior_turns = history[:-1]
    for partial_reply in respond(prior_turns):
        history[-1]["content"] = partial_reply
        yield history
 
100
  )
101
 
if __name__ == "__main__":
    # Bring the model server up in a daemon thread so the web UI can start
    # serving immediately; the thread does not keep the process alive.
    _startup_thread = threading.Thread(target=health_check, daemon=True)
    _startup_thread.start()
    demo.launch(server_name="0.0.0.0", server_port=7860)