jorgeiv500 committed on
Commit c93afa6 · verified · 1 Parent(s): 42632ea

Update app.py

Files changed (1)
  1. app.py +135 -117
app.py CHANGED
@@ -1,80 +1,140 @@
- # app.py — DeepSeek-OCR + DeepSeek-R1 Medical Mini (fast local GGUF) — Gradio 5
  import os, tempfile, traceback
  import gradio as gr
  import torch
  from PIL import Image
- from transformers import AutoModel, AutoTokenizer
  import spaces
- from huggingface_hub import hf_hub_download
- from llama_cpp import Llama

  # ===============================================================
- # CHAT: DeepSeek-R1 Medical Mini, LOCAL ONLY (GGUF), for maximum speed without API tokens
- # - A specific file can be forced via GGUF_REPO / GGUF_FILE
- # - If none is set, we try Q4 (fast) and fall back to f16 if it is unavailable
  # ===============================================================
- GGUF_REPO = os.getenv("GGUF_REPO", "mradermacher/DeepSeek-r1-Medical-Mini-GGUF").strip()
- GGUF_FILE = os.getenv("GGUF_FILE", "").strip()
-
- # Preference order (fastest -> heaviest). Change the names if your repo uses different ones.
- _DEFAULT_CANDIDATES = [
-     "DeepSeek-r1-Medical-Mini.Q4_K_M.gguf",
-     "DeepSeek-r1-Medical-Mini.Q4_0.gguf",
-     "DeepSeek-r1-Medical-Mini.Q5_0.gguf",
-     "DeepSeek-r1-Medical-Mini.Q8_0.gguf",
-     "DeepSeek-r1-Medical-Mini.f16.gguf",
- ]
- GGUF_CANDIDATES = [GGUF_FILE] if GGUF_FILE else _DEFAULT_CANDIDATES
-
- N_CTX = int(os.getenv("N_CTX", "2048"))
- N_THREADS = int(os.getenv("N_THREADS", str(os.cpu_count() or 4)))
- N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))  # Zero/CPU => 0
- N_BATCH = int(os.getenv("N_BATCH", "96"))
-
- _llm = None
- def _download_gguf():
-     last_err = None
-     for fname in GGUF_CANDIDATES:
-         try:
-             path = hf_hub_download(repo_id=GGUF_REPO, filename=fname)
-             return path, fname
-         except Exception as e:
-             last_err = e
-     raise RuntimeError(f"No se pudo descargar GGUF desde {GGUF_REPO}. Último error: {last_err}")
-
- def get_llm():
-     global _llm
-     if _llm is not None:
-         return _llm
-     gguf_path, used = _download_gguf()
-     print(f"[R1/llama.cpp] usando: {used}")
-     _llm = Llama(
-         model_path=gguf_path,
-         n_ctx=N_CTX,
-         n_threads=N_THREADS,
-         n_gpu_layers=N_GPU_LAYERS,
-         n_batch=N_BATCH,
-         verbose=False,
-     )
-     return _llm
-
- def _format_chatml(messages):
-     parts = []
-     for m in messages:
-         parts.append(f"<|im_start|>{m.get('role','user')}\n{m.get('content','')}<|im_end|>\n")
-     parts.append("<|im_start|>assistant\n")
-     return "".join(parts)
-
- def r1_chat_local(messages, temperature=0.2, max_tokens=384):
-     # llama.cpp accepts messages directly; if your build does not, use prompt=_format_chatml(messages)
-     llm = get_llm()
-     out = llm.create_chat_completion(messages=messages, temperature=temperature, max_tokens=max_tokens)
-     return out["choices"][0]["message"]["content"]
-
- # Optional warmup
- if os.getenv("WARMUP", "0") == "1":
-     try: get_llm()
-     except Exception: pass

  # ===============================================================
  # DeepSeek-OCR (untouched), with a fallback when FlashAttention2 is unavailable
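For reference, the backend removed here can be exercised on its own with the same llama.cpp calls it used. A minimal sketch, assuming llama-cpp-python and huggingface_hub are installed and that the Q4_K_M filename (the first candidate in the list above) actually exists in the GGUF repo:

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download one concrete quant instead of walking the candidate list.
gguf_path = hf_hub_download(
    repo_id="mradermacher/DeepSeek-r1-Medical-Mini-GGUF",
    filename="DeepSeek-r1-Medical-Mini.Q4_K_M.gguf",  # assumed filename; verify against the repo
)

# Roughly the constructor arguments the removed get_llm() passes (CPU defaults).
llm = Llama(model_path=gguf_path, n_ctx=2048, n_threads=4, n_gpu_layers=0, n_batch=96, verbose=False)
out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Resume el CONTEXTO_OCR en tres puntos."}],
    temperature=0.2,
    max_tokens=256,
)
print(out["choices"][0]["message"]["content"])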
@@ -152,57 +212,15 @@ def process_image(image, model_size, task_type, is_eval_mode):
      text_result = plain_text_result if plain_text_result else markdown_content
      return result_image, markdown_content, text_result

- # ===============================================================
- # Chat (injects the OCR context) with local R1
- # ===============================================================
- def _truncate(text, max_chars=3000): return (text or "")[:max_chars]
-
- def _system_prompt():
-     return ("Eres un asistente clínico educativo. No sustituyes el juicio médico. "
-             "Usa CONTEXTO_OCR si existe; si falta, pídelo. Evita diagnósticos definitivos.")
-
- def _ocr_context(ocr_md, ocr_txt): return _truncate(ocr_md) or _truncate(ocr_txt) or ""
-
- def to_chat_messages(chat_msgs, ocr_md, ocr_txt):
-     sys = _system_prompt()
-     ctx = _ocr_context(ocr_md, ocr_txt)
-     if ctx:
-         sys += ("\n\n---\n"
-                 "CONTEXTO_OCR (fuente principal; si falta un dato, dilo explícitamente):\n"
-                 f"{ctx}\n---")
-     msgs = [{"role": "system", "content": sys}]
-     for m in (chat_msgs or []):
-         if m.get("role") in ("user", "assistant"):
-             msgs.append({"role": m["role"], "content": m.get("content", "")})
-     return msgs
-
- def r1_reply(user_msg, chat_msgs, ocr_md, ocr_txt):
-     if not user_msg:
-         user_msg = "Analiza el CONTEXTO_OCR anterior y responde a partir de ese contenido."
-     try:
-         msgs = to_chat_messages(chat_msgs, ocr_md, ocr_txt) + [{"role": "user", "content": user_msg}]
-         answer = r1_chat_local(msgs, temperature=0.2, max_tokens=512)
-         updated = (chat_msgs or []) + [{"role": "user", "content": user_msg},
-                                        {"role": "assistant", "content": answer}]
-         return updated, "", gr.update(value="")
-     except Exception as e:
-         err = f"{e.__class__.__name__}: {str(e) or repr(e)}"
-         tb = traceback.format_exc(limit=2)
-         updated = (chat_msgs or []) + [{"role": "user", "content": user_msg or ""},
-                                        {"role": "assistant", "content": f"⚠️ Error LLM: {err}"}]
-         return updated, "", gr.update(value=f"{err}\n{tb}")
-
- def clear_chat(): return [], "", gr.update(value="")
-
  # ===============================================================
  # UI (Gradio 5)
  # ===============================================================
- with gr.Blocks(title="DeepSeek-OCR + R1 Medical (GGUF rápido)", theme=gr.themes.Soft()) as demo:
      gr.Markdown(
          """
-         # DeepSeek-OCR → Chat Médico con **DeepSeek-R1 Medical Mini (GGUF local rápido)**
          1) **Sube una imagen** y corre **OCR** (imagen anotada, Markdown y texto).
-         2) **Chatea** con **R1 Medical Mini** usando automáticamente el **OCR** como contexto.
          *Uso educativo; no reemplaza consejo médico.*
          """
      )
@@ -231,10 +249,10 @@ with gr.Blocks(title="DeepSeek-OCR + R1 Medical (GGUF rápido)", theme=gr.themes
      md_preview = gr.Textbox(label="Snapshot Markdown OCR", lines=10, interactive=False)
      txt_preview = gr.Textbox(label="Snapshot Texto OCR", lines=10, interactive=False)

-     gr.Markdown("## Chat Clínico (R1 Medical Mini — GGUF local)")
      with gr.Row():
          with gr.Column(scale=2):
-             chatbot = gr.Chatbot(label="Asistente OCR (R1 GGUF)", type="messages", height=420)
              user_in = gr.Textbox(label="Mensaje", placeholder="Escribe tu consulta… (vacío = analiza solo el OCR)", lines=2)
              with gr.Row():
                  send_btn = gr.Button("Enviar", variant="primary")
@@ -252,7 +270,7 @@ with gr.Blocks(title="DeepSeek-OCR + R1 Medical (GGUF rápido)", theme=gr.themes
          outputs=[ocr_md_state, ocr_txt_state, md_preview, txt_preview],
      )

-     send_btn.click(fn=r1_reply, inputs=[user_in, chatbot, ocr_md_state, ocr_txt_state],
                     outputs=[chatbot, user_in, error_box])
      clear_btn.click(fn=clear_chat, outputs=[chatbot, user_in, error_box])
 
 
+ # app.py — DeepSeek-OCR + BioMedLM (remote or local) — Gradio 5
  import os, tempfile, traceback
  import gradio as gr
  import torch
  from PIL import Image
+ from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
  import spaces
+ from huggingface_hub import hf_hub_download, InferenceClient

  # ===============================================================
+ # CHAT: BioMedLM remote (HF Inference) or local (Transformers)
+ # - Remote mode: BIO_REMOTE=1 (recommended on Zero/CPU Spaces)
+ # - Local mode: BIO_REMOTE=0 (uses PyTorch; 2.7B params, can be slow on CPU)
+ # - Variables: BIO_MODEL_ID=stanford-crfm/BioMedLM, HF_TOKEN
  # ===============================================================
+ BIO_REMOTE = os.getenv("BIO_REMOTE", "0") == "1"
+ BIO_MODEL_ID = os.getenv("BIO_MODEL_ID", "stanford-crfm/BioMedLM").strip()
+ HF_TOKEN = os.getenv("HF_TOKEN")
+
+ # Default generation parameters
+ GEN_TEMPERATURE = float(os.getenv("GEN_TEMPERATURE", "0.2"))
+ GEN_TOP_P = float(os.getenv("GEN_TOP_P", "0.9"))
+ GEN_MAX_NEW_TOKENS = int(os.getenv("GEN_MAX_NEW_TOKENS", "512"))
+ GEN_REP_PENALTY = float(os.getenv("GEN_REP_PENALTY", "1.1"))
+
+ _bio_model = None
+ _bio_tokenizer = None
+ _hf_client = None
+
+ def get_biomedlm():
+     """Return the BioMedLM handle for the configured remote/local mode."""
+     global _bio_model, _bio_tokenizer, _hf_client
+     if BIO_REMOTE:
+         if _hf_client is None:
+             _hf_client = InferenceClient(model=BIO_MODEL_ID, token=HF_TOKEN)
+         return ("remote", _hf_client)
+     else:
+         if _bio_model is None:
+             device = "cuda" if torch.cuda.is_available() else "cpu"
+             dtype = torch.bfloat16 if (device == "cuda" and torch.cuda.is_bf16_supported()) else (
+                 torch.float16 if device == "cuda" else torch.float32
+             )
+             _bio_tokenizer = AutoTokenizer.from_pretrained(BIO_MODEL_ID, use_fast=True)
+             _bio_model = AutoModelForCausalLM.from_pretrained(
+                 BIO_MODEL_ID,
+                 torch_dtype=dtype,
+             )
+             _bio_model = _bio_model.to(device)
+         return ("local", (_bio_model, _bio_tokenizer))
+
+ def _system_prompt():
+     return ("Eres un asistente clínico educativo. No sustituyes el juicio médico. "
+             "Usa CONTEXTO_OCR si existe; si falta, pídelo. Evita diagnósticos definitivos.")
+
+ def _truncate(text, max_chars=3000):
+     return (text or "")[:max_chars]
+
+ def _ocr_context(ocr_md, ocr_txt):
+     return _truncate(ocr_md) or _truncate(ocr_txt) or ""
+
+ def build_prompt(chat_msgs, ocr_md, ocr_txt, user_msg):
+     """Build an instruct-style prompt suited to BioMedLM (it is not a chat model)."""
+     sys = _system_prompt()
+     ctx = _ocr_context(ocr_md, ocr_txt)
+
+     history_lines = []
+     for m in (chat_msgs or []):
+         role = m.get("role")
+         content = (m.get("content") or "").strip()
+         if not content:
+             continue
+         if role == "user":
+             history_lines.append(f"User: {content}")
+         elif role == "assistant":
+             history_lines.append(f"Assistant: {content}")
+
+     if user_msg:
+         history_lines.append(f"User: {user_msg}")
+
+     convo = "\n".join(history_lines).strip()
+     prompt = f"### System\n{sys}\n\n"
+     if ctx:
+         prompt += f"### Context (OCR)\n{ctx}\n\n"
+     prompt += f"### Conversation\n{convo}\nAssistant:"
+     return prompt
+
+ def biomedlm_reply(user_msg, chat_msgs, ocr_md, ocr_txt):
+     """Generate a reply with BioMedLM (remote or local)."""
+     try:
+         if not user_msg:
+             user_msg = "Analiza el CONTEXTO_OCR anterior y responde a partir de ese contenido."
+         prompt = build_prompt(chat_msgs, ocr_md, ocr_txt, user_msg)
+         mode, handle = get_biomedlm()
+
+         if mode == "remote":
+             # HF Inference (text-generation)
+             out = handle.text_generation(
+                 prompt,
+                 max_new_tokens=GEN_MAX_NEW_TOKENS,
+                 temperature=GEN_TEMPERATURE,
+                 top_p=GEN_TOP_P,
+                 repetition_penalty=GEN_REP_PENALTY,
+                 # Soft stops; keeps the model from spilling into the next section
+                 stop_sequences=["\nUser:", "### System", "### Context", "### Conversation"]
+             )
+             answer = out
+         else:
+             # Local (PyTorch)
+             model, tok = handle
+             inputs = tok(prompt, return_tensors="pt").to(model.device)
+             gen_ids = model.generate(
+                 **inputs,
+                 do_sample=True,
+                 temperature=GEN_TEMPERATURE,
+                 top_p=GEN_TOP_P,
+                 repetition_penalty=GEN_REP_PENALTY,
+                 max_new_tokens=GEN_MAX_NEW_TOKENS,
+                 eos_token_id=tok.eos_token_id,
+             )
+             answer = tok.decode(gen_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
+
+         updated = (chat_msgs or []) + [
+             {"role": "user", "content": user_msg},
+             {"role": "assistant", "content": answer.strip()}
+         ]
+         return updated, "", gr.update(value="")
+     except Exception as e:
+         err = f"{e.__class__.__name__}: {str(e) or repr(e)}"
+         tb = traceback.format_exc(limit=2)
+         updated = (chat_msgs or []) + [
+             {"role": "user", "content": user_msg or ""},
+             {"role": "assistant", "content": f"⚠️ Error LLM: {err}"}
+         ]
+         return updated, "", gr.update(value=f"{err}\n{tb}")
+
+ def clear_chat():
+     return [], "", gr.update(value="")
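The local branch above is plain causal-LM generation. A minimal sketch of it in isolation, assuming enough memory for stanford-crfm/BioMedLM (2.7B parameters) and using a made-up OCR snippet in the same "### System / ### Context (OCR) / ### Conversation" layout that build_prompt() emits:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "stanford-crfm/BioMedLM"
device = "cuda" if torch.cuda.is_available() else "cpu"
tok = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

# Hypothetical prompt mirroring build_prompt(); the OCR value and question are examples only.
prompt = (
    "### System\nEres un asistente clínico educativo.\n\n"
    "### Context (OCR)\nHemoglobina: 9.1 g/dL\n\n"
    "### Conversation\nUser: ¿Qué sugiere este valor?\nAssistant:"
)
inputs = tok(prompt, return_tensors="pt").to(device)
gen_ids = model.generate(
    **inputs,
    do_sample=True,
    temperature=0.2,
    top_p=0.9,
    max_new_tokens=128,
    eos_token_id=tok.eos_token_id,
)
print(tok.decode(gen_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))

The remote branch sends the same prompt through InferenceClient.text_generation with the stop sequences shown above; whether the serverless Inference API currently serves this model is worth verifying before setting BIO_REMOTE=1.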
 
  # ===============================================================
  # DeepSeek-OCR (untouched), with a fallback when FlashAttention2 is unavailable

      text_result = plain_text_result if plain_text_result else markdown_content
      return result_image, markdown_content, text_result
  # ===============================================================
  # UI (Gradio 5)
  # ===============================================================
+ with gr.Blocks(title="DeepSeek-OCR + BioMedLM", theme=gr.themes.Soft()) as demo:
      gr.Markdown(
          """
+         # DeepSeek-OCR → Chat Médico con **BioMedLM**
          1) **Sube una imagen** y corre **OCR** (imagen anotada, Markdown y texto).
+         2) **Chatea** con **BioMedLM** usando automáticamente el **OCR** como contexto.
          *Uso educativo; no reemplaza consejo médico.*
          """
      )
 
      md_preview = gr.Textbox(label="Snapshot Markdown OCR", lines=10, interactive=False)
      txt_preview = gr.Textbox(label="Snapshot Texto OCR", lines=10, interactive=False)

+     gr.Markdown("## Chat Clínico (BioMedLM)")
      with gr.Row():
          with gr.Column(scale=2):
+             chatbot = gr.Chatbot(label="Asistente OCR (BioMedLM)", type="messages", height=420)
              user_in = gr.Textbox(label="Mensaje", placeholder="Escribe tu consulta… (vacío = analiza solo el OCR)", lines=2)
              with gr.Row():
                  send_btn = gr.Button("Enviar", variant="primary")
 
          outputs=[ocr_md_state, ocr_txt_state, md_preview, txt_preview],
      )

+     send_btn.click(fn=biomedlm_reply, inputs=[user_in, chatbot, ocr_md_state, ocr_txt_state],
                     outputs=[chatbot, user_in, error_box])
      clear_btn.click(fn=clear_chat, outputs=[chatbot, user_in, error_box])
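Because the Chatbot is created with type="messages", the history that send_btn.click feeds into biomedlm_reply (and gets back) is a plain list of role/content dicts; the sketch below uses made-up content. Mode selection happens once at import time, so BIO_REMOTE, BIO_MODEL_ID and HF_TOKEN must be set in the Space settings (or the shell) before app.py starts.

# Shape of the chat state flowing through send_btn.click / clear_btn.click.
# The content strings are hypothetical; the structure is what gr.Chatbot(type="messages") expects.
chat_msgs = [
    {"role": "user", "content": "¿Qué muestra el informe?"},
    {"role": "assistant", "content": "El OCR sugiere anemia; confírmelo con su médico."},
]

# biomedlm_reply returns one value per output component of the click handler:
#   (updated_history, "", gr.update(value=""))  ->  chatbot, user_in, error_box
# clear_chat() resets the same three components with ([], "", gr.update(value="")).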