Spaces: Sleeping
Commit 42632ea · Parent(s): 2c7042c
Browse files:
- README.md +18 -45
- app.py +76 -154
- requirements.txt +4 -5
README.md CHANGED

@@ -1,5 +1,5 @@
 ---
-title: OpScan.IA — DeepSeek-OCR +
+title: OpScan.IA — DeepSeek-OCR + R1 Medical Mini (fast GGUF)
 emoji: 🩺
 colorFrom: gray
 colorTo: purple
@@ -9,51 +9,24 @@ app_file: app.py
 pinned: false
 ---
 
-# OpScan.IA — DeepSeek-OCR + DeepSeek-R1 Medical Mini
+# OpScan.IA — DeepSeek-OCR + DeepSeek-R1 Medical Mini (fast local GGUF)
 
-- **Chat modes**:
-- **Remote (HF Inference)**: `R1_REMOTE=1` (no token needed if the model is public).
-- **Local GGUF (CPU/Zero)**: `R1_REMOTE=0` with `llama.cpp`.
-- **Environment-tolerant**: if OCR fails because of `FlashAttention2`, it automatically falls back to `_attn_implementation="eager"`.
-
----
-
-## 📦 Requirements
-
-`requirements.txt`:
-
-tokenizers==0.20.3
-accelerate>=0.34.2
-safetensors>=0.4.5
-huggingface-hub>=0.30.0
-hf-transfer>=0.1.6
-pillow>=10.4.0
-numpy>=1.26.0
-tqdm>=4.66.4
-requests>=2.31.0
-einops>=0.7.0
-addict>=2.4.0
-easydict>=1.13
-sentencepiece>=0.2.0
-pydantic==2.10.6
-protobuf<4
-click<8.1
-llama-cpp-python==0.2.90
-# (Optional GPU) flash-attn / xformers
+**Goal:** maximum speed **without tokens** on Spaces Zero/CPU.
+The chat uses **DeepSeek-R1 Medical Mini** in **GGUF** (Q4-quantized when available) with `llama.cpp`.
+OCR uses **DeepSeek-OCR** (with automatic *fallback* to `_attn_implementation="eager"` when FlashAttention2 is unavailable).
+
+## Requirements
+See `requirements.txt`.
+
+## Optional variables
+- `GGUF_REPO` (default: `mradermacher/DeepSeek-r1-Medical-Mini-GGUF`)
+- `GGUF_FILE` (if unset, the app tries, in order: `Q4_K_M`, `Q4_0`, `Q5_0`, `Q8_0`, `f16`)
+- `N_CTX` (2048), `N_THREADS` (auto), `N_GPU_LAYERS` (0), `N_BATCH` (96), `WARMUP` (0/1)
+- `OCR_ATTN_IMPL`: `flash_attention_2` or `eager`
+
+## Running locally
+```bash
+python -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+python app.py
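The fallback order described under "Optional variables" corresponds to selection logic along these lines, a condensed sketch of the app.py changes below (`hf_hub_download` is the standard `huggingface_hub` API; the filenames are the defaults listed above):

```python
# Sketch of the GGUF resolution order described above (condensed from app.py).
import os
from huggingface_hub import hf_hub_download

repo = os.getenv("GGUF_REPO", "mradermacher/DeepSeek-r1-Medical-Mini-GGUF")
forced = os.getenv("GGUF_FILE", "").strip()
candidates = [forced] if forced else [
    f"DeepSeek-r1-Medical-Mini.{q}.gguf" for q in ("Q4_K_M", "Q4_0", "Q5_0", "Q8_0", "f16")
]
for fname in candidates:
    try:
        path = hf_hub_download(repo_id=repo, filename=fname)
        print("resolved:", path)
        break
    except Exception:
        continue  # try the next, heavier quantization
```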
app.py CHANGED

@@ -1,103 +1,55 @@
-# app.py — DeepSeek-OCR + DeepSeek-R1 Medical Mini (
+# app.py — DeepSeek-OCR + DeepSeek-R1 Medical Mini (fast local GGUF) — Gradio 5
 import os, tempfile, traceback
 import gradio as gr
 import torch
 from PIL import Image
 from transformers import AutoModel, AutoTokenizer
 import spaces
-from huggingface_hub import hf_hub_download
+from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
 # ===============================================================
-#
-#
-#
+# CHAT: DeepSeek-R1 Medical Mini — LOCAL ONLY (GGUF) for maximum speed without tokens
+# - You can force a specific file via GGUF_REPO / GGUF_FILE
+# - If none is given, we try Q4 (fast) and fall back to f16 if it is missing
 # ===============================================================
-GGUF_CANDIDATES
-    "mradermacher/DeepSeek-r1-Medical-Mini-GGUF",
-    "DeepSeek-r1-Medical-Mini.f16.gguf"
-))
+GGUF_REPO = os.getenv("GGUF_REPO", "mradermacher/DeepSeek-r1-Medical-Mini-GGUF").strip()
+GGUF_FILE = os.getenv("GGUF_FILE", "").strip()
+
+# Preference order (fastest -> heaviest). Change the names if your repo uses different ones.
+_DEFAULT_CANDIDATES = [
+    "DeepSeek-r1-Medical-Mini.Q4_K_M.gguf",
+    "DeepSeek-r1-Medical-Mini.Q4_0.gguf",
+    "DeepSeek-r1-Medical-Mini.Q5_0.gguf",
+    "DeepSeek-r1-Medical-Mini.Q8_0.gguf",
+    "DeepSeek-r1-Medical-Mini.f16.gguf",
+]
+GGUF_CANDIDATES = [GGUF_FILE] if GGUF_FILE else _DEFAULT_CANDIDATES
 
 N_CTX = int(os.getenv("N_CTX", "2048"))
 N_THREADS = int(os.getenv("N_THREADS", str(os.cpu_count() or 4)))
-N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
+N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))  # Zero/CPU => 0
 N_BATCH = int(os.getenv("N_BATCH", "96"))
 
-# ---- Remote client (HF Inference) ----
-_remote_client = None
-def get_remote_client():
-    global _remote_client
-    if _remote_client is None:
-        _remote_client = InferenceClient(model=R1_MODEL_ID, token=HF_TOKEN, timeout=60)
-    return _remote_client
-
-# ---- ChatML format (DeepSeek/Qwen-compatible) ----
-def _format_chatml(messages):
-    parts = []
-    for m in messages:
-        role = m.get("role", "user")
-        content = m.get("content", "")
-        parts.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
-    parts.append("<|im_start|>assistant\n")
-    return "".join(parts)
-
-def r1_chat(messages, temperature=0.2, max_tokens=384):
-    """Remote (HF) or local (llama-cpp) for DeepSeek-R1 Medical Mini."""
-    if R1_REMOTE:
-        client = get_remote_client()
-        try:
-            # Some endpoints support chat_completion
-            resp = client.chat_completion(messages=messages, temperature=temperature, max_tokens=max_tokens)
-            return resp.choices[0].message["content"]
-        except Exception:
-            # Universal fallback to text_generation with ChatML
-            try:
-                prompt = _format_chatml(messages)
-                return client.text_generation(
-                    prompt,
-                    max_new_tokens=max_tokens,
-                    temperature=temperature,
-                    stop_sequences=["<|im_end|>"],
-                    stream=False,
-                )
-            except Exception:
-                # If the remote call fails (401/429/etc.), fall back to local if a GGUF exists
-                pass
-    # Local GGUF
-    llm = get_llm()
-    out = llm.create_chat_completion(messages=messages, temperature=temperature, max_tokens=max_tokens)
-    return out["choices"][0]["message"]["content"]
-
-# ---- Local loader (GGUF) ----
 _llm = None
 def _download_gguf():
     last_err = None
-    for
+    for fname in GGUF_CANDIDATES:
         try:
+            path = hf_hub_download(repo_id=GGUF_REPO, filename=fname)
+            return path, fname
         except Exception as e:
             last_err = e
-    raise RuntimeError(f"Could not download
+    raise RuntimeError(f"Could not download a GGUF from {GGUF_REPO}. Last error: {last_err}")
 
 def get_llm():
     global _llm
     if _llm is not None:
         return _llm
-    gguf_path,
+    gguf_path, used = _download_gguf()
+    print(f"[R1/llama.cpp] using: {used}")
     _llm = Llama(
         model_path=gguf_path,
-        # Don't force chat_format; we use the one from the R1 GGUF
         n_ctx=N_CTX,
         n_threads=N_THREADS,
         n_gpu_layers=N_GPU_LAYERS,
@@ -106,15 +58,26 @@ def get_llm():
     )
     return _llm
 
+def _format_chatml(messages):
+    parts = []
+    for m in messages:
+        parts.append(f"<|im_start|>{m.get('role','user')}\n{m.get('content','')}<|im_end|>\n")
+    parts.append("<|im_start|>assistant\n")
+    return "".join(parts)
+
+def r1_chat_local(messages, temperature=0.2, max_tokens=384):
+    # llama.cpp accepts messages directly; if your build doesn't, use prompt=_format_chatml(messages)
+    llm = get_llm()
+    out = llm.create_chat_completion(messages=messages, temperature=temperature, max_tokens=max_tokens)
+    return out["choices"][0]["message"]["content"]
+
+# Optional warmup
+if os.getenv("WARMUP", "0") == "1":
+    try: get_llm()
+    except Exception: pass
 
 # ===============================================================
-# DeepSeek-OCR (
+# DeepSeek-OCR (untouched) with a fallback when FlashAttention2 is unavailable
 # ===============================================================
 def _best_dtype():
     if torch.cuda.is_available():
@@ -124,24 +87,16 @@
 def _load_ocr_model():
     model_name = "deepseek-ai/DeepSeek-OCR"
     ocr_tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-    attn_impl = os.getenv("OCR_ATTN_IMPL", "flash_attention_2")
+    attn_impl = os.getenv("OCR_ATTN_IMPL", "flash_attention_2")
     try:
         ocr_model = AutoModel.from_pretrained(
-            model_name,
-            _attn_implementation=attn_impl,
-            trust_remote_code=True,
-            use_safetensors=True,
+            model_name, _attn_implementation=attn_impl, trust_remote_code=True, use_safetensors=True
        ).eval()
        return ocr_tokenizer, ocr_model
    except Exception as e:
-        msg = str(e)
-        if "flash_attn" in msg or "FlashAttention2" in msg or "flash_attention_2" in msg:
+        if any(k in str(e).lower() for k in ["flash_attn", "flashattention2", "flash_attention_2"]):
            ocr_model = AutoModel.from_pretrained(
-                model_name,
-                _attn_implementation="eager",
-                trust_remote_code=True,
-                use_safetensors=True,
+                model_name, _attn_implementation="eager", trust_remote_code=True, use_safetensors=True
            ).eval()
            return ocr_tokenizer, ocr_model
        raise
@@ -150,22 +105,13 @@ tokenizer, model = _load_ocr_model()
 
 @spaces.GPU
 def process_image(image, model_size, task_type, is_eval_mode):
-    """
-    Returns: annotated image, markdown, and text (or markdown when there is no text).
-    """
     if image is None:
         return None, "Please upload an image first.", "Please upload an image first."
     dtype = _best_dtype()
     model_device = model.cuda().to(dtype) if torch.cuda.is_available() else model.to(dtype)
 
     with tempfile.TemporaryDirectory() as output_path:
-        if task_type == "Free OCR":
-            prompt = "<image>\nFree OCR. "
-        elif task_type == "Convert to Markdown":
-            prompt = "<image>\n<|grounding|>Convert the document to markdown. "
-        else:
-            prompt = "<image>\nFree OCR. "
-
+        prompt = "<image>\nFree OCR. " if task_type == "Free OCR" else "<image>\n<|grounding|>Convert the document to markdown. "
         temp_image_path = os.path.join(output_path, "temp_image.jpg")
         image.save(temp_image_path)
 
@@ -194,44 +140,36 @@ def process_image(image, model_size, task_type, is_eval_mode):
         image_result_path = os.path.join(output_path, "result_with_boxes.jpg")
         markdown_result_path = os.path.join(output_path, "result.mmd")
 
+        markdown_content = "Markdown result was not generated. This is expected for 'Free OCR' task."
         if os.path.exists(markdown_result_path):
             with open(markdown_result_path, "r", encoding="utf-8") as f:
                 markdown_content = f.read()
-        else:
-            markdown_content = "Markdown result was not generated. This is expected for 'Free OCR' task."
 
         result_image = None
         if os.path.exists(image_result_path):
-            result_image = Image.open(image_result_path)
-            result_image.load()
+            result_image = Image.open(image_result_path); result_image.load()
 
         text_result = plain_text_result if plain_text_result else markdown_content
         return result_image, markdown_content, text_result
 
 # ===============================================================
-# Chat (injects OCR
+# Chat (injects OCR) — with local R1
 # ===============================================================
-def _truncate(text, max_chars=3000):
-    return (text or "")[:max_chars]
+def _truncate(text, max_chars=3000): return (text or "")[:max_chars]
 
 def _system_prompt():
-    return (
-        "Use CONTEXTO_OCR if present; if it is missing, ask for it. Avoid definitive diagnoses."
-    )
+    return ("You are an educational clinical assistant. You do not replace medical judgment. "
+            "Use CONTEXTO_OCR if present; if it is missing, ask for it. Avoid definitive diagnoses.")
 
-def _ocr_context(ocr_md, ocr_txt):
-    return _truncate(ocr_md) or _truncate(ocr_txt) or ""
+def _ocr_context(ocr_md, ocr_txt): return _truncate(ocr_md) or _truncate(ocr_txt) or ""
 
 def to_chat_messages(chat_msgs, ocr_md, ocr_txt):
     sys = _system_prompt()
     ctx = _ocr_context(ocr_md, ocr_txt)
     if ctx:
-        sys += (
-            f"{ctx}\n---"
-        )
+        sys += ("\n\n---\n"
+                "CONTEXTO_OCR (primary source; if a datum is missing, say so explicitly):\n"
+                f"{ctx}\n---")
     msgs = [{"role": "system", "content": sys}]
     for m in (chat_msgs or []):
         if m.get("role") in ("user", "assistant"):
@@ -243,33 +181,28 @@ def r1_reply(user_msg, chat_msgs, ocr_md, ocr_txt):
         user_msg = "Analyze the CONTEXTO_OCR above and answer based on that content."
     try:
         msgs = to_chat_messages(chat_msgs, ocr_md, ocr_txt) + [{"role": "user", "content": user_msg}]
-        answer =
-        updated = (chat_msgs or []) + [
-            {"role": "assistant", "content": answer},
-        ]
+        answer = r1_chat_local(msgs, temperature=0.2, max_tokens=512)
+        updated = (chat_msgs or []) + [{"role": "user", "content": user_msg},
+                                       {"role": "assistant", "content": answer}]
         return updated, "", gr.update(value="")
     except Exception as e:
         err = f"{e.__class__.__name__}: {str(e) or repr(e)}"
         tb = traceback.format_exc(limit=2)
-        updated = (chat_msgs or []) + [
-            {"role": "assistant", "content": f"⚠️ LLM error: {err}"},
-        ]
+        updated = (chat_msgs or []) + [{"role": "user", "content": user_msg or ""},
                                       {"role": "assistant", "content": f"⚠️ LLM error: {err}"}]
        return updated, "", gr.update(value=f"{err}\n{tb}")
 
-def clear_chat():
-    return [], "", gr.update(value="")
+def clear_chat(): return [], "", gr.update(value="")
 
 # ===============================================================
 # UI (Gradio 5)
 # ===============================================================
-with gr.Blocks(title="DeepSeek-OCR +
+with gr.Blocks(title="DeepSeek-OCR + R1 Medical (fast GGUF)", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
-# DeepSeek-OCR → Medical Chat with **DeepSeek-R1 Medical Mini
+# DeepSeek-OCR → Medical Chat with **DeepSeek-R1 Medical Mini (fast local GGUF)**
 1) **Upload an image** and run **OCR** (annotated image, Markdown, and text).
-2) **Chat** with **
+2) **Chat** with **R1 Medical Mini**, which automatically uses the **OCR** as context.
 *Educational use; it does not replace medical advice.*
        """
    )
@@ -280,18 +213,12 @@ with gr.Blocks(title="DeepSeek-OCR + DeepSeek-R1 Medical Mini", theme=gr.themes.
     with gr.Row():
         with gr.Column(scale=1):
             image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard", "webcam"])
-            model_size = gr.Dropdown(
-                value="Convert to Markdown", label="Task Type",
-            )
-            eval_mode_checkbox = gr.Checkbox(
-                value=False, label="Enable Evaluation Mode",
-                info="Text only (faster). Uncheck to see the annotated image and markdown.",
-            )
+            model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"],
+                                     value="Gundam (Recommended)", label="Model Size")
+            task_type = gr.Dropdown(choices=["Free OCR", "Convert to Markdown"],
+                                    value="Convert to Markdown", label="Task Type")
+            eval_mode_checkbox = gr.Checkbox(value=False, label="Enable Evaluation Mode",
+                                             info="Text only (faster). Uncheck to see the annotated image and markdown.")
             submit_btn = gr.Button("Process Image", variant="primary")
 
         with gr.Column(scale=2):
@@ -304,10 +231,10 @@ with gr.Blocks(title="DeepSeek-OCR + DeepSeek-R1 Medical Mini", theme=gr.themes.
             md_preview = gr.Textbox(label="OCR Markdown snapshot", lines=10, interactive=False)
             txt_preview = gr.Textbox(label="OCR text snapshot", lines=10, interactive=False)
 
-    gr.Markdown("## Clinical Chat (
+    gr.Markdown("## Clinical Chat (R1 Medical Mini — local GGUF)")
     with gr.Row():
         with gr.Column(scale=2):
-            chatbot = gr.Chatbot(label="OCR Assistant (R1
+            chatbot = gr.Chatbot(label="OCR Assistant (R1 GGUF)", type="messages", height=420)
             user_in = gr.Textbox(label="Message", placeholder="Type your question… (empty = analyze the OCR only)", lines=2)
             with gr.Row():
                 send_btn = gr.Button("Send", variant="primary")
@@ -315,7 +242,6 @@ with gr.Blocks(title="DeepSeek-OCR + DeepSeek-R1 Medical Mini", theme=gr.themes.
         with gr.Column(scale=1):
             error_box = gr.Textbox(label="Debug (on error)", lines=8, interactive=False)
 
-    # OCR → outputs and state
     submit_btn.click(
         fn=process_image,
         inputs=[image_input, model_size, task_type, eval_mode_checkbox],
@@ -326,12 +252,8 @@ with gr.Blocks(title="DeepSeek-OCR + DeepSeek-R1 Medical Mini", theme=gr.themes.
         outputs=[ocr_md_state, ocr_txt_state, md_preview, txt_preview],
     )
 
-        fn=r1_reply,
-        inputs=[user_in, chatbot, ocr_md_state, ocr_txt_state],
-        outputs=[chatbot, user_in, error_box],
-    )
+    send_btn.click(fn=r1_reply, inputs=[user_in, chatbot, ocr_md_state, ocr_txt_state],
+                   outputs=[chatbot, user_in, error_box])
     clear_btn.click(fn=clear_chat, outputs=[chatbot, user_in, error_box])
 
 if __name__ == "__main__":
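The inline comment in `r1_chat_local` notes that a `llama-cpp-python` build that cannot use `create_chat_completion` (for example, a GGUF without a usable chat template) can render the chat to a ChatML prompt and call the plain completion API instead. A minimal sketch of that path, assuming the caller supplies the loaded `Llama` instance (`chat_via_completion` is a hypothetical helper, not part of this commit):

```python
from llama_cpp import Llama

def _format_chatml(messages):
    # Same ChatML rendering as in app.py above.
    parts = [f"<|im_start|>{m.get('role', 'user')}\n{m.get('content', '')}<|im_end|>\n" for m in messages]
    return "".join(parts) + "<|im_start|>assistant\n"

def chat_via_completion(llm: Llama, messages, temperature=0.2, max_tokens=384):
    # Plain completion API; stop at the ChatML end-of-turn marker.
    out = llm(_format_chatml(messages), temperature=temperature, max_tokens=max_tokens, stop=["<|im_end|>"])
    return out["choices"][0]["text"]
```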
requirements.txt CHANGED

@@ -1,8 +1,7 @@
-# --- Core runtime ---
 gradio==5.49.1
 spaces>=0.28.3
 
-# PyTorch + Transformers
+# PyTorch + Transformers (for DeepSeek-OCR)
 torch==2.6.0
 torchvision==0.21.0
 transformers==4.46.3
@@ -12,7 +11,7 @@ safetensors>=0.4.5
 huggingface-hub>=0.30.0
 hf-transfer>=0.1.6
 
-#
+# Utils and vision
 pillow>=10.4.0
 numpy>=1.26.0
 tqdm>=4.66.4
@@ -25,9 +24,9 @@ pydantic==2.10.6
 protobuf<4
 click<8.1
 
-#
+# Local LLM (GGUF)
 llama-cpp-python==0.2.90
 
-#
+# (Optional GPU to speed up OCR; do not install on CPU/Zero)
 # flash-attn==2.7.3 --no-build-isolation
 # xformers==0.0.28.post1
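To confirm the pinned stack installed cleanly, a minimal smoke test (a sketch; the expected versions are the pins above):

```python
# Verify the pinned stack imports and reports the expected versions.
import gradio, torch, transformers, llama_cpp

print("gradio:", gradio.__version__)              # expect 5.49.1
print("torch:", torch.__version__)                # expect 2.6.0
print("transformers:", transformers.__version__)  # expect 4.46.3
print("llama_cpp:", llama_cpp.__version__)        # expect 0.2.90
```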