Spaces: Sleeping
Commit 42632ea · Parent(s): 2c7042c
Browse files:
- README.md +18 -45
- app.py +76 -154
- requirements.txt +4 -5
README.md CHANGED

@@ -1,5 +1,5 @@
 ---
-title: OpScan.IA — DeepSeek-OCR +
+title: OpScan.IA — DeepSeek-OCR + R1 Medical Mini (fast GGUF)
 emoji: 🩺
 colorFrom: gray
 colorTo: purple
@@ -9,51 +9,24 @@ app_file: app.py
 pinned: false
 ---
 
-# OpScan.IA — DeepSeek-OCR + DeepSeek-R1 Medical Mini
+# OpScan.IA — DeepSeek-OCR + DeepSeek-R1 Medical Mini (fast local GGUF)
 
-- **Chat modes**:
-- **Remote (HF Inference)**: `R1_REMOTE=1` (no token needed if the model is public).
-- **Local GGUF (CPU/Zero)**: `R1_REMOTE=0` with `llama.cpp`.
-- **Environment-tolerant**: if OCR fails because of `FlashAttention2`, it automatically falls back to `_attn_implementation="eager"`.
-
----
-
-## 📦 Requirements
-
-`requirements.txt`:
-
-tokenizers==0.20.3
-accelerate>=0.34.2
-safetensors>=0.4.5
-huggingface-hub>=0.30.0
-hf-transfer>=0.1.6
-pillow>=10.4.0
-numpy>=1.26.0
-tqdm>=4.66.4
-requests>=2.31.0
-einops>=0.7.0
-addict>=2.4.0
-easydict>=1.13
-sentencepiece>=0.2.0
-pydantic==2.10.6
-protobuf<4
-click<8.1
-llama-cpp-python==0.2.90
-# (Optional GPU) flash-attn / xformers
+**Goal:** maximum speed **without tokens** on Spaces Zero/CPU.
+The chat uses **DeepSeek-R1 Medical Mini** in **GGUF** (Q4-quantized when available) with `llama.cpp`.
+OCR uses **DeepSeek-OCR** (with automatic *fallback* to `_attn_implementation="eager"` when FlashAttention2 is unavailable).
+
+## Requirements
+See `requirements.txt`.
+
+## Optional variables
+- `GGUF_REPO` (default: `mradermacher/DeepSeek-r1-Medical-Mini-GGUF`)
+- `GGUF_FILE` (if unset, the app tries, in order: `Q4_K_M`, `Q4_0`, `Q5_0`, `Q8_0`, `f16`)
+- `N_CTX` (2048), `N_THREADS` (auto), `N_GPU_LAYERS` (0), `N_BATCH` (96), `WARMUP` (0/1)
+- `OCR_ATTN_IMPL`: `flash_attention_2` or `eager`
+
+## Running locally
+```bash
+python -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+python app.py
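The fallback order described under "Optional variables" corresponds to selection logic along these lines, a condensed sketch of the app.py changes below (`hf_hub_download` is the standard `huggingface_hub` API; the filenames are the defaults listed above):

```python
# Sketch of the GGUF resolution order described above (condensed from app.py).
import os
from huggingface_hub import hf_hub_download

repo = os.getenv("GGUF_REPO", "mradermacher/DeepSeek-r1-Medical-Mini-GGUF")
forced = os.getenv("GGUF_FILE", "").strip()
candidates = [forced] if forced else [
    f"DeepSeek-r1-Medical-Mini.{q}.gguf" for q in ("Q4_K_M", "Q4_0", "Q5_0", "Q8_0", "f16")
]
for fname in candidates:
    try:
        path = hf_hub_download(repo_id=repo, filename=fname)
        print("resolved:", path)
        break
    except Exception:
        continue  # try the next, heavier quantization
```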
app.py CHANGED

@@ -1,103 +1,55 @@
-# app.py — DeepSeek-OCR + DeepSeek-R1 Medical Mini (
+# app.py — DeepSeek-OCR + DeepSeek-R1 Medical Mini (fast local GGUF) — Gradio 5
 import os, tempfile, traceback
 import gradio as gr
 import torch
 from PIL import Image
 from transformers import AutoModel, AutoTokenizer
 import spaces
-from huggingface_hub import hf_hub_download
+from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
 # ===============================================================
-#
-#
-#
+# CHAT: DeepSeek-R1 Medical Mini — LOCAL ONLY (GGUF) for maximum speed without tokens
+# - You can force a specific file via GGUF_REPO / GGUF_FILE
+# - If none is given, we try Q4 (fast) and fall back to f16 if it is missing
 # ===============================================================
-GGUF_CANDIDATES
-    "mradermacher/DeepSeek-r1-Medical-Mini-GGUF",
-    "DeepSeek-r1-Medical-Mini.f16.gguf"
-))
+GGUF_REPO = os.getenv("GGUF_REPO", "mradermacher/DeepSeek-r1-Medical-Mini-GGUF").strip()
+GGUF_FILE = os.getenv("GGUF_FILE", "").strip()
+
+# Preference order (fastest -> heaviest). Change the names if your repo uses different ones.
+_DEFAULT_CANDIDATES = [
+    "DeepSeek-r1-Medical-Mini.Q4_K_M.gguf",
+    "DeepSeek-r1-Medical-Mini.Q4_0.gguf",
+    "DeepSeek-r1-Medical-Mini.Q5_0.gguf",
+    "DeepSeek-r1-Medical-Mini.Q8_0.gguf",
+    "DeepSeek-r1-Medical-Mini.f16.gguf",
+]
+GGUF_CANDIDATES = [GGUF_FILE] if GGUF_FILE else _DEFAULT_CANDIDATES
 
 N_CTX = int(os.getenv("N_CTX", "2048"))
 N_THREADS = int(os.getenv("N_THREADS", str(os.cpu_count() or 4)))
-N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
+N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))  # Zero/CPU => 0
 N_BATCH = int(os.getenv("N_BATCH", "96"))
 
-# ---- Remote client (HF Inference) ----
-_remote_client = None
-def get_remote_client():
-    global _remote_client
-    if _remote_client is None:
-        _remote_client = InferenceClient(model=R1_MODEL_ID, token=HF_TOKEN, timeout=60)
-    return _remote_client
-
-# ---- ChatML format (DeepSeek/Qwen-compatible) ----
-def _format_chatml(messages):
-    parts = []
-    for m in messages:
-        role = m.get("role", "user")
-        content = m.get("content", "")
-        parts.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
-    parts.append("<|im_start|>assistant\n")
-    return "".join(parts)
-
-def r1_chat(messages, temperature=0.2, max_tokens=384):
-    """Remote (HF) or local (llama-cpp) for DeepSeek-R1 Medical Mini."""
-    if R1_REMOTE:
-        client = get_remote_client()
-        try:
-            # Some endpoints support chat_completion
-            resp = client.chat_completion(messages=messages, temperature=temperature, max_tokens=max_tokens)
-            return resp.choices[0].message["content"]
-        except Exception:
-            # Universal fallback to text_generation with ChatML
-            try:
-                prompt = _format_chatml(messages)
-                return client.text_generation(
-                    prompt,
-                    max_new_tokens=max_tokens,
-                    temperature=temperature,
-                    stop_sequences=["<|im_end|>"],
-                    stream=False,
-                )
-            except Exception:
-                # If the remote call fails (401/429/etc.), fall back to local if a GGUF exists
-                pass
-    # Local GGUF
-    llm = get_llm()
-    out = llm.create_chat_completion(messages=messages, temperature=temperature, max_tokens=max_tokens)
-    return out["choices"][0]["message"]["content"]
-
-# ---- Local loader (GGUF) ----
 _llm = None
 def _download_gguf():
     last_err = None
-    for
+    for fname in GGUF_CANDIDATES:
         try:
+            path = hf_hub_download(repo_id=GGUF_REPO, filename=fname)
+            return path, fname
         except Exception as e:
             last_err = e
-    raise RuntimeError(f"Could not download
+    raise RuntimeError(f"Could not download a GGUF from {GGUF_REPO}. Last error: {last_err}")
 
 def get_llm():
     global _llm
     if _llm is not None:
         return _llm
-    gguf_path,
+    gguf_path, used = _download_gguf()
+    print(f"[R1/llama.cpp] using: {used}")
     _llm = Llama(
         model_path=gguf_path,
-        # Don't force chat_format; we use the one from the R1 GGUF
         n_ctx=N_CTX,
         n_threads=N_THREADS,
         n_gpu_layers=N_GPU_LAYERS,
@@ -106,15 +58,26 @@ def get_llm():
     )
     return _llm
 
+def _format_chatml(messages):
+    parts = []
+    for m in messages:
+        parts.append(f"<|im_start|>{m.get('role','user')}\n{m.get('content','')}<|im_end|>\n")
+    parts.append("<|im_start|>assistant\n")
+    return "".join(parts)
+
+def r1_chat_local(messages, temperature=0.2, max_tokens=384):
+    # llama.cpp accepts messages directly; if your build doesn't, use prompt=_format_chatml(messages)
+    llm = get_llm()
+    out = llm.create_chat_completion(messages=messages, temperature=temperature, max_tokens=max_tokens)
+    return out["choices"][0]["message"]["content"]
+
+# Optional warmup
+if os.getenv("WARMUP", "0") == "1":
+    try: get_llm()
+    except Exception: pass
 
 # ===============================================================
-# DeepSeek-OCR (
+# DeepSeek-OCR (untouched) with a fallback when FlashAttention2 is unavailable
 # ===============================================================
 def _best_dtype():
     if torch.cuda.is_available():
@@ -124,24 +87,16 @@
 def _load_ocr_model():
     model_name = "deepseek-ai/DeepSeek-OCR"
     ocr_tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-    attn_impl = os.getenv("OCR_ATTN_IMPL", "flash_attention_2")
+    attn_impl = os.getenv("OCR_ATTN_IMPL", "flash_attention_2")
     try:
         ocr_model = AutoModel.from_pretrained(
-            model_name,
-            _attn_implementation=attn_impl,
-            trust_remote_code=True,
-            use_safetensors=True,
+            model_name, _attn_implementation=attn_impl, trust_remote_code=True, use_safetensors=True
        ).eval()
        return ocr_tokenizer, ocr_model
    except Exception as e:
-        msg = str(e)
-        if "flash_attn" in msg or "FlashAttention2" in msg or "flash_attention_2" in msg:
+        if any(k in str(e).lower() for k in ["flash_attn", "flashattention2", "flash_attention_2"]):
            ocr_model = AutoModel.from_pretrained(
-                model_name,
-                _attn_implementation="eager",
-                trust_remote_code=True,
-                use_safetensors=True,
+                model_name, _attn_implementation="eager", trust_remote_code=True, use_safetensors=True
            ).eval()
            return ocr_tokenizer, ocr_model
        raise
@@ -150,22 +105,13 @@ tokenizer, model = _load_ocr_model()
 
 @spaces.GPU
 def process_image(image, model_size, task_type, is_eval_mode):
-    """
-    Returns: annotated image, markdown, and text (or markdown when there is no text).
-    """
     if image is None:
         return None, "Please upload an image first.", "Please upload an image first."
     dtype = _best_dtype()
     model_device = model.cuda().to(dtype) if torch.cuda.is_available() else model.to(dtype)
 
     with tempfile.TemporaryDirectory() as output_path:
-        if task_type == "Free OCR":
-            prompt = "<image>\nFree OCR. "
-        elif task_type == "Convert to Markdown":
-            prompt = "<image>\n<|grounding|>Convert the document to markdown. "
-        else:
-            prompt = "<image>\nFree OCR. "
-
+        prompt = "<image>\nFree OCR. " if task_type == "Free OCR" else "<image>\n<|grounding|>Convert the document to markdown. "
         temp_image_path = os.path.join(output_path, "temp_image.jpg")
         image.save(temp_image_path)
 
@@ -194,44 +140,36 @@ def process_image(image, model_size, task_type, is_eval_mode):
         image_result_path = os.path.join(output_path, "result_with_boxes.jpg")
         markdown_result_path = os.path.join(output_path, "result.mmd")
 
+        markdown_content = "Markdown result was not generated. This is expected for 'Free OCR' task."
         if os.path.exists(markdown_result_path):
             with open(markdown_result_path, "r", encoding="utf-8") as f:
                 markdown_content = f.read()
-        else:
-            markdown_content = "Markdown result was not generated. This is expected for 'Free OCR' task."
 
         result_image = None
         if os.path.exists(image_result_path):
-            result_image = Image.open(image_result_path)
-            result_image.load()
+            result_image = Image.open(image_result_path); result_image.load()
 
         text_result = plain_text_result if plain_text_result else markdown_content
         return result_image, markdown_content, text_result
 
 # ===============================================================
-# Chat (injects OCR
+# Chat (injects OCR) — with local R1
 # ===============================================================
-def _truncate(text, max_chars=3000):
-    return (text or "")[:max_chars]
+def _truncate(text, max_chars=3000): return (text or "")[:max_chars]
 
 def _system_prompt():
-    return (
-        "Use CONTEXTO_OCR if present; if it is missing, ask for it. Avoid definitive diagnoses."
-    )
+    return ("You are an educational clinical assistant. You do not replace medical judgment. "
+            "Use CONTEXTO_OCR if present; if it is missing, ask for it. Avoid definitive diagnoses.")
 
-def _ocr_context(ocr_md, ocr_txt):
-    return _truncate(ocr_md) or _truncate(ocr_txt) or ""
+def _ocr_context(ocr_md, ocr_txt): return _truncate(ocr_md) or _truncate(ocr_txt) or ""
 
 def to_chat_messages(chat_msgs, ocr_md, ocr_txt):
     sys = _system_prompt()
     ctx = _ocr_context(ocr_md, ocr_txt)
     if ctx:
-        sys += (
-            f"{ctx}\n---"
-        )
+        sys += ("\n\n---\n"
+                "CONTEXTO_OCR (primary source; if a datum is missing, say so explicitly):\n"
+                f"{ctx}\n---")
     msgs = [{"role": "system", "content": sys}]
     for m in (chat_msgs or []):
         if m.get("role") in ("user", "assistant"):
@@ -243,33 +181,28 @@ def r1_reply(user_msg, chat_msgs, ocr_md, ocr_txt):
         user_msg = "Analyze the CONTEXTO_OCR above and answer based on that content."
     try:
         msgs = to_chat_messages(chat_msgs, ocr_md, ocr_txt) + [{"role": "user", "content": user_msg}]
-        answer =
-        updated = (chat_msgs or []) + [
-            {"role": "assistant", "content": answer},
-        ]
+        answer = r1_chat_local(msgs, temperature=0.2, max_tokens=512)
+        updated = (chat_msgs or []) + [{"role": "user", "content": user_msg},
+                                       {"role": "assistant", "content": answer}]
         return updated, "", gr.update(value="")
     except Exception as e:
         err = f"{e.__class__.__name__}: {str(e) or repr(e)}"
         tb = traceback.format_exc(limit=2)
-        updated = (chat_msgs or []) + [
-            {"role": "assistant", "content": f"⚠️ LLM error: {err}"},
-        ]
+        updated = (chat_msgs or []) + [{"role": "user", "content": user_msg or ""},
                                       {"role": "assistant", "content": f"⚠️ LLM error: {err}"}]
        return updated, "", gr.update(value=f"{err}\n{tb}")
 
-def clear_chat():
-    return [], "", gr.update(value="")
+def clear_chat(): return [], "", gr.update(value="")
 
 # ===============================================================
 # UI (Gradio 5)
 # ===============================================================
-with gr.Blocks(title="DeepSeek-OCR +
+with gr.Blocks(title="DeepSeek-OCR + R1 Medical (fast GGUF)", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
-# DeepSeek-OCR → Medical Chat with **DeepSeek-R1 Medical Mini
+# DeepSeek-OCR → Medical Chat with **DeepSeek-R1 Medical Mini (fast local GGUF)**
 1) **Upload an image** and run **OCR** (annotated image, Markdown, and text).
-2) **Chat** with **
+2) **Chat** with **R1 Medical Mini**, which automatically uses the **OCR** as context.
 *Educational use; it does not replace medical advice.*
        """
    )
@@ -280,18 +213,12 @@ with gr.Blocks(title="DeepSeek-OCR + DeepSeek-R1 Medical Mini", theme=gr.themes.
     with gr.Row():
         with gr.Column(scale=1):
             image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard", "webcam"])
-            model_size = gr.Dropdown(
-                value="Convert to Markdown", label="Task Type",
-            )
-            eval_mode_checkbox = gr.Checkbox(
-                value=False, label="Enable Evaluation Mode",
-                info="Text only (faster). Uncheck to see the annotated image and markdown.",
-            )
+            model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"],
+                                     value="Gundam (Recommended)", label="Model Size")
+            task_type = gr.Dropdown(choices=["Free OCR", "Convert to Markdown"],
+                                    value="Convert to Markdown", label="Task Type")
+            eval_mode_checkbox = gr.Checkbox(value=False, label="Enable Evaluation Mode",
+                                             info="Text only (faster). Uncheck to see the annotated image and markdown.")
             submit_btn = gr.Button("Process Image", variant="primary")
 
         with gr.Column(scale=2):
@@ -304,10 +231,10 @@ with gr.Blocks(title="DeepSeek-OCR + DeepSeek-R1 Medical Mini", theme=gr.themes.
             md_preview = gr.Textbox(label="OCR Markdown snapshot", lines=10, interactive=False)
             txt_preview = gr.Textbox(label="OCR text snapshot", lines=10, interactive=False)
 
-    gr.Markdown("## Clinical Chat (
+    gr.Markdown("## Clinical Chat (R1 Medical Mini — local GGUF)")
     with gr.Row():
         with gr.Column(scale=2):
-            chatbot = gr.Chatbot(label="OCR Assistant (R1
+            chatbot = gr.Chatbot(label="OCR Assistant (R1 GGUF)", type="messages", height=420)
             user_in = gr.Textbox(label="Message", placeholder="Type your question… (empty = analyze the OCR only)", lines=2)
             with gr.Row():
                 send_btn = gr.Button("Send", variant="primary")
@@ -315,7 +242,6 @@ with gr.Blocks(title="DeepSeek-OCR + DeepSeek-R1 Medical Mini", theme=gr.themes.
         with gr.Column(scale=1):
             error_box = gr.Textbox(label="Debug (on error)", lines=8, interactive=False)
 
-    # OCR → outputs and state
     submit_btn.click(
         fn=process_image,
         inputs=[image_input, model_size, task_type, eval_mode_checkbox],
@@ -326,12 +252,8 @@ with gr.Blocks(title="DeepSeek-OCR + DeepSeek-R1 Medical Mini", theme=gr.themes.
         outputs=[ocr_md_state, ocr_txt_state, md_preview, txt_preview],
     )
 
-        fn=r1_reply,
-        inputs=[user_in, chatbot, ocr_md_state, ocr_txt_state],
-        outputs=[chatbot, user_in, error_box],
-    )
+    send_btn.click(fn=r1_reply, inputs=[user_in, chatbot, ocr_md_state, ocr_txt_state],
+                   outputs=[chatbot, user_in, error_box])
     clear_btn.click(fn=clear_chat, outputs=[chatbot, user_in, error_box])
 
 if __name__ == "__main__":
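The inline comment in `r1_chat_local` notes that a `llama-cpp-python` build that cannot use `create_chat_completion` (for example, a GGUF without a usable chat template) can render the chat to a ChatML prompt and call the plain completion API instead. A minimal sketch of that path, assuming the caller supplies the loaded `Llama` instance (`chat_via_completion` is a hypothetical helper, not part of this commit):

```python
from llama_cpp import Llama

def _format_chatml(messages):
    # Same ChatML rendering as in app.py above.
    parts = [f"<|im_start|>{m.get('role', 'user')}\n{m.get('content', '')}<|im_end|>\n" for m in messages]
    return "".join(parts) + "<|im_start|>assistant\n"

def chat_via_completion(llm: Llama, messages, temperature=0.2, max_tokens=384):
    # Plain completion API; stop at the ChatML end-of-turn marker.
    out = llm(_format_chatml(messages), temperature=temperature, max_tokens=max_tokens, stop=["<|im_end|>"])
    return out["choices"][0]["text"]
```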
requirements.txt CHANGED

@@ -1,8 +1,7 @@
-# --- Core runtime ---
 gradio==5.49.1
 spaces>=0.28.3
 
-# PyTorch + Transformers
+# PyTorch + Transformers (for DeepSeek-OCR)
 torch==2.6.0
 torchvision==0.21.0
 transformers==4.46.3
@@ -12,7 +11,7 @@ safetensors>=0.4.5
 huggingface-hub>=0.30.0
 hf-transfer>=0.1.6
 
-#
+# Utils and vision
 pillow>=10.4.0
 numpy>=1.26.0
 tqdm>=4.66.4
@@ -25,9 +24,9 @@ pydantic==2.10.6
 protobuf<4
 click<8.1
 
-#
+# Local LLM (GGUF)
 llama-cpp-python==0.2.90
 
-#
+# (Optional GPU to speed up OCR; do not install on CPU/Zero)
 # flash-attn==2.7.3 --no-build-isolation
 # xformers==0.0.28.post1
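To confirm the pinned stack installed cleanly, a minimal smoke test (a sketch; the expected versions are the pins above):

```python
# Verify the pinned stack imports and reports the expected versions.
import gradio, torch, transformers, llama_cpp

print("gradio:", gradio.__version__)              # expect 5.49.1
print("torch:", torch.__version__)                # expect 2.6.0
print("transformers:", transformers.__version__)  # expect 4.46.3
print("llama_cpp:", llama_cpp.__version__)        # expect 0.2.90
```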