# app.py - DeepSeek-OCR + DeepSeek-R1 Medical Mini (remote HF or local GGUF) - Gradio 5
import os, tempfile, traceback
import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
import spaces
from huggingface_hub import hf_hub_download, InferenceClient
from llama_cpp import Llama
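# Example configuration (illustrative values only; adjust to your deployment):
#   R1_REMOTE=1 HF_TOKEN=hf_xxx                          -> chat via HF Inference API
#   R1_REMOTE=0 GGUF_REPO=<repo> GGUF_FILE=<file>.gguf   -> chat via local llama-cpp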
# ===============================================================
# LLM (CHAT) configuration: DeepSeek-R1 Medical Mini
# - Remote (HF Inference): R1_REMOTE=1 and (optionally) R1_MODEL_ID, HF_TOKEN
# - Local GGUF (CPU/Zero): R1_REMOTE=0 and GGUF_REPO / GGUF_FILE
# ===============================================================
R1_REMOTE = os.getenv("R1_REMOTE", "0") == "1"
R1_MODEL_ID = os.getenv("R1_MODEL_ID", "Mouhib007/DeepSeek-r1-Medical-Mini")
HF_TOKEN = os.getenv("HF_TOKEN")  # public model -> may be None
# ---- Local GGUF (fallback / offline mode) ----
GGUF_CANDIDATES = []
ENV_REPO = os.getenv("GGUF_REPO", "").strip()
ENV_FILE = os.getenv("GGUF_FILE", "").strip()
if ENV_REPO and ENV_FILE:
    GGUF_CANDIDATES.append((ENV_REPO, ENV_FILE))
# Default candidate (adjust if you use a different one)
GGUF_CANDIDATES.append((
    "mradermacher/DeepSeek-r1-Medical-Mini-GGUF",
    "DeepSeek-r1-Medical-Mini.f16.gguf",
))
N_CTX = int(os.getenv("N_CTX", "2048"))
N_THREADS = int(os.getenv("N_THREADS", str(os.cpu_count() or 4)))
N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
N_BATCH = int(os.getenv("N_BATCH", "96"))
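# Note: N_GPU_LAYERS=0 keeps llama-cpp fully on CPU; raise it to offload
# transformer layers to the GPU when one is available.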
# ---- Remote client (HF Inference) ----
_remote_client = None
def get_remote_client():
    global _remote_client
    if _remote_client is None:
        _remote_client = InferenceClient(model=R1_MODEL_ID, token=HF_TOKEN, timeout=60)
    return _remote_client
# ---- ChatML format (compatible with DeepSeek/Qwen) ----
def _format_chatml(messages):
    parts = []
    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "")
        parts.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
    parts.append("<|im_start|>assistant\n")
    return "".join(parts)
def r1_chat(messages, temperature=0.2, max_tokens=384):
    """Remote (HF) or local (llama-cpp) inference for DeepSeek-R1 Medical Mini."""
    if R1_REMOTE:
        client = get_remote_client()
        try:
            # Some endpoints support chat_completion directly
            resp = client.chat_completion(messages=messages, temperature=temperature, max_tokens=max_tokens)
            return resp.choices[0].message["content"]
        except Exception:
            # Universal fallback: text_generation with a ChatML prompt
            try:
                prompt = _format_chatml(messages)
                return client.text_generation(
                    prompt,
                    max_new_tokens=max_tokens,
                    temperature=temperature,
                    stop_sequences=["<|im_end|>"],
                    stream=False,
                )
            except Exception:
                # If the remote call fails (401/429/etc.), fall back to local GGUF if available
                pass
    # Local GGUF
    llm = get_llm()
    out = llm.create_chat_completion(messages=messages, temperature=temperature, max_tokens=max_tokens)
    return out["choices"][0]["message"]["content"]
# ---- Local loader (GGUF) ----
_llm = None
def _download_gguf():
    last_err = None
    for repo, fname in GGUF_CANDIDATES:
        try:
            return hf_hub_download(repo_id=repo, filename=fname), repo, fname
        except Exception as e:
            last_err = e
    raise RuntimeError(f"Could not download any GGUF. Last error: {last_err}")
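# Candidates are tried in order, so a GGUF_REPO/GGUF_FILE override (if set)
# takes precedence over the default repo above.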
def get_llm():
    global _llm
    if _llm is not None:
        return _llm
    gguf_path, _, _ = _download_gguf()
    _llm = Llama(
        model_path=gguf_path,
        # Don't force chat_format; use the template embedded in the R1 GGUF
        n_ctx=N_CTX,
        n_threads=N_THREADS,
        n_gpu_layers=N_GPU_LAYERS,
        n_batch=N_BATCH,
        verbose=False,
    )
    return _llm
# Optional warmup (avoids waiting on the first message when running locally)
if os.getenv("WARMUP", "0") == "1" and not R1_REMOTE:
try:
get_llm()
except Exception:
pass
# ===============================================================
# DeepSeek-OCR (UNCHANGED, with a fallback when FlashAttention2 is unavailable)
# ===============================================================
def _best_dtype():
    if torch.cuda.is_available():
        return torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
    return torch.float32
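# bfloat16 is preferred where supported: it has the same exponent range as
# float32, so it is less prone to overflow than float16.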
def _load_ocr_model():
    model_name = "deepseek-ai/DeepSeek-OCR"
    ocr_tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    attn_impl = os.getenv("OCR_ATTN_IMPL", "flash_attention_2")  # same default as before
    try:
        ocr_model = AutoModel.from_pretrained(
            model_name,
            _attn_implementation=attn_impl,
            trust_remote_code=True,
            use_safetensors=True,
        ).eval()
        return ocr_tokenizer, ocr_model
    except Exception as e:
        # If loading fails because of FlashAttention2, retry in "eager" mode (CPU/compat)
        msg = str(e)
        if "flash_attn" in msg or "FlashAttention2" in msg or "flash_attention_2" in msg:
            ocr_model = AutoModel.from_pretrained(
                model_name,
                _attn_implementation="eager",
                trust_remote_code=True,
                use_safetensors=True,
            ).eval()
            return ocr_tokenizer, ocr_model
        raise
tokenizer, model = _load_ocr_model()
@spaces.GPU
def process_image(image, model_size, task_type, is_eval_mode):
    """
    Returns: annotated image, markdown, and plain text (or markdown when no text is produced).
    """
    if image is None:
        return None, "Please upload an image first.", "Please upload an image first."
    dtype = _best_dtype()
    model_device = model.cuda().to(dtype) if torch.cuda.is_available() else model.to(dtype)
    with tempfile.TemporaryDirectory() as output_path:
        if task_type == "Free OCR":
            prompt = "<image>\nFree OCR. "
        elif task_type == "Convert to Markdown":
            prompt = "<image>\n<|grounding|>Convert the document to markdown. "
        else:
            prompt = "<image>\nFree OCR. "
        temp_image_path = os.path.join(output_path, "temp_image.jpg")
        image.save(temp_image_path)
        size_configs = {
            "Tiny": {"base_size": 512, "image_size": 512, "crop_mode": False},
            "Small": {"base_size": 640, "image_size": 640, "crop_mode": False},
            "Base": {"base_size": 1024, "image_size": 1024, "crop_mode": False},
            "Large": {"base_size": 1280, "image_size": 1280, "crop_mode": False},
            "Gundam (Recommended)": {"base_size": 1024, "image_size": 640, "crop_mode": True},
        }
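        # "Gundam" combines a 1024px global view with 640px crops (crop_mode=True),
        # which in DeepSeek-OCR trades some speed for better detail on dense pages.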
        config = size_configs.get(model_size, size_configs["Gundam (Recommended)"])
        plain_text_result = model_device.infer(
            tokenizer,
            prompt=prompt,
            image_file=temp_image_path,
            output_path=output_path,
            base_size=config["base_size"],
            image_size=config["image_size"],
            crop_mode=config["crop_mode"],
            save_results=True,
            test_compress=True,
            eval_mode=is_eval_mode,
        )
        image_result_path = os.path.join(output_path, "result_with_boxes.jpg")
        markdown_result_path = os.path.join(output_path, "result.mmd")
        if os.path.exists(markdown_result_path):
            with open(markdown_result_path, "r", encoding="utf-8") as f:
                markdown_content = f.read()
        else:
            markdown_content = "Markdown result was not generated. This is expected for the 'Free OCR' task."
        result_image = None
        if os.path.exists(image_result_path):
            result_image = Image.open(image_result_path)
            result_image.load()
        text_result = plain_text_result if plain_text_result else markdown_content
        return result_image, markdown_content, text_result
# ===============================================================
# Chat (injects the OCR into the first system message), using R1
# ===============================================================
def _truncate(text, max_chars=3000):
    return (text or "")[:max_chars]
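# 3000 characters is a rough budget (assumption) intended to keep the system
# prompt plus OCR context comfortably inside the default N_CTX=2048 window.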
def _system_prompt():
    return (
        "You are an educational clinical assistant. You do not replace medical judgment. "
        "Use OCR_CONTEXT if present; if it is missing, ask for it. Avoid definitive diagnoses."
    )
def _ocr_context(ocr_md, ocr_txt):
    return _truncate(ocr_md) or _truncate(ocr_txt) or ""
def to_chat_messages(chat_msgs, ocr_md, ocr_txt):
    sys = _system_prompt()
    ctx = _ocr_context(ocr_md, ocr_txt)
    if ctx:
        sys += (
            "\n\n---\n"
            "OCR_CONTEXT (primary source; if a datum is missing, say so explicitly):\n"
            f"{ctx}\n---"
        )
    msgs = [{"role": "system", "content": sys}]
    for m in (chat_msgs or []):
        if m.get("role") in ("user", "assistant"):
            msgs.append({"role": m["role"], "content": m.get("content", "")})
    return msgs
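# Example shape of the outgoing list when OCR context exists:
#   [{"role": "system", "content": "...OCR_CONTEXT..."},
#    {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]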
def r1_reply(user_msg, chat_msgs, ocr_md, ocr_txt):
    if not user_msg:
        user_msg = "Analyze the OCR_CONTEXT above and answer based on that content."
    try:
        msgs = to_chat_messages(chat_msgs, ocr_md, ocr_txt) + [{"role": "user", "content": user_msg}]
        answer = r1_chat(msgs, temperature=0.2, max_tokens=512)
        updated = (chat_msgs or []) + [
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": answer},
        ]
        return updated, "", gr.update(value="")
    except Exception as e:
        err = f"{e.__class__.__name__}: {str(e) or repr(e)}"
        tb = traceback.format_exc(limit=2)
        updated = (chat_msgs or []) + [
            {"role": "user", "content": user_msg or ""},
            {"role": "assistant", "content": f"⚠️ LLM error: {err}"},
        ]
        return updated, "", gr.update(value=f"{err}\n{tb}")
def clear_chat():
    return [], "", gr.update(value="")
# ===============================================================
# UI (Gradio 5)
# ===============================================================
with gr.Blocks(title="DeepSeek-OCR + DeepSeek-R1 Medical Mini", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # DeepSeek-OCR → Medical Chat with **DeepSeek-R1 Medical Mini** (remote HF or local GGUF)
        1) **Upload an image** and run **OCR** (annotated image, Markdown, and text).
        2) **Chat** with **DeepSeek-R1 Medical Mini**, which automatically uses the **OCR** output as context.
        *For educational use; not a substitute for medical advice.*
        """
    )
    ocr_md_state = gr.State("")
    ocr_txt_state = gr.State("")
    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard", "webcam"])
            model_size = gr.Dropdown(
                choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"],
                value="Gundam (Recommended)", label="Model Size",
            )
            task_type = gr.Dropdown(
                choices=["Free OCR", "Convert to Markdown"],
                value="Convert to Markdown", label="Task Type",
            )
            eval_mode_checkbox = gr.Checkbox(
                value=False, label="Enable Evaluation Mode",
                info="Text only (faster). Uncheck to see the annotated image and markdown.",
            )
            submit_btn = gr.Button("Process Image", variant="primary")
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("Annotated Image"):
                    output_image = gr.Image(interactive=False)
                with gr.TabItem("Markdown Preview"):
                    output_markdown = gr.Markdown()
                with gr.TabItem("Markdown Source (or Eval Output)"):
                    output_text = gr.Textbox(lines=18, show_copy_button=True, interactive=False)
    with gr.Row():
        md_preview = gr.Textbox(label="OCR Markdown Snapshot", lines=10, interactive=False)
        txt_preview = gr.Textbox(label="OCR Text Snapshot", lines=10, interactive=False)
gr.Markdown("## Chat Clínico (DeepSeek-R1 Medical Mini)")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="OCR Assistant (R1 Medical Mini)", type="messages", height=420)
            user_in = gr.Textbox(label="Message", placeholder="Type your query… (empty = analyze the OCR only)", lines=2)
            with gr.Row():
                send_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear")
        with gr.Column(scale=1):
            error_box = gr.Textbox(label="Debug (shown on error)", lines=8, interactive=False)
    # OCR → outputs and state
    submit_btn.click(
        fn=process_image,
        inputs=[image_input, model_size, task_type, eval_mode_checkbox],
        outputs=[output_image, output_markdown, output_text],
    ).then(
        fn=lambda md, tx: (md, tx, md, tx),
        inputs=[output_markdown, output_text],
        outputs=[ocr_md_state, ocr_txt_state, md_preview, txt_preview],
    )
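    # The .then() step copies the fresh OCR outputs into the gr.State holders so
    # the chat handler can read them on later turns, independent of the visible tabs.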
    # Chat
    send_btn.click(
        fn=r1_reply,
        inputs=[user_in, chatbot, ocr_md_state, ocr_txt_state],
        outputs=[chatbot, user_in, error_box],
    )
    clear_btn.click(fn=clear_chat, outputs=[chatbot, user_in, error_box])
if __name__ == "__main__":
    demo.queue(max_size=20)
    demo.launch()