# NOTE: the lines "Spaces: / Running / Running" here were HuggingFace Spaces
# web-UI copy/paste residue, not part of the program.
#!/usr/bin/env python3
"""
HuggingFace Spaces Gradio App for DeepSeek-OCR-2
Uses lazy loading to avoid startup timeout on free CPU tier.
"""
import os
import sys
import traceback
import time
import threading
import gradio as gr
import torch
from PIL import Image

# Configuration — every value is overridable via an environment variable.
MODEL_NAME = os.getenv("MODEL_NAME", "deepseek-ai/DeepSeek-OCR-2")
MODEL_DTYPE = os.getenv("MODEL_DTYPE", "float16")  # "float16" | "bfloat16" | "float32"
MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "2048"))  # max side length (px) before downscaling
HF_TOKEN = os.getenv("HF_TOKEN", None)  # optional Hub token (e.g. for gated models)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE_MAP = {
    "float16": torch.float16,
    "bfloat16": torch.bfloat16,
    "float32": torch.float32,
}
# Unknown MODEL_DTYPE strings silently fall back to float16.
TORCH_DTYPE = DTYPE_MAP.get(MODEL_DTYPE, torch.float16)

# Global state for lazy loading; writes are guarded by _model_lock in get_model().
_model = None        # loaded model, or None until the first OCR request
_processor = None    # matching processor, set together with _model
_model_lock = threading.Lock()
_loading = False     # True while a load attempt is in progress
_load_error = None   # message of the last failed load attempt, if any

print(f"π App starting (lazy model loading)")
print(f"π Device: {DEVICE}")
print(f"π’ Dtype: {MODEL_DTYPE}")
print(f"π¦ Model: {MODEL_NAME}")
def get_model():
    """Lazily load and cache the OCR model and processor on first request.

    Returns:
        tuple: ``(model, processor)`` once loading has succeeded.

    Raises:
        RuntimeError: if loading fails. The failure message is cached in
            ``_load_error`` so subsequent calls fail fast instead of
            re-downloading a model that cannot load.
    """
    global _model, _processor, _loading, _load_error
    # Fast path without the lock; the assignment below happens under the lock,
    # so at worst we take the slow path once.
    if _model is not None:
        return _model, _processor
    with _model_lock:
        # Double-check after acquiring lock
        if _model is not None:
            return _model, _processor
        if _load_error:
            raise RuntimeError(f"Model failed to load: {_load_error}")
        _loading = True
        print(f"β³ Loading model: {MODEL_NAME}...")
        print(f" Memory info: {torch.cuda.memory_allocated() if torch.cuda.is_available() else 'CPU mode'}")
        try:
            # Imported here (not at module top) so app startup stays fast.
            from transformers import AutoModel, AutoProcessor
            print("π¦ Loading processor...")
            processor = AutoProcessor.from_pretrained(
                MODEL_NAME,
                trust_remote_code=True,
                token=HF_TOKEN,
            )
            print("π§ Loading model...")
            model = AutoModel.from_pretrained(
                MODEL_NAME,
                torch_dtype=TORCH_DTYPE,
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                device_map="auto" if torch.cuda.is_available() else None,
                token=HF_TOKEN,
            )
            # device_map="auto" already places the model on GPU, so an explicit
            # move (and the log line) is only correct in the CPU case.
            if not torch.cuda.is_available():
                print(f"π Moving model to {DEVICE}...")
                model = model.to(DEVICE)
            model = model.eval()
            _model = model
            _processor = processor
            print(f"β Model loaded successfully on {DEVICE}")
            return _model, _processor
        except Exception as e:
            error_msg = f"{type(e).__name__}: {str(e)}"
            _load_error = error_msg
            print(f"β Failed to load model: {error_msg}", file=sys.stderr)
            traceback.print_exc()
            # Chain the original exception so full context survives in logs.
            raise RuntimeError(error_msg) from e
        finally:
            # Reset on every exit path (the original reset it separately in the
            # success and failure branches, leaving gaps on unexpected exits).
            _loading = False
def run_ocr(image):
    """Process an uploaded image and return OCR results as text.

    Args:
        image: a ``PIL.Image.Image``, or ``None`` when nothing was uploaded
            (Gradio passes ``None`` in that case).

    Returns:
        str: formatted OCR text with a metadata footer, or a human-readable
        error message — this function never raises to the UI.
    """
    if image is None:
        return "Error: No image provided"
    try:
        print("π OCR request received, loading model...")
        model, processor = get_model()
        print("β Model loaded, processing image...")
    except Exception as e:
        error_msg = f"Error loading model: {str(e)}"
        print(f"β {error_msg}")
        # Check if it's a memory issue
        if "memory" in str(e).lower() or "cuda" in str(e).lower():
            return f"{error_msg}\n\nπ‘ This appears to be a memory issue. The DeepSeek-OCR-2 model (3B parameters) may be too large for the free CPU tier.\n\nSolutions:\n- Upgrade to GPU hardware (t4-small)\n- Try with smaller images\n- Use the local Docker version instead"
        else:
            return f"{error_msg}\n\nThis may be due to:\n- Network issues downloading the model\n- Temporary HuggingFace Hub issues\n- Hardware limitations\n\nTry again in a few moments or use the local Docker version."
    try:
        print("πΌοΈ Preprocessing image...")
        # Preprocess: the model expects RGB; downscale huge uploads to keep
        # memory and latency bounded.
        if image.mode != "RGB":
            image = image.convert("RGB")
        w, h = image.size
        print(f"π Original size: {w}x{h}")
        if max(w, h) > MAX_IMAGE_SIZE:
            scale = MAX_IMAGE_SIZE / max(w, h)
            image = image.resize((int(w * scale), int(h * scale)), Image.Resampling.LANCZOS)
            print(f"π Resized to: {image.size}")
        start = time.time()
        print("π Running inference...")
        # Run inference: prefer the model's chat-style API when the remote
        # code provides one, otherwise fall back to a generic forward pass.
        if hasattr(model, 'chat'):
            response = model.chat(
                processor,
                image,
                "Extract all text from this image.",
                history=[],
            )
            text = response if isinstance(response, str) else str(response)
        else:
            inputs = processor(images=image, return_tensors="pt")
            # Fix: processors may return non-tensor entries (ints, lists);
            # only move objects that support .to() to the device.
            inputs = {k: (v.to(DEVICE) if hasattr(v, "to") else v) for k, v in inputs.items()}
            with torch.no_grad():
                outputs = model(**inputs)
            if hasattr(outputs, 'logits'):
                # Greedy decode from raw logits — a crude fallback path.
                ids = outputs.logits.argmax(-1)
                text = processor.batch_decode(ids, skip_special_tokens=True)[0]
            else:
                text = str(outputs)
        elapsed = time.time() - start
        print(f"β Inference completed in {elapsed:.2f}s")
        # Build result
        result = f"=== OCR Result ===\n\n{text}\n\n"
        result += f"--- Metadata ---\n"
        result += f"Model: {MODEL_NAME}\n"
        result += f"Device: {DEVICE}\n"
        result += f"Time: {elapsed:.2f}s\n"
        return result
    except Exception as e:
        error_msg = f"Error during inference: {str(e)}"
        print(f"β {error_msg}")
        return f"{error_msg}\n\n{traceback.format_exc()}"
def get_status():
    """Build a plain-text status report without triggering a model load."""
    loaded_text = 'Yes' if _model is not None else 'No (loads on first request)'
    report = [
        "=== DeepSeek-OCR-2 Status ===",
        "",
        f"Model: {MODEL_NAME}",
        f"Device: {DEVICE}",
        f"Dtype: {MODEL_DTYPE}",
        f"CUDA Available: {torch.cuda.is_available()}",
        "",
        f"Model Loaded: {loaded_text}",
    ]
    # Transient/error state, shown only when relevant.
    if _loading:
        report.append("Currently loading model...")
    if _load_error:
        report.append(f"Error: {_load_error}")
    report += [
        "",
        "Note: Model loads on first OCR request to avoid startup timeout.",
        "First request may take 1-2 minutes on CPU.",
    ]
    return "\n".join(report)
# Simple Gradio Interface - disable API docs to avoid schema bugs
# (api_name=False here and show_api=False in launch() both suppress the
# auto-generated API schema, which this app intentionally avoids)
demo = gr.Interface(
    fn=run_ocr,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.Textbox(label="OCR Result", lines=20),
    title="DeepSeek-OCR-2",
    description=f"Upload an image to extract text. Model: {MODEL_NAME} | Device: {DEVICE}\n\nNote: First request loads the model (~1-2 min on CPU).",
    allow_flagging="never",
    api_name=False  # Disable API to avoid schema generation bug
)

# Bind to all interfaces on 7860 — the port HuggingFace Spaces expects.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False)