# --- Hugging Face page scrape residue (not part of the program) ---
# CassianK's picture
# Update app.py
# b94ccd9 verified
# raw / history blame / 6.78 kB
# app.py β€” DeepSeek-OCR (HF Space, Claude Skill ready)
# 지원: /ocr API (REST) + Gradio UI
# ν˜Έν™˜: DeepSeek-OCR-main / DeepSeek-OCR-master / DeepSeek-OCR-hf
import io, os, sys, base64, traceback
from typing import Optional
from PIL import Image
import numpy as np
import gradio as gr
from fastapi import FastAPI, UploadFile, File, Body
from fastapi.responses import JSONResponse
# ─────────────────────────────────────────────
# 1. 경둜 μžλ™ 인식
# ─────────────────────────────────────────────
# Locate the bundled DeepSeek-OCR sources: the checkout may use any of these
# directory layouts, so register every one that actually exists on sys.path.
ROOT = os.path.dirname(__file__)
CANDIDATES = [
    "DeepSeek-OCR-master",
    "DeepSeek-OCR-hf",
    os.path.join("DeepSeek-OCR-main", "DeepSeek-OCR-master"),
    os.path.join("DeepSeek-OCR-main", "DeepSeek-OCR-hf"),
]
for _rel in CANDIDATES:
    _abs = os.path.join(ROOT, _rel)
    if os.path.isdir(_abs) and _abs not in sys.path:
        sys.path.append(_abs)
        print(f"[path] added: {_abs}")
# ─────────────────────────────────────────────
# 2. DeepSeek-OCR μ–΄λŒ‘ν„°
# ─────────────────────────────────────────────
class DeepSeekOCRAdapter:
    """Adapter that locates whichever DeepSeek-OCR inference entry point is
    importable and exposes it through a single ``recognize()`` call.

    Resolution order:
      1. ``deepseek_ocr.ocr_image`` or ``deepseek_ocr.DeepSeekOCR().recognize``
      2. ``run_dpsk_ocr.run`` or ``run_dpsk_ocr.infer`` (HF variant)
      3. ``run_dpsk_ocr_image.{run,infer,main}`` (CLI style, takes a file path)
    When none of the modules import, a demo stub is installed instead of
    raising, so the app still starts.
    """

    def __init__(self):
        # self.fn is always callable(img, lang="auto") -> str after __init__.
        self.fn = None
        # (1) deepseek_ocr.py
        try:
            import deepseek_ocr as dso
            if hasattr(dso, "ocr_image"):
                self.fn = lambda img, lang="auto": dso.ocr_image(img, lang=lang)
                print("[Adapter] Using deepseek_ocr.ocr_image()")
                return
            if hasattr(dso, "DeepSeekOCR"):
                model = dso.DeepSeekOCR()
                self.fn = lambda img, lang="auto": model.recognize(img, lang=lang)
                print("[Adapter] Using deepseek_ocr.DeepSeekOCR()")
                return
        except Exception as e:
            print("[Adapter] deepseek_ocr import failed:", e)
        # (2) run_dpsk_ocr.py (HF variant; ignores lang)
        try:
            import run_dpsk_ocr as runner
            if hasattr(runner, "run"):
                self.fn = lambda img, lang="auto": runner.run(img)
                print("[Adapter] Using run_dpsk_ocr.run()")
                return
            if hasattr(runner, "infer"):
                self.fn = lambda img, lang="auto": runner.infer(img)
                print("[Adapter] Using run_dpsk_ocr.infer()")
                return
        except Exception as e:
            print("[Adapter] run_dpsk_ocr import failed:", e)
        # (3) run_dpsk_ocr_image.py (CLI style: entry point expects a path)
        try:
            import run_dpsk_ocr_image as runner
            for cand in ("run", "infer", "main"):
                if hasattr(runner, cand):
                    fn = getattr(runner, cand)

                    def _call(img, lang="auto", _fn=fn):
                        # Spill the image to a temp PNG for the path-based
                        # entry point, and unlink it afterwards — the original
                        # used delete=False without ever removing the file,
                        # leaking one PNG per call.
                        import os
                        import tempfile
                        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                            img.save(tmp.name)
                        try:
                            return str(_fn(tmp.name))
                        finally:
                            os.unlink(tmp.name)

                    self.fn = _call
                    print(f"[Adapter] Using run_dpsk_ocr_image.{cand}()")
                    return
        except Exception as e:
            print("[Adapter] run_dpsk_ocr_image import failed:", e)
        # fallback: keep the app alive with a demo marker instead of crashing
        self.fn = lambda img, lang="auto": "[DEMO] μ—°κ²° 성곡 β€” μ‹€μ œ μΆ”λ‘  ν•¨μˆ˜ 확인 ν•„μš”."
        print("[Adapter] ⚠️ DEMO fallback active.")

    def recognize(self, image: Image.Image, lang="auto"):
        """Run OCR on *image*; *lang* is forwarded positionally to the backend."""
        return self.fn(image, lang)
# ─────────────────────────────────────────────
# 3. μœ ν‹Έ
# ─────────────────────────────────────────────
def _to_pil(x) -> Image.Image:
    """Coerce *x* (PIL image, raw encoded bytes, or numpy array) to RGB PIL."""
    if isinstance(x, Image.Image):
        pil = x
    elif isinstance(x, (bytes, bytearray)):
        pil = Image.open(io.BytesIO(x))
    elif isinstance(x, np.ndarray):
        pil = Image.fromarray(x)
    else:
        raise TypeError("Unsupported image type")
    return pil.convert("RGB")
def _b64_to_image(image_b64: str) -> Image.Image:
    """Decode a base64 payload into an RGB PIL image."""
    raw = base64.b64decode(image_b64)
    return _to_pil(raw)
def _url_to_image(url: str) -> Image.Image:
    """Fetch *url* (20 s timeout) and decode the response body as an image."""
    import requests

    resp = requests.get(url, timeout=20)
    resp.raise_for_status()
    return _to_pil(resp.content)
# ─────────────────────────────────────────────
# 4. FastAPI
# ─────────────────────────────────────────────
# Application singletons: the REST app and one shared OCR engine
# (built once at startup so model loading happens a single time).
api = FastAPI(title="DeepSeek-OCR API")
_engine = DeepSeekOCRAdapter()
@api.post("/ocr")
async def ocr_endpoint(
    image_b64: Optional[str] = Body(default=None),
    image_url: Optional[str] = Body(default=None),
    lang: str = Body(default="auto"),
    file: Optional[UploadFile] = File(default=None),
):
    """OCR a single image supplied as a multipart upload, a base64 body
    field, or a URL — checked in that order of precedence.

    Returns ``{"ok": True, "text": ...}`` on success, a 400 JSON error when
    no image input was supplied, and a 500 JSON error (including the
    traceback) on any processing failure.
    """
    try:
        # Input precedence: uploaded file > base64 string > URL.
        if file:
            image = _to_pil(await file.read())
        elif image_b64:
            image = _b64_to_image(image_b64)
        elif image_url:
            image = _url_to_image(image_url)
        else:
            return JSONResponse(status_code=400, content={"ok": False, "error": "No image input"})
        text = _engine.recognize(image, lang)
        return {"ok": True, "text": text}
    except Exception as e:
        # NOTE(review): returning the full traceback exposes internals —
        # acceptable for a demo Space, reconsider for production.
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e), "trace": traceback.format_exc()})
# ─────────────────────────────────────────────
# 5. Gradio UI
# ─────────────────────────────────────────────
def gradio_predict(img, lang):
    """Gradio callback: OCR the uploaded image, or report a missing input."""
    if img is None:
        return "No image provided."
    pil = _to_pil(img)
    return _engine.recognize(pil, lang)
# Demo UI: image + language selector in, extracted text out.
with gr.Blocks(title="DeepSeek-OCR (Claude Ready)") as demo:
    gr.Markdown("### DeepSeek-OCR (HF Space)\n이미지λ₯Ό μ—…λ‘œλ“œν•˜λ©΄ ν…μŠ€νŠΈλ₯Ό μΆ”μΆœν•©λ‹ˆλ‹€.")
    with gr.Row():
        img = gr.Image(type="pil", label="Input Image")
        out = gr.Textbox(label="OCR Result", lines=8)
    lang = gr.Radio(["auto","en","ko","ja","zh"], value="auto", label="Language")
    btn = gr.Button("Run OCR")
    btn.click(gradio_predict, inputs=[img, lang], outputs=[out])
app = api  # expose the FastAPI application for the Space runtime
demo.queue()  # enable request queueing (matches current Gradio API)
# NOTE(review): `demo` is queued but never launched or mounted onto `api`
# (e.g. via gr.mount_gradio_app) — confirm the Space runtime serves the UI.