CassianK committed on
Commit
9889d2d
Β·
verified Β·
1 Parent(s): 31c15d6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +193 -0
app.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py — DeepSeek-OCR (HF Space, Claude Skill-ready)
# - /ocr : REST API (POST) → file / image_b64 / image_url 지원
# - Gradio UI : 브라우저에서 업로드 테스트
# 폴더 구조 전제:
#   /app.py
#   /DeepSeek-OCR-master/ (repo 그대로)
#   /requirements.txt
8
+
9
import base64
import importlib
import io
import os
import sys
import tempfile
import traceback
from typing import Optional

import gradio as gr
import numpy as np
from fastapi import FastAPI, UploadFile, File, Body
from fastapi.responses import JSONResponse
from PIL import Image
22
+
23
+ # ─────────────────────────────────────────────
24
+ # 0) Repo 경둜 μΆ”κ°€
25
+ # ─────────────────────────────────────────────
26
# Make the vendored DeepSeek-OCR checkout importable.
# abspath() keeps ROOT stable even when __file__ is a relative path, so the
# sys.path entry does not depend on the current working directory.
ROOT = os.path.dirname(os.path.abspath(__file__))
DEEPSEEK_ROOT = os.path.join(ROOT, "DeepSeek-OCR-master")
if DEEPSEEK_ROOT not in sys.path:
    sys.path.append(DEEPSEEK_ROOT)
30
+
31
+ # ─────────────────────────────────────────────
32
+ # 1) DeepSeek-OCR μ–΄λŒ‘ν„°
33
+ # - μ €μž₯μ†Œκ°€ μ œκ³΅ν•˜λŠ” μ‹€μ œ μ§„μž…μ  이름이 λ‹€λ₯Ό 수 μžˆμ–΄
34
+ # μ—¬λŸ¬ νŒ¨ν„΄μ„ μ‹œλ„ν•˜λ„λ‘ κ΅¬μ„±ν–ˆμŠ΅λ‹ˆλ‹€.
35
+ # - ν•„μš” μ‹œ μ•„λž˜ "TODO" 뢀뢄을 μ‹€μ œ ν•¨μˆ˜λͺ…μœΌλ‘œ λ°”κΎΈμ„Έμš”.
36
+ # ─────────────────────────────────────────────
37
class DeepSeekOCRAdapter:
    """Locate a DeepSeek-OCR entry point and expose it through ``recognize``.

    The vendored repository may expose its OCR entry point under different
    names, so construction probes several candidates in order and keeps the
    first one that binds successfully:

    A-1) ``deepseek_ocr.DeepSeekOCR`` class: ``.recognize(image, lang=...)``
         or, failing that, ``run`` / ``infer`` / ``image_to_text`` /
         ``predict`` called with the image only.
    A-2) ``deepseek_ocr.ocr_image(image, lang=...)`` function.
    B)   ``run_dpsk_ocr_image`` module: the first callable among ``infer`` /
         ``run`` / ``predict`` / ``main``.

    If no candidate can be bound, a demo stub returning a fixed placeholder
    string is installed so the service still starts.

    Attributes:
        backend: The ``DeepSeekOCR`` instance when probe A-1 succeeded,
            otherwise ``None``.
        fn: The resolved callable, invoked as ``fn(image, lang=...)``.
    """

    # Method names tried, in order, on a DeepSeekOCR instance that has no
    # recognize() method.
    _BACKEND_METHODS = ("run", "infer", "image_to_text", "predict")
    # Function names tried, in order, on the run_dpsk_ocr_image module.
    _RUNNER_FUNCTIONS = ("infer", "run", "predict", "main")

    def __init__(self):
        self.backend = None
        self.fn = None  # callable(image, lang="auto") -> str

        probes = (
            ("A-1", self._use_backend_class),
            ("A-2", self._use_ocr_function),
            ("B", self._use_runner_script),
        )
        for label, probe in probes:
            # Importing and instantiating third-party model code can fail for
            # reasons this file cannot anticipate, so any failure is logged
            # and the next candidate is tried.
            try:
                if probe():
                    return
            except Exception as e:
                print(f"[DeepSeekOCRAdapter] {label} fallback:", e)

        # Last safety net: keep the app usable with a placeholder backend.
        print("[DeepSeekOCRAdapter] No concrete entry found. Falling back to DEMO.")
        self.fn = self._demo

    def _use_backend_class(self) -> bool:
        """Probe A-1: bind to an instance of ``deepseek_ocr.DeepSeekOCR``.

        Returns:
            True when the class exists and was instantiated, False when the
            module has no ``DeepSeekOCR`` attribute.
        """
        module = importlib.import_module("deepseek_ocr")
        if not hasattr(module, "DeepSeekOCR"):
            return False
        self.backend = module.DeepSeekOCR()

        def _call(image: "Image.Image", lang="auto"):
            # Assumes recognize() accepts (image, lang=...) — TODO confirm
            # against the vendored repository.
            if hasattr(self.backend, "recognize"):
                return self.backend.recognize(image, lang=lang)
            # Otherwise the method may go by another common name; these are
            # called with the image only.
            for cand in self._BACKEND_METHODS:
                if hasattr(self.backend, cand):
                    return getattr(self.backend, cand)(image)
            raise AttributeError("DeepSeekOCR class found but no callable method.")

        self.fn = _call
        print("[DeepSeekOCRAdapter] Using deepseek_ocr.DeepSeekOCR")
        return True

    def _use_ocr_function(self) -> bool:
        """Probe A-2: bind to the module-level ``deepseek_ocr.ocr_image``.

        Returns:
            True when the function exists, False otherwise.
        """
        module = importlib.import_module("deepseek_ocr")
        if not hasattr(module, "ocr_image"):
            return False

        def _call(image: "Image.Image", lang="auto"):
            # TODO: adjust the keyword names if the real signature differs.
            return module.ocr_image(image, lang=lang)

        self.fn = _call
        print("[DeepSeekOCRAdapter] Using deepseek_ocr.ocr_image")
        return True

    def _use_runner_script(self) -> bool:
        """Probe B: bind to a callable exported by ``run_dpsk_ocr_image``.

        Returns:
            True when one of the candidate names is callable, False otherwise.
        """
        # NOTE(review): importing a script-style module may execute its
        # top-level code — confirm the script is import-safe.
        runner = importlib.import_module("run_dpsk_ocr_image")
        for cand in self._RUNNER_FUNCTIONS:
            target = getattr(runner, cand, None)
            if callable(target):
                self.fn = self._wrap_runner(target)
                print(f"[DeepSeekOCRAdapter] Using run_dpsk_ocr_image.{cand}")
                return True
        return False

    @staticmethod
    def _wrap_runner(target):
        """Adapt ``target`` to the ``fn(image, lang=...)`` calling convention.

        The wrapped function is first called with the PIL image itself. If
        that raises, the image is written to a temporary PNG and the function
        is called once more with the file path; a non-string result of that
        second call is converted with ``str``.
        """

        def _call(image: "Image.Image", lang="auto"):
            try:
                return target(image)  # entry point that takes a PIL image
            except Exception:
                # The entry point may want a file path instead. A temporary
                # directory is used (rather than an open NamedTemporaryFile)
                # so the file can be written and re-opened by name.
                with tempfile.TemporaryDirectory() as tmp_dir:
                    path = os.path.join(tmp_dir, "input.png")
                    image.save(path)
                    result = target(path)
                # Whatever comes back (dict, text, ...) is reported as str.
                return result if isinstance(result, str) else str(result)

        return _call

    @staticmethod
    def _demo(image: "Image.Image", lang="auto"):
        """Placeholder backend used when no real entry point was found."""
        return "[DEMO] 연결 완료 — 실제 함수명을 app.py에서 한 줄만 바꿔주세요."

    def recognize(self, image: "Image.Image", lang: str = "auto") -> str:
        """Run OCR on ``image`` using the entry point resolved at construction.

        Args:
            image: The image to read.
            lang: Language hint forwarded to the backend as ``lang=``.

        Returns:
            Whatever the resolved backend returns (expected to be text).
        """
        return self.fn(image, lang=lang)
116
+
117
+
118
+ # ─────────────────────────────────────────────
119
+ # 2) μœ ν‹Έ
120
+ # ─────────────────────────────────────────────
121
def _to_pil(x) -> "Image.Image":
    """Convert a supported image input to an RGB ``PIL.Image.Image``.

    Args:
        x: A PIL image, encoded image bytes (``bytes`` / ``bytearray``), or a
            NumPy array accepted by ``Image.fromarray``.

    Returns:
        A new image in ``"RGB"`` mode.

    Raises:
        TypeError: If ``x`` is none of the supported types.
    """
    if isinstance(x, Image.Image):
        return x.convert("RGB")
    if isinstance(x, (bytes, bytearray)):
        # convert() returns a new image, so the decoded source can be closed
        # as soon as the conversion is done.
        with Image.open(io.BytesIO(x)) as decoded:
            return decoded.convert("RGB")
    if isinstance(x, np.ndarray):
        return Image.fromarray(x).convert("RGB")
    raise TypeError("Unsupported image type")
129
+
130
def _b64_to_image(image_b64: str) -> "Image.Image":
    """Decode a base64-encoded image into an RGB PIL image.

    Both a bare base64 payload and a data URL of the form
    ``data:<mime>;base64,<payload>`` are accepted.

    Args:
        image_b64: The base64 text, optionally wrapped in a data URL.

    Returns:
        The decoded image, converted by ``_to_pil``.
    """
    payload = image_b64.strip()
    if payload.startswith("data:"):
        # Keep only what follows the first comma; without this the header's
        # letters would be decoded as if they were part of the payload.
        payload = payload.partition(",")[2]
    raw = base64.b64decode(payload)
    return _to_pil(raw)
133
+
134
def _url_to_image(url: str) -> "Image.Image":
    """Download an image over HTTP(S) and return it as an RGB PIL image.

    Args:
        url: Address of the image; must start with ``http://`` or
            ``https://``.

    Returns:
        The downloaded image, converted by ``_to_pil``.

    Raises:
        ValueError: If ``url`` is not an http(s) URL.
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    import requests  # function-scope import, as in the original

    url = url.strip()
    if not url.lower().startswith(("http://", "https://")):
        raise ValueError("image_url must be an http(s) URL")
    # NOTE(review): the URL is caller-supplied, so this request can reach
    # hosts that are only visible from the server (SSRF). Consider an
    # allow-list or blocking private address ranges.
    r = requests.get(url, timeout=20)
    r.raise_for_status()
    return _to_pil(r.content)
139
+
140
+
141
+ # ─────────────────────────────────────────────
142
+ # 3) FastAPI (REST)
143
+ # ─────────────────────────────────────────────
144
# ASGI application that carries the REST endpoint(s) defined below.
api = FastAPI(title="DeepSeek-OCR API")
# One shared OCR engine, resolved once when this module is imported.
_engine = DeepSeekOCRAdapter()
146
+
147
@api.post("/ocr")
async def ocr(
    image_b64: Optional[str] = Body(default=None),
    image_url: Optional[str] = Body(default=None),
    lang: str = Body(default="auto"),
    file: Optional[UploadFile] = File(default=None),
):
    """Run OCR on one image supplied as an upload, base64 text, or URL.

    Exactly one source is used, in this order of precedence:
    ``file``, then ``image_b64``, then ``image_url``.

    NOTE(review): because a ``File`` parameter is declared, FastAPI is
    expected to read the ``Body`` fields from the same multipart form rather
    than from a JSON body — confirm against the FastAPI version in use.

    Returns:
        ``{"ok": True, "text": ...}`` on success.
        HTTP 400 with ``{"ok": False, "error": ...}`` when no source is
        given or the supplied image cannot be fetched or decoded.
        HTTP 500 with ``{"ok": False, "error": ..., "trace": ...}`` for any
        other failure.
    """
    try:
        if file is None and not image_b64 and not image_url:
            return JSONResponse(status_code=400, content={
                "ok": False, "error": "Provide one of: file | image_b64 | image_url"
            })

        # Loading the image is kept separate from recognition so that bad
        # client input is reported as a 400 instead of a server error.
        try:
            if file is not None:
                image = _to_pil(await file.read())
            elif image_b64:
                image = _b64_to_image(image_b64)
            else:
                image = _url_to_image(image_url)
        except (ValueError, TypeError, OSError) as e:
            # Covers invalid base64 (binascii.Error is a ValueError),
            # unsupported input types, and OSError-derived failures raised
            # while fetching or decoding the image.
            return JSONResponse(status_code=400, content={
                "ok": False, "error": f"Invalid image input: {e}"
            })

        # NOTE(review): recognize() runs synchronously inside this coroutine
        # and therefore blocks the event loop for the duration of the OCR.
        text = _engine.recognize(image, lang=lang)
        return {"ok": True, "text": text}
    except Exception as e:
        # NOTE(review): the full traceback is returned to the caller, which
        # exposes server internals — consider logging it instead.
        return JSONResponse(status_code=500, content={
            "ok": False, "error": str(e), "trace": traceback.format_exc()
        })
171
+
172
+ # ─────────────────────────────────────────────
173
+ # 4) Gradio UI (ν…ŒμŠ€νŠΈ)
174
+ # ─────────────────────────────────────────────
175
def _predict(image, lang):
    """Gradio click handler: run OCR on the uploaded image.

    Args:
        image: The value of the image component, or ``None`` when nothing
            was uploaded.
        lang: The selected language code, forwarded to the engine.

    Returns:
        The recognized text, or ``"No image."`` when ``image`` is ``None``.
    """
    if image is None:
        return "No image."
    pil = _to_pil(image)
    return _engine.recognize(pil, lang=lang)
180
+
181
# Browser UI for manual testing: upload an image, pick a language, run OCR.
# NOTE(review): the nesting below (image and result side by side in one row)
# was reconstructed; confirm it matches the intended layout.
with gr.Blocks(title="DeepSeek-OCR (Claude-ready)") as demo:
    gr.Markdown("### DeepSeek-OCR (HF Space)\n이미지를 업로드하면 텍스트를 추출합니다.")
    with gr.Row():
        img = gr.Image(type="pil", label="Input image")
        out = gr.Textbox(label="OCR Result", lines=8)
    lang = gr.Radio(["auto", "en", "ko", "ja", "zh"], value="auto", label="Language")
    btn = gr.Button("Run OCR")
    btn.click(_predict, inputs=[img, lang], outputs=[out])

# Process one UI request at a time. Gradio 4 replaced the `concurrency_count`
# argument of queue() with `default_concurrency_limit`; try the current
# spelling first and fall back to the Gradio 3.x one.
try:
    demo.queue(default_concurrency_limit=1)
except TypeError:
    demo.queue(concurrency_count=1)

# HF Spaces normally starts the Gradio app as the default entry point. To
# expose the FastAPI endpoints alongside it, the UI is mounted on the FastAPI
# application and that application is published under the name `app`.
# The /ocr route was registered before the mount, so it is matched first.
# NOTE(review): nothing in this file starts a server; an ASGI server
# (e.g. `uvicorn app:app`) has to be launched externally — confirm the
# Space's launch command.
app = gr.mount_gradio_app(api, demo, path="/")