"""
DeepSeek-OCR-2 API — HuggingFace Spaces (CPU)
==============================================
POST /ocr              — image + one optional bbox
POST /ocr/batch        — image + a list of boxes in a single request (new)
POST /ocr/base64       — JSON body with a base64-encoded image
POST /ocr/batch/base64 — JSON body with a base64-encoded image + a list of boxes
GET  /health           — status check
GET  /demo             — embedded web UI
"""
|
|
| import os, io, base64, json, tempfile, logging, time |
| from contextlib import asynccontextmanager |
| from typing import Optional, List |
|
|
| import torch |
| from PIL import Image |
| from fastapi import FastAPI, File, UploadFile, Form, HTTPException |
| from fastapi.middleware.cors import CORSMiddleware |
| from fastapi.responses import JSONResponse, HTMLResponse |
| from transformers import AutoModel, AutoTokenizer |
| from pydantic import BaseModel |
|
|
# Root logging is configured once at import time; this service logs through
# the "ocr-api" logger.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("ocr-api")

# Hugging Face Hub id of the checkpoint loaded at startup.
MODEL_NAME = "deepseek-ai/DeepSeek-OCR-2"

# Both stay None until lifespan() has finished loading; the endpoints test
# `model is None` to answer 503 in the meantime.
model = tokenizer = None
|
|
| |
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Load the tokenizer and model before the app starts serving, release them
    on shutdown.

    The loaded objects are published through the module-level ``model`` and
    ``tokenizer`` globals, which the endpoints read.
    """
    global model, tokenizer
    log.info("Loading %s ...", MODEL_NAME)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    model = AutoModel.from_pretrained(
        MODEL_NAME,
        _attn_implementation="eager",
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
    )
    model.eval()
    log.info("Model ready (cpu, bfloat16)")
    try:
        yield
    finally:
        # Rebind to None instead of `del`: deleting the global names would make
        # every later `model is None` check raise NameError (e.g. when the app
        # is started again in the same process).
        model = tokenizer = None
|
|
| |
# ASGI application; model loading/unloading is delegated to lifespan().
app = FastAPI(
    title="DeepSeek-OCR-2 API",
    version="2.0.0",
    lifespan=lifespan,
)

# CORS is wide open: any origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
| |
| from contextlib import contextmanager |
|
|
@contextmanager
def force_cpu():
    """
    Temporarily monkey-patch torch so CUDA-only inference code can run on CPU.

    While the ``with`` block is active:

    1. ``Tensor.cuda()`` and ``Module.cuda()`` return ``self`` unchanged.
    2. ``Tensor.to()`` and ``Module.to()`` drop every positional argument that
       names a CUDA device and any ``device=`` keyword, then forward what is
       left (e.g. a dtype) to the real ``.to()``.  If nothing is left, or the
       real call raises, ``self`` is returned.
    3. ``Tensor.bfloat16()`` returns ``self`` unchanged.  A dtype passed to
       ``.to(torch.bfloat16)`` is NOT intercepted and is still applied.
    4. ``torch.autocast(...)`` returns a do-nothing context manager.

    The original attributes are restored when the block exits, also on error.
    The patches are process-wide (they replace attributes on ``torch`` itself),
    so two overlapping uses from different threads would interfere.
    """
    import contextlib

    def _names_cuda(arg):
        # True for "cuda", "cuda:0", torch.device("cuda"), ...
        return isinstance(arg, (str, torch.device)) and "cuda" in str(arg)

    def _make_safe_to(real_to):
        # One factory serves both Tensor.to and Module.to.
        def _safe_to(self, *args, **kwargs):
            kept = [a for a in args if not _names_cuda(a)]
            kwargs.pop("device", None)
            if not kept and not kwargs:
                return self
            try:
                return real_to(self, *kept, **kwargs)
            except Exception:
                # Deliberate best-effort: a failed conversion leaves the
                # object as it was rather than aborting inference.
                return self
        return _safe_to

    def _return_self(self, *args, **kwargs):
        return self

    def _null_autocast(*args, **kwargs):
        return contextlib.nullcontext()

    patches = (
        (torch.Tensor, "cuda", _return_self),
        (torch.nn.Module, "cuda", _return_self),
        (torch.Tensor, "to", _make_safe_to(torch.Tensor.to)),
        (torch.nn.Module, "to", _make_safe_to(torch.nn.Module.to)),
        (torch.Tensor, "bfloat16", _return_self),
        (torch, "autocast", _null_autocast),
    )
    saved = [(owner, name, getattr(owner, name)) for owner, name, _ in patches]

    try:
        # Applied inside the try so a failure half-way still restores.
        for owner, name, replacement in patches:
            setattr(owner, name, replacement)
        yield
    finally:
        for owner, name, original in saved:
            setattr(owner, name, original)
|
|
|
|
| |
def run_ocr(pil_image: Image.Image, mode: str = "free") -> str:
    """
    Run DeepSeek-OCR-2 on a PIL image and return the extracted text.

    Args:
        pil_image: image to recognise; it is written to a temporary PNG
            because the model is given a file path.
        mode: "markdown" selects the markdown-conversion prompt; any other
            value selects the plain "OCR all text" prompt.

    Returns:
        The recognised text.  When the model exposes ``infer()``, the text is
        taken from, in order: the return value, the captured stdout, and the
        files written to the output directory.  Otherwise the generic
        chat-template + ``generate()`` path is used.

    Any exception raised by the model propagates to the caller; the temporary
    PNG is removed in every case.
    """
    import sys
    from contextlib import redirect_stdout

    prompt_text = (
        "Convert the document to markdown."
        if mode == "markdown"
        else "Please OCR the image and return all text exactly."
    )

    # Only reserve a path here; the handle is closed before PIL writes to it.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        tmp_path = tmp.name

    try:
        # Inside the try so a failed save does not leak the temp file.
        pil_image.save(tmp_path, format="PNG")

        if hasattr(model, "infer"):
            with tempfile.TemporaryDirectory() as out_dir:
                stdout_buf = io.StringIO()
                try:
                    with force_cpu(), redirect_stdout(stdout_buf):
                        result = model.infer(
                            tokenizer,
                            prompt=f"<image>\n{prompt_text}",
                            image_file=tmp_path,
                            output_path=out_dir,
                            base_size=1024,
                            image_size=768,
                            crop_mode=True,
                            save_results=True,
                        )
                finally:
                    # Echo whatever the model printed, even if infer() raised,
                    # so the output still reaches the real stdout.
                    captured = stdout_buf.getvalue()
                    sys.stdout.write(captured)
                    sys.stdout.flush()

                return (
                    _ocr_text_from_result(result)
                    or _ocr_text_from_stdout(captured)
                    or _ocr_text_from_files(out_dir)
                )

        # Fallback for models without infer(): chat template + generate().
        messages = [{"role": "user", "content": [
            {"type": "image", "image": tmp_path},
            {"type": "text", "text": prompt_text},
        ]}]
        text_in = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs = tokenizer(text_in, return_tensors="pt")
        with torch.no_grad():
            out = model.generate(**inputs, max_new_tokens=1024, do_sample=False)
        # Keep only the tokens generated after the prompt.
        new_ids = out[:, inputs["input_ids"].shape[1]:]
        return tokenizer.decode(new_ids[0], skip_special_tokens=True)

    finally:
        try:
            os.unlink(tmp_path)
        except FileNotFoundError:
            pass


def _ocr_text_from_result(result) -> str:
    """Return the text carried by infer()'s return value, or "" if none."""
    if isinstance(result, str):
        return result
    if isinstance(result, dict):
        value = result.get("text") or result.get("output") or ""
        return value if isinstance(value, str) else str(value)
    return ""


def _ocr_text_from_stdout(captured: str) -> str:
    """Return captured stdout, cut after the first separator line found."""
    cleaned = captured.strip()
    for sep in ("=====================", "=====", "-----"):
        if sep in cleaned:
            return cleaned.split(sep, 1)[-1].strip()
    return cleaned


def _ocr_text_from_files(out_dir: str) -> str:
    """Return the first non-empty .txt/.md/.json file under out_dir, or ""."""
    import glob

    for pattern in ("*.txt", "*.md", "*.json"):
        matches = glob.glob(os.path.join(out_dir, "**", pattern), recursive=True)
        for path in sorted(matches):
            try:
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read().strip()
            except (OSError, UnicodeDecodeError) as exc:
                log.warning("Skipping unreadable OCR output %s: %s", path, exc)
                continue
            if content:
                return content
    return ""
|
|
|
|
def crop_img(img: Image.Image, x: int, y: int, w: int, h: int) -> Image.Image:
    """
    Crop ``img`` to the box with top-left corner (x, y), width w and height h.

    The box is clipped to the image bounds first, so a box that only partly
    overlaps the image yields the overlapping part.

    Raises:
        ValueError: if the clipped box is empty (no overlap with the image,
            or a non-positive width/height).
    """
    iw, ih = img.size
    x1, y1 = max(0, x), max(0, y)
    x2, y2 = min(iw, x + w), min(ih, y + h)
    if x2 <= x1 or y2 <= y1:
        raise ValueError(f"Invalid bbox x={x} y={y} w={w} h={h} for {iw}×{ih} image")
    return img.crop((x1, y1, x2, y2))
|
|
|
|
| |
|
|
@app.get("/")
async def root():
    # Static service banner pointing at the demo page and the API docs.
    banner = dict(
        status="ok",
        model=MODEL_NAME,
        device="cpu",
        demo="/demo",
        docs="/docs",
    )
    return banner
|
|
@app.get("/health")
async def health():
    # "model_loaded" turns true once lifespan() has assigned the global model.
    loaded = model is not None
    return {"status": "ok", "model_loaded": loaded}
|
|
|
|
| |
@app.post("/ocr")
async def ocr_single(
    image: UploadFile = File(...),
    x: Optional[int] = Form(None),
    y: Optional[int] = Form(None),
    w: Optional[int] = Form(None),
    h: Optional[int] = Form(None),
    box_id: Optional[int] = Form(None, description="رقم المربع للتعرف عليه في النتيجة"),
    mode: str = Form("free"),
):
    """
    OCR an uploaded image, optionally restricted to one bounding box.

    The image is cropped only when all of x, y, w and h are supplied;
    otherwise the whole image is processed.  ``box_id`` is echoed back
    unchanged so the caller can match the response to its box.

    Errors: 503 while the model is still loading, 400 for an undecodable
    image or an invalid bbox, 500 when OCR itself fails.
    """
    if model is None:
        raise HTTPException(503, "Model not loaded yet — wait a moment and retry")

    data = await image.read()
    try:
        pil = Image.open(io.BytesIO(data)).convert("RGB")
    except Exception as e:
        raise HTTPException(400, f"Cannot decode image: {e}") from e

    # Reported size is that of the uploaded image, before any cropping.
    img_w, img_h = pil.size
    cropped = False
    if all(v is not None for v in (x, y, w, h)):
        try:
            pil = crop_img(pil, x, y, w, h)
            cropped = True
        except ValueError as e:
            raise HTTPException(400, str(e)) from e

    t0 = time.time()
    try:
        # NOTE: run_ocr is synchronous, so the event loop is blocked for the
        # duration of the inference.
        text = run_ocr(pil, mode=mode)
    except Exception as e:
        log.exception("OCR error")
        raise HTTPException(500, f"OCR failed: {e}") from e

    return JSONResponse({
        "box_id": box_id,
        "text": text,
        "mode": mode,
        "cropped": cropped,
        "bbox": {"x": x, "y": y, "w": w, "h": h} if cropped else None,
        "image_size": {"w": img_w, "h": img_h},
        "elapsed_sec": round(time.time() - t0, 2),
    })
|
|
|
|
| |
@app.post("/ocr/batch")
async def ocr_batch(
    image: UploadFile = File(...),
    boxes: str = Form(..., description=(
        "\n"
        "JSON array of box objects, e.g.:\n"
        '[{"id":1,"x":10,"y":20,"w":100,"h":50},\n'
        '{"id":2,"x":200,"y":30,"w":150,"h":60}]\n'
        "id, x, y, w, h are all required per box.\n"
    )),
    mode: str = Form("free"),
):
    """
    Receive an image plus a JSON list of boxes, OCR each box, and return the
    results in the same order as the boxes.

    One request instead of N separate requests.

    A box that cannot be cropped or recognised does not fail the request: its
    entry carries status "invalid_bbox" or "error" and the message in "error".
    Malformed ``boxes`` JSON is rejected up front with 400.
    """
    if model is None:
        raise HTTPException(503, "Model not loaded yet")

    data = await image.read()
    try:
        pil_full = Image.open(io.BytesIO(data)).convert("RGB")
    except Exception as e:
        raise HTTPException(400, f"Cannot decode image: {e}") from e

    img_w, img_h = pil_full.size

    # Validate the whole list before starting any (slow) inference.
    try:
        box_list = json.loads(boxes)
        if not isinstance(box_list, list):
            raise ValueError("boxes must be a JSON array")
        for b in box_list:
            if not isinstance(b, dict):
                raise ValueError("Each box must be a JSON object")
            for k in ("id", "x", "y", "w", "h"):
                if k not in b:
                    raise ValueError(f"Each box must have '{k}' field")
            for k in ("x", "y", "w", "h"):
                v = b[k]
                if isinstance(v, bool) or not isinstance(v, (int, float)):
                    raise ValueError(f"Box field '{k}' must be a number")
    except (json.JSONDecodeError, ValueError) as e:
        raise HTTPException(400, f"Invalid boxes JSON: {e}") from e

    t_total = time.time()
    results = []

    for b in box_list:
        bid = b["id"]
        t0 = time.time()
        try:
            cropped_pil = crop_img(pil_full, b["x"], b["y"], b["w"], b["h"])
            text = run_ocr(cropped_pil, mode=mode)
            status = "ok"
            error = None
        except ValueError as e:
            text = ""
            status = "invalid_bbox"
            error = str(e)
        except Exception as e:
            log.exception("OCR error box_id=%s", bid)
            text = ""
            status = "error"
            error = str(e)

        elapsed = round(time.time() - t0, 2)
        results.append({
            "box_id": bid,
            "x": b["x"],
            "y": b["y"],
            "w": b["w"],
            "h": b["h"],
            "text": text,
            "status": status,
            "error": error,
            "elapsed_sec": elapsed,
        })
        log.info("box %s done in %.1fs — status=%s", bid, elapsed, status)

    return JSONResponse({
        "mode": mode,
        "image_size": {"w": img_w, "h": img_h},
        "total_boxes": len(results),
        "total_elapsed_sec": round(time.time() - t_total, 2),
        "results": results,
    })
|
|
|
|
| |
# One crop rectangle inside a base64 batch request.
class BoxItem(BaseModel):
    id: int  # caller-chosen identifier, echoed back as "box_id" in the result
    x: int   # left edge of the box
    y: int   # top edge of the box
    w: int   # box width
    h: int   # box height
|
|
# JSON body of POST /ocr/batch/base64.
class OCRBatchB64Request(BaseModel):
    image_b64: str        # base64-encoded image bytes
    boxes: List[BoxItem]  # boxes to OCR; results are returned in this order
    mode: str = "free"    # passed through to run_ocr() as its mode
|
|
# JSON body of POST /ocr/base64.
class OCRSingleB64Request(BaseModel):
    image_b64: str                 # base64-encoded image bytes
    box_id: Optional[int] = None   # echoed back unchanged in the response
    # The image is cropped only when all four of x, y, w, h are set.
    x: Optional[int] = None
    y: Optional[int] = None
    w: Optional[int] = None
    h: Optional[int] = None
    mode: str = "free"             # passed through to run_ocr() as its mode
|
|
@app.post("/ocr/base64")
async def ocr_base64(req: OCRSingleB64Request):
    """
    JSON variant of POST /ocr: the image arrives base64-encoded in the body.

    ``image_b64`` may be bare base64 or a ``data:...;base64,...`` URL.  The
    image is cropped only when all of x, y, w and h are supplied.

    Errors: 503 while the model is still loading, 400 for an undecodable
    payload or an invalid bbox, 500 when OCR itself fails.
    """
    if model is None:
        raise HTTPException(503, "Model not loaded yet")

    # Browsers commonly produce data URLs; keep only the part after the comma.
    b64_payload = req.image_b64
    if b64_payload.startswith("data:") and "," in b64_payload:
        b64_payload = b64_payload.split(",", 1)[1]

    try:
        pil = Image.open(io.BytesIO(base64.b64decode(b64_payload))).convert("RGB")
    except Exception as e:
        raise HTTPException(400, f"Bad base64: {e}") from e

    # Reported size is that of the decoded image, before any cropping.
    img_w, img_h = pil.size
    cropped = False
    if all(v is not None for v in (req.x, req.y, req.w, req.h)):
        try:
            pil = crop_img(pil, req.x, req.y, req.w, req.h)
            cropped = True
        except ValueError as e:
            raise HTTPException(400, str(e)) from e

    t0 = time.time()
    try:
        text = run_ocr(pil, mode=req.mode)
    except Exception as e:
        # Logged like in POST /ocr so the traceback is not lost.
        log.exception("OCR error")
        raise HTTPException(500, f"OCR failed: {e}") from e

    return JSONResponse({
        "box_id": req.box_id,
        "text": text,
        "mode": req.mode,
        "cropped": cropped,
        "bbox": {"x": req.x, "y": req.y, "w": req.w, "h": req.h} if cropped else None,
        "image_size": {"w": img_w, "h": img_h},
        "elapsed_sec": round(time.time() - t0, 2),
    })
|
|
@app.post("/ocr/batch/base64")
async def ocr_batch_base64(req: OCRBatchB64Request):
    """
    JSON variant of POST /ocr/batch: base64 image plus a list of boxes.

    ``image_b64`` may be bare base64 or a ``data:...;base64,...`` URL.
    Results are returned in the order of ``req.boxes``.  A box that cannot be
    cropped or recognised does not fail the request: its entry carries status
    "invalid_bbox" or "error" and the message in "error".
    """
    if model is None:
        raise HTTPException(503, "Model not loaded yet")

    # Browsers commonly produce data URLs; keep only the part after the comma.
    b64_payload = req.image_b64
    if b64_payload.startswith("data:") and "," in b64_payload:
        b64_payload = b64_payload.split(",", 1)[1]

    try:
        pil_full = Image.open(io.BytesIO(base64.b64decode(b64_payload))).convert("RGB")
    except Exception as e:
        raise HTTPException(400, f"Bad base64: {e}") from e

    img_w, img_h = pil_full.size
    t_total = time.time()
    results = []
    for b in req.boxes:
        t0 = time.time()
        try:
            cropped_pil = crop_img(pil_full, b.x, b.y, b.w, b.h)
            text = run_ocr(cropped_pil, mode=req.mode)
            status, err = "ok", None
        except ValueError as e:
            text, status, err = "", "invalid_bbox", str(e)
        except Exception as e:
            # Logged like in POST /ocr/batch so the traceback is not lost.
            log.exception("OCR error box_id=%s", b.id)
            text, status, err = "", "error", str(e)

        results.append({
            "box_id": b.id, "x": b.x, "y": b.y, "w": b.w, "h": b.h,
            "text": text, "status": status, "error": err,
            "elapsed_sec": round(time.time() - t0, 2),
        })

    return JSONResponse({
        "mode": req.mode,
        "image_size": {"w": img_w, "h": img_h},
        "total_boxes": len(results),
        "total_elapsed_sec": round(time.time() - t_total, 2),
        "results": results,
    })
|
|
|
|
| |
@app.get("/demo", response_class=HTMLResponse)
async def demo():
    # Serve the single-page demo UI held in the DEMO_HTML constant.
    page = HTMLResponse(content=DEMO_HTML)
    return page
|
|
|
|
| DEMO_HTML = r"""<!DOCTYPE html> |
| <html lang="ar" dir="rtl"> |
| <head> |
| <meta charset="UTF-8"/><meta name="viewport" content="width=device-width,initial-scale=1"/> |
| <title>OCR Batch โ ุงุณุชุฎุฑุงุฌ ุงููุต</title> |
| <style> |
| *,*::before,*::after{box-sizing:border-box;margin:0;padding:0} |
| :root{--bg:#0f172a;--sur:#1e293b;--card:#1a2744;--bdr:#334155; |
| --acc:#3b82f6;--dan:#ef4444;--ok:#22c55e;--warn:#f59e0b; |
| --txt:#e2e8f0;--mut:#94a3b8;--r:10px} |
| body{background:var(--bg);color:var(--txt);font-family:'Segoe UI',system-ui,sans-serif; |
| min-height:100vh;display:flex;flex-direction:column;align-items:center; |
| padding:20px 16px;gap:14px} |
| h1{font-size:1.35rem;font-weight:700}h1 span{color:var(--acc)} |
| |
| /* config */ |
| #cfg{width:100%;max-width:900px;display:flex;gap:10px;flex-wrap:wrap;align-items:center} |
| #cfg input{flex:1;min-width:200px;background:var(--sur);border:1px solid var(--bdr); |
| border-radius:var(--r);padding:8px 12px;color:var(--txt);font-size:.88rem} |
| #cfg select{background:var(--sur);border:1px solid var(--bdr);border-radius:var(--r); |
| padding:8px 10px;color:var(--txt);font-size:.88rem} |
| |
| /* buttons */ |
| .btn{padding:8px 16px;border-radius:var(--r);border:none;font-size:.87rem; |
| font-weight:600;cursor:pointer;transition:opacity .15s;white-space:nowrap} |
| .btn:hover{opacity:.8}.btn:disabled{opacity:.4;cursor:not-allowed} |
| .bp{background:var(--acc);color:#fff} |
| .bd{background:var(--dan);color:#fff} |
| .bg{background:var(--sur);color:var(--txt);border:1px solid var(--bdr)} |
| .bw{background:var(--warn);color:#000} |
| |
| /* status */ |
| #st{font-size:.83rem;color:var(--mut)} |
| #st.ok{color:var(--ok)}#st.err{color:var(--dan)}#st.ld{color:var(--acc)}#st.warn{color:var(--warn)} |
| |
| /* upload */ |
| #upz{width:100%;max-width:900px;border:2px dashed var(--bdr);border-radius:var(--r); |
| padding:28px;text-align:center;color:var(--mut);cursor:pointer;transition:.2s} |
| #upz:hover,#upz.drag{border-color:var(--acc);background:#3b82f611} |
| |
| /* workspace */ |
| #ws{display:none;width:100%;max-width:900px;flex-direction:column;gap:12px} |
| #ws.v{display:flex} |
| |
| /* toolbar */ |
| #tb{display:flex;gap:8px;flex-wrap:wrap;align-items:center} |
| #box-count{font-size:.82rem;color:var(--mut);background:var(--sur); |
| border:1px solid var(--bdr);border-radius:20px;padding:4px 12px} |
| |
| /* mode toggle */ |
| #send-mode{display:flex;gap:6px;align-items:center;font-size:.82rem;color:var(--mut)} |
| #send-mode label{display:flex;align-items:center;gap:4px;cursor:pointer} |
| |
| /* canvas */ |
| #cw{position:relative;width:100%;background:var(--sur);border:1px solid var(--bdr); |
| border-radius:var(--r);overflow:hidden;display:flex;justify-content:center} |
| canvas{display:block;max-width:100%;cursor:crosshair} |
| |
| /* box labels overlay (absolute over canvas) */ |
| #labels{position:absolute;top:0;left:50%;transform:translateX(-50%); |
| pointer-events:none;width:100%;height:100%} |
| |
| /* results table */ |
| #res-wrap{width:100%} |
| #res-head{display:flex;justify-content:space-between;align-items:center; |
| margin-bottom:8px;font-size:.85rem;font-weight:600} |
| .rc{background:var(--card);border:1px solid var(--bdr);border-radius:var(--r); |
| padding:12px 14px;display:grid;gap:8px;margin-bottom:8px} |
| .rh{display:flex;gap:10px;align-items:center;flex-wrap:wrap} |
| .bid{background:var(--acc);color:#fff;border-radius:20px;padding:2px 10px; |
| font-size:.78rem;font-weight:700;white-space:nowrap} |
| .coords{font-size:.78rem;color:var(--mut);font-family:monospace} |
| .elapsed{font-size:.76rem;color:var(--mut);margin-right:auto} |
| .rt{background:var(--sur);border:1px solid var(--bdr);border-radius:8px; |
| padding:10px 12px;font-size:.9rem;line-height:1.6;white-space:pre-wrap; |
| direction:auto;min-height:40px;outline:none} |
| .rt:focus{border-color:var(--acc)} |
| .rt.err-text{color:var(--dan)} |
| .ra{display:flex;gap:8px} |
| |
| /* progress bar */ |
| #prog-wrap{display:none;width:100%;background:var(--bdr);border-radius:20px;height:6px;overflow:hidden} |
| #prog{height:100%;background:var(--acc);transition:width .4s;border-radius:20px} |
| |
| /* skeleton */ |
| @keyframes sh{0%{background-position:-300px 0}100%{background-position:300px 0}} |
| .sk{background:linear-gradient(90deg,var(--card) 25%,var(--bdr) 50%,var(--card) 75%); |
| background-size:600px 100%;animation:sh 1.3s infinite;border-radius:6px; |
| height:16px;width:100%;margin:3px 0} |
| |
| /* summary badge */ |
| .sum{display:flex;gap:6px;flex-wrap:wrap;align-items:center} |
| .badge{border-radius:6px;padding:3px 10px;font-size:.78rem;font-weight:600} |
| .b-ok{background:#16a34a22;color:var(--ok);border:1px solid #16a34a44} |
| .b-err{background:#dc262622;color:var(--dan);border:1px solid #dc262644} |
| .b-t{background:#1e40af22;color:var(--acc);border:1px solid #1e40af44} |
| </style> |
| </head> |
| <body> |
| |
| <h1>๐ OCR Batch โ <span>ุชุญุฏูุฏ ู
ุฑุจุนุงุช ู
ุชุนุฏุฏุฉ</span></h1> |
| |
| <div id="cfg"> |
| <input id="api" type="text" placeholder="ุฑุงุจุท ุงูู API โ ู
ุซุงู: https://zienabm-ocr.hf.space" |
| value="https://zienabm-ocr.hf.space"/> |
| <select id="mode"> |
| <option value="free">Free OCR</option> |
| <option value="markdown">Markdown</option> |
| </select> |
| <button class="btn bg" onclick="doHealth()">ูุญุต ุงูุงุชุตุงู</button> |
| </div> |
| |
| <div id="st">ุฃุฏุฎู ุฑุงุจุท ุงูู API ุซู
ุงุฑูุน ุตูุฑุฉ</div> |
| |
| <div id="upz" onclick="document.getElementById('fi').click()" |
| ondragover="event.preventDefault();this.classList.add('drag')" |
| ondragleave="this.classList.remove('drag')" |
| ondrop="onDrop(event)"> |
| <input type="file" id="fi" accept="image/*" style="display:none" onchange="loadFile(this.files[0])"> |
| <svg width="32" height="32" fill="none" stroke="currentColor" stroke-width="1.5" viewBox="0 0 24 24" style="margin-bottom:6px"> |
| <path d="M4 16v2a2 2 0 002 2h12a2 2 0 002-2v-2M12 12V4m0 0L8 8m4-4l4 4"/> |
| </svg> |
| <p>ุงุณุญุจ ุงูุตูุฑุฉ ููุง ุฃู <strong>ุงุถุบุท ููุงุฎุชูุงุฑ</strong></p> |
| <p style="font-size:.78rem;margin-top:4px">JPG ยท PNG ยท WEBP</p> |
| </div> |
| |
| <div id="ws"> |
| <!-- toolbar --> |
| <div id="tb"> |
| <span id="box-count">0 ู
ุฑุจุน</span> |
| |
| <div id="send-mode"> |
| ุฅุฑุณุงู: |
| <label><input type="radio" name="sm" value="batch" checked> ุฏูุนุฉ ูุงุญุฏุฉ (batch)</label> |
| <label><input type="radio" name="sm" value="auto"> ููุฑู ููู ู
ุฑุจุน</label> |
| </div> |
| |
| <button class="btn bw" id="btn-send" onclick="sendBatch()" disabled> |
| ๐ค ุฅุฑุณุงู ุงููู |
| </button> |
| <button class="btn bg" onclick="undo()">โฉ ุชุฑุงุฌุน</button> |
| <button class="btn bd" onclick="clearAll()">๐ ู
ุณุญ ุงููู</button> |
| <label class="btn bg" style="cursor:pointer">๐ผ ุชุบููุฑ ุงูุตูุฑุฉ |
| <input type="file" accept="image/*" style="display:none" onchange="loadFile(this.files[0])"> |
| </label> |
| <button class="btn bg" onclick="doFullOCR()">๐ OCR ูุงู
ูุฉ</button> |
| </div> |
| |
| <!-- progress --> |
| <div id="prog-wrap"><div id="prog" style="width:0%"></div></div> |
| |
| <!-- canvas --> |
| <div id="cw"> |
| <canvas id="cv"></canvas> |
| <div id="labels"></div> |
| </div> |
| |
| <!-- results --> |
| <div id="res-wrap" style="display:none"> |
| <div id="res-head"> |
| <span id="res-title">ุงููุชุงุฆุฌ</span> |
| <div class="sum" id="res-sum"></div> |
| </div> |
| <div id="res"></div> |
| </div> |
| </div> |
| |
| <script> |
| // โโ State โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ |
| var cv=document.getElementById('cv'), ctx=cv.getContext('2d'); |
| var labelsDiv=document.getElementById('labels'); |
| var img=null, file=null, scale=1; |
| var boxes=[]; // [{id,x,y,w,h}] |
| var drawing=false, start=null, cur=null; |
| var busy=false, nextId=1; |
| |
| // โโ Helpers โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ |
| function apiUrl(){return document.getElementById('api').value.trim().replace(/\/$/,'');} |
| function ocrMode(){return document.getElementById('mode').value;} |
| function sendMode(){return document.querySelector('input[name="sm"]:checked').value;} |
| function setSt(m,c){var e=document.getElementById('st');e.textContent=m;e.className=c||'';} |
// Escape text before it is inserted via innerHTML; null/undefined become ''.
function esc(s){return String(s==null?'':s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;');}
| function updateCount(){ |
| document.getElementById('box-count').textContent=boxes.length+' ู
ุฑุจุน'; |
| var btn=document.getElementById('btn-send'); |
| btn.disabled=(boxes.length===0||sendMode()==='auto'); |
| } |
| document.querySelectorAll('input[name="sm"]').forEach(function(r){ |
| r.addEventListener('change',updateCount); |
| }); |
| |
| // โโ Health โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ |
| async function doHealth(){ |
| var u=apiUrl();if(!u){setSt('ุฃุฏุฎู ุฑุงุจุท ุงูู API','err');return;} |
| setSt('ุฌุงุฑู ุงููุญุต โฆ','ld'); |
| try{ |
| var r=await fetch(u+'/health'),d=await r.json(); |
| setSt(d.model_loaded?'โ ุงูุงุชุตุงู ูุงุฌุญ โ ุงููู
ูุฐุฌ ุฌุงูุฒ':'โ ู
ุชุตู โ ุงููู
ูุฐุฌ ูุง ูุฒุงู ููุญู
ููู', |
| d.model_loaded?'ok':'warn'); |
| }catch(e){setSt('โ ุชุนุฐูุฑ ุงูุงุชุตุงู: '+e.message,'err');} |
| } |
| |
| // โโ Load image โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ |
| function loadFile(f){ |
| if(!f)return; |
| if(!apiUrl()){setSt('ุฃุฏุฎู ุฑุงุจุท ุงูู API ุฃููุงู','err');return;} |
| file=f; |
| var url=URL.createObjectURL(f); |
| var im=new Image(); |
| im.onload=function(){ |
| img=im; |
| var maxW=document.getElementById('cw').clientWidth||800; |
| var sc=Math.min(1,maxW/im.naturalWidth); |
| cv.width=Math.round(im.naturalWidth*sc); |
| cv.height=Math.round(im.naturalHeight*sc); |
| scale=1/sc; boxes=[]; nextId=1; |
| document.getElementById('res').innerHTML=''; |
| document.getElementById('res-wrap').style.display='none'; |
| document.getElementById('prog-wrap').style.display='none'; |
| document.getElementById('ws').classList.add('v'); |
| document.getElementById('upz').style.display='none'; |
| redraw(); updateCount(); |
| setSt('ุงุฑุณู
ู
ุฑุจุนุงุช ุญูู ุงูู
ูุงุทู โ ุซู
ุงุถุบุท "ุฅุฑุณุงู ุงููู"'); |
| URL.revokeObjectURL(url); |
| }; |
| im.src=url; |
| } |
| |
| function onDrop(e){ |
| e.preventDefault();document.getElementById('upz').classList.remove('drag'); |
| var f=e.dataTransfer.files[0];if(f&&f.type.startsWith('image/'))loadFile(f); |
| } |
| |
| // โโ Canvas events โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ |
| function pos(e){var r=cv.getBoundingClientRect();return{x:e.clientX-r.left,y:e.clientY-r.top};} |
| function onDown(p){if(!img)return;drawing=true;start=p;cur=p;} |
| function onMove(p){cur=p;redraw(start,cur);} |
| function onUp(p){ |
| if(!drawing||!start)return;drawing=false; |
| var b=toBox(start,p); |
| if(b.w>5&&b.h>5){ |
| b.id=nextId++; |
| boxes.push(b); |
| redraw(); |
| updateCount(); |
| // ุฅุฐุง ุงููุถุน ููุฑู โ ุฃุฑุณู ู
ุจุงุดุฑุฉ |
| if(sendMode()==='auto') runSingle(b); |
| else setSt('ู
ุฑุจุน '+b.id+' ุชู
. ุงุฑุณู
ุงูู
ุฒูุฏ ุฃู ุงุถุบุท "ุฅุฑุณุงู ุงููู"'); |
| }else redraw(); |
| start=null; |
| } |
| function toBox(a,b){ |
| var x=Math.min(a.x,b.x),y=Math.min(a.y,b.y),w=Math.abs(a.x-b.x),h=Math.abs(a.y-b.y); |
| return{x:Math.round(x*scale),y:Math.round(y*scale),w:Math.round(w*scale),h:Math.round(h*scale)}; |
| } |
| |
| cv.addEventListener('mousedown',function(e){onDown(pos(e));}); |
| cv.addEventListener('mousemove',function(e){if(drawing)onMove(pos(e));}); |
| cv.addEventListener('mouseup',function(e){onUp(pos(e));}); |
| cv.addEventListener('touchstart',function(e){e.preventDefault();onDown(pos(e.touches[0]));},{passive:false}); |
| cv.addEventListener('touchmove',function(e){e.preventDefault();if(drawing)onMove(pos(e.touches[0]));},{passive:false}); |
| cv.addEventListener('touchend',function(e){e.preventDefault();onUp(pos(e.changedTouches[0]));},{passive:false}); |
| |
| // โโ Draw canvas + numbered labels โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ |
| var COLORS=['#3b82f6','#22c55e','#f59e0b','#a855f7','#ec4899','#14b8a6','#f97316','#6366f1']; |
| |
| function redraw(ps,pe){ |
| ctx.clearRect(0,0,cv.width,cv.height); |
| if(img)ctx.drawImage(img,0,0,cv.width,cv.height); |
| var s=1/scale; |
| |
| // clear old labels |
| labelsDiv.innerHTML=''; |
| |
| boxes.forEach(function(b,i){ |
| var col=COLORS[i%COLORS.length]; |
| var bx=b.x*s, by=b.y*s, bw=b.w*s, bh=b.h*s; |
| ctx.strokeStyle=col; ctx.lineWidth=2; |
| ctx.fillStyle=col+'22'; |
| ctx.fillRect(bx,by,bw,bh); |
| ctx.strokeRect(bx,by,bw,bh); |
| |
| // number label |
| var lbl=document.createElement('div'); |
| lbl.textContent=b.id; |
| lbl.style.cssText='position:absolute;left:'+(bx+4)+'px;top:'+(by+4)+'px;' |
| +'background:'+col+';color:#fff;border-radius:50%;width:22px;height:22px;' |
| +'display:flex;align-items:center;justify-content:center;' |
| +'font-size:.72rem;font-weight:700;line-height:1;'; |
| labelsDiv.appendChild(lbl); |
| }); |
| |
| // live preview |
| if(ps&&pe){ |
| var x=Math.min(ps.x,pe.x),y=Math.min(ps.y,pe.y),w=Math.abs(ps.x-pe.x),h=Math.abs(ps.y-pe.y); |
| ctx.setLineDash([5,3]);ctx.strokeStyle='#ef4444';ctx.lineWidth=2; |
| ctx.strokeRect(x,y,w,h);ctx.setLineDash([]); |
| } |
| } |
| |
| function undo(){ |
| if(boxes.length){boxes.pop();nextId--;redraw();updateCount();} |
| } |
| function clearAll(){ |
| boxes=[];nextId=1;redraw();updateCount(); |
| document.getElementById('res').innerHTML=''; |
| document.getElementById('res-wrap').style.display='none'; |
| } |
| |
| // โโ BATCH send (ุทูุจ ูุงุญุฏ ููู ุงูู
ุฑุจุนุงุช) โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ |
| async function sendBatch(){ |
| if(!file||boxes.length===0)return; |
| if(busy){setSt('โณ ููุฌุฏ ุทูุจ ููุฏ ุงูุชูููุฐ โฆ','ld');return;} |
| busy=true; |
| var total=boxes.length; |
| |
| // ุฅุนุฏุงุฏ ู
ูุทูุฉ ุงููุชุงุฆุฌ |
| document.getElementById('res-wrap').style.display='block'; |
| document.getElementById('res-sum').innerHTML=''; |
| document.getElementById('res').innerHTML=''; |
| document.getElementById('res-title').textContent='ุงููุชุงุฆุฌ ('+total+' ู
ุฑุจุน)'; |
| document.getElementById('prog-wrap').style.display='block'; |
| setProgress(5); |
| |
| // skeleton ููู ู
ุฑุจุน ู
ุณุจูุงู |
| boxes.forEach(function(b){addSkeleton(b);}); |
| setSt('โณ ุฅุฑุณุงู '+total+' ู
ุฑุจุน ุฏูุนุฉ ูุงุญุฏุฉ โ ูุฏ ูุณุชุบุฑู ุจุถุน ุฏูุงุฆู ุนูู CPU โฆ','ld'); |
| |
| var fd=new FormData(); |
| fd.append('image',file); |
| fd.append('boxes',JSON.stringify(boxes)); |
| fd.append('mode',ocrMode()); |
| |
| try{ |
| var r=await fetch(apiUrl()+'/ocr/batch',{method:'POST',body:fd}); |
| setProgress(80); |
| if(!r.ok){ |
| var e=await r.json().catch(()=>({detail:r.statusText})); |
| throw new Error(e.detail||r.statusText); |
| } |
| var data=await r.json(); |
| setProgress(100); |
| |
| // ู
ูุก ุงููุชุงุฆุฌ |
| var okCount=0, errCount=0; |
| data.results.forEach(function(res){ |
| fillCard(res); |
| if(res.status==='ok')okCount++;else errCount++; |
| }); |
| |
| // summary |
| var sumEl=document.getElementById('res-sum'); |
| sumEl.innerHTML= |
| '<span class="badge b-ok">โ '+okCount+' ูุฌุญ</span>' |
| +(errCount?'<span class="badge b-err">โ '+errCount+' ูุดู</span>':'') |
| +'<span class="badge b-t">โฑ '+data.total_elapsed_sec+'s</span>'; |
| |
| setSt('โ ุงูุชู
ู โ '+okCount+'/'+total+' ู
ุฑุจุน','ok'); |
| }catch(e){ |
| setSt('โ '+e.message,'err'); |
| boxes.forEach(function(b){ |
| var el=document.getElementById('card-'+b.id); |
| if(el) el.querySelector('.rt').textContent='โ '+e.message; |
| }); |
| } |
| |
| setTimeout(function(){ |
| document.getElementById('prog-wrap').style.display='none'; |
| setProgress(0); |
| },1500); |
| busy=false; |
| } |
| |
| // โโ SINGLE send (ููุฑู ุนูุฏ ุฑุณู
ูู ู
ุฑุจุน) โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ |
| async function runSingle(box){ |
| if(busy&&sendMode()==='auto'){ |
| // queue: wait a bit and retry |
| setTimeout(function(){runSingle(box);},500); return; |
| } |
| busy=true; |
| document.getElementById('res-wrap').style.display='block'; |
| addSkeleton(box); |
| setSt('โณ OCR ู
ุฑุจุน '+box.id+' โฆ','ld'); |
| |
| var fd=new FormData(); |
| fd.append('image',file); |
| fd.append('x',String(box.x));fd.append('y',String(box.y)); |
| fd.append('w',String(box.w));fd.append('h',String(box.h)); |
| fd.append('box_id',String(box.id)); |
| fd.append('mode',ocrMode()); |
| |
| try{ |
| var r=await fetch(apiUrl()+'/ocr',{method:'POST',body:fd}); |
| if(!r.ok){var e=await r.json().catch(()=>({detail:r.statusText}));throw new Error(e.detail);} |
| var d=await r.json(); |
| fillCard({box_id:d.box_id,x:box.x,y:box.y,w:box.w,h:box.h, |
| text:d.text,status:'ok',elapsed_sec:d.elapsed_sec}); |
| setSt('โ ู
ุฑุจุน '+box.id+' ุงูุชู
ู','ok'); |
| }catch(e){ |
| fillCard({box_id:box.id,x:box.x,y:box.y,w:box.w,h:box.h, |
| text:'โ '+e.message,status:'error',elapsed_sec:0}); |
| setSt('โ '+e.message,'err'); |
| } |
| busy=false; |
| } |
| |
| // โโ Full image OCR โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ |
| async function doFullOCR(){ |
| if(!file){setSt('ุงุฑูุน ุตูุฑุฉ ุฃููุงู','err');return;} |
| if(busy){setSt('โณ ููุฌุฏ ุทูุจ ููุฏ ุงูุชูููุฐ โฆ','ld');return;} |
| busy=true; |
| var fakeBox={id:'full',x:null,y:null,w:null,h:null}; |
| document.getElementById('res-wrap').style.display='block'; |
| addSkeleton(fakeBox); |
| setSt('โณ OCR ูุงู
ูุฉ ููุตูุฑุฉ โฆ','ld'); |
| var fd=new FormData();fd.append('image',file);fd.append('mode',ocrMode()); |
| try{ |
| var r=await fetch(apiUrl()+'/ocr',{method:'POST',body:fd}); |
| if(!r.ok){var e=await r.json().catch(()=>({detail:r.statusText}));throw new Error(e.detail);} |
| var d=await r.json(); |
| fillCard({box_id:'full',x:null,y:null,w:null,h:null, |
| text:d.text,status:'ok',elapsed_sec:d.elapsed_sec}); |
| setSt('โ OCR ูุงู
ูุฉ','ok'); |
| }catch(e){ |
| fillCard({box_id:'full',x:null,y:null,w:null,h:null, |
| text:'โ '+e.message,status:'error',elapsed_sec:0}); |
| setSt('โ '+e.message,'err'); |
| } |
| busy=false; |
| } |
| |
| // โโ Result cards โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ |
| function addSkeleton(b){ |
| var id=b.id,loc=b.x!=null?'x:'+b.x+' y:'+b.y+' โ '+b.w+'ร'+b.h+' px':'ุงูุตูุฑุฉ ูุงู
ูุฉ'; |
| var el=document.getElementById('card-'+id); |
| if(!el){el=document.createElement('div');el.id='card-'+id;document.getElementById('res').prepend(el);} |
| el.className='rc'; |
| el.innerHTML='<div class="rh"><span class="bid">ู
ุฑุจุน '+id+'</span>' |
| +'<span class="coords">'+loc+'</span></div>' |
| +'<div class="sk"></div><div class="sk" style="width:60%"></div>'; |
| } |
| |
| function fillCard(res){ |
| var id=res.box_id,loc=res.x!=null?'x:'+res.x+' y:'+res.y+' โ '+res.w+'ร'+res.h+' px':'ุงูุตูุฑุฉ ูุงู
ูุฉ'; |
| var el=document.getElementById('card-'+id); |
| if(!el){el=document.createElement('div');el.id='card-'+id;document.getElementById('res').prepend(el);} |
| var col=res.status==='ok'?'var(--ok)':'var(--dan)'; |
| var i=(id==='full'?0:boxes.findIndex(function(b){return b.id===id;})); |
| var badgeCol=id==='full'?'#6366f1':COLORS[i<0?0:i%COLORS.length]; |
| el.className='rc'; |
| el.innerHTML='<div class="rh">' |
| +'<span class="bid" style="background:'+badgeCol+'">ู
ุฑุจุน '+id+'</span>' |
| +'<span class="coords">'+loc+'</span>' |
| +'<span class="elapsed">โฑ '+res.elapsed_sec+'s</span>' |
| +'<span style="font-size:.76rem;color:'+col+'">'+ |
| (res.status==='ok'?'โ ูุงุฌุญ':'โ '+res.status)+'</span></div>' |
| +'<div class="rt'+(res.status!=='ok'?' err-text':'')+'" contenteditable="true" id="t-'+id+'">' |
| +esc(res.text)+'</div>' |
| +'<div class="ra">' |
| +'<button class="btn bp" onclick="cp(\''+id+'\')">ูุณุฎ</button>' |
| +'<button class="btn bg" onclick="document.getElementById(\'card-'+id+'\').remove()">ุญุฐู</button>' |
| +'</div>'; |
| } |
| |
| function cp(id){ |
| var e=document.getElementById('t-'+id); |
| if(e)navigator.clipboard.writeText(e.innerText) |
| .then(function(){setSt('ุชู
ุงููุณุฎ โ','ok');}); |
| } |
| |
| function setProgress(pct){document.getElementById('prog').style.width=pct+'%';} |
| </script> |
| </body> |
| </html>""" |