Spaces:
Sleeping
Sleeping
Commit ·
0533780
0
Parent(s):
Initial deploy
Browse files- .gitignore +20 -0
- Dockerfile +33 -0
- Extension/background.js +124 -0
- Extension/content.css +82 -0
- Extension/content.js +184 -0
- Extension/generate_icons.py +43 -0
- Extension/icons/icon128.png +0 -0
- Extension/icons/icon16.png +0 -0
- Extension/icons/icon48.png +0 -0
- Extension/manifest.json +54 -0
- Extension/popup.html +302 -0
- Extension/popup.js +73 -0
- Extension/sidebar.html +391 -0
- README.md +277 -0
- background.js +124 -0
- content.css +82 -0
- content.js +184 -0
- docker-compose.yml +36 -0
- frontend/index.html +759 -0
- generate_icons.py +43 -0
- main.py +182 -0
- manifest.json +54 -0
- ocr_engine.py +242 -0
- popup.html +302 -0
- popup.js +73 -0
- requirements.txt +27 -0
- sidebar.html +391 -0
.gitignore
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
.venv/
|
| 3 |
+
__pycache__/
|
| 4 |
+
*.pyc
|
| 5 |
+
*.pyo
|
| 6 |
+
*.pyd
|
| 7 |
+
*.egg-info/
|
| 8 |
+
dist/
|
| 9 |
+
build/
|
| 10 |
+
|
| 11 |
+
# HuggingFace model cache (don't commit 2GB of weights)
|
| 12 |
+
.cache/
|
| 13 |
+
|
| 14 |
+
# OS
|
| 15 |
+
.DS_Store
|
| 16 |
+
Thumbs.db
|
| 17 |
+
|
| 18 |
+
# Temp files
|
| 19 |
+
*.tmp
|
| 20 |
+
*.log
|
Dockerfile
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ββ GLM-OCR β Dockerfile (Hugging Face Spaces) ββββββββββββββββββββββββββββ
|
| 2 |
+
#
|
| 3 |
+
# HF Spaces builds this automatically when you push to your Space repo.
|
| 4 |
+
# No local Docker needed — HF handles the build and hosting.
|
| 5 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 6 |
+
|
| 7 |
+
FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-runtime
|
| 8 |
+
|
| 9 |
+
# ββ System deps ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 10 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 11 |
+
libgl1-mesa-glx \
|
| 12 |
+
libglib2.0-0 \
|
| 13 |
+
curl \
|
| 14 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 15 |
+
|
| 16 |
+
# ββ Workdir ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 17 |
+
WORKDIR /app
|
| 18 |
+
|
| 19 |
+
# ββ Python deps ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 20 |
+
COPY requirements.txt ./
|
| 21 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 22 |
+
|
| 23 |
+
# ββ Copy source (flat structure) βββββββββββββββββββββββββββββββββββββββββββ
|
| 24 |
+
COPY main.py ocr_engine.py ./
|
| 25 |
+
COPY frontend/ ./frontend/
|
| 26 |
+
|
| 27 |
+
# ββ Model weights download at first startup (not baked into image) βββββββββ
|
| 28 |
+
# HF Spaces caches ~/.cache/huggingface across restarts on paid tiers.
|
| 29 |
+
# On the free tier the model (~1-2 GB) re-downloads on each cold start.
|
| 30 |
+
|
| 31 |
+
# ββ Expose & run (HF Spaces requires port 7860) ββββββββββββββββββββββββββββ
|
| 32 |
+
EXPOSE 7860
|
| 33 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
Extension/background.js
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// background.js — Service worker
|
| 2 |
+
// Handles: tab screenshot, image crop, OCR API call, result relay
|
| 3 |
+
|
| 4 |
+
const OCR_ENDPOINT = "http://localhost:8000/ocr";
|
| 5 |
+
const OCR_MODE = "recognize"; // or "parse"
|
| 6 |
+
|
| 7 |
+
// ββ Listen for messages from content.js βββββββββββββββββββββββββββββββββββββ
|
| 8 |
+
|
| 9 |
+
/**
 * Routes messages arriving at the service worker.
 *
 * - "CAPTURE_REGION": screenshots the sender's tab, crops it to `msg.rect`,
 *   runs OCR and replies with `{ success: true, ...result }` or
 *   `{ success: false, error }`.
 * - "PING": replies with `{ ok }` from the backend health check.
 * - "OPEN_SIDEBAR": asks the sender tab's content script to show the sidebar.
 *
 * Returning `true` keeps the message channel open for the asynchronous
 * `sendResponse` call; returning `false` closes it immediately.
 */
chrome.runtime.onMessage.addListener((msg, sender, sendResponse) => {
  if (msg.type === "CAPTURE_REGION") {
    handleCapture(msg.rect, sender.tab)
      .then((result) => sendResponse({ success: true, ...result }))
      .catch((error) => sendResponse({ success: false, error: error.message }));
    return true; // keep channel open for async
  }

  if (msg.type === "PING") {
    checkServer().then((ok) => sendResponse({ ok }));
    return true;
  }

  if (msg.type === "OPEN_SIDEBAR") {
    // `sender.tab` is only set when the message comes from a tab (content
    // script); messages from extension pages such as the popup have no tab.
    const tabId = sender.tab?.id;
    if (tabId !== undefined) {
      chrome.tabs.sendMessage(tabId, { type: "SHOW_SIDEBAR" }).catch((error) => {
        // Rejects when the tab has no content script listening; report it
        // instead of leaving an unhandled rejection.
        console.warn("GLM-OCR: could not open sidebar:", error.message);
      });
    }
    return false;
  }
});
|
| 30 |
+
|
| 31 |
+
// ββ Capture + crop + OCR βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 32 |
+
|
| 33 |
+
/**
 * Screenshots the visible part of `tab`, crops it to `rect`, posts the crop
 * to the OCR backend and returns the recognised text with its metadata.
 *
 * @param {{x: number, y: number, width: number, height: number, dpr?: number}} rect
 *   Region to crop, passed through to `cropImage`.
 * @param {{windowId: number}} tab - Tab whose window is captured.
 * @returns {Promise<object>} OCR fields from the backend (`text`,
 *   `word_count`, `char_count`, `latency_ms`, `mode`, `device`) plus
 *   `imageDataUrl` (the cropped PNG) and an ISO `timestamp`.
 * @throws {Error} When the backend answers with a non-2xx status; the message
 *   is the response's `detail` field if present, else the status code.
 */
async function handleCapture(rect, tab) {
  // Step 1: grab the whole visible viewport as a PNG data URL.
  const captureOptions = { format: "png", quality: 100 };
  const screenshotUrl = await chrome.tabs.captureVisibleTab(tab.windowId, captureOptions);

  // Step 2: cut out just the selected region.
  const selectionBlob = await cropImage(screenshotUrl, rect);

  // Step 3: upload the crop to the GLM-OCR backend as multipart form data.
  const payload = new FormData();
  payload.append("file", selectionBlob, "selection.png");
  payload.append("mode", OCR_MODE);

  const response = await fetch(OCR_ENDPOINT, { method: "POST", body: payload });
  if (!response.ok) {
    // The error body may not be JSON; fall back to an empty object.
    const failure = await response.json().catch(() => ({}));
    throw new Error(failure.detail || `Server returned ${response.status}`);
  }
  const ocr = await response.json();

  // The sidebar shows the cropped image next to the text, so ship it along.
  const imageDataUrl = await blobToDataUrl(selectionBlob);

  return {
    text: ocr.text,
    word_count: ocr.word_count,
    char_count: ocr.char_count,
    latency_ms: ocr.latency_ms,
    mode: ocr.mode,
    device: ocr.device,
    imageDataUrl,
    timestamp: new Date().toISOString(),
  };
}
|
| 74 |
+
|
| 75 |
+
// ββ Image cropping using OffscreenCanvas βββββββββββββββββββββββββββββββββββββ
|
| 76 |
+
|
| 77 |
+
/**
 * Crops a screenshot (given as a data URL) to the requested region.
 *
 * `rect` is expressed in CSS pixels, while the screenshot is in device
 * pixels, so every coordinate is multiplied by `rect.dpr` before cropping.
 * The region is then intersected with the bitmap so that parts lying outside
 * the screenshot are trimmed on all four sides.
 *
 * @param {string} dataUrl - Screenshot as a data URL.
 * @param {{x: number, y: number, width: number, height: number, dpr?: number}} rect
 *   Region in CSS pixels; `dpr` defaults to 1 when missing or 0.
 * @returns {Promise<Blob>} PNG blob of the cropped region.
 * @throws {Error} When the region does not overlap the screenshot at all.
 */
async function cropImage(dataUrl, rect) {
  // Decode the full screenshot.
  const res = await fetch(dataUrl);
  const blob = await res.blob();
  const bitmap = await createImageBitmap(blob);

  try {
    const dpr = rect.dpr || 1;
    const left = Math.round(rect.x * dpr);
    const top = Math.round(rect.y * dpr);
    const right = left + Math.round(rect.width * dpr);
    const bottom = top + Math.round(rect.height * dpr);

    // Intersect with the bitmap. Computing the size from the clamped edges
    // (rather than from the requested width) also trims the overhang when the
    // origin is negative.
    const cx = Math.max(0, left);
    const cy = Math.max(0, top);
    const cw = Math.min(right, bitmap.width) - cx;
    const ch = Math.min(bottom, bitmap.height) - cy;

    if (cw <= 0 || ch <= 0) {
      // Fail with a readable message instead of creating an empty or
      // negative-sized canvas.
      throw new Error("Selected region is outside the captured screenshot");
    }

    const canvas = new OffscreenCanvas(cw, ch);
    const ctx = canvas.getContext("2d");
    ctx.drawImage(bitmap, cx, cy, cw, ch, 0, 0, cw, ch);

    return canvas.convertToBlob({ type: "image/png" });
  } finally {
    // The pixels are already copied to the canvas; free the decoded bitmap.
    bitmap.close();
  }
}
|
| 104 |
+
|
| 105 |
+
/**
 * Reads a blob into a data URL using FileReader.
 *
 * @param {Blob} blob - Blob to encode.
 * @returns {Promise<string>} Resolves with the reader's result; rejects with
 *   the reader's error event if reading fails.
 */
function blobToDataUrl(blob) {
  return new Promise((resolve, reject) => {
    const fileReader = new FileReader();
    fileReader.onerror = reject;
    fileReader.onload = () => {
      resolve(fileReader.result);
    };
    fileReader.readAsDataURL(blob);
  });
}
|
| 113 |
+
|
| 114 |
+
// ββ Server health check βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 115 |
+
|
| 116 |
+
/**
 * Probes the OCR backend's health endpoint.
 *
 * @returns {Promise<boolean>} `true` only when the endpoint answers within
 *   3 seconds with JSON whose `status` is "ok"; any failure (network error,
 *   timeout, invalid JSON) yields `false`.
 */
async function checkServer() {
  const healthUrl = "http://localhost:8000/health";
  try {
    const response = await fetch(healthUrl, { signal: AbortSignal.timeout(3000) });
    const { status } = await response.json();
    return status === "ok";
  } catch {
    // Best-effort probe: an unreachable server simply counts as "not ok".
    return false;
  }
}
|
Extension/content.css
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* content.css — Styles injected into every page for the selection overlay */
|
| 2 |
+
|
| 3 |
+
/* ── Overlay ── */
|
| 4 |
+
#glmocr-overlay {
|
| 5 |
+
position: fixed !important;
|
| 6 |
+
inset: 0 !important;
|
| 7 |
+
background: rgba(0, 0, 0, 0.45) !important;
|
| 8 |
+
z-index: 2147483646 !important;
|
| 9 |
+
cursor: crosshair !important;
|
| 10 |
+
user-select: none !important;
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
/* ── Hint text ── */
|
| 14 |
+
#glmocr-hint {
|
| 15 |
+
position: absolute !important;
|
| 16 |
+
top: 20px !important;
|
| 17 |
+
left: 50% !important;
|
| 18 |
+
transform: translateX(-50%) !important;
|
| 19 |
+
background: rgba(0, 0, 0, 0.8) !important;
|
| 20 |
+
color: #f5f0e8 !important;
|
| 21 |
+
font-family: 'IBM Plex Mono', monospace, monospace !important;
|
| 22 |
+
font-size: 13px !important;
|
| 23 |
+
padding: 10px 18px !important;
|
| 24 |
+
border-radius: 4px !important;
|
| 25 |
+
letter-spacing: 0.04em !important;
|
| 26 |
+
pointer-events: none !important;
|
| 27 |
+
white-space: nowrap !important;
|
| 28 |
+
border: 1px solid rgba(255,255,255,0.15) !important;
|
| 29 |
+
transition: opacity 0.2s !important;
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
/* ── Selection box ── */
|
| 33 |
+
#glmocr-selbox {
|
| 34 |
+
position: fixed !important;
|
| 35 |
+
display: none !important;
|
| 36 |
+
border: 2px solid #c94a1f !important;
|
| 37 |
+
background: rgba(201, 74, 31, 0.08) !important;
|
| 38 |
+
box-shadow: 0 0 0 9999px rgba(0, 0, 0, 0.35) !important;
|
| 39 |
+
pointer-events: none !important;
|
| 40 |
+
z-index: 2147483647 !important;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
/* ── Sidebar iframe ── */
|
| 44 |
+
#glmocr-sidebar {
|
| 45 |
+
position: fixed !important;
|
| 46 |
+
top: 0 !important;
|
| 47 |
+
right: 0 !important;
|
| 48 |
+
width: 380px !important;
|
| 49 |
+
height: 100vh !important;
|
| 50 |
+
border: none !important;
|
| 51 |
+
z-index: 2147483645 !important;
|
| 52 |
+
border-left: 2px solid #d4cfc3 !important;
|
| 53 |
+
box-shadow: -4px 0 24px rgba(0,0,0,0.12) !important;
|
| 54 |
+
animation: glmocr-slideIn 0.25s cubic-bezier(0.22, 1, 0.36, 1) !important;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
@keyframes glmocr-slideIn {
|
| 58 |
+
from { transform: translateX(100%); opacity: 0; }
|
| 59 |
+
to { transform: translateX(0); opacity: 1; }
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
/* ── Toast ── */
|
| 63 |
+
#glmocr-toast {
|
| 64 |
+
position: fixed !important;
|
| 65 |
+
bottom: 24px !important;
|
| 66 |
+
left: 50% !important;
|
| 67 |
+
transform: translateX(-50%) !important;
|
| 68 |
+
background: #0f0e0d !important;
|
| 69 |
+
color: #f5f0e8 !important;
|
| 70 |
+
font-family: 'IBM Plex Mono', monospace, monospace !important;
|
| 71 |
+
font-size: 13px !important;
|
| 72 |
+
padding: 10px 20px !important;
|
| 73 |
+
border-radius: 4px !important;
|
| 74 |
+
z-index: 2147483647 !important;
|
| 75 |
+
white-space: nowrap !important;
|
| 76 |
+
animation: glmocr-fadeUp 0.3s ease both !important;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
@keyframes glmocr-fadeUp {
|
| 80 |
+
from { opacity: 0; transform: translateX(-50%) translateY(12px); }
|
| 81 |
+
to { opacity: 1; transform: translateX(-50%) translateY(0); }
|
| 82 |
+
}
|
Extension/content.js
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// content.js — Injected into every page
|
| 2 |
+
// Manages the screen selection overlay and sidebar panel
|
| 3 |
+
|
| 4 |
+
let overlayActive = false;
|
| 5 |
+
let sidebarFrame = null;
|
| 6 |
+
|
| 7 |
+
// ββ Listen for messages from background / popup βββββββββββββββββββββββββββββββ
|
| 8 |
+
|
| 9 |
+
/**
 * Handles messages sent to the content script by the background worker or
 * the popup.
 *
 * - "START_SELECTION": starts the drag-to-select overlay unless one is
 *   already active, then acknowledges with `{ ok: true }`.
 * - "SHOW_SIDEBAR": opens the sidebar with no data.
 * - "SHOW_RESULT": opens the sidebar with `msg.data`.
 *
 * All handled branches answer synchronously, hence `return false`.
 */
chrome.runtime.onMessage.addListener((msg, _sender, sendResponse) => {
  switch (msg.type) {
    case "START_SELECTION":
      if (!overlayActive) startSelection();
      sendResponse({ ok: true });
      return false;

    case "SHOW_SIDEBAR":
      showSidebar({});
      return false;

    case "SHOW_RESULT":
      showSidebar(msg.data);
      return false;

    default:
      // Not addressed to this script.
      return undefined;
  }
});
|
| 28 |
+
|
| 29 |
+
// ββ Selection overlay βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 30 |
+
|
| 31 |
+
/**
 * Shows a full-page dimming overlay and lets the user drag a rectangle.
 *
 * On mouse-up the overlay is removed and, if the rectangle is at least
 * 10x10 CSS pixels, it is handed to `runOcr`. Pressing Escape cancels.
 * Does nothing when a selection overlay is already active.
 *
 * The rectangle passed to `runOcr` is in viewport (client) coordinates plus
 * the current `devicePixelRatio`: the screenshot it is later cropped from
 * covers only the visible viewport, so page scroll offsets must not be added.
 */
function startSelection() {
  // Guard against stacking several overlays when called twice.
  if (overlayActive) return;
  overlayActive = true;

  // Dim the page.
  const overlay = document.createElement("div");
  overlay.id = "glmocr-overlay";

  // Usage hint shown until the drag starts.
  const hint = document.createElement("div");
  hint.id = "glmocr-hint";
  hint.textContent = "Drag to select text region — Press Esc to cancel";
  overlay.appendChild(hint);

  // Rubber-band selection box.
  const selBox = document.createElement("div");
  selBox.id = "glmocr-selbox";
  overlay.appendChild(selBox);

  document.body.appendChild(overlay);

  let startX = 0;
  let startY = 0;
  let isDragging = false;

  // Rectangle spanned by the drag origin and the pointer position of `e`.
  function regionTo(e) {
    return {
      x: Math.min(e.clientX, startX),
      y: Math.min(e.clientY, startY),
      w: Math.abs(e.clientX - startX),
      h: Math.abs(e.clientY - startY),
    };
  }

  // Positions and reveals the selection box. The stylesheet hides the box
  // with `display: none !important`, which a plain inline `display:block`
  // cannot override, so the inline declaration must be `!important` too.
  function drawSelBox({ x, y, w, h }) {
    selBox.style.cssText =
      `left:${x}px; top:${y}px; width:${w}px; height:${h}px; display:block !important`;
  }

  function onMouseDown(e) {
    if (e.button !== 0) return; // primary button only
    isDragging = true;
    startX = e.clientX;
    startY = e.clientY;
    drawSelBox({ x: startX, y: startY, w: 0, h: 0 });
    hint.style.opacity = "0";
    e.preventDefault();
  }

  function onMouseMove(e) {
    if (!isDragging) return;
    drawSelBox(regionTo(e));
  }

  function onMouseUp(e) {
    if (!isDragging) return;
    isDragging = false;

    const { x, y, w, h } = regionTo(e);

    cleanup();

    if (w < 10 || h < 10) {
      showToast("Selection too small — try again.");
      return;
    }

    const dpr = window.devicePixelRatio || 1;
    // Viewport-relative on purpose: see the function comment.
    const rect = { x, y, width: w, height: h, dpr };

    runOcr(rect);
  }

  function onKeyDown(e) {
    if (e.key === "Escape") {
      cleanup();
      showToast("Cancelled.");
    }
  }

  // Removes the overlay and every listener registered below.
  function cleanup() {
    overlayActive = false;
    overlay.removeEventListener("mousedown", onMouseDown);
    overlay.removeEventListener("mousemove", onMouseMove);
    overlay.removeEventListener("mouseup", onMouseUp);
    document.removeEventListener("keydown", onKeyDown);
    overlay.remove();
  }

  overlay.addEventListener("mousedown", onMouseDown);
  overlay.addEventListener("mousemove", onMouseMove);
  overlay.addEventListener("mouseup", onMouseUp);
  document.addEventListener("keydown", onKeyDown);
}
|
| 121 |
+
|
| 122 |
+
// ββ Send region to background for capture + OCR βββββββββββββββββββββββββββββββ
|
| 123 |
+
|
| 124 |
+
/**
 * Asks the background worker to capture and OCR `rect`, showing progress and
 * the outcome in the sidebar.
 *
 * @param {{x: number, y: number, width: number, height: number, dpr: number}} rect
 *   Selected region, forwarded unchanged in a "CAPTURE_REGION" message.
 */
function runOcr(rect) {
  // Give immediate feedback while the request is in flight.
  // NOTE(review): the sidebar is added to the page before the screenshot is
  // taken, so it may appear in the capture if it overlaps the selection —
  // confirm.
  showSidebar({ loading: true });

  const onReply = (reply) => {
    // lastError is set when the message could not be delivered or answered.
    const transportError = chrome.runtime.lastError;
    if (transportError) {
      showSidebar({ error: transportError.message });
      return;
    }
    showSidebar(reply.success ? reply : { error: reply.error });
  };

  chrome.runtime.sendMessage({ type: "CAPTURE_REGION", rect }, onReply);
}
|
| 140 |
+
|
| 141 |
+
// ββ Sidebar panel βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 142 |
+
|
| 143 |
+
/**
 * Replaces any existing sidebar with a fresh iframe showing `data`.
 *
 * Once the iframe has loaded, `data` is posted to it as a "SIDEBAR_DATA"
 * message. The sidebar can post back "CLOSE_SIDEBAR" (remove the sidebar) or
 * "START_NEW_SELECTION" (remove it and start a new selection).
 *
 * @param {object} data - Payload for the sidebar (e.g. `{ loading: true }`,
 *   `{ error }`, or an OCR result).
 */
function showSidebar(data) {
  // Remove existing sidebar if any.
  if (sidebarFrame) sidebarFrame.remove();

  const frame = document.createElement("iframe");
  frame.id = "glmocr-sidebar";
  frame.src = chrome.runtime.getURL("sidebar.html");

  // One listener per sidebar, removed as soon as that sidebar is gone, so
  // handlers do not pile up across calls and fire several times.
  function onSidebarMessage(e) {
    if (!frame.isConnected) {
      // This sidebar was replaced or removed elsewhere; retire its listener.
      window.removeEventListener("message", onSidebarMessage);
      return;
    }
    // Accept commands only from our own iframe, not from page scripts.
    if (e.source !== frame.contentWindow) return;

    const type = e.data?.type;
    if (type !== "CLOSE_SIDEBAR" && type !== "START_NEW_SELECTION") return;

    window.removeEventListener("message", onSidebarMessage);
    frame.remove();
    sidebarFrame = null;
    if (type === "START_NEW_SELECTION") startSelection();
  }

  // Wait for the iframe to load, then send the data.
  // NOTE(review): target origin "*" is kept from the original; consider
  // restricting it to the extension origin.
  frame.onload = () => {
    frame.contentWindow.postMessage({ type: "SIDEBAR_DATA", data }, "*");
  };

  window.addEventListener("message", onSidebarMessage);
  document.body.appendChild(frame);
  sidebarFrame = frame;
}
|
| 172 |
+
|
| 173 |
+
// ββ Toast notification ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 174 |
+
|
| 175 |
+
/**
 * Shows a short message at the bottom of the page for three seconds.
 * Any toast already on screen is replaced.
 *
 * @param {string} msg - Text to display.
 */
function showToast(msg) {
  // Only one toast at a time.
  document.getElementById("glmocr-toast")?.remove();

  const toastEl = document.createElement("div");
  toastEl.id = "glmocr-toast";
  toastEl.textContent = msg;
  document.body.appendChild(toastEl);

  setTimeout(() => {
    toastEl?.remove();
  }, 3000);
}
|
Extension/generate_icons.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
generate_icons.py — Run this once to create the extension icons.
|
| 3 |
+
Requires Pillow: pip install Pillow
|
| 4 |
+
"""
|
| 5 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
os.makedirs("icons", exist_ok=True)
|
| 9 |
+
|
| 10 |
+
def make_icon(size):
    """Draw a square extension icon and save it as ``icons/icon{size}.png``.

    The icon is a transparent canvas with an orange rounded rectangle and a
    centred white letter "G".

    Args:
        size: Edge length of the icon in pixels.
    """
    img = Image.new("RGBA", (size, size), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img)

    # Background rounded rect, inset by 1/8 of the icon size on each side.
    pad = size // 8
    draw.rounded_rectangle(
        [pad, pad, size - pad, size - pad],
        radius=size // 5,
        fill="#c94a1f",
    )

    # Letter "G" for GLM.
    font_size = int(size * 0.52)
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # Arial is not available on this machine; fall back to Pillow's
        # built-in font. Catching only OSError keeps KeyboardInterrupt and
        # genuine programming errors visible.
        font = ImageFont.load_default()

    text = "G"
    bbox = draw.textbbox((0, 0), text, font=font)
    tw = bbox[2] - bbox[0]
    th = bbox[3] - bbox[1]
    # Subtract the bbox origin so the visible glyph, not its origin, is centred.
    tx = (size - tw) // 2 - bbox[0]
    ty = (size - th) // 2 - bbox[1]
    draw.text((tx, ty), text, fill="white", font=font)

    img.save(f"icons/icon{size}.png")
    print(f"Created icons/icon{size}.png")
|
| 39 |
+
|
| 40 |
+
for s in [16, 48, 128]:
|
| 41 |
+
make_icon(s)
|
| 42 |
+
|
| 43 |
+
print("Done. Icons created in icons/")
|
Extension/icons/icon128.png
ADDED
|
|
Extension/icons/icon16.png
ADDED
|
|
Extension/icons/icon48.png
ADDED
|
|
Extension/manifest.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"manifest_version": 3,
|
| 3 |
+
"name": "GLM-OCR — Text from Screen",
|
| 4 |
+
"version": "1.0.0",
|
| 5 |
+
"description": "Select any region on screen and extract text using the self-hosted GLM-OCR model.",
|
| 6 |
+
|
| 7 |
+
"permissions": [
|
| 8 |
+
"activeTab",
|
| 9 |
+
"scripting",
|
| 10 |
+
"tabs",
|
| 11 |
+
"storage"
|
| 12 |
+
],
|
| 13 |
+
|
| 14 |
+
"host_permissions": [
|
| 15 |
+
"http://localhost:8000/*",
|
| 16 |
+
"<all_urls>"
|
| 17 |
+
],
|
| 18 |
+
|
| 19 |
+
"background": {
|
| 20 |
+
"service_worker": "background.js"
|
| 21 |
+
},
|
| 22 |
+
|
| 23 |
+
"action": {
|
| 24 |
+
"default_popup": "popup.html",
|
| 25 |
+
"default_icon": {
|
| 26 |
+
"16": "icons/icon16.png",
|
| 27 |
+
"48": "icons/icon48.png",
|
| 28 |
+
"128": "icons/icon128.png"
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
|
| 32 |
+
"content_scripts": [
|
| 33 |
+
{
|
| 34 |
+
"matches": ["<all_urls>"],
|
| 35 |
+
"js": ["content.js"],
|
| 36 |
+
"css": ["content.css"],
|
| 37 |
+
"run_at": "document_idle",
|
| 38 |
+
"all_frames": false
|
| 39 |
+
}
|
| 40 |
+
],
|
| 41 |
+
|
| 42 |
+
"icons": {
|
| 43 |
+
"16": "icons/icon16.png",
|
| 44 |
+
"48": "icons/icon48.png",
|
| 45 |
+
"128": "icons/icon128.png"
|
| 46 |
+
},
|
| 47 |
+
|
| 48 |
+
"web_accessible_resources": [
|
| 49 |
+
{
|
| 50 |
+
"resources": ["sidebar.html"],
|
| 51 |
+
"matches": ["<all_urls>"]
|
| 52 |
+
}
|
| 53 |
+
]
|
| 54 |
+
}
|
Extension/popup.html
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8"/>
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
|
| 6 |
+
<title>GLM-OCR</title>
|
| 7 |
+
<link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;700&family=DM+Serif+Display:ital@0;1&family=DM+Sans:wght@400;500&display=swap" rel="stylesheet"/>
|
| 8 |
+
<style>
|
| 9 |
+
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
| 10 |
+
:root {
|
| 11 |
+
--ink: #0f0e0d;
|
| 12 |
+
--paper: #f5f0e8;
|
| 13 |
+
--warm: #ede8dc;
|
| 14 |
+
--border: #d4cfc3;
|
| 15 |
+
--muted: #8f8880;
|
| 16 |
+
--accent: #c94a1f;
|
| 17 |
+
--green: #1a6b4a;
|
| 18 |
+
--mono: 'IBM Plex Mono', monospace;
|
| 19 |
+
--serif: 'DM Serif Display', serif;
|
| 20 |
+
--sans: 'DM Sans', sans-serif;
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
body {
|
| 24 |
+
width: 300px;
|
| 25 |
+
background: var(--paper);
|
| 26 |
+
color: var(--ink);
|
| 27 |
+
font-family: var(--sans);
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
body::before {
|
| 31 |
+
content: '';
|
| 32 |
+
position: fixed; inset: 0;
|
| 33 |
+
background-image: radial-gradient(circle, rgba(0,0,0,0.05) 1px, transparent 1px);
|
| 34 |
+
background-size: 16px 16px;
|
| 35 |
+
pointer-events: none;
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
.inner { position: relative; }
|
| 39 |
+
|
| 40 |
+
/* Header */
|
| 41 |
+
.header {
|
| 42 |
+
padding: 16px 18px 14px;
|
| 43 |
+
border-bottom: 2px solid var(--ink);
|
| 44 |
+
display: flex;
|
| 45 |
+
align-items: center;
|
| 46 |
+
justify-content: space-between;
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
.logo {
|
| 50 |
+
font-family: var(--serif);
|
| 51 |
+
font-size: 1.1rem;
|
| 52 |
+
letter-spacing: -0.01em;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
.logo em { font-style: italic; color: var(--accent); }
|
| 56 |
+
|
| 57 |
+
.server-badge {
|
| 58 |
+
display: flex;
|
| 59 |
+
align-items: center;
|
| 60 |
+
gap: 5px;
|
| 61 |
+
font-family: var(--mono);
|
| 62 |
+
font-size: 0.58rem;
|
| 63 |
+
color: var(--muted);
|
| 64 |
+
letter-spacing: 0.06em;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
.dot {
|
| 68 |
+
width: 6px; height: 6px;
|
| 69 |
+
border-radius: 50%;
|
| 70 |
+
background: var(--muted);
|
| 71 |
+
}
|
| 72 |
+
.dot.ok { background: var(--green); }
|
| 73 |
+
.dot.err { background: var(--accent); }
|
| 74 |
+
.dot.pulse { animation: blink 1.2s ease-in-out infinite; }
|
| 75 |
+
@keyframes blink { 0%,100%{opacity:1} 50%{opacity:0.3} }
|
| 76 |
+
|
| 77 |
+
/* Main CTA */
|
| 78 |
+
.cta-area {
|
| 79 |
+
padding: 20px 18px;
|
| 80 |
+
border-bottom: 1px solid var(--border);
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
.cta-label {
|
| 84 |
+
font-family: var(--mono);
|
| 85 |
+
font-size: 0.62rem;
|
| 86 |
+
color: var(--muted);
|
| 87 |
+
letter-spacing: 0.1em;
|
| 88 |
+
text-transform: uppercase;
|
| 89 |
+
margin-bottom: 10px;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
.select-btn {
|
| 93 |
+
width: 100%;
|
| 94 |
+
padding: 14px;
|
| 95 |
+
background: var(--accent);
|
| 96 |
+
color: white;
|
| 97 |
+
border: none;
|
| 98 |
+
border-radius: 2px;
|
| 99 |
+
font-family: var(--serif);
|
| 100 |
+
font-size: 1rem;
|
| 101 |
+
cursor: pointer;
|
| 102 |
+
transition: background 0.15s;
|
| 103 |
+
display: flex;
|
| 104 |
+
align-items: center;
|
| 105 |
+
justify-content: center;
|
| 106 |
+
gap: 8px;
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
.select-btn:hover:not(:disabled) { background: #b53d15; }
|
| 110 |
+
.select-btn:disabled { opacity: 0.35; cursor: not-allowed; }
|
| 111 |
+
|
| 112 |
+
.select-btn .shortcut {
|
| 113 |
+
font-family: var(--mono);
|
| 114 |
+
font-size: 0.6rem;
|
| 115 |
+
opacity: 0.7;
|
| 116 |
+
margin-left: auto;
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
.offline-msg {
|
| 120 |
+
display: none;
|
| 121 |
+
margin-top: 10px;
|
| 122 |
+
font-family: var(--mono);
|
| 123 |
+
font-size: 0.65rem;
|
| 124 |
+
color: var(--accent);
|
| 125 |
+
line-height: 1.6;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
.offline-msg.show { display: block; }
|
| 129 |
+
|
| 130 |
+
.offline-msg a {
|
| 131 |
+
color: var(--accent);
|
| 132 |
+
text-decoration: underline;
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
/* How it works */
|
| 136 |
+
.how {
|
| 137 |
+
padding: 16px 18px;
|
| 138 |
+
border-bottom: 1px solid var(--border);
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
.how-title {
|
| 142 |
+
font-family: var(--mono);
|
| 143 |
+
font-size: 0.6rem;
|
| 144 |
+
color: var(--muted);
|
| 145 |
+
letter-spacing: 0.1em;
|
| 146 |
+
text-transform: uppercase;
|
| 147 |
+
margin-bottom: 12px;
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
.step {
|
| 151 |
+
display: flex;
|
| 152 |
+
gap: 10px;
|
| 153 |
+
align-items: flex-start;
|
| 154 |
+
margin-bottom: 8px;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
.step:last-child { margin-bottom: 0; }
|
| 158 |
+
|
| 159 |
+
.step-num {
|
| 160 |
+
font-family: var(--mono);
|
| 161 |
+
font-size: 0.6rem;
|
| 162 |
+
color: var(--accent);
|
| 163 |
+
font-weight: 700;
|
| 164 |
+
flex-shrink: 0;
|
| 165 |
+
margin-top: 2px;
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
.step-text {
|
| 169 |
+
font-size: 0.78rem;
|
| 170 |
+
line-height: 1.5;
|
| 171 |
+
color: var(--ink);
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
/* Settings */
|
| 175 |
+
.settings {
|
| 176 |
+
padding: 14px 18px;
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
.settings-title {
|
| 180 |
+
font-family: var(--mono);
|
| 181 |
+
font-size: 0.6rem;
|
| 182 |
+
color: var(--muted);
|
| 183 |
+
letter-spacing: 0.1em;
|
| 184 |
+
text-transform: uppercase;
|
| 185 |
+
margin-bottom: 10px;
|
| 186 |
+
}
|
| 187 |
+
|
| 188 |
+
.setting-row {
|
| 189 |
+
display: flex;
|
| 190 |
+
align-items: center;
|
| 191 |
+
justify-content: space-between;
|
| 192 |
+
margin-bottom: 8px;
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
.setting-label {
|
| 196 |
+
font-family: var(--mono);
|
| 197 |
+
font-size: 0.68rem;
|
| 198 |
+
color: var(--ink);
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
.mode-toggle {
|
| 202 |
+
display: flex;
|
| 203 |
+
gap: 4px;
|
| 204 |
+
}
|
| 205 |
+
|
| 206 |
+
.mode-opt {
|
| 207 |
+
font-family: var(--mono);
|
| 208 |
+
font-size: 0.58rem;
|
| 209 |
+
padding: 4px 8px;
|
| 210 |
+
border: 1px solid var(--border);
|
| 211 |
+
background: transparent;
|
| 212 |
+
color: var(--muted);
|
| 213 |
+
cursor: pointer;
|
| 214 |
+
border-radius: 2px;
|
| 215 |
+
transition: all 0.12s;
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
.mode-opt.active {
|
| 219 |
+
background: var(--ink);
|
| 220 |
+
border-color: var(--ink);
|
| 221 |
+
color: var(--paper);
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
/* Footer */
|
| 225 |
+
.footer {
|
| 226 |
+
padding: 10px 18px;
|
| 227 |
+
border-top: 1px solid var(--border);
|
| 228 |
+
font-family: var(--mono);
|
| 229 |
+
font-size: 0.58rem;
|
| 230 |
+
color: var(--muted);
|
| 231 |
+
display: flex;
|
| 232 |
+
justify-content: space-between;
|
| 233 |
+
}
|
| 234 |
+
</style>
|
| 235 |
+
</head>
|
| 236 |
+
<body>
|
| 237 |
+
<div class="inner">
|
| 238 |
+
|
| 239 |
+
<!-- Header -->
|
| 240 |
+
<div class="header">
|
| 241 |
+
<div class="logo">GLM-<em>OCR</em></div>
|
| 242 |
+
<div class="server-badge">
|
| 243 |
+
<div class="dot pulse" id="dot"></div>
|
| 244 |
+
<span id="server-label">checking…</span>
|
| 245 |
+
</div>
|
| 246 |
+
</div>
|
| 247 |
+
|
| 248 |
+
<!-- CTA -->
|
| 249 |
+
<div class="cta-area">
|
| 250 |
+
<div class="cta-label">Select region on screen</div>
|
| 251 |
+
<button class="select-btn" id="select-btn" disabled>
|
| 252 |
+
β Select & Extract Text
|
| 253 |
+
</button>
|
| 254 |
+
<div class="offline-msg" id="offline-msg">
|
| 255 |
+
β GLM-OCR server not running.<br>
|
| 256 |
+
Start it with <code>python main.py</code> at <a href="http://localhost:8000" target="_blank">localhost:8000</a>.
|
| 257 |
+
</div>
|
| 258 |
+
</div>
|
| 259 |
+
|
| 260 |
+
<!-- How it works -->
|
| 261 |
+
<div class="how">
|
| 262 |
+
<div class="how-title">How it works</div>
|
| 263 |
+
<div class="step">
|
| 264 |
+
<div class="step-num">01</div>
|
| 265 |
+
<div class="step-text">Click the button above β page dims</div>
|
| 266 |
+
</div>
|
| 267 |
+
<div class="step">
|
| 268 |
+
<div class="step-num">02</div>
|
| 269 |
+
<div class="step-text">Drag a box around the text you want</div>
|
| 270 |
+
</div>
|
| 271 |
+
<div class="step">
|
| 272 |
+
<div class="step-num">03</div>
|
| 273 |
+
<div class="step-text">GLM-OCR extracts text into a sidebar</div>
|
| 274 |
+
</div>
|
| 275 |
+
<div class="step">
|
| 276 |
+
<div class="step-num">04</div>
|
| 277 |
+
<div class="step-text">Copy or download the result</div>
|
| 278 |
+
</div>
|
| 279 |
+
</div>
|
| 280 |
+
|
| 281 |
+
<!-- Settings -->
|
| 282 |
+
<div class="settings">
|
| 283 |
+
<div class="settings-title">Settings</div>
|
| 284 |
+
<div class="setting-row">
|
| 285 |
+
<span class="setting-label">OCR Mode</span>
|
| 286 |
+
<div class="mode-toggle">
|
| 287 |
+
<button class="mode-opt active" data-mode="recognize">recognize</button>
|
| 288 |
+
<button class="mode-opt" data-mode="parse">parse</button>
|
| 289 |
+
</div>
|
| 290 |
+
</div>
|
| 291 |
+
</div>
|
| 292 |
+
|
| 293 |
+
<div class="footer">
|
| 294 |
+
<span>zai-org/GLM-OCR · 0.9B</span>
|
| 295 |
+
<span>self-hosted</span>
|
| 296 |
+
</div>
|
| 297 |
+
|
| 298 |
+
</div>
|
| 299 |
+
|
| 300 |
+
<script src="popup.js"></script>
|
| 301 |
+
</body>
|
| 302 |
+
</html>
|
Extension/popup.js
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// popup.js
|
| 2 |
+
|
| 3 |
+
const selectBtn = document.getElementById("select-btn");
|
| 4 |
+
const dot = document.getElementById("dot");
|
| 5 |
+
const serverLabel = document.getElementById("server-label");
|
| 6 |
+
const offlineMsg = document.getElementById("offline-msg");
|
| 7 |
+
|
| 8 |
+
let selectedMode = "recognize";
|
| 9 |
+
|
| 10 |
+
// ββ Check server health βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 11 |
+
/**
 * Probe the local GLM-OCR backend's health endpoint.
 *
 * Resolves to `true` only when the request finishes within 3 seconds, the
 * HTTP status is 2xx, and the JSON body reports `status === "ok"`. Every
 * failure mode (network error, timeout, non-2xx status, unparseable body)
 * resolves to `false`; this function never rejects.
 *
 * @returns {Promise<boolean>} Whether the server reported itself healthy.
 */
async function checkServer() {
  try {
    const response = await fetch("http://localhost:8000/health", {
      signal: AbortSignal.timeout(3000),
    });
    // An error status means the server answered but is not healthy; do not
    // trust whatever body came with it.
    if (!response.ok) {
      return false;
    }
    const payload = await response.json();
    return payload?.status === "ok";
  } catch {
    // Unreachable server, timeout, or invalid JSON all count as "offline".
    return false;
  }
}
|
| 22 |
+
|
| 23 |
+
/**
 * Query the backend once and reflect the outcome in the popup: status dot,
 * status label, enabled state of the select button, and visibility of the
 * offline hint.
 */
async function updateServerStatus() {
  const isUp = await checkServer();
  const isDown = !isUp;

  if (isUp) {
    dot.className = "dot ok";
    serverLabel.textContent = "server ready";
  } else {
    dot.className = "dot err";
    serverLabel.textContent = "offline";
  }

  selectBtn.disabled = isDown;
  offlineMsg.classList.toggle("show", isDown);
}
|
| 30 |
+
|
| 31 |
+
updateServerStatus();
|
| 32 |
+
|
| 33 |
+
// ββ Mode toggle βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 34 |
+
// Clicking a mode button makes it the only active one, remembers the choice
// in `selectedMode`, and persists it under the `ocrMode` storage key.
for (const option of document.querySelectorAll(".mode-opt")) {
  option.addEventListener("click", () => {
    for (const other of document.querySelectorAll(".mode-opt")) {
      other.classList.remove("active");
    }
    option.classList.add("active");
    selectedMode = option.dataset.mode;
    chrome.storage.local.set({ ocrMode: selectedMode });
  });
}
|
| 42 |
+
|
| 43 |
+
// Restore the previously saved mode (if any) and highlight its button.
chrome.storage.local.get(["ocrMode"], (stored) => {
  const savedMode = stored.ocrMode;
  if (!savedMode) {
    return;
  }

  selectedMode = savedMode;
  for (const option of document.querySelectorAll(".mode-opt")) {
    option.classList.toggle("active", option.dataset.mode === savedMode);
  }
});
|
| 52 |
+
|
| 53 |
+
// ββ Select button βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 54 |
+
// Start a region selection in the active tab, then close the popup.
//
// Failures (no active tab, a page that scripts cannot be injected into, a
// storage error) are logged instead of surfacing as unhandled promise
// rejections; in those cases the popup stays open, as it did before.
selectBtn.addEventListener("click", async () => {
  try {
    // Save current mode to storage so background can read it
    await chrome.storage.local.set({ ocrMode: selectedMode });

    // Get current tab and inject the selection
    const [tab] = await chrome.tabs.query({ active: true, currentWindow: true });
    if (tab?.id == null) {
      console.error("GLM-OCR: no active tab to start a selection in.");
      return;
    }

    await chrome.scripting.executeScript({
      target: { tabId: tab.id },
      func: () => {
        window.postMessage({ type: "GLMOCR_START" }, "*");
      },
    });

    // Tell content script to start selection mode. Deliberately not awaited
    // (the popup closes right away), but a rejection such as "no receiver in
    // this tab" is handled rather than left floating.
    chrome.tabs.sendMessage(tab.id, { type: "START_SELECTION" })?.catch((err) => {
      console.error("GLM-OCR: content script did not accept START_SELECTION.", err);
    });

    // Close popup so it doesn't obscure the page
    window.close();
  } catch (err) {
    console.error("GLM-OCR: could not start selection.", err);
  }
});
|
Extension/sidebar.html
ADDED
|
@@ -0,0 +1,391 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8"/>
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
|
| 6 |
+
<title>GLM-OCR Result</title>
|
| 7 |
+
<link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500;700&family=DM+Serif+Display:ital@0;1&family=DM+Sans:wght@400;500&display=swap" rel="stylesheet"/>
|
| 8 |
+
<style>
|
| 9 |
+
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
| 10 |
+
:root {
|
| 11 |
+
--ink: #0f0e0d;
|
| 12 |
+
--paper: #f5f0e8;
|
| 13 |
+
--warm: #ede8dc;
|
| 14 |
+
--border: #d4cfc3;
|
| 15 |
+
--muted: #8f8880;
|
| 16 |
+
--accent: #c94a1f;
|
| 17 |
+
--green: #1a6b4a;
|
| 18 |
+
--mono: 'IBM Plex Mono', monospace;
|
| 19 |
+
--serif: 'DM Serif Display', serif;
|
| 20 |
+
--sans: 'DM Sans', sans-serif;
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
html, body {
|
| 24 |
+
height: 100%;
|
| 25 |
+
background: var(--paper);
|
| 26 |
+
color: var(--ink);
|
| 27 |
+
font-family: var(--sans);
|
| 28 |
+
overflow: hidden;
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
body::before {
|
| 32 |
+
content: '';
|
| 33 |
+
position: fixed; inset: 0;
|
| 34 |
+
background-image: radial-gradient(circle, rgba(0,0,0,0.05) 1px, transparent 1px);
|
| 35 |
+
background-size: 16px 16px;
|
| 36 |
+
pointer-events: none;
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
.sidebar {
|
| 40 |
+
position: relative;
|
| 41 |
+
height: 100vh;
|
| 42 |
+
display: flex;
|
| 43 |
+
flex-direction: column;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
/* ββ Header ββ */
|
| 47 |
+
.sb-header {
|
| 48 |
+
padding: 14px 16px;
|
| 49 |
+
border-bottom: 2px solid var(--ink);
|
| 50 |
+
display: flex;
|
| 51 |
+
align-items: center;
|
| 52 |
+
justify-content: space-between;
|
| 53 |
+
flex-shrink: 0;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
.sb-title {
|
| 57 |
+
font-family: var(--serif);
|
| 58 |
+
font-size: 1rem;
|
| 59 |
+
letter-spacing: -0.01em;
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
.sb-title em { font-style: italic; color: var(--accent); }
|
| 63 |
+
|
| 64 |
+
.sb-close {
|
| 65 |
+
font-family: var(--mono);
|
| 66 |
+
font-size: 0.6rem;
|
| 67 |
+
padding: 5px 10px;
|
| 68 |
+
border: 1px solid var(--border);
|
| 69 |
+
background: transparent;
|
| 70 |
+
cursor: pointer;
|
| 71 |
+
border-radius: 2px;
|
| 72 |
+
color: var(--muted);
|
| 73 |
+
transition: all 0.12s;
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
.sb-close:hover { border-color: var(--ink); color: var(--ink); }
|
| 77 |
+
|
| 78 |
+
/* ββ Scrollable body ββ */
|
| 79 |
+
.sb-body {
|
| 80 |
+
flex: 1;
|
| 81 |
+
overflow-y: auto;
|
| 82 |
+
display: flex;
|
| 83 |
+
flex-direction: column;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
/* ββ Loading ββ */
|
| 87 |
+
.sb-loading {
|
| 88 |
+
flex: 1;
|
| 89 |
+
display: flex;
|
| 90 |
+
flex-direction: column;
|
| 91 |
+
align-items: center;
|
| 92 |
+
justify-content: center;
|
| 93 |
+
gap: 16px;
|
| 94 |
+
padding: 24px;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
.scan-bar-wrap { width: 140px; height: 3px; background: var(--border); border-radius: 2px; overflow: hidden; }
|
| 98 |
+
.scan-bar { height: 100%; background: var(--accent); border-radius: 2px; animation: scan 1.4s ease-in-out infinite; }
|
| 99 |
+
@keyframes scan { 0%{transform:translateX(-100%)} 50%{transform:translateX(0)} 100%{transform:translateX(100%)} }
|
| 100 |
+
|
| 101 |
+
.loading-label {
|
| 102 |
+
font-family: var(--mono);
|
| 103 |
+
font-size: 0.68rem;
|
| 104 |
+
color: var(--muted);
|
| 105 |
+
animation: blink 1.4s ease-in-out infinite;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
@keyframes blink { 0%,100%{opacity:1} 50%{opacity:0.3} }
|
| 109 |
+
|
| 110 |
+
/* ββ Error ββ */
|
| 111 |
+
.sb-error {
|
| 112 |
+
margin: 16px;
|
| 113 |
+
background: #fff0f0;
|
| 114 |
+
border: 1px solid rgba(201,74,31,0.3);
|
| 115 |
+
border-radius: 2px;
|
| 116 |
+
padding: 14px;
|
| 117 |
+
font-family: var(--mono);
|
| 118 |
+
font-size: 0.72rem;
|
| 119 |
+
color: var(--accent);
|
| 120 |
+
line-height: 1.7;
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
/* ββ Image preview ββ */
|
| 124 |
+
.sb-image-wrap {
|
| 125 |
+
padding: 14px 16px 0;
|
| 126 |
+
flex-shrink: 0;
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
.sb-image-label {
|
| 130 |
+
font-family: var(--mono);
|
| 131 |
+
font-size: 0.58rem;
|
| 132 |
+
color: var(--muted);
|
| 133 |
+
letter-spacing: 0.1em;
|
| 134 |
+
text-transform: uppercase;
|
| 135 |
+
margin-bottom: 8px;
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
.sb-image {
|
| 139 |
+
width: 100%;
|
| 140 |
+
max-height: 160px;
|
| 141 |
+
object-fit: contain;
|
| 142 |
+
border: 1px solid var(--border);
|
| 143 |
+
border-radius: 2px;
|
| 144 |
+
background: var(--warm);
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
/* ββ Meta chips ββ */
|
| 148 |
+
.sb-meta {
|
| 149 |
+
padding: 10px 16px;
|
| 150 |
+
display: flex;
|
| 151 |
+
gap: 10px;
|
| 152 |
+
flex-wrap: wrap;
|
| 153 |
+
border-bottom: 1px solid var(--border);
|
| 154 |
+
flex-shrink: 0;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
.chip {
|
| 158 |
+
font-family: var(--mono);
|
| 159 |
+
font-size: 0.6rem;
|
| 160 |
+
color: var(--muted);
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
.chip strong { color: var(--green); }
|
| 164 |
+
|
| 165 |
+
/* ββ Extracted text ββ */
|
| 166 |
+
.sb-text-section {
|
| 167 |
+
padding: 14px 16px;
|
| 168 |
+
display: flex;
|
| 169 |
+
flex-direction: column;
|
| 170 |
+
gap: 8px;
|
| 171 |
+
flex: 1;
|
| 172 |
+
min-height: 0;
|
| 173 |
+
}
|
| 174 |
+
|
| 175 |
+
.sb-text-label {
|
| 176 |
+
font-family: var(--mono);
|
| 177 |
+
font-size: 0.58rem;
|
| 178 |
+
color: var(--muted);
|
| 179 |
+
letter-spacing: 0.1em;
|
| 180 |
+
text-transform: uppercase;
|
| 181 |
+
flex-shrink: 0;
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
.sb-text {
|
| 185 |
+
background: var(--warm);
|
| 186 |
+
border: 1px solid var(--border);
|
| 187 |
+
border-radius: 2px;
|
| 188 |
+
padding: 14px;
|
| 189 |
+
font-family: var(--mono);
|
| 190 |
+
font-size: 0.78rem;
|
| 191 |
+
line-height: 1.85;
|
| 192 |
+
white-space: pre-wrap;
|
| 193 |
+
word-break: break-word;
|
| 194 |
+
overflow-y: auto;
|
| 195 |
+
flex: 1;
|
| 196 |
+
min-height: 120px;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
/* ββ Actions ββ */
|
| 200 |
+
.sb-actions {
|
| 201 |
+
padding: 12px 16px;
|
| 202 |
+
border-top: 1px solid var(--border);
|
| 203 |
+
display: flex;
|
| 204 |
+
gap: 8px;
|
| 205 |
+
flex-shrink: 0;
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
.action-btn {
|
| 209 |
+
font-family: var(--mono);
|
| 210 |
+
font-size: 0.62rem;
|
| 211 |
+
letter-spacing: 0.04em;
|
| 212 |
+
padding: 9px 12px;
|
| 213 |
+
border: 1px solid var(--border);
|
| 214 |
+
background: transparent;
|
| 215 |
+
color: var(--ink);
|
| 216 |
+
cursor: pointer;
|
| 217 |
+
border-radius: 2px;
|
| 218 |
+
transition: all 0.12s;
|
| 219 |
+
flex: 1;
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
.action-btn:hover { border-color: var(--ink); }
|
| 223 |
+
|
| 224 |
+
.action-btn.primary {
|
| 225 |
+
background: var(--accent);
|
| 226 |
+
border-color: var(--accent);
|
| 227 |
+
color: white;
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
.action-btn.primary:hover { background: #b53d15; }
|
| 231 |
+
|
| 232 |
+
/* ββ Toast ββ */
|
| 233 |
+
.toast {
|
| 234 |
+
position: fixed;
|
| 235 |
+
bottom: 16px;
|
| 236 |
+
left: 50%;
|
| 237 |
+
transform: translateX(-50%) translateY(40px);
|
| 238 |
+
opacity: 0;
|
| 239 |
+
background: var(--ink);
|
| 240 |
+
color: var(--paper);
|
| 241 |
+
font-family: var(--mono);
|
| 242 |
+
font-size: 0.65rem;
|
| 243 |
+
padding: 8px 16px;
|
| 244 |
+
border-radius: 2px;
|
| 245 |
+
white-space: nowrap;
|
| 246 |
+
transition: all 0.3s cubic-bezier(0.34, 1.56, 0.64, 1);
|
| 247 |
+
z-index: 999;
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
.toast.show {
|
| 251 |
+
transform: translateX(-50%) translateY(0);
|
| 252 |
+
opacity: 1;
|
| 253 |
+
}
|
| 254 |
+
</style>
|
| 255 |
+
</head>
|
| 256 |
+
<body>
|
| 257 |
+
<div class="sidebar">
|
| 258 |
+
|
| 259 |
+
<!-- Header -->
|
| 260 |
+
<div class="sb-header">
|
| 261 |
+
<div class="sb-title">GLM-<em>OCR</em> Result</div>
|
| 262 |
+
<button class="sb-close" id="close-btn">β Close</button>
|
| 263 |
+
</div>
|
| 264 |
+
|
| 265 |
+
<!-- Body -->
|
| 266 |
+
<div class="sb-body" id="sb-body">
|
| 267 |
+
|
| 268 |
+
<!-- Loading state (default) -->
|
| 269 |
+
<div class="sb-loading" id="state-loading">
|
| 270 |
+
<div class="scan-bar-wrap"><div class="scan-bar"></div></div>
|
| 271 |
+
<div class="loading-label">Running GLM-OCRβ¦</div>
|
| 272 |
+
</div>
|
| 273 |
+
|
| 274 |
+
</div>
|
| 275 |
+
|
| 276 |
+
<!-- Actions (shown after result) -->
|
| 277 |
+
<div class="sb-actions" id="sb-actions" style="display:none">
|
| 278 |
+
<button class="action-btn primary" id="new-btn">β New Selection</button>
|
| 279 |
+
<button class="action-btn" id="copy-btn">Copy</button>
|
| 280 |
+
<button class="action-btn" id="dl-btn">β .txt</button>
|
| 281 |
+
</div>
|
| 282 |
+
|
| 283 |
+
</div>
|
| 284 |
+
|
| 285 |
+
<div class="toast" id="toast"></div>
|
| 286 |
+
|
| 287 |
+
<script>
|
| 288 |
+
let extractedText = "";
|
| 289 |
+
|
| 290 |
+
// ββ Receive data from content.js ββββββββββββββββββββββββββββββββββββββββββ
|
| 291 |
+
// Render whatever the embedding page posts as a SIDEBAR_DATA message.
// Messages of any other type are ignored.
window.addEventListener("message", (e) => {
  if (e.data?.type !== "SIDEBAR_DATA") return;
  const data = e.data.data;

  // Skip messages without a payload (previously a TypeError) and "loading"
  // notifications: the loading state is already what the sidebar shows.
  if (!data || data.loading) return;

  renderResult(data);
});
|
| 299 |
+
|
| 300 |
+
/**
 * Replace the sidebar body with either an error box or the OCR result view,
 * then reveal the action bar.
 *
 * Every value taken from `data` is written through `textContent` or
 * `setAttribute`, never spliced into an HTML string, so server- or
 * page-supplied text cannot inject markup into the sidebar.
 *
 * @param {object} data - Payload of a SIDEBAR_DATA message. Fields read here:
 *   `error`, `text`, `latency_ms`, `imageDataUrl`, `word_count`,
 *   `char_count`, `device`.
 */
function renderResult(data) {
  const body = document.getElementById("sb-body");
  const actions = document.getElementById("sb-actions");

  if (data.error) {
    const errorBox = document.createElement("div");
    errorBox.className = "sb-error";
    // String arguments to append() become text nodes, so the error message
    // is shown literally.
    errorBox.append(
      `β ${data.error}`,
      document.createElement("br"),
      document.createElement("br"),
      "Make sure the GLM-OCR server is running at localhost:8000.",
    );
    body.replaceChildren(errorBox);
    actions.style.display = "flex";
    return;
  }

  // Kept at script scope for the Copy and download buttons.
  extractedText = data.text || "";

  const latency = data.latency_ms ? `${(data.latency_ms / 1000).toFixed(2)}s` : "β";

  // Static skeleton only; dynamic values are filled in below.
  body.innerHTML = `
    <div class="sb-image-wrap">
      <div class="sb-image-label">Selected Region</div>
      <img class="sb-image" alt="Selection"/>
    </div>

    <div class="sb-meta">
      <span class="chip">words: <strong></strong></span>
      <span class="chip">chars: <strong></strong></span>
      <span class="chip">latency: <strong></strong></span>
      <span class="chip">device: <strong></strong></span>
    </div>

    <div class="sb-text-section">
      <div class="sb-text-label">Extracted Text</div>
      <div class="sb-text" id="result-text"></div>
    </div>
  `;

  body.querySelector(".sb-image").setAttribute("src", data.imageDataUrl || "");

  // Chip order matches the skeleton above: words, chars, latency, device.
  const [wordsEl, charsEl, latencyEl, deviceEl] = body.querySelectorAll(".chip strong");
  wordsEl.textContent = String(data.word_count || 0);
  charsEl.textContent = String(data.char_count || 0);
  latencyEl.textContent = latency;
  deviceEl.textContent = String(data.device || "β");

  const resultBox = body.querySelector("#result-text");
  if (data.text) {
    resultBox.textContent = data.text;
  } else {
    // Constant placeholder markup; contains no external data.
    resultBox.innerHTML = '<span style="color:var(--muted);">[No text detected]</span>';
  }

  actions.style.display = "flex";
}
|
| 335 |
+
|
| 336 |
+
/**
 * Escape a value so it can be embedded in HTML (element content or a quoted
 * attribute) without being interpreted as markup.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param {*} str - Value to escape; non-strings are converted with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(str) {
  return String(str)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
|
| 342 |
+
|
| 343 |
+
// ββ Close βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 344 |
+
// Ask the embedding page to remove the sidebar.
document.getElementById("close-btn").addEventListener("click", function onCloseClick() {
  const request = { type: "CLOSE_SIDEBAR" };
  window.parent.postMessage(request, "*");
});
|
| 347 |
+
|
| 348 |
+
// ββ New selection βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 349 |
+
// Ask the embedding page to begin another region selection.
document.getElementById("new-btn").addEventListener("click", function onNewSelectionClick() {
  const request = { type: "START_NEW_SELECTION" };
  window.parent.postMessage(request, "*");
});
|
| 352 |
+
|
| 353 |
+
// ββ Copy βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββοΏ½οΏ½ββββββ
|
| 354 |
+
// Copy the extracted text to the clipboard. If that fails, select the text
// in the result box (when one is rendered) and ask the user to copy it
// manually.
document.getElementById("copy-btn").addEventListener("click", async function onCopyClick() {
  try {
    await navigator.clipboard.writeText(extractedText);
    toast("Copied!");
  } catch {
    const resultBox = document.getElementById("result-text");
    if (resultBox !== null) {
      const selection = window.getSelection();
      const wholeBox = document.createRange();
      wholeBox.selectNodeContents(resultBox);
      selection.removeAllRanges();
      selection.addRange(wholeBox);
    }
    toast("Select text above and copy manually.");
  }
});
|
| 371 |
+
|
| 372 |
+
// ββ Download ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 373 |
+
// Offer the extracted text as a timestamped .txt download via a temporary
// object URL and a detached <a> element.
document.getElementById("dl-btn").addEventListener("click", function onDownloadClick() {
  const objectUrl = URL.createObjectURL(
    new Blob([extractedText], { type: "text/plain" }),
  );

  const link = document.createElement("a");
  link.href = objectUrl;
  link.download = `glm-ocr-${Date.now()}.txt`;
  link.click();

  URL.revokeObjectURL(link.href);
});
|
| 381 |
+
|
| 382 |
+
// ββ Toast βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 383 |
+
/**
 * Show a short message in the #toast element for two seconds.
 *
 * A call made while a toast is still visible cancels the pending hide and
 * restarts the two-second window, so the newer message is not hidden early
 * by the older call's timer.
 *
 * @param {string} msg - Text to display.
 */
function toast(msg) {
  const toastEl = document.getElementById("toast");
  toastEl.textContent = msg;
  toastEl.classList.add("show");

  // The timer id is kept on the function itself so no extra script-level
  // variable is needed.
  clearTimeout(toast.hideTimer);
  toast.hideTimer = setTimeout(() => {
    toast.hideTimer = undefined;
    toastEl.classList.remove("show");
  }, 2000);
}
|
| 389 |
+
</script>
|
| 390 |
+
</body>
|
| 391 |
+
</html>
|
README.md
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: GLM OCR
|
| 3 |
+
emoji: π
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
pinned: false
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# GOT-OCR 2.0 — Self-Hosted OCR Engine
|
| 12 |
+
|
| 13 |
+
> A full-stack portfolio project: self-hosted OCR backend powered by **General OCR Theory (GOT-OCR 2.0)**, a 580M-param vision-language model trained end-to-end for document understanding.
|
| 14 |
+
|
| 15 |
+
---
|
| 16 |
+
|
| 17 |
+
## What is GOT-OCR 2.0?
|
| 18 |
+
|
| 19 |
+
GOT-OCR 2.0 is a state-of-the-art open-source OCR model from the paper
|
| 20 |
+
**"General OCR Theory: Towards OCR-2.0 via a Unified End-to-end Model"** (arXiv:2409.01704).
|
| 21 |
+
|
| 22 |
+
Unlike traditional OCR (Tesseract, etc.) it uses a **vision encoder + language model** architecture:
|
| 23 |
+
|
| 24 |
+
```
|
| 25 |
+
Image → [CLIP-style Vision Encoder] → [Qwen2 LM Backbone] → Text
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
It handles:
|
| 29 |
+
- Plain text from documents, screenshots, photos
|
| 30 |
+
- **Tables** (preserved structure)
|
| 31 |
+
- **Mathematical equations** (LaTeX output)
|
| 32 |
+
- **Code blocks** (syntax preserved)
|
| 33 |
+
- **Multilingual** text
|
| 34 |
+
- **Handwriting**
|
| 35 |
+
|
| 36 |
+
---
|
| 37 |
+
|
| 38 |
+
## Project Structure
|
| 39 |
+
|
| 40 |
+
```
|
| 41 |
+
got-ocr-project/
├── backend/
│   ├── main.py            # FastAPI server — routes, CORS, request handling
│   ├── ocr_engine.py      # Model loading, inference, OcrResult dataclass
│   └── requirements.txt
├── frontend/
│   └── index.html         # Single-file frontend (served by FastAPI)
├── Dockerfile             # CUDA + CPU build
├── docker-compose.yml     # One-command deployment
└── README.md
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
---
|
| 54 |
+
|
| 55 |
+
## Quickstart
|
| 56 |
+
|
| 57 |
+
### Option 1 β Docker (recommended)
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
git clone https://github.com/YOUR_USERNAME/got-ocr-project
|
| 61 |
+
cd got-ocr-project
|
| 62 |
+
|
| 63 |
+
# CPU-only (comment out the `deploy` block in docker-compose.yml first)
|
| 64 |
+
docker compose up --build
|
| 65 |
+
|
| 66 |
+
# With GPU
|
| 67 |
+
docker compose up --build
|
| 68 |
+
|
| 69 |
+
# App is live at http://localhost:8000
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
The first build downloads ~2GB of model weights β cached in a Docker volume afterward.
|
| 73 |
+
|
| 74 |
+
---
|
| 75 |
+
|
| 76 |
+
### Option 2 β Local Python (no Docker)
|
| 77 |
+
|
| 78 |
+
```bash
|
| 79 |
+
# 1. Clone
|
| 80 |
+
git clone https://github.com/YOUR_USERNAME/got-ocr-project
|
| 81 |
+
cd got-ocr-project
|
| 82 |
+
|
| 83 |
+
# 2. Create virtualenv
|
| 84 |
+
python -m venv .venv
|
| 85 |
+
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
| 86 |
+
|
| 87 |
+
# 3. Install dependencies
|
| 88 |
+
pip install -r backend/requirements.txt
|
| 89 |
+
|
| 90 |
+
# 4. Run
|
| 91 |
+
cd backend
|
| 92 |
+
python main.py
|
| 93 |
+
|
| 94 |
+
# App is live at http://localhost:8000
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
Model weights (~2 GB) download automatically from HuggingFace on first startup.
|
| 98 |
+
|
| 99 |
+
---
|
| 100 |
+
|
| 101 |
+
## API Reference
|
| 102 |
+
|
| 103 |
+
### `POST /ocr`
|
| 104 |
+
|
| 105 |
+
Run OCR on an uploaded image.
|
| 106 |
+
|
| 107 |
+
**Request** β `multipart/form-data`
|
| 108 |
+
|
| 109 |
+
| Field | Type | Required | Description |
|
| 110 |
+
|--------|--------|----------|-------------|
|
| 111 |
+
| `file` | file | ✅ | Image file (PNG, JPG, WEBP, GIF, BMP, TIFF, max 20 MB) |
|
| 112 |
+
| `mode` | string | β | `ocr` (default) for plain text Β· `format` for structured (Markdown/LaTeX) |
|
| 113 |
+
|
| 114 |
+
**Response** β `application/json`
|
| 115 |
+
|
| 116 |
+
```json
|
| 117 |
+
{
|
| 118 |
+
"success": true,
|
| 119 |
+
"text": "Extracted text here...",
|
| 120 |
+
"word_count": 142,
|
| 121 |
+
"char_count": 863,
|
| 122 |
+
"latency_ms": 1240.5,
|
| 123 |
+
"mode": "ocr",
|
| 124 |
+
"model_id": "stepfun-ai/GOT-OCR2_0",
|
| 125 |
+
"device": "cuda"
|
| 126 |
+
}
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
**Example β curl**
|
| 130 |
+
```bash
|
| 131 |
+
curl -X POST http://localhost:8000/ocr \
|
| 132 |
+
-F "file=@document.png" \
|
| 133 |
+
-F "mode=ocr"
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
**Example β Python**
|
| 137 |
+
```python
|
| 138 |
+
import requests
|
| 139 |
+
|
| 140 |
+
with open("document.png", "rb") as f:
|
| 141 |
+
r = requests.post(
|
| 142 |
+
"http://localhost:8000/ocr",
|
| 143 |
+
files={"file": ("document.png", f, "image/png")},
|
| 144 |
+
data={"mode": "ocr"},
|
| 145 |
+
)
|
| 146 |
+
|
| 147 |
+
data = r.json()
|
| 148 |
+
print(data["text"])
|
| 149 |
+
```
|
| 150 |
+
|
| 151 |
+
**Example β JavaScript fetch**
|
| 152 |
+
```js
|
| 153 |
+
const formData = new FormData();
|
| 154 |
+
formData.append("file", imageFile);
|
| 155 |
+
formData.append("mode", "ocr");
|
| 156 |
+
|
| 157 |
+
const res = await fetch("http://localhost:8000/ocr", { method: "POST", body: formData });
|
| 158 |
+
const data = await res.json();
|
| 159 |
+
console.log(data.text);
|
| 160 |
+
```
|
| 161 |
+
|
| 162 |
+
---
|
| 163 |
+
|
| 164 |
+
### `GET /health`
|
| 165 |
+
|
| 166 |
+
```json
|
| 167 |
+
{
|
| 168 |
+
"status": "ok",
|
| 169 |
+
"model": {
|
| 170 |
+
"model_id": "stepfun-ai/GOT-OCR2_0",
|
| 171 |
+
"device": "cuda",
|
| 172 |
+
"loaded": true,
|
| 173 |
+
"gpu_name": "NVIDIA RTX 3090",
|
| 174 |
+
"gpu_memory_gb": 24.0
|
| 175 |
+
}
|
| 176 |
+
}
|
| 177 |
+
```
|
| 178 |
+
|
| 179 |
+
### `GET /metrics`
|
| 180 |
+
|
| 181 |
+
Session-level stats (resets on server restart).
|
| 182 |
+
|
| 183 |
+
```json
|
| 184 |
+
{
|
| 185 |
+
"total_requests": 14,
|
| 186 |
+
"total_words_extracted": 3821,
|
| 187 |
+
"avg_latency_ms": 980.4,
|
| 188 |
+
"error_count": 0,
|
| 189 |
+
"uptime_seconds": 3620.1
|
| 190 |
+
}
|
| 191 |
+
```
|
| 192 |
+
|
| 193 |
+
---
|
| 194 |
+
|
| 195 |
+
## Hardware Requirements
|
| 196 |
+
|
| 197 |
+
| Setup | VRAM / RAM | Latency |
|
| 198 |
+
|-------|------------|---------|
|
| 199 |
+
| NVIDIA GPU (8 GB+ VRAM) | ~4 GB VRAM | ~0.5 β 1.5 s/image |
|
| 200 |
+
| CPU (16 GB RAM) | ~3 GB RAM | ~8 β 20 s/image |
|
| 201 |
+
| Apple Silicon (MPS) | ~4 GB | ~3 β 6 s/image |
|
| 202 |
+
|
| 203 |
+
---
|
| 204 |
+
|
| 205 |
+
## Deployment to Cloud
|
| 206 |
+
|
| 207 |
+
### Fly.io (GPU, pay-as-you-go)
|
| 208 |
+
|
| 209 |
+
```bash
|
| 210 |
+
fly launch --name got-ocr
|
| 211 |
+
fly scale vm a100-40gb
|
| 212 |
+
fly deploy
|
| 213 |
+
```
|
| 214 |
+
|
| 215 |
+
### Render / Railway (CPU only)
|
| 216 |
+
|
| 217 |
+
Push to GitHub β connect repo β set start command:
|
| 218 |
+
```
|
| 219 |
+
cd backend && uvicorn main:app --host 0.0.0.0 --port $PORT
|
| 220 |
+
```
|
| 221 |
+
|
| 222 |
+
### Vast.ai / RunPod (cheapest GPU)
|
| 223 |
+
|
| 224 |
+
Rent an RTX 3090 node, SSH in, clone repo, run `docker compose up`.
|
| 225 |
+
|
| 226 |
+
---
|
| 227 |
+
|
| 228 |
+
## Architecture Diagram
|
| 229 |
+
|
| 230 |
+
```
|
| 231 |
+
Browser
  │
  │  POST /ocr  (multipart image + mode)
  ▼
┌─────────────────────────────────────┐
│  FastAPI  (main.py)                 │
│  • CORS middleware                  │
│  • file validation (type, size)     │
│  • session metrics                  │
└──────────────┬──────────────────────┘
               │  image_bytes, mode
               ▼
┌─────────────────────────────────────┐
│  GotOcrEngine  (ocr_engine.py)      │
│  • PIL validation & RGB conversion  │
│  • writes temp PNG to disk          │
│  • model.chat(tokenizer, path, …)   │
│  • returns OcrResult dataclass      │
└──────────────┬──────────────────────┘
               │  (torch.inference_mode)
               ▼
┌─────────────────────────────────────┐
│  GOT-OCR 2.0 Model                  │
│  stepfun-ai/GOT-OCR2_0              │
│  • Vision encoder (SigLIP)          │
│  • LM backbone (Qwen2-0.5B)         │
│  • Runs on CUDA / CPU / MPS         │
└─────────────────────────────────────┘
|
| 259 |
+
```
|
| 260 |
+
|
| 261 |
+
---
|
| 262 |
+
|
| 263 |
+
## What makes this a good portfolio project?
|
| 264 |
+
|
| 265 |
+
- **Self-hosted ML inference** β no API dependency, model runs on your server
|
| 266 |
+
- **End-to-end system** β frontend + REST API + ML pipeline + Docker
|
| 267 |
+
- **Production patterns** β lifespan events, CORS, input validation, error handling, metrics endpoint, health check
|
| 268 |
+
- **Real ML engineering** β model loading, device management, temp file handling, `torch.inference_mode()`
|
| 269 |
+
- **Clean code** β dataclasses, logging, type hints, docstrings
|
| 270 |
+
|
| 271 |
+
---
|
| 272 |
+
|
| 273 |
+
## References
|
| 274 |
+
|
| 275 |
+
- Paper: [GOT-OCR 2.0](https://arxiv.org/abs/2409.01704)
|
| 276 |
+
- Model: [stepfun-ai/GOT-OCR2_0](https://huggingface.co/stepfun-ai/GOT-OCR2_0)
|
| 277 |
+
- Code: [GOT-OCR2.0 GitHub](https://github.com/Ucas-HaoranWei/GOT-OCR2.0)
|
background.js
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// background.js β Service worker
|
| 2 |
+
// Handles: tab screenshot, image crop, OCR API call, result relay
|
| 3 |
+
|
| 4 |
+
const OCR_ENDPOINT = "http://localhost:8000/ocr";
|
| 5 |
+
const OCR_MODE = "recognize"; // or "parse"
|
| 6 |
+
|
| 7 |
+
// ββ Listen for messages from content.js βββββββββββββββββββββββββββββββββββββ
|
| 8 |
+
|
| 9 |
+
// Route messages arriving at the service worker by `msg.type`.
// Returning true keeps the response channel open for an asynchronous
// sendResponse; unrecognised message types fall through with no return value.
chrome.runtime.onMessage.addListener((msg, sender, sendResponse) => {
  switch (msg.type) {
    case "CAPTURE_REGION": {
      // Screenshot, crop and OCR the rect; reply with the result or the error text.
      (async () => {
        try {
          const result = await handleCapture(msg.rect, sender.tab);
          sendResponse({ success: true, ...result });
        } catch (error) {
          sendResponse({ success: false, error: error.message });
        }
      })();
      return true; // keep channel open for async
    }

    case "PING": {
      checkServer().then((ok) => sendResponse({ ok }));
      return true;
    }

    case "OPEN_SIDEBAR": {
      // Relay to the sending tab as SHOW_SIDEBAR.
      chrome.tabs.sendMessage(sender.tab.id, { type: "SHOW_SIDEBAR" });
      return false;
    }
  }
});
|
| 30 |
+
|
| 31 |
+
// ── Capture + crop + OCR ─────────────────────────────────────────────────────
|
| 32 |
+
|
| 33 |
+
/**
 * Screenshots the visible tab, crops it to the selected region, sends the crop
 * to the OCR backend, and returns the recognised text with metadata.
 *
 * @param {{x: number, y: number, width: number, height: number, dpr?: number}} rect
 *   Selected region, passed through to cropImage.
 * @param {{windowId: number}} tab - Tab that requested the capture.
 * @returns {Promise<object>} `text`, `word_count`, `char_count`, `latency_ms`,
 *   `mode` and `device` copied from the server response, plus `imageDataUrl`
 *   (the cropped image) and an ISO `timestamp`.
 * @throws {Error} If no tab is available or the server answers with a non-2xx
 *   status.
 */
async function handleCapture(rect, tab) {
  if (tab?.windowId == null) {
    throw new Error("Capture must be requested from a browser tab.");
  }

  // 1. Capture the entire visible tab as a data URL. No `quality` option is
  //    passed: it only applies to JPEG output.
  const dataUrl = await chrome.tabs.captureVisibleTab(tab.windowId, {
    format: "png",
  });

  // 2. Crop to the selected rect.
  const croppedBlob = await cropImage(dataUrl, rect);

  // 3. Send the crop to the OCR backend as multipart form data.
  const formData = new FormData();
  formData.append("file", croppedBlob, "selection.png");
  formData.append("mode", OCR_MODE);

  const res = await fetch(OCR_ENDPOINT, {
    method: "POST",
    body: formData,
  });

  if (!res.ok) {
    // The error body may be missing or not JSON; fall back to the status code.
    const err = await res.json().catch(() => ({}));
    const detail = err?.detail;
    let message = `Server returned ${res.status}`;
    if (typeof detail === "string" && detail !== "") {
      message = detail;
    } else if (detail) {
      // Structured details (objects/arrays) would otherwise render as
      // "[object Object]".
      message = JSON.stringify(detail);
    }
    throw new Error(message);
  }

  const data = await res.json();

  // Keep the cropped image as a data URL so the sidebar can display it.
  const croppedDataUrl = await blobToDataUrl(croppedBlob);

  return {
    text: data.text,
    word_count: data.word_count,
    char_count: data.char_count,
    latency_ms: data.latency_ms,
    mode: data.mode,
    device: data.device,
    imageDataUrl: croppedDataUrl,
    timestamp: new Date().toISOString(),
  };
}
|
| 74 |
+
|
| 75 |
+
// ── Image cropping using OffscreenCanvas ─────────────────────────────────────
|
| 76 |
+
|
| 77 |
+
/**
 * Crops a screenshot to the selected region and returns it as a PNG blob.
 *
 * `rect` is scaled by `rect.dpr` before cropping: per the original author's
 * note, the screenshot is captured at device-pixel resolution while the
 * selection is measured in CSS pixels.
 *
 * @param {string} dataUrl - Full screenshot as a data URL.
 * @param {{x: number, y: number, width: number, height: number, dpr?: number}} rect
 *   Selected region; `dpr` defaults to 1 when missing.
 * @returns {Promise<Blob>} PNG blob of the part of the region that overlaps
 *   the screenshot.
 * @throws {Error} If the region does not overlap the screenshot at all.
 */
async function cropImage(dataUrl, rect) {
  // Decode the full screenshot.
  const res = await fetch(dataUrl);
  const blob = await res.blob();
  const bitmap = await createImageBitmap(blob);

  try {
    const dpr = rect.dpr || 1;
    const sx = Math.round(rect.x * dpr);
    const sy = Math.round(rect.y * dpr);
    const sw = Math.round(rect.width * dpr);
    const sh = Math.round(rect.height * dpr);

    // Intersect the requested region with the bitmap. Clamping both edges
    // (rather than only the origin) keeps the far edge in place when the
    // origin is negative instead of silently widening the crop.
    const left = Math.max(0, sx);
    const top = Math.max(0, sy);
    const right = Math.min(bitmap.width, sx + sw);
    const bottom = Math.min(bitmap.height, sy + sh);
    const cw = right - left;
    const ch = bottom - top;

    if (cw <= 0 || ch <= 0) {
      throw new Error("Selected region lies outside the captured screenshot.");
    }

    const canvas = new OffscreenCanvas(cw, ch);
    const ctx = canvas.getContext("2d");
    ctx.drawImage(bitmap, left, top, cw, ch, 0, 0, cw, ch);

    return canvas.convertToBlob({ type: "image/png" });
  } finally {
    // The pixels are already copied onto the canvas (or the crop failed), so
    // the decoded screenshot can be released either way.
    bitmap.close();
  }
}
|
| 104 |
+
|
| 105 |
+
/**
 * Reads a blob into a data URL using FileReader.
 *
 * @param {Blob} blob - Blob to encode.
 * @returns {Promise<string>} Resolves with the reader's result.
 * @throws {Error} Rejects with the reader's error (or a generic Error when the
 *   reader exposes none) so callers can rely on `error.message`.
 */
function blobToDataUrl(blob) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    // The raw "error" event has no `message`; surface the underlying error.
    reader.onerror = () =>
      reject(reader.error ?? new Error("Failed to read blob as a data URL."));
    reader.readAsDataURL(blob);
  });
}
|
| 113 |
+
|
| 114 |
+
// ── Server health check ──────────────────────────────────────────────────────
|
| 115 |
+
|
| 116 |
+
/**
 * Checks whether the OCR backend is up.
 *
 * The health URL is derived from OCR_ENDPOINT (same origin, path `/health`) so
 * that pointing the extension at another server also moves the health check.
 *
 * @returns {Promise<boolean>} `true` only when the server answers with a 2xx
 *   status and a JSON body whose `status` is "ok" within 3 seconds; `false` on
 *   any failure (network error, timeout, bad status, invalid JSON).
 */
async function checkServer() {
  try {
    const healthUrl = new URL("/health", OCR_ENDPOINT);
    const r = await fetch(healthUrl, { signal: AbortSignal.timeout(3000) });
    if (!r.ok) return false;
    const d = await r.json();
    return d?.status === "ok";
  } catch {
    // Any failure simply means "not reachable"; callers only need a boolean.
    return false;
  }
}
|
content.css
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* content.css — Styles injected into every page for the selection overlay */
|
| 2 |
+
|
| 3 |
+
/* ── Overlay ── */
/* Covers the whole viewport and dims the page. Sits one z-index level below
   the selection box and the toast. */
#glmocr-overlay {
  z-index: 2147483646 !important;
  position: fixed !important;
  inset: 0 !important;
  cursor: crosshair !important;
  user-select: none !important;
  background: rgba(0, 0, 0, 0.45) !important;
}
|
| 12 |
+
|
| 13 |
+
/* ── Hint text ── */
/* Instruction label centred near the top of the overlay. It ignores pointer
   events and fades via the opacity transition. */
#glmocr-hint {
  /* placement */
  position: absolute !important;
  top: 20px !important;
  left: 50% !important;
  transform: translateX(-50%) !important;
  pointer-events: none !important;

  /* box */
  padding: 10px 18px !important;
  border: 1px solid rgba(255,255,255,0.15) !important;
  border-radius: 4px !important;
  background: rgba(0, 0, 0, 0.8) !important;

  /* text */
  color: #f5f0e8 !important;
  font-family: 'IBM Plex Mono', monospace, monospace !important;
  font-size: 13px !important;
  letter-spacing: 0.04em !important;
  white-space: nowrap !important;

  transition: opacity 0.2s !important;
}
|
| 31 |
+
|
| 32 |
+
/* ── Selection box ── */
/* Rectangle drawn while the user drags. It starts hidden; the content script
   reveals it by writing an inline `display:block`. `display` is deliberately
   NOT marked !important here: an !important stylesheet declaration outranks a
   normal inline style, which would keep the box hidden forever. */
#glmocr-selbox {
  position: fixed !important;
  display: none;
  border: 2px solid #c94a1f !important;
  background: rgba(201, 74, 31, 0.08) !important;
  box-shadow: 0 0 0 9999px rgba(0, 0, 0, 0.35) !important;
  pointer-events: none !important;
  z-index: 2147483647 !important;
}
|
| 42 |
+
|
| 43 |
+
/* ── Sidebar iframe ── */
/* Full-height panel pinned to the right edge of the viewport, below the
   overlay in z-order. */
#glmocr-sidebar {
  z-index: 2147483645 !important;
  position: fixed !important;
  top: 0 !important;
  right: 0 !important;
  height: 100vh !important;
  width: 380px !important;
  /* `border` must stay ahead of `border-left`: the shorthand resets all four
     sides, and the left edge is then re-added. */
  border: none !important;
  border-left: 2px solid #d4cfc3 !important;
  box-shadow: -4px 0 24px rgba(0,0,0,0.12) !important;
  animation: glmocr-slideIn 0.25s cubic-bezier(0.22, 1, 0.36, 1) !important;
}

/* Slide the panel in from the right while fading it in. */
@keyframes glmocr-slideIn {
  from {
    opacity: 0;
    transform: translateX(100%);
  }
  to {
    opacity: 1;
    transform: translateX(0);
  }
}
|
| 61 |
+
|
| 62 |
+
/* ── Toast ── */
/* Short status message centred near the bottom of the viewport. */
#glmocr-toast {
  /* placement */
  z-index: 2147483647 !important;
  position: fixed !important;
  bottom: 24px !important;
  left: 50% !important;
  transform: translateX(-50%) !important;

  /* box */
  padding: 10px 20px !important;
  border-radius: 4px !important;
  background: #0f0e0d !important;

  /* text */
  color: #f5f0e8 !important;
  font-family: 'IBM Plex Mono', monospace, monospace !important;
  font-size: 13px !important;
  white-space: nowrap !important;

  animation: glmocr-fadeUp 0.3s ease both !important;
}

/* Fade in while rising 12px; the X translation keeps the toast centred. */
@keyframes glmocr-fadeUp {
  from {
    transform: translateX(-50%) translateY(12px);
    opacity: 0;
  }
  to {
    transform: translateX(-50%) translateY(0);
    opacity: 1;
  }
}
|
content.js
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// content.js — Injected into every page
|
| 2 |
+
// Manages the screen selection overlay and sidebar panel
|
| 3 |
+
|
| 4 |
+
let overlayActive = false;
|
| 5 |
+
let sidebarFrame = null;
|
| 6 |
+
|
| 7 |
+
// ── Listen for messages from background / popup ──────────────────────────────
|
| 8 |
+
|
| 9 |
+
/**
 * Dispatches extension messages received by the content script:
 *   - "START_SELECTION": starts the selection overlay unless one is already
 *     active, then acknowledges with `{ ok: true }`.
 *   - "SHOW_SIDEBAR":    opens the sidebar with an empty payload.
 *   - "SHOW_RESULT":     opens the sidebar with `msg.data`.
 * Every handled case answers synchronously, hence the `false` return value.
 */
chrome.runtime.onMessage.addListener((msg, _sender, sendResponse) => {
  switch (msg.type) {
    case "START_SELECTION":
      if (!overlayActive) startSelection();
      sendResponse({ ok: true });
      return false;

    case "SHOW_SIDEBAR":
      showSidebar({});
      return false;

    case "SHOW_RESULT":
      showSidebar(msg.data);
      return false;

    default:
      return undefined;
  }
});
|
| 28 |
+
|
| 29 |
+
// ── Selection overlay ────────────────────────────────────────────────────────
|
| 30 |
+
|
| 31 |
+
/**
 * Shows the full-page selection overlay and lets the user drag out a rectangle.
 *
 * On mouse-up the overlay is removed and, if the rectangle is at least 10×10
 * CSS pixels, runOcr is called with the rectangle in VIEWPORT coordinates
 * (`clientX`/`clientY`) plus the current device pixel ratio. Viewport
 * coordinates are used because the screenshot that gets cropped only contains
 * the visible part of the page; adding scroll offsets would shift the crop on
 * a scrolled page. Pressing Esc cancels the selection.
 */
function startSelection() {
  overlayActive = true;

  // Dim the page.
  const overlay = document.createElement("div");
  overlay.id = "glmocr-overlay";

  // Instruction hint.
  const hint = document.createElement("div");
  hint.id = "glmocr-hint";
  hint.textContent = "Drag to select text region — Press Esc to cancel";
  overlay.appendChild(hint);

  // Selection box.
  const selBox = document.createElement("div");
  selBox.id = "glmocr-selbox";
  overlay.appendChild(selBox);

  document.body.appendChild(overlay);

  let startX = 0;
  let startY = 0;
  let isDragging = false;

  /** Rectangle spanned by the drag origin and the pointer position of `e`. */
  function selectionAt(e) {
    return {
      x: Math.min(e.clientX, startX),
      y: Math.min(e.clientY, startY),
      w: Math.abs(e.clientX - startX),
      h: Math.abs(e.clientY - startY),
    };
  }

  /** Positions the selection box and makes it visible. */
  function drawSelBox({ x, y, w, h }) {
    // `!important` is required: the stylesheet hides the box with
    // `display: none !important`, which a normal inline style cannot override.
    selBox.style.cssText =
      `left:${x}px; top:${y}px; width:${w}px; height:${h}px; display:block !important`;
  }

  function onMouseDown(e) {
    if (e.button !== 0) return; // primary button only
    isDragging = true;
    startX = e.clientX;
    startY = e.clientY;
    drawSelBox({ x: startX, y: startY, w: 0, h: 0 });
    hint.style.opacity = "0";
    e.preventDefault();
  }

  function onMouseMove(e) {
    if (!isDragging) return;
    drawSelBox(selectionAt(e));
  }

  function onMouseUp(e) {
    if (!isDragging) return;
    isDragging = false;

    const { x, y, w, h } = selectionAt(e);

    cleanup();

    if (w < 10 || h < 10) {
      showToast("Selection too small — try again.");
      return;
    }

    runOcr({
      x,
      y,
      width: w,
      height: h,
      dpr: window.devicePixelRatio || 1,
    });
  }

  function onKeyDown(e) {
    if (e.key === "Escape") {
      cleanup();
      showToast("Cancelled.");
    }
  }

  /** Removes the overlay together with every listener registered below. */
  function cleanup() {
    overlayActive = false;
    overlay.removeEventListener("mousedown", onMouseDown);
    overlay.removeEventListener("mousemove", onMouseMove);
    overlay.removeEventListener("mouseup", onMouseUp);
    document.removeEventListener("keydown", onKeyDown);
    overlay.remove();
  }

  overlay.addEventListener("mousedown", onMouseDown);
  overlay.addEventListener("mousemove", onMouseMove);
  overlay.addEventListener("mouseup", onMouseUp);
  document.addEventListener("keydown", onKeyDown);
}
|
| 121 |
+
|
| 122 |
+
// ── Send region to background for capture + OCR ──────────────────────────────
|
| 123 |
+
|
| 124 |
+
/**
 * Asks the background service worker to capture and OCR `rect`, showing the
 * outcome in the sidebar.
 *
 * A loading sidebar is shown immediately; it is replaced by the result or by
 * an error once the background replies.
 *
 * @param {{x: number, y: number, width: number, height: number, dpr: number}} rect
 *   Region to capture.
 */
function runOcr(rect) {
  // Show a loading sidebar immediately.
  showSidebar({ loading: true });

  chrome.runtime.sendMessage({ type: "CAPTURE_REGION", rect }, (response) => {
    if (chrome.runtime.lastError) {
      showSidebar({ error: chrome.runtime.lastError.message });
      return;
    }

    if (response?.success) {
      showSidebar(response);
      return;
    }

    // `response` can be undefined (no reply) and `error` can be missing; always
    // give the sidebar a message to display.
    showSidebar({
      error: response?.error ?? "No response from the background service worker.",
    });
  });
}
|
| 140 |
+
|
| 141 |
+
// ── Sidebar panel ────────────────────────────────────────────────────────────
|
| 142 |
+
|
| 143 |
+
/**
 * Shows the sidebar iframe with `data`, replacing any sidebar already open.
 *
 * Once the iframe has loaded, `data` is posted to it as a "SIDEBAR_DATA"
 * message. The sidebar can post back "CLOSE_SIDEBAR" (remove the panel) or
 * "START_NEW_SELECTION" (remove the panel and start a new selection).
 *
 * Each call registers exactly one window "message" handler, tied to its own
 * iframe. The handler unregisters itself once it has acted or once its iframe
 * is no longer the current sidebar, so handlers do not pile up across calls.
 *
 * @param {object} data - Payload forwarded to sidebar.html.
 */
function showSidebar(data) {
  // Remove existing sidebar if any.
  if (sidebarFrame) sidebarFrame.remove();

  const frame = document.createElement("iframe");
  frame.id = "glmocr-sidebar";
  frame.src = chrome.runtime.getURL("sidebar.html");

  document.body.appendChild(frame);
  sidebarFrame = frame;

  // Wait for the iframe to load, then send the data.
  frame.onload = () => {
    frame.contentWindow.postMessage({ type: "SIDEBAR_DATA", data }, "*");
  };

  function onSidebarMessage(e) {
    // This handler belongs to a sidebar that has been replaced or closed.
    if (sidebarFrame !== frame) {
      window.removeEventListener("message", onSidebarMessage);
      return;
    }

    // Only obey the sidebar iframe itself, not the host page or other frames.
    // NOTE(review): assumes sidebar.html posts from its own window
    // (e.g. window.parent.postMessage) — confirm against sidebar.html.
    if (e.source !== frame.contentWindow) return;

    const type = e.data?.type;
    if (type !== "CLOSE_SIDEBAR" && type !== "START_NEW_SELECTION") return;

    window.removeEventListener("message", onSidebarMessage);
    frame.remove();
    sidebarFrame = null;

    if (type === "START_NEW_SELECTION" && !overlayActive) {
      startSelection();
    }
  }

  window.addEventListener("message", onSidebarMessage);
}
|
| 172 |
+
|
| 173 |
+
// ── Toast notification ───────────────────────────────────────────────────────
|
| 174 |
+
|
| 175 |
+
/**
 * Displays a short-lived toast with the given text.
 *
 * Any toast already on the page is removed first, so at most one is visible.
 * The new toast removes itself after 3 seconds.
 *
 * @param {string} msg - Text to display.
 */
function showToast(msg) {
  document.getElementById("glmocr-toast")?.remove();

  const toast = Object.assign(document.createElement("div"), {
    id: "glmocr-toast",
    textContent: msg,
  });
  document.body.appendChild(toast);

  setTimeout(() => {
    toast.remove();
  }, 3000);
}
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: "3.9"

services:
  got-ocr:
    image: got-ocr:latest
    build: .
    container_name: got-ocr-backend
    restart: unless-stopped
    environment:
      - PYTHONUNBUFFERED=1
    ports:
      - "8000:8000"

    # ── Optional: persist HuggingFace model cache across rebuilds ────────
    volumes:
      - hf_cache:/root/.cache/huggingface

    # ── GPU support (comment out for CPU-only) ───────────────────────────
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

    # ── Health check ─────────────────────────────────────────────────────
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s   # give model time to load

volumes:
  hf_cache:
|
frontend/index.html
ADDED
|
@@ -0,0 +1,759 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8"/>
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
|
| 6 |
+
<title>GLM-OCR β Self-Hosted Document OCR</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com"/>
|
| 8 |
+
<link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500;700&family=DM+Serif+Display:ital@0;1&family=DM+Sans:wght@300;400;500&display=swap" rel="stylesheet"/>
|
| 9 |
+
<style>
|
| 10 |
+
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
| 11 |
+
|
| 12 |
+
:root {
|
| 13 |
+
--ink: #0f0e0d;
|
| 14 |
+
--paper: #f5f0e8;
|
| 15 |
+
--warm: #ede8dc;
|
| 16 |
+
--border: #d4cfc3;
|
| 17 |
+
--muted: #8f8880;
|
| 18 |
+
--accent: #c94a1f;
|
| 19 |
+
--green: #1a6b4a;
|
| 20 |
+
--mono: 'IBM Plex Mono', monospace;
|
| 21 |
+
--serif: 'DM Serif Display', serif;
|
| 22 |
+
--sans: 'DM Sans', sans-serif;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
html { scroll-behavior: smooth; }
|
| 26 |
+
|
| 27 |
+
body {
|
| 28 |
+
background: var(--paper);
|
| 29 |
+
color: var(--ink);
|
| 30 |
+
font-family: var(--sans);
|
| 31 |
+
min-height: 100vh;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
body::before {
|
| 35 |
+
content: '';
|
| 36 |
+
position: fixed;
|
| 37 |
+
inset: 0;
|
| 38 |
+
background-image: radial-gradient(circle, rgba(0,0,0,0.055) 1px, transparent 1px);
|
| 39 |
+
background-size: 18px 18px;
|
| 40 |
+
pointer-events: none;
|
| 41 |
+
z-index: 0;
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
.page { position: relative; z-index: 1; }
|
| 45 |
+
|
| 46 |
+
/* ββ MASTHEAD ββ */
|
| 47 |
+
.masthead {
|
| 48 |
+
border-bottom: 3px solid var(--ink);
|
| 49 |
+
padding: 0 48px;
|
| 50 |
+
display: grid;
|
| 51 |
+
grid-template-columns: 1fr auto 1fr;
|
| 52 |
+
align-items: center;
|
| 53 |
+
min-height: 68px;
|
| 54 |
+
gap: 16px;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
.masthead-left {
|
| 58 |
+
font-family: var(--mono);
|
| 59 |
+
font-size: 0.62rem;
|
| 60 |
+
color: var(--muted);
|
| 61 |
+
letter-spacing: 0.08em;
|
| 62 |
+
text-transform: uppercase;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
.masthead-center {
|
| 66 |
+
font-family: var(--serif);
|
| 67 |
+
font-size: 1.3rem;
|
| 68 |
+
white-space: nowrap;
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
.masthead-right {
|
| 72 |
+
display: flex;
|
| 73 |
+
justify-content: flex-end;
|
| 74 |
+
gap: 6px;
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
.pill {
|
| 78 |
+
font-family: var(--mono);
|
| 79 |
+
font-size: 0.6rem;
|
| 80 |
+
letter-spacing: 0.08em;
|
| 81 |
+
text-transform: uppercase;
|
| 82 |
+
padding: 4px 9px;
|
| 83 |
+
border-radius: 2px;
|
| 84 |
+
border: 1px solid var(--border);
|
| 85 |
+
color: var(--muted);
|
| 86 |
+
display: flex;
|
| 87 |
+
align-items: center;
|
| 88 |
+
gap: 5px;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
.pill.live { border-color: var(--green); color: var(--green); }
|
| 92 |
+
|
| 93 |
+
.status-dot {
|
| 94 |
+
width: 6px; height: 6px;
|
| 95 |
+
border-radius: 50%;
|
| 96 |
+
background: var(--muted);
|
| 97 |
+
flex-shrink: 0;
|
| 98 |
+
}
|
| 99 |
+
.status-dot.ok { background: var(--green); }
|
| 100 |
+
.status-dot.err { background: var(--accent); }
|
| 101 |
+
.status-dot.pulse {
|
| 102 |
+
animation: blink 1.2s ease-in-out infinite;
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
@keyframes blink { 0%,100%{opacity:1} 50%{opacity:0.3} }
|
| 106 |
+
|
| 107 |
+
/* ββ HERO ββ */
|
| 108 |
+
.hero {
|
| 109 |
+
padding: 64px 48px 48px;
|
| 110 |
+
border-bottom: 1px solid var(--border);
|
| 111 |
+
display: grid;
|
| 112 |
+
grid-template-columns: 1fr 1fr;
|
| 113 |
+
gap: 48px;
|
| 114 |
+
align-items: end;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
.hero-headline {
|
| 118 |
+
font-family: var(--serif);
|
| 119 |
+
font-size: clamp(2.8rem, 5.5vw, 5rem);
|
| 120 |
+
line-height: 1.02;
|
| 121 |
+
letter-spacing: -0.02em;
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
.hero-headline em { font-style: italic; color: var(--accent); }
|
| 125 |
+
|
| 126 |
+
.hero-right { display: flex; flex-direction: column; gap: 20px; }
|
| 127 |
+
|
| 128 |
+
.hero-desc {
|
| 129 |
+
font-size: 0.88rem;
|
| 130 |
+
color: var(--muted);
|
| 131 |
+
line-height: 1.75;
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
.hero-stats {
|
| 135 |
+
display: flex;
|
| 136 |
+
gap: 24px;
|
| 137 |
+
flex-wrap: wrap;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
.stat { display: flex; flex-direction: column; gap: 2px; }
|
| 141 |
+
|
| 142 |
+
.stat strong {
|
| 143 |
+
font-family: var(--serif);
|
| 144 |
+
font-size: 1.5rem;
|
| 145 |
+
color: var(--accent);
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
.stat span {
|
| 149 |
+
font-family: var(--mono);
|
| 150 |
+
font-size: 0.58rem;
|
| 151 |
+
color: var(--muted);
|
| 152 |
+
letter-spacing: 0.1em;
|
| 153 |
+
text-transform: uppercase;
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
/* ββ MAIN ββ */
|
| 157 |
+
.main {
|
| 158 |
+
display: grid;
|
| 159 |
+
grid-template-columns: 1fr 1fr;
|
| 160 |
+
border-bottom: 1px solid var(--border);
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
.col { padding: 36px 48px; }
|
| 164 |
+
.col + .col { border-left: 1px solid var(--border); }
|
| 165 |
+
|
| 166 |
+
.col-label {
|
| 167 |
+
font-family: var(--mono);
|
| 168 |
+
font-size: 0.62rem;
|
| 169 |
+
color: var(--muted);
|
| 170 |
+
letter-spacing: 0.12em;
|
| 171 |
+
text-transform: uppercase;
|
| 172 |
+
margin-bottom: 20px;
|
| 173 |
+
display: flex;
|
| 174 |
+
align-items: center;
|
| 175 |
+
gap: 8px;
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
.col-label::after { content: ''; flex: 1; height: 1px; background: var(--border); }
|
| 179 |
+
|
| 180 |
+
/* ββ DROPZONE ββ */
|
| 181 |
+
#dropzone {
|
| 182 |
+
border: 2px dashed var(--border);
|
| 183 |
+
border-radius: 4px;
|
| 184 |
+
min-height: 240px;
|
| 185 |
+
display: flex;
|
| 186 |
+
flex-direction: column;
|
| 187 |
+
align-items: center;
|
| 188 |
+
justify-content: center;
|
| 189 |
+
gap: 14px;
|
| 190 |
+
cursor: pointer;
|
| 191 |
+
transition: all 0.2s;
|
| 192 |
+
position: relative;
|
| 193 |
+
overflow: hidden;
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
#dropzone:hover, #dropzone.over {
|
| 197 |
+
border-color: var(--accent);
|
| 198 |
+
background: rgba(201,74,31,0.04);
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
#dropzone.over::after {
|
| 202 |
+
content: 'Drop!';
|
| 203 |
+
position: absolute; inset: 0;
|
| 204 |
+
background: rgba(201,74,31,0.08);
|
| 205 |
+
display: grid; place-items: center;
|
| 206 |
+
font-family: var(--serif);
|
| 207 |
+
font-size: 2rem;
|
| 208 |
+
color: var(--accent);
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
.dz-icon { font-size: 2.2rem; }
|
| 212 |
+
|
| 213 |
+
.dz-label strong { display: block; font-weight: 500; font-size: 0.88rem; margin-bottom: 5px; text-align:center; }
|
| 214 |
+
.dz-label span { font-family: var(--mono); font-size: 0.64rem; color: var(--muted); }
|
| 215 |
+
|
| 216 |
+
#file-input { display: none; }
|
| 217 |
+
|
| 218 |
+
/* Preview */
|
| 219 |
+
#preview-wrap { display: none; }
|
| 220 |
+
#preview-wrap.active { display: block; }
|
| 221 |
+
|
| 222 |
+
#preview-img {
|
| 223 |
+
width: 100%; max-height: 240px;
|
| 224 |
+
object-fit: contain;
|
| 225 |
+
border: 1px solid var(--border);
|
| 226 |
+
border-radius: 2px;
|
| 227 |
+
background: var(--warm);
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
.file-meta {
|
| 231 |
+
margin-top: 8px;
|
| 232 |
+
font-family: var(--mono);
|
| 233 |
+
font-size: 0.65rem;
|
| 234 |
+
color: var(--muted);
|
| 235 |
+
display: flex;
|
| 236 |
+
justify-content: space-between;
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
/* ββ MODE ββ */
|
| 240 |
+
.mode-row {
|
| 241 |
+
margin: 18px 0 14px;
|
| 242 |
+
display: flex;
|
| 243 |
+
gap: 8px;
|
| 244 |
+
}
|
| 245 |
+
|
| 246 |
+
.mode-btn {
|
| 247 |
+
font-family: var(--mono);
|
| 248 |
+
font-size: 0.67rem;
|
| 249 |
+
letter-spacing: 0.04em;
|
| 250 |
+
padding: 9px 14px;
|
| 251 |
+
border: 1px solid var(--border);
|
| 252 |
+
background: transparent;
|
| 253 |
+
color: var(--muted);
|
| 254 |
+
cursor: pointer;
|
| 255 |
+
border-radius: 2px;
|
| 256 |
+
transition: all 0.15s;
|
| 257 |
+
display: flex;
|
| 258 |
+
flex-direction: column;
|
| 259 |
+
gap: 2px;
|
| 260 |
+
flex: 1;
|
| 261 |
+
text-align: left;
|
| 262 |
+
}
|
| 263 |
+
|
| 264 |
+
.mode-btn .mode-name { font-weight: 700; color: var(--ink); }
|
| 265 |
+
.mode-btn .mode-desc { font-size: 0.58rem; }
|
| 266 |
+
|
| 267 |
+
.mode-btn.selected {
|
| 268 |
+
background: var(--ink);
|
| 269 |
+
border-color: var(--ink);
|
| 270 |
+
color: var(--paper);
|
| 271 |
+
}
|
| 272 |
+
|
| 273 |
+
.mode-btn.selected .mode-name { color: var(--paper); }
|
| 274 |
+
|
| 275 |
+
/* ββ RUN BTN ββ */
|
| 276 |
+
.run-btn {
|
| 277 |
+
width: 100%;
|
| 278 |
+
padding: 14px;
|
| 279 |
+
background: var(--accent);
|
| 280 |
+
color: white;
|
| 281 |
+
border: none;
|
| 282 |
+
border-radius: 2px;
|
| 283 |
+
font-family: var(--serif);
|
| 284 |
+
font-size: 1.05rem;
|
| 285 |
+
cursor: pointer;
|
| 286 |
+
transition: background 0.15s;
|
| 287 |
+
}
|
| 288 |
+
|
| 289 |
+
.run-btn:hover:not(:disabled) { background: #b53d15; }
|
| 290 |
+
.run-btn:disabled { opacity: 0.35; cursor: not-allowed; }
|
| 291 |
+
|
| 292 |
+
.clear-link {
|
| 293 |
+
font-family: var(--mono);
|
| 294 |
+
font-size: 0.64rem;
|
| 295 |
+
color: var(--muted);
|
| 296 |
+
text-decoration: underline;
|
| 297 |
+
cursor: pointer;
|
| 298 |
+
display: none;
|
| 299 |
+
margin-top: 10px;
|
| 300 |
+
text-align: center;
|
| 301 |
+
}
|
| 302 |
+
|
| 303 |
+
/* ββ OUTPUT ββ */
|
| 304 |
+
.output-area {
|
| 305 |
+
min-height: 300px;
|
| 306 |
+
display: flex;
|
| 307 |
+
flex-direction: column;
|
| 308 |
+
}
|
| 309 |
+
|
| 310 |
+
#out-placeholder {
|
| 311 |
+
flex: 1;
|
| 312 |
+
display: flex;
|
| 313 |
+
flex-direction: column;
|
| 314 |
+
align-items: center;
|
| 315 |
+
justify-content: center;
|
| 316 |
+
gap: 10px;
|
| 317 |
+
border: 1px dashed var(--border);
|
| 318 |
+
border-radius: 2px;
|
| 319 |
+
}
|
| 320 |
+
|
| 321 |
+
#out-placeholder .ph { font-size: 2rem; opacity: 0.3; }
|
| 322 |
+
#out-placeholder p { font-family: var(--mono); font-size: 0.68rem; color: var(--muted); text-align: center; line-height: 1.9; }
|
| 323 |
+
|
| 324 |
+
/* Loading */
|
| 325 |
+
#out-loading { display: none; flex: 1; flex-direction: column; align-items: center; justify-content: center; gap: 16px; }
|
| 326 |
+
#out-loading.active { display: flex; }
|
| 327 |
+
|
| 328 |
+
.scan-bar-wrap { width: 160px; height: 3px; background: var(--border); border-radius: 2px; overflow: hidden; }
|
| 329 |
+
.scan-bar { height: 100%; background: var(--accent); border-radius: 2px; animation: scan 1.4s ease-in-out infinite; }
|
| 330 |
+
@keyframes scan { 0%{transform:translateX(-100%)} 50%{transform:translateX(0)} 100%{transform:translateX(100%)} }
|
| 331 |
+
|
| 332 |
+
.scan-label { font-family: var(--mono); font-size: 0.68rem; color: var(--muted); animation: blink 1.4s ease-in-out infinite; }
|
| 333 |
+
|
| 334 |
+
/* Error */
|
| 335 |
+
#out-error { display: none; background: #fff0f0; border: 1px solid rgba(201,74,31,0.3); border-radius: 2px; padding: 16px; font-family: var(--mono); font-size: 0.72rem; color: var(--accent); line-height: 1.7; }
|
| 336 |
+
#out-error.active { display: block; }
|
| 337 |
+
|
| 338 |
+
/* Result */
|
| 339 |
+
#out-result { display: none; flex-direction: column; gap: 10px; }
|
| 340 |
+
#out-result.active { display: flex; }
|
| 341 |
+
|
| 342 |
+
#result-meta { display: flex; gap: 14px; flex-wrap: wrap; }
|
| 343 |
+
|
| 344 |
+
.chip { font-family: var(--mono); font-size: 0.62rem; color: var(--muted); }
|
| 345 |
+
.chip strong { color: var(--green); }
|
| 346 |
+
|
| 347 |
+
#result-content {
|
| 348 |
+
background: var(--warm);
|
| 349 |
+
border: 1px solid var(--border);
|
| 350 |
+
border-radius: 2px;
|
| 351 |
+
padding: 18px;
|
| 352 |
+
font-family: var(--mono);
|
| 353 |
+
font-size: 0.78rem;
|
| 354 |
+
line-height: 1.9;
|
| 355 |
+
white-space: pre-wrap;
|
| 356 |
+
word-break: break-word;
|
| 357 |
+
max-height: 340px;
|
| 358 |
+
overflow-y: auto;
|
| 359 |
+
flex: 1;
|
| 360 |
+
}
|
| 361 |
+
|
| 362 |
+
.result-actions { display: flex; gap: 8px; }
|
| 363 |
+
|
| 364 |
+
.action-btn {
|
| 365 |
+
font-family: var(--mono);
|
| 366 |
+
font-size: 0.65rem;
|
| 367 |
+
letter-spacing: 0.05em;
|
| 368 |
+
padding: 9px 14px;
|
| 369 |
+
border: 1px solid var(--border);
|
| 370 |
+
background: transparent;
|
| 371 |
+
color: var(--ink);
|
| 372 |
+
cursor: pointer;
|
| 373 |
+
border-radius: 2px;
|
| 374 |
+
transition: border-color 0.15s;
|
| 375 |
+
flex: 1;
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
.action-btn:hover { border-color: var(--ink); }
|
| 379 |
+
|
| 380 |
+
/* ── STATUS BAR ── */
|
| 381 |
+
.statusbar {
|
| 382 |
+
border-top: 3px double var(--border);
|
| 383 |
+
padding: 14px 48px;
|
| 384 |
+
display: flex;
|
| 385 |
+
gap: 32px;
|
| 386 |
+
flex-wrap: wrap;
|
| 387 |
+
font-family: var(--mono);
|
| 388 |
+
font-size: 0.64rem;
|
| 389 |
+
color: var(--muted);
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
.statusbar strong { color: var(--green); }
|
| 393 |
+
|
| 394 |
+
footer {
|
| 395 |
+
border-top: 1px solid var(--border);
|
| 396 |
+
padding: 18px 48px;
|
| 397 |
+
display: flex;
|
| 398 |
+
justify-content: space-between;
|
| 399 |
+
font-family: var(--mono);
|
| 400 |
+
font-size: 0.62rem;
|
| 401 |
+
color: var(--muted);
|
| 402 |
+
}
|
| 403 |
+
|
| 404 |
+
footer a { color: var(--ink); text-decoration: underline; }
|
| 405 |
+
|
| 406 |
+
/* ── TOAST ── */
|
| 407 |
+
.toast {
|
| 408 |
+
position: fixed;
|
| 409 |
+
bottom: 24px; right: 24px;
|
| 410 |
+
background: var(--ink);
|
| 411 |
+
color: var(--paper);
|
| 412 |
+
font-family: var(--mono);
|
| 413 |
+
font-size: 0.7rem;
|
| 414 |
+
padding: 11px 18px;
|
| 415 |
+
border-radius: 2px;
|
| 416 |
+
transform: translateY(60px);
|
| 417 |
+
opacity: 0;
|
| 418 |
+
transition: all 0.3s cubic-bezier(0.34, 1.56, 0.64, 1);
|
| 419 |
+
z-index: 999;
|
| 420 |
+
}
|
| 421 |
+
|
| 422 |
+
.toast.show { transform: translateY(0); opacity: 1; }
|
| 423 |
+
|
| 424 |
+
@keyframes fadeUp { from{opacity:0;transform:translateY(16px)} to{opacity:1;transform:translateY(0)} }
|
| 425 |
+
.masthead { animation: fadeUp 0.5s ease both; }
|
| 426 |
+
.hero { animation: fadeUp 0.5s 0.08s ease both; }
|
| 427 |
+
.main { animation: fadeUp 0.5s 0.16s ease both; }
|
| 428 |
+
|
| 429 |
+
@media (max-width: 820px) {
|
| 430 |
+
.masthead, .hero, .col, .statusbar, footer { padding-left: 24px; padding-right: 24px; }
|
| 431 |
+
.hero, .main { grid-template-columns: 1fr; }
|
| 432 |
+
.col + .col { border-left: none; border-top: 1px solid var(--border); }
|
| 433 |
+
}
|
| 434 |
+
</style>
|
| 435 |
+
</head>
|
| 436 |
+
<body>
|
| 437 |
+
<div class="page">
|
| 438 |
+
|
| 439 |
+
<!-- MASTHEAD -->
|
| 440 |
+
<div class="masthead">
|
| 441 |
+
<div class="masthead-left">zai-org/GLM-OCR Β· 0.9B params</div>
|
| 442 |
+
<div class="masthead-center">GLM-OCR Engine</div>
|
| 443 |
+
<div class="masthead-right">
|
| 444 |
+
<div class="pill" id="server-pill">
|
| 445 |
+
<div class="status-dot pulse" id="status-dot"></div>
|
| 446 |
+
<span id="status-label">connectingβ¦</span>
|
| 447 |
+
</div>
|
| 448 |
+
<div class="pill live">self-hosted</div>
|
| 449 |
+
</div>
|
| 450 |
+
</div>
|
| 451 |
+
|
| 452 |
+
<!-- HERO -->
|
| 453 |
+
<section class="hero">
|
| 454 |
+
<div>
|
| 455 |
+
<h1 class="hero-headline">GLM<br><em>Vision</em><br>OCR</h1>
|
| 456 |
+
</div>
|
| 457 |
+
<div class="hero-right">
|
| 458 |
+
<p class="hero-desc">
|
| 459 |
+
Self-hosted OCR powered by <strong>zai-org/GLM-OCR</strong> β a 0.9B vision-language model
|
| 460 |
+
ranking #1 on OmniDocBench V1.5. Handles plain text, tables, math formulas,
|
| 461 |
+
and structured document parsing.
|
| 462 |
+
</p>
|
| 463 |
+
<div class="hero-stats">
|
| 464 |
+
<div class="stat"><strong id="stat-count">0</strong><span>Processed</span></div>
|
| 465 |
+
<div class="stat"><strong id="stat-words">0</strong><span>Words</span></div>
|
| 466 |
+
<div class="stat"><strong id="stat-lat">β</strong><span>Avg Latency</span></div>
|
| 467 |
+
</div>
|
| 468 |
+
</div>
|
| 469 |
+
</section>
|
| 470 |
+
|
| 471 |
+
<!-- MAIN -->
|
| 472 |
+
<div class="main">
|
| 473 |
+
|
| 474 |
+
<!-- LEFT -->
|
| 475 |
+
<div class="col">
|
| 476 |
+
<div class="col-label">01 Upload Image</div>
|
| 477 |
+
|
| 478 |
+
<div id="dropzone">
|
| 479 |
+
<div class="dz-icon">πΌ</div>
|
| 480 |
+
<div class="dz-label">
|
| 481 |
+
<strong>Drag & drop an image</strong>
|
| 482 |
+
<span>PNG Β· JPG Β· WEBP Β· BMP Β· TIFF Β· Max 20 MB</span>
|
| 483 |
+
</div>
|
| 484 |
+
<input type="file" id="file-input" accept="image/*"/>
|
| 485 |
+
</div>
|
| 486 |
+
|
| 487 |
+
<div id="preview-wrap">
|
| 488 |
+
<img id="preview-img" src="" alt="Preview"/>
|
| 489 |
+
<div class="file-meta">
|
| 490 |
+
<span id="file-name"></span>
|
| 491 |
+
<span id="file-size"></span>
|
| 492 |
+
</div>
|
| 493 |
+
</div>
|
| 494 |
+
|
| 495 |
+
<!-- Mode selector -->
|
| 496 |
+
<div class="mode-row">
|
| 497 |
+
<button class="mode-btn selected" data-mode="recognize">
|
| 498 |
+
<span class="mode-name">recognize</span>
|
| 499 |
+
<span class="mode-desc">Plain text Β· preserves layout</span>
|
| 500 |
+
</button>
|
| 501 |
+
<button class="mode-btn" data-mode="parse">
|
| 502 |
+
<span class="mode-name">parse</span>
|
| 503 |
+
<span class="mode-desc">Structured markdown output</span>
|
| 504 |
+
</button>
|
| 505 |
+
</div>
|
| 506 |
+
|
| 507 |
+
<button class="run-btn" id="run-btn" disabled>β‘ Run GLM-OCR</button>
|
| 508 |
+
<div class="clear-link" id="clear-link">Clear image</div>
|
| 509 |
+
</div>
|
| 510 |
+
|
| 511 |
+
<!-- RIGHT -->
|
| 512 |
+
<div class="col">
|
| 513 |
+
<div class="col-label">02 Extracted Text</div>
|
| 514 |
+
|
| 515 |
+
<div class="output-area">
|
| 516 |
+
<div id="out-placeholder">
|
| 517 |
+
<div class="ph">π</div>
|
| 518 |
+
<p>Upload an image and click<br>"Run GLM-OCR" to begin.</p>
|
| 519 |
+
</div>
|
| 520 |
+
|
| 521 |
+
<div id="out-loading">
|
| 522 |
+
<div class="scan-bar-wrap"><div class="scan-bar"></div></div>
|
| 523 |
+
<div class="scan-label" id="loading-label">Initialisingβ¦</div>
|
| 524 |
+
</div>
|
| 525 |
+
|
| 526 |
+
<div id="out-error"></div>
|
| 527 |
+
|
| 528 |
+
<div id="out-result">
|
| 529 |
+
<div id="result-meta"></div>
|
| 530 |
+
<div id="result-content"></div>
|
| 531 |
+
<div class="result-actions">
|
| 532 |
+
<button class="action-btn" id="copy-btn">Copy text</button>
|
| 533 |
+
<button class="action-btn" id="dl-btn">Download .txt</button>
|
| 534 |
+
</div>
|
| 535 |
+
</div>
|
| 536 |
+
</div>
|
| 537 |
+
</div>
|
| 538 |
+
</div>
|
| 539 |
+
|
| 540 |
+
<!-- STATUS BAR -->
|
| 541 |
+
<div class="statusbar">
|
| 542 |
+
<span>Model: <strong id="sb-model">β</strong></span>
|
| 543 |
+
<span>Device: <strong id="sb-device">β</strong></span>
|
| 544 |
+
<span>Uptime: <strong id="sb-uptime">β</strong></span>
|
| 545 |
+
<span>Errors: <strong id="sb-errors">β</strong></span>
|
| 546 |
+
</div>
|
| 547 |
+
|
| 548 |
+
<footer>
|
| 549 |
+
<span>GLM-OCR Β· <a href="https://arxiv.org/abs/2603.10910" target="_blank">Paper β</a> Β· <a href="https://huggingface.co/zai-org/GLM-OCR" target="_blank">HuggingFace β</a></span>
|
| 550 |
+
<span>Self-hosted Β· No data leaves your server Β· CS Portfolio Project</span>
|
| 551 |
+
</footer>
|
| 552 |
+
</div>
|
| 553 |
+
|
| 554 |
+
<div class="toast" id="toast"></div>
|
| 555 |
+
|
| 556 |
+
<script>
|
| 557 |
+
const API = '';
|
| 558 |
+
let selectedMode = 'recognize';
|
| 559 |
+
let imageFile = null;
|
| 560 |
+
|
| 561 |
+
// Elements
|
| 562 |
+
const dropzone = document.getElementById('dropzone');
|
| 563 |
+
const fileInput = document.getElementById('file-input');
|
| 564 |
+
const previewWrap= document.getElementById('preview-wrap');
|
| 565 |
+
const previewImg = document.getElementById('preview-img');
|
| 566 |
+
const runBtn = document.getElementById('run-btn');
|
| 567 |
+
const clearLink = document.getElementById('clear-link');
|
| 568 |
+
|
| 569 |
+
const outPlaceholder = document.getElementById('out-placeholder');
|
| 570 |
+
const outLoading = document.getElementById('out-loading');
|
| 571 |
+
const outError = document.getElementById('out-error');
|
| 572 |
+
const outResult = document.getElementById('out-result');
|
| 573 |
+
const loadingLabel = document.getElementById('loading-label');
|
| 574 |
+
const resultMeta = document.getElementById('result-meta');
|
| 575 |
+
const resultContent = document.getElementById('result-content');
|
| 576 |
+
|
| 577 |
+
// ββ Health ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 578 |
+
/**
 * Polls the backend `/health` endpoint and mirrors the outcome in the
 * masthead status pill and the status bar.
 *
 * - `status === 'ok'`: the dot turns "ok", model id and device are shown,
 *   and polling stops.
 * - any other status: the dot keeps pulsing and the check is repeated in 3 s.
 * - request failure, non-2xx response or unparsable body: the pill shows
 *   "server offline" and the check is repeated in 5 s, so the page recovers
 *   on its own once the server comes back.
 *
 * @returns {Promise<void>} Resolves once the UI has been updated; never rejects.
 */
async function pollHealth() {
  const dot = document.getElementById('status-dot');
  const lbl = document.getElementById('status-label');

  try {
    const r = await fetch(`${API}/health`);
    if (!r.ok) throw new Error(`Health check failed: ${r.status}`);
    const data = await r.json();

    if (data.status === 'ok') {
      dot.className = 'status-dot ok';
      lbl.textContent = 'model ready';
      // `model` may be missing from the payload; that must not be reported as "offline".
      document.getElementById('sb-model').textContent = data.model?.model_id?.split('/')[1] || '—';
      document.getElementById('sb-device').textContent = data.model?.device || '—';
    } else {
      dot.className = 'status-dot pulse';
      lbl.textContent = 'loading model…';
      setTimeout(pollHealth, 3000);
    }
  } catch {
    dot.className = 'status-dot err';
    lbl.textContent = 'server offline';
    // Keep probing: without this a single failed request would freeze the pill forever.
    setTimeout(pollHealth, 5000);
  }
}
|
| 600 |
+
|
| 601 |
+
/**
 * Fetches session statistics from `/metrics` and refreshes the hero counters
 * (processed, words, average latency) and the status bar (uptime, errors).
 *
 * Best-effort by design: when the request fails, the response is not 2xx, or
 * the payload is malformed, the previously displayed values are left untouched.
 *
 * @returns {Promise<void>} Resolves once the UI has been updated; never rejects.
 */
async function pollMetrics() {
  try {
    const r = await fetch(`${API}/metrics`);
    // An error payload has none of the expected fields; bail out before touching the DOM.
    if (!r.ok) return;
    const data = await r.json();

    // Format everything first so a malformed payload throws before any element
    // is written, which avoids a half-updated panel.
    const uptimeSeconds = data.uptime_seconds;
    const values = {
      'stat-count': data.total_requests,
      'stat-words': data.total_words_extracted.toLocaleString(),
      'stat-lat': data.avg_latency_ms ? `${(data.avg_latency_ms / 1000).toFixed(1)}s` : '—',
      'sb-uptime': `${Math.floor(uptimeSeconds / 60)}m ${Math.floor(uptimeSeconds % 60)}s`,
      'sb-errors': data.error_count,
    };

    for (const [id, value] of Object.entries(values)) {
      document.getElementById(id).textContent = value;
    }
  } catch {
    // Deliberately ignored: metrics are cosmetic and are polled again shortly.
  }
}
|
| 614 |
+
|
| 615 |
+
pollHealth();
|
| 616 |
+
pollMetrics();
|
| 617 |
+
setInterval(pollMetrics, 5000);
|
| 618 |
+
|
| 619 |
+
// ββ Mode ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 620 |
+
document.querySelectorAll('.mode-btn').forEach(btn => {
|
| 621 |
+
btn.addEventListener('click', () => {
|
| 622 |
+
document.querySelectorAll('.mode-btn').forEach(b => b.classList.remove('selected'));
|
| 623 |
+
btn.classList.add('selected');
|
| 624 |
+
selectedMode = btn.dataset.mode;
|
| 625 |
+
});
|
| 626 |
+
});
|
| 627 |
+
|
| 628 |
+
// ββ File ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 629 |
+
/**
 * Accepts a user-selected file, shows its preview and arms the Run button.
 *
 * Files that are missing or not images are ignored silently. Images larger
 * than 20 MB are rejected with a toast instead of being uploaded, because the
 * dropzone advertises that limit and the server refuses bigger uploads.
 *
 * @param {File|undefined} file - The file picked or dropped by the user.
 */
function loadFile(file) {
  const MAX_BYTES = 20 * 1024 * 1024;

  if (!file || !file.type.startsWith('image/')) return;
  if (file.size > MAX_BYTES) {
    toast('File too large. Max 20 MB.');
    return;
  }

  imageFile = file;

  const reader = new FileReader();
  reader.onload = (e) => {
    previewImg.src = e.target.result;
    document.getElementById('file-name').textContent = file.name;
    document.getElementById('file-size').textContent = `${(file.size / 1024).toFixed(1)} KB`;
    // Swap the dropzone for the preview and enable the follow-up actions.
    dropzone.style.display = 'none';
    previewWrap.classList.add('active');
    clearLink.style.display = 'block';
    runBtn.disabled = false;
    resetOutput();
  };
  reader.readAsDataURL(file);
}
|
| 645 |
+
|
| 646 |
+
dropzone.addEventListener('click', () => fileInput.click());
|
| 647 |
+
fileInput.addEventListener('change', e => loadFile(e.target.files[0]));
|
| 648 |
+
dropzone.addEventListener('dragover', e => { e.preventDefault(); dropzone.classList.add('over'); });
|
| 649 |
+
dropzone.addEventListener('dragleave', () => dropzone.classList.remove('over'));
|
| 650 |
+
dropzone.addEventListener('drop', e => {
|
| 651 |
+
e.preventDefault(); dropzone.classList.remove('over'); loadFile(e.dataTransfer.files[0]);
|
| 652 |
+
});
|
| 653 |
+
|
| 654 |
+
clearLink.addEventListener('click', () => {
|
| 655 |
+
imageFile = null; fileInput.value = '';
|
| 656 |
+
previewWrap.classList.remove('active');
|
| 657 |
+
dropzone.style.display = '';
|
| 658 |
+
clearLink.style.display = 'none';
|
| 659 |
+
runBtn.disabled = true;
|
| 660 |
+
resetOutput();
|
| 661 |
+
});
|
| 662 |
+
|
| 663 |
+
// ββ Output state βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 664 |
+
function resetOutput() {
|
| 665 |
+
outPlaceholder.style.display = '';
|
| 666 |
+
outLoading.classList.remove('active');
|
| 667 |
+
outError.classList.remove('active');
|
| 668 |
+
outResult.classList.remove('active');
|
| 669 |
+
}
|
| 670 |
+
|
| 671 |
+
function showLoading(msg) {
|
| 672 |
+
outPlaceholder.style.display = 'none';
|
| 673 |
+
loadingLabel.textContent = msg || 'Running GLM-OCRβ¦';
|
| 674 |
+
outLoading.classList.add('active');
|
| 675 |
+
outError.classList.remove('active');
|
| 676 |
+
outResult.classList.remove('active');
|
| 677 |
+
}
|
| 678 |
+
|
| 679 |
+
function showError(msg) {
|
| 680 |
+
outLoading.classList.remove('active');
|
| 681 |
+
outError.classList.add('active');
|
| 682 |
+
outError.textContent = `β ${msg}`;
|
| 683 |
+
}
|
| 684 |
+
|
| 685 |
+
/**
 * Renders a successful OCR response: hides the loader, shows the result panel,
 * fills the metadata chips and the extracted text, then refreshes the metrics.
 *
 * The chips are built as DOM nodes with `textContent` rather than via
 * `innerHTML`, so response fields can never be interpreted as markup.
 *
 * @param {object} data - Parsed `/ocr` response; reads `word_count`,
 *   `char_count`, `latency_ms`, `device`, `mode` and `text`.
 */
function showResult(data) {
  outLoading.classList.remove('active');
  outResult.classList.add('active');

  const chipData = [
    ['words', data.word_count],
    ['chars', data.char_count],
    ['latency', `${(data.latency_ms / 1000).toFixed(2)}s`],
    ['device', data.device],
    ['mode', data.mode],
  ];

  const chips = chipData.map(([label, value]) => {
    const chip = document.createElement('span');
    chip.className = 'chip';
    const strong = document.createElement('strong');
    strong.textContent = String(value);
    chip.append(`${label}: `, strong);
    return chip;
  });
  resultMeta.replaceChildren(...chips);

  resultContent.textContent = data.text || '[No text detected]';
  pollMetrics();
}
|
| 698 |
+
|
| 699 |
+
// ββ Loading messages ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 700 |
+
const msgs = ['Running GLM-OCRβ¦', 'Encoding imageβ¦', 'Decoding tokensβ¦', 'Assembling outputβ¦'];
|
| 701 |
+
let msgTimer = null;
|
| 702 |
+
|
| 703 |
+
/**
 * Shows the loading panel and cycles its label through `msgs` every 2 s.
 *
 * Any rotation that is still running is cancelled first, so calling this twice
 * without `stopLoadingAnim()` in between cannot leak an interval.
 */
function startLoadingAnim() {
  clearInterval(msgTimer);

  let i = 0;
  showLoading(msgs[0]);
  msgTimer = setInterval(() => {
    i = (i + 1) % msgs.length;
    loadingLabel.textContent = msgs[i];
  }, 2000);
}
|
| 708 |
+
|
| 709 |
+
function stopLoadingAnim() { clearInterval(msgTimer); }
|
| 710 |
+
|
| 711 |
+
// ββ Run ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 712 |
+
// Click handler for the Run button: uploads the selected image together with
// the chosen mode to POST /ocr and renders either the result or an error.
runBtn.addEventListener('click', async () => {
  if (!imageFile) return;
  runBtn.disabled = true;
  startLoadingAnim();

  const form = new FormData();
  form.append('file', imageFile);
  form.append('mode', selectedMode);

  try {
    const r = await fetch(`${API}/ocr`, { method: 'POST', body: form });

    // Error responses are not guaranteed to be JSON (e.g. a proxy's HTML 502
    // page), so a parse failure must not mask the real HTTP status.
    let data = null;
    try {
      data = await r.json();
    } catch {
      data = null;
    }

    if (!r.ok) {
      // FastAPI validation errors carry `detail` as a list of objects; stringify
      // anything that is not already a plain message.
      const detail = data?.detail;
      let reason = `Error ${r.status}`;
      if (typeof detail === 'string' && detail) {
        reason = detail;
      } else if (detail) {
        reason = JSON.stringify(detail);
      }
      throw new Error(reason);
    }
    if (!data) throw new Error('Server returned an unreadable response.');

    showResult(data);
  } catch (err) {
    showError(err.message);
  } finally {
    stopLoadingAnim();
    // The image may have been cleared while the request was in flight; only
    // re-arm the button when there is still something to run.
    runBtn.disabled = !imageFile;
  }
});
|
| 733 |
+
|
| 734 |
+
// ββ Copy βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 735 |
+
document.getElementById('copy-btn').addEventListener('click', async () => {
|
| 736 |
+
try { await navigator.clipboard.writeText(resultContent.textContent); toast('Copied!'); }
|
| 737 |
+
catch { toast('Select text manually.'); }
|
| 738 |
+
});
|
| 739 |
+
|
| 740 |
+
// ββ Download βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 741 |
+
document.getElementById('dl-btn').addEventListener('click', () => {
|
| 742 |
+
const blob = new Blob([resultContent.textContent], { type: 'text/plain' });
|
| 743 |
+
const a = document.createElement('a');
|
| 744 |
+
a.href = URL.createObjectURL(blob);
|
| 745 |
+
a.download = `glm-ocr-${Date.now()}.txt`;
|
| 746 |
+
a.click();
|
| 747 |
+
URL.revokeObjectURL(a.href);
|
| 748 |
+
});
|
| 749 |
+
|
| 750 |
+
// ββ Toast βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 751 |
+
/**
 * Shows a transient notification in the `#toast` element for 2.2 s.
 *
 * The pending hide timer of a previous toast is cancelled first; otherwise a
 * toast shown shortly after another one would be hidden early by the older
 * timer.
 *
 * @param {string} msg - Text to display.
 */
function toast(msg) {
  const t = document.getElementById('toast');
  t.textContent = msg;
  t.classList.add('show');

  // The timer handle lives on the function itself so no extra module state is needed.
  clearTimeout(toast.hideTimer);
  toast.hideTimer = setTimeout(() => t.classList.remove('show'), 2200);
}
|
| 757 |
+
</script>
|
| 758 |
+
</body>
|
| 759 |
+
</html>
|
generate_icons.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
generate_icons.py — Run this once to create the extension icons.
|
| 3 |
+
Requires Pillow: pip install Pillow
|
| 4 |
+
"""
|
| 5 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
os.makedirs("icons", exist_ok=True)
|
| 9 |
+
|
| 10 |
+
def make_icon(size):
    """Render a square extension icon and save it as ``icons/icon{size}.png``.

    The icon is a transparent canvas with an orange rounded rectangle and a
    centred white letter "G".

    Args:
        size: Edge length of the icon in pixels.
    """
    img = Image.new("RGBA", (size, size), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img)

    # Background rounded rect
    pad = size // 8
    draw.rounded_rectangle(
        [pad, pad, size - pad, size - pad],
        radius=size // 5,
        fill="#c94a1f"
    )

    # Letter "G" for GLM
    font_size = int(size * 0.52)
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # Arial is not available on this machine: fall back to Pillow's
        # built-in font. Only OSError is caught so Ctrl-C and genuine
        # programming errors are no longer swallowed.
        font = ImageFont.load_default()

    text = "G"
    # textbbox returns (left, top, right, bottom); subtracting the left/top
    # offsets centres the visible glyph rather than its origin.
    bbox = draw.textbbox((0, 0), text, font=font)
    tw = bbox[2] - bbox[0]
    th = bbox[3] - bbox[1]
    tx = (size - tw) // 2 - bbox[0]
    ty = (size - th) // 2 - bbox[1]
    draw.text((tx, ty), text, fill="white", font=font)

    img.save(f"icons/icon{size}.png")
    print(f"Created icons/icon{size}.png")
|
| 39 |
+
|
| 40 |
+
for s in [16, 48, 128]:
|
| 41 |
+
make_icon(s)
|
| 42 |
+
|
| 43 |
+
print("Done. Icons created in icons/")
|
main.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
main.py β FastAPI server for zai-org/GLM-OCR
|
| 3 |
+
|
| 4 |
+
Endpoints:
|
| 5 |
+
GET / β Serves the frontend HTML
|
| 6 |
+
GET /health β Liveness probe + model info
|
| 7 |
+
POST /ocr β Run OCR on uploaded image
|
| 8 |
+
GET /metrics β Session-level stats
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import logging
|
| 12 |
+
import time
|
| 13 |
+
from contextlib import asynccontextmanager
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
|
| 16 |
+
import uvicorn
|
| 17 |
+
from fastapi import FastAPI, File, Form, HTTPException, UploadFile, Request
|
| 18 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 19 |
+
from fastapi.responses import FileResponse, JSONResponse
|
| 20 |
+
from pydantic import BaseModel
|
| 21 |
+
from typing import Annotated
|
| 22 |
+
|
| 23 |
+
from ocr_engine import engine, OcrResult, OcrMode
|
| 24 |
+
|
| 25 |
+
# ββ Logging βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 26 |
+
|
| 27 |
+
logging.basicConfig(
|
| 28 |
+
level=logging.INFO,
|
| 29 |
+
format="%(asctime)s | %(levelname)-8s | %(name)s β %(message)s",
|
| 30 |
+
datefmt="%H:%M:%S",
|
| 31 |
+
)
|
| 32 |
+
logger = logging.getLogger(__name__)
|
| 33 |
+
|
| 34 |
+
# ββ Session metrics βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
+
|
| 36 |
+
class SessionMetrics:
    """In-memory counters describing the OCR work done since process start."""

    def __init__(self):
        # Wall-clock start, used to derive uptime in to_dict().
        self.started_at = time.time()
        self.total_requests = 0
        self.total_words = 0
        self.total_chars = 0
        self.total_ms = 0.0
        self.errors = 0

    def record(self, result: OcrResult):
        """Fold one successful OCR result into the running totals."""
        self.total_requests = self.total_requests + 1
        self.total_words = self.total_words + result.word_count
        self.total_chars = self.total_chars + result.char_count
        self.total_ms = self.total_ms + result.latency_ms

    def to_dict(self) -> dict:
        """Return a snapshot of the counters as a plain dict."""
        # Guard against division by zero before the first request is recorded.
        if self.total_requests:
            mean_latency = self.total_ms / self.total_requests
        else:
            mean_latency = 0
        uptime = time.time() - self.started_at

        snapshot = {}
        snapshot["total_requests"] = self.total_requests
        snapshot["total_words_extracted"] = self.total_words
        snapshot["total_chars_extracted"] = self.total_chars
        snapshot["avg_latency_ms"] = round(mean_latency, 1)
        snapshot["error_count"] = self.errors
        snapshot["uptime_seconds"] = round(uptime, 1)
        return snapshot
|
| 61 |
+
|
| 62 |
+
metrics = SessionMetrics()
|
| 63 |
+
|
| 64 |
+
# ββ Lifespan βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 65 |
+
|
| 66 |
+
@asynccontextmanager
|
| 67 |
+
async def lifespan(app: FastAPI):
|
| 68 |
+
logger.info("π Starting up β loading GLM-OCR model β¦")
|
| 69 |
+
engine.load()
|
| 70 |
+
logger.info("β
Model ready.")
|
| 71 |
+
yield
|
| 72 |
+
logger.info("π Shutting down β¦")
|
| 73 |
+
engine.unload()
|
| 74 |
+
|
| 75 |
+
# ββ App ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 76 |
+
|
| 77 |
+
app = FastAPI(
|
| 78 |
+
title="GLM-OCR API",
|
| 79 |
+
description="Self-hosted OCR backend powered by zai-org/GLM-OCR",
|
| 80 |
+
version="1.0.0",
|
| 81 |
+
lifespan=lifespan,
|
| 82 |
+
)
|
| 83 |
+
|
| 84 |
+
app.add_middleware(
|
| 85 |
+
CORSMiddleware,
|
| 86 |
+
allow_origins=["*"],
|
| 87 |
+
allow_methods=["GET", "POST"],
|
| 88 |
+
allow_headers=["*"],
|
| 89 |
+
)
|
| 90 |
+
|
| 91 |
+
# ββ Schemas βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 92 |
+
|
| 93 |
+
class OcrResponse(BaseModel):
|
| 94 |
+
success: bool
|
| 95 |
+
text: str
|
| 96 |
+
word_count: int
|
| 97 |
+
char_count: int
|
| 98 |
+
latency_ms: float
|
| 99 |
+
mode: str
|
| 100 |
+
model_id: str
|
| 101 |
+
device: str
|
| 102 |
+
|
| 103 |
+
# ββ Routes ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 104 |
+
|
| 105 |
+
@app.get("/", include_in_schema=False)
|
| 106 |
+
async def serve_frontend():
|
| 107 |
+
frontend = Path(__file__).parent / "frontend" / "index.html"
|
| 108 |
+
if not frontend.exists():
|
| 109 |
+
return JSONResponse({"message": "Frontend not found."}, 404)
|
| 110 |
+
return FileResponse(str(frontend))
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
@app.get("/health")
|
| 114 |
+
async def health():
|
| 115 |
+
return {
|
| 116 |
+
"status": "ok" if engine.loaded else "loading",
|
| 117 |
+
"model": engine.info,
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
@app.post("/ocr", response_model=OcrResponse)
async def run_ocr(
    file: Annotated[UploadFile, File(description="Image file (PNG, JPG, WEBP, BMP, TIFF)")],
    mode: Annotated[OcrMode, Form(description="'recognize' for plain text · 'parse' for structured markdown")] = "recognize",
):
    """
    Run GLM-OCR on an uploaded image.

    **mode options:**
    - `recognize` — extracts raw text, preserves layout (default)
    - `parse`     — returns structured markdown (headers, tables, lists)
    """
    # Raises HTTPException:
    #   415: declared content type is not an accepted image type
    #   400: upload is empty
    #   413: upload exceeds 20 MB
    #   422: engine.run raised ValueError
    #   500: any other inference failure
    max_bytes = 20 * 1024 * 1024

    allowed = {"image/png", "image/jpeg", "image/webp", "image/gif", "image/bmp", "image/tiff"}
    if file.content_type and file.content_type not in allowed:
        raise HTTPException(status_code=415, detail=f"Unsupported file type: {file.content_type}")

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large body into memory first.
    image_bytes = await file.read(max_bytes + 1)
    if not image_bytes:
        raise HTTPException(status_code=400, detail="Empty file.")
    if len(image_bytes) > max_bytes:
        raise HTTPException(status_code=413, detail="File too large. Max 20 MB.")

    logger.info(f"OCR | file={file.filename} size={len(image_bytes)/1024:.1f}KB mode={mode}")

    try:
        # NOTE(review): engine.run is called synchronously inside an async
        # handler; if it blocks for the whole inference, /health and /metrics
        # stall meanwhile. Confirm and consider offloading to a worker thread.
        result = engine.run(image_bytes, mode=mode)
    except ValueError as e:
        metrics.errors += 1
        raise HTTPException(status_code=422, detail=str(e)) from e
    except Exception as e:
        metrics.errors += 1
        logger.exception("Inference error")
        raise HTTPException(status_code=500, detail=f"Inference failed: {e}") from e

    metrics.record(result)
    logger.info(f"Done | {result.word_count} words | {result.latency_ms:.0f}ms")

    return OcrResponse(
        success=True,
        text=result.text,
        word_count=result.word_count,
        char_count=result.char_count,
        latency_ms=result.latency_ms,
        mode=result.mode,
        model_id=result.model_id,
        device=result.device,
    )
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
@app.get("/metrics")
|
| 171 |
+
async def get_metrics():
|
| 172 |
+
return metrics.to_dict()
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
@app.exception_handler(Exception)
|
| 176 |
+
async def global_handler(request: Request, exc: Exception):
|
| 177 |
+
logger.exception(f"Unhandled: {request.url}")
|
| 178 |
+
return JSONResponse(status_code=500, content={"detail": "Internal server error"})
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
if __name__ == "__main__":
|
| 182 |
+
uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=False)
|
manifest.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"manifest_version": 3,
|
| 3 |
+
"name": "GLM-OCR — Text from Screen",
|
| 4 |
+
"version": "1.0.0",
|
| 5 |
+
"description": "Select any region on screen and extract text using the self-hosted GLM-OCR model.",
|
| 6 |
+
|
| 7 |
+
"permissions": [
|
| 8 |
+
"activeTab",
|
| 9 |
+
"scripting",
|
| 10 |
+
"tabs",
|
| 11 |
+
"storage"
|
| 12 |
+
],
|
| 13 |
+
|
| 14 |
+
"host_permissions": [
|
| 15 |
+
"http://localhost:8000/*",
|
| 16 |
+
"<all_urls>"
|
| 17 |
+
],
|
| 18 |
+
|
| 19 |
+
"background": {
|
| 20 |
+
"service_worker": "background.js"
|
| 21 |
+
},
|
| 22 |
+
|
| 23 |
+
"action": {
|
| 24 |
+
"default_popup": "popup.html",
|
| 25 |
+
"default_icon": {
|
| 26 |
+
"16": "icons/icon16.png",
|
| 27 |
+
"48": "icons/icon48.png",
|
| 28 |
+
"128": "icons/icon128.png"
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
|
| 32 |
+
"content_scripts": [
|
| 33 |
+
{
|
| 34 |
+
"matches": ["<all_urls>"],
|
| 35 |
+
"js": ["content.js"],
|
| 36 |
+
"css": ["content.css"],
|
| 37 |
+
"run_at": "document_idle",
|
| 38 |
+
"all_frames": false
|
| 39 |
+
}
|
| 40 |
+
],
|
| 41 |
+
|
| 42 |
+
"icons": {
|
| 43 |
+
"16": "icons/icon16.png",
|
| 44 |
+
"48": "icons/icon48.png",
|
| 45 |
+
"128": "icons/icon128.png"
|
| 46 |
+
},
|
| 47 |
+
|
| 48 |
+
"web_accessible_resources": [
|
| 49 |
+
{
|
| 50 |
+
"resources": ["sidebar.html"],
|
| 51 |
+
"matches": ["<all_urls>"]
|
| 52 |
+
}
|
| 53 |
+
]
|
| 54 |
+
}
|
ocr_engine.py
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ocr_engine.py — zai-org/GLM-OCR inference module
|
| 3 |
+
|
| 4 |
+
GLM-OCR is a 0.9B multimodal OCR model built on the GLM-V encoder-decoder
|
| 5 |
+
architecture. It uses a CogViT visual encoder + GLM-0.5B language decoder,
|
| 6 |
+
trained with Multi-Token Prediction loss for high-quality document OCR.
|
| 7 |
+
|
| 8 |
+
Model: https://huggingface.co/zai-org/GLM-OCR
|
| 9 |
+
Paper: https://arxiv.org/abs/2603.10910
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import io
|
| 13 |
+
import time
|
| 14 |
+
import logging
|
| 15 |
+
import tempfile
|
| 16 |
+
from dataclasses import dataclass
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import Literal
|
| 19 |
+
|
| 20 |
+
import torch
|
| 21 |
+
import torch.nn.functional as F
|
| 22 |
+
from PIL import Image
|
| 23 |
+
from transformers import AutoProcessor, AutoModelForImageTextToText
|
| 24 |
+
|
| 25 |
+
logger = logging.getLogger(__name__)
|
| 26 |
+
|
| 27 |
+
# ββ Config βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 28 |
+
|
| 29 |
+
# Hugging Face repo id of the checkpoint to load.
MODEL_ID = "zai-org/GLM-OCR"

# Preferred device label: CUDA when a GPU is visible to torch, otherwise CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# GLM-OCR is driven by one of two fixed prompt strings:
#   "recognize" -> "Text Recognition:"  (extract raw text, preserves structure)
#   "parse"     -> "Document Parsing:"  (structured markdown output)
OcrMode = Literal["recognize", "parse"]

# Maps each OcrMode value to the literal prompt sent to the model.
PROMPTS = dict(
    recognize="Text Recognition:",
    parse="Document Parsing:",
)
|
| 41 |
+
|
| 42 |
+
# ββ Result dataclass ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 43 |
+
|
| 44 |
+
@dataclass
class OcrResult:
    """Result of a single OCR request: the extracted text plus request metadata."""

    # Extracted text returned to the caller.
    text: str
    # OCR mode the request was run with.
    mode: str
    # Number of whitespace-separated tokens in ``text``.
    word_count: int
    # Length of ``text`` in characters.
    char_count: int
    # Inference time in milliseconds.
    latency_ms: float
    # Device the model ran on, as a string.
    device: str
    # Identifier of the model that produced the text.
    model_id: str
|
| 53 |
+
|
| 54 |
+
# ββ Engine ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 55 |
+
|
| 56 |
+
class GlmOcrEngine:
    """
    Wraps zai-org/GLM-OCR. Call .load() once at startup,
    then .run(image_bytes, mode) per request.

    Attributes:
        model:     The loaded image-text-to-text model, or None when unloaded.
        processor: The matching processor (chat template + decoding), or None.
        loaded:    True once load() has completed successfully.
    """

    def __init__(self):
        self.model = None
        self.processor = None
        self.loaded = False

    # -- Lifecycle -----------------------------------------------------------

    def load(self) -> None:
        """Load the processor and model for MODEL_ID; a no-op if already loaded."""
        if self.loaded:
            return

        logger.info(f"Loading {MODEL_ID} on {DEVICE} …")
        t0 = time.time()

        # NOTE(review): trust_remote_code=True executes Python shipped in the
        # model repository; keep MODEL_ID pinned to a repo you trust.
        self.processor = AutoProcessor.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
        )

        self.model = AutoModelForImageTextToText.from_pretrained(
            MODEL_ID,
            torch_dtype="auto",   # let transformers choose the dtype
            device_map="auto",    # let the loader place weights on available devices
            trust_remote_code=True,
        )

        # CPU patch: replace the slow Conv3d patch_embed with a matmul.
        # Per the discussion linked below, the default Conv3d path is extremely
        # slow on CPU and a single F.linear call is dramatically faster.
        # See: https://huggingface.co/zai-org/GLM-OCR/discussions/36
        if DEVICE == "cpu":
            self._apply_cpu_patch()

        self.model.eval()
        self.loaded = True
        logger.info(f"Model loaded in {time.time() - t0:.1f}s")

    def _apply_cpu_patch(self):
        """
        Replace Conv3d patch_embed with matmul for fast CPU inference.

        Best-effort: any failure (e.g. the model layout differs from what is
        expected here) is logged as a warning and the model is left untouched.
        """
        try:
            base_model = self.model.model if hasattr(self.model, 'model') else self.model
            patch_embed = base_model.visual.patch_embed
            proj = patch_embed.proj

            # Flattened size of one patch: channels * temporal size * H * W.
            in_features = (
                patch_embed.in_channels *
                patch_embed.temporal_patch_size *
                patch_embed.patch_size ** 2
            )
            embed_dim = patch_embed.embed_dim
            weight = proj.weight
            bias = proj.bias

            def _fast_forward(hidden_states: torch.Tensor) -> torch.Tensor:
                # Flatten every patch to a row and project it with the conv
                # weights viewed as an (embed_dim, in_features) matrix.
                target_dtype = weight.dtype
                hidden_states = hidden_states.reshape(-1, in_features).to(dtype=target_dtype)
                return F.linear(hidden_states, weight.reshape(embed_dim, -1), bias)

            patch_embed.forward = _fast_forward
            logger.info("CPU matmul patch applied to patch_embed.")
        except Exception as e:
            logger.warning(f"Could not apply CPU patch (will still work, just slower): {e}")

    def unload(self) -> None:
        """Drop the model and processor and release cached CUDA memory."""
        # Compare against None explicitly: truthiness of a module object is not
        # a reliable "is loaded" signal, and a load() that failed half-way may
        # have left only the processor set.
        had_state = self.model is not None or self.processor is not None
        self.model = None
        self.processor = None
        self.loaded = False
        if had_state:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            logger.info("Model unloaded.")

    # -- Inference -----------------------------------------------------------

    def run(self, image_bytes: bytes, mode: OcrMode = "recognize") -> OcrResult:
        """
        Run GLM-OCR on raw image bytes.

        Args:
            image_bytes: Raw bytes of the uploaded image.
            mode:
                'recognize' - plain text extraction ("Text Recognition:")
                'parse'     - structured markdown output ("Document Parsing:")

        Returns:
            OcrResult with extracted text and metadata.

        Raises:
            RuntimeError: If load() has not been called.
            ValueError:   If image_bytes cannot be decoded as an image.
            KeyError:     If mode is not a key of PROMPTS.
        """
        if not self.loaded:
            raise RuntimeError("Engine not loaded. Call .load() first.")

        # Validate image
        img = self._validate_image(image_bytes)

        # Resolve the prompt before touching the filesystem so an unknown mode
        # cannot leave a stray temp file behind.
        prompt_text = PROMPTS[mode]

        tmp_path = None
        try:
            # The image is handed to the processor as a file path, so persist
            # it to a temp file. Reserve the name, close the handle, then save
            # by path (writing to a still-open temp file fails on Windows).
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                tmp_path = Path(tmp.name)
            img.save(tmp_path, format="PNG")

            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "image", "url": str(tmp_path)},
                        {"type": "text", "text": prompt_text},
                    ],
                }
            ]

            t0 = time.time()
            inputs = self.processor.apply_chat_template(
                messages,
                tokenize=True,
                add_generation_prompt=True,
                return_dict=True,
                return_tensors="pt",
            ).to(self.model.device)

            # token_type_ids not used by this model
            inputs.pop("token_type_ids", None)

            with torch.inference_mode():
                generated_ids = self.model.generate(
                    **inputs,
                    max_new_tokens=8192,
                )

            # Decode only the newly generated tokens
            output_text = self.processor.decode(
                generated_ids[0][inputs["input_ids"].shape[1]:],
                skip_special_tokens=False,
            )
        finally:
            # Always remove the temp file, whether saving, templating or
            # generation succeeded or failed.
            if tmp_path is not None:
                tmp_path.unlink(missing_ok=True)

        latency_ms = (time.time() - t0) * 1000
        text = output_text.strip() if output_text else ""

        return OcrResult(
            text=text,
            mode=mode,
            word_count=len(text.split()) if text else 0,
            char_count=len(text),
            latency_ms=round(latency_ms, 1),
            device=str(next(self.model.parameters()).device),
            model_id=MODEL_ID,
        )

    # -- Helpers -------------------------------------------------------------

    @staticmethod
    def _validate_image(image_bytes: bytes) -> Image.Image:
        """
        Decode image_bytes and return it as an RGB PIL image.

        Raises:
            ValueError: If the bytes are not a readable image.
        """
        try:
            img = Image.open(io.BytesIO(image_bytes))
            img.verify()
            # verify() leaves the image object unusable, so reopen before use.
            img = Image.open(io.BytesIO(image_bytes))
            return img.convert("RGB")
        except Exception as e:
            raise ValueError(f"Invalid image: {e}") from e

    @property
    def info(self) -> dict:
        """Engine status plus basic CUDA details (GPU fields are None without CUDA)."""
        return {
            "model_id": MODEL_ID,
            "device": DEVICE,
            "loaded": self.loaded,
            "cuda_available": torch.cuda.is_available(),
            "gpu_name": torch.cuda.get_device_name(0) if torch.cuda.is_available() else None,
            "gpu_memory_gb": round(
                torch.cuda.get_device_properties(0).total_memory / 1e9, 1
            ) if torch.cuda.is_available() else None,
        }
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
# ββ Singleton βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 242 |
+
engine = GlmOcrEngine()
|
popup.html
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8"/>
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
|
| 6 |
+
<title>GLM-OCR</title>
|
| 7 |
+
<link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;700&family=DM+Serif+Display:ital@0;1&family=DM+Sans:wght@400;500&display=swap" rel="stylesheet"/>
|
| 8 |
+
<style>
|
| 9 |
+
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
| 10 |
+
:root {
|
| 11 |
+
--ink: #0f0e0d;
|
| 12 |
+
--paper: #f5f0e8;
|
| 13 |
+
--warm: #ede8dc;
|
| 14 |
+
--border: #d4cfc3;
|
| 15 |
+
--muted: #8f8880;
|
| 16 |
+
--accent: #c94a1f;
|
| 17 |
+
--green: #1a6b4a;
|
| 18 |
+
--mono: 'IBM Plex Mono', monospace;
|
| 19 |
+
--serif: 'DM Serif Display', serif;
|
| 20 |
+
--sans: 'DM Sans', sans-serif;
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
body {
|
| 24 |
+
width: 300px;
|
| 25 |
+
background: var(--paper);
|
| 26 |
+
color: var(--ink);
|
| 27 |
+
font-family: var(--sans);
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
body::before {
|
| 31 |
+
content: '';
|
| 32 |
+
position: fixed; inset: 0;
|
| 33 |
+
background-image: radial-gradient(circle, rgba(0,0,0,0.05) 1px, transparent 1px);
|
| 34 |
+
background-size: 16px 16px;
|
| 35 |
+
pointer-events: none;
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
.inner { position: relative; }
|
| 39 |
+
|
| 40 |
+
/* Header */
|
| 41 |
+
.header {
|
| 42 |
+
padding: 16px 18px 14px;
|
| 43 |
+
border-bottom: 2px solid var(--ink);
|
| 44 |
+
display: flex;
|
| 45 |
+
align-items: center;
|
| 46 |
+
justify-content: space-between;
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
.logo {
|
| 50 |
+
font-family: var(--serif);
|
| 51 |
+
font-size: 1.1rem;
|
| 52 |
+
letter-spacing: -0.01em;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
.logo em { font-style: italic; color: var(--accent); }
|
| 56 |
+
|
| 57 |
+
.server-badge {
|
| 58 |
+
display: flex;
|
| 59 |
+
align-items: center;
|
| 60 |
+
gap: 5px;
|
| 61 |
+
font-family: var(--mono);
|
| 62 |
+
font-size: 0.58rem;
|
| 63 |
+
color: var(--muted);
|
| 64 |
+
letter-spacing: 0.06em;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
.dot {
|
| 68 |
+
width: 6px; height: 6px;
|
| 69 |
+
border-radius: 50%;
|
| 70 |
+
background: var(--muted);
|
| 71 |
+
}
|
| 72 |
+
.dot.ok { background: var(--green); }
|
| 73 |
+
.dot.err { background: var(--accent); }
|
| 74 |
+
.dot.pulse { animation: blink 1.2s ease-in-out infinite; }
|
| 75 |
+
@keyframes blink { 0%,100%{opacity:1} 50%{opacity:0.3} }
|
| 76 |
+
|
| 77 |
+
/* Main CTA */
|
| 78 |
+
.cta-area {
|
| 79 |
+
padding: 20px 18px;
|
| 80 |
+
border-bottom: 1px solid var(--border);
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
.cta-label {
|
| 84 |
+
font-family: var(--mono);
|
| 85 |
+
font-size: 0.62rem;
|
| 86 |
+
color: var(--muted);
|
| 87 |
+
letter-spacing: 0.1em;
|
| 88 |
+
text-transform: uppercase;
|
| 89 |
+
margin-bottom: 10px;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
.select-btn {
|
| 93 |
+
width: 100%;
|
| 94 |
+
padding: 14px;
|
| 95 |
+
background: var(--accent);
|
| 96 |
+
color: white;
|
| 97 |
+
border: none;
|
| 98 |
+
border-radius: 2px;
|
| 99 |
+
font-family: var(--serif);
|
| 100 |
+
font-size: 1rem;
|
| 101 |
+
cursor: pointer;
|
| 102 |
+
transition: background 0.15s;
|
| 103 |
+
display: flex;
|
| 104 |
+
align-items: center;
|
| 105 |
+
justify-content: center;
|
| 106 |
+
gap: 8px;
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
.select-btn:hover:not(:disabled) { background: #b53d15; }
|
| 110 |
+
.select-btn:disabled { opacity: 0.35; cursor: not-allowed; }
|
| 111 |
+
|
| 112 |
+
.select-btn .shortcut {
|
| 113 |
+
font-family: var(--mono);
|
| 114 |
+
font-size: 0.6rem;
|
| 115 |
+
opacity: 0.7;
|
| 116 |
+
margin-left: auto;
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
.offline-msg {
|
| 120 |
+
display: none;
|
| 121 |
+
margin-top: 10px;
|
| 122 |
+
font-family: var(--mono);
|
| 123 |
+
font-size: 0.65rem;
|
| 124 |
+
color: var(--accent);
|
| 125 |
+
line-height: 1.6;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
.offline-msg.show { display: block; }
|
| 129 |
+
|
| 130 |
+
.offline-msg a {
|
| 131 |
+
color: var(--accent);
|
| 132 |
+
text-decoration: underline;
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
/* How it works */
|
| 136 |
+
.how {
|
| 137 |
+
padding: 16px 18px;
|
| 138 |
+
border-bottom: 1px solid var(--border);
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
.how-title {
|
| 142 |
+
font-family: var(--mono);
|
| 143 |
+
font-size: 0.6rem;
|
| 144 |
+
color: var(--muted);
|
| 145 |
+
letter-spacing: 0.1em;
|
| 146 |
+
text-transform: uppercase;
|
| 147 |
+
margin-bottom: 12px;
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
.step {
|
| 151 |
+
display: flex;
|
| 152 |
+
gap: 10px;
|
| 153 |
+
align-items: flex-start;
|
| 154 |
+
margin-bottom: 8px;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
.step:last-child { margin-bottom: 0; }
|
| 158 |
+
|
| 159 |
+
.step-num {
|
| 160 |
+
font-family: var(--mono);
|
| 161 |
+
font-size: 0.6rem;
|
| 162 |
+
color: var(--accent);
|
| 163 |
+
font-weight: 700;
|
| 164 |
+
flex-shrink: 0;
|
| 165 |
+
margin-top: 2px;
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
.step-text {
|
| 169 |
+
font-size: 0.78rem;
|
| 170 |
+
line-height: 1.5;
|
| 171 |
+
color: var(--ink);
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
/* Settings */
|
| 175 |
+
.settings {
|
| 176 |
+
padding: 14px 18px;
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
.settings-title {
|
| 180 |
+
font-family: var(--mono);
|
| 181 |
+
font-size: 0.6rem;
|
| 182 |
+
color: var(--muted);
|
| 183 |
+
letter-spacing: 0.1em;
|
| 184 |
+
text-transform: uppercase;
|
| 185 |
+
margin-bottom: 10px;
|
| 186 |
+
}
|
| 187 |
+
|
| 188 |
+
.setting-row {
|
| 189 |
+
display: flex;
|
| 190 |
+
align-items: center;
|
| 191 |
+
justify-content: space-between;
|
| 192 |
+
margin-bottom: 8px;
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
.setting-label {
|
| 196 |
+
font-family: var(--mono);
|
| 197 |
+
font-size: 0.68rem;
|
| 198 |
+
color: var(--ink);
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
.mode-toggle {
|
| 202 |
+
display: flex;
|
| 203 |
+
gap: 4px;
|
| 204 |
+
}
|
| 205 |
+
|
| 206 |
+
.mode-opt {
|
| 207 |
+
font-family: var(--mono);
|
| 208 |
+
font-size: 0.58rem;
|
| 209 |
+
padding: 4px 8px;
|
| 210 |
+
border: 1px solid var(--border);
|
| 211 |
+
background: transparent;
|
| 212 |
+
color: var(--muted);
|
| 213 |
+
cursor: pointer;
|
| 214 |
+
border-radius: 2px;
|
| 215 |
+
transition: all 0.12s;
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
.mode-opt.active {
|
| 219 |
+
background: var(--ink);
|
| 220 |
+
border-color: var(--ink);
|
| 221 |
+
color: var(--paper);
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
/* Footer */
|
| 225 |
+
.footer {
|
| 226 |
+
padding: 10px 18px;
|
| 227 |
+
border-top: 1px solid var(--border);
|
| 228 |
+
font-family: var(--mono);
|
| 229 |
+
font-size: 0.58rem;
|
| 230 |
+
color: var(--muted);
|
| 231 |
+
display: flex;
|
| 232 |
+
justify-content: space-between;
|
| 233 |
+
}
|
| 234 |
+
</style>
|
| 235 |
+
</head>
|
| 236 |
+
<body>
|
| 237 |
+
<div class="inner">
|
| 238 |
+
|
| 239 |
+
<!-- Header -->
|
| 240 |
+
<div class="header">
|
| 241 |
+
<div class="logo">GLM-<em>OCR</em></div>
|
| 242 |
+
<div class="server-badge">
|
| 243 |
+
<div class="dot pulse" id="dot"></div>
|
| 244 |
+
<span id="server-label">checking…</span>
|
| 245 |
+
</div>
|
| 246 |
+
</div>
|
| 247 |
+
|
| 248 |
+
<!-- CTA -->
|
| 249 |
+
<div class="cta-area">
|
| 250 |
+
<div class="cta-label">Select region on screen</div>
|
| 251 |
+
<button class="select-btn" id="select-btn" disabled>
|
| 252 |
+
β Select & Extract Text
|
| 253 |
+
</button>
|
| 254 |
+
<div class="offline-msg" id="offline-msg">
|
| 255 |
+
β GLM-OCR server not running.<br>
|
| 256 |
+
Start it with <code>python main.py</code> at <a href="http://localhost:8000" target="_blank">localhost:8000</a>.
|
| 257 |
+
</div>
|
| 258 |
+
</div>
|
| 259 |
+
|
| 260 |
+
<!-- How it works -->
|
| 261 |
+
<div class="how">
|
| 262 |
+
<div class="how-title">How it works</div>
|
| 263 |
+
<div class="step">
|
| 264 |
+
<div class="step-num">01</div>
|
| 265 |
+
<div class="step-text">Click the button above — page dims</div>
|
| 266 |
+
</div>
|
| 267 |
+
<div class="step">
|
| 268 |
+
<div class="step-num">02</div>
|
| 269 |
+
<div class="step-text">Drag a box around the text you want</div>
|
| 270 |
+
</div>
|
| 271 |
+
<div class="step">
|
| 272 |
+
<div class="step-num">03</div>
|
| 273 |
+
<div class="step-text">GLM-OCR extracts text into a sidebar</div>
|
| 274 |
+
</div>
|
| 275 |
+
<div class="step">
|
| 276 |
+
<div class="step-num">04</div>
|
| 277 |
+
<div class="step-text">Copy or download the result</div>
|
| 278 |
+
</div>
|
| 279 |
+
</div>
|
| 280 |
+
|
| 281 |
+
<!-- Settings -->
|
| 282 |
+
<div class="settings">
|
| 283 |
+
<div class="settings-title">Settings</div>
|
| 284 |
+
<div class="setting-row">
|
| 285 |
+
<span class="setting-label">OCR Mode</span>
|
| 286 |
+
<div class="mode-toggle">
|
| 287 |
+
<button class="mode-opt active" data-mode="recognize">recognize</button>
|
| 288 |
+
<button class="mode-opt" data-mode="parse">parse</button>
|
| 289 |
+
</div>
|
| 290 |
+
</div>
|
| 291 |
+
</div>
|
| 292 |
+
|
| 293 |
+
<div class="footer">
|
| 294 |
+
<span>zai-org/GLM-OCR · 0.9B</span>
|
| 295 |
+
<span>self-hosted</span>
|
| 296 |
+
</div>
|
| 297 |
+
|
| 298 |
+
</div>
|
| 299 |
+
|
| 300 |
+
<script src="popup.js"></script>
|
| 301 |
+
</body>
|
| 302 |
+
</html>
|
popup.js
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// popup.js
|
| 2 |
+
|
| 3 |
+
const selectBtn = document.getElementById("select-btn");
|
| 4 |
+
const dot = document.getElementById("dot");
|
| 5 |
+
const serverLabel = document.getElementById("server-label");
|
| 6 |
+
const offlineMsg = document.getElementById("offline-msg");
|
| 7 |
+
|
| 8 |
+
let selectedMode = "recognize";
|
| 9 |
+
|
| 10 |
+
// ββ Check server health βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 11 |
+
/**
 * Probe the local OCR backend's health endpoint.
 *
 * Any failure (network error, 3 s timeout, unparsable body) counts as
 * "not healthy" rather than being thrown to the caller.
 *
 * @returns {Promise<boolean>} true only when the response JSON has status "ok".
 */
async function checkServer() {
  let healthy = false;
  try {
    const response = await fetch("http://localhost:8000/health", {
      signal: AbortSignal.timeout(3000),
    });
    const payload = await response.json();
    healthy = payload.status === "ok";
  } catch {
    healthy = false;
  }
  return healthy;
}
|
| 22 |
+
|
| 23 |
+
/**
 * Refresh the popup's server indicator from a fresh health check:
 * status dot class, label text, select-button enabled state, and the
 * visibility of the offline hint.
 */
async function updateServerStatus() {
  const isOnline = await checkServer();
  const isOffline = !isOnline;

  dot.className = isOnline ? "dot ok" : "dot err";
  serverLabel.textContent = isOnline ? "server ready" : "offline";
  selectBtn.disabled = isOffline;
  offlineMsg.classList.toggle("show", isOffline);
}
|
| 30 |
+
|
| 31 |
+
updateServerStatus();
|
| 32 |
+
|
| 33 |
+
// ββ Mode toggle βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 34 |
+
// Clicking a mode button makes it the only active one, updates the
// in-memory selection, and persists it to extension storage.
for (const modeButton of document.querySelectorAll(".mode-opt")) {
  modeButton.addEventListener("click", () => {
    for (const other of document.querySelectorAll(".mode-opt")) {
      other.classList.remove("active");
    }
    modeButton.classList.add("active");
    selectedMode = modeButton.dataset.mode;
    chrome.storage.local.set({ ocrMode: selectedMode });
  });
}

// Restore saved mode: when a mode was stored earlier, adopt it and
// highlight the matching button.
chrome.storage.local.get(["ocrMode"], (stored) => {
  const savedMode = stored.ocrMode;
  if (!savedMode) {
    return;
  }
  selectedMode = savedMode;
  for (const modeButton of document.querySelectorAll(".mode-opt")) {
    modeButton.classList.toggle("active", modeButton.dataset.mode === savedMode);
  }
});
|
| 52 |
+
|
| 53 |
+
// ββ Select button βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 54 |
+
// Start region selection in the active tab, then close the popup.
// Failures (no active tab, a page where scripts cannot be injected, no
// content script listening) are logged instead of surfacing as unhandled
// promise rejections; on failure the popup is left open.
selectBtn.addEventListener("click", async () => {
  try {
    // Save current mode to storage so background can read it
    await chrome.storage.local.set({ ocrMode: selectedMode });

    // Get current tab and inject the selection
    const [tab] = await chrome.tabs.query({ active: true, currentWindow: true });
    if (tab?.id === undefined) {
      // tabs.query can return an empty list; bail out before tab.id is used.
      throw new Error("No active tab available for selection.");
    }

    await chrome.scripting.executeScript({
      target: { tabId: tab.id },
      func: () => {
        window.postMessage({ type: "GLMOCR_START" }, "*");
      },
    });

    // Tell content script to start selection mode. Not awaited so the popup
    // can close right away; the catch only prevents an unhandled rejection
    // when no content script is listening in the tab.
    chrome.tabs
      .sendMessage(tab.id, { type: "START_SELECTION" })
      .catch((err) => {
        console.warn("[GLM-OCR] START_SELECTION was not delivered:", err);
      });
  } catch (err) {
    console.error("[GLM-OCR] Could not start selection:", err);
    return;
  }

  // Close popup so it doesn't obscure the page
  window.close();
});
|
requirements.txt
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# GLM-OCR Backend — Python dependencies
|
| 2 |
+
# Install: pip install -r requirements.txt
|
| 3 |
+
|
| 4 |
+
# Web framework
|
| 5 |
+
fastapi==0.115.5
|
| 6 |
+
uvicorn[standard]==0.32.1
|
| 7 |
+
python-multipart>=0.0.12
|
| 8 |
+
|
| 9 |
+
# GLM-OCR requires transformers >= 5.3.0
|
| 10 |
+
# Install latest directly from GitHub to be safe:
|
| 11 |
+
# pip install git+https://github.com/huggingface/transformers.git
|
| 12 |
+
transformers>=5.3.0
|
| 13 |
+
|
| 14 |
+
# ML
|
| 15 |
+
torch>=2.2.0
|
| 16 |
+
torchvision>=0.17.0
|
| 17 |
+
accelerate>=1.1.0
|
| 18 |
+
|
| 19 |
+
# Image
|
| 20 |
+
Pillow>=10.4.0
|
| 21 |
+
|
| 22 |
+
# Misc
|
| 23 |
+
pydantic>=2.9.0
|
| 24 |
+
safetensors>=0.4.5
|
| 25 |
+
einops>=0.8.0
|
| 26 |
+
sentencepiece>=0.2.0
|
| 27 |
+
tiktoken>=0.7.0
|
sidebar.html
ADDED
|
@@ -0,0 +1,391 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8"/>
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
|
| 6 |
+
<title>GLM-OCR Result</title>
|
| 7 |
+
<link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500;700&family=DM+Serif+Display:ital@0;1&family=DM+Sans:wght@400;500&display=swap" rel="stylesheet"/>
|
| 8 |
+
<style>
|
| 9 |
+
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
| 10 |
+
:root {
|
| 11 |
+
--ink: #0f0e0d;
|
| 12 |
+
--paper: #f5f0e8;
|
| 13 |
+
--warm: #ede8dc;
|
| 14 |
+
--border: #d4cfc3;
|
| 15 |
+
--muted: #8f8880;
|
| 16 |
+
--accent: #c94a1f;
|
| 17 |
+
--green: #1a6b4a;
|
| 18 |
+
--mono: 'IBM Plex Mono', monospace;
|
| 19 |
+
--serif: 'DM Serif Display', serif;
|
| 20 |
+
--sans: 'DM Sans', sans-serif;
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
html, body {
|
| 24 |
+
height: 100%;
|
| 25 |
+
background: var(--paper);
|
| 26 |
+
color: var(--ink);
|
| 27 |
+
font-family: var(--sans);
|
| 28 |
+
overflow: hidden;
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
body::before {
|
| 32 |
+
content: '';
|
| 33 |
+
position: fixed; inset: 0;
|
| 34 |
+
background-image: radial-gradient(circle, rgba(0,0,0,0.05) 1px, transparent 1px);
|
| 35 |
+
background-size: 16px 16px;
|
| 36 |
+
pointer-events: none;
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
.sidebar {
|
| 40 |
+
position: relative;
|
| 41 |
+
height: 100vh;
|
| 42 |
+
display: flex;
|
| 43 |
+
flex-direction: column;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
/* ββ Header ββ */
|
| 47 |
+
.sb-header {
|
| 48 |
+
padding: 14px 16px;
|
| 49 |
+
border-bottom: 2px solid var(--ink);
|
| 50 |
+
display: flex;
|
| 51 |
+
align-items: center;
|
| 52 |
+
justify-content: space-between;
|
| 53 |
+
flex-shrink: 0;
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
.sb-title {
|
| 57 |
+
font-family: var(--serif);
|
| 58 |
+
font-size: 1rem;
|
| 59 |
+
letter-spacing: -0.01em;
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
.sb-title em { font-style: italic; color: var(--accent); }
|
| 63 |
+
|
| 64 |
+
.sb-close {
|
| 65 |
+
font-family: var(--mono);
|
| 66 |
+
font-size: 0.6rem;
|
| 67 |
+
padding: 5px 10px;
|
| 68 |
+
border: 1px solid var(--border);
|
| 69 |
+
background: transparent;
|
| 70 |
+
cursor: pointer;
|
| 71 |
+
border-radius: 2px;
|
| 72 |
+
color: var(--muted);
|
| 73 |
+
transition: all 0.12s;
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
.sb-close:hover { border-color: var(--ink); color: var(--ink); }
|
| 77 |
+
|
| 78 |
+
/* ββ Scrollable body ββ */
|
| 79 |
+
.sb-body {
|
| 80 |
+
flex: 1;
|
| 81 |
+
overflow-y: auto;
|
| 82 |
+
display: flex;
|
| 83 |
+
flex-direction: column;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
/* ββ Loading ββ */
|
| 87 |
+
.sb-loading {
|
| 88 |
+
flex: 1;
|
| 89 |
+
display: flex;
|
| 90 |
+
flex-direction: column;
|
| 91 |
+
align-items: center;
|
| 92 |
+
justify-content: center;
|
| 93 |
+
gap: 16px;
|
| 94 |
+
padding: 24px;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
.scan-bar-wrap { width: 140px; height: 3px; background: var(--border); border-radius: 2px; overflow: hidden; }
|
| 98 |
+
.scan-bar { height: 100%; background: var(--accent); border-radius: 2px; animation: scan 1.4s ease-in-out infinite; }
|
| 99 |
+
@keyframes scan { 0%{transform:translateX(-100%)} 50%{transform:translateX(0)} 100%{transform:translateX(100%)} }
|
| 100 |
+
|
| 101 |
+
.loading-label {
|
| 102 |
+
font-family: var(--mono);
|
| 103 |
+
font-size: 0.68rem;
|
| 104 |
+
color: var(--muted);
|
| 105 |
+
animation: blink 1.4s ease-in-out infinite;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
@keyframes blink { 0%,100%{opacity:1} 50%{opacity:0.3} }
|
| 109 |
+
|
| 110 |
+
/* ── Error ── */
|
| 111 |
+
.sb-error {
|
| 112 |
+
margin: 16px;
|
| 113 |
+
background: #fff0f0;
|
| 114 |
+
border: 1px solid rgba(201,74,31,0.3);
|
| 115 |
+
border-radius: 2px;
|
| 116 |
+
padding: 14px;
|
| 117 |
+
font-family: var(--mono);
|
| 118 |
+
font-size: 0.72rem;
|
| 119 |
+
color: var(--accent);
|
| 120 |
+
line-height: 1.7;
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
/* ── Image preview ── */
|
| 124 |
+
.sb-image-wrap {
|
| 125 |
+
padding: 14px 16px 0;
|
| 126 |
+
flex-shrink: 0;
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
.sb-image-label {
|
| 130 |
+
font-family: var(--mono);
|
| 131 |
+
font-size: 0.58rem;
|
| 132 |
+
color: var(--muted);
|
| 133 |
+
letter-spacing: 0.1em;
|
| 134 |
+
text-transform: uppercase;
|
| 135 |
+
margin-bottom: 8px;
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
.sb-image {
|
| 139 |
+
width: 100%;
|
| 140 |
+
max-height: 160px;
|
| 141 |
+
object-fit: contain;
|
| 142 |
+
border: 1px solid var(--border);
|
| 143 |
+
border-radius: 2px;
|
| 144 |
+
background: var(--warm);
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
/* ── Meta chips ── */
|
| 148 |
+
.sb-meta {
|
| 149 |
+
padding: 10px 16px;
|
| 150 |
+
display: flex;
|
| 151 |
+
gap: 10px;
|
| 152 |
+
flex-wrap: wrap;
|
| 153 |
+
border-bottom: 1px solid var(--border);
|
| 154 |
+
flex-shrink: 0;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
.chip {
|
| 158 |
+
font-family: var(--mono);
|
| 159 |
+
font-size: 0.6rem;
|
| 160 |
+
color: var(--muted);
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
.chip strong { color: var(--green); }
|
| 164 |
+
|
| 165 |
+
/* ── Extracted text ── */
|
| 166 |
+
.sb-text-section {
|
| 167 |
+
padding: 14px 16px;
|
| 168 |
+
display: flex;
|
| 169 |
+
flex-direction: column;
|
| 170 |
+
gap: 8px;
|
| 171 |
+
flex: 1;
|
| 172 |
+
min-height: 0;
|
| 173 |
+
}
|
| 174 |
+
|
| 175 |
+
.sb-text-label {
|
| 176 |
+
font-family: var(--mono);
|
| 177 |
+
font-size: 0.58rem;
|
| 178 |
+
color: var(--muted);
|
| 179 |
+
letter-spacing: 0.1em;
|
| 180 |
+
text-transform: uppercase;
|
| 181 |
+
flex-shrink: 0;
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
.sb-text {
|
| 185 |
+
background: var(--warm);
|
| 186 |
+
border: 1px solid var(--border);
|
| 187 |
+
border-radius: 2px;
|
| 188 |
+
padding: 14px;
|
| 189 |
+
font-family: var(--mono);
|
| 190 |
+
font-size: 0.78rem;
|
| 191 |
+
line-height: 1.85;
|
| 192 |
+
white-space: pre-wrap;
|
| 193 |
+
word-break: break-word;
|
| 194 |
+
overflow-y: auto;
|
| 195 |
+
flex: 1;
|
| 196 |
+
min-height: 120px;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
/* ── Actions ── */
|
| 200 |
+
.sb-actions {
|
| 201 |
+
padding: 12px 16px;
|
| 202 |
+
border-top: 1px solid var(--border);
|
| 203 |
+
display: flex;
|
| 204 |
+
gap: 8px;
|
| 205 |
+
flex-shrink: 0;
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
.action-btn {
|
| 209 |
+
font-family: var(--mono);
|
| 210 |
+
font-size: 0.62rem;
|
| 211 |
+
letter-spacing: 0.04em;
|
| 212 |
+
padding: 9px 12px;
|
| 213 |
+
border: 1px solid var(--border);
|
| 214 |
+
background: transparent;
|
| 215 |
+
color: var(--ink);
|
| 216 |
+
cursor: pointer;
|
| 217 |
+
border-radius: 2px;
|
| 218 |
+
transition: all 0.12s;
|
| 219 |
+
flex: 1;
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
.action-btn:hover { border-color: var(--ink); }
|
| 223 |
+
|
| 224 |
+
.action-btn.primary {
|
| 225 |
+
background: var(--accent);
|
| 226 |
+
border-color: var(--accent);
|
| 227 |
+
color: white;
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
.action-btn.primary:hover { background: #b53d15; }
|
| 231 |
+
|
| 232 |
+
/* ── Toast ── */
|
| 233 |
+
.toast {
|
| 234 |
+
position: fixed;
|
| 235 |
+
bottom: 16px;
|
| 236 |
+
left: 50%;
|
| 237 |
+
transform: translateX(-50%) translateY(40px);
|
| 238 |
+
opacity: 0;
|
| 239 |
+
background: var(--ink);
|
| 240 |
+
color: var(--paper);
|
| 241 |
+
font-family: var(--mono);
|
| 242 |
+
font-size: 0.65rem;
|
| 243 |
+
padding: 8px 16px;
|
| 244 |
+
border-radius: 2px;
|
| 245 |
+
white-space: nowrap;
|
| 246 |
+
transition: all 0.3s cubic-bezier(0.34, 1.56, 0.64, 1);
|
| 247 |
+
z-index: 999;
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
.toast.show {
|
| 251 |
+
transform: translateX(-50%) translateY(0);
|
| 252 |
+
opacity: 1;
|
| 253 |
+
}
|
| 254 |
+
</style>
|
| 255 |
+
</head>
|
| 256 |
+
<body>
|
| 257 |
+
<div class="sidebar">
|
| 258 |
+
|
| 259 |
+
<!-- Header -->
|
| 260 |
+
<div class="sb-header">
|
| 261 |
+
<div class="sb-title">GLM-<em>OCR</em> Result</div>
|
| 262 |
+
<button class="sb-close" id="close-btn">✕ Close</button>
|
| 263 |
+
</div>
|
| 264 |
+
|
| 265 |
+
<!-- Body -->
|
| 266 |
+
<div class="sb-body" id="sb-body">
|
| 267 |
+
|
| 268 |
+
<!-- Loading state (default) -->
|
| 269 |
+
<div class="sb-loading" id="state-loading">
|
| 270 |
+
<div class="scan-bar-wrap"><div class="scan-bar"></div></div>
|
| 271 |
+
<div class="loading-label">Running GLM-OCR…</div>
|
| 272 |
+
</div>
|
| 273 |
+
|
| 274 |
+
</div>
|
| 275 |
+
|
| 276 |
+
<!-- Actions (shown after result) -->
|
| 277 |
+
<div class="sb-actions" id="sb-actions" style="display:none">
|
| 278 |
+
<button class="action-btn primary" id="new-btn">✂ New Selection</button>
|
| 279 |
+
<button class="action-btn" id="copy-btn">Copy</button>
|
| 280 |
+
<button class="action-btn" id="dl-btn">↓ .txt</button>
|
| 281 |
+
</div>
|
| 282 |
+
|
| 283 |
+
</div>
|
| 284 |
+
|
| 285 |
+
<div class="toast" id="toast"></div>
|
| 286 |
+
|
| 287 |
+
<script>
|
| 288 |
+
let extractedText = "";
|
| 289 |
+
|
| 290 |
+
// ── Receive data from content.js ──────────────────────────────────────────
|
| 291 |
+
// Entry point for everything the sidebar displays. Messages shaped like
// { type: "SIDEBAR_DATA", data: {...} } are posted into this frame; all
// other messages are ignored.
// NOTE(review): the sender's origin/source is not validated here, so the
// payload must be treated as untrusted by whatever renders it.
window.addEventListener("message", (e) => {
  if (e.data?.type !== "SIDEBAR_DATA") return;

  const data = e.data.data;

  // A SIDEBAR_DATA message without a payload object would otherwise throw
  // a TypeError on the property reads below; drop it instead.
  if (data == null || typeof data !== "object") return;

  // The loading placeholder is the default markup of #sb-body, so a
  // "loading" message requires no work.
  if (data.loading) return;

  renderResult(data);
});
|
| 299 |
+
|
| 300 |
+
/**
 * Replace the contents of #sb-body with either an error box or the OCR
 * result view, then reveal the action bar (#sb-actions).
 *
 * Every value taken from `data` is inserted as a text node or attribute
 * value (never parsed as HTML), because the payload arrives via
 * postMessage and cannot be trusted.
 *
 * Fields read from `data`:
 *   error        - if truthy, only the error box is rendered
 *   text         - extracted text; also stored in the shared `extractedText`
 *   latency_ms   - shown in seconds with two decimals, "—" when falsy
 *   imageDataUrl - used as the preview image `src` ("" when missing)
 *   word_count, char_count - shown as-is, 0 when falsy
 *   device       - shown as-is, "—" when falsy
 *
 * @param {object} data - payload of a SIDEBAR_DATA message
 */
function renderResult(data) {
  const body = document.getElementById("sb-body");
  const actions = document.getElementById("sb-actions");

  // Element factory: text always goes through textContent so markup in
  // the payload is displayed literally instead of being interpreted.
  const make = (tag, className, text) => {
    const node = document.createElement(tag);
    if (className) node.className = className;
    if (text !== undefined) node.textContent = text;
    return node;
  };

  // Drop whatever is currently shown (loading placeholder or old result).
  body.textContent = "";

  if (data.error) {
    const errorBox = make("div", "sb-error");
    errorBox.append(
      `⚠ ${data.error}`,
      make("br"),
      make("br"),
      "Make sure the GLM-OCR server is running at localhost:8000.",
    );
    body.append(errorBox);
    actions.style.display = "flex";
    return;
  }

  // Shared with the copy / download handlers.
  extractedText = data.text || "";

  const latency = data.latency_ms ? `${(data.latency_ms / 1000).toFixed(2)}s` : "—";

  // Image preview
  const preview = make("img", "sb-image");
  preview.setAttribute("src", data.imageDataUrl || "");
  preview.alt = "Selection";
  const imageWrap = make("div", "sb-image-wrap");
  imageWrap.append(make("div", "sb-image-label", "Selected Region"), preview);

  // Meta chips: "<label>: <strong>value</strong>"
  const chip = (label, value) => {
    const node = make("span", "chip", `${label}: `);
    node.append(make("strong", "", String(value)));
    return node;
  };
  const meta = make("div", "sb-meta");
  meta.append(
    chip("words", data.word_count || 0),
    chip("chars", data.char_count || 0),
    chip("latency", latency),
    chip("device", data.device || "—"),
  );

  // Extracted text; the id is relied on by the copy fallback.
  const textBox = make("div", "sb-text");
  textBox.id = "result-text";
  if (data.text) {
    textBox.textContent = data.text;
  } else {
    const placeholder = make("span", "", "[No text detected]");
    placeholder.style.color = "var(--muted)";
    textBox.append(placeholder);
  }
  const textSection = make("div", "sb-text-section");
  textSection.append(make("div", "sb-text-label", "Extracted Text"), textBox);

  body.append(imageWrap, meta, textSection);
  actions.style.display = "flex";
}
|
| 335 |
+
|
| 336 |
+
/**
 * Escape a value for safe insertion into HTML text or a quoted attribute.
 *
 * Replaces &, <, >, " and ' with their entities in a single pass, so an
 * ampersand produced by one replacement is never escaped twice.
 *
 * @param {*} str - value to escape; null/undefined become "", other
 *   non-strings are converted with String()
 * @returns {string} the escaped text
 */
function escapeHtml(str) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(str ?? "").replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
| 342 |
+
|
| 343 |
+
// ── Close ─────────────────────────────────────────────────────────────────
|
| 344 |
+
// Close button: post a CLOSE_SIDEBAR request to the parent window
// (presumably handled by the embedding content script — confirm there).
document
  .getElementById("close-btn")
  .addEventListener("click", function onCloseClick() {
    const request = { type: "CLOSE_SIDEBAR" };
    window.parent.postMessage(request, "*");
  });
|
| 347 |
+
|
| 348 |
+
// ── New selection ─────────────────────────────────────────────────────────
|
| 349 |
+
// "New Selection" button: post a START_NEW_SELECTION request to the parent
// window (presumably handled by the embedding content script — confirm there).
document
  .getElementById("new-btn")
  .addEventListener("click", function onNewSelectionClick() {
    const request = { type: "START_NEW_SELECTION" };
    window.parent.postMessage(request, "*");
  });
|
| 352 |
+
|
| 353 |
+
// ── Copy ──────────────────────────────────────────────────────────────────
|
| 354 |
+
// Copy button: write the extracted text to the clipboard. If that fails
// for any reason, highlight the rendered text instead and tell the user
// to copy it manually.
document.getElementById("copy-btn").addEventListener("click", async () => {
  // Fallback: select the whole contents of the result box, if it exists.
  const highlightResultText = () => {
    const resultBox = document.getElementById("result-text");
    if (!resultBox) return;

    const wholeBox = document.createRange();
    wholeBox.selectNodeContents(resultBox);

    const selection = window.getSelection();
    selection.removeAllRanges();
    selection.addRange(wholeBox);
  };

  try {
    await navigator.clipboard.writeText(extractedText);
    toast("Copied!");
  } catch {
    highlightResultText();
    toast("Select text above and copy manually.");
  }
});
|
| 371 |
+
|
| 372 |
+
// ── Download ──────────────────────────────────────────────────────────────
|
| 373 |
+
// Download button: save the extracted text as a timestamped .txt file by
// clicking a temporary <a download> that points at a Blob object URL.
document.getElementById("dl-btn").addEventListener("click", () => {
  // Explicit charset so consumers of the Blob type do not have to guess the
  // encoding of non-ASCII OCR output.
  const blob = new Blob([extractedText], { type: "text/plain;charset=utf-8" });
  const url = URL.createObjectURL(blob);

  const a = document.createElement("a");
  a.href = url;
  a.download = `glm-ocr-${Date.now()}.txt`;
  a.click();

  // Revoke on a later task rather than synchronously: the browser may not
  // have started reading the blob yet when click() returns, and revoking
  // too early can abort the download in some browsers.
  setTimeout(() => URL.revokeObjectURL(url), 1000);
});
|
| 381 |
+
|
| 382 |
+
// ── Toast ─────────────────────────────────────────────────────────────────
|
| 383 |
+
/**
 * Show a transient message in the #toast element for two seconds.
 *
 * A pending hide timer from an earlier call is cancelled first, so a toast
 * shown shortly after another one still stays visible for its full two
 * seconds instead of being hidden by the earlier timer.
 *
 * @param {string} msg - text to display (assigned via textContent)
 */
function toast(msg) {
  const t = document.getElementById("toast");
  t.textContent = msg;
  t.classList.add("show");

  // The timer id is kept on the function itself so no extra global is needed.
  clearTimeout(toast.hideTimer);
  toast.hideTimer = setTimeout(() => t.classList.remove("show"), 2000);
}
|
| 389 |
+
</script>
|
| 390 |
+
</body>
|
| 391 |
+
</html>
|