import os
import gc
import json
import base64
import time
from pathlib import Path
from io import BytesIO
from threading import Thread
from urllib.parse import urlparse
from urllib.request import urlopen
import gradio as gr
import requests
import spaces
import torch
from PIL import Image
from huggingface_hub import snapshot_download
from transformers import (
LightOnOcrForConditionalGeneration,
LightOnOcrProcessor,
Qwen2VLForConditionalGeneration,
Qwen3VLForConditionalGeneration,
Qwen2_5_VLForConditionalGeneration,
AutoModelForCausalLM,
AutoProcessor,
AutoModel,
AutoTokenizer,
TextIteratorStreamer,
)
# Force synchronous CUDA kernel launches so device-side errors surface at the
# offending call rather than a later one (debugging aid).
# NOTE(review): set after `import torch` — only effective if CUDA has not been
# initialized yet; confirm nothing above triggers CUDA init first.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Generation-length limits exposed to the UI sliders.
MAX_MAX_NEW_TOKENS = 8192  # previously 4096
DEFAULT_MAX_NEW_TOKENS = 4096  # previously 1024
# Rough prompt-size cap; overridable via environment variable.
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "10000"))

# Single-device setup: first CUDA GPU when available, else CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Startup diagnostics for the runtime CUDA environment.
print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
print("torch.__version__ =", torch.__version__)
print("torch.version.cuda =", torch.version.cuda)
print("cuda available:", torch.cuda.is_available())
print("cuda device count:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("current device:", torch.cuda.current_device())
    print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
print("Using device:", device)
def get_attention_fallbacks() -> list[str | None]:
fallbacks = []
if torch.cuda.is_available() and os.getenv("USE_FLASH_ATTN", "0") == "1":
fallbacks.append("kernels-community/flash-attn2")
if torch.cuda.is_available():
fallbacks.append("sdpa")
fallbacks.append("eager")
fallbacks.append(None)
return fallbacks
def load_model_with_attention_fallback(model_cls, model_id, **kwargs):
    """Call ``model_cls.from_pretrained`` trying each attention backend in turn.

    Walks get_attention_fallbacks(); the first successful load is returned.
    If every backend fails, the last exception is re-raised.
    """
    failure = None
    for backend in get_attention_fallbacks():
        attempt_kwargs = dict(kwargs)
        if backend is None:
            # Default backend: let transformers decide.
            attempt_kwargs.pop("attn_implementation", None)
        else:
            attempt_kwargs["attn_implementation"] = backend
        label = backend if backend is not None else "default"
        try:
            print(f"Loading {model_id} with attention backend: {label}")
            return model_cls.from_pretrained(model_id, **attempt_kwargs)
        except Exception as exc:
            failure = exc
            print(f"Failed loading {model_id} with attention backend {label}: {exc}")
    raise failure
def patch_dots_ocr_configuration(repo_path: str) -> None:
    """Patch a local dots.OCR checkout so its processor accepts video_processor.

    Rewrites ``configuration_dots.py`` in place: declares the processor
    attributes and threads a ``video_processor`` parameter through
    ``__init__`` / ``super().__init__``. No-op when the file is absent or
    already patched.
    """
    cfg = Path(repo_path) / "configuration_dots.py"
    if not cfg.exists():
        return
    original = cfg.read_text(encoding="utf-8")
    patched = original
    # Declare the attribute list newer transformers processors expect.
    if 'attributes = ["image_processor", "tokenizer"]' not in patched:
        patched = patched.replace(
            "class DotsVLProcessor(Qwen2_5_VLProcessor):\n",
            'class DotsVLProcessor(Qwen2_5_VLProcessor):\n attributes = ["image_processor", "tokenizer"]\n',
            1,
        )
    # Accept the video_processor kwarg in the constructor signature...
    if "def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):" in patched:
        patched = patched.replace(
            "def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):",
            "def __init__(self, image_processor=None, tokenizer=None, video_processor=None, chat_template=None, **kwargs):",
            1,
        )
    # ...and forward it to the base class.
    if "super().__init__(image_processor, tokenizer, chat_template=chat_template)" in patched:
        patched = patched.replace(
            "super().__init__(image_processor, tokenizer, chat_template=chat_template)",
            "super().__init__(image_processor, tokenizer, video_processor, chat_template=chat_template)",
            1,
        )
    if patched != original:
        cfg.write_text(patched, encoding="utf-8")
        print(f"Patched dots.OCR processor config: {cfg}")
def resolve_dots_ocr_model_path(repo_id: str) -> str:
    """Return a model path whose processor loads without the video_processor bug.

    Probes the hub repo first. On the known ``video_processor`` TypeError, a
    local snapshot is downloaded, patched, and its path returned instead; any
    other TypeError propagates unchanged.
    """
    try:
        AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
    except TypeError as exc:
        if "video_processor" not in str(exc):
            raise
        print("dots.OCR processor compatibility issue detected, applying local patch...")
        local_path = snapshot_download(repo_id=repo_id, local_dir="/tmp/dots_ocr_model", local_dir_use_symlinks=False)
        patch_dots_ocr_configuration(local_path)
        return local_path
    return repo_id
# ---------------------------------------------------------------------------
# Model zoo: every local checkpoint is loaded eagerly at import time, moved to
# `device`, and frozen in eval mode. load_model_with_attention_fallback walks
# the attention-backend preference list so a missing flash-attn build degrades
# gracefully.
# ---------------------------------------------------------------------------

# Nanonets OCR2 (Qwen2.5-VL architecture), fp16.
MODEL_ID_V = "nanonets/Nanonets-OCR2-3B"
processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
model_v = load_model_with_attention_fallback(
    Qwen2_5_VLForConditionalGeneration,
    MODEL_ID_V,
    trust_remote_code=True,
    torch_dtype=torch.float16
).to(device).eval()

# LightOnOCR: dedicated OCR model; bf16 on GPU, fp32 on CPU.
MODEL_ID_Y = "lightonai/LightOnOCR-2-1B"
LIGHTON_DTYPE = torch.bfloat16 if torch.cuda.is_available() else torch.float32
processor_y = LightOnOcrProcessor.from_pretrained(MODEL_ID_Y)
model_y = LightOnOcrForConditionalGeneration.from_pretrained(
    MODEL_ID_Y,
    torch_dtype=LIGHTON_DTYPE,
).to(device).eval()

# Qwen2-VL OCR fine-tune, fp16.
MODEL_ID_X = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
model_x = load_model_with_attention_fallback(
    Qwen2VLForConditionalGeneration,
    MODEL_ID_X,
    trust_remote_code=True,
    torch_dtype=torch.float16
).to(device).eval()

# olmOCR (Qwen2.5-VL architecture), fp16.
MODEL_ID_W = "allenai/olmOCR-7B-0725"
processor_w = AutoProcessor.from_pretrained(MODEL_ID_W, trust_remote_code=True)
model_w = load_model_with_attention_fallback(
    Qwen2_5_VLForConditionalGeneration,
    MODEL_ID_W,
    trust_remote_code=True,
    torch_dtype=torch.float16
).to(device).eval()

# Qwen3-VL general-purpose VLM; dtype chosen automatically from the checkpoint.
MODEL_ID_M = "Qwen/Qwen3-VL-4B-Instruct"
processor_m = AutoProcessor.from_pretrained(MODEL_ID_M)
model_m = load_model_with_attention_fallback(
    Qwen3VLForConditionalGeneration,
    MODEL_ID_M,
    torch_dtype="auto"
).to(device).eval()

# Ministral is served remotely through the Albert API — no local weights.
MODEL_ID_A = "mistralai/Ministral-3-8B-Instruct-2512"
ALBERT_API_URL = "https://albert.api.etalab.gouv.fr/v1/chat/completions"

# UI name -> (processor, model). The remote entry stores (None, model_id) and
# is special-cased in generate_image.
MODEL_MAP = {
    "Nanonets-OCR2-3B": (processor_v, model_v),
    "LightOnOCR-2-1B": (processor_y, model_y),
    "olmOCR-7B-0725": (processor_w, model_w),
    "Qwen3-VL-4B-Instruct": (processor_m, model_m),
    "Qwen2-VL-OCR-2B": (processor_x, model_x),
    "Ministral-3-8B-Instruct-2512": (None, MODEL_ID_A),
}
MODEL_CHOICES = list(MODEL_MAP.keys())
# Prompt presets surfaced in the UI: id -> {name, description, prompt, icon}.
# `name`/`icon` are display-only; `prompt` is the instruction sent to the model.
PROMPTS = {
    "OCR_GENERAL": {
        # Fixed user-facing typo: "Etraction" -> "Extraction".
        "name": "Simple Text Extraction",
        "description": "Extract the text including inside images and logos",
        "prompt": "Extract the text including inside images and logos",
        "icon": "📝"
    },
    "OCR_MARKDOWN": {
        "name": "OCR -> Markdown",
        "description": "Perform OCR and convert to Markdown",
        "prompt": "Perform OCR including inside images and logos and convert to Markdown.",
        "icon": "🔍"
    },
    "STRUCTURED_EXTRACTION": {
        "name": "Json Metadata Extraction",
        "description": "Extract metadata",
        "prompt": """Extract the document title from this cover page.
Output ONLY valid JSON:
{
"title": ""
}""",
        "icon": "🏷️"
    },
    "STRUCTURED_LOCATED_EXTRACTION": {
        "name": "Located Json Metadata Extraction",
        "description": "Extract located metadata",
        "prompt": """Extract the document title from the middle central block of this cover page.
Output ONLY valid JSON:
{
"title": ""
}""",
        "icon": "📍"
    },
    "STRUCTURED_GROUNDED_EXTRACTION": {
        "name": "Grounded Json Metadata Extraction",
        "description": "Extract grounded metadata",
        "prompt": """Extract the document title usually located around (x=0.5015, y=0.442) from this cover page.
Output ONLY valid JSON:
{
"title": ""
}""",
        "icon": "📍"
    },
    "FULL_SCHEMA_JSON": {
        "name": "Full Json Schema",
        "description": "Extract full document metadata content as JSON with schema",
        "prompt": """Analyze this thesis/dissertation cover image and extract ONLY visible information.
CRITICAL: Only extract information that is CLEARLY VISIBLE on the page.
DO NOT invent, guess, or hallucinate any data. If a field is not visible, use null.
Return ONLY valid JSON with this exact structure:
{
"title": "Main title of the thesis or dissertation as it appears on the title page",
"subtitle": "Subtitle or remainder of the title, usually following a colon; null if not present",
"author": "Full name of the author (student) who wrote the thesis or dissertation",
"degree_type": "Academic degree sought by the author (e.g. PhD, Doctorate, Master's degree, Master's thesis)",
"discipline": "Academic field or discipline of the thesis if explicitly stated; null if not present. Possible values: Mathématiques|Physics|Biology|others",
"granting_institution": "Institution where the thesis was submitted and the degree is granted (degree-granting institution)",
"doctoral_school": "Doctoral school or graduate program, if explicitly mentioned; null if not present",
"co_tutelle_institutions": "List of institutions involved in a joint supervision or co-tutelle agreement; empty list if none",
"partner_institutions": "List of partner institutions associated with the thesis but not granting the degree; empty list if none",
"defense_year": "Year the thesis or dissertation was defended, in YYYY format; null if not visible",
"defense_place": "City or place where the defense took place, if stated; null if not present",
"thesis_advisor": "Main thesis advisor or supervisor (director of thesis); full name; null if not present",
"co_advisors": "List of co-advisors or co-supervisors if explicitly mentioned; full names; empty list if none",
"jury_president": "President or chair of the thesis examination committee, if specified; null if not present",
"reviewers": "List of reviewers or rapporteurs of the thesis, if specified; full names; empty list if none",
"committee_members": "List of other thesis committee or jury members, excluding advisor and reviewers; full names; empty list if none",
"language": "Language in which the thesis is written, if explicitly stated; null if not present",
"confidence": "Confidence score between 0.0 and 1.0 indicating reliability of the extracted metadata"
}
IMPORTANT: Return null for any field where information is NOT clearly visible.
Return ONLY the JSON, no explanation.""",
        "icon": "📄"
    },
    "NUEXTRACT_SCHEMA_JSON": {
        "name": "NuExtract Json Schema",
        "description": "Strict data extraction following deterministic JSON schema",
        # NuExtract-style template: the prompt itself is the target schema.
        "prompt": """{
"title": "verbatim-string",
"subtitle": "verbatim-string",
"author": "verbatim-string",
"degree_type": "verbatim-string",
"discipline": [["Mathématiques", "Physique", "Autres"]],
"granting_institution": ["verbatim-string"],
"doctoral_school": ["verbatim-string"],
"co_tutelle_institutions": ["verbatim-string"],
"partner_institutions": ["verbatim-string"],
"defense_year": "integer",
"thesis_advisor": ["verbatim-string"],
"co_advisors": ["verbatim-string"],
"jury_president": "verbatim-string",
"reviewers": ["verbatim-string"],
"other_jury_members": ["verbatim-string"],
"language": "verbatim-string",
"confidence": "float"
}""",
        "icon": "📄"
    }
}

# Example cards shown under the upload zone: prompt + sample image + model.
image_examples = [
    {"query": PROMPTS["OCR_GENERAL"]["prompt"], "image": "examples/dumas_01382452.png", "model": "Nanonets-OCR2-3B"},
    {"query": PROMPTS["OCR_MARKDOWN"]["prompt"], "image": "examples/dumas_01646440.png", "model": "olmOCR-7B-0725"},
    {"query": PROMPTS["STRUCTURED_EXTRACTION"]["prompt"], "image": "examples/ephesvt_theses_doc13.jpg", "model": "Qwen3-VL-4B-Instruct"},
    {"query": PROMPTS["STRUCTURED_LOCATED_EXTRACTION"]["prompt"], "image": "examples/memoires_cridaf_doc07.jpg", "model": "Qwen2-VL-OCR-2B"},
    {"query": PROMPTS["STRUCTURED_GROUNDED_EXTRACTION"]["prompt"], "image": "examples/thesefr_2015PA010690.png", "model": "LightOnOCR-2-1B"},
    # Empty query: LightOnOCR needs no instruction.
    {"query": "", "image": "examples/thesefr_2015PA010690.png", "model": "LightOnOCR-2-1B"},
]
def select_model(model_name: str):
    """Return the (processor, model) pair registered under *model_name*.

    Raises ValueError for names not present in MODEL_MAP.
    """
    if model_name in MODEL_MAP:
        return MODEL_MAP[model_name]
    raise ValueError("Invalid model selected.")
def pil_to_data_url(img: Image.Image, fmt="PNG"):
    """Serialize *img* into a base64 ``data:`` URL.

    Any format other than PNG is labelled image/jpeg.
    """
    mime = "image/png" if fmt.upper() == "PNG" else "image/jpeg"
    buffer = BytesIO()
    img.save(buffer, format=fmt)
    payload = base64.b64encode(buffer.getvalue()).decode()
    return f"data:{mime};base64,{payload}"
def file_to_data_url(path):
    """Read a local image file into a base64 ``data:`` URL.

    Returns "" when the path does not exist; unknown extensions are
    labelled image/jpeg.
    """
    if not os.path.exists(path):
        return ""
    extension = path.rsplit(".", 1)[-1].lower()
    mime_by_ext = {
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "webp": "image/webp",
    }
    mime = mime_by_ext.get(extension, "image/jpeg")
    with open(path, "rb") as handle:
        encoded = base64.b64encode(handle.read()).decode()
    return f"data:{mime};base64,{encoded}"
def image_url_to_data_url(url):
    """Fetch *url* and return its body as a base64 ``data:`` URL.

    Any failure (bad URL, network error, timeout) is logged and mapped to "".
    """
    try:
        with urlopen(url, timeout=15) as resp:
            mime = resp.headers.get_content_type() or "image/jpeg"
            body = base64.b64encode(resp.read()).decode()
    except Exception as exc:
        print("Image URL load error:", exc)
        return ""
    return f"data:{mime};base64,{body}"
def is_image_url(value):
    """Return True when *value* parses as an http(s) URL with a host."""
    try:
        parts = urlparse(str(value))
    except Exception:
        return False
    if parts.scheme not in ("http", "https"):
        return False
    return bool(parts.netloc)
def make_thumb_b64(path, max_dim=240):
    """Build a JPEG thumbnail ``data:`` URL from a local path or http(s) URL.

    The image is bounded to max_dim on its longer side. Any failure is
    logged and mapped to "".
    """
    try:
        if is_image_url(path):
            with urlopen(path, timeout=15) as resp:
                raw = resp.read()
            img = Image.open(BytesIO(raw)).convert("RGB")
        else:
            img = Image.open(path).convert("RGB")
        img.thumbnail((max_dim, max_dim))
        return pil_to_data_url(img, "JPEG")
    except Exception as exc:
        print("Thumbnail error:", exc)
        return ""
def build_example_cards_html():
    # Render one clickable card per entry in image_examples.
    # NOTE(review): the f-string template below appears to have had its HTML
    # tags stripped during extraction — only the interpolations and the
    # "Preview" fallback text remain. Recover the original markup before
    # shipping; preserved byte-for-byte here.
    cards = ""
    for i, ex in enumerate(image_examples):
        thumb = make_thumb_b64(ex["image"])
        # Clamp the prompt preview to 72 characters with an ellipsis.
        prompt_short = ex["query"][:72] + ("..." if len(ex["query"]) > 72 else "")
        cards += f"""
{"
" if thumb else "
Preview
"}
{ex["model"]}
{prompt_short}
"""
    return cards
# Pre-rendered once at startup; the example list is static.
EXAMPLE_CARDS_HTML = build_example_cards_html()
def load_example_data(idx_str):
    """Resolve an example-card index into a JSON payload for the frontend.

    Returns a gr.update whose value is a JSON string: either
    {"status": "ok", query, image, model, name} or
    {"status": "error", "message": ...}.
    """
    def _error(message):
        # Uniform error envelope for the JS side.
        return gr.update(value=json.dumps({"status": "error", "message": message}))
    try:
        idx = int(str(idx_str).strip())
    except Exception:
        return _error("Invalid example index")
    if not (0 <= idx < len(image_examples)):
        return _error("Example index out of range")
    ex = image_examples[idx]
    img_b64 = file_to_data_url(ex["image"])
    if not img_b64:
        return _error("Could not load example image")
    payload = {
        "status": "ok",
        "query": ex["query"],
        "image": img_b64,
        "model": ex["model"],
        "name": os.path.basename(ex["image"]),
    }
    return gr.update(value=json.dumps(payload))
def b64_to_pil(b64_str):
    """Decode a ``data:`` URL, raw base64 string, or http(s) URL into an RGB image.

    Returns None for empty input or on any fetch/decode failure.
    """
    if not b64_str:
        return None
    try:
        if is_image_url(b64_str):
            with urlopen(b64_str, timeout=15) as resp:
                return Image.open(BytesIO(resp.read())).convert("RGB")
        # Strip a "data:<mime>;base64," prefix when present.
        payload = b64_str.split(",", 1)[1] if b64_str.startswith("data:") else b64_str
        return Image.open(BytesIO(base64.b64decode(payload))).convert("RGB")
    except Exception:
        return None
def calc_timeout_duration(*args, **kwargs):
    """Duration hook for @spaces.GPU: extract the call's gpu_timeout value.

    Falls back to the last positional argument (the UI passes gpu_timeout
    last) and finally to 60 seconds when nothing usable is found.
    """
    timeout = kwargs.get("gpu_timeout")
    if timeout is None and args:
        timeout = args[-1]
    try:
        return int(timeout)
    except Exception:
        return 60
def align_inputs_to_model_dtype(inputs, model):
    """Cast floating-point tensors in *inputs* (in place) to the model's dtype.

    Integer/bool tensors and non-tensor values are left untouched; *inputs*
    is returned unchanged when the dtype cannot be determined.
    """
    target = getattr(model, "dtype", None)
    if target is None:
        try:
            target = next(model.parameters()).dtype
        except StopIteration:
            # Parameterless model: nothing to align against.
            return inputs
    for name in list(inputs):
        tensor = inputs[name]
        if torch.is_tensor(tensor) and tensor.is_floating_point():
            inputs[name] = tensor.to(dtype=target)
    return inputs
def model_requires_text_prompt(model_name: str) -> bool:
    """Whether *model_name* needs a user instruction (LightOnOCR does not)."""
    prompt_free_models = {"LightOnOCR-2-1B"}
    return model_name not in prompt_free_models
def call_albert_api(model_id, text, image, max_new_tokens, temperature, top_p):
    """Send one text+image chat completion to the Albert API, return the text.

    Raises RuntimeError when the API key is missing or the response carries no
    choices; HTTP errors propagate via raise_for_status.
    """
    api_key = os.getenv("ALBERT_API_KEY")
    if not api_key:
        raise RuntimeError("ALBERT_API_KEY environment variable is not set.")
    payload = {
        "model": model_id,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": text},
                    {"type": "image_url", "image_url": {"url": pil_to_data_url(image, "PNG")}},
                ],
            }
        ],
        "max_tokens": int(max_new_tokens),
        "temperature": float(temperature),
        "top_p": float(top_p),
    }
    response = requests.post(
        ALBERT_API_URL,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        },
        json=payload,
        timeout=180,
    )
    response.raise_for_status()
    data = response.json()
    choices = data.get("choices") or []
    if not choices:
        raise RuntimeError(f"Albert API returned no choices: {json.dumps(data)}")
    content = (choices[0].get("message") or {}).get("content", "")
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Multi-part content: keep only the text fragments.
        fragments = [
            part.get("text", "")
            for part in content
            if isinstance(part, dict) and part.get("type") == "text"
        ]
        return "\n".join(f for f in fragments if f).strip()
    return str(content).strip()
# spaces.GPU accepts a callable for `duration`; calc_timeout_duration reads the
# gpu_timeout argument of the wrapped call to size the GPU reservation.
@spaces.GPU(duration=calc_timeout_duration)
def generate_image(model_name, text, image, max_new_tokens, temperature, top_p, top_k, repetition_penalty, gpu_timeout):
    """Run one OCR / vision-language inference, yielding output text incrementally.

    Dispatch order: remote Albert API (Ministral), LightOnOCR (single-shot, no
    streaming), Qwen3-VL (chat template produces tensors directly), then the
    generic processor path. The streaming paths feed a TextIteratorStreamer
    from a worker thread. All failures are yielded as "[ERROR] ..." strings
    rather than raised, so the Gradio stream always receives a message.

    `gpu_timeout` is unused in the body; it exists for the @spaces.GPU
    duration hook above.
    """
    try:
        # ---- input validation ----
        if not model_name or model_name not in MODEL_MAP:
            yield "[ERROR] Please select a valid model."
            return
        if image is None:
            yield "[ERROR] Please upload an image."
            return
        text = str(text or "").strip()
        # LightOnOCR takes no instruction; drop any prompt text.
        if not model_requires_text_prompt(model_name):
            text = ""
        if model_requires_text_prompt(model_name) and not text:
            yield "[ERROR] Please enter your OCR/query instruction."
            return
        # Character-level guard (~8 chars per token heuristic).
        if len(str(text)) > MAX_INPUT_TOKEN_LENGTH * 8:
            yield "[ERROR] Query is too long. Please shorten your input."
            return
        # ---- remote model: one blocking API call, no streaming ----
        if model_name == "Ministral-3-8B-Instruct-2512":
            output_text = call_albert_api(
                model_id=MODEL_ID_A,
                text=text,
                image=image,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
            )
            if output_text.strip():
                yield output_text
            else:
                yield "[ERROR] No output was generated."
            return
        processor, model = select_model(model_name)
        # ---- LightOnOCR: image-only conversation, single-shot generate ----
        if model_name == "LightOnOCR-2-1B":
            conversation = [{"role": "user", "content": [{"type": "image", "image": image}]}]
            inputs = processor.apply_chat_template(
                conversation,
                add_generation_prompt=True,
                tokenize=True,
                return_dict=True,
                return_tensors="pt",
            )
            # Float tensors must match the bf16/fp32 dtype the model was loaded with.
            inputs = {
                k: v.to(device=device, dtype=LIGHTON_DTYPE) if torch.is_tensor(v) and v.is_floating_point() else v.to(device)
                for k, v in inputs.items()
            }
            output_ids = model.generate(**inputs, max_new_tokens=int(max_new_tokens))
            # Drop the prompt tokens and decode only the generated tail.
            generated_ids = output_ids[0, inputs["input_ids"].shape[1]:]
            output_text = processor.decode(generated_ids, skip_special_tokens=True)
            if output_text.strip():
                yield output_text
            else:
                yield "[ERROR] No output was generated."
            return
        # ---- streaming paths: tokens arrive via TextIteratorStreamer ----
        streamer = TextIteratorStreamer(
            processor.tokenizer if hasattr(processor, "tokenizer") else processor,
            skip_prompt=True,
            skip_special_tokens=True
        )
        # Mutable slot so the worker thread can report its exception back.
        generation_error = {"error": None}
        if model_name == "Qwen3-VL-4B-Instruct":
            # Qwen3-VL: the chat template tokenizes straight to tensors.
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "image", "image": image},
                        {"type": "text", "text": text},
                    ],
                }
            ]
            inputs = processor.apply_chat_template(
                messages,
                tokenize=True,
                add_generation_prompt=True,
                return_dict=True,
                return_tensors="pt",
            )
            # generate() does not accept token_type_ids for this model.
            inputs.pop("token_type_ids", None)
            inputs = {k: v.to(model.device) if hasattr(v, "to") else v for k, v in inputs.items()}
            generation_kwargs = {
                **inputs,
                "streamer": streamer,
                "max_new_tokens": int(max_new_tokens),
                "do_sample": True,
                "temperature": float(temperature),
                "top_p": float(top_p),
                "top_k": int(top_k),
                "repetition_penalty": float(repetition_penalty),
            }
        else:
            # Generic path: render the prompt as text, then run the processor
            # over text + image together.
            messages = [{
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": text},
                ]
            }]
            prompt_full = processor.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True
            )
            inputs = processor(
                text=[prompt_full],
                images=[image],
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=MAX_INPUT_TOKEN_LENGTH
            ).to(device)
            inputs = align_inputs_to_model_dtype(inputs, model)
            generation_kwargs = {
                **inputs,
                "streamer": streamer,
                "max_new_tokens": int(max_new_tokens),
                "do_sample": True,
                "temperature": float(temperature),
                "top_p": float(top_p),
                "top_k": int(top_k),
                "repetition_penalty": float(repetition_penalty),
            }
        def _run_generation():
            # Worker thread: run the blocking generate(); record any failure
            # and always close the streamer so the consumer loop terminates.
            try:
                model.generate(**generation_kwargs)
            except Exception as e:
                generation_error["error"] = e
            try:
                streamer.end()
            except Exception:
                pass
        thread = Thread(target=_run_generation, daemon=True)
        thread.start()
        # Re-yield the growing buffer so the UI shows cumulative output.
        buffer = ""
        for new_text in streamer:
            buffer += new_text.replace("<|im_end|>", "")
            time.sleep(0.01)  # tiny pause smooths the UI stream
            yield buffer
        thread.join(timeout=1.0)
        # Surface any worker-thread failure after whatever partial text we got.
        if generation_error["error"] is not None:
            err_msg = f"[ERROR] Inference failed: {str(generation_error['error'])}"
            if buffer.strip():
                yield buffer + "\n\n" + err_msg
            else:
                yield err_msg
            return
        if not buffer.strip():
            yield "[ERROR] No output was generated."
    except Exception as e:
        yield f"[ERROR] {str(e)}"
    finally:
        # Best-effort memory cleanup after each request.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
def run_ocr(model_name, text, image_b64, max_new_tokens_v, temperature_v, top_p_v, top_k_v, repetition_penalty_v, gpu_timeout_v):
    """UI entry point: decode the image payload, then stream generate_image.

    Any exception is converted into a single "[ERROR] ..." chunk.
    """
    try:
        pil_image = b64_to_pil(image_b64)
        yield from generate_image(
            model_name=model_name,
            text=text,
            image=pil_image,
            max_new_tokens=max_new_tokens_v,
            temperature=temperature_v,
            top_p=top_p_v,
            top_k=top_k_v,
            repetition_penalty=repetition_penalty_v,
            gpu_timeout=gpu_timeout_v,
        )
    except Exception as exc:
        yield f"[ERROR] {str(exc)}"
def noop():
    """No-op callback; always returns None."""
css = r"""
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600&display=swap');
*{box-sizing:border-box;margin:0;padding:0}
html,body{height:100%;overflow-x:hidden}
body,.gradio-container{
background:#0f0f13!important;
font-family:'Inter',system-ui,-apple-system,sans-serif!important;
font-size:14px!important;color:#e4e4e7!important;min-height:100vh;overflow-x:hidden;
}
.dark body,.dark .gradio-container{background:#0f0f13!important;color:#e4e4e7!important}
footer{display:none!important}
.hidden-input{display:none!important;height:0!important;overflow:hidden!important;margin:0!important;padding:0!important}
#gradio-run-btn,#example-load-btn{
position:absolute!important;left:-9999px!important;top:-9999px!important;
width:1px!important;height:1px!important;opacity:0.01!important;
pointer-events:none!important;overflow:hidden!important;
}
.app-shell{
background:#18181b;border:1px solid #27272a;border-radius:16px;
margin:12px auto;max-width:1580px;overflow:hidden;
box-shadow:0 25px 50px -12px rgba(0,0,0,.6),0 0 0 1px rgba(255,255,255,.03);
}
.app-header{
background:linear-gradient(135deg,#18181b,#1e1e24);border-bottom:1px solid #27272a;
padding:14px 24px;display:flex;align-items:center;justify-content:space-between;flex-wrap:wrap;gap:12px;
}
.app-header-left{display:flex;align-items:center;gap:12px}
.app-logo{
width:38px;height:38px;background:linear-gradient(135deg,#ADFF2F,#C6FF66,#D8FF8A);
border-radius:10px;display:flex;align-items:center;justify-content:center;
box-shadow:0 4px 12px rgba(173,255,47,.28);
}
.app-logo svg{width:22px;height:22px;fill:#fff;flex-shrink:0}
.app-title{
font-size:18px;font-weight:700;background:linear-gradient(135deg,#f5f5f5,#bdbdbd);
-webkit-background-clip:text;-webkit-text-fill-color:transparent;letter-spacing:-.3px;
}
.app-badge{
font-size:11px;font-weight:600;padding:3px 10px;border-radius:20px;
background:rgba(173,255,47,.12);color:#D6FF8C;border:1px solid rgba(173,255,47,.28);letter-spacing:.3px;
}
.app-badge.fast{background:rgba(173,255,47,.08);color:#C6FF66;border:1px solid rgba(173,255,47,.22)}
.model-tabs-bar{
background:#18181b;border-bottom:1px solid #27272a;padding:10px 16px;
display:flex;gap:8px;align-items:center;flex-wrap:wrap;
}
.model-tab{
display:inline-flex;align-items:center;justify-content:center;gap:6px;
min-width:32px;height:34px;background:transparent;border:1px solid #27272a;
border-radius:999px;cursor:pointer;font-size:12px;font-weight:600;padding:0 12px;
color:#ffffff!important;transition:all .15s ease;
}
.model-tab:hover{background:rgba(173,255,47,.10);border-color:rgba(173,255,47,.35)}
.model-tab.active{background:rgba(173,255,47,.18);border-color:#ADFF2F;color:#fff!important;box-shadow:0 0 0 2px rgba(173,255,47,.08)}
.model-tab-label{font-size:12px;color:#ffffff!important;font-weight:600}
.app-main-row{display:flex;gap:0;flex:1;overflow:hidden}
.app-main-left{flex:1;display:flex;flex-direction:column;min-width:0;border-right:1px solid #27272a}
.app-main-right{
width:min(48vw,700px);min-width:620px;display:flex;flex-direction:column;
flex-shrink:0;background:#18181b;
}
#image-drop-zone{
position:relative;background:#09090b;height:440px;min-height:440px;max-height:440px;
overflow:hidden;
}
#image-drop-zone.drag-over{outline:2px solid #ADFF2F;outline-offset:-2px;background:rgba(173,255,47,.04)}
.upload-prompt-modern{
position:absolute;inset:0;display:flex;align-items:center;justify-content:center;
padding:20px;z-index:20;overflow:hidden;
}
.upload-click-area{
display:flex;flex-direction:column;align-items:center;justify-content:center;
cursor:pointer;padding:28px 36px;max-width:92%;max-height:92%;
border:2px dashed #3f3f46;border-radius:16px;
background:rgba(173,255,47,.03);transition:all .2s ease;gap:8px;text-align:center;
overflow:hidden;
}
.upload-prompt-stack{
display:flex;flex-direction:column;align-items:center;justify-content:center;
gap:14px;width:min(560px,100%);max-width:100%;
}
.upload-click-area:hover{background:rgba(173,255,47,.08);border-color:#ADFF2F;transform:scale(1.02)}
.upload-click-area:active{background:rgba(173,255,47,.12);transform:scale(.99)}
.upload-click-area svg{width:86px;height:86px;max-width:100%;flex-shrink:0}
.upload-main-text{color:#a1a1aa;font-size:14px;font-weight:600;margin-top:4px}
.upload-sub-text{color:#71717a;font-size:12px}
.upload-url-box{
display:flex;gap:8px;width:min(520px,100%);max-width:100%;
align-items:center;justify-content:center;flex-wrap:wrap;
}
.upload-url-input{
flex:1;min-width:240px;background:#09090b;border:1px solid #27272a;border-radius:10px;
color:#e4e4e7;padding:10px 12px;font-size:13px;outline:none;
}
.upload-url-input:focus{border-color:#ADFF2F;box-shadow:0 0 0 3px rgba(173,255,47,.14)}
.upload-url-input::placeholder{color:#52525b}
.upload-url-btn{
min-width:96px;height:40px;padding:0 14px;background:rgba(173,255,47,.12);border:1px solid rgba(173,255,47,.28);
border-radius:10px;cursor:pointer;color:#D6FF8C;font-size:12px;font-weight:700;transition:all .15s ease;
}
.upload-url-btn:hover{background:#ADFF2F;border-color:#ADFF2F;color:#111}
.single-preview-wrap{
width:100%;height:100%;display:none;align-items:center;justify-content:center;padding:16px;
overflow:hidden;
}
.single-preview-card{
width:100%;height:100%;max-width:100%;max-height:100%;border-radius:14px;
overflow:hidden;border:1px solid #27272a;background:#111114;
display:flex;align-items:center;justify-content:center;position:relative;
}
.single-preview-card img{
width:100%;height:100%;max-width:100%;max-height:100%;
object-fit:contain;display:block;
}
.preview-overlay-actions{
position:absolute;top:12px;right:12px;display:flex;gap:8px;z-index:5;
}
.preview-action-btn{
display:inline-flex;align-items:center;justify-content:center;
min-width:34px;height:34px;padding:0 12px;background:rgba(0,0,0,.65);
border:1px solid rgba(255,255,255,.14);border-radius:10px;cursor:pointer;
color:#fff!important;font-size:12px;font-weight:600;transition:all .15s ease;
}
.preview-action-btn:hover{background:#ADFF2F;border-color:#ADFF2F;color:#111!important}
.hint-bar{
background:rgba(173,255,47,.05);border-top:1px solid #27272a;border-bottom:1px solid #27272a;
padding:10px 20px;font-size:13px;color:#a1a1aa;line-height:1.7;
}
.hint-bar b{color:#D6FF8C;font-weight:600}
.hint-bar kbd{
display:inline-block;padding:1px 6px;background:#27272a;border:1px solid #3f3f46;
border-radius:4px;font-family:'JetBrains Mono',monospace;font-size:11px;color:#a1a1aa;
}
.examples-section{border-top:1px solid #27272a;padding:12px 16px}
.examples-title{
font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;
letter-spacing:.8px;margin-bottom:10px;
}
.examples-scroll{display:flex;gap:10px;overflow-x:auto;padding-bottom:8px}
.examples-scroll::-webkit-scrollbar{height:6px}
.examples-scroll::-webkit-scrollbar-track{background:#09090b;border-radius:3px}
.examples-scroll::-webkit-scrollbar-thumb{background:#27272a;border-radius:3px}
.examples-scroll::-webkit-scrollbar-thumb:hover{background:#3f3f46}
.example-card{
flex-shrink:0;width:220px;background:#09090b;border:1px solid #27272a;
border-radius:10px;overflow:hidden;cursor:pointer;transition:all .2s ease;
}
.example-card:hover{border-color:#ADFF2F;transform:translateY(-2px);box-shadow:0 4px 12px rgba(173,255,47,.12)}
.example-card.loading{opacity:.5;pointer-events:none}
.example-thumb-wrap{height:120px;overflow:hidden;background:#18181b}
.example-thumb-wrap img{width:100%;height:100%;object-fit:cover}
.example-thumb-placeholder{
width:100%;height:100%;display:flex;align-items:center;justify-content:center;
background:#18181b;color:#3f3f46;font-size:11px;
}
.example-meta-row{padding:6px 10px;display:flex;align-items:center;gap:6px}
.example-badge{
display:inline-flex;padding:2px 7px;background:rgba(173,255,47,.12);border-radius:4px;
font-size:10px;font-weight:600;color:#D6FF8C;font-family:'JetBrains Mono',monospace;white-space:nowrap;
}
.example-prompt-text{
padding:0 10px 8px;font-size:11px;color:#a1a1aa;line-height:1.4;
display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;overflow:hidden;
}
.panel-card{border-bottom:1px solid #27272a}
.panel-card-title{
padding:12px 20px;font-size:12px;font-weight:600;color:#71717a;
text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6);
}
.panel-card-body{padding:18px 22px 20px;display:flex;flex-direction:column;gap:10px}
.modern-label{font-size:13px;font-weight:500;color:#a1a1aa;margin-bottom:4px;display:block}
.modern-textarea{
width:100%;background:#09090b;border:1px solid #27272a;border-radius:8px;
padding:14px 16px;font-family:'Inter',sans-serif;font-size:14px;color:#e4e4e7;
resize:vertical;outline:none;min-height:220px;transition:border-color .2s;
}
.is-hidden{display:none!important}
.modern-textarea:focus{border-color:#ADFF2F;box-shadow:0 0 0 3px rgba(173,255,47,.14)}
.modern-textarea::placeholder{color:#3f3f46}
.modern-textarea.error-flash{
border-color:#ef4444!important;box-shadow:0 0 0 3px rgba(239,68,68,.2)!important;animation:shake .4s ease;
}
@keyframes shake{0%,100%{transform:translateX(0)}20%,60%{transform:translateX(-4px)}40%,80%{transform:translateX(4px)}}
.toast-notification{
position:fixed;top:24px;left:50%;transform:translateX(-50%) translateY(-120%);
z-index:9999;padding:10px 24px;border-radius:10px;font-family:'Inter',sans-serif;
font-size:14px;font-weight:600;display:flex;align-items:center;gap:8px;
box-shadow:0 8px 24px rgba(0,0,0,.5);
transition:transform .35s cubic-bezier(.34,1.56,.64,1),opacity .35s ease;opacity:0;pointer-events:none;
}
.toast-notification.visible{transform:translateX(-50%) translateY(0);opacity:1;pointer-events:auto}
.toast-notification.error{background:linear-gradient(135deg,#dc2626,#b91c1c);color:#fff;border:1px solid rgba(255,255,255,.15)}
.toast-notification.warning{background:linear-gradient(135deg,#84cc16,#65a30d);color:#111;border:1px solid rgba(255,255,255,.08)}
.toast-notification.info{background:linear-gradient(135deg,#a3e635,#84cc16);color:#111;border:1px solid rgba(255,255,255,.08)}
.toast-notification .toast-icon{font-size:16px;line-height:1}
.toast-notification .toast-text{line-height:1.3}
.btn-run{
display:flex;align-items:center;justify-content:center;gap:8px;width:100%;
background:linear-gradient(135deg,#ADFF2F,#8FD61F);border:none;border-radius:10px;
padding:12px 24px;cursor:pointer;font-size:15px;font-weight:700;font-family:'Inter',sans-serif;
color:#ffffff!important;-webkit-text-fill-color:#ffffff!important;
transition:all .2s ease;letter-spacing:-.2px;
box-shadow:0 4px 16px rgba(173,255,47,.25),inset 0 1px 0 rgba(255,255,255,.25);
}
.btn-run:hover{
background:linear-gradient(135deg,#C6FF66,#ADFF2F);transform:translateY(-1px);
box-shadow:0 6px 24px rgba(173,255,47,.35),inset 0 1px 0 rgba(255,255,255,.25);
}
.btn-run:active{transform:translateY(0);box-shadow:0 2px 8px rgba(173,255,47,.25)}
#custom-run-btn,#custom-run-btn *,#run-btn-label,.btn-run,.btn-run *{
color:#ffffff!important;-webkit-text-fill-color:#ffffff!important;fill:#ffffff!important;
}
body:not(.dark) .btn-run,body:not(.dark) .btn-run *,
.dark .btn-run,.dark .btn-run *,
.gradio-container .btn-run,.gradio-container .btn-run *,
.gradio-container #custom-run-btn,.gradio-container #custom-run-btn *{
color:#ffffff!important;-webkit-text-fill-color:#ffffff!important;fill:#ffffff!important;
}
.output-frame{border-bottom:1px solid #27272a;display:flex;flex-direction:column;position:relative}
.output-frame .out-title,
.output-frame .out-title *,
#output-title-label{
color:#ffffff!important;
-webkit-text-fill-color:#ffffff!important;
}
.output-frame .out-title{
padding:10px 20px;font-size:13px;font-weight:700;
text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6);
display:flex;align-items:center;justify-content:space-between;gap:8px;flex-wrap:wrap;
}
.out-title-right{display:flex;gap:8px;align-items:center}
.out-action-btn{
display:inline-flex;align-items:center;justify-content:center;background:rgba(173,255,47,.10);
border:1px solid rgba(173,255,47,.2);border-radius:6px;cursor:pointer;padding:3px 10px;
font-size:11px;font-weight:500;color:#D6FF8C!important;gap:4px;height:24px;transition:all .15s;
}
.out-action-btn:hover{background:rgba(173,255,47,.2);border-color:rgba(173,255,47,.35);color:#111!important}
.out-action-btn svg{width:12px;height:12px;fill:#D6FF8C}
.output-frame .out-body{
flex:1;background:#09090b;display:flex;align-items:stretch;justify-content:stretch;
overflow:hidden;min-height:520px;position:relative;
}
.output-scroll-wrap{
width:100%;height:100%;padding:0;overflow:hidden;
}
.output-textarea{
width:100%;height:520px;min-height:520px;max-height:none;background:#09090b;color:#e4e4e7;
border:none;outline:none;padding:18px 20px;font-size:13px;line-height:1.65;
font-family:'JetBrains Mono',monospace;overflow:auto;resize:vertical;white-space:pre-wrap;
}
.output-textarea::placeholder{color:#52525b}
.output-textarea.error-flash{
box-shadow:inset 0 0 0 2px rgba(239,68,68,.6);
}
.modern-loader{
display:none;position:absolute;top:0;left:0;right:0;bottom:0;background:rgba(9,9,11,.92);
z-index:15;flex-direction:column;align-items:center;justify-content:center;gap:16px;backdrop-filter:blur(4px);
}
.modern-loader.active{display:flex}
.modern-loader .loader-spinner{
width:36px;height:36px;border:3px solid #27272a;border-top-color:#ADFF2F;
border-radius:50%;animation:spin .8s linear infinite;
}
@keyframes spin{to{transform:rotate(360deg)}}
.modern-loader .loader-text{font-size:13px;color:#a1a1aa;font-weight:500}
.loader-bar-track{width:200px;height:4px;background:#27272a;border-radius:2px;overflow:hidden}
.loader-bar-fill{
height:100%;background:linear-gradient(90deg,#ADFF2F,#C6FF66,#ADFF2F);
background-size:200% 100%;animation:shimmer 1.5s ease-in-out infinite;border-radius:2px;
}
@keyframes shimmer{0%{background-position:200% 0}100%{background-position:-200% 0}}
.settings-group{border:1px solid #27272a;border-radius:10px;margin:12px 16px;padding:0;overflow:hidden}
.settings-group-title{
font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;
padding:10px 16px;border-bottom:1px solid #27272a;background:rgba(24,24,27,.5);
}
.settings-group-body{padding:14px 16px;display:flex;flex-direction:column;gap:12px}
.slider-row{display:flex;align-items:center;gap:10px;min-height:28px}
.slider-row label{font-size:13px;font-weight:500;color:#a1a1aa;min-width:118px;flex-shrink:0}
.slider-row input[type="range"]{
flex:1;-webkit-appearance:none;appearance:none;height:6px;background:#27272a;
border-radius:3px;outline:none;min-width:0;
}
.slider-row input[type="range"]::-webkit-slider-thumb{
-webkit-appearance:none;width:16px;height:16px;background:linear-gradient(135deg,#ADFF2F,#8FD61F);
border-radius:50%;cursor:pointer;box-shadow:0 2px 6px rgba(173,255,47,.35);transition:transform .15s;
}
.slider-row input[type="range"]::-webkit-slider-thumb:hover{transform:scale(1.2)}
.slider-row input[type="range"]::-moz-range-thumb{
width:16px;height:16px;background:linear-gradient(135deg,#ADFF2F,#8FD61F);
border-radius:50%;cursor:pointer;border:none;box-shadow:0 2px 6px rgba(173,255,47,.35);
}
.slider-row .slider-val{
min-width:58px;text-align:right;font-family:'JetBrains Mono',monospace;font-size:12px;
font-weight:500;padding:3px 8px;background:#09090b;border:1px solid #27272a;
border-radius:6px;color:#a1a1aa;flex-shrink:0;
}
.app-statusbar{
background:#18181b;border-top:1px solid #27272a;padding:6px 20px;
display:flex;gap:12px;height:34px;align-items:center;font-size:12px;
}
.app-statusbar .sb-section{
padding:0 12px;flex:1;display:flex;align-items:center;font-family:'JetBrains Mono',monospace;
font-size:12px;color:#52525b;overflow:hidden;white-space:nowrap;
}
.app-statusbar .sb-section.sb-fixed{
flex:0 0 auto;min-width:110px;text-align:center;justify-content:center;
padding:3px 12px;background:rgba(173,255,47,.08);border-radius:6px;color:#D6FF8C;font-weight:500;
}
.exp-note{padding:10px 20px;font-size:12px;color:#52525b;border-top:1px solid #27272a;text-align:center}
.exp-note a{color:#D6FF8C;text-decoration:none}
.exp-note a:hover{text-decoration:underline}
::-webkit-scrollbar{width:8px;height:8px}
::-webkit-scrollbar-track{background:#09090b}
::-webkit-scrollbar-thumb{background:#27272a;border-radius:4px}
::-webkit-scrollbar-thumb:hover{background:#3f3f46}
@media(max-width:980px){
.app-main-row{flex-direction:column}
.app-main-right{width:100%;min-width:0}
.app-main-left{border-right:none;border-bottom:1px solid #27272a}
.modern-textarea{min-height:180px}
.output-frame .out-body{min-height:420px}
.output-textarea{height:420px;min-height:420px}
}
"""
# Client-side controller for the custom UI: wires image upload/URL/preview,
# model and prompt tabs, sliders, toasts, the loader overlay, run-button
# validation, and example loading. Injected once via demo.load(js=...);
# re-entry is guarded by window.__ocr2GreenInitDone.
gallery_js = r"""
() => {
function init() {
if (window.__ocr2GreenInitDone) return;
const dropZone = document.getElementById('image-drop-zone');
const uploadPrompt = document.getElementById('upload-prompt');
const uploadClick = document.getElementById('upload-click-area');
const fileInput = document.getElementById('custom-file-input');
const previewWrap = document.getElementById('single-preview-wrap');
const previewImg = document.getElementById('single-preview-img');
const btnUpload = document.getElementById('preview-upload-btn');
const btnClear = document.getElementById('preview-clear-btn');
const urlInput = document.getElementById('image-url-input');
const urlBtn = document.getElementById('image-url-btn');
const promptInput = document.getElementById('custom-query-input');
const promptPanel = document.getElementById('prompt-panel');
const promptTabsBar = document.getElementById('prompt-tabs-bar');
const runBtnEl = document.getElementById('custom-run-btn');
const outputArea = document.getElementById('custom-output-textarea');
const imgStatus = document.getElementById('sb-image-status');
if (!dropZone || !fileInput || !promptInput || !previewWrap || !previewImg || !urlInput || !urlBtn) {
setTimeout(init, 250);
return;
}
window.__ocr2GreenInitDone = true;
let imageState = null;
let toastTimer = null;
let examplePoller = null;
let lastSeenExamplePayload = null;
const promptTemplatesNode = document.getElementById('prompt-templates-data');
let promptTemplates = {};
try {
promptTemplates = promptTemplatesNode ? JSON.parse(promptTemplatesNode.textContent || '{}') : {};
} catch (e) {
promptTemplates = {};
}
function showToast(message, type) {
let toast = document.getElementById('app-toast');
if (!toast) {
toast = document.createElement('div');
toast.id = 'app-toast';
toast.className = 'toast-notification';
// FIX: the toast body must contain the .toast-icon / .toast-text spans
// queried just below; the previous ' ' made icon/text null and threw.
toast.innerHTML = '<span class="toast-icon"></span><span class="toast-text"></span>';
document.body.appendChild(toast);
}
const icon = toast.querySelector('.toast-icon');
const text = toast.querySelector('.toast-text');
toast.className = 'toast-notification ' + (type || 'error');
if (type === 'warning') icon.textContent = '\u26A0';
else if (type === 'info') icon.textContent = '\u2139';
else icon.textContent = '\u2717';
text.textContent = message;
if (toastTimer) clearTimeout(toastTimer);
void toast.offsetWidth;
toast.classList.add('visible');
toastTimer = setTimeout(() => toast.classList.remove('visible'), 3500);
}
function showLoader() {
const l = document.getElementById('output-loader');
if (l) l.classList.add('active');
const sb = document.getElementById('sb-run-state');
if (sb) sb.textContent = 'Processing...';
}
function hideLoader() {
const l = document.getElementById('output-loader');
if (l) l.classList.remove('active');
const sb = document.getElementById('sb-run-state');
if (sb) sb.textContent = 'Done';
}
function setRunErrorState() {
const l = document.getElementById('output-loader');
if (l) l.classList.remove('active');
const sb = document.getElementById('sb-run-state');
if (sb) sb.textContent = 'Error';
}
window.__showToast = showToast;
window.__showLoader = showLoader;
window.__hideLoader = hideLoader;
window.__setRunErrorState = setRunErrorState;
function flashPromptError() {
promptInput.classList.add('error-flash');
promptInput.focus();
setTimeout(() => promptInput.classList.remove('error-flash'), 800);
}
function flashOutputError() {
if (!outputArea) return;
outputArea.classList.add('error-flash');
setTimeout(() => outputArea.classList.remove('error-flash'), 800);
}
function getValueFromContainer(containerId) {
const container = document.getElementById(containerId);
if (!container) return '';
const el = container.querySelector('textarea, input');
return el ? (el.value || '') : '';
}
function setGradioValue(containerId, value) {
const container = document.getElementById(containerId);
if (!container) return false;
const el = container.querySelector('textarea, input');
if (!el) return false;
const proto = el.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
const ns = Object.getOwnPropertyDescriptor(proto, 'value');
if (ns && ns.set) {
ns.set.call(el, value);
el.dispatchEvent(new Event('input', {bubbles:true, composed:true}));
el.dispatchEvent(new Event('change', {bubbles:true, composed:true}));
return true;
}
return false;
}
function syncImageToGradio() {
setGradioValue('hidden-image-b64', imageState ? imageState.value : '');
const txt = imageState ? ('Image ready: ' + (imageState.name || 'image')) : 'No image uploaded';
if (imgStatus) imgStatus.textContent = txt;
}
function syncPromptToGradio() {
const activeModel = (document.querySelector('.model-tab.active') || {}).dataset?.model;
setGradioValue('prompt-gradio-input', activeModel === 'LightOnOCR-2-1B' ? '' : promptInput.value);
}
function syncModelToGradio(name) {
setGradioValue('hidden-model-name', name);
}
function setPreview(src, name, value) {
imageState = {src, name: name || 'image', value: value || src};
previewImg.src = src;
previewWrap.style.display = 'flex';
if (uploadPrompt) uploadPrompt.style.display = 'none';
if (urlInput) urlInput.value = '';
syncImageToGradio();
}
window.__setPreview = setPreview;
function clearPreview() {
imageState = null;
previewImg.src = '';
previewWrap.style.display = 'none';
if (uploadPrompt) uploadPrompt.style.display = 'flex';
syncImageToGradio();
}
window.__clearPreview = clearPreview;
function processFile(file) {
if (!file) return;
if (!file.type.startsWith('image/')) {
showToast('Only image files are supported', 'error');
return;
}
const reader = new FileReader();
reader.onload = (e) => setPreview(e.target.result, file.name, e.target.result);
reader.readAsDataURL(file);
}
function applyImageUrl() {
const raw = urlInput.value.trim();
if (!raw) {
showToast('Enter an image URL', 'warning');
return;
}
let parsed;
try {
parsed = new URL(raw);
} catch (e) {
showToast('Invalid image URL', 'error');
return;
}
if (!['http:', 'https:'].includes(parsed.protocol)) {
showToast('Only http/https image URLs are supported', 'error');
return;
}
const testImg = new Image();
testImg.onload = () => {
const name = parsed.pathname.split('/').filter(Boolean).pop() || parsed.hostname;
setPreview(raw, name, raw);
showToast('Image URL loaded', 'info');
};
testImg.onerror = () => showToast('Could not load image from URL', 'error');
testImg.src = raw;
}
fileInput.addEventListener('change', (e) => {
const file = e.target.files && e.target.files[0] ? e.target.files[0] : null;
if (file) processFile(file);
e.target.value = '';
});
urlBtn.addEventListener('click', applyImageUrl);
urlInput.addEventListener('keydown', (e) => {
if (e.key === 'Enter') {
e.preventDefault();
applyImageUrl();
}
});
if (uploadClick) uploadClick.addEventListener('click', () => fileInput.click());
if (btnUpload) btnUpload.addEventListener('click', () => fileInput.click());
if (btnClear) btnClear.addEventListener('click', clearPreview);
dropZone.addEventListener('dragover', (e) => {
e.preventDefault();
dropZone.classList.add('drag-over');
});
dropZone.addEventListener('dragleave', (e) => {
e.preventDefault();
dropZone.classList.remove('drag-over');
});
dropZone.addEventListener('drop', (e) => {
e.preventDefault();
dropZone.classList.remove('drag-over');
if (e.dataTransfer.files && e.dataTransfer.files.length) processFile(e.dataTransfer.files[0]);
});
promptInput.addEventListener('input', syncPromptToGradio);
function activateModelTab(name) {
document.querySelectorAll('.model-tab[data-model]').forEach(btn => {
btn.classList.toggle('active', btn.getAttribute('data-model') === name);
});
const hidePrompt = name === 'LightOnOCR-2-1B';
if (promptPanel) promptPanel.classList.toggle('is-hidden', hidePrompt);
if (promptTabsBar) promptTabsBar.classList.toggle('is-hidden', hidePrompt);
syncModelToGradio(name);
syncPromptToGradio();
}
function findPromptKeyByValue(value) {
return Object.keys(promptTemplates).find(key => promptTemplates[key] === value) || null;
}
function activatePromptTab(key) {
document.querySelectorAll('.prompt-tab[data-prompt-key]').forEach(btn => {
btn.classList.toggle('active', btn.getAttribute('data-prompt-key') === key);
});
if (!key || !promptTemplates[key]) return;
promptInput.value = promptTemplates[key];
syncPromptToGradio();
}
window.__activateModelTab = activateModelTab;
window.__activatePromptTab = activatePromptTab;
document.querySelectorAll('.model-tab[data-model]').forEach(btn => {
btn.addEventListener('click', () => {
const model = btn.getAttribute('data-model');
activateModelTab(model);
});
});
document.querySelectorAll('.prompt-tab[data-prompt-key]').forEach(btn => {
btn.addEventListener('click', () => {
const promptKey = btn.getAttribute('data-prompt-key');
activatePromptTab(promptKey);
});
});
activateModelTab('Nanonets-OCR2-3B');
const initialPromptKey = findPromptKeyByValue(promptInput.value) || 'GENERAL';
if (!promptInput.value.trim() && promptTemplates[initialPromptKey]) {
promptInput.value = promptTemplates[initialPromptKey];
}
activatePromptTab(findPromptKeyByValue(promptInput.value) || initialPromptKey);
promptInput.addEventListener('input', () => {
const matchingKey = findPromptKeyByValue(promptInput.value);
document.querySelectorAll('.prompt-tab[data-prompt-key]').forEach(btn => {
btn.classList.toggle('active', btn.getAttribute('data-prompt-key') === matchingKey);
});
});
function syncSlider(customId, gradioId) {
const slider = document.getElementById(customId);
const valSpan = document.getElementById(customId + '-val');
if (!slider) return;
slider.addEventListener('input', () => {
if (valSpan) valSpan.textContent = slider.value;
const container = document.getElementById(gradioId);
if (!container) return;
container.querySelectorAll('input[type="range"],input[type="number"]').forEach(el => {
const ns = Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, 'value');
if (ns && ns.set) {
ns.set.call(el, slider.value);
el.dispatchEvent(new Event('input', {bubbles:true, composed:true}));
el.dispatchEvent(new Event('change', {bubbles:true, composed:true}));
}
});
});
}
syncSlider('custom-max-new-tokens', 'gradio-max-new-tokens');
syncSlider('custom-temperature', 'gradio-temperature');
syncSlider('custom-top-p', 'gradio-top-p');
syncSlider('custom-top-k', 'gradio-top-k');
syncSlider('custom-repetition-penalty', 'gradio-repetition-penalty');
syncSlider('custom-gpu-duration', 'gradio-gpu-duration');
function validateBeforeRun() {
const promptVal = promptInput.value.trim();
const currentModel = (document.querySelector('.model-tab.active') || {}).dataset?.model;
const requiresPrompt = currentModel !== 'LightOnOCR-2-1B';
if (!imageState && !promptVal && requiresPrompt) {
showToast('Please upload an image and enter your OCR instruction', 'error');
flashPromptError();
return false;
}
if (!imageState) {
showToast('Please upload an image', 'error');
return false;
}
if (requiresPrompt && !promptVal) {
showToast('Please enter your OCR/query instruction', 'warning');
flashPromptError();
return false;
}
if (!currentModel) {
showToast('Please select a model', 'error');
return false;
}
return true;
}
window.__clickGradioRunBtn = function() {
if (!validateBeforeRun()) return;
syncPromptToGradio();
syncImageToGradio();
const active = document.querySelector('.model-tab.active');
if (active) syncModelToGradio(active.getAttribute('data-model'));
if (outputArea) outputArea.value = '';
showLoader();
setTimeout(() => {
const gradioBtn = document.getElementById('gradio-run-btn');
if (!gradioBtn) {
setRunErrorState();
if (outputArea) outputArea.value = '[ERROR] Run button not found.';
showToast('Run button not found', 'error');
return;
}
const btn = gradioBtn.querySelector('button');
if (btn) btn.click(); else gradioBtn.click();
}, 180);
};
if (runBtnEl) runBtnEl.addEventListener('click', () => window.__clickGradioRunBtn());
const copyBtn = document.getElementById('copy-output-btn');
if (copyBtn) {
copyBtn.addEventListener('click', async () => {
try {
const text = outputArea ? outputArea.value : '';
if (!text.trim()) {
showToast('No output to copy', 'warning');
flashOutputError();
return;
}
await navigator.clipboard.writeText(text);
showToast('Output copied to clipboard', 'info');
} catch(e) {
showToast('Copy failed', 'error');
}
});
}
const saveBtn = document.getElementById('save-output-btn');
if (saveBtn) {
saveBtn.addEventListener('click', () => {
const text = outputArea ? outputArea.value : '';
if (!text.trim()) {
showToast('No output to save', 'warning');
flashOutputError();
return;
}
const blob = new Blob([text], {type: 'text/plain;charset=utf-8'});
const a = document.createElement('a');
a.href = URL.createObjectURL(blob);
a.download = 'multimodal_ocr_output.txt';
document.body.appendChild(a);
a.click();
setTimeout(() => {
URL.revokeObjectURL(a.href);
document.body.removeChild(a);
}, 200);
showToast('Output saved', 'info');
});
}
function applyExamplePayload(raw) {
try {
const data = JSON.parse(raw);
if (data.status === 'ok') {
if (data.image) setPreview(data.image, data.name || 'example.jpg', data.image);
if (data.query) {
promptInput.value = data.query;
syncPromptToGradio();
}
if (data.model) activateModelTab(data.model);
document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
showToast('Example loaded', 'info');
} else if (data.status === 'error') {
document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
showToast(data.message || 'Failed to load example', 'error');
}
} catch (e) {
document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
}
}
function startExamplePolling() {
if (examplePoller) clearInterval(examplePoller);
let attempts = 0;
examplePoller = setInterval(() => {
attempts += 1;
const current = getValueFromContainer('example-result-data');
if (current && current !== lastSeenExamplePayload) {
lastSeenExamplePayload = current;
clearInterval(examplePoller);
examplePoller = null;
applyExamplePayload(current);
return;
}
if (attempts >= 100) {
clearInterval(examplePoller);
examplePoller = null;
document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
showToast('Example load timed out', 'error');
}
}, 120);
}
function triggerExampleLoad(idx) {
const btnWrap = document.getElementById('example-load-btn');
const btn = btnWrap ? (btnWrap.querySelector('button') || btnWrap) : null;
if (!btn) return;
let attempts = 0;
function writeIdxAndClick() {
attempts += 1;
const ok1 = setGradioValue('example-idx-input', String(idx));
setGradioValue('example-result-data', '');
const currentVal = getValueFromContainer('example-idx-input');
if (ok1 && currentVal === String(idx)) {
btn.click();
startExamplePolling();
return;
}
if (attempts < 30) {
setTimeout(writeIdxAndClick, 100);
} else {
document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
showToast('Failed to initialize example loader', 'error');
}
}
writeIdxAndClick();
}
document.querySelectorAll('.example-card[data-idx]').forEach(card => {
card.addEventListener('click', () => {
const idx = card.getAttribute('data-idx');
if (idx === null || idx === undefined || idx === '') return;
document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
card.classList.add('loading');
showToast('Loading example...', 'info');
triggerExampleLoad(idx);
});
});
const observerTarget = document.getElementById('example-result-data');
if (observerTarget) {
const obs = new MutationObserver(() => {
const current = getValueFromContainer('example-result-data');
if (!current || current === lastSeenExamplePayload) return;
lastSeenExamplePayload = current;
if (examplePoller) {
clearInterval(examplePoller);
examplePoller = null;
}
applyExamplePayload(current);
});
obs.observe(observerTarget, {childList:true, subtree:true, characterData:true, attributes:true});
}
if (outputArea) outputArea.value = '';
const sb = document.getElementById('sb-run-state');
if (sb) sb.textContent = 'Ready';
if (imgStatus) imgStatus.textContent = 'No image uploaded';
}
init();
}
"""
# Mirrors the hidden Gradio result textbox (#gradio-result) into the custom
# output textarea: a MutationObserver plus a 500 ms polling interval both call
# syncOutput, which copies new text over, auto-scrolls the pane, flips the
# status bar to Error for '[ERROR]'-prefixed payloads, and hides the loader
# for any other non-empty output.
wire_outputs_js = r"""
() => {
function watchOutputs() {
const resultContainer = document.getElementById('gradio-result');
const outArea = document.getElementById('custom-output-textarea');
if (!resultContainer || !outArea) { setTimeout(watchOutputs, 500); return; }
let lastText = '';
function isErrorText(val) {
return typeof val === 'string' && val.trim().startsWith('[ERROR]');
}
function syncOutput() {
const el = resultContainer.querySelector('textarea') || resultContainer.querySelector('input');
if (!el) return;
const val = el.value || '';
if (val !== lastText) {
lastText = val;
outArea.value = val;
outArea.scrollTop = outArea.scrollHeight;
if (val.trim()) {
if (isErrorText(val)) {
if (window.__setRunErrorState) window.__setRunErrorState();
if (window.__showToast) window.__showToast('Inference failed', 'error');
} else {
if (window.__hideLoader) window.__hideLoader();
}
}
}
}
const observer = new MutationObserver(syncOutput);
observer.observe(resultContainer, {childList:true, subtree:true, characterData:true, attributes:true});
setInterval(syncOutput, 500);
}
watchOutputs();
}
"""
# Static UI fragments injected into the gr.HTML template.
# NOTE(review): these literals appear to have had their markup stripped
# (empty SVG bodies, tag-less tab templates). The JS layer expects
# `.model-tab[data-model]` and `.prompt-tab[data-prompt-key]` elements --
# confirm the original markup before shipping. TODO confirm.
OCR_LOGO_SVG = """
"""
UPLOAD_PREVIEW_SVG = """
"""
COPY_SVG = """ """
SAVE_SVG = """ """
# One tab fragment per model name, concatenated into a single string.
MODEL_TABS_HTML = "".join(f'{m} ' for m in MODEL_CHOICES)
# One tab fragment per prompt template (display name only).
PROMPT_TABS_HTML = "".join(f'{value["name"]} ' for value in PROMPTS.values())
# key -> prompt text mapping, embedded in the page for the JS prompt tabs.
PROMPT_TEMPLATES_JSON = json.dumps({key: value["prompt"] for key, value in PROMPTS.items()})
# Gradio wiring layer. All visible UI lives in the gr.HTML template plus the
# injected JS above; the gr.* components below are hidden mirrors that the JS
# reads/writes so the backend callbacks receive the custom UI's state.
# NOTE(review): the gr.HTML body below looks like its tags were stripped
# (plain text only remains) -- restore from the original markup.
# FIX: `css` is a gr.Blocks() constructor parameter, not a Blocks.launch()
# parameter; passing css= to launch() raises TypeError at startup.
with gr.Blocks(css=css) as demo:
    hidden_image_b64 = gr.Textbox(value="", elem_id="hidden-image-b64", elem_classes="hidden-input", container=False)
    prompt = gr.Textbox(value="", elem_id="prompt-gradio-input", elem_classes="hidden-input", container=False)
    hidden_model_name = gr.Textbox(value="Nanonets-OCR2-3B", elem_id="hidden-model-name", elem_classes="hidden-input", container=False)
    max_new_tokens = gr.Slider(minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS, elem_id="gradio-max-new-tokens", elem_classes="hidden-input", container=False)
    temperature = gr.Slider(minimum=0.1, maximum=4.0, step=0.1, value=0.7, elem_id="gradio-temperature", elem_classes="hidden-input", container=False)
    top_p = gr.Slider(minimum=0.05, maximum=1.0, step=0.05, value=0.9, elem_id="gradio-top-p", elem_classes="hidden-input", container=False)
    top_k = gr.Slider(minimum=1, maximum=1000, step=1, value=50, elem_id="gradio-top-k", elem_classes="hidden-input", container=False)
    repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, step=0.05, value=1.1, elem_id="gradio-repetition-penalty", elem_classes="hidden-input", container=False)
    gpu_duration_state = gr.Number(value=60, elem_id="gradio-gpu-duration", elem_classes="hidden-input", container=False)
    result = gr.Textbox(value="", elem_id="gradio-result", elem_classes="hidden-input", container=False)
    example_idx = gr.Textbox(value="", elem_id="example-idx-input", elem_classes="hidden-input", container=False)
    example_result = gr.Textbox(value="", elem_id="example-result-data", elem_classes="hidden-input", container=False)
    example_load_btn = gr.Button("Load Example", elem_id="example-load-btn")
    gr.HTML(f"""
{MODEL_TABS_HTML}
{PROMPT_TABS_HTML}
Upload: Click, drag, or paste an image URL ·
Model: Switch model tabs from the header ·
Clear removes the current image
Quick Examples
{EXAMPLE_CARDS_HTML}
OCR / Vision Instruction
Query Input
Run OCR
Raw Output Stream
{COPY_SVG} Copy
{SAVE_SVG} Save File
Experimental OCR Suite · Open on
GitHub
""")
    # Hidden button the JS layer clicks after client-side validation.
    run_btn = gr.Button("Run", elem_id="gradio-run-btn")
    # Inject the two JS controllers once the page loads; fns are no-ops.
    demo.load(fn=noop, inputs=None, outputs=None, js=gallery_js)
    demo.load(fn=noop, inputs=None, outputs=None, js=wire_outputs_js)
    run_btn.click(
        fn=run_ocr,
        inputs=[
            hidden_model_name,
            prompt,
            hidden_image_b64,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
            repetition_penalty,
            gpu_duration_state,
        ],
        outputs=[result],
        # Re-read the live DOM values right before dispatch so stale hidden
        # component state cannot override what the user currently sees.
        js=r"""(m, p, img, mnt, t, tp, tk, rp, gd) => {
const modelEl = document.querySelector('.model-tab.active');
const model = modelEl ? modelEl.getAttribute('data-model') : m;
const promptEl = document.getElementById('custom-query-input');
const promptVal = promptEl ? promptEl.value : p;
const imgContainer = document.getElementById('hidden-image-b64');
let imgVal = img;
if (imgContainer) {
const inner = imgContainer.querySelector('textarea, input');
if (inner) imgVal = inner.value;
}
return [model, promptVal, imgVal, mnt, t, tp, tk, rp, gd];
}""",
    )
    # Example loader: JS writes an index into example_idx, clicks this button,
    # then polls example_result for the JSON payload. queue=False keeps it snappy.
    example_load_btn.click(
        fn=load_example_data,
        inputs=[example_idx],
        outputs=[example_result],
        queue=False,
    )

if __name__ == "__main__":
    # css moved to gr.Blocks() above; launch() does not accept a css kwarg.
    demo.queue(max_size=50).launch(
        mcp_server=False,
        ssr_mode=False,
        show_error=True,
        allowed_paths=["examples"],
    )