Spaces:

numind
/

NuExtract3

Running on A100

App Files Files Community

NuExtract3 / app.py

Alexandre-Numind

Update app.py

a19c568 verified 1 day ago

raw

history blame contribute delete

52.5 kB

	import argparse
	import base64
	import io
	import json
	import os
	import re
	from pathlib import Path
	from typing import Any, Dict, Iterator, List, Optional, Tuple

	import gradio as gr
	from openai import OpenAI
	from PIL import Image


	# ---------------- Paths ----------------
	# Absolute directory that contains this file. Relative example/asset
	# directories are resolved against it (see resolve_dir).
	APP_DIR = Path(__file__).resolve().parent


	# ---------------- CLI / environment configuration ----------------
	def parse_args() -> argparse.Namespace:
	    """Parse the demo's command-line options.

	    Each option defaults to the value of an environment variable and, when
	    that variable is unset, to a hard-coded fallback. Arguments the parser
	    does not know are ignored instead of causing an error, because
	    ``parse_known_args`` is used.

	    Returns:
	        Namespace holding ``model_name``, ``api_base``, ``api_key``,
	        ``server_name``, ``server_port``, ``share``, ``max_tokens``,
	        ``example_dir`` and ``assets_dir``.

	    Raises:
	        ValueError: if ``GRADIO_SERVER_PORT`` or ``NUEXTRACT_MAX_TOKENS`` is
	            set to something ``int()`` cannot convert.
	    """
	    env = os.environ.get
	    cli = argparse.ArgumentParser(description="NuExtract Gradio demo")

	    # Declared in the order they should appear in ``--help``.
	    option_specs = [
	        (
	            "--model-name",
	            {
	                "default": env("MODEL_NAME", "numind/NuExtract3"),
	                "help": "Model name served by the OpenAI-compatible endpoint.",
	            },
	        ),
	        (
	            "--api-base",
	            {
	                "default": env("OPENAI_API_BASE", "http://127.0.0.1:8000/v1"),
	                "help": "OpenAI-compatible base URL.",
	            },
	        ),
	        (
	            "--api-key",
	            {
	                "default": env("OPENAI_API_KEY", "EMPTY"),
	                "help": "API key for the OpenAI-compatible endpoint.",
	            },
	        ),
	        (
	            "--server-name",
	            {
	                "default": env("GRADIO_SERVER_NAME", "0.0.0.0"),
	                "help": "Gradio server host.",
	            },
	        ),
	        (
	            "--server-port",
	            {
	                "type": int,
	                "default": int(env("GRADIO_SERVER_PORT", "7860")),
	                "help": "Gradio server port.",
	            },
	        ),
	        (
	            "--share",
	            {
	                "action": "store_true",
	                # The environment variable can only switch sharing on.
	                "default": env("GRADIO_SHARE", "false").lower() in {"1", "true", "yes"},
	                "help": "Create a public Gradio share link.",
	            },
	        ),
	        (
	            "--max-tokens",
	            {
	                "type": int,
	                "default": int(env("NUEXTRACT_MAX_TOKENS", "10000")),
	                "help": "Maximum tokens for model generation. Hidden from the UI.",
	            },
	        ),
	        (
	            "--example-dir",
	            {
	                "default": env("NUEXTRACT_EXAMPLE_DIR", str(APP_DIR / "examples")),
	                "help": "Directory containing image examples.",
	            },
	        ),
	        (
	            "--assets-dir",
	            {
	                "default": env("NUEXTRACT_ASSETS_DIR", str(APP_DIR / "assets")),
	                "help": "Directory containing static assets such as the NuExtract logo.",
	            },
	        ),
	    ]
	    for flag, spec in option_specs:
	        cli.add_argument(flag, **spec)

	    known, _unknown = cli.parse_known_args()
	    return known


	def resolve_dir(path_like: str) -> Path:
	    """Return ``path_like`` as an absolute, resolved path.

	    A leading ``~`` is expanded first. An absolute path is resolved as given;
	    a relative one is interpreted relative to ``APP_DIR``.
	    """
	    candidate = Path(path_like).expanduser()
	    anchored = candidate if candidate.is_absolute() else APP_DIR / candidate
	    return anchored.resolve()


	# Parsed once at import time; everything below is derived from it.
	ARGS = parse_args()

	# Model / endpoint settings.
	DEFAULT_MODEL = ARGS.model_name
	DEFAULT_API_BASE = ARGS.api_base
	DEFAULT_API_KEY = ARGS.api_key
	DEFAULT_MAX_TOKENS = ARGS.max_tokens

	# Resource directories as absolute paths.
	EXAMPLE_DIR = resolve_dir(ARGS.example_dir)
	ASSETS_DIR = resolve_dir(ARGS.assets_dir)

	# The logo is optional: without it the app runs with an empty logo URL.
	LOGO_PATH = ASSETS_DIR / "logo_numind_picto.svg"
	if not LOGO_PATH.exists():
	    print(f"[assets] Missing logo: {LOGO_PATH}", flush=True)
	    LOGO_URL = ""
	else:
	    LOGO_URL = f"/gradio_api/file={LOGO_PATH}"
	    # Register the assets directory with Gradio as a static path.
	    gr.set_static_paths(paths=[ASSETS_DIR])

	# Default system message.
	SYSTEM_PROMPT_DEFAULT = (
	    "You are a precise information extraction assistant. "
	    "Return faithful, source-grounded results only."
	)


	# ---------------- Structured extraction examples ----------------
	# These examples populate: Image + Template + Instructions.
	# Keys are example image file names; each value is the extraction template
	# that build_structured_examples() serialises with json.dumps for that image.
	# NOTE(review): leaf strings such as "verbatim-string" or "date-time" look
	# like type hints, and lists of plain strings look like allowed choices —
	# confirm against the model's template documentation.
	STRUCTURED_EXAMPLE_TEMPLATES: Dict[str, Dict[str, Any]] = {
	# Movie fields: title, rating, companies, release date, cast, staff, reviews, technologies.
	"1.jpg": {
	"movie_name": "verbatim-string",
	"tagline": "verbatim-string",
	"language": "string",
	"motion_picture_association_rating": [
	"G - General Audiences",
	"PG - Parental Guidance Suggested",
	"PG-13 – Parents Strongly Cautioned",
	"R – Restricted",
	"NC-17 – Adults Only",
	"not provided"
	],
	"movie_distribution_company": "verbatim-string",
	"movie_production_company": "verbatim-string",
	"theatre_release_date": "date-time",
	"movie_website_address": "verbatim-string",
	"movie_director_name": "verbatim-string",
	"actors_names": [
	"verbatim-string"
	],
	"staff": [
	{
	"staff_name": "verbatim-string",
	"staff_role": [
	"director",
	"co-director",
	"screenwriter",
	"author",
	"cinematographer",
	"costume designer",
	"production designer",
	"set designer",
	"animator",
	"color designer",
	"art director",
	"animation director",
	"vfx director",
	"voice actor",
	"composer",
	"songwriter",
	"music performer",
	"music supervisor",
	"choreographer",
	"casting director",
	"editor",
	"producer",
	"co-producer",
	"associate producer",
	"executive producer",
	"co-executive producer",
	"line producer"
	]
	}
	],
	"reviews": [
	{
	"critic_name": "verbatim-string",
	"review_comment": "verbatim-string"
	}
	],
	# Nested list (a list inside a list), unlike the flat choice lists above.
	"technologies": [
	[
	"Dolby Stereo",
	"Dolby Digital",
	"Dolby Stereo Digital",
	"Dolby Atmos",
	"Dolby Vision",
	"Dolby Cinema",
	"DTS",
	"SDDS",
	"IMAX",
	"4DX"
	]
	]
	},
	# Housing fields: room and bedroom lists, surface areas, amenities, parking.
	"2.png": {
	"number_of_bathrooms": "integer",
	"number_of_fireplaces": "integer",
	"distance_unit": ["meter", "foot"],
	"rooms_that_are_not_bedrooms_or_corridors_or_toilets": [
	{
	"room_name": "verbatim-string",
	"surface_area": "number",
	}
	],
	"bedrooms": [
	{
	"bedroom_name": "verbatim-string",
	"surface_area": "number",
	"number_of_windows": "integer",
	"has_private_bathroom": "boolean",
	}
	],
	"has_laundry_room": "boolean",
	"has_terrace": "boolean",
	"has_balcony": "boolean",
	"number_of_parking_spaces_in_garage": "integer",
	"number_of_parking_spaces_exterior": "integer",
	},
	# Invoice fields: parties, issuer address, dates, currency, line items, totals.
	"8.png" : {
	"invoice_number": "verbatim-string",
	"issuer_name": "verbatim-string",
	"recipient_name": "verbatim-string",
	"issuer_location": {
	"street_number": "verbatim-string",
	"street_name": "verbatim-string",
	"city": "verbatim-string",
	"zip_code": "string",
	"country": "string"
	},
	"date_of_issue": "date-time",
	"date_due": "date-time",
	"currency_code_iso4217": "string",
	"items": [
	{
	"item_name": "verbatim-string",
	"item_quantity": "number",
	"item_price_per_unit": "number",
	"item_total_price": "number"
	}
	],
	"total_discount_amount": "number",
	"total_fee_amount": "number",
	"total_tax_amount": "number",
	"total_price_net": "number",
	"total_price_gross": "number"
	},
	# Festival fields: name, website, location, dates, line-up entries, sponsors.
	"18.jpg":{
	"festival_name": "verbatim-string",
	"website_url": "url",
	"location": {
	"city": "string",
	"zip_code": "string",
	"country": "country"
	},
	"date_first_day": "date",
	"date_last_day": "date",
	"lineup_entry": [
	{
	"artist_or_group_name": "string",
	"artist_entity_type": ["individual", "band", "ensemble", "b2b", "project_or_collaboration", "other"],
	"performing_stage_name": "verbatim-string",
	"is_headliner": "boolean",
	"day_playing": "date"
	}
	],
	"sponsors": [
	{
	"name": "verbatim-string",
	"type": ["press", "tv", "bank", "insurance", "beverage company", "car company", "technology company", "clothing company", "transportation", "public institution", "other"]
	}
	]
	},
	# A list of parts, each with a name, an id and details.
	"17.png":{
	"parts": [
	{
	"name": "verbatim-string",
	"id": "verbatim-string",
	"details": "verbatim-string"
	}
	]
	},
	# Application-form fields: applicant, bank reference, trade references, signature.
	# NOTE(review): "Holding compagny" is spelled this way in the key; it is
	# part of the template sent to the model, so it is left untouched here.
	"16.jpeg":{
	"Applicant": {
	"Name": "verbatim-string",
	"Registration no": "verbatim-string",
	"Holding compagny": "verbatim-string",
	"VAT Registration no": "verbatim-string",
	"Date of creation": "date",
	"Type of entity": "verbatim-string",
	"Location": {
	"Street number": "verbatim-string",
	"Street name": "verbatim-string",
	"City": "verbatim-string",
	"Zip code": "string",
	"Country": "country"
	},
	"Website": "url",
	"Phone": "phone-number",
	"Email": "email-address"
	},
	"Bank Reference": {
	"Bank name": "verbatim-string",
	"Account name": "verbatim-string",
	"Account no": "integer",
	"Importer code": "verbatim-string"
	},
	"Trades references": [
	{
	"Company name": "verbatim-string",
	"Account opened since": "date",
	"Tel": "phone-number",
	"Email": "email-address",
	"Location": {
	"Street number": "verbatim-string",
	"Street name": "verbatim-string",
	"City": "verbatim-string",
	"Zip code": "string",
	"Country": "country"
	},
	"Credit limit": "string"
	}
	],
	"Is document signed": "boolean",
	"Date of signature": "date"
	},
	}

	# Optional free-text instructions per structured example, keyed by the same
	# image file names as STRUCTURED_EXAMPLE_TEMPLATES. All are currently empty;
	# build_structured_examples() falls back to "" for any missing key.
	STRUCTURED_EXAMPLE_INSTRUCTIONS: Dict[str, str] = {
	"1.jpg": "",
	"2.png": "",
	"8.png": "",
	"18.jpg": "",
	"17.png": "",
	"16.jpeg": ""
	}


	# ---------------- Markdown/OCR examples ----------------
	# Put Markdown example image paths here.
	# These examples populate only the Image input and are meant for the
	# “Convert to Markdown” button.
	# Relative entries are resolved against EXAMPLE_DIR by resolve_example_path();
	# entries whose file does not exist are skipped by build_markdown_examples().
	MARKDOWN_EXAMPLE_IMAGE_PATHS: List[str] = [
	"3.jpg",
	"4.jpg",
	"5.jpg",
	"6.png",
	"7.jpg",
	"9.jpg",
	"10.png",
	"11.png",
	"12.jpg",
	"14.jpg",
	"15.jpg"
	]


	def resolve_example_path(path_like: str) -> Path:
	    """Return the absolute, resolved location of an example file.

	    A leading ``~`` is expanded first. An absolute path is resolved as given;
	    a relative one is looked up inside ``EXAMPLE_DIR``.
	    """
	    candidate = Path(path_like).expanduser()
	    anchored = candidate if candidate.is_absolute() else EXAMPLE_DIR / candidate
	    return anchored.resolve()


	def build_structured_examples() -> List[List[Any]]:
	    """Assemble the example rows for structured extraction.

	    Each row is ``[image path, template as pretty-printed JSON text,
	    instructions]``. Images that do not exist on disk are reported on stdout
	    and left out.
	    """
	    rows: List[List[Any]] = []

	    for name, schema in STRUCTURED_EXAMPLE_TEMPLATES.items():
	        resolved = resolve_example_path(name)

	        if resolved.exists():
	            template_text = json.dumps(schema, indent=4, ensure_ascii=False)
	            instructions = STRUCTURED_EXAMPLE_INSTRUCTIONS.get(name, "")
	            rows.append([str(resolved), template_text, instructions])
	        else:
	            print(f"[structured examples] Missing image: {resolved}", flush=True)

	    return rows


	def build_markdown_examples() -> List[List[Any]]:
	    """Assemble the example rows for Markdown conversion.

	    Each row holds a single image path. Images that do not exist on disk are
	    reported on stdout and left out.
	    """
	    rows: List[List[Any]] = []

	    for entry in MARKDOWN_EXAMPLE_IMAGE_PATHS:
	        resolved = resolve_example_path(entry)

	        if resolved.exists():
	            rows.append([str(resolved)])
	        else:
	            print(f"[markdown examples] Missing image: {resolved}", flush=True)

	    return rows


	# Example rows are built once at import time; missing images were already
	# reported and dropped by the builders.
	STRUCTURED_EXAMPLES = build_structured_examples()
	MARKDOWN_EXAMPLES = build_markdown_examples()


	# ---------------- Utility helpers ----------------
	def image_bytes_to_base64(b: bytes) -> str:
	    """Encode raw bytes as a Base64 text string."""
	    encoded = base64.b64encode(b)
	    return str(encoded, "utf-8")


	def ensure_rgb_image(image_bytes: bytes) -> Image.Image:
	    """Open encoded image bytes with Pillow and return the image in RGB mode.

	    Images already in RGB mode are returned as opened; any other mode is
	    converted with ``Image.convert("RGB")``.
	    """
	    decoded = Image.open(io.BytesIO(image_bytes))
	    return decoded if decoded.mode == "RGB" else decoded.convert("RGB")


	def file_path_to_bytes(path: str) -> bytes:
	    """Read the whole file at ``path`` in binary mode and return its bytes."""
	    with open(path, mode="rb") as stream:
	        payload = stream.read()
	    return payload


	# ---------------- Response parsing ----------------
	def strip_code_fence(payload: str) -> str:
	    """Remove Markdown code-fence markers from ``payload``.

	    Strips an opening fence (three backticks, optionally followed by
	    ``json``, ``markdown`` or ``text`` in any letter case) found at the start
	    of a line, and a closing fence found at the end of a line, together with
	    the whitespace next to them. Text without fences is only trimmed.

	    Args:
	        payload: Raw model output that may be wrapped in a fenced block.

	    Returns:
	        The payload without fence markers and without surrounding whitespace.
	    """
	    # The two alternatives must be joined by a real regex "|" and the flags by
	    # a real bitwise "|"; an escaped "\|" matches a literal pipe in the
	    # pattern and is a syntax error between the flags.
	    return re.sub(
	        r"^```(?:json|markdown|text)?\s*|\s*```$",
	        "",
	        payload.strip(),
	        flags=re.IGNORECASE | re.MULTILINE,
	    ).strip()


	def pretty_json_or_text(payload: str) -> str:
	    """Pretty-print ``payload`` as JSON when possible, else return it cleaned.

	    Code fences are stripped first. If the remainder parses as JSON it is
	    re-serialised with a 4-space indent and non-ASCII characters kept as-is;
	    otherwise the fence-stripped text is returned. A falsy payload gives ``""``.
	    """
	    if payload:
	        body = strip_code_fence(payload)
	        try:
	            parsed = json.loads(body)
	            return json.dumps(parsed, indent=4, ensure_ascii=False)
	        except Exception:
	            # Not JSON: hand back the text unchanged apart from fence removal.
	            return body

	    return ""


	def extract_answer_block(text: str) -> str:
	    """Pull the final answer out of raw model output.

	    Lookup order:
	      1. the content of the first ``<answer>...</answer>`` block (tag case
	         is ignored, content may span several lines);
	      2. otherwise the span from the first ``{`` to the last ``}``;
	      3. otherwise the whole text, stripped.

	    Matches from steps 1 and 2 are passed through ``pretty_json_or_text``.

	    Args:
	        text: Raw model output; a falsy value yields ``""``.

	    Returns:
	        The extracted answer, pretty-printed when it is valid JSON.
	    """
	    if not text:
	        return ""

	    # ``\s*(.*?)\s*`` captures the whole (possibly multi-line) answer body;
	    # the flags are combined with a real bitwise "|".
	    tagged = re.search(
	        r"<answer>\s*(.*?)\s*</answer>",
	        text,
	        flags=re.DOTALL | re.IGNORECASE,
	    )
	    if tagged:
	        return pretty_json_or_text(tagged.group(1).strip())

	    # The pattern is greedy, so a single search already yields the widest
	    # possible ``{...}`` span; no need to collect and compare matches.
	    braced = re.search(r"\{[\s\S]*\}", text)
	    if braced:
	        return pretty_json_or_text(braced.group(0))

	    return text.strip()


	def split_reasoning_and_output(text: str, reasoning_enabled: bool) -> Tuple[str, str]:
	    """Split model output into ``(reasoning, output)`` at the first ``</think>``.

	    Args:
	        text: Raw (possibly partial) model output.
	        reasoning_enabled: Whether the model was asked to emit reasoning.

	    Returns:
	        ``("", "")`` for empty text; ``("", text)`` when reasoning is off;
	        ``(before_tag, after_tag)`` once ``</think>`` (any ASCII letter case)
	        is present; ``(text, "")`` while the tag has not arrived yet. All
	        parts are stripped of surrounding whitespace.
	    """
	    if not text:
	        return "", ""

	    if not reasoning_enabled:
	        return "", text.strip()

	    # Search the original string directly. Looking the tag up in
	    # ``text.lower()`` and reusing that index on ``text`` is wrong whenever
	    # lowercasing changes the length (e.g. U+0130 becomes two code points).
	    closing = re.search(r"</think>", text, flags=re.IGNORECASE | re.ASCII)
	    if closing is None:
	        return text.strip(), ""

	    return text[: closing.start()].strip(), text[closing.end():].strip()


	# ---------------- Message building ----------------
	def make_text_content(text: str) -> List[Dict[str, Any]]:
	    """Wrap ``text`` in a one-element list holding a ``"text"`` content part.

	    A falsy ``text`` is sent as the empty string.
	    """
	    body = text if text else ""
	    return [dict(type="text", text=body)]


	def make_image_content(
	    image_bytes: bytes,
	    extra_text: Optional[str] = None,
	) -> List[Dict[str, Any]]:
	    """Build message content parts for an image plus optional text.

	    The image is converted to RGB, re-encoded as JPEG (quality 95) and
	    embedded as a Base64 ``data:`` URL with ``detail`` set to ``"high"``.

	    Args:
	        image_bytes: Encoded image data.
	        extra_text: Optional text placed after the image; ignored when it is
	            empty or only whitespace.

	    Returns:
	        A list with the ``image_url`` part, followed by a ``text`` part when
	        ``extra_text`` has content.
	    """
	    jpeg_stream = io.BytesIO()
	    ensure_rgb_image(image_bytes).save(jpeg_stream, format="JPEG", quality=95)
	    data_url = f"data:image/jpeg;base64,{image_bytes_to_base64(jpeg_stream.getvalue())}"

	    image_part: Dict[str, Any] = {
	        "type": "image_url",
	        "image_url": {"url": data_url, "detail": "high"},
	    }

	    caption = extra_text.strip() if extra_text else ""
	    if not caption:
	        return [image_part]

	    return [image_part, {"type": "text", "text": caption}]


	def normalize_template(template: str) -> str:
	    """Return the template in canonical text form.

	    Empty or missing templates become ``"{}"``. Valid JSON is re-serialised
	    with a 4-space indent and non-ASCII characters kept as-is. Anything that
	    does not parse is returned stripped but otherwise unchanged.
	    """
	    stripped = template.strip() if template else ""
	    if stripped == "":
	        return "{}"

	    try:
	        parsed = json.loads(stripped)
	        return json.dumps(parsed, indent=4, ensure_ascii=False)
	    except Exception:
	        # Not valid JSON: pass the user's text through untouched.
	        return stripped


	def collate_single_input(
	    *,
	    text_or_image: Any,
	    template: str,
	    system_prompt: Optional[str],
	    instruction: Optional[str],
	) -> Tuple[List[Dict[str, Any]], str]:
	    """Build the chat messages for one extraction request.

	    Args:
	        text_or_image: Either a dict with a ``"bytes"`` key (image input) or
	            a value that is turned into text with ``str``.
	        template: Extraction template as typed by the user.
	        system_prompt: Optional system message; skipped when falsy.
	        instruction: Optional extra instructions; skipped when blank.

	    Returns:
	        ``(messages, template_json)`` where ``template_json`` is the
	        normalised template. Instructions and a non-empty template are
	        attached to the user message as text.
	    """
	    template_json = normalize_template(template)

	    messages: List[Dict[str, Any]] = (
	        [{"role": "system", "content": system_prompt}] if system_prompt else []
	    )

	    # Text sections attached to the user turn, in display order.
	    sections: List[str] = []
	    cleaned_instruction = instruction.strip() if instruction else ""
	    if cleaned_instruction:
	        sections.append(f"Instructions:\n{cleaned_instruction}")
	    if template_json.strip() not in ("", "{}"):
	        sections.append(f"Extraction template:\n```json\n{template_json}\n```")
	    extra_text_for_user = "\n\n".join(sections) or None

	    if isinstance(text_or_image, dict) and "bytes" in text_or_image:
	        user_content = make_image_content(
	            image_bytes=text_or_image["bytes"],
	            extra_text=extra_text_for_user,
	        )
	    else:
	        body = str(text_or_image or "")
	        if extra_text_for_user:
	            body = f"{body}\n\n{extra_text_for_user}".strip()
	        user_content = make_text_content(body)

	    messages.append({"role": "user", "content": user_content})
	    return messages, template_json


	def collate_for_template_generation(
	    *,
	    context_text: str,
	    context_image_path: Optional[str],
	    system_prompt: Optional[str],
	) -> List[Dict[str, Any]]:
	    """Build the chat messages that ask the model to propose a template.

	    When an image path is given, the image is sent with the guidance text;
	    otherwise the stripped context text is sent followed by the guidance.

	    Args:
	        context_text: Document text, used only when no image path is given.
	        context_image_path: Path of the document image, if any.
	        system_prompt: Optional system message; skipped when falsy.

	    Returns:
	        The message list, with the user turn last.
	    """
	    guidance = (
	        "Generate a concise JSON extraction template for this document. "
	        "Use descriptive field names and simple type hints like string, number, YYYY-MM-DD, "
	        "boolean, or arrays of objects. Return only the JSON template."
	    )

	    messages: List[Dict[str, Any]] = (
	        [{"role": "system", "content": system_prompt}] if system_prompt else []
	    )

	    if not context_image_path:
	        document = (context_text or "").strip()
	        user_content = make_text_content(f"{document}\n\n{guidance}".strip())
	    else:
	        user_content = make_image_content(
	            image_bytes=file_path_to_bytes(context_image_path),
	            extra_text=guidance,
	        )

	    messages.append({"role": "user", "content": user_content})
	    return messages


	def collate_markdown_image_only(
	    *,
	    image_bytes: bytes,
	    system_prompt: Optional[str],
	) -> List[Dict[str, Any]]:
	    """Build the chat messages for Markdown conversion of a single image.

	    The user turn contains only the image; no text is attached. A system
	    message is prepended when ``system_prompt`` is truthy.
	    """
	    user_turn = {
	        "role": "user",
	        "content": make_image_content(image_bytes=image_bytes),
	    }

	    if not system_prompt:
	        return [user_turn]

	    return [{"role": "system", "content": system_prompt}, user_turn]


	# ---------------- Model calls ----------------
	def chunk_to_text(chunk: Any) -> str:
	    """Extract the text carried by one streaming chunk.

	    Reads ``chunk.choices[0].delta.content``. A string is returned as-is; a
	    list is flattened by joining the truthy ``text`` of each item (dict key
	    or attribute). Anything else, and any error raised along the way, gives
	    ``""``.
	    """
	    try:
	        choices = getattr(chunk, "choices", None) if chunk else None
	        if not choices:
	            return ""

	        delta = getattr(choices[0], "delta", None)
	        content = None if delta is None else getattr(delta, "content", None)

	        if isinstance(content, str):
	            return content
	        if not isinstance(content, list):
	            return ""

	        pieces: List[str] = []
	        for entry in content:
	            if isinstance(entry, dict) and entry.get("text"):
	                pieces.append(entry["text"])
	            elif getattr(entry, "text", None):
	                pieces.append(entry.text)
	        return "".join(pieces)
	    except Exception:
	        # Best effort: a malformed chunk contributes no text.
	        return ""


	def build_chat_template_kwargs(
	    *,
	    template_json: str,
	    reasoning: bool,
	    instruction: Optional[str],
	    markdown_mode: bool,
	) -> Dict[str, Any]:
	    """Build the ``chat_template_kwargs`` payload for a request.

	    Args:
	        template_json: Normalised template text; empty or ``"{}"`` means no
	            template.
	        reasoning: Whether thinking should be enabled.
	        instruction: Optional instructions; ignored when blank.
	        markdown_mode: When true, only ``mode`` and ``enable_thinking`` are
	            returned and template/instructions are ignored.

	    Returns:
	        A dict with ``mode`` (``"markdown"``, ``"structured"`` or
	        ``"content"``) and ``enable_thinking``, plus ``template`` and
	        ``instructions`` when they apply.
	    """
	    thinking = bool(reasoning)
	    if markdown_mode:
	        return {"mode": "markdown", "enable_thinking": thinking}

	    stripped_template = template_json.strip() if template_json else ""
	    has_template = stripped_template not in ("", "{}")

	    chat_kwargs: Dict[str, Any] = {
	        "mode": "structured" if has_template else "content",
	        "enable_thinking": thinking,
	    }
	    if has_template:
	        chat_kwargs["template"] = template_json

	    cleaned_instruction = instruction.strip() if instruction else ""
	    if cleaned_instruction:
	        chat_kwargs["instructions"] = cleaned_instruction

	    return chat_kwargs


	def call_model_stream(
	    *,
	    api_base: str,
	    api_key: str,
	    model_name: str,
	    messages: List[Dict[str, Any]],
	    template_json: str,
	    temperature: float,
	    max_tokens: int,
	    reasoning: bool,
	    instruction: Optional[str],
	    markdown_mode: bool,
	) -> Iterator[str]:
	    """Stream a chat completion and yield the text accumulated so far.

	    A new client is created for each call. The chat-template options are
	    sent under ``extra_body["chat_template_kwargs"]``.

	    Yields:
	        The full text received up to that point, once for every chunk that
	        carries text (not the individual deltas).
	    """
	    client = OpenAI(base_url=api_base, api_key=api_key)

	    extra_body = {
	        "chat_template_kwargs": build_chat_template_kwargs(
	            template_json=template_json,
	            reasoning=reasoning,
	            instruction=instruction,
	            markdown_mode=markdown_mode,
	        )
	    }

	    response = client.chat.completions.create(
	        model=model_name,
	        temperature=float(temperature),
	        max_tokens=int(max_tokens),
	        messages=messages,
	        stream=True,
	        extra_body=extra_body,
	    )

	    so_far = ""
	    for event in response:
	        piece = chunk_to_text(event)
	        if not piece:
	            continue
	        so_far = so_far + piece
	        yield so_far


	def call_model_once(
	    *,
	    api_base: str,
	    api_key: str,
	    model_name: str,
	    messages: List[Dict[str, Any]],
	    mode: str,
	    temperature: float,
	    max_tokens: int,
	) -> str:
	    """Run one non-streaming chat completion and return its text.

	    Thinking is always disabled for this call; ``mode`` is forwarded to the
	    chat template through ``extra_body["chat_template_kwargs"]``.

	    Args:
	        api_base: Base URL of the OpenAI-compatible endpoint.
	        api_key: API key for that endpoint.
	        model_name: Name of the served model.
	        messages: Chat messages to send.
	        mode: Chat-template mode name.
	        temperature: Sampling temperature.
	        max_tokens: Generation limit.

	    Returns:
	        The first choice's message content, or ``""`` when the response has
	        no choices or the content is missing.
	    """
	    client = OpenAI(base_url=api_base, api_key=api_key)

	    chat = client.chat.completions.create(
	        model=model_name,
	        temperature=float(temperature),
	        max_tokens=int(max_tokens),
	        messages=messages,
	        extra_body={
	            "chat_template_kwargs": {
	                "mode": mode,
	                "enable_thinking": False,
	            }
	        },
	    )

	    if not chat.choices:
	        return ""

	    # ``content`` may be None; honour the ``-> str`` contract.
	    return chat.choices[0].message.content or ""


	# ---------------- Inference orchestration ----------------
	def prepare_input(context_text: str, context_image_path: Optional[str]) -> Any:
	    """Choose the request input: the image when a path is given, else the text.

	    Returns:
	        ``{"bytes": <file content>}`` when ``context_image_path`` is truthy,
	        otherwise ``context_text`` (or ``""`` when that is falsy).
	    """
	    if not context_image_path:
	        return context_text or ""

	    return {"bytes": file_path_to_bytes(context_image_path)}


	def infer_stream(
	    *,
	    api_key: str,
	    api_base: str,
	    system_prompt: str,
	    template: str,
	    instruction: str,
	    context_text: str,
	    context_image_path: Optional[str],
	    temperature: float,
	    reasoning: bool,
	    markdown_mode: bool,
	):
	    """Run one streamed request and yield display updates.

	    In Markdown mode the image is sent alone; otherwise the input is sent
	    together with the template and instructions. The model and token limit
	    come from ``DEFAULT_MODEL`` and ``DEFAULT_MAX_TOKENS``.

	    Yields:
	        Dicts with ``mode`` (``"markdown"`` or ``"structured"``), ``output``
	        (text to display, or a placeholder while nothing is available) and
	        ``think`` (reasoning trace, empty when reasoning is off).

	    Raises:
	        ValueError: in Markdown mode when no image was provided. Because this
	            is a generator, the error surfaces on first iteration.
	    """
	    waiting_note = "_(Waiting for output after `</think>`.)_"

	    payload = prepare_input(context_text, context_image_path)
	    has_image = isinstance(payload, dict) and "bytes" in payload

	    if not markdown_mode:
	        messages, template_json = collate_single_input(
	            text_or_image=payload,
	            template=template,
	            system_prompt=system_prompt,
	            instruction=instruction,
	        )
	    elif has_image:
	        messages = collate_markdown_image_only(
	            image_bytes=payload["bytes"],
	            system_prompt=system_prompt,
	        )
	        template_json = ""
	    else:
	        raise ValueError("Markdown conversion requires an image input.")

	    updates = call_model_stream(
	        api_base=api_base,
	        api_key=api_key,
	        model_name=DEFAULT_MODEL,
	        messages=messages,
	        template_json=template_json,
	        temperature=temperature,
	        max_tokens=DEFAULT_MAX_TOKENS,
	        reasoning=reasoning,
	        instruction=instruction,
	        markdown_mode=markdown_mode,
	    )

	    for partial_text in updates:
	        trace, output_text = split_reasoning_and_output(
	            partial_text,
	            reasoning_enabled=reasoning,
	        )
	        think = trace if reasoning else ""

	        if markdown_mode:
	            placeholder = waiting_note if reasoning else "_(Empty output.)_"
	            yield {
	                "mode": "markdown",
	                "output": output_text or placeholder,
	                "think": think,
	            }
	            continue

	        # Without reasoning the whole partial text is the candidate answer.
	        if not reasoning:
	            output_text = partial_text or ""

	        placeholder = waiting_note if reasoning else "_(No output found yet.)_"
	        shown = extract_answer_block(output_text) or placeholder

	        if shown.strip().startswith(("{", "[")):
	            # JSON-looking output is pretty-printed inside a fenced block.
	            shown = f"```json\n{pretty_json_or_text(shown)}\n```"
	        else:
	            # Turn literal backslash-n sequences into real line breaks.
	            shown = shown.replace("\\n", "\n")

	        yield {
	            "mode": "structured",
	            "output": shown,
	            "think": think,
	        }


	def infer_template_generation(
	    *,
	    api_key: str,
	    api_base: str,
	    system_prompt: str,
	    context_text: str,
	    context_image_path: Optional[str],
	    temperature: float,
	) -> str:
	    """Ask the model for an extraction template matching the given document.

	    Sends a single non-streaming request in ``"template-generation"`` mode
	    using ``DEFAULT_MODEL`` and ``DEFAULT_MAX_TOKENS``.

	    Returns:
	        The model's reply, pretty-printed when it is valid JSON and returned
	        as fence-stripped text otherwise.
	    """
	    prompt_messages = collate_for_template_generation(
	        context_text=context_text,
	        context_image_path=context_image_path,
	        system_prompt=system_prompt,
	    )

	    raw_reply = call_model_once(
	        api_base=api_base,
	        api_key=api_key,
	        model_name=DEFAULT_MODEL,
	        messages=prompt_messages,
	        mode="template-generation",
	        temperature=temperature,
	        max_tokens=DEFAULT_MAX_TOKENS,
	    )

	    return pretty_json_or_text(raw_reply)


	# ---------------- UI styling ----------------
	CSS = """
	:root {
	color-scheme: light;
	--bg: #f6f2eb;
	--panel: #ffffff;
	--panel-rgb: 255, 255, 255;
	--panel-strong-rgb: 255, 252, 246;
	--input-rgb: 255, 255, 255;
	--border-blue: rgba(67, 111, 148, 0.30);
	--border-blue-soft: rgba(67, 111, 148, 0.18);
	--border-input: rgba(67, 111, 148, 0.22);
	--border-orange-soft: rgba(190, 103, 36, 0.26);
	--text: #23252b;
	--text-strong: #101318;
	--text-on-accent: #101318;
	--muted: #5f6673;
	--muted-2: #7d8490;
	--logo-blue: #5d9bcf;
	--logo-orange: #d6742f;
	--green: #178f66;
	--card-alpha: 0.88;
	--header-alpha: 0.82;
	--input-alpha: 0.94;
	--shadow: rgba(54, 46, 35, 0.14);
	--inset-highlight: rgba(255, 255, 255, 0.85);
	--logo-opacity: 0.18;
	--focus-ring: rgba(67, 111, 148, 0.26);
	--code-bg: #fdfaf5;
	--dropzone-bg: #fbf8f2;
	}

	html.dark,
	body.dark,
	.dark,
	[data-theme="dark"] {
	color-scheme: dark;
	--bg: #242529;
	--panel: #1d1f26;
	--panel-rgb: 29, 31, 38;
	--panel-strong-rgb: 21, 22, 26;
	--input-rgb: 12, 14, 19;
	--border-blue: rgba(135, 183, 224, 0.24);
	--border-blue-soft: rgba(135, 183, 224, 0.16);
	--border-input: rgba(135, 183, 224, 0.14);
	--border-orange-soft: rgba(228, 132, 58, 0.22);
	--text: #eef0f4;
	--text-strong: #ffffff;
	--text-on-accent: #101318;
	--muted: #969baa;
	--muted-2: #737988;
	--logo-blue: #87b7e0;
	--logo-orange: #e4843a;
	--green: #31c48d;
	--card-alpha: 0.66;
	--header-alpha: 0.42;
	--input-alpha: 0.78;
	--shadow: rgba(0, 0, 0, 0.28);
	--inset-highlight: rgba(255, 255, 255, 0.055);
	--logo-opacity: 0.88;
	--focus-ring: rgba(135, 183, 224, 0.32);
	--code-bg: rgba(12, 14, 19, 0.78);
	--dropzone-bg: rgba(12, 14, 19, 0.78);
	}

	@media (prefers-color-scheme: dark) {
	:root:not([data-theme="light"]) {
	color-scheme: dark;
	--bg: #242529;
	--panel: #1d1f26;
	--panel-rgb: 29, 31, 38;
	--panel-strong-rgb: 21, 22, 26;
	--input-rgb: 12, 14, 19;
	--border-blue: rgba(135, 183, 224, 0.24);
	--border-blue-soft: rgba(135, 183, 224, 0.16);
	--border-input: rgba(135, 183, 224, 0.14);
	--border-orange-soft: rgba(228, 132, 58, 0.22);
	--text: #eef0f4;
	--text-strong: #ffffff;
	--text-on-accent: #101318;
	--muted: #969baa;
	--muted-2: #737988;
	--logo-blue: #87b7e0;
	--logo-orange: #e4843a;
	--green: #31c48d;
	--card-alpha: 0.66;
	--header-alpha: 0.42;
	--input-alpha: 0.78;
	--shadow: rgba(0, 0, 0, 0.28);
	--inset-highlight: rgba(255, 255, 255, 0.055);
	--logo-opacity: 0.88;
	--focus-ring: rgba(135, 183, 224, 0.32);
	--code-bg: rgba(12, 14, 19, 0.78);
	--dropzone-bg: rgba(12, 14, 19, 0.78);
	}
	}

	html,
	body,
	footer,
	.gradio-container {
	color: var(--text) !important;
	}

	html,
	body {
	min-height: 100vh !important;
	width: 100% !important;
	margin: 0 !important;
	overflow-x: hidden !important;
	}

	body {
	background: var(--bg) !important;
	background-attachment: fixed !important;
	}

	footer {
	background: transparent !important;
	}

	.gradio-container {
	position: relative !important;
	isolation: isolate !important;
	max-width: none !important;
	width: 100% !important;
	min-height: 100vh !important;
	padding: 10px 18px 18px 18px !important;
	background: transparent !important;
	box-sizing: border-box !important;
	}

	.gradio-container::before {
	content: "";
	position: fixed;
	inset: 0;
	z-index: -2;
	pointer-events: none;
	background-image: url("__LOGO_URL__");
	background-repeat: no-repeat;
	background-size: min(86vw, 980px) min(86vw, 980px);
	background-position: calc(100% + 230px) 34px;
	opacity: var(--logo-opacity);
	filter: saturate(1.2) drop-shadow(0 0 28px rgba(135, 183, 224, 0.14));
	}

	.with-gap,
	.gradio-row {
	gap: 18px !important;
	}

	.gradio-row {
	width: 100% !important;
	}

	.app-header {
	position: relative;
	display: flex;
	align-items: center;
	justify-content: space-between;
	gap: 16px;
	padding: 10px 12px 14px 12px;
	margin-bottom: 10px;
	border-bottom: 1px solid var(--border-blue-soft);
	background: rgba(var(--panel-strong-rgb), var(--header-alpha));
	border-radius: 14px;
	backdrop-filter: blur(8px);
	box-shadow: 0 12px 42px var(--shadow), inset 0 1px 0 var(--inset-highlight);
	}

	.brand {
	display: flex;
	align-items: center;
	gap: 10px;
	}

	.brand-mark {
	width: 28px;
	height: 28px;
	flex: 0 0 auto;
	object-fit: contain;
	}

	.brand-title {
	display: flex;
	align-items: baseline;
	gap: 8px;
	}

	.brand-name {
	font-size: 23px;
	line-height: 1;
	font-weight: 750;
	letter-spacing: -0.045em;
	color: var(--text-strong) !important;
	}

	.brand-name span {
	color: var(--muted) !important;
	}

	.model-chip {
	display: inline-flex;
	align-items: center;
	max-width: 520px;
	padding: 5px 9px;
	border-radius: 999px;
	background: rgba(var(--panel-rgb), 0.88);
	border: 1px solid var(--border-blue-soft);
	color: var(--muted) !important;
	font-size: 12px;
	white-space: nowrap;
	overflow: hidden;
	text-overflow: ellipsis;
	text-decoration: none !important;
	cursor: pointer;
	transition: border-color 0.15s ease, transform 0.15s ease, background 0.15s ease;
	}

	.model-chip:hover {
	border-color: var(--logo-blue);
	background: rgba(var(--panel-rgb), 1);
	transform: translateY(-1px);
	text-decoration: none !important;
	}

	.model-chip:focus-visible {
	outline: none;
	box-shadow: 0 0 0 3px var(--focus-ring);
	}

	.model-chip code {
	color: var(--text-strong) !important;
	background: transparent !important;
	}

	.header-actions {
	display: flex;
	align-items: center;
	gap: 10px;
	color: var(--muted) !important;
	font-size: 13px;
	}

	.status-dot {
	width: 8px;
	height: 8px;
	border-radius: 99px;
	background: var(--green);
	box-shadow: 0 0 14px rgba(49, 196, 141, 0.65);
	}

	.intro-card {
	margin: 0 0 16px 0;
	padding: 14px 16px;
	border-radius: 14px;
	background: rgba(var(--panel-rgb), var(--card-alpha));
	border: 1px solid var(--border-blue-soft);
	box-shadow: 0 12px 42px var(--shadow), inset 0 1px 0 var(--inset-highlight);
	backdrop-filter: blur(8px);
	}

	.intro-card p {
	margin: 0 0 8px 0;
	line-height: 1.5;
	}

	.section-title {
	margin: 0 0 8px 0;
	color: var(--text-strong) !important;
	font-size: 13px;
	font-weight: 750;
	letter-spacing: 0.01em;
	}

	.main-card,
	.output-card,
	.gradio-group {
	background: rgba(var(--panel-rgb), var(--card-alpha)) !important;
	border: 1px solid var(--border-blue) !important;
	border-radius: 14px !important;
	box-shadow: 0 22px 70px var(--shadow), inset 0 1px 0 var(--inset-highlight) !important;
	backdrop-filter: blur(10px) saturate(1.18);
	}

	.main-card {
	width: 100% !important;
	box-sizing: border-box !important;
	}

	.output-card {
	min-height: 720px !important;
	max-height: 860px !important;
	border-color: var(--border-orange-soft) !important;
	width: 100% !important;
	box-sizing: border-box !important;
	overflow: hidden !important;
	}

	label,
	.markdown,
	.prose,
	h1,
	h2,
	h3,
	h4,
	h5,
	h6,
	p,
	span,
	div {
	color: var(--text) !important;
	}

	.section-title,
	label > span,
	.gradio-container label {
	color: var(--text-strong) !important;
	}

	.secondary-note {
	color: var(--muted) !important;
	font-size: 12px;
	line-height: 1.35;
	}

	textarea,
	input[type="text"],
	input[type="password"],
	input[type="number"],
	input[type="email"],
	.cm-editor {
	background: rgba(var(--input-rgb), var(--input-alpha)) !important;
	color: var(--text) !important;
	border-color: var(--border-input) !important;
	}

	textarea::placeholder,
	input::placeholder {
	color: var(--muted-2) !important;
	}

	textarea:focus,
	input:focus,
	.cm-editor.cm-focused {
	border-color: var(--logo-blue) !important;
	box-shadow: 0 0 0 3px var(--focus-ring) !important;
	}

	input[type="checkbox"] {
	accent-color: var(--logo-blue) !important;
	}

	#schema-box .cm-editor {
	min-height: 410px !important;
	max-height: 480px !important;
	background: var(--code-bg) !important;
	}

	.cm-editor,
	.cm-scroller,
	.cm-content,
	.cm-line,
	.cm-gutters,
	.cm-activeLine,
	.cm-activeLineGutter {
	background: var(--code-bg) !important;
	color: var(--text) !important;
	}

	.cm-gutters {
	border-color: var(--border-blue-soft) !important;
	color: var(--muted-2) !important;
	}

	.cm-cursor {
	border-left-color: var(--text-strong) !important;
	}

	#image-box {
	min-height: 335px !important;
	background: var(--dropzone-bg) !important;
	border-color: var(--border-blue-soft) !important;
	}

	#image-box,
	#image-box *,
	.upload-container,
	.upload-container *,
	.file-preview,
	.file-preview * {
	color: var(--text) !important;
	}

	#image-box button,
	#image-box .icon-wrap,
	#image-box .wrap {
	background: transparent !important;
	}

	#reasoning-box {
	min-height: 180px !important;
	max-height: 240px !important;
	overflow: auto !important;
	padding: 8px;
	border-radius: 8px;
	background: rgba(var(--input-rgb), var(--input-alpha)) !important;
	border: 1px solid var(--border-blue-soft);
	white-space: pre-wrap !important;
	overflow-wrap: anywhere !important;
	word-break: break-word !important;
	}

	#output-box {
	min-height: 360px !important;
	max-height: 520px !important;
	overflow: auto !important;
	padding: 8px;
	border-radius: 8px;
	background: rgba(var(--input-rgb), var(--input-alpha)) !important;
	border: 1px solid var(--border-blue-soft);
	white-space: pre-wrap !important;
	overflow-wrap: anywhere !important;
	word-break: break-word !important;
	}

	#reasoning-box pre,
	#reasoning-box code,
	#output-box pre,
	#output-box code {
	white-space: pre-wrap !important;
	overflow-wrap: anywhere !important;
	word-break: break-word !important;
	color: var(--text) !important;
	background: transparent !important;
	}

	button {
	border-radius: 9px !important;
	min-height: 34px !important;
	}

	button.primary-button,
	.primary-button button,
	.primary-button {
	background: var(--logo-blue) !important;
	background-color: var(--logo-blue) !important;
	color: var(--text-on-accent) !important;
	border: none !important;
	font-weight: 750 !important;
	}

	button.markdown-button,
	.markdown-button button,
	.markdown-button {
	background: var(--logo-orange) !important;
	background-color: var(--logo-orange) !important;
	color: var(--text-on-accent) !important;
	border: none !important;
	font-weight: 750 !important;
	}

	.clear-button button,
	button.clear-button,
	.clear-button {
	background: transparent !important;
	background-color: transparent !important;
	color: var(--muted) !important;
	border: 1px solid var(--border-blue-soft) !important;
	}

	.gradio-container .wrap,
	.gradio-container .block,
	.gradio-container .form,
	.gradio-container .panel,
	.gradio-container .tabs,
	.gradio-container .tabitem {
	background: transparent !important;
	color: var(--text) !important;
	}

	.gradio-accordion {
	border-color: var(--border-blue-soft) !important;
	}

	.gradio-container table,
	.gradio-container th,
	.gradio-container td {
	color: var(--text) !important;
	}

	.gradio-container label,
	.gradio-container label span,
	.gradio-container .label-wrap,
	.gradio-container .label-wrap span {
	color: var(--text-strong) !important;
	}

	@media (max-width: 1100px) {
	.gradio-container {
	width: 100% !important;
	padding: 10px 12px 18px 12px !important;
	}

	.app-header {
	align-items: flex-start;
	flex-direction: column;
	}

	.brand-title {
	align-items: flex-start;
	flex-direction: column;
	}

	.model-chip {
	max-width: 100%;
	}

	.output-card {
	min-height: 520px !important;
	max-height: none !important;
	}

	#reasoning-box {
	min-height: 160px !important;
	max-height: 220px !important;
	}

	#output-box {
	min-height: 320px !important;
	max-height: 480px !important;
	}
	}
	""".replace("__LOGO_URL__", LOGO_URL or "")
	# ---------------- Gradio app ----------------
	with gr.Blocks(
	title="NuExtract3",
	css=CSS,
	theme=gr.themes.Base(
	primary_hue="blue",
	secondary_hue="orange",
	neutral_hue="slate",
	),
	) as demo:
	# Brand mark: an <img> when LOGO_URL is set, otherwise an empty placeholder
	# <div> carrying the same CSS class so the header layout does not shift.
	logo_html = (
	    f'<img class="brand-mark" src="{LOGO_URL}" alt="NuExtract logo" />'
	    if LOGO_URL
	    else '<div class="brand-mark"></div>'
	)

	# Top banner: brand mark, product name, a chip linking to the model page
	# (showing DEFAULT_MODEL) and a static endpoint label.
	# The brand-name <div> previously contained a stray </span> with no opening
	# tag; it is removed so the emitted markup is well-formed.
	gr.HTML(
	    f"""
	<header class="app-header">
	<div class="brand">
	{logo_html}

	<div class="brand-title">
	<div class="brand-name">NuExtract3</div>
	<a
	class="model-chip"
	href="https://huggingface.co/numind/NuExtract3"
	target="_blank"
	rel="noopener noreferrer">
	Model <code>{DEFAULT_MODEL}</code>
	</a>
	</div>
	</div>

	<div class="header-actions">
	<span class="status-dot"></span>
	<span>OpenAI-compatible endpoint</span>
	</div>
	</header>
	"""
	)

	# Introductory card describing the model, its features and project links.
	# The link separators are plain "|" characters: the previous "\|" is an
	# invalid escape sequence in a non-raw Python string (SyntaxWarning on
	# recent Python versions), and the separators sit inside a raw HTML <p>
	# block, so no Markdown escaping is needed there.
	gr.Markdown(
	    """
	<div style="padding: 0.25rem 0 1rem 0;">

	<h1 style="margin-top: 0; margin-bottom: 0.25rem;">NuExtract3</h1>

	<p style="font-size: 1.05rem; line-height: 1.6;">
	<strong>NuExtract3</strong> is a unified <strong>4B vision-language reasoning model</strong>
	for document understanding.
	</p>

	<p style="line-height: 1.6;">
	It combines <strong>structured information extraction</strong> with high-quality
	<strong>image-to-Markdown conversion</strong>, making it useful for OCR, RAG preprocessing,
	and extraction pipelines across scans, receipts, forms, invoices, contracts, tables, and more.
	</p>

	<h3>Features</h3>

	<ul>
	<li><strong>Structured extraction:</strong> text/images + JSON template + instructions → JSON output</li>
	<li><strong>Markdown conversion:</strong> text/images → Markdown</li>
	<li><strong>Multimodal inputs:</strong> text, images, or text + images</li>
	<li><strong>Multilingual documents</strong></li>
	<li><strong>Reasoning and non-reasoning inference modes</strong></li>
	<li><strong>Template generation</strong> from natural language or input documents</li>
	</ul>

	<p align="center">
	🤗 <a href="https://huggingface.co/numind/NuExtract3">Model</a>   |
	🖥️ <a href="https://nuextract.ai/">API / Platform</a>   |
	📑 <a href="https://numind.ai/blog">Blog</a>   |
	🗣️ <a href="https://discord.gg/3tsEtJNCDe">Discord</a>   |
	🛠️ <a href="https://github.com/numindai/nuextract">GitHub</a>
	</p>
	</div>
	""",
	    elem_classes=["intro-card"],
	)

	# Main working area: a row holding the input/controls column (declared
	# here) and the results column (declared further down).
	with gr.Row(equal_height=True):
	# Left: input, schema, controls
	with gr.Column(scale=1, min_width=520):
	with gr.Group(elem_classes="main-card"):
	gr.HTML("<div class='section-title'>Input</div>")

	# Document image. The click handlers receive this component's value as
	# context_image_val and forward it to the inference helpers as
	# context_image_path.
	context_image = gr.Image(
	label="Image",
	type="filepath",
	height=340,
	sources=["upload", "clipboard"],
	elem_id="image-box",
	)

	# Optional pasted document text; extraction and template generation accept
	# it with or without an image.
	context_text = gr.Textbox(
	label="Text",
	placeholder="Optional: paste document text.",
	lines=3,
	max_lines=5,
	)

	# Card holding the extraction instructions and the JSON template editor.
	with gr.Group(elem_classes="main-card"):
	gr.HTML("<div class='section-title'>Schema & instructions</div>")

	# Free-text guidance; passed to the streaming handler as instruction_val.
	instruction = gr.Textbox(
	label="Instructions",
	placeholder="Optional extraction instructions.",
	lines=2,
	max_lines=3,
	)

	with gr.Row(equal_height=True):
	# JSON template editor, pre-filled with a small sample schema serialized
	# with a 4-space indent.
	template = gr.Code(
	label="Template",
	language="json",
	value=json.dumps(
	{
	"title": "string",
	"entities": ["string"],
	"dates": ["YYYY-MM-DD"],
	"amounts": [
	{
	"value": "number",
	"currency": "string",
	}
	],
	},
	indent=4,
	),
	lines=16,
	scale=5,
	elem_id="schema-box",
	)

	with gr.Column(scale=2, min_width=150):
	# Wired to on_click_generate_template, which writes its result back into
	# the template editor above.
	generate_template_btn = gr.Button(
	"Generate template",
	variant="secondary",
	)
	# Short usage hint shown next to the template editor. The wording reflects
	# that template generation accepts an image or pasted text, matching the
	# validation done in on_click_generate_template.
	gr.HTML(
	    "<div class='secondary-note'>"
	    "Use Extract for JSON. Use Markdown to convert an image document. Use Generate template to generate a well-formatted template from an input image or text."
	    "</div>"
	)

	# Card with the action buttons and the generation settings.
	with gr.Group(elem_classes="main-card"):
	gr.HTML("<div class='section-title'>Run</div>")

	# The two run buttons share the same inputs and outputs; they differ only
	# in the markdown-mode flag their handlers pass to run_model_click.
	with gr.Row():
	extract_btn = gr.Button(
	"Extract JSON",
	variant="secondary",
	elem_classes=["primary-button"],
	)
	markdown_btn = gr.Button(
	"Convert to Markdown",
	variant="secondary",
	elem_classes=["markdown-button"],
	)

	# Stop is wired to cancel the extract/markdown events; Clear resets the
	# reasoning, output and error panels through on_clear.
	with gr.Row():
	stop_btn = gr.Button("Stop", variant="stop")
	clear_btn = gr.Button(
	"Clear results",
	variant="secondary",
	elem_classes=["clear-button"],
	)

	# Passed to the streaming handler as reasoning_val; on by default.
	reasoning_checkbox = gr.Checkbox(
	label="Reasoning",
	value=True,
	interactive=True,
	info="If enabled, reasoning is everything before </think>.",
	)

	# Sampling temperature in [0.0, 1] with 0.05 steps, defaulting to 0.0.
	temperature = gr.Slider(
	0.0,
	1,
	value=0.0,
	step=0.05,
	label="Temperature",
	info="Higher values make the output less deterministic but can improve reasoning performance (around 0.4-0.6)",
	)

	# Collapsed panel of structured-extraction examples. Selecting one fills the
	# image, template and instruction inputs; when no example is configured a
	# short hint naming the expected files is rendered instead.
	with gr.Accordion("Structured examples", open=False):
	    if not STRUCTURED_EXAMPLES:
	        gr.Markdown(
	            f"""
	No structured examples found.

	Add files referenced in `STRUCTURED_EXAMPLE_TEMPLATES`, for example:

	```text
	{EXAMPLE_DIR}/1.jpg
	{EXAMPLE_DIR}/2.png
	```
	"""
	        )
	    else:
	        gr.Examples(
	            label="Load structured example",
	            examples=STRUCTURED_EXAMPLES,
	            inputs=[context_image, template, instruction],
	            examples_per_page=8,
	            cache_examples=False,
	        )

	# Collapsed panel of image-to-Markdown examples. Selecting one fills only the
	# image input; when no example is configured, a hint explaining how to
	# declare example image paths is rendered instead.
	with gr.Accordion("Markdown examples", open=False):
	    if not MARKDOWN_EXAMPLES:
	        gr.Markdown(
	            f"""
	No Markdown examples found.

	Add image paths to `MARKDOWN_EXAMPLE_IMAGE_PATHS`, for example:

	```python
	MARKDOWN_EXAMPLE_IMAGE_PATHS = [
	"markdown_1.png",
	"markdown_2.jpg",
	"/home/user/app/examples/report.png",
	]
	```

	Relative paths are resolved from:

	```text
	{EXAMPLE_DIR}
	```
	"""
	        )
	    else:
	        gr.Examples(
	            label="Load Markdown example",
	            examples=MARKDOWN_EXAMPLES,
	            inputs=[context_image],
	            examples_per_page=8,
	            cache_examples=False,
	        )

	# Endpoint settings are intentionally hidden from the UI.
	# They are kept in gr.State holders, seeded from the module-level defaults,
	# so the click handlers still receive them as ordinary inputs
	# (api_key_val, api_base_val, system_prompt_val).
	api_base = gr.State(DEFAULT_API_BASE)
	api_key = gr.State(DEFAULT_API_KEY)
	system_prompt = gr.State(SYSTEM_PROMPT_DEFAULT)

	# Right: reasoning + output
	with gr.Column(scale=1, min_width=520):
	with gr.Group(elem_classes="output-card"):
	gr.HTML("<div class='section-title'>Reasoning</div>")
	# Receives the streamed reasoning text, which the handler wraps in a
	# fenced text block.
	reasoning_md = gr.Markdown(
	label="Reasoning",
	elem_id="reasoning-box",
	)

	gr.HTML("<div class='section-title' style='margin-top: 12px;'>Output</div>")
	# Receives the streamed model output.
	output_md = gr.Markdown(
	label="Output",
	elem_id="output-box",
	)

	# Hidden by default; handlers reveal it with an "### Error" message and hide
	# it again on success or when results are cleared.
	error_box = gr.Markdown(visible=False)

	def run_model_click(
	    api_key_val,
	    api_base_val,
	    system_prompt_val,
	    instruction_val,
	    template_val,
	    context_text_val,
	    context_image_val,
	    temperature_val,
	    reasoning_val,
	    markdown_mode_val,
	):
	    """Validate the inputs, then stream one model run into the result panels.

	    Shared generator behind the Extract and Markdown handlers. Every yielded
	    item is a 3-tuple of ``gr.update`` values for the reasoning panel, the
	    output panel and the error panel, in that order.

	    Markdown mode requires an image; extraction accepts an image or
	    non-blank text. Invalid input, an empty stream and any exception raised
	    while streaming are reported through the error panel rather than raised.
	    """

	    def _failure(markdown_text):
	        # Blank both result panels and reveal the error panel with the message.
	        return (
	            gr.update(value=""),
	            gr.update(value=""),
	            gr.update(visible=True, value=markdown_text),
	        )

	    mode_name = "Extract" if not markdown_mode_val else "Markdown"
	    print(f"[button] {mode_name} clicked", flush=True)
	    print(f"[button] image={context_image_val}", flush=True)
	    print(f"[button] text_len={len(context_text_val or '')}", flush=True)
	    print(f"[button] reasoning={bool(reasoning_val)}", flush=True)

	    # Input validation: both failure cases only arise when no image is given.
	    problem = None
	    if not context_image_val:
	        if markdown_mode_val:
	            problem = "Markdown conversion requires a document image."
	        elif not (context_text_val or "").strip():
	            problem = "Please provide a document image or paste document text."

	    if problem is not None:
	        yield _failure(f"### Error\n{problem}")
	        return

	    try:
	        received_chunk = False

	        stream = infer_stream(
	            api_key=api_key_val,
	            api_base=api_base_val,
	            system_prompt=system_prompt_val,
	            template=template_val,
	            instruction=instruction_val,
	            context_text=context_text_val,
	            context_image_path=context_image_val,
	            temperature=temperature_val,
	            reasoning=bool(reasoning_val),
	            markdown_mode=bool(markdown_mode_val),
	        )

	        for chunk in stream:
	            received_chunk = True

	            think = chunk.get("think") or ""
	            output = chunk.get("output") or "_(Empty output.)_"

	            # Reasoning is shown verbatim inside a fenced text block.
	            reasoning_value = f"```text\n{think}\n```" if think else ""

	            yield (
	                gr.update(value=reasoning_value),
	                gr.update(value=output),
	                gr.update(visible=False, value=""),
	            )

	        if not received_chunk:
	            yield _failure("### Error\nThe model returned no streamed output.")

	    except Exception:
	        import traceback

	        # Log the traceback to stdout and surface it in the error panel.
	        tb = traceback.format_exc()
	        print(tb, flush=True)

	        yield _failure(f"### Error\n```text\n{tb}\n```")

	def on_extract_click(
	    api_key_val,
	    api_base_val,
	    system_prompt_val,
	    instruction_val,
	    template_val,
	    context_text_val,
	    context_image_val,
	    temperature_val,
	    reasoning_val,
	):
	    """Handle the Extract button: stream a run with markdown mode off.

	    Delegates to ``run_model_click`` and re-yields its
	    (reasoning, output, error) update tuples unchanged.
	    """
	    yield from run_model_click(
	        api_key_val=api_key_val,
	        api_base_val=api_base_val,
	        system_prompt_val=system_prompt_val,
	        instruction_val=instruction_val,
	        template_val=template_val,
	        context_text_val=context_text_val,
	        context_image_val=context_image_val,
	        temperature_val=temperature_val,
	        reasoning_val=reasoning_val,
	        markdown_mode_val=False,
	    )

	def on_markdown_click(
	    api_key_val,
	    api_base_val,
	    system_prompt_val,
	    instruction_val,
	    template_val,
	    context_text_val,
	    context_image_val,
	    temperature_val,
	    reasoning_val,
	):
	    """Handle the Markdown button: stream a run with markdown mode on.

	    Delegates to ``run_model_click`` and re-yields its
	    (reasoning, output, error) update tuples unchanged.
	    """
	    yield from run_model_click(
	        api_key_val=api_key_val,
	        api_base_val=api_base_val,
	        system_prompt_val=system_prompt_val,
	        instruction_val=instruction_val,
	        template_val=template_val,
	        context_text_val=context_text_val,
	        context_image_val=context_image_val,
	        temperature_val=temperature_val,
	        reasoning_val=reasoning_val,
	        markdown_mode_val=True,
	    )

	def on_click_generate_template(
	    api_key_val,
	    api_base_val,
	    system_prompt_val,
	    context_text_val,
	    context_image_val,
	    temperature_val,
	):
	    """Handle the Generate template button.

	    Returns a 2-tuple of ``gr.update`` values for the template editor and
	    the error panel. On success the editor receives the generated template
	    and the error panel is hidden. On missing input or any exception the
	    editor is left untouched and the error panel shows the message.
	    """
	    print("[button] Generate template clicked", flush=True)

	    # At least one of image / non-blank text is required.
	    has_input = bool(context_image_val) or bool((context_text_val or "").strip())
	    if not has_input:
	        untouched_template = gr.update()
	        missing_input_error = gr.update(
	            visible=True,
	            value="### Error\nPlease provide a document image or paste document text.",
	        )
	        return untouched_template, missing_input_error

	    try:
	        generated = infer_template_generation(
	            api_key=api_key_val,
	            api_base=api_base_val,
	            system_prompt=system_prompt_val,
	            context_text=context_text_val,
	            context_image_path=context_image_val,
	            temperature=temperature_val,
	        )

	        return gr.update(value=generated), gr.update(visible=False, value="")

	    except Exception:
	        import traceback

	        # Log the traceback to stdout and surface it in the error panel.
	        tb = traceback.format_exc()
	        print(tb, flush=True)

	        untouched_template = gr.update()
	        failure = gr.update(visible=True, value=f"### Error\n```text\n{tb}\n```")
	        return untouched_template, failure

	def on_clear():
	    """Reset the result area: empty reasoning and output, hidden empty error."""
	    cleared_reasoning = gr.update(value="")
	    cleared_output = gr.update(value="")
	    hidden_error = gr.update(visible=False, value="")
	    return cleared_reasoning, cleared_output, hidden_error

	# Components fed to both streaming handlers. The order mirrors the positional
	# parameters of on_extract_click / on_markdown_click.
	common_inputs = [
	    api_key, api_base, system_prompt,
	    instruction, template,
	    context_text, context_image,
	    temperature, reasoning_checkbox,
	]

	# Targets of every (reasoning, output, error) update tuple.
	common_outputs = [reasoning_md, output_md, error_box]

	# Both run buttons share the same inputs, outputs and progress setting; only
	# the handler differs.
	_streaming_wiring = dict(
	    inputs=common_inputs,
	    outputs=common_outputs,
	    show_progress=True,
	)
	extract_event = extract_btn.click(fn=on_extract_click, **_streaming_wiring)
	markdown_event = markdown_btn.click(fn=on_markdown_click, **_streaming_wiring)

	# Stop runs no function of its own; it only cancels the two streaming events.
	stop_btn.click(
	    fn=None,
	    inputs=None,
	    outputs=None,
	    cancels=[extract_event, markdown_event],
	)

	# Clear resets the three result components and takes no inputs.
	clear_btn.click(fn=on_clear, inputs=None, outputs=common_outputs)

	# Template generation reads the endpoint settings, the document inputs and
	# the temperature, and writes to the template editor and the error panel.
	_template_inputs = [
	    api_key, api_base, system_prompt,
	    context_text, context_image,
	    temperature,
	]
	generate_template_btn.click(
	    fn=on_click_generate_template,
	    inputs=_template_inputs,
	    outputs=[template, error_box],
	    show_progress=True,
	)


	if __name__ == "__main__":
	    # Only directories that actually exist are handed to launch() as
	    # allowed paths; with none present, None is passed instead of an
	    # empty list.
	    allowed_paths = [
	        str(directory)
	        for directory in (ASSETS_DIR, EXAMPLE_DIR)
	        if directory.exists()
	    ]

	    launch_options = {
	        "share": ARGS.share,
	        "server_name": ARGS.server_name,
	        "server_port": ARGS.server_port,
	        "show_error": True,
	        "allowed_paths": allowed_paths or None,
	    }

	    # Enable the request queue, then start the server.
	    demo.queue().launch(**launch_options)