Spaces:

Ferr0
/

structured-output-playground

Running on Zero

App Files Files Community

structured-output-playground / app.py

Ferr0

ZeroGPU: Qwen2.5-3B + Outlines — schema-conformance demo

2a3835f verified 4 days ago

Raw

History Blame Contribute Delete

8.08 kB

	"""Structured Output Playground — lock any LLM's output to a JSON schema.

	A local model (Qwen2.5-3B-Instruct) extracts structured data from free text. With
	constrained decoding on (Outlines), the decoder can only emit tokens that keep the
	output conformant to the schema — right keys, right types, valid enums, every time.
	Flip constraints off and the same model free-styles: it may wrap JSON in a markdown
	fence, or — more subtly — return valid JSON that violates the schema. That contrast
	is the demo.

	Runs on ZeroGPU (H200). No external API, no secrets.
	"""

	import json
	import os
	import time

	import gradio as gr
	import jsonschema
	import spaces
	import torch
	import outlines
	from outlines.types import JsonSchema
	from transformers import AutoModelForCausalLM, AutoTokenizer

	from examples import CONTACT_TEXT, EXAMPLES
	from schemas import CUSTOM_LABEL, PRESETS, preset_schema

	# Runtime configuration; both values can be overridden through environment variables.
	MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")
	MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "384"))

	# System message sent with every extraction request (see build_prompt).
	SYSTEM_PROMPT = " ".join(
	    [
	        "You are a precise information-extraction engine.",
	        "You output a single JSON object and nothing else.",
	        "Put each value in the field whose meaning it matches, and never copy the"
	        " same value into two different fields.",
	    ]
	)

	# Schema pre-filled in the "Custom JSON Schema" editor, serialized with 2-space indent.
	DEFAULT_CUSTOM_SCHEMA = json.dumps(
	    {
	        "type": "object",
	        "properties": {
	            "summary": {"type": "string"},
	            "topics": {
	                "type": "array",
	                "items": {"type": "string"},
	            },
	            "sentiment": {
	                "type": "string",
	                "enum": ["positive", "neutral", "negative"],
	            },
	        },
	        "required": ["summary", "sentiment"],
	    },
	    indent=2,
	)

	# Load the tokenizer and the bfloat16 weights once, at import time, so every request
	# reuses the same objects; the elapsed time is logged for startup diagnostics.
	print(f"[init] loading {MODEL_ID} …")
	_t0 = time.perf_counter()
	_tok = AutoTokenizer.from_pretrained(MODEL_ID)
	_hf = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16, device_map="cuda")
	# Wrap the HF model and tokenizer for Outlines; `model(prompt, ...)` is what _generate calls,
	# with or without an `output_type` constraint.
	model = outlines.from_transformers(_hf, _tok)
	print(f"[init] model ready in {time.perf_counter() - _t0:.1f}s")


	def resolve_schema(preset: str, custom_schema: str) -> dict:
	    """Return the JSON Schema selected in the UI.

	    Args:
	        preset: The chosen preset label.
	        custom_schema: JSON text of the user-edited schema; parsed only when
	            ``preset`` is the Custom label.

	    Returns:
	        The parsed custom schema, or the result of ``preset_schema(preset)``.

	    Raises:
	        json.JSONDecodeError: If the custom schema text is not valid JSON.
	    """
	    use_custom = preset == CUSTOM_LABEL
	    return json.loads(custom_schema) if use_custom else preset_schema(preset)


	def build_prompt(text: str, schema: dict) -> str:
	    """Render the chat-formatted extraction prompt for ``text``.

	    The top-level property names of ``schema`` are listed in the user message:
	    naming the fields keeps the model from mis-mapping values, while the grammar
	    (when constraints are on) enforces the structure.

	    Args:
	        text: The source text to extract from.
	        schema: The JSON Schema whose ``properties`` keys are used as a hint.

	    Returns:
	        The untokenized prompt produced by the tokenizer's chat template, with
	        the generation prompt appended.
	    """
	    field_names = ", ".join(schema.get("properties", {}).keys())
	    if field_names:
	        field_hint = f" with these fields: {field_names}"
	    else:
	        field_hint = ""

	    user_message = (
	        f"Extract the information from the text below as a JSON object{field_hint}.\n\n"
	        f'Text:\n"""{text}"""\n\n'
	        "Return only the JSON object."
	    )
	    messages = [
	        {"role": "system", "content": SYSTEM_PROMPT},
	        {"role": "user", "content": user_message},
	    ]
	    return _tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)


	@spaces.GPU(duration=60)
	def _generate(prompt: str, preset: str, custom_schema: str, constraints_on: bool) -> str:
	    """Run the model on ``prompt``, optionally constrained to the selected schema.

	    Only simple, picklable arguments are accepted (ZeroGPU forks a worker), so the
	    Outlines output type is rebuilt here from ``preset`` / ``custom_schema``
	    instead of being passed in.

	    Args:
	        prompt: The fully rendered prompt string.
	        preset: The chosen preset label.
	        custom_schema: JSON Schema text; used only for the Custom preset.
	        constraints_on: When false, the model generates freely.

	    Returns:
	        The raw text returned by the model.
	    """
	    if constraints_on:
	        constraint = (
	            JsonSchema(json.loads(custom_schema))
	            if preset == CUSTOM_LABEL
	            else PRESETS[preset]
	        )
	        return model(prompt, output_type=constraint, max_new_tokens=MAX_NEW_TOKENS)

	    # Unconstrained: same model and token budget, no output type.
	    return model(prompt, max_new_tokens=MAX_NEW_TOKENS)


	def extract(text, preset, custom_schema, constraints_on):
	    """Extract structured data from free text as JSON that conforms to a schema.

	    With constraints on, decoding is restricted to tokens that keep the output
	    conformant to the chosen schema (right keys, types and enums).

	    The schema is checked before any generation is requested: it must parse as
	    JSON, be a JSON object, and be a valid JSON Schema. A bad schema is reported
	    in the status badge instead of raising after the GPU call.

	    Args:
	        text: The free text to extract structured information from.
	        preset: Which schema to use — "Contact card", "Product", "Job posting",
	            "Event", or "Custom (edit the schema)".
	        custom_schema: A JSON Schema string; used only when preset is the Custom option.
	        constraints_on: If true, force the output to match the schema (recommended).

	    Returns:
	        The extracted JSON (string) and a short validity/status badge (markdown).
	    """
	    text = (text or "").strip()
	    if not text:
	        return "", "Paste some text first."

	    # API callers may send null for the schema; treat it as empty text so it is
	    # reported as invalid JSON instead of raising TypeError inside json.loads.
	    custom_schema = custom_schema or ""

	    try:
	        schema = resolve_schema(preset, custom_schema)
	    except json.JSONDecodeError as e:
	        return "", f"❌ Your custom schema is not valid JSON: {e}"

	    # Valid JSON is not necessarily a usable schema: build_prompt needs an object,
	    # and jsonschema.validate would raise SchemaError only after generation.
	    if not isinstance(schema, dict):
	        return "", (
	            "❌ The schema must be a JSON object, "
	            f"not `{type(schema).__name__}`."
	        )
	    try:
	        jsonschema.validators.validator_for(schema).check_schema(schema)
	    except jsonschema.SchemaError as e:
	        return "", f"❌ The schema is not a valid JSON Schema: `{e.message}`"

	    prompt = build_prompt(text, schema)
	    t0 = time.perf_counter()
	    raw = _generate(prompt, preset, custom_schema, constraints_on)
	    dt = time.perf_counter() - t0

	    # 1) Is it even JSON?
	    try:
	        parsed = json.loads(raw)
	    except json.JSONDecodeError as e:
	        if raw.lstrip().startswith("```"):
	            why = "the model wrapped its answer in a markdown code fence (```)"
	        else:
	            why = f"`{e}`"
	        if constraints_on:
	            # Constrained decoding only emits valid prefixes, so unparseable output
	            # most plausibly means generation stopped at the token budget.
	            note = (
	                "But constraints were ON — the output was most likely cut off at the "
	                f"{MAX_NEW_TOKENS}-token limit; try a shorter text or a smaller schema. "
	                "If it was not, please report this."
	            )
	        else:
	            note = "Constrained decoding never emits a fence or prose — always parseable."
	        return raw, f"❌ Not valid JSON · {dt:.1f}s — {why}. {note}"

	    pretty = json.dumps(parsed, indent=2, ensure_ascii=False)

	    # 2) Does it actually conform to the schema (types, enums, required)?
	    try:
	        jsonschema.validate(parsed, schema)
	    except jsonschema.ValidationError as e:
	        note = (
	            "But constraints were ON — please report this."
	            if constraints_on
	            else "Constrained decoding would have forced the right type/enum here."
	        )
	        return pretty, f"⚠️ Valid JSON, but it breaks the schema · {dt:.1f}s\n\n`{e.message}` — {note}"

	    extra = "" if constraints_on else " — the model complied this time, but nothing forced it to."
	    return pretty, f"✅ Valid & schema-conformant · {dt:.1f}s{extra}"


	def on_preset_change(preset):
	    """Show the custom-schema editor only while the Custom preset is selected."""
	    show_editor = preset == CUSTOM_LABEL
	    return gr.update(visible=show_editor)


	# Markdown header rendered at the top of the page via gr.Markdown(INTRO).
	INTRO = """
	# 🔒 Structured Output Playground
	Lock any LLM's output to a JSON schema. A local model (Qwen2.5-3B) extracts structured data from
	free text. With constraints ON, the decoder can only emit tokens that keep the output
	conformant to your schema — right keys, right types, valid enums, every time.

	Flip OFF and the same model free-styles: it may wrap the JSON in a markdown fence, or — more
	subtly — return valid JSON that violates your schema (a string where you asked for an integer, a
	value outside your enum). A good model complies often; constrained decoding makes it always.

	Runs on ZeroGPU (H200) · no external API, no secrets · built by
	[Ferr0](https://huggingface.co/Ferr0) · [pixelium.win](https://pixelium.win) · [GitHub](https://github.com/ferr079)
	"""

	# Page layout: inputs on the left, extracted JSON and status badge on the right.
	with gr.Blocks(title="Structured Output Playground") as demo:
	    gr.Markdown(INTRO)

	    with gr.Row():
	        # Left column: schema selection, source text and the constraints toggle.
	        with gr.Column(scale=1):
	            preset = gr.Dropdown(
	                label="Schema preset",
	                choices=[*PRESETS.keys(), CUSTOM_LABEL],
	                value="Contact card",
	            )
	            # Hidden until the Custom preset is selected (see on_preset_change).
	            custom = gr.Code(
	                label="Custom JSON Schema",
	                language="json",
	                value=DEFAULT_CUSTOM_SCHEMA,
	                visible=False,
	            )
	            text = gr.Textbox(
	                label="Source text",
	                placeholder="Paste any text to extract from…",
	                value=CONTACT_TEXT,
	                lines=8,
	            )
	            constraints = gr.Checkbox(label="Constraints ON (force schema)", value=True)
	            go = gr.Button("Extract", variant="primary")

	        # Right column: the result and its validity badge.
	        with gr.Column(scale=1):
	            out = gr.Code(label="Extracted JSON", language="json")
	            badge = gr.Markdown()

	    gr.Examples(examples=EXAMPLES, inputs=[text, preset, constraints])

	    # Event wiring: toggle the schema editor, and run extraction on click.
	    preset.change(on_preset_change, inputs=preset, outputs=custom, api_name=False)
	    go.click(extract, inputs=[text, preset, custom, constraints], outputs=[out, badge])


	if __name__ == "__main__":
	    demo.launch()