Spaces:
Running on Zero
Running on Zero
| """Structured Output Playground — lock any LLM's output to a JSON schema. | |
| A local model (Qwen2.5-3B-Instruct) extracts structured data from free text. With | |
| *constrained decoding* on (Outlines), the decoder can only emit tokens that keep the | |
| output conformant to the schema — right keys, right types, valid enums, every time. | |
| Flip constraints off and the same model free-styles: it may wrap JSON in a markdown | |
| fence, or — more subtly — return valid JSON that violates the schema. That contrast | |
| *is* the demo. | |
| Runs on ZeroGPU (H200). No external API, no secrets. | |
| """ | |
| import json | |
| import os | |
| import time | |
| import gradio as gr | |
| import jsonschema | |
| import spaces | |
| import torch | |
| import outlines | |
| from outlines.types import JsonSchema | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from examples import CONTACT_TEXT, EXAMPLES | |
| from schemas import CUSTOM_LABEL, PRESETS, preset_schema | |
# Runtime configuration; both values can be overridden through the environment.
MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "384"))

# System message sent with every extraction request.
SYSTEM_PROMPT = (
    "You are a precise information-extraction engine. "
    "You output a single JSON object and nothing else. "
    "Put each value in the field whose meaning it matches, "
    "and never copy the same value into two different fields."
)

# Starter schema shown in the custom-schema editor, pretty-printed as JSON text.
DEFAULT_CUSTOM_SCHEMA = json.dumps(
    {
        "type": "object",
        "properties": {
            "summary": {"type": "string"},
            "topics": {
                "type": "array",
                "items": {"type": "string"},
            },
            "sentiment": {
                "type": "string",
                "enum": ["positive", "neutral", "negative"],
            },
        },
        "required": ["summary", "sentiment"],
    },
    indent=2,
)
# Load the tokenizer and weights once, at import time, and wrap them in an
# Outlines model so every request reuses the same objects.
print(f"[init] loading {MODEL_ID} …")
_t0 = time.perf_counter()
_tok = AutoTokenizer.from_pretrained(MODEL_ID)
_hf = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="cuda",
)
# `model` is the callable used for both free-form and schema-constrained generation.
model = outlines.from_transformers(_hf, _tok)
print(f"[init] model ready in {time.perf_counter() - _t0:.1f}s")
def resolve_schema(preset: str, custom_schema: str) -> dict:
    """Return the JSON Schema selected in the UI.

    Args:
        preset: Label of the chosen preset, or ``CUSTOM_LABEL`` for a user schema.
        custom_schema: JSON text of the user schema; parsed only for ``CUSTOM_LABEL``.

    Returns:
        The parsed custom schema, or the schema registered for the preset.

    Raises:
        json.JSONDecodeError: If the custom schema text is not valid JSON.
    """
    use_custom = preset == CUSTOM_LABEL
    return json.loads(custom_schema) if use_custom else preset_schema(preset)
def build_prompt(text: str, schema: dict) -> str:
    """Render the chat-formatted extraction prompt for ``text``.

    Args:
        text: The source text the model should extract from.
        schema: The target JSON Schema; its top-level property names are listed
            in the instruction.

    Returns:
        The prompt string produced by the tokenizer's chat template, ending with
        the generation prompt.
    """
    # Naming the fields keeps the model from mis-mapping values; the grammar
    # (when enabled) is what enforces the structure.
    field_names = ", ".join(schema.get("properties", {}).keys())
    if field_names:
        hint = f" with these fields: {field_names}"
    else:
        hint = ""

    instruction = f"Extract the information from the text below as a JSON object{hint}.\n\n"
    quoted_text = f'Text:\n"""{text}"""\n\n'
    user = instruction + quoted_text + "Return only the JSON object."

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user},
    ]
    return _tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# ZeroGPU only attaches a GPU while a function decorated with `spaces.GPU` is
# running, so the actual generation call has to live behind this decorator.
@spaces.GPU
def _generate(prompt: str, preset: str, custom_schema: str, constraints_on: bool) -> str:
    """Run the model on ``prompt``, optionally constrained to the selected schema.

    Args:
        prompt: Fully rendered chat prompt.
        preset: Label of the chosen preset, or ``CUSTOM_LABEL`` for a user schema.
        custom_schema: JSON text of the user schema; parsed only for ``CUSTOM_LABEL``.
        constraints_on: When false, generate free-form text with no output type.

    Returns:
        The raw text produced by the model.

    Raises:
        json.JSONDecodeError: If constraints are on and the custom schema is not valid JSON.
        KeyError: If constraints are on and ``preset`` is not a key of ``PRESETS``.
    """
    # Simple, picklable args only (ZeroGPU forks a worker); rebuild the output type here.
    if not constraints_on:
        return model(prompt, max_new_tokens=MAX_NEW_TOKENS)

    if preset == CUSTOM_LABEL:
        output_type = JsonSchema(json.loads(custom_schema))
    else:
        output_type = PRESETS[preset]
    return model(prompt, output_type=output_type, max_new_tokens=MAX_NEW_TOKENS)
def extract(text, preset, custom_schema, constraints_on):
    """Extract structured data from free text as JSON that conforms to a schema.

    With constraints on, the output is guaranteed valid against the chosen schema
    (right keys, types and enums) via constrained decoding. The schema itself is
    checked before any generation happens; an unusable schema is reported in the
    status badge and no JSON is returned.

    Args:
        text: The free text to extract structured information from.
        preset: Which schema to use — "Contact card", "Product", "Job posting",
            "Event", or "Custom (edit the schema)".
        custom_schema: A JSON Schema string; used only when preset is the Custom option.
        constraints_on: If true, force the output to match the schema (recommended).

    Returns:
        The extracted JSON (string) and a short validity/status badge (markdown).
    """
    text = (text or "").strip()
    if not text:
        return "", "Paste some text first."

    # A missing schema (None) is treated like empty text so it is reported as
    # invalid JSON below instead of raising TypeError inside json.loads.
    custom_schema = custom_schema or ""
    try:
        schema = resolve_schema(preset, custom_schema)
    except json.JSONDecodeError as e:
        return "", f"❌ Your custom schema is not valid JSON: {e}"

    # Reject unusable schemas *before* generating: build_prompt needs a mapping,
    # and an invalid schema would otherwise only fail after the model has run.
    if not isinstance(schema, dict):
        return "", f"❌ The schema must be a JSON object, but got `{type(schema).__name__}`."
    try:
        jsonschema.validators.validator_for(schema).check_schema(schema)
    except jsonschema.SchemaError as e:
        return "", f"❌ Your custom schema is not a valid JSON Schema: `{e.message}`"

    prompt = build_prompt(text, schema)
    t0 = time.perf_counter()
    raw = _generate(prompt, preset, custom_schema, constraints_on)
    dt = time.perf_counter() - t0

    # 1) Is it even JSON?
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as e:
        why = (
            "the model wrapped its answer in a markdown code fence (```)"
            if raw.lstrip().startswith("```")
            else f"`{e}`"
        )
        note = (
            "But constraints were ON — please report this."
            if constraints_on
            else "Constrained decoding never emits a fence or prose — always parseable."
        )
        return raw, f"❌ **Not valid JSON** · {dt:.1f}s — {why}. {note}"

    pretty = json.dumps(parsed, indent=2, ensure_ascii=False)

    # 2) Does it actually conform to the schema (types, enums, required)?
    try:
        jsonschema.validate(parsed, schema)
    except jsonschema.ValidationError as e:
        note = (
            "But constraints were ON — please report this."
            if constraints_on
            else "Constrained decoding would have *forced* the right type/enum here."
        )
        return pretty, f"⚠️ **Valid JSON, but it breaks the schema** · {dt:.1f}s\n\n`{e.message}` — {note}"

    extra = "" if constraints_on else " — the model complied this time, but nothing *forced* it to."
    return pretty, f"✅ **Valid & schema-conformant** · {dt:.1f}s{extra}"
def on_preset_change(preset):
    """Show the custom-schema editor only while the Custom preset is selected."""
    show_editor = preset == CUSTOM_LABEL
    return gr.update(visible=show_editor)
# Markdown banner rendered at the top of the page.
INTRO = """
# 🔒 Structured Output Playground
**Lock any LLM's output to a JSON schema.** A local model (Qwen2.5-3B) extracts structured data from
free text. With **constraints ON**, the decoder can only emit tokens that keep the output
**conformant to your schema** — right keys, right *types*, valid *enums*, every time.
Flip **OFF** and the same model free-styles: it may wrap the JSON in a markdown fence, or — more
subtly — return *valid JSON that violates your schema* (a string where you asked for an integer, a
value outside your enum). A good model complies *often*; constrained decoding makes it **always**.
Runs on **ZeroGPU** (H200) · no external API, no secrets · built by
[Ferr0](https://huggingface.co/Ferr0) · [pixelium.win](https://pixelium.win) · [GitHub](https://github.com/ferr079)
"""
# Page layout: inputs in the left column, results in the right column.
with gr.Blocks(title="Structured Output Playground") as demo:
    gr.Markdown(INTRO)

    with gr.Row():
        with gr.Column(scale=1):
            preset = gr.Dropdown(
                choices=list(PRESETS.keys()) + [CUSTOM_LABEL],
                value="Contact card",
                label="Schema preset",
            )
            # Hidden until the Custom preset is chosen (see on_preset_change).
            custom = gr.Code(
                value=DEFAULT_CUSTOM_SCHEMA,
                language="json",
                label="Custom JSON Schema",
                visible=False,
            )
            text = gr.Textbox(
                value=CONTACT_TEXT,
                lines=8,
                label="Source text",
                placeholder="Paste any text to extract from…",
            )
            constraints = gr.Checkbox(value=True, label="Constraints ON (force schema)")
            go = gr.Button("Extract", variant="primary")

        with gr.Column(scale=1):
            out = gr.Code(label="Extracted JSON", language="json")
            badge = gr.Markdown()

    gr.Examples(examples=EXAMPLES, inputs=[text, preset, constraints])

    # Event wiring: toggle the schema editor, and run the extraction on click.
    preset.change(on_preset_change, inputs=preset, outputs=custom, api_name=False)
    go.click(extract, inputs=[text, preset, custom, constraints], outputs=[out, badge])
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()