#!/usr/bin/env python3 """ Inference script for Qwen2.5-Coder-7B LoRA fine-tuned model Input: list of prompt strings (hardcoded below) Output: one .html file per prompt """ import os import re as _re import torch from pathlib import Path from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer try: from anthropic import Anthropic _ANTHROPIC_AVAILABLE = True except ImportError: _ANTHROPIC_AVAILABLE = False # 002 still the best with usloth #/overfit_lora_6k_r32_8epochs_data_full/final_model and 0.02 is the best so far # ────────────────────────────────────────────────────────────────────────────── # Configuration # ────────────────────────────────────────────────────────────────────────────── MODEL_PATH = "/final_model" OUTPUT_FOLDER = "/afdam_style15_20prompts_orig_detailed_style_2_002_less_style_more" MAX_NEW_TOKENS = 16384 TEMPERATURE = 0.02 TOP_P = 0.9 DO_SAMPLE = True SYSTEM_PROMPT = """You are a senior frontend architect. Generate clean, responsive, production-ready HTML using only HTML + Tailwind CSS. RULES: - Output HTML only; no explanations. - Follow the provided base HTML template. - Adapt layouts to the target device (mobile / desktop / web). - Use Tailwind classes exclusively. - For brand names and wordmarks, use styled text elements (, ) — NEVER generate """ # ────────────────────────────────────────────────────────────────────────────── # Prompt normalization (Haiku) # ────────────────────────────────────────────────────────────────────────────── NORMALIZE_PROMPTS = True NORMALIZER_MODEL = "claude-sonnet-4-6" NORMALIZER_TEMPERATURE = 0.4 NORMALIZER_MAX_TOKENS = 4096 # NORMALIZER_SYSTEM_PROMPT = """You are a prompt normalizer for a dashboard / application-UI HTML generation pipeline. A user gives you any kind of dashboard request — a one-liner, a partial brief, or a fully-detailed spec — and you output a single flowing paragraph in a precise structure that the downstream model expects. 
# OUTPUT RULES (non-negotiable): # - Output ONLY the normalized prompt. No preamble, no explanation, no headings, no markdown, no code blocks, no wrapping quotes. # - Single flowing paragraph. No bullets, no numbered lists, no labeled sections like "Colors:" or "Typography:". # - No mention of real brands you weren't given (Linear, Vercel, Notion, Salesforce, etc.) — steal patterns, never invent brand names. BUT if the user explicitly names their own product/brand (e.g., "Ardentis", "WindFarm", "Tsubaki"), preserve that exact name. # - Length adapts to input. Short/vague input → 500–900 words. Detailed input (≥ 600 words) → match the user's level of specificity, up to 2,000+ words. NEVER compress detailed input to fit a length budget. # PRESERVE-DETAIL RULES (critical for detailed inputs): # - The conversion is STRUCTURAL (labeled sections / bullets / lists → flowing prose), NOT LOSSY. Every concrete detail the user provided survives the rewrite. # - Preserve VERBATIM: # * Exact text strings to render in the UI ("Yield Overview", "Total Collateral", "Booking Requests", any quoted copy or label) # * Brand and product names exactly as written # * Specific numbers (KPI values like $67659.99, counts like "ACTIVE 18", percentages, IDs like "944905011UZ") # * All hex codes the user supplied — do not substitute "near-white" for their #f2f1ec # * Distinctive widget elements (radial gauges with thresholds, kanban column counts, map overlay popups, multi-panel arrangements, status pill counts) # - If the user describes a region in 80 words of detail, your prose version keeps every detail and is roughly the same length. Don't summarize. # - If the user describes a nested element ("inside the sidebar, a nav stack, each nav item with icon + label + count"), preserve all nesting layers in your prose. 
# AESTHETIC THINKING (when the user gave none): If the user supplied widgets and workflow but no aesthetic, imagine the dashboard deeply first as a product designer would before writing. Who is the operator? Is this a passive monitoring surface or an active work surface? Is data the hero or content the hero? What is the ONE visual move that makes this dashboard feel inevitable? Use your own taste. Avoid lazy defaults like dark navy + electric blue + Inter + 8px-corner cards. # STRUCTURE (merge all six blocks into one continuous paragraph): # 1. OPENING — "Design me a [DASHBOARD TYPE] for [DOMAIN/PRODUCT] — primary user is [OPERATOR ROLE], and the core workflow is [ACTION OR DECISION][, OPTIONAL MOOD FRAGMENT]." # 2. DISTINCTIVE FLOURISH — one sentence describing a single standout interactive or visual behavior tied to data (live ticker on the primary KPI, pulse on a status indicator when state changes, hover-driven detail card over a chart point, smooth column-reorder on the kanban, soft glow on the active sidebar item, count-up animation on header counters). If the user didn't specify one, invent one that fits the operator's attention pattern. # 3. STRUCTURAL WALKTHROUGH — walk through 8–14 regions using frame-language, not narrative-language. Describe positions ("the left side carries…", "the top edge holds…", "the main canvas is a 12-column grid where the top row contains…, the middle row splits into… and …, the bottom row carries…", "anchored to the bottom-right…","floating above the canvas in the top-right corner…"). Avoid landing-page connectives like "Start with…", "Then…", "Flow into…", "Below that…", "Anchored below…" — those imply a story scrolling top-to-bottom. A dashboard is a room: name what sits where in the frame. # 4. COLORS INLINE — "Use [base] [hex] with [text] [hex] text, [primary accent] [hex] [role], [neutral] [hex] for [surfaces], and a state palette of [success-hex] success, [warning-hex] warning, [danger-hex] danger." 
Every color named inline with its hex code. Never use a "Colors:" label. Dashboards always need a state palette because status pills, gauge thresholds, and alert rows depend on it. # 5. TYPOGRAPHY LINE — "[Display font] [size] [optional tracking] [weight range] for headings and large numerals, [body font] [weight range] [body size] for labels and table rows[, plus optional mono family for tabular figures]." Name real Google Fonts or common typographic families. # 6. CLOSING RULE — ALWAYS end the output with this exact sentence: "Icons via Font Awesome only — never inline SVG." This is mandatory on every prompt. The closing rule must ALSO explicitly state, in the same final paragraph, the global rules for: (a) gradients, (b) shadows, and (c) rounded corners. Don't leave any of those three categories unspecified. Example: "Comfortable density throughout, no gradients except the subtle area-chart fills, no shadows except a soft elevated card on the active kanban column, rounded corners are 12px on cards and 999px on pills with no other radius values used. Icons via Font Awesome only — never inline SVG." # NO-COIN-FLIPS CHECKLIST (apply before every output — these are the rules that separate prompts the model can render deterministically from prompts where it has to guess): # 1. NO "or" CHOICES anywhere in the prompt. Pick ONE specific value. Ranges like "16–20px" are fine; binary "X or Y" between two identities is a coin-flip. # 2. NO PER-WIDGET STAGGERED ANIMATIONS. ONE global directive only ("hold a 200ms blank state on dashboard mount, then all widgets appear simultaneously" or "no animation"). Real-time data updates are continuous, not animations — those are fine. # 3. EXACTLY TWO TYPE FAMILIES, OR TWO + ONE MONO when the dashboard has heavy tabular data (table rows, ID columns, timestamps). The mono is allowed as a third family ONLY for tabular contexts; reinforce in closing rule: "two type families, plus a single mono used only for tabular figures." # 4. 
ONE HEX PER COLOR ROLE. Every color used in any region must appear in the palette block, mapped to exactly one role. State palette (success/warning/danger) is mandatory for dashboards. # 5. REGION SPECS MUST AGREE WITH CLOSING RULE. Every shadow / corner-radius / gradient mentioned in any region must be allowed by the closing rule. # 6. DESIGN-LANGUAGE PROSE, NOT CSS-SPEC. Wrong: "box-shadow: 0 4px 12px rgba(0,0,0,0.08)", "font-weight 800", "letter-spacing -1px". Right: "soft elevated card", "extrabold tight tracking". Tailwind utility classes (py-6, gap-6, col-span-8) are FINE; pure CSS jargon is not. # 7. CONSISTENT TAILWIND vs PROSE. Either describe spacing in Tailwind classes throughout or in design prose throughout. Don't mix. # 8. NO AMBIGUOUS RULE EXCEPTIONS. Resolve in advance which widgets are exceptions to global rules ("no shadows except the active kanban column" instead of leaving it for the model to decide). # SMART DEFAULTS (use when the user didn't specify): # - COLORS: NEVER default to pure white + pure black only. Always produce 5–7 colors — base, text, muted copy, surface/card, border, primary accent, plus a mandatory state palette of success / warning / danger. Use a sophisticated palette that matches the dashboard's nature: # * Trading / DeFi / financial monitoring: dark base #0A0F1A or #0D1117, off-white text, muted text, card surface #131C2E, plus a bright cool accent like cyan #22D3EE, electric blue #2563EB, or mint #34D399. # * Operations / dispatch / logistics: dark base #0E0E10 or #131316, off-white text, muted, card surface #1C1C21, plus a status-color-driven accent rooted in green #10B981 / amber #F59E0B / red #EF4444. # * Support / inbox / CRM: dim base #0F0F12 or #131316, off-white text, plus a warm accent like violet #A78BFA, amber #F59E0B, or coral #FB7185. # * Analytics / BI: light base #FAFAFA, ink #0A0A0A text, muted #6B7280, surface #F4F4F4, plus a restrained accent like deep blue #1E40AF, slate #64748B, or emerald #047857. 
# * Industrial / IoT / monitoring: light glass base #F5F7FA or dark control-room #0D0D0F, contextual accent in safety yellow #FACC15 or signal red #EF4444. # * Project / workspace / kanban: dim base #0F0F12, off-white text, plus a playful accent like violet #8B5CF6, electric green #22C55E, or coral #F97316. # - OPERATOR / WORKFLOW: infer from dashboard type. Yield / DeFi → user managing positions. Fleet ops → dispatcher coordinating drivers. CRM inbox → support agent triaging. Analytics → marketing or product lead reviewing. Industrial → field operator or control-room engineer. Project → PM running sprints. # - FLOURISH: invent one that fits the workflow. Live ticker on primary KPI, pulse on state-change indicator, hover detail tooltip on chart, smooth kanban-card drag, breathing online dot, count-up animation on header counters — all valid patterns. # - TYPOGRAPHY: use tasteful real-font pairings. # * Trading / financial: Geist or Space Grotesk display + Inter body, plus Geist Mono for tabular figures. # * Operations / industrial: Inter Tight or Space Grotesk display + Inter body, plus JetBrains Mono for sensor values and IDs. # * CRM / inbox: Inter or Geist display + Inter body (keep simple, content is the hero). # * Analytics / BI: Inter Display or Söhne-likes + Inter body, plus IBM Plex Mono for table figures. # * Project / workspace: Outfit or Plus Jakarta Sans display + Inter body. # - REGIONS: always produce 8–14 distinct widget regions for a full dashboard surface, fewer for focused workspaces. # IF THE USER'S INPUT IS ALREADY IN THIS STRUCTURE: leave it essentially as-is — only fix obvious structural gaps or missing fields. Do not rewrite already-good prompts. # IF THE USER'S INPUT IS HIGHLY DETAILED (≥ 600 words, has labeled regions, exact text strings, specific numbers, named widgets, unusual visual moves): treat the user's content as authoritative. 
Restructure into the 6-block flowing-prose format BUT keep every element the user mentioned — every exact label, every number, every hex code, every named widget, every nested layout layer. Your output will be longer than typical (1,500–2,200 words is fine). Do not summarize, simplify, or replace specific details with generic ones. # Normalize the user's input now. Output only the normalized prompt.""" NORMALIZER_SYSTEM_PROMPT = """You are a prompt normalizer for a website-design HTML generation pipeline. A user gives you any kind of website-design request — a one-liner, a partial brief, or a fully-detailed spec — and you output a single flowing paragraph in a precise structure that the downstream model expects. VERY IMPORTANT: Always, ALWAYS specify which text color goes on which surface (so the model won't make mistakes like black text on black surface); DO NOT USE LIGHT COLORS ON LIGHT BG, OR DARK COLORS ON DARK BG PLEASE. OUTPUT RULES (non-negotiable): - Output ONLY the normalized prompt. No preamble, no explanation, no headings, no markdown, no code blocks, no wrapping quotes. - Single flowing paragraph. No bullets, no numbered lists, no labeled sections like "Colors:" or "Typography:". - No mention of real brands you weren't given (Linear, Vercel, Apple, Stripe, Framer, Notion, etc.) — steal patterns, never invent brand names. BUT if the user explicitly names their own product/brand (e.g., "UX PILOT", "Nimbus", "Tsubaki"), preserve that exact name. - Length adapts to input. Short/vague input → 500–900 words. Detailed input (≥ 600 words) → match the user's level of specificity, up to 2,000+ words. NEVER compress detailed input to fit a length budget. PRESERVE-DETAIL RULES (critical for detailed inputs): - The conversion is STRUCTURAL (labeled sections / bullets / lists → flowing prose), NOT LOSSY. Every concrete detail the user provided survives the rewrite. 
- Preserve VERBATIM: * Exact text strings to render in the page ("WELCOME BACK, ADAM", "Roasted Fresh, Delivered Daily", any quoted copy) * Brand and product names exactly as written (UX PILOT, Nimbus, Nodey, Tsubaki) * Specific numbers (counts, prices, percentages, IDs like "20881", "600,000+ USERS") * All hex codes the user supplied — do not substitute "near-white" for their #f2f1ec * Distinctive structural elements (architectural crop marks, ruler tick marks, geometric brackets, monospace annotations, nested mockups, drag-sliders, marquees with specific content, etc.) - If the user describes a section in 80 words of detail, your prose version of that section keeps every detail and is roughly the same length. Don't summarize. - If the user describes a nested element ("inside this container, there's a frame, inside the frame is a window mockup"), preserve all nesting layers in your prose. AESTHETIC THINKING (when the user gave none): If the user supplied structure but no aesthetic, imagine the page deeply first as a design director would before writing, produce something good, like an award winning website, something breath taking. STRUCTURE (merge all six blocks into one continuous paragraph): 1. OPENING — "Design me a [SITE TYPE] homepage for [CONTEXT] — audience is [1-3 AUDIENCE TYPES], and the goal is [CONCRETE GOAL][, OPTIONAL MOOD FRAGMENT]." 2. DISTINCTIVE FLOURISH — one sentence describing a single standout interactive or visual behavior (scroll-driven, cursor effect, load timing, time-of-day, typographic reveal, mouse parallax, live data tick, image-reveal animation, etc.). Example: "Replace the default cursor everywhere with a small violet dot that grows on hover." If the user didn't specify one, invent one that fits the brand tone. 3. STRUCTURAL WALKTHROUGH — walk through 10–14 sections using connective phrases ("Start with… then a hero… Flow into… Follow with… Then… Close with…"). 
Each section gets a brief parenthetical layout hint with specific grid spans (col-span-7), pixel values (56px, min-400px), borders (1px), hex colors (#XXXXXX), spacing (py-32, mx-auto), and behaviors (hover lift, slow parallax, crossfade, image-zoom-hover). Typical sections to draw from: nav, hero, trust bar / logo strip, stats, features, how-it-works, product demo / preview, use cases, portfolio / work grid, testimonials, integrations, pricing, FAQ, final CTA, footer. Pick what fits the site type. 4. COLORS INLINE — "Use [base] [hex] with [text] [hex] text, [primary accent] [hex] [role], and [neutral] [hex] for [surfaces]." Every color named inline with its hex code. Never use a "Colors:" label. 5. TYPOGRAPHY LINE — "[Display font] [size] [optional tracking] [weight range] display, [body font] [weight range] [body size] body." Name real Google Fonts or common typographic families. 6. CLOSING RULE — ALWAYS end the output with this exact sentence: "Icons via Font Awesome only — never inline SVG - never hidden body overflow." This is mandatory on every prompt. The closing rule must ALSO explicitly state, in the same final paragraph, the global rules for: (a) gradients, (b) shadows, and (c) rounded corners. Don't leave any of those three categories unspecified. Examples of valid closing rules: "Pure flat — no gradients, no shadows, no rounded corners except where explicitly noted on [list]. Icons via Font Awesome only — never inline SVG." or "Hard black offset shadows only where specified, no gradients anywhere, no rounded corners except the avatar circle and badge pill. Icons via Font Awesome only — never inline SVG." NO-COIN-FLIPS CHECKLIST (apply before every output — these are the rules that separate prompts the model can render deterministically from prompts where it has to guess): 1. NO "or" CHOICES anywhere in the prompt. Wrong: "approximately #1f2937 or #111827", "Cormorant or Playfair Display", "via a CSS triangle or Font Awesome icon". 
Right: pick ONE specific value. If the user gave a range like "16–20px thick", that's fine — it's a single design intent; the model can pick within. But a binary "X or Y" between two different identities is a coin-flip. 2. NO PER-ELEMENT STAGGERED ANIMATIONS. Wrong: "every text element snaps in with a 40ms staggered clip-reveal" or "headline reveals first, then trust badge, then nav links". Right: ONE global state change. "Hold a 400ms blank state on load, then all elements snap into position simultaneously" or "scroll-driven warmth shift across the whole page" or "no animation at all". A single global directive the model can render with one CSS rule. 3. EXACTLY TWO TYPE FAMILIES. Pick a display family + a body/UI family, OR a display family + a mono family. Never three. If the user's draft has three (e.g., Space Grotesk + Inter + JetBrains Mono), drop the middle one — Space Grotesk medium can do body, you don't need Inter as a third family. The closing rule should reinforce this: "exactly two type families across the entire page, no third family anywhere." 4. ONE HEX PER COLOR ROLE. Every color used in any section must appear in the palette block, mapped to exactly one role. If the section says nav links are #2d2d2d but the palette doesn't list #2d2d2d, that's an undeclared color — the model treats it as noise. Every #hex used → declared once in palette. 5. SECTION SPECS MUST AGREE WITH CLOSING RULE. If the closing rule says "no shadows except the nav badge," then no other section should specify a shadow. If a section says "CTA hover has a 3px 3px 0 black shadow," the closing rule MUST allow that — e.g., "hard black offset shadows only where specified". Every shadow / corner-radius / gradient mentioned in any section must be allowed by the closing rule. 6. DESIGN-LANGUAGE PROSE, NOT CSS-SPEC. Wrong: "rendered as a CSS border construction", "via a small CSS triangle", "font-weight 800", "letter-spacing -1px to -2px", "clamp(48px, 8vw, 80px)", "rgba(0,0,0,0.12)". 
Right: "thick lilac frame", "extrabold tight tracking", "subtle box shadow". The downstream model is trained on design-language descriptions — CSS-spec phrasing maps less cleanly. Tailwind utility classes (py-32, mx-auto, col-span-7) are FINE because the model is trained on Tailwind output; pure CSS jargon is not. 7. CONSISTENT TAILWIND vs PROSE. Either describe spacing in Tailwind classes throughout (`py-80, px-8, mt-16`) or in design prose throughout ("generous vertical padding, comfortable horizontal padding"). Don't mix freely within a single prompt — pick one register and stick with it. 8. NO AMBIGUOUS RULE EXCEPTIONS. If the closing rule says "no rounded corners on any interactive element" but the page has a circular user avatar, the model has to decide if the avatar is "interactive" — that's a coin-flip. Resolve in advance: either name the exception explicitly ("no rounded corners except the avatar circle and the badge pill") or remove the rounded element from the design. SMART DEFAULTS (use when the user didn't specify): - COLORS: NEVER default to pure white + pure black only. Always produce 4–5 colors — base, text, muted copy, surface/card, plus ONE accent. Use a sophisticated neutral palette that matches the brand tone: * Editorial/minimal: near-white #FAFAFA base, ink #0A0A0A text, muted #6B7280 copy, surface #F4F4F4, plus a subtle accent like warm cream #F5EDE0, soft sage #8B9E84, dusty rose #D4748A, muted gold #C5A55A, olive #5C6B4F, or soft slate #64748B. * Modern SaaS: white #FFFFFF base, near-black #0A0A0A or #111111 text, muted #6B7280 copy, card #F9FAFB, plus an accent like electric blue #2563EB, indigo #6366F1, emerald #10B981, or violet #A78BFA. * Warm/premium: off-white #F7F5F0 base, natural #1C1C1A text, surface #EFEDE8, plus a warm accent like terracotta #C4775A, gold #B8934A, or sage #6B8F71. 
* Dark/moody: charcoal #0A0A0A or #0D0D0D base, off-white #F0EBE3 text, secondary dark #1A1A1C, plus an accent like gold #C5A55A, electric green #22C55E, or electric blue #0066FF. - AUDIENCE: infer from site type. SaaS → product leads, founders, teams. DTC → design-conscious consumers, ethical shoppers. Portfolio → creative directors, clients. Dashboard → name the professional role. Agency → brand leads, marketing directors. App → specific user persona. - GOAL: infer from site type. SaaS → trial signups and demo bookings. DTC → first-purchase conversions. Portfolio → project inquiries. App → downloads and trial starts. Dashboard → reduce friction on core tasks. Agency → consultation requests. - FLOURISH: invent one that fits the brand. Scroll velocity crop, time-of-day hero, cursor parallax, image-reveal on load, live data ticker, 600ms load hold, typographic reveal, background luminance pulse, kinetic captions — all valid patterns. - TYPOGRAPHY: use tasteful real-font pairings. * Editorial / serif: Playfair Display, Cormorant Garamond, Fraunces, DM Serif Display, Noto Serif Display + DM Sans or Inter body. * Modern SaaS: Inter Tight, Geist, Space Grotesk + Inter or Geist body. * Friendly: Outfit, Plus Jakarta Sans, Syne + Inter body. * Dev / terminal: JetBrains Mono, IBM Plex Mono, Fira Code + Inter body. - SECTIONS: always produce 10–14 sections even for vague inputs. IF THE USER'S INPUT IS ALREADY IN THIS STRUCTURE: leave it essentially as-is — only fix obvious structural gaps or missing fields. Do not rewrite already-good prompts. IF THE USER'S INPUT IS HIGHLY DETAILED (≥ 600 words, has labeled sections, exact text strings, specific numbers, named UI components, unusual visual moves): treat the user's content as authoritative. Restructure into the 6-block flowing-prose format BUT keep every element the user mentioned — every exact text string, every number, every hex code, every named visual element, every nested layout layer. 
Your output will be longer than typical (1,500–2,200 words is fine). Do not summarize, simplify, or replace specific details with generic ones. Normalize the user's input now. Output only the normalized prompt.""" DASHBOARD_NORMALIZER_SYSTEM_PROMPT = """You are a prompt normalizer for a dashboard / application-UI HTML generation pipeline. A user gives you any kind of dashboard request — a one-liner, a partial brief, or a fully-detailed spec — and you output a single flowing paragraph in a precise structure that the downstream model expects. VERY IMPORTANT: CARDS MUST BE VISIBLY DIFFERENT FROM THE BACKGROUND. Never give a card the same color as the page background — cards have to stand off from the surface they sit on. Always specify which text color goes on which surface; if the layout has a dark card on a light page (or any inverted surface), explicitly state the text colors for both the light and the dark surface so text never disappears into its background. Build structure through contrast, borders, and spacing together — not through any one alone. Make it easy to see what's a card, what's a button, what's text, and where one section ends and the next begins. HARD LIMITS (apply before output — these are explicit negative rules; ignoring them produces plain or broken downstream output): NO COORDINATION REQUIREMENTS BETWEEN REGIONS. Do not say "matching the map's height exactly" or "aligned to the table below" or "this card's width syncs with the chart above." Each region's dimensions are its own; the model handles alignment from grid structure alone. DENSITY SELF-CHECK (before output): mentally count atomic specs in your output. If above 50, identify the bottom 20% by importance and cut them. 
Common safe cuts: pixel ranges that could be col-spans, secondary animation behaviors layered on the same elements, sub-elements inside cards that don't change the page's character if removed, multi-stat hero cards padded with 4-6 numbers, redundant restatements of the register (state it once at the top, don't repeat the rules in every section). Thoroughness past a threshold becomes noise. The prompt's job is to set the register and the structural skeleton — not to resolve every visual decision in advance. The downstream model has its own taste at the pixel level; let it use it. OUTPUT RULES (non-negotiable): - Output ONLY the normalized prompt. No preamble, no explanation, no headings, no markdown, no code blocks, no wrapping quotes. - Single flowing paragraph. No bullets, no numbered lists, no labeled sections like "Colors:" or "Typography:". - No mention of real brands you weren't given (Linear, Vercel, Notion, Salesforce, Stripe, Mercury, Monzo, Revolut, etc.) — steal patterns, never invent brand names. BUT if the user explicitly names their own product/brand (e.g., "Ardentis", "WindFarm", "Tavolo"), preserve that exact name. - Length adapts to input. Short/vague input → 500–900 words. Detailed input (≥ 600 words) → match the user's level of specificity, up to 2,000+ words. NEVER compress detailed input to fit a length budget. - Prefer design-direction prose over micro-spec. Set the register, not the pixel values — the downstream model has its own taste at the pixel level. Do NOT specify exact font-sizes, exact opacity percentages, or exact pixel paddings unless the user supplied them. CLOSING RULE — ALWAYS end the output with this exact sentence: "Icons via Font Awesome only — never inline SVG - never hidden body overflow." This is mandatory on every prompt. The closing rule must ALSO explicitly state, in the same final paragraph, the global rules for: (a) gradients, (b) shadows, and (c) rounded corners. Don't leave any of those three categories unspecified. 
Examples of valid closing rules: "Pure flat — no gradients, no shadows, no rounded corners except where explicitly noted on [list]. Icons via Font Awesome only — never inline SVG." or "Hard black offset shadows only where specified, no gradients anywhere, no rounded corners except the avatar circle and badge pill. Icons via Font Awesome only — never inline SVG." PRESERVE-DETAIL RULES (critical for detailed inputs): - The conversion is STRUCTURAL (labeled sections / bullets / lists → flowing prose), NOT LOSSY. Every concrete detail the user provided survives the rewrite. - Preserve VERBATIM: * Exact text strings to render in the UI ("Yield Overview", "Total Collateral", "Booking Requests", any quoted copy or label) * Brand and product names exactly as written * Specific numbers (KPI values like $67659.99, counts like "ACTIVE 18", percentages, IDs like "944905011UZ") * All hex codes the user supplied — do not substitute "near-white" for their #f2f1ec * Distinctive widget elements (radial gauges with thresholds, kanban column counts, map overlay popups, multi-panel arrangements, status pill counts) - If the user describes a region in 80 words of detail, your prose version keeps every detail and is roughly the same length. Don't summarize. - If the user describes a nested element ("inside the sidebar, a nav stack, each nav item with icon + label + count"), preserve all nesting layers in your prose. VISUAL REGISTER (when the user gave none): If the user supplied widgets and workflow but no aesthetic, commit to ONE coherent visual register before writing the structural walkthrough. Neutral is failure. Invent one that fits the operator's context, and describe it in design-direction prose — not in pixel values, exact opacities, or exact font-sizes. The downstream model handles those decisions from its own training; your job is to set the register and let the model's taste fill in the pixels. 
Avoid the lazy fallback (dark navy + electric blue + Inter + 8px-corner cards) unless that genuinely is your imagined register — that's "Linear utilitarian" and only fits when the operator's context calls for it. SMART DEFAULTS (use when the user didn't specify): - COLORS: Always produce 5–7 colors — base, text, muted copy, surface/card, border, primary accent, plus mandatory state palette of success / warning / danger. Pick a palette that signals the chosen register from the library above. Avoid the lazy default (dark navy + electric blue) unless the register is genuinely Linear utilitarian. - OPERATOR / WORKFLOW: infer from dashboard type. Yield / DeFi → user managing positions. Fleet ops → dispatcher coordinating drivers. CRM inbox → support agent triaging. Analytics → marketing or product lead reviewing. Industrial → field operator or control-room engineer. Project → PM running sprints. - FLOURISH: live ticker on primary KPI, pulse on state-change indicator, hover detail tooltip on chart, smooth kanban-card drag, breathing online dot, count-up animation on header counters. - TYPOGRAPHY: pick the pairing that matches the chosen register — Fraunces + Inter for soft modern fintech, Outfit + Inter for neo-banking calm, Tiempos + Inter for editorial broadsheet, Geist + Inter for glass dark aurora, JetBrains Mono alone for brutalist trading terminal, Söhne-likes Condensed + Inter for swiss minimal grid, Inter + Geist Mono for Linear utilitarian, Fraunces + Inter + JetBrains Mono for architectural drafting. - REGIONS: always produce 8–14 distinct widget regions for a full dashboard surface, fewer for focused workspaces. IF THE USER'S INPUT IS ALREADY IN THIS STRUCTURE: leave it essentially as-is — only fix obvious structural gaps or missing fields. IF THE USER'S INPUT IS HIGHLY DETAILED (≥ 600 words, has labeled regions, exact text strings, specific numbers, named widgets, unusual visual moves): treat the user's content as authoritative. 
Restructure into the 6-block flowing-prose format BUT keep every element the user mentioned — every exact label, every number, every hex code, every named widget, every nested layout layer. Your output will be longer than typical (1,500–2,200 words is fine). Do not summarize. Normalize the user's input now. Output only the normalized prompt."""

# Keywords that indicate a dashboard / app-UI prompt — case-insensitive substring match.
# Every entry is lowercase because is_dashboard_prompt lowercases the prompt
# before comparing. Some entries contain a shorter entry ("ops console" contains
# "console", "inbox workspace" contains "workspace", "bi reporting" contains
# "bi report"), so the longer ones never change the result on their own.
DASHBOARD_KEYWORDS = (
    "dashboard",
    "admin panel",
    "control panel",
    "control room",
    "command center",
    "console",
    "workspace",
    "monitoring",
    "monitor surface",
    "ops surface",
    "ops console",
    "kanban",
    "inbox view",
    "inbox workspace",
    "data table",
    "analytics overview",
    "bi report",
    "bi reporting",
    "trading terminal",
    "back office",
    "internal tool",
)


def is_dashboard_prompt(prompt: str) -> bool:
    """Return True if the user's prompt looks like a dashboard / app-UI request.

    The prompt is lowercased and tested for each entry of DASHBOARD_KEYWORDS
    as a plain substring, so the check is case-insensitive and also fires when
    a keyword appears inside a longer word (for example "consoles").

    Args:
        prompt: The user's prompt text.

    Returns:
        True when at least one keyword occurs in the lowercased prompt,
        False otherwise (an empty string yields False).
    """
    # Lowercase once so every keyword comparison is case-insensitive.
    p = prompt.lower()
    # NOTE(review): multi-word keywords only match when their words are
    # separated by exactly one space; a line break, double space or hyphen
    # between them ("admin\npanel", "back-office") is not detected.
    return any(kw in p for kw in DASHBOARD_KEYWORDS)


# Use Highcharts (SVG mode) for charts when requested.
# 5prompts
PROMPTS = [
    # 1
    """Landing page for a habit-tracking mobile app called Streak. The audience is people in their 20s and 30s who have tried other habit apps and quit. 
The page needs: a hero section with the app name, a one-line value prop, two CTAs (App Store, Google Play), and a phone mockup placeholder showing the app; a 'why this one is different' section with three points (no streaks-anxiety, friend accountability, science-backed reminders); a section explaining how it works in four steps (pick a habit, set when, get nudged, check in); a testimonials section with three quotes from real users including their name, age, and the habit they built; a comparison table vs the two main competitors; a pricing block (free forever, $4/mo pro with extras); an FAQ with at least six questions covering data privacy, cancellation, family plans, and Apple Health integration; and a footer with newsletter signup, social links, and legal pages.""", # 2 """Operations dashboard for a mid-size logistics company that runs ~200 trucks across the US. The primary user is a dispatcher who has this open all day. Needs a left sidebar with navigation (Overview, Shipments, Fleet, Drivers, Customers, Reports, Settings); a top bar with search, notifications bell, and a user avatar dropdown. The main area should have four KPI cards across the top (active shipments, on-time delivery rate, fleet utilization, fuel cost this week — each with a trend arrow and sparkline). Below that, a large interactive map placeholder showing truck positions, with a panel on the right listing the active shipments visible on the map. Below the map, a recent deliveries table (columns: shipment ID, origin, destination, driver, status, ETA, value), with status pills, sortable headers, and a filter bar. To the right of the table, a stacked alerts panel (delays, breakdowns, customer complaints), each alert clickable. The dashboard should feel dense but not cluttered.""", # 3 """Marketing site for an AI legal assistant called Counsel.ai aimed at small law firms (2–20 attorneys). The pitch is: it drafts contracts, summarizes case law, and handles client intake conversations. 
Need a hero with the headline, sub-headline mentioning ABA-aligned, a CTA to book a demo and a secondary 'see it in action' that scrolls to a video. Below the hero: a logo strip of the law schools and firms already using it. Then a 'what it does' section with three big cards (contract drafting, case law research, intake automation), each with a short description and a screenshot placeholder. Then a live chat preview component showing a sample intake conversation between the AI and a potential client. Then an integrations section showing logos of Clio, MyCase, LexisNexis, Westlaw, Dropbox, and Gmail. A security and compliance section is critical — call out SOC2 Type II, attorney-client privilege handling, encryption at rest and in transit, and US data residency, presented as four trust badges with short explanations. End with a testimonials section featuring three managing partners by name and firm, a pricing block (Starter, Growth, Firm tiers), and two CTAs in the footer (book demo, start free trial).""", # 4 """E-commerce product detail page for a premium pair of trail running shoes called the Ridge Pro 2. The page needs: a breadcrumb at the top (Home > Men > Running > Ridge Pro 2); a two-column layout with a vertical thumbnail strip and a main image gallery on the left (six placeholder images, with one being a 360-degree view), and the product info on the right. The right column needs the product title, a sub-line ('Built for technical descents'), a star rating with review count, the price ($185) with a 'free shipping over $75' note, a color picker with five swatches, a size picker with US sizes 7–13 including half sizes (some marked sold out), a quantity stepper, an 'Add to cart' button, and an 'Add to wishlist' icon button. 
Below the right column, a tabs section: Description (with a paragraph and bullet specs — weight, drop, stack height, lug depth, materials), Reviews (showing average rating, a 5-star bar breakdown, three sample reviews with reviewer name and verified-buyer badge), and Shipping & Returns. Below the two columns, a 'Pairs well with' carousel of four related products. On scroll, the add-to-cart bar should stick to the bottom of the viewport with the product name, selected variants, and a buy button.""", # 5 """Onboarding flow page — step 3 of 5 — for a project management app called Mosaic. The user has just signed up and is being walked through setup. Step 3 is 'Invite your team.' The page should have a top progress indicator showing 5 steps with the third one active and the first two checked. The center of the page has a heading ('Bring your team in'), a sub-heading ('Mosaic works best when everyone is on it'), and a form area: three rows of inputs by default (each row: email field + role dropdown with options Admin/Member/Viewer), an 'Add another' link, an alternative 'Invite by link' section with a copyable URL and a 'reset link' option, and a section to bulk-paste emails. Below the form, two buttons: a primary 'Send invites and continue' and a secondary 'Skip for now.' On the right side of the page, a small contextual card: 'Why invite now? Teams that invite within the first day are 4x more likely to stick with Mosaic' with a small illustration placeholder. Footer should have a 'Need help?' link.""", # 6 """Pricing calculator page for a cloud hosting provider called Stratus. The audience is engineering leads picking a vendor. The page should let them estimate monthly cost based on their usage. 
Layout: a title and short intro at the top, then a calculator card taking the center of the page with controls for compute (a slider for vCPUs from 1 to 64 and a slider for RAM from 1 to 256 GB), storage (a slider for SSD GB from 10 to 5000), bandwidth (a slider for monthly TB from 0.1 to 50), and a region selector with five options (US-East, US-West, EU-West, AP-South, AP-East) shown as pill buttons. As the user moves sliders, a price summary panel on the right of the calculator updates live: a big monthly total, a breakdown by line item, and an annual cost with a 'save 20% with annual' note. Below the calculator, a 'How we compare' section showing the same configuration's price on the three biggest competitors (AWS, GCP, DigitalOcean) — clearly labeled as estimates. Below that, an FAQ about how billing works, overage charges, what counts as bandwidth, and free tier. End with a CTA to start a free trial and a 'talk to sales' option for enterprise.""", # 7 """Status page for a developer API service called Pulse. The audience is engineers who integrate Pulse and need to know if it's up. Top of the page: a big banner showing overall system status — green if everything is operational, with the message 'All systems operational' and a last-updated timestamp. Below the banner, a list of individual services (API, Dashboard, Webhooks, SDKs, Documentation, Authentication), each as a row with the service name on the left, a status pill on the right (operational, degraded, partial outage, major outage), and a 90-day uptime bar showing daily status as colored segments. Each row should be expandable to show recent metrics (latency p50/p95/p99, error rate). Below the service list, a 'Past incidents' section grouped by date, showing incident title, status (resolved, investigating, identified, monitoring), affected components, and a timeline of updates. At the very top right, a 'Subscribe to updates' button that opens a modal with options for email, webhook, RSS, and Slack. 
Header should have the company logo and links to status history, API docs, and main site.""", # 8 """Case study page for a B2B design agency called North Field that just delivered a rebrand for a fintech client called Halcyon. The audience is potential clients evaluating the agency. The page needs: a hero with the client logo placeholder, the project title ('Rebranding Halcyon for the next stage'), a one-line summary, and three big stat numbers across the bottom (e.g., 47% lift in signups, 3 months delivery, 12 deliverables). Below the hero, a project meta strip (industry, timeline, services delivered, team size). Then a 'The challenge' section with two paragraphs and a pull quote from the client's CEO. Then a 'Our approach' section broken into three phases (discovery, design, rollout) each with a short paragraph and an image placeholder. Then a 'Results' section with three stat cards expanded into context (what the number means, how it was measured). Then a full-width testimonial quote from the CEO with their photo, name, and title. Then a deliverables gallery showing 6 placeholder images of work artifacts. End with a 'Ready to start your project?' CTA card and three thumbnails of related case studies.""", # 9 """Settings page for a developer tools app called Forge. The user opening this is an engineering manager configuring their team's workspace. Layout: a left sub-nav listing settings sections (Profile, Workspace, API Keys, Webhooks, Team Members, Integrations, Billing, Danger Zone). The active section is API Keys. The main panel shows a heading and a short description of what API keys are. Below that, a 'Generate new key' button. Below that, a table of existing keys with columns: name, scope, last used, created, and a row actions menu (rename, revoke). 
Below the API keys table, the next section visible on scroll should be Webhooks — a list of registered webhook endpoints, each showing URL, events subscribed to, last delivery status, and a toggle to enable/disable. Below that, Team Members — a table with member avatars, names, emails, roles (with editable dropdowns), and a remove action. Below that, a Billing summary card showing current plan, next invoice amount, and 'manage billing' button. At the very bottom, a Danger Zone section in a clearly distinct visual treatment, with two destructive actions: Transfer Ownership and Delete Workspace. The page should feel like a serious admin surface.""", # 10 """Inbox view for a customer support platform called Mailroom. The user is a support agent working through tickets. Three-panel layout. Left panel (narrow): conversation list with filter tabs at the top (All, Unassigned, Mine, Mentions), a search bar, and below that a list of conversations — each row showing customer avatar, name, subject preview, last message preview, time, and unread indicator. Middle panel (widest): the active conversation. Top of the panel: customer name, channel (email, chat, etc.), and action buttons (assign, snooze, close, more). Below that, the message thread — alternating customer and agent messages with timestamps, the agent's messages on the right. At the bottom of the middle panel, a reply composer with formatting toolbar, attachment, and a send button with a dropdown to send-and-close. Right panel (narrow): customer details — avatar, name, email, plan, signup date, lifetime value, last active. Below that, a 'past conversations' list (last 5), then 'internal notes' (a small section where teammates can leave notes about this customer), then a 'related articles' suggestions list (auto-suggested help docs based on the conversation content).""", # 11 """Homepage for a Series A fintech startup called Stack that helps freelancers handle quarterly taxes. 
Audience: US-based freelancers earning $40k–$200k who currently use a CPA or do nothing. Hero: clear value prop ('Quarterly taxes, handled'), sub-line, two CTAs ('Estimate my taxes' and 'How it works'), and a hero visual of a phone showing the app's tax estimate screen. Below the hero, a logo bar with publications that have covered them (TechCrunch, NYT, Wired, etc.). Then a 'How it works' section with three steps (connect your accounts, we calculate every quarter, file with one tap), each with an icon and short description. Then a tax savings calculator widget where users input their annual freelance income and state, and it shows estimated savings and time saved vs. doing it themselves. Then a testimonials section with three quotes from named freelancers (designer, developer, copywriter) including profession and city. Then a section addressing the common objection 'why not just use a CPA?' with a side-by-side comparison. Then a security and trust section (bank-level encryption, SOC2, never sells data). Then pricing — flat $25/month, no upsells. End with an FAQ (six questions covering states supported, what if I'm late, integration with QuickBooks, refunds, multi-state freelancers, accuracy guarantee) and a footer with newsletter signup.""", # 12 """Comparison page on a SaaS marketing site: 'Linear vs Jira.' The audience is a team currently on Jira considering switching. Hero: a clear headline ('A modern alternative to Jira'), a one-line sub, and two CTAs (start free, book a migration call). Below the hero, a feature-by-feature comparison table with about 20 rows grouped into sections (Speed & UX, Workflow, Integrations, Pricing, Support), with check/x marks and short clarifying notes per cell. Below the table, three side-by-side highlight cards covering the biggest differences (10x faster UI, opinionated workflow, transparent pricing). Below that, a pricing comparison block with the equivalent plan from each side by side, showing per-seat cost. 
Below that, a customer story section: a quote from someone who migrated, with their photo, name, title, and company, plus three stat numbers from their experience (e.g., '4 hours of meetings saved per week', '70% adoption in week 1', etc.). Below that, a 'Migration is easy' section with a three-step process (export from Jira, run our migrator, go live in a day) and a CTA to talk to the migration team. End with FAQ covering data import, custom fields, permissions parity, and pricing edge cases.""", # 13 """Profile page on a freelance marketplace called Make. The profile belongs to a senior brand designer. The page is what a hiring client sees when they land on this designer's page. Top: a wide cover photo placeholder, with the avatar overlapping the bottom edge. Below, the name, headline ('Brand designer for early-stage SaaS'), location, hourly rate, response time, availability badge, and two prominent CTAs ('Hire' and 'Message'). Below that, a row of trust signals (top-rated badge, identity verified, total earnings, 5-year tenure on the platform). Then a section with the designer's bio (two paragraphs). Then a skills section with tag chips (brand strategy, logo design, visual identity, design systems, Figma, illustration). Then a portfolio grid of nine project thumbnails with title and category overlay on hover. Then a services offered list — three packaged services with title, price starting at, delivery time, and a 'select' button. Then a reviews section: average rating, total reviews, a star breakdown, and four sample reviews with client name, project title, rating, and quote. Then a 'work history' section listing past completed projects with client and date. The Hire CTA should stick to the right side of the viewport on scroll.""", # 14 """Search results page for a job board focused on remote engineering roles, called Async. The user has just searched 'senior backend engineer.' 
The page should have a top filter bar with the search query and quick filters (Remote-only, Full-time, Salary > $X, Posted in last 7 days). Left sidebar: more detailed filters — location/timezone (with a multi-select for regions like Americas-only, Europe-only, Anywhere), salary range slider, role seniority checkboxes, company size, tech stack tags (Go, Python, Rust, TypeScript, Postgres, AWS, etc.), and equity/benefits toggles. Main area: at the top, results count and a sort dropdown (relevance, newest, highest salary). Below, a list of job result cards — each showing company logo, role title, company name, salary range, location/timezone, posted date, a brief role description, three tech stack tags, and a 'Save' bookmark icon. About 10 results visible, with pagination at the bottom. To the right of the result list, a sticky 'Save this search' card prompting the user to get email alerts for new matches. Below the result list, a 'Companies hiring now' section with logos linking to each company's full job list.""", # 15 """Documentation homepage for an open-source ML library called Tensorgrove. Audience: ML engineers and researchers evaluating or starting with the library. The page should feel technical, fast, and not over-designed. Top header with library name, version selector (showing current stable version), and links (Docs, API, Tutorials, Blog, GitHub stars badge). Hero: short tagline ('Composable tensor operations for research'), a one-line description, and two prominent buttons ('Quickstart' and 'API reference'). Below the hero, a quickstart code block showing a 10-line install + first-example snippet with syntax highlighting and a copy button. Below that, a four-tile grid linking to main doc sections: Getting Started, Core Concepts, Tutorials, API Reference — each with an icon, short description, and arrow. 
Below that, a 'What you can build' section with three example projects (an image classifier, a transformer from scratch, an RL agent), each with a thumbnail placeholder, brief description, and a 'see code' link. Below that, a 'Used by' section showing logos of universities and labs. Below that, a community card linking to Discord, GitHub Discussions, and the bi-weekly office hours, with member count. End with a footer that has docs links, GitHub, license info, and a 'star us on GitHub' button.""", # 16 """Booking flow page for a hair salon's website. This is step 2 of 4: pick your stylist and time. Top of the page: a 4-step progress indicator (Service > Stylist & Time > Your Info > Confirm), with step 2 active. Below the progress indicator, a summary strip showing what was selected in step 1 (service: 'Cut & Color', estimated duration: 2h 15m, price range: $180–$240), with a small 'edit' link. Main content split into two columns. Left column (wider): the stylist + time picker. At the top, four stylist cards in a row, each showing the stylist's photo, name, specialty tag, average rating, and price tier indicator. One can be clicked to filter the calendar below. Below the stylist cards, a date picker showing the next 14 days as a horizontal scrollable strip, with each day showing day name, date, and an availability dot. Below the date strip, a time slot grid for the selected day — slots in 15-minute increments from 9am to 7pm, with available slots clickable and unavailable greyed out. Right column (narrower): a sticky booking summary card showing service, selected stylist (or 'Any available'), selected date and time (or 'Pick a time'), price range, and a 'Continue to your info' button (disabled until a slot is chosen). Below the summary card, a 'Cancellation policy' note. Footer with help link and salon contact info.""", # 17 """Help center landing page for a consumer finance app called Pocket. 
Audience: existing users with questions, ranging from non-technical to power users. Top: a centered hero with the help center title, a short reassurance line ('We're here to help — find an answer fast or talk to us'), and a large search bar with placeholder text ('Search articles, e.g., "transfer limits"'). Below the hero, a 'Popular topics' section as a 6-tile grid, each tile with an icon, a topic title (Account & login, Sending money, Cards, Limits & verification, Security, Billing & fees), and the article count in that category. Below that, a 'Top articles' list — 8 article links with titles and a 1-line snippet. Below that, a 'Still need help?' section with three contact options as side-by-side cards: chat with support (with current wait time), email (with response time SLA), and schedule a call (with available slots indicator). Each option should make clear what the user gets. Below that, a status indicator section linking to the public status page if there's any ongoing issue. Footer with links to community forum, security page, accessibility statement, and changelog.""", # 18 """Changelog page for a productivity app called Nimbus. Audience: existing users who want to see what's new and developers integrating with the API. Top: page title, short intro, and three controls — a category filter (All, New, Improved, Fixed, Security, API), a search bar, and subscribe options (RSS, email, webhook). Below the controls, the changelog feed grouped by month, with the current month at the top. Each month is a section with a month header. Within each month, individual entries listed reverse-chronologically. Each entry has: a date, a version tag (v3.4.1 style) when applicable, a category pill, a heading, a short paragraph (2–4 sentences) describing the change, optionally an inline screenshot or short video placeholder, and a 'See in app' deep-link button when relevant. 
Some entries are big releases with longer write-ups including bullet sub-changes and a 'Read full post' link. Sidebar on the right: a 'jump to month' navigation, a 'biggest changes this quarter' highlight box with three pinned entries, and a card promoting the public roadmap. The page should feel like a real product log, not marketing fluff.""", # 19 """Real estate listing detail page for a 3-bedroom house in Brooklyn. Audience: a buyer or renter browsing on desktop. Top: a breadcrumb (Home > Brooklyn > Park Slope > 312 7th Ave). Below, a full-width photo carousel placeholder showing the listing image, with arrows, image counter (1/24), and a 'view all photos' button that opens a gallery, plus tabs to switch between Photos / Floor Plan / Street View / Video Tour. Below the carousel, a two-column layout. Left column (wider): the listing details. Address, price (with 'price reduced' badge if applicable), price-per-sqft, beds, baths, square footage, and a short description paragraph. Then a 'Highlights' bullet list (renovated kitchen, private backyard, in-unit laundry, central AC, etc.). Then a 'Property details' table (year built, lot size, parking, HOA, taxes, MLS number). Then a 'Neighborhood' section with a small map placeholder, walk/transit/bike scores, and nearby points of interest (schools with ratings, parks, subway lines). Then a floor plan image with a square-footage breakdown. Then a 'Price history' table showing prior list events with date, event, and price. Right column (narrower, sticky): an agent contact card with the agent's photo, name, brokerage, phone, and a contact form (name, email, phone, message, 'I'd like to tour') with three CTA buttons ('Schedule a tour', 'Request info', 'Make an offer'). Below the agent card, a mortgage calculator widget (down payment, loan term, interest rate inputs, estimated monthly payment output). 
# ──────────────────────────────────────────────────────────────────────────────
# Prompt normalizer
# ──────────────────────────────────────────────────────────────────────────────

_anthropic_client = None


def _get_anthropic_client():
    """Return the module-wide Anthropic client, creating it on first use.

    The client is cached in the module global ``_anthropic_client`` so later
    calls reuse the same instance.

    Raises:
        RuntimeError: if the anthropic SDK failed to import, or if the
            ANTHROPIC_API_KEY environment variable is unset or empty.
    """
    global _anthropic_client
    if _anthropic_client is not None:
        return _anthropic_client
    if not _ANTHROPIC_AVAILABLE:
        raise RuntimeError("anthropic SDK not installed. Run: pip install anthropic")
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        raise RuntimeError("ANTHROPIC_API_KEY environment variable not set")
    _anthropic_client = Anthropic(api_key=api_key)
    return _anthropic_client


def _strip_wrapping_quotes(text: str) -> str:
    """Drop one pair of wrapping quotes (triple-double, double, or single) from *text*.

    Triple quotes are tried first so a triple-quoted reply is not mistaken
    for a single-double-quoted one. Text that is not wrapped is returned
    unchanged.
    """
    for quote in ('"""', '"', "'"):
        if text.startswith(quote) and text.endswith(quote):
            return text[len(quote):-len(quote)].strip()
    return text


def normalize_prompt(raw_prompt: str) -> str:
    """Normalize any user prompt into the structured format using the API.

    Routes to the dashboard normalizer if the prompt looks dashboard-shaped,
    otherwise uses the landing-page normalizer.

    Falls back to the raw prompt if normalization is disabled
    (NORMALIZE_PROMPTS is false), if the API call or response handling
    raises, or if the normalizer returns no usable text.

    Args:
        raw_prompt: the user's prompt exactly as supplied.

    Returns:
        The normalized prompt, or ``raw_prompt`` on any fallback path.
    """
    if not NORMALIZE_PROMPTS:
        return raw_prompt

    is_dashboard = is_dashboard_prompt(raw_prompt)
    system_prompt = DASHBOARD_NORMALIZER_SYSTEM_PROMPT if is_dashboard else NORMALIZER_SYSTEM_PROMPT
    print(f"[normalize_prompt] route → {'DASHBOARD' if is_dashboard else 'LANDING-PAGE'} normalizer")

    try:
        client = _get_anthropic_client()
        response = client.messages.create(
            model=NORMALIZER_MODEL,
            max_tokens=NORMALIZER_MAX_TOKENS,
            temperature=NORMALIZER_TEMPERATURE,
            system=system_prompt,
            messages=[{"role": "user", "content": raw_prompt}],
        )
        # A reply cut off at the token cap silently loses the tail of the
        # brief, so make that visible in the log.
        if getattr(response, "stop_reason", None) == "max_tokens":
            print(
                "[normalize_prompt] WARNING: normalizer output hit "
                f"max_tokens={NORMALIZER_MAX_TOKENS} and may be truncated"
            )
        # Strip any wrapping quotes the model might add
        normalized = _strip_wrapping_quotes(response.content[0].text.strip())
    except Exception as e:
        # Deliberate best-effort boundary: any failure degrades to the raw prompt.
        print(f"[normalize_prompt] WARNING: falling back to raw prompt — {e}")
        return raw_prompt

    if not normalized:
        # An empty (or quote-only) reply would otherwise be sent to the
        # generator as an empty user prompt.
        print("[normalize_prompt] WARNING: falling back to raw prompt — normalizer returned empty text")
        return raw_prompt
    return normalized
USE_UNSLOTH = True


def load_model_and_tokenizer():
    """Load the merged model and its tokenizer from MODEL_PATH.

    The model is loaded in bfloat16, moved to ``cuda:0`` and put in eval
    mode. The tokenizer's EOS token is forced to ``<|im_end|>`` and the same
    token is reused as the padding token.

    NOTE(review): USE_UNSLOTH is not consulted here — the model is always
    loaded through AutoModelForCausalLM. Confirm whether an Unsloth loading
    path is expected.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    print(f"Loading merged model from: {MODEL_PATH}")

    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
    ).to("cuda:0")
    model.eval()
    print(f"Attention implementation: {getattr(model.config, '_attn_implementation', 'unknown')}")

    # Stop generation at the chat end-of-turn marker and pad with the same token.
    tokenizer.eos_token = "<|im_end|>"
    tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

    print("Model loaded successfully")
    return model, tokenizer


# ──────────────────────────────────────────────────────────────────────────────
# Generation
# ──────────────────────────────────────────────────────────────────────────────

# NOTE(review): the sentence reads "Use for every image" — the placeholder
# element name appears to be missing after "Use"; confirm the intended text.
IMAGE_GUARD = """IMPORTANT: Use for every image — no external URLs, no src="https://...". Give every major section and card a unique id attribute."""
# with image guard is fine rank 1 for now 0.03 B


def generate_html(model, tokenizer, prompt_text):
    """Generate HTML for a single prompt and return it post-processed.

    Builds a two-message chat (SYSTEM_PROMPT + ``prompt_text``), renders it
    with the tokenizer's chat template, and runs ``model.generate`` with the
    module-level sampling settings. A TextStreamer is attached so tokens are
    streamed while they are generated.

    Args:
        model: causal LM as returned by ``load_model_and_tokenizer``.
        tokenizer: the matching tokenizer.
        prompt_text: user prompt (raw or normalized).

    Returns:
        The newly generated text (prompt tokens excluded), stripped and
        passed through ``post_process``.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt_text},
    ]

    input_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Follow the model's own device instead of assuming the default CUDA
    # device, so inputs and weights always live together.
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        add_special_tokens=False,  # the chat template already added them
    ).to(model.device)

    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            temperature=TEMPERATURE,
            top_p=TOP_P,
            do_sample=DO_SAMPLE,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
            streamer=streamer,
        )

    # Slice off the prompt so only the completion is decoded.
    generated_text = tokenizer.decode(
        outputs[0][inputs.input_ids.shape[1]:],
        skip_special_tokens=True,
    )

    return post_process(generated_text.strip())


def post_process(html: str) -> str:
    """Strip known contamination patterns that slip past the SYSTEM_PROMPT guard.

    Steps, in order:
      0. If a degenerate SVG path loop is present, remove SVG elements.
      1. Remove ``@font-face`` blocks whose ``src`` is an http(s) URL.
      2. Rewrite ``storage.googleapis.com/uxpilot-auth`` ``src`` attributes
         to a pravatar URL chosen deterministically from the original text.
      3. Remove media tags whose ``src`` is an external image/video file,
         except for allow-listed hosts.

    Args:
        html: generated markup.

    Returns:
        The cleaned markup. Input that matches none of the patterns is
        returned unchanged.
    """
    import hashlib

    # 0. Detect degenerate SVG loop (same coordinate pair repeated >50 times)
    #    This consumes the entire token budget and produces a broken output.
    loop_match = _re.search(r'((?:M|L|C)\d+\.?\d* \d+\.?\d*[ ,]?){50,}', html)
    if loop_match:
        print("[post_process] WARNING: Degenerate SVG path loop detected — stripping SVG element")
        # Match whole <svg>…</svg> elements. The `\Z` alternative also removes
        # an SVG that was never closed, which is what a loop that exhausts
        # the token budget leaves behind.
        html = _re.sub(
            r'<svg\b[^>]*>.*?(?:</svg>|\Z)',
            '',
            html,
            flags=_re.DOTALL | _re.IGNORECASE,
        )

    # 1. Remove @font-face blocks that reference external (non-local) URLs
    html = _re.sub(
        r'@font-face\s*\{[^}]*src\s*:\s*url\(\s*["\']?https?://[^"\')\s]+["\']?\s*\)[^}]*\}',
        '',
        html,
        flags=_re.DOTALL | _re.IGNORECASE,
    )

    # 2. Replace storage.googleapis.com avatar URLs with pravatar
    def _fix_avatar(m):
        # Hash the matched attribute so the same URL always maps to the same
        # avatar index in 1..70.
        n = int(hashlib.md5(m.group(0).encode()).hexdigest(), 16) % 70 + 1
        return f'src="https://i.pravatar.cc/150?img={n}"'

    html = _re.sub(
        r'src="https://storage\.googleapis\.com/uxpilot-auth[^"]*"',
        _fix_avatar,
        html,
    )

    # 3. Replace invented brand domain image/video URLs with AI-IMAGE
    def _fix_brand_url(m):
        tag = m.group(0)
        if 'pravatar' in tag or 'cdn.tailwind' in tag or 'cdnjs' in tag or 'fonts.google' in tag:
            return tag
        # NOTE(review): the step is described as substituting an AI-IMAGE
        # placeholder, but the replacement here is an empty string (the tag
        # is dropped). Confirm the intended placeholder markup.
        return ''

    html = _re.sub(
        r'<(?:img|video|source)\b[^>]+src="https?://(?!i\.pravatar\.cc)[^"]+\.(jpg|jpeg|png|gif|webp|mp4|mov)"[^>]*/?>',
        _fix_brand_url,
        html,
        flags=_re.IGNORECASE,
    )

    return html


# ──────────────────────────────────────────────────────────────────────────────
# Dynamic batching inference engine
# ──────────────────────────────────────────────────────────────────────────────
# Multiple threads (or an async server) call engine.submit(prompt). A single
# worker thread collects requests into a batch (up to MAX_BATCH, with a
# WAIT_MS window for the batch to fill) and runs them in ONE model.generate()
# call. Per-request output is bit-identical to single-request inference —
# same model, same tokenizer, same sampling params; only difference is
# left-padding so decoder-only batched generation lines up correctly.
#
# The GPU is the serialization point, so one worker thread is correct.
# Don't add more worker threads — they'll just contend on CUDA and slow down.
# ──────────────────────────────────────────────────────────────────────────────
import threading
import queue as _queue
import time as _time
from dataclasses import dataclass, field
from typing import Optional, List, Tuple


@dataclass
class _BatchRequest:
    """One queued generation request together with its completion state."""

    prompt: str
    # Set by the worker once `result` or `error` has been filled in.
    event: threading.Event = field(default_factory=threading.Event)
    result: Optional[str] = None
    error: Optional[Exception] = None
    # time.monotonic() readings used for the wait/generation timing log.
    submitted_at: float = field(default_factory=_time.monotonic)
    started_at: Optional[float] = None
    finished_at: Optional[float] = None


class BatchingInferenceEngine:
    """Dynamic batching wrapper around model.generate().

    Callers use ``submit(prompt)`` from any thread. A single daemon worker
    thread drains the request queue into batches of at most ``max_batch``
    requests (waiting up to ``wait_ms`` for a batch to fill) and runs each
    batch through one ``model.generate()`` call.
    """

    def __init__(
        self,
        model,
        tokenizer,
        system_prompt: str,
        max_batch: int = 4,
        wait_ms: int = 50,
        max_new_tokens: int = MAX_NEW_TOKENS,
        temperature: float = TEMPERATURE,
        top_p: float = TOP_P,
        do_sample: bool = DO_SAMPLE,
    ):
        """Store the generation settings and start the worker thread.

        Args:
            model: causal LM used for generation.
            tokenizer: matching tokenizer; its ``padding_side`` is set to
                ``"left"`` as a side effect.
            system_prompt: system message prepended to every request.
            max_batch: maximum number of requests per generate() call.
            wait_ms: how long to wait for more requests after the first one.
            max_new_tokens, temperature, top_p, do_sample: forwarded to
                ``model.generate``.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.system_prompt = system_prompt
        self.max_batch = max_batch
        self.wait_s = wait_ms / 1000.0
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature
        self.top_p = top_p
        self.do_sample = do_sample

        # Critical: left-pad for decoder-only batched generation so every row's
        # generation starts at the same index (input_len) and EOS logic works.
        self.tokenizer.padding_side = "left"

        self._queue: "_queue.Queue[_BatchRequest]" = _queue.Queue()
        # Guards the `_running` check + enqueue in submit() against shutdown(),
        # so no request can be enqueued after the worker's final drain.
        self._lock = threading.Lock()
        self._running = True
        self._worker = threading.Thread(target=self._worker_loop, daemon=True)
        self._worker.start()

    def submit(self, prompt: str, timeout: Optional[float] = None) -> str:
        """Blocking — returns the generated HTML (post-processed). Raises on failure.

        Args:
            prompt: user prompt for this request.
            timeout: seconds to wait for the result; ``None`` waits
                indefinitely.

        Raises:
            RuntimeError: if the engine was shut down before the request
                could be processed.
            TimeoutError: if no result arrived within ``timeout``. The
                request is not removed from the queue in that case.
            Exception: whatever the batch run raised for this request.
        """
        req = _BatchRequest(prompt=prompt)
        with self._lock:
            if not self._running:
                # No worker will ever pick this up; fail fast instead of hanging.
                raise RuntimeError("BatchingInferenceEngine has been shut down")
            self._queue.put(req)
        if not req.event.wait(timeout=timeout):
            raise TimeoutError("Batch inference timed out")
        if req.error is not None:
            raise req.error
        return req.result

    def shutdown(self):
        """Ask the worker to stop; returns immediately without joining it.

        The worker finishes any batch it has already collected, then fails
        every request still waiting in the queue.
        """
        with self._lock:
            self._running = False

    def _fail_pending(self, error: Exception) -> None:
        """Complete every still-queued request with ``error``."""
        while True:
            try:
                req = self._queue.get_nowait()
            except _queue.Empty:
                return
            req.error = error
            req.event.set()

    def _worker_loop(self):
        """Collect and run batches until shutdown, then fail leftovers."""
        while self._running:
            batch = self._collect_batch()
            if not batch:
                continue
            t_start = _time.monotonic()
            for req in batch:
                req.started_at = t_start
            try:
                self._run_batch(batch)
            except Exception as e:
                print(f"[batch_worker] ERROR: {e}")
                # Only requests that were not already completed get the error.
                for req in batch:
                    if not req.event.is_set():
                        req.error = e
                        req.event.set()
        # Without this, callers blocked in submit() with no timeout would
        # wait forever on requests that were queued but never batched.
        self._fail_pending(
            RuntimeError("BatchingInferenceEngine shut down before the request was processed")
        )

    def _collect_batch(self) -> List[_BatchRequest]:
        """Return the next batch, or an empty list if nothing arrived in 1 s."""
        # Block for first request (heartbeat timeout to allow clean shutdown).
        try:
            first = self._queue.get(timeout=1.0)
        except _queue.Empty:
            return []

        batch = [first]
        # Drain additional requests inside the wait window.
        deadline = _time.monotonic() + self.wait_s
        while len(batch) < self.max_batch:
            remaining = deadline - _time.monotonic()
            if remaining <= 0:
                break
            try:
                req = self._queue.get(timeout=remaining)
                batch.append(req)
            except _queue.Empty:
                break
        return batch

    def _run_batch(self, batch: List[_BatchRequest]):
        """Generate for every request in ``batch`` and publish the results."""
        texts = []
        for req in batch:
            messages = [
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": req.prompt},
            ]
            text = self.tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )
            texts.append(text)

        # Follow the model's own device instead of assuming the default CUDA
        # device, so inputs and weights always live together.
        inputs = self.tokenizer(
            texts,
            padding=True,
            return_tensors="pt",
            add_special_tokens=False,  # the chat template already added them
        ).to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=self.max_new_tokens,
                temperature=self.temperature,
                top_p=self.top_p,
                do_sample=self.do_sample,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.pad_token_id,
            )

        # Left-padded → all rows share the same input_len prefix.
        input_len = inputs.input_ids.shape[1]
        t_end = _time.monotonic()

        bsz = len(batch)
        wait_times = [(r.started_at - r.submitted_at) * 1000 for r in batch]
        gen_time_ms = (t_end - batch[0].started_at) * 1000
        print(f"[batch] size={bsz} gen={gen_time_ms:.0f}ms "
              f"wait=[{min(wait_times):.0f}-{max(wait_times):.0f}ms]")

        for i, req in enumerate(batch):
            generated = outputs[i][input_len:]
            text = self.tokenizer.decode(generated, skip_special_tokens=True)
            req.result = post_process(text.strip())
            req.finished_at = t_end
            req.event.set()


# ──────────────────────────────────────────────────────────────────────────────
# Parity & throughput tests
# ──────────────────────────────────────────────────────────────────────────────
def run_parity_check(model, tokenizer, test_prompt: str, seed: int = 0):
    """Confirm batch-of-1 output matches single-request output exactly.

    Both passes sample with the same settings, so the comparison is only
    meaningful when they start from the same random state. The global torch
    RNG is therefore reseeded with ``seed`` before each pass.

    Args:
        model: causal LM under test.
        tokenizer: matching tokenizer.
        test_prompt: prompt generated once directly and once via the engine.
        seed: value passed to ``torch.manual_seed`` before each pass.

    Returns:
        True if the two outputs are equal, otherwise False (after printing
        up to 30 lines of unified diff).
    """
    print("\n[parity] single-request pass...")
    torch.manual_seed(seed)
    t0 = _time.monotonic()
    single = generate_html(model, tokenizer, test_prompt)
    t1 = _time.monotonic()

    engine = BatchingInferenceEngine(
        model, tokenizer, SYSTEM_PROMPT, max_batch=1, wait_ms=10,
    )
    try:
        print("[parity] batched (batch-of-1) pass...")
        torch.manual_seed(seed)
        t2 = _time.monotonic()
        batched = engine.submit(test_prompt)
        t3 = _time.monotonic()
    finally:
        # Stop the worker even when the batched pass raises.
        engine.shutdown()

    print(f"[parity] single={t1-t0:.1f}s  batched-of-1={t3-t2:.1f}s")
    if single == batched:
        print("[parity] OK — outputs are bit-identical.")
        return True

    import difflib
    diff = list(difflib.unified_diff(
        single.splitlines(), batched.splitlines(),
        fromfile="single", tofile="batched", lineterm="", n=2,
    ))
    print("[parity] MISMATCH — first 30 diff lines:")
    for line in diff[:30]:
        print(line)
    return False


def run_throughput_test(
    model,
    tokenizer,
    prompts: List[str],
    max_batch: int = 4,
    wait_ms: int = 50,
) -> Tuple[List[str], float]:
    """Fire all prompts concurrently through one engine; measure wall time."""
    engine = BatchingInferenceEngine(
        model, tokenizer, SYSTEM_PROMPT,
        max_batch=max_batch, wait_ms=wait_ms,
    )
    results: List[Optional[str]] = [None] * len(prompts)
threads: List[threading.Thread] = [] def worker(i, p): try: results[i] = engine.submit(p) except Exception as e: results[i] = f"ERROR: {e}" t0 = _time.monotonic() for i, p in enumerate(prompts): th = threading.Thread(target=worker, args=(i, p)) th.start() threads.append(th) for th in threads: th.join() wall = _time.monotonic() - t0 print(f"\n[throughput] {len(prompts)} prompts in {wall:.1f}s wall " f"→ {wall/len(prompts):.1f}s per prompt effective " f"(max_batch={max_batch}, wait_ms={wait_ms})") engine.shutdown() return results, wall # ────────────────────────────────────────────────────────────────────────────── # Main # ────────────────────────────────────────────────────────────────────────────── def main(): model, tokenizer = load_model_and_tokenizer() output_dir = Path(OUTPUT_FOLDER) output_dir.mkdir(parents=True, exist_ok=True) for i, prompt in enumerate(PROMPTS, 1): print(f"\n{'='*80}") print(f"Generating {i}/{len(PROMPTS)}") print(f"Raw prompt: {prompt[:120]}..." if len(prompt) > 120 else f"Raw prompt: {prompt}") # Normalize the prompt via Haiku before inference normalized_prompt = normalize_prompt(prompt) if normalized_prompt != prompt: preview = normalized_prompt[:120] + "..." 
if len(normalized_prompt) > 120 else normalized_prompt print(f"Normalized: {preview}") print(f"{'='*80}") # Save the normalized prompt alongside the HTML for traceability norm_path = output_dir / f"test_prompt_{i:03d}_normalized.txt" with open(norm_path, "w", encoding="utf-8") as f: f.write(normalized_prompt) try: _t0 = _time.perf_counter() html = generate_html(model, tokenizer, normalized_prompt) _dt = _time.perf_counter() - _t0 print(f"[timing] prompt {i}/{len(PROMPTS)} generated in {_dt:.1f}s") path = output_dir / f"test_prompt_{i:03d}.html" with open(path, "w", encoding="utf-8") as f: f.write(html) print(f"Saved -> {path}") except Exception as e: print(f"Error on prompt {i}: {e}") error_path = output_dir / f"prompt_{i:03d}_ERROR.txt" with open(error_path, "w", encoding="utf-8") as f: f.write(f"Error: {str(e)}\nPrompt: {prompt}") print(f"Error saved -> {error_path}") print(f"\nDone. All files -> {output_dir}") if __name__ == "__main__": import sys mode = sys.argv[1] if len(sys.argv) > 1 else "main" if mode == "parity": # Verify batched output == single-request output. # Usage: python inference_edited_chat_opt.py parity model, tokenizer = load_model_and_tokenizer() test_prompt = PROMPTS[0] if PROMPTS else "simple landing page for a coffee shop" run_parity_check(model, tokenizer, test_prompt) elif mode == "throughput": # Measure throughput with concurrent submissions. # Usage: python inference_edited_chat_opt.py throughput [max_batch] [wait_ms] [n_prompts] max_batch = int(sys.argv[2]) if len(sys.argv) > 2 else 4 wait_ms = int(sys.argv[3]) if len(sys.argv) > 3 else 50 n = int(sys.argv[4]) if len(sys.argv) > 4 else min(8, len(PROMPTS)) model, tokenizer = load_model_and_tokenizer() prompts_to_run = PROMPTS[:n] if len(PROMPTS) >= n else (PROMPTS * ((n // len(PROMPTS)) + 1))[:n] # Normalize upfront so the test measures inference, not normalizer latency. 
prompts_to_run = [normalize_prompt(p) for p in prompts_to_run] results, wall = run_throughput_test( model, tokenizer, prompts_to_run, max_batch=max_batch, wait_ms=wait_ms, ) output_dir = Path(OUTPUT_FOLDER) / f"throughput_b{max_batch}_w{wait_ms}" output_dir.mkdir(parents=True, exist_ok=True) for i, html in enumerate(results, 1): (output_dir / f"batch_{i:03d}.html").write_text(html or "", encoding="utf-8") print(f"[throughput] wrote {len(results)} files -> {output_dir}") else: main()