# webicoder-v3-mlx-q8 / example.py
# (Hugging Face upload metadata: uploaded by nexsendev via the
# upload-large-folder tool; commit fea7f6b, verified)
#!/usr/bin/env python3
"""
WebICoder v3 β€” Quick Start Example
Generate HTML websites from natural language prompts using MLX on Apple Silicon.
⚠️ MANDATORY: This script implements all 5 required rules for correct output.
See README.md for full documentation.
Usage:
python example.py "Create a landing page for a coffee shop"
python example.py --interactive
"""
import sys
import re
from mlx_lm import load, stream_generate
from mlx_lm.sample_utils import make_sampler, make_logits_processors
# ─── Configuration ──────────────────────────────────────────────────────────

# Where the model weights are loaded from; "." means the current directory
# (i.e. this model repo itself).
MODEL_PATH: str = "."  # Current directory (the model repo)

# RULE 1: System prompt + Alpaca format (### Instruction / ### Response).
# format_prompt() prepends this to every request; clean_html() strips it
# back out if the model echoes it into the generated text.
SYSTEM_PROMPT: str = (
    "You are WebICoder, an expert frontend web developer specializing in premium, "
    "Apple-inspired design. You create stunning websites using only HTML, CSS, and "
    "vanilla JavaScript. Your designs feature: minimalist layouts, elegant typography, "
    "smooth animations, glassmorphism effects, generous whitespace, and a refined "
    "color palette. You always produce complete, production-ready code."
)

# RULE 2: Stop sequences — MANDATORY to prevent infinite loops.
# Generation is cut at the first of these found in the accumulated output
# (see generate_html); "</html>" is kept, the leak markers are dropped.
STOP_SEQUENCES: list = ["</html>", "### Instruction:", "You are Deepcoder", "You are WebICoder"]

# RULE 4: Low temperature — MANDATORY for coherent HTML.
DEFAULT_TEMP: float = 0.4
# Hard cap on generated tokens per request.
DEFAULT_MAX_TOKENS: int = 4096
# ─── RULE 1: Prompt Formatting (MANDATORY) ──────────────────────────────────
def format_prompt(user_input: str) -> str:
    """
    MANDATORY: Wrap *user_input* in the Alpaca-style prompt format the
    model was trained on (system prompt + ### Instruction / ### Response).

    Sending raw text without this wrapper will produce garbage output.
    """
    sections = [
        SYSTEM_PROMPT,
        "",
        "### Instruction:",
        user_input,
        "",
        "### Response:",
        "",
    ]
    return "\n".join(sections)
# ─── RULE 5: Post-Processing (MANDATORY) ────────────────────────────────────
def clean_html(text: str) -> str:
    """
    MANDATORY: Extract clean HTML from raw model output.

    The model may leak training artifacts (system prompt, instruction
    markers). This strips them and returns only valid HTML.

    Args:
        text: Raw generated text, possibly containing prompt leaks.

    Returns:
        A complete HTML document when one (or a repairable fragment) is
        found; otherwise the stripped input text.
    """
    # Remove system-prompt / instruction-marker leaks.
    # IGNORECASE is required: the model name is spelled "WebICoder"
    # (capital C), which the case-sensitive "Web[iI]coder" never matched.
    for pattern in [
        r"You are (?:Deep|Web[iI])coder.*?production-ready code\.\n*",
        r"### Instruction:.*",  # a new instruction marker starts a new turn: drop everything after it
        r"### Response:\s*",
    ]:
        text = re.sub(pattern, "", text, flags=re.DOTALL | re.IGNORECASE)
    # Best case: a complete HTML document is present — return it verbatim.
    html_match = re.search(r"(<(?:!DOCTYPE\s+html|html)[\s\S]*?</html>)", text, re.IGNORECASE)
    if html_match:
        return html_match.group(1).strip()
    # Fallback: find any HTML-looking content and repair it into a document.
    html_start = re.search(r"<(?:!DOCTYPE|html|head|body|link)", text, re.IGNORECASE)
    if html_start:
        html = text[html_start.start():].strip()
        if not html.lower().startswith("<!doctype"):
            # Only add the tags that are actually missing. The previous
            # version always prepended "<html>", duplicating the tag when
            # the fragment already began with one.
            if html.lower().startswith("<html"):
                html = "<!DOCTYPE html>\n" + html
            else:
                html = "<!DOCTYPE html>\n<html>\n" + html
        if "</html>" not in html.lower():
            html += "\n</html>"
        return html
    return text.strip()
# ─── Generation ─────────────────────────────────────────────────────────────
# Cache of loaded (model, tokenizer) pairs, keyed by model path, so that
# interactive mode does not reload the weights from disk on every prompt.
_MODEL_CACHE = {}


def generate_html(prompt: str, temperature: float = DEFAULT_TEMP, max_tokens: int = DEFAULT_MAX_TOKENS) -> str:
    """
    Generate HTML from a natural language prompt.

    Implements all 5 mandatory rules:
      1. Prompt formatting (### Instruction / ### Response)
      2. Stop at </html>
      3. Repetition penalty (1.2, context=256)
      4. Low temperature (0.4)
      5. Post-processing (clean_html)

    Args:
        prompt: Natural-language description of the website to generate.
        temperature: Sampling temperature; keep low for coherent HTML.
        max_tokens: Hard cap on the number of generated tokens.

    Returns:
        Cleaned HTML extracted from the model output (see clean_html).
    """
    # Load lazily and cache: loading is the most expensive step, and the
    # previous version reloaded the model on every call (every prompt in
    # interactive mode).
    if MODEL_PATH not in _MODEL_CACHE:
        print(f"[INFO] Loading model from: {MODEL_PATH}")
        _MODEL_CACHE[MODEL_PATH] = load(MODEL_PATH)
    model, tokenizer = _MODEL_CACHE[MODEL_PATH]
    # RULE 1: Format the prompt
    formatted_prompt = format_prompt(prompt)
    # RULE 4: Low temperature sampler
    sampler = make_sampler(temp=temperature)
    # RULE 3: Repetition penalty — MANDATORY
    logits_processors = make_logits_processors(
        repetition_penalty=1.2,
        repetition_context_size=256,
    )
    print(f"[INFO] Generating (temp={temperature}, max_tokens={max_tokens}, rep_penalty=1.2)...")
    print("─" * 60)
    full_text = ""
    last_response = None
    for response in stream_generate(
        model, tokenizer,
        prompt=formatted_prompt,
        max_tokens=max_tokens,
        sampler=sampler,
        logits_processors=logits_processors,  # RULE 3
    ):
        last_response = response
        token_str = response.text
        full_text += token_str
        print(token_str, end="", flush=True)
        # RULE 2: Stop at </html> — MANDATORY. Checked against the
        # accumulated text so stop sequences split across tokens are caught.
        should_stop = False
        for stop_seq in STOP_SEQUENCES:
            if stop_seq in full_text:
                idx = full_text.find(stop_seq)
                if stop_seq == "</html>":
                    # Keep the closing tag: it completes the document.
                    full_text = full_text[:idx + len(stop_seq)]
                else:
                    # Leak markers are dropped along with everything after.
                    full_text = full_text[:idx]
                should_stop = True
                break
        if should_stop or response.finish_reason is not None:
            break
    print("\n" + "─" * 60)
    if last_response:
        print(f"[INFO] Generated {last_response.generation_tokens} tokens at {last_response.generation_tps:.1f} tok/s")
        print(f"[INFO] Peak memory: {last_response.peak_memory:.2f} GB")
    # RULE 5: Clean the output — MANDATORY
    return clean_html(full_text)
# ─── Main ────────────────────────────────────────────────────────────────────
def main():
if len(sys.argv) > 1 and sys.argv[1] != "--interactive":
# Single prompt mode
prompt = " ".join(sys.argv[1:])
html = generate_html(prompt)
output_file = "output.html"
with open(output_file, "w") as f:
f.write(html)
print(f"\n[INFO] Saved to {output_file} ({len(html)} chars)")
else:
# Interactive mode
print("=" * 60)
print(" ⚑ WebICoder v3 β€” Interactive Mode")
print(" Type a website description, press Enter to generate.")
print(" Type 'quit' to exit.")
print("=" * 60)
while True:
try:
prompt = input("\n🌐 Describe your website: ").strip()
if not prompt or prompt.lower() in ("quit", "exit", "q"):
break
html = generate_html(prompt)
output_file = "output.html"
with open(output_file, "w") as f:
f.write(html)
print(f"\n[INFO] Saved to {output_file} ({len(html)} chars)")
except KeyboardInterrupt:
print("\n[INFO] Bye!")
break
if __name__ == "__main__":
main()