vision-base / scripts /generate_examples.py
SPP
Initial upload: Vision Base MiniCPM-V 4.6 demo suite
98ceb88
Raw
History Blame Contribute Delete
14.5 kB
"""
Generate demo example images using PIL only (no network required).
Run: python scripts/generate_examples.py
"""
from pathlib import Path
from PIL import Image, ImageDraw, ImageFont
# Output directory: the "examples" folder that sits next to the directory
# containing this script (two levels up from this file, then "examples").
EXAMPLES = Path(__file__).parent.parent / "examples"
# Created as soon as the module is loaded so the generators can save into it;
# exist_ok=True makes re-running the script harmless.
EXAMPLES.mkdir(exist_ok=True)
# Candidate TrueType files for each weight, in order of preference.
_REGULAR_FONT_PATHS = (
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
)
_BOLD_FONT_PATHS = (
    "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
    "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
)


def _load_first_available(candidates, size):
    """Return the first font in *candidates* that loads, else Pillow's default.

    Args:
        candidates: Iterable of font file paths, tried in order.
        size: Requested font size, passed to ``ImageFont.truetype``.

    Returns:
        A Pillow font object usable as the ``font=`` argument of
        ``ImageDraw.text``.
    """
    for font_path in candidates:
        try:
            return ImageFont.truetype(font_path, size)
        except (OSError, ImportError):
            # OSError: the file is missing or unreadable on this machine.
            # ImportError: Pillow was built without FreeType support.
            continue
    try:
        # Newer Pillow releases accept a size for the built-in font, which
        # keeps the fallback close to the layout the callers asked for.
        return ImageFont.load_default(size)
    except (TypeError, ImportError):
        # Older Pillow (no ``size`` parameter) or no FreeType: fixed-size font.
        return ImageFont.load_default()


def _font(size=16):
    """Return a regular-weight font of *size*, falling back to the default."""
    return _load_first_available(_REGULAR_FONT_PATHS, size)


def _font_bold(size=16):
    """Return a bold font of *size*, falling back to the default font."""
    return _load_first_available(_BOLD_FONT_PATHS, size)
# ── allergen_label.jpg ────────────────────────────────────────────────────────
def make_allergen_label(path: Path):
    """Draw a mock "Nutrition Facts" panel and save it to *path* as a JPEG.

    The 480x640 white canvas gets a border, serving and calorie headers, a
    nutrient table, an ingredient paragraph and a red "CONTAINS" allergen
    line. The saved file's name is printed afterwards.
    """
    canvas = Image.new("RGB", (480, 640), "#ffffff")
    pen = ImageDraw.Draw(canvas)
    black = "#000"

    def rule(at_y, thickness, colour=black):
        # Every separator spans the same x-range just inside the border.
        pen.line([14, at_y, 466, at_y], fill=colour, width=thickness)

    # Outer border.
    pen.rectangle([10, 10, 469, 629], outline="#000000", width=3)

    title_font = _font_bold(28)
    body_font = _font(14)
    small_font = _font(11)

    # Header: title plus serving information.
    pen.text((240, 24), "Nutrition Facts", font=title_font, fill=black, anchor="mt")
    rule(60, 2)
    pen.text((20, 68), "Serving Size 1 cup (240ml)", font=body_font, fill=black)
    pen.text((20, 90), "Servings Per Container: 2", font=body_font, fill=black)
    rule(108, 6)

    # Calories row.
    pen.text((20, 116), "Amount Per Serving", font=small_font, fill=black)
    pen.text((20, 132), "Calories", font=_font_bold(20), fill=black)
    pen.text((300, 132), "250", font=_font_bold(30), fill=black)
    rule(166, 3)

    # Nutrient table: (name, amount, percent daily value or "" for none).
    table = [
        ("Total Fat", "8g", "10%"),
        (" Saturated Fat", "3g", "15%"),
        (" Trans Fat", "0g", ""),
        ("Cholesterol", "25mg", "8%"),
        ("Sodium", "580mg", "25%"),
        ("Total Carbohydrate", "37g", "13%"),
        (" Dietary Fiber", "4g", "14%"),
        (" Total Sugars", "12g", ""),
        ("Protein", "5g", ""),
    ]
    cursor = 166
    row_height = 22  # 4 px padding + 18 px of text, closed by a grey rule
    for label, amount, daily_value in table:
        text_y = cursor + 4
        pen.text((20, text_y), label, font=body_font, fill=black)
        pen.text((340, text_y), amount, font=body_font, fill=black)
        if daily_value:
            pen.text((420, text_y), daily_value, font=body_font, fill=black)
        cursor += row_height
        rule(cursor, 1, "#cccccc")

    cursor += 12
    rule(cursor, 6)

    # Ingredient paragraph, one pre-wrapped line every 15 px.
    cursor += 10
    pen.text((20, cursor), "INGREDIENTS:", font=_font_bold(13), fill=black)
    cursor += 18
    ingredients = (
        "Whole milk, enriched flour (wheat flour, niacin, reduced iron,\n"
        "thiamine mononitrate, riboflavin, folic acid), sugar, palm oil,\n"
        "cocoa butter, SOY lecithin, PEANUT oil, modified starch,\n"
        "salt, vanilla extract, MILK protein concentrate."
    )
    ingredient_lines = ingredients.splitlines()
    for index, text in enumerate(ingredient_lines):
        pen.text((20, cursor + 15 * index), text, font=small_font, fill=black)
    cursor += 15 * len(ingredient_lines) + 10
    rule(cursor, 2)

    # Allergen summary, centred and in red.
    cursor += 8
    pen.text(
        (240, cursor),
        "CONTAINS: WHEAT, MILK, SOY, PEANUTS",
        font=_font_bold(13),
        fill="#cc0000",
        anchor="mt",
    )

    canvas.save(path, "JPEG", quality=90)
    print(f" created {path.name}")
def make_allergen_label2(path: Path):
    """Draw a plain ingredients list with an allergen notice and save as JPEG.

    The 480x360 off-white image contains a centred heading followed by one
    text line every 20 px. Allergen-related lines are drawn in red; the
    "ALLERGEN" heading and the dietary summary line are drawn in bold.
    The saved file's name is printed afterwards.

    Args:
        path: Destination file for the JPEG (quality 90).
    """
    img = Image.new("RGB", (480, 360), "#f9f9f0")
    d = ImageDraw.Draw(img)
    d.rectangle([8, 8, 471, 351], outline="#888", width=2)

    heading_font = _font_bold(18)
    body_font = _font(13)
    # Loaded once here instead of once per emphasised line inside the loop.
    emphasis_font = _font_bold(13)

    d.text((240, 20), "Ingredients List", font=heading_font, fill="#222", anchor="mt")
    d.line([10, 44, 470, 44], fill="#888", width=1)

    lines = [
        "Water, sugar, modified corn starch, citric acid,",
        "natural flavors, sodium benzoate, MILK powder,",
        "caramel color, TREE NUTS (almonds, cashews),",
        "ascorbic acid (Vitamin C), beta-carotene.",
        "",
        "ALLERGEN INFORMATION:",
        "Contains: MILK, TREE NUTS",
        "Manufactured in a facility that also processes EGGS.",
        "",
        # Check marks restored from mis-decoded UTF-8 ("βœ“").
        "Vegetarian ✓ Gluten-Free ✓ NOT Vegan (contains milk)",
    ]

    y = 56
    for line in lines:
        is_warning = line.startswith("ALLERGEN") or "Contains:" in line
        is_bold = line.startswith(("ALLERGEN", "Vegetarian"))
        d.text(
            (20, y),
            line,
            font=emphasis_font if is_bold else body_font,
            fill="#cc0000" if is_warning else "#333",
        )
        y += 20

    img.save(path, "JPEG", quality=90)
    print(f" created {path.name}")
# ── camera_roll examples ──────────────────────────────────────────────────────
def make_receipt(path: Path):
    """Draw a mock grocery receipt and save it to *path* as a JPEG.

    The 380x520 image shows a store header, date/register line, seven priced
    items, subtotal/tax/total and a payment footer. The saved file's name is
    printed afterwards.

    Args:
        path: Destination file for the JPEG (quality 90).
    """
    img = Image.new("RGB", (380, 520), "#fffef8")
    d = ImageDraw.Draw(img)
    fb = _font_bold(16)
    fm = _font(13)
    fs = _font(11)

    # Store header, centred.
    y = 20
    d.text((190, y), "WHOLE FOODS MARKET", font=fb, fill="#000", anchor="mt")
    y += 22
    # Middle dot restored from mis-decoded UTF-8 ("Β·").
    d.text((190, y), "Store #0472 · San Francisco, CA", font=fs, fill="#555", anchor="mt")
    y += 18
    d.text((190, y), "Tel: (415) 555-0192", font=fs, fill="#555", anchor="mt")
    y += 24
    d.line([10, y, 370, y], fill="#000", width=1)

    # Date on the left, register number right-aligned.
    y += 10
    d.text((20, y), "Date: 06/13/2026 10:42 AM", font=fs, fill="#333")
    d.text((370, y), "REG #3", font=fs, fill="#333", anchor="rt")
    y += 20
    d.line([10, y, 370, y], fill="#999", width=1)
    y += 8

    # Line items: name on the left, price right-aligned at x=360.
    items = [
        ("Organic Whole Milk 1gal", "5.99"),
        ("Sourdough Bread", "4.49"),
        ("Free Range Eggs 12ct", "6.29"),
        ("Mixed Salad Greens 5oz", "3.99"),
        ("Sparkling Water 12pk", "11.99"),
        ("Dark Chocolate 70%", "3.49"),
        ("Almond Butter 16oz", "8.99"),
    ]
    for name, price in items:
        d.text((20, y), name, font=fm, fill="#222")
        d.text((360, y), f"${price}", font=fm, fill="#222", anchor="rt")
        y += 20
    y += 6
    d.line([10, y, 370, y], fill="#000", width=1)
    y += 8

    # Hard-coded totals; they agree with the item list above
    # (items sum to 45.23, 8% tax rounds to 3.62).
    subtotal = "45.23"
    tax = "3.62"
    total = "48.85"
    for label, val in [("Subtotal", subtotal), ("Tax (8%)", tax)]:
        d.text((20, y), label, font=fm, fill="#444")
        d.text((360, y), f"${val}", font=fm, fill="#444", anchor="rt")
        y += 20
    d.text((20, y), "TOTAL", font=fb, fill="#000")
    d.text((360, y), f"${total}", font=fb, fill="#000", anchor="rt")
    y += 24
    d.line([10, y, 370, y], fill="#000", width=2)

    # Payment footer.
    y += 10
    d.text((190, y), "VISA **** 4821", font=fm, fill="#555", anchor="mt")
    y += 18
    d.text((190, y), "Thank you for shopping!", font=fs, fill="#888", anchor="mt")

    img.save(path, "JPEG", quality=90)
    print(f" created {path.name}")
def make_event_flyer(path: Path):
    """Draw a mock tech-meetup flyer and save it to *path* as a JPEG.

    The 480x360 dark-blue image shows a title, tagline, four labelled detail
    rows (date, time, venue, RSVP) and a footer line. The saved file's name
    is printed afterwards.

    Args:
        path: Destination file for the JPEG (quality 90).
    """
    img = Image.new("RGB", (480, 360), "#1a237e")
    d = ImageDraw.Draw(img)
    d.rectangle([16, 16, 463, 343], outline="#ffffff", width=2)

    y = 40
    d.text((240, y), "TECH MEETUP", font=_font_bold(32), fill="#ffeb3b", anchor="mt")
    y += 50
    d.text((240, y), "Building Small, Shipping Big", font=_font(18), fill="#e8eaf6", anchor="mt")
    y += 40
    d.line([40, y, 440, y], fill="#3949ab", width=1)
    y += 20

    # Emoji, en dash and middle dot below are restored from mis-decoded UTF-8.
    # NOTE(review): whether the emoji render depends on the glyph coverage of
    # the font returned by _font_bold - confirm on the target machine.
    details = [
        ("📅 Date:", "Thursday, June 18, 2026"),
        ("🕖 Time:", "6:30 PM – 9:00 PM"),
        ("📍 Venue:", "Hugging Face HQ, NYC"),
        ("🎟️ RSVP:", "events.hf.co/buildsmall"),
    ]
    # Fonts are loop-invariant, so load them once rather than per row.
    label_font = _font_bold(14)
    value_font = _font(14)
    for label, val in details:
        d.text((60, y), label, font=label_font, fill="#90caf9")
        d.text((175, y), val, font=value_font, fill="#ffffff")
        y += 26

    y += 12
    d.text(
        (240, y),
        "Free admission · Light refreshments provided",
        font=_font(11),
        fill="#9fa8da",
        anchor="mt",
    )

    img.save(path, "JPEG", quality=90)
    print(f" created {path.name}")
def make_screenshot(path: Path):
    """Draw a mock browser screenshot of a news listing and save it as JPEG.

    The 640x400 image has a fake browser toolbar (three window buttons and an
    address bar) above a "Hacker News" heading and five numbered stories,
    each followed by a grey metadata line. The saved file's name is printed
    afterwards.

    Args:
        path: Destination file for the JPEG (quality 90).
    """
    img = Image.new("RGB", (640, 400), "#ffffff")
    d = ImageDraw.Draw(img)

    # Fake browser chrome: toolbar strip, window buttons, address bar.
    d.rectangle([0, 0, 640, 40], fill="#dee1e6")
    d.ellipse([10, 12, 26, 28], fill="#ff5f57")
    d.ellipse([34, 12, 50, 28], fill="#febc2e")
    d.ellipse([58, 12, 74, 28], fill="#28c840")
    d.rectangle([90, 8, 540, 32], fill="#ffffff", outline="#b8b8b8")
    d.text((315, 20), "https://news.ycombinator.com", font=_font(12), fill="#555", anchor="mm")

    # Page content.
    y = 60
    d.text((20, y), "Hacker News", font=_font_bold(22), fill="#ff6600")
    y += 36
    stories = [
        "Show HN: I built a 1.3B VLM that fits in a ZeroGPU Space (748 pts)",
        "Ask HN: What makes a great hackathon demo? (312 pts)",
        "MiniCPM-V achieves SOTA on OCRBench at 1.3B params (521 pts)",
        "The unreasonable effectiveness of small vision models (189 pts)",
        "From zero to ZeroGPU in 24 hours (245 pts)",
    ]
    # Fonts do not change between rows, so load them once instead of three
    # times per story.
    story_font = _font(13)
    meta_font = _font(11)
    for rank, story in enumerate(stories, start=1):
        d.text((20, y), f"{rank}.", font=story_font, fill="#828282")
        d.text((44, y), story, font=story_font, fill="#000000")
        y += 22
        # NOTE(review): the synthetic point count here (rank * 47) differs
        # from the "(N pts)" figure embedded in each title above.
        d.text(
            (44, y),
            f"{rank*47} points by user{rank} | {rank+1} hours ago | {rank*3} comments",
            font=meta_font,
            fill="#828282",
        )
        y += 24

    img.save(path, "JPEG", quality=90)
    print(f" created {path.name}")
# ── oracle_object.jpg ─────────────────────────────────────────────────────────
def make_oracle_object(path: Path):
    """Draw a stylised antique key on a starry background and save as JPEG.

    The 480x480 dark image shows a gold key (double-ring handle, shaft and
    three teeth), concentric rings around the centre, two bands of white
    "stars" placed by a fixed-seed random generator, and a caption. The
    saved file's name is printed afterwards.

    Args:
        path: Destination file for the JPEG (quality 90).
    """
    import random

    gold = "#c9a84c"
    img = Image.new("RGB", (480, 480), "#1c1c2e")
    d = ImageDraw.Draw(img)
    cx, cy = 240, 240

    # Key handle — two concentric circles above the centre point.
    d.ellipse([cx-90, cy-180, cx+90, cy-0], outline=gold, width=8)
    d.ellipse([cx-55, cy-145, cx+55, cy-35], outline=gold, width=4)
    # Key shaft.
    d.rectangle([cx-10, cy-10, cx+10, cy+160], fill=gold)
    # Key teeth.
    for offset in [100, 130, 155]:
        d.rectangle([cx+10, cy+offset, cx+35, cy+offset+14], fill=gold)

    # Mystical glow effect: concentric rings, largest first.
    # NOTE(review): the canvas is RGB, so the alpha part of "#c9a84c33" is
    # presumably ignored by Pillow and the rings are opaque - confirm.
    for r in range(5, 0, -1):
        size = r * 18
        d.ellipse([cx-size, cy-size, cx+size, cy+size], outline="#c9a84c33")

    # Stars: 30 in the top band, then 20 in the bottom band. The seed is fixed
    # so the output is reproducible; x is drawn before y for each star.
    rng = random.Random(42)
    for count, (y_min, y_max) in ((30, (10, 60)), (20, (390, 470))):
        for _ in range(count):
            x = rng.randint(10, 470)
            y_ = rng.randint(y_min, y_max)
            d.ellipse([x, y_, x+2, y_+2], fill="#ffffff")

    d.text((240, 440), "What does this object reveal?", font=_font(13), fill="#9e8fcc", anchor="mt")
    img.save(path, "JPEG", quality=90)
    print(f" created {path.name}")
# ── error screens ─────────────────────────────────────────────────────────────
def make_error_screen(path: Path):
    """Render a mock blue crash screen and write it to *path* as a JPEG.

    The 640x400 blue canvas carries a large sad-face glyph followed by the
    restart message, progress line, help link, stop code and failing module,
    all left-aligned at x=40. The saved file's name is printed afterwards.
    """
    canvas = Image.new("RGB", (640, 400), "#0a0aff")
    pen = ImageDraw.Draw(canvas)

    white = "#ffffff"
    pale = "#ccccff"
    body_font = _font(16)
    note_font = _font(13)

    # Each row: (vertical gap before the line, text, font, colour).
    rows = [
        (30, ":(", _font_bold(80), white),
        (110, "Your PC ran into a problem and needs to restart.", _font_bold(20), white),
        (36, "We're just collecting some error info, and then", body_font, white),
        (24, "we'll restart for you.", body_font, white),
        (50, "100% complete", body_font, white),
        (40, "For more information about this issue and possible fixes,", note_font, pale),
        (20, "visit https://www.windows.com/stopcode", note_font, pale),
        (30, "Stop code: CRITICAL_PROCESS_DIED", _font_bold(14), white),
        (24, "What failed: ntoskrnl.exe", note_font, pale),
    ]

    top = 0
    for gap, message, face, colour in rows:
        top += gap
        pen.text((40, top), message, font=face, fill=colour)

    canvas.save(path, "JPEG", quality=90)
    print(f" created {path.name}")
def make_error_screen2(path: Path):
    """Draw a mock washing-machine error display and save it as a JPEG.

    The 560x420 dark image shows an appliance panel with an "ERROR / E4 /
    DOOR OPEN" display, four status lights, model text, a reset hint and a
    row of five labelled round buttons. The saved file's name is printed
    afterwards.

    Args:
        path: Destination file for the JPEG (quality 90).
    """
    img = Image.new("RGB", (560, 420), "#1a1a1a")
    d = ImageDraw.Draw(img)

    # Fake washing machine body and its display window.
    d.rectangle([20, 20, 540, 400], fill="#222222", outline="#444444", width=3)
    d.rectangle([40, 40, 520, 200], fill="#001a33", outline="#0066cc", width=2)

    # Display panel text, centred horizontally.
    d.text((280, 80), "ERROR", font=_font_bold(40), fill="#ff3300", anchor="mt")
    d.text((280, 130), "E4", font=_font_bold(60), fill="#ff6600", anchor="mt")
    d.text((280, 195), "DOOR OPEN", font=_font_bold(16), fill="#ffaa00", anchor="mt")

    # Status lights: two red, two grey.
    for x, col in [(100, "#ff0000"), (200, "#ff0000"), (300, "#888888"), (400, "#888888")]:
        d.ellipse([x-8, 220, x+8, 236], fill=col)

    y = 260
    d.text((280, y), "Samsung WF45T6000AW", font=_font(13), fill="#888888", anchor="mt")
    y += 22
    d.text((280, y), "Washing Machine", font=_font(12), fill="#666666", anchor="mt")
    y += 30
    d.text((280, y), "Press POWER to reset after fixing", font=_font(11), fill="#555555", anchor="mt")

    # Physical buttons; the label font is the same for every button, so it is
    # loaded once instead of once per iteration.
    button_font = _font(9)
    for x, label in [(120, "POWER"), (200, "START"), (290, "PAUSE"), (380, "MODE"), (460, "TEMP")]:
        d.ellipse([x-20, 330, x+20, 370], fill="#333333", outline="#555555")
        d.text((x, 395), label, font=button_font, fill="#666666", anchor="mt")

    img.save(path, "JPEG", quality=90)
    print(f" created {path.name}")
# ── Main ──────────────────────────────────────────────────────────────────────
# (output file name, generator) pairs; the entry point below runs them in
# this order and saves each image under EXAMPLES.
TASKS = [
    ("allergen_label.jpg", make_allergen_label),
    ("allergen_label2.jpg", make_allergen_label2),
    ("concierge_receipt.jpg", make_receipt),
    ("concierge_event.jpg", make_event_flyer),
    ("concierge_screenshot.jpg", make_screenshot),
    ("oracle_object.jpg", make_oracle_object),
    ("error_screen.jpg", make_error_screen),
    ("error_screen2.jpg", make_error_screen2),
]


if __name__ == "__main__":
    print(f"Generating examples in {EXAMPLES} ...")
    for output_name, build_image in TASKS:
        # Each generator saves its own file and prints a confirmation line.
        build_image(EXAMPLES / output_name)
    print("Done.")