"""
tutor/visual_grounding.py
Renders a colourful counting stimulus as an RGB numpy array for gr.Image.
Robust: works with or without system fonts, never returns None for valid counts.
"""
from __future__ import annotations
import math
import numpy as np
from typing import Optional
try:
from PIL import Image, ImageDraw, ImageFont # type: ignore
_PIL = True
except ImportError:
_PIL = False
# ── Colour palette ────────────────────────────────────────────────────────────
# RGB fill colours for the dots; the renderer picks one per dot by index,
# wrapping around when there are more dots than colours.
_PALETTE = [
    (231, 76, 60),    # red
    (52, 152, 219),   # blue
    (46, 204, 113),   # green
    (243, 156, 18),   # orange
    (155, 89, 182),   # purple
    (26, 188, 156),   # teal
    (233, 30, 153),   # pink
    (241, 196, 15),   # yellow
    (52, 73, 94),     # dark
    (230, 126, 34),   # dark orange
]

# Default canvas size in pixels (width, height).
_CANVAS_W, _CANVAS_H = 480, 320

# Canvas background colour (RGB).
_BG = (250, 250, 255)
def _get_font(size: int) -> "ImageFont.ImageFont":
    """Load a bold TrueType font at *size*, or PIL's built-in default font.

    A fixed list of well-known Linux, macOS and Windows font locations is
    probed in order and the first one that loads is returned.  Any failure
    while loading a candidate just moves on to the next path.
    """
    search_paths = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
        "/usr/share/fonts/truetype/ubuntu/Ubuntu-B.ttf",
        "/System/Library/Fonts/Helvetica.ttc",  # macOS
        "C:/Windows/Fonts/arialbd.ttf",  # Windows
    )
    for font_path in search_paths:
        try:
            loaded = ImageFont.truetype(font_path, size)
        except Exception:
            # Deliberately broad: this is a best-effort probe, so whatever
            # went wrong with this path we simply try the next one.
            continue
        return loaded
    # Nothing on the list could be loaded: use PIL's built-in default font
    # (note that *size* is not passed to it).
    return ImageFont.load_default()
def _draw_text_centered(draw, text: str, y: int, font, color=(40, 40, 40),
                        canvas_w: int = _CANVAS_W):
    """Render *text* at vertical offset *y*, centred across *canvas_w* pixels.

    The text width is measured with ``draw.textbbox`` when the drawing
    object provides it, and with ``draw.textsize`` otherwise.
    """
    try:
        box = draw.textbbox((0, 0), text, font=font)
    except AttributeError:
        # Older Pillow without textbbox
        text_width, _height = draw.textsize(text, font=font)
    else:
        text_width = box[2] - box[0]
    left = (canvas_w - text_width) // 2
    draw.text((left, y), text, fill=color, font=font)
def _clamp_count(count) -> int:
    """Coerce *count* to an ``int`` and clamp it to the drawable range 1..20.

    Values that cannot be converted (``None``, non-numeric strings, NaN,
    infinities) fall back to 1 so that there is always something to draw.
    """
    try:
        value = int(count)
    except (TypeError, ValueError, OverflowError):
        value = 1
    return max(1, min(20, value))


def render_counting_stimulus(
    count: int,
    label: str = "โ",
    canvas_w: int = _CANVAS_W,
    canvas_h: int = _CANVAS_H,
) -> np.ndarray:
    """
    Return an RGB numpy array (canvas_h x canvas_w x 3) showing *count*
    colourful filled circles with a title prompt above.

    Args:
        count: Number of dots to draw.  Coerced to ``int`` and clamped to
            1..20; values that cannot be converted are treated as 1, so a
            float such as ``3.0`` or a ``None`` no longer raises.
        label: Not used by the body; kept so existing callers that pass it
            keep working.
        canvas_w: Canvas width in pixels.
        canvas_h: Canvas height in pixels.

    Returns:
        The rendered canvas as an array.  When Pillow is not installed, a
        uniform grey ``uint8`` placeholder of shape
        ``(canvas_h, canvas_w, 3)`` is returned instead.

    The canvas dimensions are handed to Pillow / NumPy unchanged and are
    not validated here.
    """
    if not _PIL:
        # Return a plain coloured placeholder if Pillow is missing
        return np.full((canvas_h, canvas_w, 3), 200, dtype=np.uint8)

    count = _clamp_count(count)
    img = Image.new("RGB", (canvas_w, canvas_h), _BG)
    draw = ImageDraw.Draw(img)

    # ── Rounded background card ──────────────────────────────────────────
    card_box = [4, 4, canvas_w - 4, canvas_h - 4]
    card_outline = (180, 180, 220)
    try:
        draw.rounded_rectangle(card_box, radius=24,
                               outline=card_outline, width=3)
    except AttributeError:
        # Older Pillow without rounded_rectangle: draw a square-cornered
        # card rather than failing, in line with the textbbox fallback below.
        draw.rectangle(card_box, outline=card_outline, width=3)

    # ── Title ────────────────────────────────────────────────────────────
    # NOTE(review): the trailing glyph in the title looks mis-encoded;
    # confirm the intended character before changing it.
    font_title = _get_font(26)
    _draw_text_centered(draw, "Count the dots ๐", 14, font_title,
                        color=(50, 50, 160), canvas_w=canvas_w)

    # ── Dot grid layout ──────────────────────────────────────────────────
    area_top = 60
    area_bottom = canvas_h - 20
    area_left = 20
    area_right = canvas_w - 20
    area_w = area_right - area_left
    area_h = area_bottom - area_top

    # At most five dots per row; add rows as needed.
    cols = min(count, 5)
    rows = math.ceil(count / cols)
    cell_w = area_w // cols
    cell_h = area_h // rows

    # Maximum dot radius that still fits with padding, kept within 10..36.
    max_r_w = cell_w // 2 - 6
    max_r_h = cell_h // 2 - 6
    dot_r = max(10, min(36, max_r_w, max_r_h))

    # Highlight geometry depends only on dot_r, so compute it once.
    gloss_r = max(3, dot_r // 4)
    gloss_offset = dot_r // 3

    for i in range(count):
        row, col = divmod(i, cols)
        cx = area_left + col * cell_w + cell_w // 2
        cy = area_top + row * cell_h + cell_h // 2
        color = _PALETTE[i % len(_PALETTE)]

        # Shadow: same circle shifted 3 px down and to the right.
        draw.ellipse(
            [cx - dot_r + 3, cy - dot_r + 3,
             cx + dot_r + 3, cy + dot_r + 3],
            fill=(180, 180, 200))

        # Main dot
        draw.ellipse(
            [cx - dot_r, cy - dot_r,
             cx + dot_r, cy + dot_r],
            fill=color)

        # Highlight (top-left gloss)
        # NOTE(review): the canvas is RGB, so the alpha component of this
        # fill presumably has no blending effect; confirm if translucency
        # is wanted.
        hx, hy = cx - gloss_offset, cy - gloss_offset
        draw.ellipse(
            [hx - gloss_r, hy - gloss_r, hx + gloss_r, hy + gloss_r],
            fill=(255, 255, 255, 160))

    # ── Count number hint in bottom-right (subtle) ───────────────────────
    font_hint = _get_font(18)
    hint = f"({count} dots)"
    try:
        bbox = draw.textbbox((0, 0), hint, font=font_hint)
        hw = bbox[2] - bbox[0]
    except AttributeError:
        # Older Pillow without textbbox
        hw, _ = draw.textsize(hint, font=font_hint)
    draw.text((canvas_w - hw - 14, canvas_h - 26), hint,
              fill=(180, 180, 200), font=font_hint)

    return np.array(img)
|