import os import logging from typing import Any from PIL import Image, ImageDraw, ImageFont from src.config import settings logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) FONT_PATH = os.path.join( os.path.dirname(__file__), "..", "static", "Roboto-Regular.ttf" ) def _load_font(size: int) -> ImageFont.FreeTypeFont | ImageFont.ImageFont: try: return ImageFont.truetype(FONT_PATH, size) except Exception: return ImageFont.load_default() def _text_width(draw: ImageDraw.ImageDraw, text: str, font: ImageFont.FreeTypeFont) -> int: bbox = draw.textbbox((0, 0), text, font=font) return bbox[2] - bbox[0] def _text_height(draw: ImageDraw.ImageDraw, text: str, font: ImageFont.FreeTypeFont) -> int: bbox = draw.textbbox((0, 0), text, font=font) return bbox[3] - bbox[1] def _wrap_text_by_pixels(draw: ImageDraw.ImageDraw, text: str, font: ImageFont.FreeTypeFont, max_width: int) -> list[str]: """Word-wrap text so each line fits within max_width pixels.""" words = text.split() if not words: return [""] lines: list[str] = [] current = words[0] for word in words[1:]: candidate = f"{current} {word}" if _text_width(draw, candidate, font) <= max_width: current = candidate else: lines.append(current) current = word lines.append(current) return lines def _truncate_line(draw: ImageDraw.ImageDraw, text: str, font: ImageFont.FreeTypeFont, max_width: int) -> str: """Truncate text with ellipsis to fit within max_width pixels.""" if _text_width(draw, text, font) <= max_width: return text for end in range(len(text), 0, -1): truncated = text[:end].rstrip() + "…" if _text_width(draw, truncated, font) <= max_width: return truncated return "…" def _fit_caption(draw: ImageDraw.ImageDraw, text: str, max_width: int, font_max: int, font_min: int, max_lines: int) -> tuple[list[str], ImageFont.FreeTypeFont]: """Find the largest font size where text fits in max_lines. Returns (wrapped_lines, font). Falls back to font_min with truncation. """ for size in range(font_max, font_min - 1, -1): font = _load_font(size) lines = _wrap_text_by_pixels(draw, text, font, max_width) if len(lines) <= max_lines: return lines, font font = _load_font(font_min) lines = _wrap_text_by_pixels(draw, text, font, max_width) if len(lines) > max_lines: lines = lines[:max_lines] lines[-1] = _truncate_line(draw, lines[-1], font, max_width) return lines, font class ImageGenerator: def __init__(self, diffusion_client: Any): self.diffusion_client = diffusion_client def add_caption(self, image: Image.Image, text: str) -> Image.Image: """Overlay a compact dark caption bar at the bottom of the image.""" img = image.convert("RGBA") w, h = img.size pad_x = settings.CAPTION_PADDING_X pad_y = settings.CAPTION_PADDING_Y text_area_width = w - 2 * pad_x scratch = Image.new("RGBA", (1, 1)) scratch_draw = ImageDraw.Draw(scratch) lines, font = _fit_caption( scratch_draw, text, text_area_width, settings.CAPTION_FONT_MAX, settings.CAPTION_FONT_MIN, settings.CAPTION_MAX_LINES, ) line_h = _text_height(scratch_draw, "Áy", font) + 4 block_h = len(lines) * line_h overlay_h = block_h + 2 * pad_y max_overlay = int(h * 0.30) overlay_h = min(overlay_h, max_overlay) overlay = Image.new("RGBA", (w, overlay_h), (0, 0, 0, settings.CAPTION_OVERLAY_OPACITY)) draw = ImageDraw.Draw(overlay) y_start = (overlay_h - block_h) // 2 for i, line in enumerate(lines): lw = _text_width(draw, line, font) x = (w - lw) // 2 y = y_start + i * line_h for dx, dy in [(-1, -1), (1, -1), (-1, 1), (1, 1)]: draw.text((x + dx, y + dy), line, font=font, fill=(0, 0, 0, 200)) draw.text((x, y), line, font=font, fill=(255, 255, 255, 255)) img.paste(overlay, (0, h - overlay_h), overlay) return img.convert("RGB") def generate_image( self, prompt: str, paragraph: str, num_inference_steps: int = 4, guidance_scale: float = 0.0, size: int = 768, ) -> Image.Image | None: raw_img = self.diffusion_client.gen_image( prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale, width=size, height=size, ) if raw_img is not None: return self.add_caption(raw_img, paragraph) return None