| import os |
| import logging |
| from typing import Any |
| from PIL import Image, ImageDraw, ImageFont |
| from src.config import settings |
|
|
# Runs at import time: sets up root logging at INFO level (a no-op if the
# root logger is already configured) and creates this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The caption font is looked up in the "static" directory that sits next to
# this module's parent directory.
_MODULE_DIR = os.path.dirname(__file__)
FONT_PATH = os.path.join(_MODULE_DIR, "..", "static", "Roboto-Regular.ttf")
|
|
|
|
def _load_font(size: int) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
    """Load the caption font at ``size``, falling back to PIL's default font.

    Args:
        size: Font size passed to ``ImageFont.truetype``.

    Returns:
        The TrueType font read from ``FONT_PATH`` when loading succeeds;
        otherwise the result of ``ImageFont.load_default()``. The fallback is
        requested without a size, so ``size`` has no influence on it.
    """
    try:
        return ImageFont.truetype(FONT_PATH, size)
    except Exception as exc:
        # Deliberately best-effort: captioning should keep working even when
        # the font file is missing or unreadable. Log the reason so the
        # fallback is not silent.
        logger.warning(
            "Could not load font %r at size %s (%s); using PIL default font",
            FONT_PATH, size, exc,
        )
        return ImageFont.load_default()
|
|
|
|
def _text_width(draw: ImageDraw.ImageDraw, text: str,
                font: ImageFont.FreeTypeFont) -> int:
    """Return the horizontal extent of ``text`` rendered with ``font``.

    The extent is the difference between the right and left edges of the
    bounding box reported by ``draw.textbbox`` for an anchor at ``(0, 0)``.
    """
    left, _top, right, _bottom = draw.textbbox((0, 0), text, font=font)
    return right - left
|
|
|
|
def _text_height(draw: ImageDraw.ImageDraw, text: str,
                 font: ImageFont.FreeTypeFont) -> int:
    """Return the vertical extent of ``text`` rendered with ``font``.

    The extent is the difference between the bottom and top edges of the
    bounding box reported by ``draw.textbbox`` for an anchor at ``(0, 0)``.
    """
    _left, top, _right, bottom = draw.textbbox((0, 0), text, font=font)
    return bottom - top
|
|
|
|
def _wrap_text_by_pixels(draw: ImageDraw.ImageDraw, text: str,
                         font: ImageFont.FreeTypeFont,
                         max_width: int) -> list[str]:
    """Greedily word-wrap ``text`` against a pixel budget of ``max_width``.

    The text is split on whitespace and words are joined with single spaces.
    A word is added to the current line while the measured width stays within
    ``max_width``; otherwise it starts a new line. Words are never broken, so
    a single word wider than ``max_width`` still gets a line of its own.

    Returns:
        The wrapped lines; ``[""]`` when ``text`` contains no words.
    """
    tokens = iter(text.split())
    line = next(tokens, None)
    if line is None:
        return [""]

    wrapped: list[str] = []
    for token in tokens:
        extended = line + " " + token
        if _text_width(draw, extended, font) > max_width:
            # The word does not fit: close the current line and start anew.
            wrapped.append(line)
            line = token
        else:
            line = extended
    wrapped.append(line)
    return wrapped
|
|
|
|
def _truncate_line(draw: ImageDraw.ImageDraw, text: str,
                   font: ImageFont.FreeTypeFont, max_width: int) -> str:
    """Shorten ``text`` with a trailing ellipsis until it fits ``max_width``.

    Text that already fits is returned unchanged. Otherwise prefixes of
    decreasing length are tried, each right-stripped and suffixed with "…",
    and the first one that fits is returned. If no prefix fits, the bare
    ellipsis is returned.
    """
    if _text_width(draw, text, font) <= max_width:
        return text

    ellipsis = "…"
    shortened = (
        text[:cut].rstrip() + ellipsis for cut in range(len(text), 0, -1)
    )
    return next(
        (option for option in shortened
         if _text_width(draw, option, font) <= max_width),
        ellipsis,
    )
|
|
|
|
def _fit_caption(draw: ImageDraw.ImageDraw, text: str, max_width: int,
                 font_max: int, font_min: int,
                 max_lines: int) -> tuple[list[str], ImageFont.FreeTypeFont]:
    """Find the largest font size at which ``text`` wraps into ``max_lines``.

    Font sizes are tried from ``font_max`` down to ``font_min`` (inclusive)
    and the first size whose wrapping needs at most ``max_lines`` lines wins.
    If even ``font_min`` needs more lines, the wrapping is cut to
    ``max_lines`` and the last kept line is ended with an ellipsis so the
    reader can see that text was dropped.

    Args:
        draw: Drawing context used only for measuring text.
        text: Caption text to wrap.
        max_width: Pixel budget for each line.
        font_max: Largest font size to try.
        font_min: Smallest font size to try; also the fallback size.
        max_lines: Maximum number of lines to return.

    Returns:
        ``(wrapped_lines, font)`` for the chosen size.
    """
    font = None
    lines: list[str] = []
    for size in range(font_max, font_min - 1, -1):
        font = _load_font(size)
        lines = _wrap_text_by_pixels(draw, text, font, max_width)
        if len(lines) <= max_lines:
            return lines, font

    if font is None:
        # font_max < font_min, so the loop tried nothing: measure font_min.
        font = _load_font(font_min)
        lines = _wrap_text_by_pixels(draw, text, font, max_width)
        if len(lines) <= max_lines:
            return lines, font

    # Here ``font``/``lines`` describe the font_min attempt, which overflows.
    # Wrapped lines normally already fit max_width, so truncating the last
    # kept line as-is would leave it unchanged and hide the cut. Appending
    # the ellipsis first guarantees the visible marker; _truncate_line then
    # shortens the line only as far as needed to make it fit.
    lines = lines[:max_lines]
    lines[-1] = _truncate_line(draw, lines[-1] + "…", font, max_width)
    return lines, font
|
|
|
|
class ImageGenerator:
    """Produce images through a diffusion client and caption them."""

    def __init__(self, diffusion_client: Any):
        # The client is only used through its ``gen_image(...)`` method.
        self.diffusion_client = diffusion_client

    def add_caption(self, image: Image.Image, text: str) -> Image.Image:
        """Return an RGB copy of ``image`` with ``text`` in a bottom bar.

        The bar is black with the configured overlay opacity, spans the full
        image width, and is at most 30% of the image height. The caption is
        fitted via ``_fit_caption`` and drawn in white, horizontally centred,
        over a one-pixel diagonal black outline. When the 30% cap applies,
        the text block can be taller than the bar; anything outside the bar
        is not drawn.
        """
        canvas = image.convert("RGBA")
        width, height = canvas.size
        margin_x = settings.CAPTION_PADDING_X
        margin_y = settings.CAPTION_PADDING_Y

        # Measuring happens on a throwaway 1x1 image because the bar's size
        # depends on the outcome of the fitting.
        measurer = ImageDraw.Draw(Image.new("RGBA", (1, 1)))
        lines, font = _fit_caption(
            measurer,
            text,
            width - 2 * margin_x,
            settings.CAPTION_FONT_MAX,
            settings.CAPTION_FONT_MIN,
            settings.CAPTION_MAX_LINES,
        )

        # Row height comes from a sample with an accented capital and a
        # descender, plus 4 px of spacing.
        row_h = _text_height(measurer, "Áy", font) + 4
        text_block_h = len(lines) * row_h
        bar_h = min(text_block_h + 2 * margin_y, int(height * 0.30))

        bar = Image.new(
            "RGBA",
            (width, bar_h),
            (0, 0, 0, settings.CAPTION_OVERLAY_OPACITY),
        )
        pen = ImageDraw.Draw(bar)

        outline_offsets = ((-1, -1), (1, -1), (-1, 1), (1, 1))
        first_row_y = (bar_h - text_block_h) // 2
        for row, content in enumerate(lines):
            x = (width - _text_width(pen, content, font)) // 2
            y = first_row_y + row * row_h
            # Outline first, then the white text on top of it.
            for dx, dy in outline_offsets:
                pen.text((x + dx, y + dy), content, font=font,
                         fill=(0, 0, 0, 200))
            pen.text((x, y), content, font=font, fill=(255, 255, 255, 255))

        # The bar doubles as its own mask so its alpha controls the blend.
        canvas.paste(bar, (0, height - bar_h), bar)
        return canvas.convert("RGB")

    def generate_image(
        self,
        prompt: str,
        paragraph: str,
        num_inference_steps: int = 4,
        guidance_scale: float = 0.0,
        size: int = 768,
    ) -> Image.Image | None:
        """Generate a square image for ``prompt`` captioned with ``paragraph``.

        ``size`` is passed to the client as both width and height. Returns
        ``None`` when the client returns ``None``; otherwise the captioned
        image from ``add_caption``.
        """
        picture = self.diffusion_client.gen_image(
            prompt=prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            width=size,
            height=size,
        )
        if picture is None:
            return None
        return self.add_caption(picture, paragraph)
|
|