Phat-Dat's picture
add gemini api logic
a5511c0
import os
import logging
from typing import Any
from PIL import Image, ImageDraw, ImageFont
from src.config import settings
# Root logging is configured at import time; this module's logger inherits it.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The caption font is resolved relative to this file: ../static/Roboto-Regular.ttf
_MODULE_DIR = os.path.dirname(__file__)
FONT_PATH = os.path.join(_MODULE_DIR, "..", "static", "Roboto-Regular.ttf")
def _load_font(size: int) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
    """Load the font at ``FONT_PATH`` at ``size``, falling back to Pillow's default.

    Loading is deliberately best-effort: any failure of
    ``ImageFont.truetype`` results in ``ImageFont.load_default()`` being
    returned instead of an exception. The fallback is requested without a
    size, so ``size`` does not carry over to it.

    Args:
        size: Font size passed to ``ImageFont.truetype``.

    Returns:
        The font loaded from ``FONT_PATH``, or Pillow's default font when
        that fails.
    """
    try:
        return ImageFont.truetype(FONT_PATH, size)
    except Exception as exc:
        # Keep the fallback, but make the failure visible: otherwise a
        # missing or unreadable font file silently degrades every caption.
        logger.warning(
            "Could not load font %s at size %s (%s); "
            "using Pillow's default font",
            FONT_PATH, size, exc,
        )
        return ImageFont.load_default()
def _text_width(draw: ImageDraw.ImageDraw, text: str,
                font: ImageFont.FreeTypeFont) -> int:
    """Return the horizontal extent of ``text`` as measured by ``draw.textbbox``.

    The text is measured anchored at (0, 0); the result is the right edge
    of the bounding box minus its left edge.
    """
    left, _top, right, _bottom = draw.textbbox((0, 0), text, font=font)
    return right - left
def _text_height(draw: ImageDraw.ImageDraw, text: str,
                 font: ImageFont.FreeTypeFont) -> int:
    """Return the vertical extent of ``text`` as measured by ``draw.textbbox``.

    The text is measured anchored at (0, 0); the result is the bottom edge
    of the bounding box minus its top edge.
    """
    _left, top, _right, bottom = draw.textbbox((0, 0), text, font=font)
    return bottom - top
def _wrap_text_by_pixels(draw: ImageDraw.ImageDraw, text: str,
                         font: ImageFont.FreeTypeFont,
                         max_width: int) -> list[str]:
    """Word-wrap text so each line fits within max_width pixels.

    Words are split on whitespace (so runs of spaces and newlines collapse)
    and packed greedily onto lines. A single word that is wider than
    ``max_width`` on its own is hard-broken into character chunks instead of
    being emitted as one overflowing line. Every chunk keeps at least one
    character, so a chunk can still exceed ``max_width`` when even a single
    character does not fit.

    Args:
        draw: Drawing context used only for measuring text.
        text: Text to wrap.
        font: Font the text will be rendered with.
        max_width: Maximum line width in pixels.

    Returns:
        The wrapped lines; ``[""]`` for empty or whitespace-only text.
    """
    def fits(candidate: str) -> bool:
        return _text_width(draw, candidate, font) <= max_width

    lines: list[str] = []
    current = ""
    for word in text.split():
        candidate = f"{current} {word}" if current else word
        if fits(candidate):
            current = candidate
            continue
        if current:
            lines.append(current)
        current = word
        # The word now starts a fresh line. If it is still too wide, peel
        # off the longest prefix that fits (at least one character, to
        # guarantee progress) until the remainder fits.
        while len(current) > 1 and not fits(current):
            cut = 1
            while cut < len(current) - 1 and fits(current[:cut + 1]):
                cut += 1
            lines.append(current[:cut])
            current = current[cut:]
    lines.append(current)
    return lines
def _truncate_line(draw: ImageDraw.ImageDraw, text: str,
                   font: ImageFont.FreeTypeFont, max_width: int) -> str:
    """Shorten ``text`` with a trailing ellipsis until it fits ``max_width``.

    Text that already fits is returned unchanged. Otherwise progressively
    shorter prefixes (trailing whitespace stripped, ellipsis appended) are
    tried from longest to shortest and the first one that fits is returned.
    If none fits, a lone ellipsis is returned.
    """
    def fits(candidate: str) -> bool:
        return _text_width(draw, candidate, font) <= max_width

    if fits(text):
        return text

    ellipsis = "…"
    shortened = (
        text[:keep].rstrip() + ellipsis
        for keep in range(len(text), 0, -1)
    )
    return next((option for option in shortened if fits(option)), ellipsis)
def _fit_caption(draw: ImageDraw.ImageDraw, text: str, max_width: int,
                 font_max: int, font_min: int,
                 max_lines: int) -> tuple[list[str], ImageFont.FreeTypeFont]:
    """Find the largest font size where text fits in max_lines.

    Font sizes are tried from ``font_max`` down to ``font_min`` (inclusive);
    the first size whose wrapped text needs at most ``max_lines`` lines wins.
    If no size fits, the text is wrapped at ``font_min``, cut to
    ``max_lines`` lines, and the last kept line is ended with an ellipsis so
    the reader can see that text was dropped.

    Args:
        draw: Drawing context used only for measuring text.
        text: Caption text.
        max_width: Maximum line width in pixels.
        font_max: Largest font size to try.
        font_min: Smallest font size to try, and the size used for the
            truncating fallback.
        max_lines: Maximum number of lines; values below 1 are treated as 1.

    Returns:
        ``(wrapped_lines, font)``.
    """
    # Wrapping always yields at least one line, so a limit below 1 can never
    # be met and would leave nothing to truncate; clamp instead of crashing.
    max_lines = max(1, max_lines)

    for size in range(font_max, font_min - 1, -1):
        font = _load_font(size)
        lines = _wrap_text_by_pixels(draw, text, font, max_width)
        if len(lines) <= max_lines:
            return lines, font

    font = _load_font(font_min)
    lines = _wrap_text_by_pixels(draw, text, font, max_width)
    if len(lines) > max_lines:
        lines = lines[:max_lines]
        # The kept line already fits the width, so truncating it as-is would
        # return it unchanged. Append the ellipsis first, then shorten the
        # line only as far as needed to make room for it.
        lines[-1] = _truncate_line(draw, lines[-1] + "…", font, max_width)
    return lines, font
class ImageGenerator:
    """Generate an image through a diffusion client and caption it.

    The client is any object exposing ``gen_image(prompt=...,
    num_inference_steps=..., guidance_scale=..., width=..., height=...)``.
    """

    def __init__(self, diffusion_client: Any):
        """Store the client whose ``gen_image`` method produces raw images."""
        self.diffusion_client = diffusion_client

    def add_caption(self, image: Image.Image, text: str) -> Image.Image:
        """Overlay a compact dark caption bar at the bottom of the image.

        The caption is fitted with ``_fit_caption`` using the
        ``CAPTION_*`` values from ``settings``. The bar is at most 30% of
        the image height; if the fitted lines would not fit inside that
        cap, only the lines that fit are kept and the last one is ended
        with an ellipsis, so text is never drawn outside the bar.

        Args:
            image: Source image; it is converted, not modified in place.
            text: Caption text.

        Returns:
            A new RGB image with the caption bar composited at the bottom.
        """
        img = image.convert("RGBA")
        w, h = img.size
        pad_x = settings.CAPTION_PADDING_X
        pad_y = settings.CAPTION_PADDING_Y
        text_area_width = w - 2 * pad_x

        # Throwaway 1x1 canvas: text has to be measured before the real
        # overlay (whose height depends on the measurement) can exist.
        scratch_draw = ImageDraw.Draw(Image.new("RGBA", (1, 1)))
        lines, font = _fit_caption(
            scratch_draw, text, text_area_width,
            settings.CAPTION_FONT_MAX, settings.CAPTION_FONT_MIN,
            settings.CAPTION_MAX_LINES,
        )

        # Line height is measured on a fixed sample (an accented capital
        # plus a descender) with 4 px of extra spacing, so every line gets
        # the same height regardless of its own glyphs.
        line_h = _text_height(scratch_draw, "Áy", font) + 4
        max_overlay = int(h * 0.30)

        # How many lines fit inside the capped bar (always keep one).
        # Without this, a capped bar shorter than the text block makes the
        # vertical offset negative and clips the first and last lines.
        fitting = max(1, (max_overlay - 2 * pad_y) // line_h)
        if len(lines) > fitting:
            lines = lines[:fitting]
            lines[-1] = _truncate_line(
                scratch_draw, lines[-1] + "…", font, text_area_width
            )

        block_h = len(lines) * line_h
        overlay_h = min(block_h + 2 * pad_y, max_overlay)

        overlay = Image.new("RGBA", (w, overlay_h),
                            (0, 0, 0, settings.CAPTION_OVERLAY_OPACITY))
        draw = ImageDraw.Draw(overlay)
        y_start = (overlay_h - block_h) // 2
        shadow_offsets = [(-1, -1), (1, -1), (-1, 1), (1, 1)]
        for i, line in enumerate(lines):
            # Centre each line horizontally within the full image width.
            x = (w - _text_width(draw, line, font)) // 2
            y = y_start + i * line_h
            # Four diagonal 1 px copies in dark ink, drawn before the white
            # text on top of them.
            for dx, dy in shadow_offsets:
                draw.text((x + dx, y + dy), line, font=font,
                          fill=(0, 0, 0, 200))
            draw.text((x, y), line, font=font, fill=(255, 255, 255, 255))

        # The overlay doubles as its own mask so its alpha controls blending.
        img.paste(overlay, (0, h - overlay_h), overlay)
        return img.convert("RGB")

    def generate_image(
        self,
        prompt: str,
        paragraph: str,
        num_inference_steps: int = 4,
        guidance_scale: float = 0.0,
        size: int = 768,
    ) -> Image.Image | None:
        """Generate a square image for ``prompt`` and caption it.

        Args:
            prompt: Prompt forwarded to the client's ``gen_image``.
            paragraph: Text rendered as the caption.
            num_inference_steps: Forwarded to ``gen_image``.
            guidance_scale: Forwarded to ``gen_image``.
            size: Used as both ``width`` and ``height`` for ``gen_image``.

        Returns:
            The captioned image, or ``None`` when the client returns ``None``.
        """
        raw_img = self.diffusion_client.gen_image(
            prompt=prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            width=size,
            height=size,
        )
        if raw_img is None:
            return None
        return self.add_caption(raw_img, paragraph)