Spaces:

VLAI-AIVN
/

AIO2025M09_Project_Comic_Generation

Sleeping

App Files Files Community

AIO2025M09_Project_Comic_Generation / src /image_generator.py

Phat-Dat

add gemini api logic

a5511c0 2 months ago

raw

history blame contribute delete

5.02 kB

	import os
	import logging
	from typing import Any
	from PIL import Image, ImageDraw, ImageFont
	from src.config import settings

	# Configure the root logger at INFO level when this module is imported,
	# then create the logger used by this module.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Caption typeface location: <this module's directory>/../static/Roboto-Regular.ttf
	_MODULE_DIR = os.path.dirname(__file__)
	FONT_PATH = os.path.join(_MODULE_DIR, "..", "static", "Roboto-Regular.ttf")


	def _load_font(size: int) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
	    """Load the caption font at ``size``, falling back to PIL's default font.

	    Args:
	        size: Font size passed to ``ImageFont.truetype``.

	    Returns:
	        The TrueType font read from ``FONT_PATH``. If loading fails for any
	        reason, the result of ``ImageFont.load_default()`` is returned
	        instead; note that the fallback is requested without ``size``.
	    """
	    try:
	        return ImageFont.truetype(FONT_PATH, size)
	    except Exception as exc:
	        # Deliberately best-effort: a missing or unreadable font file must not
	        # break captioning. Log it so the downgrade is visible instead of
	        # silently changing how every caption looks.
	        logger.warning(
	            "Could not load font %r at size %s (%s); using PIL default font.",
	            FONT_PATH, size, exc,
	        )
	        return ImageFont.load_default()


	def _text_width(draw: ImageDraw.ImageDraw, text: str,
	font: ImageFont.FreeTypeFont) -> int:
	bbox = draw.textbbox((0, 0), text, font=font)
	return bbox[2] - bbox[0]


	def _text_height(draw: ImageDraw.ImageDraw, text: str,
	font: ImageFont.FreeTypeFont) -> int:
	bbox = draw.textbbox((0, 0), text, font=font)
	return bbox[3] - bbox[1]


	def _wrap_text_by_pixels(draw: ImageDraw.ImageDraw, text: str,
	font: ImageFont.FreeTypeFont,
	max_width: int) -> list[str]:
	"""Word-wrap text so each line fits within max_width pixels."""
	words = text.split()
	if not words:
	return [""]
	lines: list[str] = []
	current = words[0]
	for word in words[1:]:
	candidate = f"{current} {word}"
	if _text_width(draw, candidate, font) <= max_width:
	current = candidate
	else:
	lines.append(current)
	current = word
	lines.append(current)
	return lines


	def _truncate_line(draw: ImageDraw.ImageDraw, text: str,
	                   font: ImageFont.FreeTypeFont, max_width: int) -> str:
	    """Shorten ``text`` with a trailing ellipsis until it fits ``max_width``.

	    Text that already fits is returned unchanged. Otherwise progressively
	    shorter prefixes (longest first, trailing whitespace stripped) are tried
	    with "…" appended, and the first one that fits is returned. If none
	    fits, a lone "…" is returned.
	    """
	    def fits(candidate: str) -> bool:
	        return _text_width(draw, candidate, font) <= max_width

	    if fits(text):
	        return text

	    shortened = (
	        text[:cut].rstrip() + "…" for cut in range(len(text), 0, -1)
	    )
	    return next((candidate for candidate in shortened if fits(candidate)), "…")


	def _fit_caption(draw: ImageDraw.ImageDraw, text: str, max_width: int,
	                 font_max: int, font_min: int,
	                 max_lines: int) -> tuple[list[str], ImageFont.FreeTypeFont]:
	    """Find the largest font size at which ``text`` wraps into ``max_lines``.

	    Font sizes are tried from ``font_max`` down to ``font_min``; the first
	    size whose wrapped text needs at most ``max_lines`` lines wins.

	    If no size fits, the text is wrapped at ``font_min``, cut down to
	    ``max_lines`` lines, and the last kept line is made to end with "…" so
	    the reader can tell that text was dropped.

	    Args:
	        draw: Drawing context used only for measuring text.
	        text: Caption text to wrap.
	        max_width: Maximum line width in pixels.
	        font_max: Largest font size to try.
	        font_min: Smallest font size to try; also the fallback size.
	        max_lines: Maximum number of lines to return. Values below 1 yield
	            an empty list of lines.

	    Returns:
	        A ``(wrapped_lines, font)`` tuple.
	    """
	    for size in range(font_max, font_min - 1, -1):
	        font = _load_font(size)
	        lines = _wrap_text_by_pixels(draw, text, font, max_width)
	        if len(lines) <= max_lines:
	            return lines, font

	    font = _load_font(font_min)
	    lines = _wrap_text_by_pixels(draw, text, font, max_width)
	    if len(lines) > max_lines:
	        # Clamp so a zero or negative max_lines gives [] rather than an
	        # IndexError or a negative-slice surprise.
	        lines = lines[:max(max_lines, 0)]
	        if lines:
	            # The kept line already fits max_width after wrapping, so
	            # truncating it as-is would return it unchanged with no sign
	            # that later lines were dropped. Append the ellipsis first and
	            # let _truncate_line trim only if that pushes it past the limit.
	            lines[-1] = _truncate_line(draw, lines[-1] + "…", font, max_width)
	    return lines, font


	class ImageGenerator:
	    """Generate images through a diffusion client and caption them."""

	    def __init__(self, diffusion_client: Any):
	        # The client only needs a ``gen_image`` method accepting the keyword
	        # arguments that ``generate_image`` passes to it.
	        self.diffusion_client = diffusion_client

	    def add_caption(self, image: Image.Image, text: str) -> Image.Image:
	        """Draw ``text`` on a dark bar along the bottom edge of ``image``.

	        The caption is fitted with ``_fit_caption`` using the padding, font
	        range and line limit from ``settings``. The bar is capped at 30% of
	        the image height, and each line is centred horizontally with a
	        one-pixel dark shadow behind white text.

	        Returns:
	            A new RGB image; the input is converted to RGBA first and that
	            copy is the one modified.
	        """
	        canvas = image.convert("RGBA")
	        width, height = canvas.size
	        pad_x = settings.CAPTION_PADDING_X
	        pad_y = settings.CAPTION_PADDING_Y

	        # Text is measured on a throwaway 1x1 canvas because the bar's size
	        # is not known until the caption has been wrapped.
	        measure = ImageDraw.Draw(Image.new("RGBA", (1, 1)))
	        lines, font = _fit_caption(
	            measure,
	            text,
	            width - 2 * pad_x,
	            settings.CAPTION_FONT_MAX,
	            settings.CAPTION_FONT_MIN,
	            settings.CAPTION_MAX_LINES,
	        )

	        # Line height is the measured height of the sample "Áy" plus 4 px of
	        # spacing (presumably chosen to cover an accent and a descender).
	        line_height = _text_height(measure, "Áy", font) + 4
	        text_block_height = line_height * len(lines)
	        bar_height = min(text_block_height + 2 * pad_y, int(height * 0.30))

	        bar = Image.new(
	            "RGBA",
	            (width, bar_height),
	            (0, 0, 0, settings.CAPTION_OVERLAY_OPACITY),
	        )
	        pen = ImageDraw.Draw(bar)

	        top = (bar_height - text_block_height) // 2
	        shadow_offsets = ((-1, -1), (1, -1), (-1, 1), (1, 1))
	        for row, line in enumerate(lines):
	            left = (width - _text_width(pen, line, font)) // 2
	            y = top + row * line_height
	            # Shadow first (four diagonal offsets), then the white text on top.
	            for dx, dy in shadow_offsets:
	                pen.text((left + dx, y + dy), line, font=font,
	                         fill=(0, 0, 0, 200))
	            pen.text((left, y), line, font=font, fill=(255, 255, 255, 255))

	        # The bar is pasted using itself as the mask.
	        canvas.paste(bar, (0, height - bar_height), bar)
	        return canvas.convert("RGB")

	    def generate_image(
	        self,
	        prompt: str,
	        paragraph: str,
	        num_inference_steps: int = 4,
	        guidance_scale: float = 0.0,
	        size: int = 768,
	    ) -> Image.Image | None:
	        """Generate a square image for ``prompt`` and caption it.

	        Args:
	            prompt: Passed to the client's ``gen_image`` as ``prompt``.
	            paragraph: Caption text drawn onto the generated image.
	            num_inference_steps: Forwarded unchanged to the client.
	            guidance_scale: Forwarded unchanged to the client.
	            size: Used for both ``width`` and ``height``.

	        Returns:
	            The captioned image, or ``None`` when the client returns ``None``.
	        """
	        generated = self.diffusion_client.gen_image(
	            prompt=prompt,
	            num_inference_steps=num_inference_steps,
	            guidance_scale=guidance_scale,
	            width=size,
	            height=size,
	        )
	        if generated is None:
	            return None
	        return self.add_caption(generated, paragraph)