Spaces:
Sleeping
Sleeping
| import os | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image, ImageDraw, ImageFont | |
class ImageCaptionOverlay:
    """Add text captions to OpenCV-style (BGR ``numpy``) images.

    Two rendering modes are provided:

    * ``add_caption_overlay`` draws the caption directly on top of a copy of
      the image using OpenCV's Hershey font.
    * ``add_caption_background`` builds a taller canvas with a solid band
      above the image and renders the caption in that band using PIL.

    Both are static methods: they keep no state and can be called on the
    class or on an instance. Neither modifies the input array.
    """

    @staticmethod
    def _wrap_caption(caption, max_width, measure_width):
        """Greedily split *caption* into lines no wider than *max_width*.

        Args:
            caption: Text to wrap.
            max_width: Maximum rendered line width, in pixels.
            measure_width: Callable returning the rendered pixel width of a
                string for the font in use.

        Returns:
            A list of lines. If the whole caption already fits, it is
            returned as a single unmodified line. A single word wider than
            *max_width* is kept on its own line (it is not broken up), so
            that line may still overflow.
        """
        if measure_width(caption) <= max_width:
            return [caption]

        lines = []
        current = ""
        for word in caption.split():
            candidate = current + " " + word if current else word
            if measure_width(candidate) <= max_width:
                current = candidate
            else:
                # Flush the line built so far and start a new one with the
                # word that did not fit.
                if current:
                    lines.append(current)
                current = word
        if current:
            lines.append(current)
        return lines

    @staticmethod
    def add_caption_overlay(image: np.ndarray, caption: str, position: str = "bottom",
                            font_size: int = 1, thickness: int = 2) -> np.ndarray:
        """Draw *caption* on top of a copy of *image* using OpenCV.

        Each line is centred horizontally and drawn in white over a filled
        black rectangle for readability. Captions wider than the image
        (minus a 40 px horizontal allowance) are word-wrapped.

        Args:
            image: Source image array; only its first two dimensions
                (height, width) are inspected here.
            caption: Text to draw. An empty or whitespace-only caption
                results in an unmodified copy of the image.
            position: ``"bottom"`` or ``"top"``; any other value centres
                the caption vertically.
            font_size: Font scale passed to ``cv2.getTextSize`` /
                ``cv2.putText``.
            thickness: Stroke thickness passed to OpenCV.

        Returns:
            A new array with the caption drawn on it; *image* is untouched.
        """
        img_copy = image.copy()
        # Nothing to draw: avoid painting an empty black box on the image.
        if not caption or not caption.strip():
            return img_copy

        height, width = img_copy.shape[:2]
        font = cv2.FONT_HERSHEY_SIMPLEX

        def measure_width(text):
            # getTextSize returns ((width, height), baseline).
            return cv2.getTextSize(text, font, font_size, thickness)[0][0]

        lines = ImageCaptionOverlay._wrap_caption(caption, width - 40, measure_width)

        # Line pitch: height of a capital letter plus 10 px of spacing.
        line_height = cv2.getTextSize("A", font, font_size, thickness)[0][1] + 10
        total_height = len(lines) * line_height

        if position == "bottom":
            start_y = height - total_height - 20
        elif position == "top":
            start_y = 30
        else:  # center
            start_y = (height - total_height) // 2

        for index, line in enumerate(lines):
            text_width, text_height = cv2.getTextSize(line, font, font_size, thickness)[0]
            text_x = (width - text_width) // 2
            # putText positions text by its bottom-left corner, hence the
            # added text height.
            text_y = start_y + (index * line_height) + text_height

            # Filled background rectangle (thickness -1) with padding.
            cv2.rectangle(img_copy,
                          (text_x - 10, text_y - text_height - 5),
                          (text_x + text_width + 10, text_y + 5),
                          (0, 0, 0), -1)
            cv2.putText(img_copy, line, (text_x, text_y), font, font_size,
                        (255, 255, 255), thickness)

        return img_copy

    @staticmethod
    def add_caption_background(image: np.ndarray, caption: str,
                               font_path: str = None,
                               background_color: tuple = (0, 0, 0),
                               text_color: tuple = (255, 255, 255),
                               margin: int = 50,
                               font_size: int = 24) -> np.ndarray:
        """Place *caption* in a solid band above *image* using PIL.

        The result is as wide as *image* and taller by the height of the
        wrapped text plus ``2 * margin``. The caption lines are centred
        horizontally in the band; the original image is pasted below it.

        Args:
            image: Source image; converted with ``COLOR_BGR2RGB`` before
                being handed to PIL, so a 3-channel BGR array is expected.
            caption: Text to render. An empty or whitespace-only caption
                results in an unmodified copy of the image (no band added).
            font_path: Optional path to a TrueType font. If missing or not
                found, ``fonts/Poppins-Regular.ttf`` is tried, then PIL's
                built-in default font.
            background_color: Band colour, given to PIL as an RGB tuple.
            text_color: Text colour, given to PIL as an RGB tuple.
            margin: Padding in pixels above and below the text, and the
                horizontal allowance on each side used for wrapping.
            font_size: Size used when a TrueType font is loaded. It has no
                effect on the built-in fallback font, which is loaded
                without a size.

        Returns:
            A new BGR array containing the caption band and the image.
        """
        # Nothing to render: do not prepend an empty band.
        if not caption or not caption.strip():
            return image.copy()

        height, width = image.shape[:2]

        # PIL gives better text rendering than OpenCV's Hershey fonts.
        pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        try:
            if font_path and os.path.exists(font_path):
                font = ImageFont.truetype(font_path, font_size)
            elif os.path.exists("fonts/Poppins-Regular.ttf"):
                font = ImageFont.truetype("fonts/Poppins-Regular.ttf", font_size)
            else:
                font = ImageFont.load_default()
        except Exception:
            # Deliberate best-effort: any font-loading failure falls back
            # to the built-in font instead of aborting the caption.
            font = ImageFont.load_default()

        draw = ImageDraw.Draw(pil_image)

        def measure_width(text):
            box = draw.textbbox((0, 0), text, font=font)
            return box[2] - box[0]

        # The height of the full caption is used as the height of every line.
        caption_box = draw.textbbox((0, 0), caption, font=font)
        text_height = caption_box[3] - caption_box[1]

        lines = ImageCaptionOverlay._wrap_caption(
            caption, width - (2 * margin), measure_width)

        # 10 px of spacing between consecutive lines.
        total_text_height = len(lines) * text_height + (len(lines) - 1) * 10

        band_height = total_text_height + (2 * margin)
        new_image = Image.new('RGB', (width, height + band_height), background_color)
        # The original image goes directly below the caption band.
        new_image.paste(pil_image, (0, band_height))

        draw = ImageDraw.Draw(new_image)
        y_offset = margin
        for line in lines:
            x_position = (width - measure_width(line)) // 2
            draw.text((x_position, y_offset), line, fill=text_color, font=font)
            y_offset += text_height + 10

        # Convert back to OpenCV's BGR channel order.
        return cv2.cvtColor(np.array(new_image), cv2.COLOR_RGB2BGR)