| """Image Generation service for Voice Text Processor. |
| |
| This module implements the ImageGenerationService class for generating |
| cat character images using the MiniMax Text-to-Image API. |
| |
| Requirements: PRD - AI形象生成模块 |
| """ |
|
|
| import logging |
| import httpx |
| from typing import Optional, Dict, List |
| import time |
| import json |
| from pathlib import Path |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
class ImageGenerationError(Exception):
    """Signal that an image generation operation could not be completed.

    Used by the image generation service to report failures of the
    MiniMax API call (network problems, API-level errors, malformed
    responses) as well as failed image downloads.

    Attributes:
        message: Human-readable description of what went wrong.
    """

    def __init__(self, message: str = "图像生成服务不可用"):
        """Create the error and remember its message.

        Args:
            message: Description of the failure; defaults to a generic
                "service unavailable" text.
        """
        # Keep the text on the instance so callers can read ``e.message``
        # without going through ``e.args``.
        self.message = message
        Exception.__init__(self, message)
|
|
|
|
class ImageGenerationService:
    """Service for generating cat character images using the MiniMax API.

    The service turns user preferences (color, personality, appearance,
    role) into an English prompt, posts it to the MiniMax image generation
    endpoint and returns the generated image URL(s) or base64 payload(s).
    It can also download a generated image to a local file.

    Attributes:
        api_key: MiniMax API key, sent as a Bearer token.
        group_id: MiniMax group ID; stored for compatibility, not sent by
            any request in this class.
        client: Shared ``httpx.AsyncClient`` used for all HTTP requests.
        api_url: MiniMax image generation endpoint URL.
        model: Model identifier sent in the request payload ("image-01").

    Requirements: PRD - AI形象生成模块
    """

    # User-facing color choice -> English prompt fragment.
    COLOR_MAPPING = {
        "温暖粉": "soft pastel pink fur, rose-colored aesthetic",
        "天空蓝": "light sky blue fur, serene blue atmosphere",
        "薄荷绿": "mint green fur, fresh green ambiance",
        "奶油黄": "cream yellow fur, warm golden glow",
        "薰衣草紫": "lavender purple fur, gentle purple tones",
        "珊瑚橙": "coral orange fur, warm peachy atmosphere",
        "纯白": "pure white fur, clean minimalist aesthetic",
        "浅灰": "light gray fur, soft neutral tones"
    }

    # User-facing personality choice -> English prompt fragment.
    PERSONALITY_MAPPING = {
        "活泼": "big curious eyes, dynamic paw gesture, energetic aura, playful expression",
        "温柔": "soft gentle eyes, calm posture, peaceful expression, caring demeanor",
        "聪明": "intelligent eyes, thoughtful expression, wise appearance, attentive look",
        "慵懒": "relaxed eyes, lounging posture, comfortable expression, laid-back vibe",
        "勇敢": "confident eyes, strong posture, determined expression, courageous stance",
        "害羞": "shy eyes, timid posture, gentle expression, reserved demeanor"
    }

    # User-facing accessory choice -> English prompt fragment.
    APPEARANCE_MAPPING = {
        "戴眼镜": "wearing tiny round glasses, scholarly look",
        "戴帽子": "wearing a cute small hat, fashionable style",
        "戴围巾": "wearing a cozy scarf, warm appearance",
        "戴蝴蝶结": "wearing a cute bow tie, elegant look",
        "无配饰": "natural appearance, simple and pure"
    }

    # User-facing character role -> English prompt fragment.
    ROLE_MAPPING = {
        "陪伴式朋友": "friendly companion, approachable and warm",
        "温柔照顾型长辈": "caring elder figure, nurturing and protective",
        "引导型老师": "wise teacher figure, knowledgeable and patient"
    }

    # Prompt template; the placeholders are filled from the mappings above.
    BASE_PROMPT = (
        "A masterpiece cute stylized cat illustration, {color} theme, "
        "{personality} facial expression and posture, {appearance}. "
        "{role}. Japanese watercolor style, clean minimalist background, "
        "high quality, soft studio lighting, 4k, healing aesthetic, "
        "adorable and heartwarming"
    )

    def __init__(self, api_key: str, group_id: Optional[str] = None):
        """Initialize the image generation service.

        Args:
            api_key: MiniMax API key for authentication.
            group_id: MiniMax group ID (optional, kept for compatibility).
        """
        self.api_key = api_key
        self.group_id = group_id
        # Image generation can be slow, hence the generous default timeout.
        self.client = httpx.AsyncClient(timeout=120.0)
        self.api_url = "https://api.minimaxi.com/v1/image_generation"
        self.model = "image-01"

    async def close(self):
        """Close the HTTP client.

        Call this when the service is no longer needed so the underlying
        connections are released.
        """
        await self.client.aclose()

    @staticmethod
    def _mask_secret(secret: Optional[str], visible: int = 4) -> str:
        """Return a log-safe rendering of a credential.

        Only the last ``visible`` characters are kept, and only when the
        secret is long enough that doing so does not reveal most of it.
        """
        if not secret or len(secret) <= visible * 2:
            return "***"
        return f"***{secret[-visible:]}"

    @staticmethod
    def _extract_images(data, response_format: str = "url") -> List[str]:
        """Return the list of image URLs / base64 strings found in ``data``.

        A bare string value is wrapped into a one-element list so callers
        never index into the characters of a URL. Returns an empty list
        when nothing usable is present.
        """
        if not isinstance(data, dict):
            return []
        if response_format == "base64":
            candidate_keys = ("image_base64",)
        else:
            candidate_keys = ("image_urls", "url")
        for key in candidate_keys:
            value = data.get(key)
            if isinstance(value, str):
                value = [value]
            if isinstance(value, (list, tuple)) and value:
                return list(value)
        return []

    async def download_image(self, url: str, save_path: str) -> str:
        """Download an image from a URL and save it to a local file.

        Missing parent directories of ``save_path`` are created.

        Args:
            url: Image URL to download.
            save_path: Local file path to save the image to.

        Returns:
            Absolute path of the saved image file.

        Raises:
            ImageGenerationError: If the download or the file write fails.
        """
        try:
            logger.info(f"Downloading image from: {url}")

            save_path_obj = Path(save_path)
            save_path_obj.parent.mkdir(parents=True, exist_ok=True)

            response = await self.client.get(url, timeout=60.0)

            if response.status_code != 200:
                error_msg = f"Failed to download image: HTTP {response.status_code}"
                logger.error(error_msg)
                raise ImageGenerationError(error_msg)

            save_path_obj.write_bytes(response.content)

            abs_path = str(save_path_obj.absolute())
            logger.info(f"Image saved to: {abs_path}")

            return abs_path

        except ImageGenerationError:
            raise
        except Exception as e:
            error_msg = f"Failed to download image: {str(e)}"
            logger.error(error_msg)
            # Chain the cause so the original traceback is not lost.
            raise ImageGenerationError(error_msg) from e

    def build_prompt(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友"
    ) -> str:
        """Build the complete prompt for image generation.

        Each argument is looked up in the corresponding class-level
        mapping; an unknown value falls back to that mapping's default
        entry (the same value as the parameter default).

        Args:
            color: Color preference (温暖粉/天空蓝/薄荷绿等).
            personality: Personality trait (活泼/温柔/聪明等).
            appearance: Appearance feature (戴眼镜/戴帽子等).
            role: Character role (陪伴式朋友/温柔照顾型长辈等).

        Returns:
            Complete English prompt string for the MiniMax API.
        """
        color_desc = self.COLOR_MAPPING.get(color, self.COLOR_MAPPING["温暖粉"])
        personality_desc = self.PERSONALITY_MAPPING.get(
            personality,
            self.PERSONALITY_MAPPING["温柔"]
        )
        appearance_desc = self.APPEARANCE_MAPPING.get(
            appearance,
            self.APPEARANCE_MAPPING["无配饰"]
        )
        role_desc = self.ROLE_MAPPING.get(
            role,
            self.ROLE_MAPPING["陪伴式朋友"]
        )

        prompt = self.BASE_PROMPT.format(
            color=color_desc,
            personality=personality_desc,
            appearance=appearance_desc,
            role=role_desc
        )

        logger.info(f"Generated prompt: {prompt[:100]}...")
        return prompt

    def _parse_result(self, result, prompt: str, n: int, response_format: str) -> Dict[str, str]:
        """Validate a decoded API response and build the return dictionary.

        Raises:
            ImageGenerationError: If the API reports a failure in
                ``base_resp`` or no image can be extracted.
        """
        if not isinstance(result, dict):
            logger.error(f"Unexpected API response type: {type(result).__name__}")
            raise ImageGenerationError("图像生成服务不可用: 响应结构无效")

        if "base_resp" in result:
            # ``base_resp`` may be present but null; treat that as empty.
            base_resp = result.get("base_resp") or {}
            status_code = base_resp.get("status_code", -1)
            status_msg = base_resp.get("status_msg", "Unknown error")

            if status_code != 0:
                logger.error(f"MiniMax API error: {status_code} - {status_msg}")
                raise ImageGenerationError(f"图像生成失败: {status_msg}")

            logger.info(f"MiniMax API success: {status_code} - {status_msg}")

        task_id = result.get("id") or result.get("task_id", "")

        data = result.get("data")
        logger.info(
            f"Data field keys: {list(data.keys()) if isinstance(data, dict) else 'not a dict'}"
        )
        images = self._extract_images(data, response_format)

        if not images:
            logger.error(
                f"Could not extract image URLs from response: "
                f"{json.dumps(result, ensure_ascii=False)}"
            )
            if response_format == "base64":
                raise ImageGenerationError("API 响应格式错误: 无法提取图像数据")
            raise ImageGenerationError("API 响应格式错误: 无法提取图像 URL")

        # A single requested image is returned bare; several as a list.
        image_value = images[0] if n == 1 else images

        if response_format == "base64":
            # Never log the payload itself: it is the whole image.
            logger.info(f"Image generation successful. Images: {len(images)}")
            value_key = "data"
        else:
            logger.info(f"Image generation successful. URLs: {images}")
            value_key = "url"

        return {
            value_key: image_value,
            "prompt": prompt,
            "task_id": task_id,
            "metadata": result.get("metadata", {})
        }

    async def generate_image(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友",
        aspect_ratio: str = "1:1",
        n: int = 1,
        response_format: str = "url"
    ) -> Dict[str, str]:
        """Generate a cat character image using the MiniMax API.

        Builds the prompt, posts it to the MiniMax endpoint and returns
        the generated image URL or base64 data.

        Args:
            color: Color preference.
            personality: Personality trait.
            appearance: Appearance feature.
            role: Character role.
            aspect_ratio: Image aspect ratio (1:1, 16:9, 9:16, 4:3, 3:4).
            n: Number of images to generate (1-4).
            response_format: Response format ("url" or "base64").

        Returns:
            Dictionary containing:
                - url: Image URL (if response_format="url"); a list of
                  URLs when ``n`` is not 1
                - data: Base64 image data (if response_format="base64");
                  a list when ``n`` is not 1
                - prompt: Used prompt
                - task_id: Task ID from MiniMax ("" if absent)
                - metadata: The response's ``metadata`` field ({} if absent)

        Raises:
            ValueError: If ``response_format`` is not "url" or "base64".
            ImageGenerationError: If the API call fails or returns an
                invalid response.
        """
        if response_format not in ("url", "base64"):
            raise ValueError('response_format must be "url" or "base64"')

        try:
            prompt = self.build_prompt(color, personality, appearance, role)

            headers = {
                "Authorization": f"Bearer {self.api_key.strip()}",
                "Content-Type": "application/json"
            }

            payload = {
                "model": self.model,
                "prompt": prompt,
                "aspect_ratio": aspect_ratio,
                # Honour the caller's choice instead of always asking for URLs.
                "response_format": response_format,
                "n": n,
                "prompt_optimizer": True
            }

            # Only a masked form of the key may reach the logs.
            masked_key = self._mask_secret(self.api_key)

            logger.info(
                f"Calling MiniMax API for image generation. "
                f"Aspect ratio: {aspect_ratio}, Count: {n}"
            )
            logger.debug(f"API URL: {self.api_url}")
            logger.debug(f"API Key (masked): {masked_key}")
            logger.debug(f"Payload: {json.dumps(payload, ensure_ascii=False)}")

            response = await self.client.post(
                self.api_url,
                headers=headers,
                json=payload
            )

            if response.status_code != 200:
                error_msg = f"MiniMax API returned status {response.status_code}"
                try:
                    error_detail = response.json()
                    error_msg += f": {json.dumps(error_detail, ensure_ascii=False)}"
                except Exception:
                    # Error body is not JSON; fall back to the raw text.
                    error_msg += f": {response.text}"

                logger.error(f"Image generation API call failed: {error_msg}")
                logger.error(f"Request URL: {self.api_url}")
                logger.error(
                    f"Request headers: Authorization=Bearer {masked_key}, "
                    f"Content-Type=application/json"
                )
                logger.error(f"Request payload: {json.dumps(payload, ensure_ascii=False)}")
                raise ImageGenerationError(f"图像生成服务不可用: {error_msg}")

            try:
                result = response.json()
            except Exception as e:
                error_msg = f"Failed to parse MiniMax API response: {str(e)}"
                logger.error(error_msg)
                logger.error(f"Raw response text: {response.text}")
                raise ImageGenerationError("图像生成服务不可用: 响应格式无效") from e

            if response_format == "url":
                logger.info(
                    f"API Response (full): {json.dumps(result, indent=2, ensure_ascii=False)}"
                )
            else:
                # A base64 response embeds the images; keep them out of the logs.
                logger.info("API response received (base64 payload omitted from logs)")

            try:
                return self._parse_result(result, prompt, n, response_format)
            except (KeyError, IndexError, TypeError, AttributeError) as e:
                logger.error(f"Invalid API response structure: {str(e)}")
                raise ImageGenerationError("图像生成服务不可用: 响应结构无效") from e

        except ImageGenerationError:
            # Already in its final form; do not re-wrap.
            raise

        except httpx.TimeoutException as e:
            error_msg = f"MiniMax API request timeout: {str(e)}"
            logger.error(error_msg)
            raise ImageGenerationError("图像生成服务不可用: 请求超时") from e

        except httpx.RequestError as e:
            error_msg = f"MiniMax API request failed: {str(e)}"
            logger.error(error_msg)
            raise ImageGenerationError("图像生成服务不可用: 网络错误") from e

        except Exception as e:
            error_msg = f"Unexpected error in image generation service: {str(e)}"
            logger.error(error_msg, exc_info=True)
            raise ImageGenerationError(f"图像生成服务不可用: {str(e)}") from e

    async def generate_multiple_images(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友",
        count: int = 3,
        aspect_ratio: str = "1:1"
    ) -> List[Dict[str, str]]:
        """Generate multiple cat character images.

        Requests ``count`` images with the same parameters in a single
        API call so the user can pick a favorite.

        Args:
            color: Color preference.
            personality: Personality trait.
            appearance: Appearance feature.
            role: Character role.
            count: Number of images to generate (1-4).
            aspect_ratio: Image aspect ratio.

        Returns:
            List of dictionaries, one per image, each containing url,
            prompt, task_id and the image's index in the response.

        Raises:
            ValueError: If ``count`` is outside 1-4.
            ImageGenerationError: If the API call fails.
        """
        if count < 1 or count > 4:
            raise ValueError("Count must be between 1 and 4")

        try:
            result = await self.generate_image(
                color=color,
                personality=personality,
                appearance=appearance,
                role=role,
                aspect_ratio=aspect_ratio,
                n=count
            )
        except ImageGenerationError as e:
            logger.error(f"Failed to generate images: {e.message}")
            raise

        # generate_image returns a bare URL when exactly one was requested.
        urls = result["url"] if isinstance(result["url"], list) else [result["url"]]

        return [
            {
                "url": url,
                "prompt": result["prompt"],
                "task_id": result["task_id"],
                "index": i
            }
            for i, url in enumerate(urls)
        ]
|
|