Spaces:

kernel14
/

Nora

Sleeping

File size: 17,517 Bytes

59bd45e

"""Image Generation service for Voice Text Processor.

This module implements the ImageGenerationService class for generating
cat character images using the MiniMax Text-to-Image API.

Requirements: PRD - AI形象生成模块
"""

import logging
import httpx
from typing import Optional, Dict, List
import time
import json
from pathlib import Path

# Module-level logger, named after this module, shared by all classes below.
logger = logging.getLogger(__name__)


class ImageGenerationError(Exception):
    """Signal that an image generation operation could not be completed.

    Raised by the image generation service when a MiniMax API call or an
    image download fails, e.g. because of network problems, API errors or
    responses that cannot be interpreted.

    Attributes:
        message: Human-readable description of the failure.
    """

    def __init__(self, message: str = "图像生成服务不可用"):
        """Record the failure description and initialise the base exception.

        Args:
            message: Text describing what went wrong.
        """
        # Keep the text available as an attribute in addition to ``args``.
        self.message = message
        super().__init__(message)


class ImageGenerationService:
    """Service for generating cat character images using the MiniMax API.

    The service turns user preferences (color, personality, appearance,
    role) into an English text prompt and posts it to the MiniMax image
    generation endpoint to obtain healing-style cat illustrations.

    Attributes:
        api_key: MiniMax API key, sent as a Bearer token on API requests.
        group_id: MiniMax group ID; stored for compatibility but not used.
        client: Async HTTP client (httpx.AsyncClient) used for all requests.
        api_url: MiniMax image generation endpoint URL.
        model: Model identifier sent in the request payload ("image-01").

    Requirements: PRD - AI character image generation module (AI形象生成模块)
    """

    # Color mapping: user-facing color label -> English prompt fragment
    # describing the fur color and overall color theme.
    COLOR_MAPPING = {
        "温暖粉": "soft pastel pink fur, rose-colored aesthetic",
        "天空蓝": "light sky blue fur, serene blue atmosphere",
        "薄荷绿": "mint green fur, fresh green ambiance",
        "奶油黄": "cream yellow fur, warm golden glow",
        "薰衣草紫": "lavender purple fur, gentle purple tones",
        "珊瑚橙": "coral orange fur, warm peachy atmosphere",
        "纯白": "pure white fur, clean minimalist aesthetic",
        "浅灰": "light gray fur, soft neutral tones"
    }

    # Personality mapping: user-facing personality label -> English prompt
    # fragment describing eyes, posture and expression.
    PERSONALITY_MAPPING = {
        "活泼": "big curious eyes, dynamic paw gesture, energetic aura, playful expression",
        "温柔": "soft gentle eyes, calm posture, peaceful expression, caring demeanor",
        "聪明": "intelligent eyes, thoughtful expression, wise appearance, attentive look",
        "慵懒": "relaxed eyes, lounging posture, comfortable expression, laid-back vibe",
        "勇敢": "confident eyes, strong posture, determined expression, courageous stance",
        "害羞": "shy eyes, timid posture, gentle expression, reserved demeanor"
    }

    # Appearance mapping: user-facing accessory label -> English prompt
    # fragment describing the accessory (or the lack of one).
    APPEARANCE_MAPPING = {
        "戴眼镜": "wearing tiny round glasses, scholarly look",
        "戴帽子": "wearing a cute small hat, fashionable style",
        "戴围巾": "wearing a cozy scarf, warm appearance",
        "戴蝴蝶结": "wearing a cute bow tie, elegant look",
        "无配饰": "natural appearance, simple and pure"
    }

    # Role type mapping: user-facing role label -> English prompt fragment
    # describing the character's role.
    ROLE_MAPPING = {
        "陪伴式朋友": "friendly companion, approachable and warm",
        "温柔照顾型长辈": "caring elder figure, nurturing and protective",
        "引导型老师": "wise teacher figure, knowledgeable and patient"
    }

    # Base prompt template. The {color}, {personality}, {appearance} and
    # {role} placeholders are filled with fragments from the mappings above.
    BASE_PROMPT = (
        "A masterpiece cute stylized cat illustration, {color} theme, "
        "{personality} facial expression and posture, {appearance}. "
        "{role}. Japanese watercolor style, clean minimalist background, "
        "high quality, soft studio lighting, 4k, healing aesthetic, "
        "adorable and heartwarming"
    )
    
    def __init__(self, api_key: str, group_id: Optional[str] = None):
        """Set up credentials, endpoint settings and the shared HTTP client.

        Args:
            api_key: MiniMax API key used to authenticate API requests.
            group_id: MiniMax group ID (optional). It is stored for
                compatibility only and is not otherwise used here.
        """
        # Fixed endpoint configuration.
        self.model = "image-01"
        self.api_url = "https://api.minimaxi.com/v1/image_generation"

        # Caller-supplied credentials; group_id is retained but unused.
        self.group_id = group_id
        self.api_key = api_key

        # Image generation is slow, so the client gets a long (120 s) timeout.
        self.client = httpx.AsyncClient(timeout=120.0)
    
    async def close(self):
        """Close the HTTP client.
        
        This should be called when the service is no longer needed
        to properly clean up resources.
        """
        await self.client.aclose()
    
    async def download_image(self, url: str, save_path: str) -> str:
        """Download image from URL and save to local file.
        
        Args:
            url: Image URL to download
            save_path: Local file path to save the image
        
        Returns:
            Absolute path to the saved image file
        
        Raises:
            ImageGenerationError: If download fails
        """
        try:
            logger.info(f"Downloading image from: {url}")
            
            # 创建保存目录（如果不存在）
            save_path_obj = Path(save_path)
            save_path_obj.parent.mkdir(parents=True, exist_ok=True)
            
            # 下载图像
            response = await self.client.get(url, timeout=60.0)
            
            if response.status_code != 200:
                error_msg = f"Failed to download image: HTTP {response.status_code}"
                logger.error(error_msg)
                raise ImageGenerationError(error_msg)
            
            # 保存到文件
            with open(save_path, 'wb') as f:
                f.write(response.content)
            
            abs_path = str(save_path_obj.absolute())
            logger.info(f"Image saved to: {abs_path}")
            
            return abs_path
            
        except ImageGenerationError:
            raise
        except Exception as e:
            error_msg = f"Failed to download image: {str(e)}"
            logger.error(error_msg)
            raise ImageGenerationError(error_msg)
    
    def build_prompt(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友"
    ) -> str:
        """Build the complete prompt for image generation.
        
        Args:
            color: Color preference (温暖粉/天空蓝/薄荷绿等)
            personality: Personality trait (活泼/温柔/聪明等)
            appearance: Appearance feature (戴眼镜/戴帽子等)
            role: Character role (陪伴式朋友/温柔照顾型长辈等)
        
        Returns:
            Complete prompt string for CogView API
        """
        # 获取映射值，如果没有则使用默认值
        color_desc = self.COLOR_MAPPING.get(color, self.COLOR_MAPPING["温暖粉"])
        personality_desc = self.PERSONALITY_MAPPING.get(
            personality, 
            self.PERSONALITY_MAPPING["温柔"]
        )
        appearance_desc = self.APPEARANCE_MAPPING.get(
            appearance, 
            self.APPEARANCE_MAPPING["无配饰"]
        )
        role_desc = self.ROLE_MAPPING.get(
            role, 
            self.ROLE_MAPPING["陪伴式朋友"]
        )
        
        # 构建完整提示词
        prompt = self.BASE_PROMPT.format(
            color=color_desc,
            personality=personality_desc,
            appearance=appearance_desc,
            role=role_desc
        )
        
        logger.info(f"Generated prompt: {prompt[:100]}...")
        return prompt
    
    async def generate_image(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友",
        aspect_ratio: str = "1:1",
        n: int = 1,
        response_format: str = "url"
    ) -> Dict[str, str]:
        """Generate a cat character image using MiniMax API.
        
        This method sends a request to the MiniMax API with the constructed
        prompt and returns the generated image URL or base64 data.
        
        Args:
            color: Color preference
            personality: Personality trait
            appearance: Appearance feature
            role: Character role
            aspect_ratio: Image aspect ratio (1:1, 16:9, 9:16, 4:3, 3:4)
            n: Number of images to generate (1-4)
            response_format: Response format ("url" or "base64")
        
        Returns:
            Dictionary containing:
                - url: Image URL (if response_format="url")
                - data: Base64 image data (if response_format="base64")
                - prompt: Used prompt
                - task_id: Task ID from MiniMax
        
        Raises:
            ImageGenerationError: If API call fails or returns invalid response
        """
        try:
            # 构建提示词
            prompt = self.build_prompt(color, personality, appearance, role)
            
            # 准备请求
            headers = {
                "Authorization": f"Bearer {self.api_key.strip()}",
                "Content-Type": "application/json"
            }
            
            payload = {
                "model": self.model,
                "prompt": prompt,
                "aspect_ratio": aspect_ratio,
                "response_format": "url",
                "n": n,
                "prompt_optimizer": True
            }
            
            logger.info(
                f"Calling MiniMax API for image generation. "
                f"Aspect ratio: {aspect_ratio}, Count: {n}"
            )
            logger.debug(f"API URL: {self.api_url}")
            logger.debug(f"API Key (first 20 chars): {self.api_key[:20]}...")
            logger.debug(f"Payload: {json.dumps(payload, ensure_ascii=False)}")
            
            # 发送请求
            response = await self.client.post(
                self.api_url,
                headers=headers,
                json=payload
            )
            
            # 检查响应状态
            if response.status_code != 200:
                error_msg = f"MiniMax API returned status {response.status_code}"
                try:
                    error_detail = response.json()
                    error_msg += f": {json.dumps(error_detail, ensure_ascii=False)}"
                except Exception:
                    error_msg += f": {response.text}"
                
                logger.error(f"Image generation API call failed: {error_msg}")
                logger.error(f"Request URL: {self.api_url}")
                logger.error(f"Request headers: Authorization=Bearer {self.api_key[:20]}..., Content-Type=application/json")
                logger.error(f"Request payload: {json.dumps(payload, ensure_ascii=False)}")
                raise ImageGenerationError(f"图像生成服务不可用: {error_msg}")
            
            # 解析响应
            try:
                result = response.json()
                logger.info(f"API Response (full): {json.dumps(result, indent=2, ensure_ascii=False)}")
            except Exception as e:
                error_msg = f"Failed to parse MiniMax API response: {str(e)}"
                logger.error(error_msg)
                logger.error(f"Raw response text: {response.text}")
                raise ImageGenerationError(f"图像生成服务不可用: 响应格式无效")
            
            # 提取图像 URL
            try:
                # MiniMax 实际返回格式：
                # {
                #   "id": "task_id",
                #   "data": {"image_urls": [...]},
                #   "metadata": {...},
                #   "base_resp": {"status_code": 0, "status_msg": "success"}
                # }
                
                # 先检查是否有 base_resp
                if "base_resp" in result:
                    base_resp = result.get("base_resp", {})
                    status_code = base_resp.get("status_code", -1)
                    error_msg = base_resp.get("status_msg", "Unknown error")
                    
                    # status_code = 0 表示成功
                    if status_code != 0:
                        logger.error(f"MiniMax API error: {status_code} - {error_msg}")
                        raise ImageGenerationError(f"图像生成失败: {error_msg}")
                    
                    logger.info(f"MiniMax API success: {status_code} - {error_msg}")
                
                # 提取 task_id（可能在 id 或 task_id 字段）
                task_id = result.get("id") or result.get("task_id", "")
                
                # 提取图像数据
                if "data" in result:
                    data = result["data"]
                    logger.info(f"Data field keys: {list(data.keys()) if isinstance(data, dict) else 'not a dict'}")
                    
                    if isinstance(data, dict):
                        # 尝试多个可能的字段名
                        urls = None
                        if "image_urls" in data:
                            urls = data["image_urls"]
                            logger.info("Found image_urls field")
                        elif "url" in data:
                            urls = data["url"]
                            logger.info("Found url field")
                        
                        if urls:
                            # 如果只生成一张，返回单个 URL
                            image_url = urls[0] if n == 1 else urls
                            logger.info(f"Image generation successful. URLs: {urls}")
                            
                            return {
                                "url": image_url,
                                "prompt": prompt,
                                "task_id": task_id,
                                "metadata": result.get("metadata", {})
                            }
                
                # 如果到这里还没有返回，说明响应格式不符合预期
                logger.error(f"Could not extract image URLs from response: {json.dumps(result, ensure_ascii=False)}")
                raise ImageGenerationError("API 响应格式错误: 无法提取图像 URL")
                
            except (KeyError, IndexError) as e:
                error_msg = f"Invalid API response structure: {str(e)}, Response: {json.dumps(result, ensure_ascii=False)}"
                logger.error(error_msg)
                raise ImageGenerationError(f"图像生成服务不可用: 响应结构无效")
        
        except ImageGenerationError:
            # Re-raise ImageGenerationError as-is
            raise
        
        except httpx.TimeoutException as e:
            error_msg = f"MiniMax API request timeout: {str(e)}"
            logger.error(error_msg)
            raise ImageGenerationError("图像生成服务不可用: 请求超时")
        
        except httpx.RequestError as e:
            error_msg = f"MiniMax API request failed: {str(e)}"
            logger.error(error_msg)
            raise ImageGenerationError(f"图像生成服务不可用: 网络错误")
        
        except Exception as e:
            error_msg = f"Unexpected error in image generation service: {str(e)}"
            logger.error(error_msg, exc_info=True)
            raise ImageGenerationError(f"图像生成服务不可用: {str(e)}")
    
    async def generate_multiple_images(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友",
        count: int = 3,
        aspect_ratio: str = "1:1"
    ) -> List[Dict[str, str]]:
        """Generate multiple cat character images.
        
        This method generates multiple images with the same parameters,
        allowing users to choose their favorite one.
        
        Args:
            color: Color preference
            personality: Personality trait
            appearance: Appearance feature
            role: Character role
            count: Number of images to generate (1-4)
            aspect_ratio: Image aspect ratio
        
        Returns:
            List of dictionaries, each containing url, prompt, and task_id
        
        Raises:
            ImageGenerationError: If any API call fails
        """
        if count < 1 or count > 4:
            raise ValueError("Count must be between 1 and 4")
        
        try:
            # MiniMax 支持一次生成多张图像
            result = await self.generate_image(
                color=color,
                personality=personality,
                appearance=appearance,
                role=role,
                aspect_ratio=aspect_ratio,
                n=count
            )
            
            # 将结果转换为列表格式
            urls = result['url'] if isinstance(result['url'], list) else [result['url']]
            
            images = []
            for i, url in enumerate(urls):
                images.append({
                    "url": url,
                    "prompt": result['prompt'],
                    "task_id": result['task_id'],
                    "index": i
                })
            
            return images
            
        except ImageGenerationError as e:
            logger.error(f"Failed to generate images: {e.message}")
            raise