File size: 17,517 Bytes
59bd45e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
"""Image Generation service for Voice Text Processor.

This module implements the ImageGenerationService class for generating
cat character images using the MiniMax Text-to-Image API.

Requirements: PRD - AI形象生成模块
"""

import logging
import httpx
from typing import Optional, Dict, List
import time
import json
from pathlib import Path

logger = logging.getLogger(__name__)


class ImageGenerationError(Exception):
    """Signal that an image generation operation could not be completed.

    Raised by the image generation service for failed API calls, network
    problems, and responses that cannot be interpreted.

    Attributes:
        message: Human-readable description of the failure; also passed to
            ``Exception`` so ``str(error)`` yields the same text.
    """

    def __init__(self, message: str = "图像生成服务不可用"):
        """Store the failure description.

        Args:
            message: Description of what went wrong. Defaults to a generic
                "service unavailable" text.
        """
        self.message = message
        super().__init__(message)


class ImageGenerationService:
    """Service for generating cat character images using the MiniMax API.

    The service turns user preferences (color, personality, appearance,
    role) into an English prompt, posts it to the MiniMax image generation
    endpoint and returns the generated image(s). It can also download a
    generated image to a local file.

    The service owns an ``httpx.AsyncClient``; release it with ``close()``
    or by using the service as an async context manager::

        async with ImageGenerationService(api_key) as service:
            result = await service.generate_image()

    Attributes:
        api_key: MiniMax API key used for the ``Authorization`` header
        group_id: MiniMax group ID; stored but not sent with any request
        client: Async HTTP client for making API requests
        api_url: MiniMax API endpoint URL
        model: Model identifier sent in the request payload ("image-01")

    Requirements: PRD - AI character image generation module
    """

    # Color preference -> prompt fragment
    COLOR_MAPPING = {
        "温暖粉": "soft pastel pink fur, rose-colored aesthetic",
        "天空蓝": "light sky blue fur, serene blue atmosphere",
        "薄荷绿": "mint green fur, fresh green ambiance",
        "奶油黄": "cream yellow fur, warm golden glow",
        "薰衣草紫": "lavender purple fur, gentle purple tones",
        "珊瑚橙": "coral orange fur, warm peachy atmosphere",
        "纯白": "pure white fur, clean minimalist aesthetic",
        "浅灰": "light gray fur, soft neutral tones"
    }

    # Personality trait -> prompt fragment
    PERSONALITY_MAPPING = {
        "活泼": "big curious eyes, dynamic paw gesture, energetic aura, playful expression",
        "温柔": "soft gentle eyes, calm posture, peaceful expression, caring demeanor",
        "聪明": "intelligent eyes, thoughtful expression, wise appearance, attentive look",
        "慵懒": "relaxed eyes, lounging posture, comfortable expression, laid-back vibe",
        "勇敢": "confident eyes, strong posture, determined expression, courageous stance",
        "害羞": "shy eyes, timid posture, gentle expression, reserved demeanor"
    }

    # Appearance feature (accessory) -> prompt fragment
    APPEARANCE_MAPPING = {
        "戴眼镜": "wearing tiny round glasses, scholarly look",
        "戴帽子": "wearing a cute small hat, fashionable style",
        "戴围巾": "wearing a cozy scarf, warm appearance",
        "戴蝴蝶结": "wearing a cute bow tie, elegant look",
        "无配饰": "natural appearance, simple and pure"
    }

    # Character role -> prompt fragment
    ROLE_MAPPING = {
        "陪伴式朋友": "friendly companion, approachable and warm",
        "温柔照顾型长辈": "caring elder figure, nurturing and protective",
        "引导型老师": "wise teacher figure, knowledgeable and patient"
    }

    # Base prompt template; the placeholders are filled by build_prompt()
    BASE_PROMPT = (
        "A masterpiece cute stylized cat illustration, {color} theme, "
        "{personality} facial expression and posture, {appearance}. "
        "{role}. Japanese watercolor style, clean minimalist background, "
        "high quality, soft studio lighting, 4k, healing aesthetic, "
        "adorable and heartwarming"
    )

    # Fields of the response ``data`` object that may carry images, in lookup
    # order, paired with the key under which they are handed to the caller.
    # NOTE(review): "image_base64" follows the MiniMax API documentation for
    # response_format="base64" - confirm against a live response.
    _IMAGE_FIELDS = (
        ("image_urls", "url"),
        ("url", "url"),
        ("image_base64", "data"),
    )

    def __init__(self, api_key: str, group_id: Optional[str] = None):
        """Initialize the image generation service.

        Args:
            api_key: MiniMax API key for authentication
            group_id: MiniMax group ID (optional, for compatibility)
        """
        self.api_key = api_key
        self.group_id = group_id  # kept for compatibility; never sent
        # Image generation is slow, hence the generous client-wide timeout.
        self.client = httpx.AsyncClient(timeout=120.0)
        self.api_url = "https://api.minimaxi.com/v1/image_generation"
        self.model = "image-01"

    async def __aenter__(self) -> "ImageGenerationService":
        """Return the service itself for use in ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the HTTP client when leaving the ``async with`` block."""
        await self.close()

    async def close(self):
        """Close the HTTP client.

        This should be called when the service is no longer needed
        to properly clean up resources.
        """
        await self.client.aclose()

    def _redacted_api_key(self) -> str:
        """Return a log-safe placeholder for the API key.

        Only the key length is revealed, so that no part of the credential
        ends up in log files.
        """
        key = self.api_key or ""
        return f"<redacted, {len(key)} chars>"

    async def download_image(self, url: str, save_path: str) -> str:
        """Download image from URL and save to local file.

        Args:
            url: Image URL to download
            save_path: Local file path to save the image; missing parent
                directories are created

        Returns:
            Absolute path to the saved image file

        Raises:
            ImageGenerationError: If the request fails, the server does not
                answer with HTTP 200, the body is empty, or the file cannot
                be written
        """
        try:
            logger.info(f"Downloading image from: {url}")

            # Create the target directory if it does not exist yet
            target = Path(save_path)
            target.parent.mkdir(parents=True, exist_ok=True)

            response = await self.client.get(url, timeout=60.0)

            if response.status_code != 200:
                error_msg = f"Failed to download image: HTTP {response.status_code}"
                logger.error(error_msg)
                raise ImageGenerationError(error_msg)

            # An empty 200 response would otherwise be saved as a zero-byte
            # "image" and reported as success.
            if not response.content:
                error_msg = "Failed to download image: empty response body"
                logger.error(error_msg)
                raise ImageGenerationError(error_msg)

            target.write_bytes(response.content)

            abs_path = str(target.absolute())
            logger.info(f"Image saved to: {abs_path}")

            return abs_path

        except ImageGenerationError:
            raise
        except Exception as e:
            error_msg = f"Failed to download image: {str(e)}"
            logger.error(error_msg)
            raise ImageGenerationError(error_msg) from e

    def build_prompt(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友"
    ) -> str:
        """Build the complete prompt for image generation.

        Each argument is looked up in the corresponding class-level mapping;
        a value that is not a key of its mapping silently falls back to the
        entry for that parameter's default.

        Args:
            color: Color preference, a key of COLOR_MAPPING
            personality: Personality trait, a key of PERSONALITY_MAPPING
            appearance: Appearance feature, a key of APPEARANCE_MAPPING
            role: Character role, a key of ROLE_MAPPING

        Returns:
            Complete prompt string for the MiniMax image generation API
        """
        color_desc = self.COLOR_MAPPING.get(color, self.COLOR_MAPPING["温暖粉"])
        personality_desc = self.PERSONALITY_MAPPING.get(
            personality,
            self.PERSONALITY_MAPPING["温柔"]
        )
        appearance_desc = self.APPEARANCE_MAPPING.get(
            appearance,
            self.APPEARANCE_MAPPING["无配饰"]
        )
        role_desc = self.ROLE_MAPPING.get(
            role,
            self.ROLE_MAPPING["陪伴式朋友"]
        )

        prompt = self.BASE_PROMPT.format(
            color=color_desc,
            personality=personality_desc,
            appearance=appearance_desc,
            role=role_desc
        )

        logger.info(f"Generated prompt: {prompt[:100]}...")
        return prompt

    async def generate_image(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友",
        aspect_ratio: str = "1:1",
        n: int = 1,
        response_format: str = "url"
    ) -> Dict[str, object]:
        """Generate a cat character image using MiniMax API.

        This method sends a request to the MiniMax API with the constructed
        prompt and returns the generated image URL or base64 data.

        Args:
            color: Color preference
            personality: Personality trait
            appearance: Appearance feature
            role: Character role
            aspect_ratio: Image aspect ratio (1:1, 16:9, 9:16, 4:3, 3:4)
            n: Number of images to generate (1-4)
            response_format: Response format ("url" or "base64"); forwarded
                to the API unchanged

        Returns:
            Dictionary containing:
                - url: Image URL, or list of URLs when n != 1 (present when
                  the API returned URLs)
                - data: Base64 image data, or list when n != 1 (present when
                  the API returned base64 data instead of URLs)
                - prompt: Used prompt
                - task_id: Task ID from MiniMax ("" if the response has none)
                - metadata: The response's "metadata" object ({} if absent)

        Raises:
            ImageGenerationError: If API call fails or returns invalid response
        """
        try:
            prompt = self.build_prompt(color, personality, appearance, role)

            headers = {
                "Authorization": f"Bearer {self.api_key.strip()}",
                "Content-Type": "application/json"
            }

            payload = {
                "model": self.model,
                "prompt": prompt,
                "aspect_ratio": aspect_ratio,
                # Honor the caller's choice instead of always requesting URLs
                "response_format": response_format,
                "n": n,
                "prompt_optimizer": True
            }

            logger.info(
                f"Calling MiniMax API for image generation. "
                f"Aspect ratio: {aspect_ratio}, Count: {n}"
            )
            logger.debug(f"API URL: {self.api_url}")
            # Never log any part of the credential itself
            logger.debug(f"API Key: {self._redacted_api_key()}")
            logger.debug(f"Payload: {json.dumps(payload, ensure_ascii=False)}")

            response = await self.client.post(
                self.api_url,
                headers=headers,
                json=payload
            )

            if response.status_code != 200:
                error_msg = f"MiniMax API returned status {response.status_code}"
                try:
                    error_detail = response.json()
                    error_msg += f": {json.dumps(error_detail, ensure_ascii=False)}"
                except Exception:
                    # Error body is not JSON; fall back to the raw text
                    error_msg += f": {response.text}"

                logger.error(f"Image generation API call failed: {error_msg}")
                logger.error(f"Request URL: {self.api_url}")
                logger.error(
                    f"Request headers: Authorization=Bearer {self._redacted_api_key()}, "
                    f"Content-Type=application/json"
                )
                logger.error(f"Request payload: {json.dumps(payload, ensure_ascii=False)}")
                raise ImageGenerationError(f"图像生成服务不可用: {error_msg}")

            try:
                result = response.json()
            except Exception as e:
                logger.error(f"Failed to parse MiniMax API response: {str(e)}")
                logger.error(f"Raw response text: {response.text}")
                raise ImageGenerationError("图像生成服务不可用: 响应格式无效") from e

            if response_format == "url":
                logger.info(
                    f"API Response (full): "
                    f"{json.dumps(result, indent=2, ensure_ascii=False)}"
                )
            else:
                # Inline image data can be megabytes long; keep it out of logs
                logger.info(
                    "API Response received (body not logged because it may "
                    "contain inline image data)"
                )

            return self._parse_generation_result(result, prompt, n)

        except ImageGenerationError:
            # Already carries a user-facing message; pass through unchanged
            raise

        except httpx.TimeoutException as e:
            logger.error(f"MiniMax API request timeout: {str(e)}")
            raise ImageGenerationError("图像生成服务不可用: 请求超时") from e

        except httpx.RequestError as e:
            logger.error(f"MiniMax API request failed: {str(e)}")
            raise ImageGenerationError("图像生成服务不可用: 网络错误") from e

        except Exception as e:
            error_msg = f"Unexpected error in image generation service: {str(e)}"
            logger.error(error_msg, exc_info=True)
            raise ImageGenerationError(f"图像生成服务不可用: {str(e)}") from e

    def _parse_generation_result(
        self,
        result: object,
        prompt: str,
        n: int
    ) -> Dict[str, object]:
        """Extract the generated image(s) from a decoded API response.

        Expected response shape::

            {
              "id": "task_id",
              "data": {"image_urls": [...]},
              "metadata": {...},
              "base_resp": {"status_code": 0, "status_msg": "success"}
            }

        Args:
            result: Decoded JSON body of the API response
            prompt: Prompt that was sent; echoed back in the result
            n: Number of images requested; a single image is returned
                unwrapped when n == 1

        Returns:
            The dictionary documented on generate_image()

        Raises:
            ImageGenerationError: If the API reports an error status or no
                image can be extracted from the response
        """
        if not isinstance(result, dict):
            logger.error(
                f"Could not extract image URLs from response: "
                f"{json.dumps(result, ensure_ascii=False)}"
            )
            raise ImageGenerationError("API 响应格式错误: 无法提取图像 URL")

        try:
            if "base_resp" in result:
                # A null base_resp is treated like a missing status
                base_resp = result.get("base_resp") or {}
                status_code = base_resp.get("status_code", -1)
                status_msg = base_resp.get("status_msg", "Unknown error")

                # status_code == 0 means success
                if status_code != 0:
                    logger.error(f"MiniMax API error: {status_code} - {status_msg}")
                    raise ImageGenerationError(f"图像生成失败: {status_msg}")

                logger.info(f"MiniMax API success: {status_code} - {status_msg}")

            # The task id may be reported as "id" or as "task_id"
            task_id = result.get("id") or result.get("task_id", "")

            data = result.get("data")
            if isinstance(data, dict):
                logger.info(f"Data field keys: {list(data.keys())}")

                for field, out_key in self._IMAGE_FIELDS:
                    images = data.get(field)
                    if not images:
                        continue

                    logger.info(f"Found {field} field")

                    # A lone string must not be indexed character by character
                    if isinstance(images, str):
                        images = [images]

                    if out_key == "url":
                        logger.info(f"Image generation successful. URLs: {images}")
                    else:
                        logger.info(
                            f"Image generation successful. "
                            f"Received {len(images)} base64 image(s)"
                        )

                    return {
                        # A single requested image is returned unwrapped
                        out_key: images[0] if n == 1 else images,
                        "prompt": prompt,
                        "task_id": task_id,
                        "metadata": result.get("metadata", {})
                    }

            # Nothing usable was found: the response has an unexpected shape
            logger.error(
                f"Could not extract image URLs from response: "
                f"{json.dumps(result, ensure_ascii=False)}"
            )
            raise ImageGenerationError("API 响应格式错误: 无法提取图像 URL")

        except (KeyError, IndexError, TypeError, AttributeError) as e:
            logger.error(
                f"Invalid API response structure: {str(e)}, "
                f"Response: {json.dumps(result, ensure_ascii=False)}"
            )
            raise ImageGenerationError("图像生成服务不可用: 响应结构无效") from e

    async def generate_multiple_images(
        self,
        color: str = "温暖粉",
        personality: str = "温柔",
        appearance: str = "无配饰",
        role: str = "陪伴式朋友",
        count: int = 3,
        aspect_ratio: str = "1:1"
    ) -> List[Dict[str, object]]:
        """Generate multiple cat character images.

        This method generates multiple images with the same parameters in a
        single API call, allowing users to choose their favorite one.

        Args:
            color: Color preference
            personality: Personality trait
            appearance: Appearance feature
            role: Character role
            count: Number of images to generate (1-4)
            aspect_ratio: Image aspect ratio

        Returns:
            List of dictionaries, each containing url, prompt, task_id and
            the zero-based index of the image within the batch

        Raises:
            ValueError: If count is outside 1-4
            ImageGenerationError: If the API call fails
        """
        if count < 1 or count > 4:
            raise ValueError("Count must be between 1 and 4")

        try:
            # MiniMax can produce several images in one request
            result = await self.generate_image(
                color=color,
                personality=personality,
                appearance=appearance,
                role=role,
                aspect_ratio=aspect_ratio,
                n=count
            )

            # generate_image() unwraps a single image; normalize to a list
            urls = result['url'] if isinstance(result['url'], list) else [result['url']]

            return [
                {
                    "url": url,
                    "prompt": result['prompt'],
                    "task_id": result['task_id'],
                    "index": i
                }
                for i, url in enumerate(urls)
            ]

        except ImageGenerationError as e:
            logger.error(f"Failed to generate images: {e.message}")
            raise