Nora / app /image_service.py
GitHub Action
Deploy clean version of Nora
59bd45e
"""Image Generation service for Voice Text Processor.
This module implements the ImageGenerationService class for generating
cat character images using the MiniMax Text-to-Image API.
Requirements: PRD - AI形象生成模块
"""
import logging
import httpx
from typing import Optional, Dict, List
import time
import json
from pathlib import Path
logger = logging.getLogger(__name__)
class ImageGenerationError(Exception):
"""Exception raised when image generation operations fail.
This exception is raised when the MiniMax API call fails,
such as due to network issues, API errors, or invalid responses.
"""
def __init__(self, message: str = "图像生成服务不可用"):
"""Initialize ImageGenerationError.
Args:
message: Error message describing the failure
"""
super().__init__(message)
self.message = message
class ImageGenerationService:
"""Service for generating cat character images using MiniMax API.
This service handles image generation by calling the MiniMax Text-to-Image API
to create healing-style cat illustrations based on user preferences
(color, personality, appearance).
Attributes:
api_key: MiniMax API key for authentication
group_id: MiniMax group ID for authentication
client: Async HTTP client for making API requests
api_url: MiniMax API endpoint URL
model: Model identifier (text-to-image-v2)
Requirements: PRD - AI形象生成模块
"""
# 颜色映射
COLOR_MAPPING = {
"温暖粉": "soft pastel pink fur, rose-colored aesthetic",
"天空蓝": "light sky blue fur, serene blue atmosphere",
"薄荷绿": "mint green fur, fresh green ambiance",
"奶油黄": "cream yellow fur, warm golden glow",
"薰衣草紫": "lavender purple fur, gentle purple tones",
"珊瑚橙": "coral orange fur, warm peachy atmosphere",
"纯白": "pure white fur, clean minimalist aesthetic",
"浅灰": "light gray fur, soft neutral tones"
}
# 性格映射
PERSONALITY_MAPPING = {
"活泼": "big curious eyes, dynamic paw gesture, energetic aura, playful expression",
"温柔": "soft gentle eyes, calm posture, peaceful expression, caring demeanor",
"聪明": "intelligent eyes, thoughtful expression, wise appearance, attentive look",
"慵懒": "relaxed eyes, lounging posture, comfortable expression, laid-back vibe",
"勇敢": "confident eyes, strong posture, determined expression, courageous stance",
"害羞": "shy eyes, timid posture, gentle expression, reserved demeanor"
}
# 形象特征映射
APPEARANCE_MAPPING = {
"戴眼镜": "wearing tiny round glasses, scholarly look",
"戴帽子": "wearing a cute small hat, fashionable style",
"戴围巾": "wearing a cozy scarf, warm appearance",
"戴蝴蝶结": "wearing a cute bow tie, elegant look",
"无配饰": "natural appearance, simple and pure"
}
# 角色类型映射
ROLE_MAPPING = {
"陪伴式朋友": "friendly companion, approachable and warm",
"温柔照顾型长辈": "caring elder figure, nurturing and protective",
"引导型老师": "wise teacher figure, knowledgeable and patient"
}
# 系统底座提示词
BASE_PROMPT = (
"A masterpiece cute stylized cat illustration, {color} theme, "
"{personality} facial expression and posture, {appearance}. "
"{role}. Japanese watercolor style, clean minimalist background, "
"high quality, soft studio lighting, 4k, healing aesthetic, "
"adorable and heartwarming"
)
def __init__(self, api_key: str, group_id: Optional[str] = None):
"""Initialize the image generation service.
Args:
api_key: MiniMax API key for authentication
group_id: MiniMax group ID (optional, for compatibility)
"""
self.api_key = api_key
self.group_id = group_id # 保留但不使用
self.client = httpx.AsyncClient(timeout=120.0) # 图像生成需要更长时间
self.api_url = "https://api.minimaxi.com/v1/image_generation"
self.model = "image-01"
async def close(self):
"""Close the HTTP client.
This should be called when the service is no longer needed
to properly clean up resources.
"""
await self.client.aclose()
async def download_image(self, url: str, save_path: str) -> str:
"""Download image from URL and save to local file.
Args:
url: Image URL to download
save_path: Local file path to save the image
Returns:
Absolute path to the saved image file
Raises:
ImageGenerationError: If download fails
"""
try:
logger.info(f"Downloading image from: {url}")
# 创建保存目录(如果不存在)
save_path_obj = Path(save_path)
save_path_obj.parent.mkdir(parents=True, exist_ok=True)
# 下载图像
response = await self.client.get(url, timeout=60.0)
if response.status_code != 200:
error_msg = f"Failed to download image: HTTP {response.status_code}"
logger.error(error_msg)
raise ImageGenerationError(error_msg)
# 保存到文件
with open(save_path, 'wb') as f:
f.write(response.content)
abs_path = str(save_path_obj.absolute())
logger.info(f"Image saved to: {abs_path}")
return abs_path
except ImageGenerationError:
raise
except Exception as e:
error_msg = f"Failed to download image: {str(e)}"
logger.error(error_msg)
raise ImageGenerationError(error_msg)
def build_prompt(
self,
color: str = "温暖粉",
personality: str = "温柔",
appearance: str = "无配饰",
role: str = "陪伴式朋友"
) -> str:
"""Build the complete prompt for image generation.
Args:
color: Color preference (温暖粉/天空蓝/薄荷绿等)
personality: Personality trait (活泼/温柔/聪明等)
appearance: Appearance feature (戴眼镜/戴帽子等)
role: Character role (陪伴式朋友/温柔照顾型长辈等)
Returns:
Complete prompt string for CogView API
"""
# 获取映射值,如果没有则使用默认值
color_desc = self.COLOR_MAPPING.get(color, self.COLOR_MAPPING["温暖粉"])
personality_desc = self.PERSONALITY_MAPPING.get(
personality,
self.PERSONALITY_MAPPING["温柔"]
)
appearance_desc = self.APPEARANCE_MAPPING.get(
appearance,
self.APPEARANCE_MAPPING["无配饰"]
)
role_desc = self.ROLE_MAPPING.get(
role,
self.ROLE_MAPPING["陪伴式朋友"]
)
# 构建完整提示词
prompt = self.BASE_PROMPT.format(
color=color_desc,
personality=personality_desc,
appearance=appearance_desc,
role=role_desc
)
logger.info(f"Generated prompt: {prompt[:100]}...")
return prompt
async def generate_image(
self,
color: str = "温暖粉",
personality: str = "温柔",
appearance: str = "无配饰",
role: str = "陪伴式朋友",
aspect_ratio: str = "1:1",
n: int = 1,
response_format: str = "url"
) -> Dict[str, str]:
"""Generate a cat character image using MiniMax API.
This method sends a request to the MiniMax API with the constructed
prompt and returns the generated image URL or base64 data.
Args:
color: Color preference
personality: Personality trait
appearance: Appearance feature
role: Character role
aspect_ratio: Image aspect ratio (1:1, 16:9, 9:16, 4:3, 3:4)
n: Number of images to generate (1-4)
response_format: Response format ("url" or "base64")
Returns:
Dictionary containing:
- url: Image URL (if response_format="url")
- data: Base64 image data (if response_format="base64")
- prompt: Used prompt
- task_id: Task ID from MiniMax
Raises:
ImageGenerationError: If API call fails or returns invalid response
"""
try:
# 构建提示词
prompt = self.build_prompt(color, personality, appearance, role)
# 准备请求
headers = {
"Authorization": f"Bearer {self.api_key.strip()}",
"Content-Type": "application/json"
}
payload = {
"model": self.model,
"prompt": prompt,
"aspect_ratio": aspect_ratio,
"response_format": "url",
"n": n,
"prompt_optimizer": True
}
logger.info(
f"Calling MiniMax API for image generation. "
f"Aspect ratio: {aspect_ratio}, Count: {n}"
)
logger.debug(f"API URL: {self.api_url}")
logger.debug(f"API Key (first 20 chars): {self.api_key[:20]}...")
logger.debug(f"Payload: {json.dumps(payload, ensure_ascii=False)}")
# 发送请求
response = await self.client.post(
self.api_url,
headers=headers,
json=payload
)
# 检查响应状态
if response.status_code != 200:
error_msg = f"MiniMax API returned status {response.status_code}"
try:
error_detail = response.json()
error_msg += f": {json.dumps(error_detail, ensure_ascii=False)}"
except Exception:
error_msg += f": {response.text}"
logger.error(f"Image generation API call failed: {error_msg}")
logger.error(f"Request URL: {self.api_url}")
logger.error(f"Request headers: Authorization=Bearer {self.api_key[:20]}..., Content-Type=application/json")
logger.error(f"Request payload: {json.dumps(payload, ensure_ascii=False)}")
raise ImageGenerationError(f"图像生成服务不可用: {error_msg}")
# 解析响应
try:
result = response.json()
logger.info(f"API Response (full): {json.dumps(result, indent=2, ensure_ascii=False)}")
except Exception as e:
error_msg = f"Failed to parse MiniMax API response: {str(e)}"
logger.error(error_msg)
logger.error(f"Raw response text: {response.text}")
raise ImageGenerationError(f"图像生成服务不可用: 响应格式无效")
# 提取图像 URL
try:
# MiniMax 实际返回格式:
# {
# "id": "task_id",
# "data": {"image_urls": [...]},
# "metadata": {...},
# "base_resp": {"status_code": 0, "status_msg": "success"}
# }
# 先检查是否有 base_resp
if "base_resp" in result:
base_resp = result.get("base_resp", {})
status_code = base_resp.get("status_code", -1)
error_msg = base_resp.get("status_msg", "Unknown error")
# status_code = 0 表示成功
if status_code != 0:
logger.error(f"MiniMax API error: {status_code} - {error_msg}")
raise ImageGenerationError(f"图像生成失败: {error_msg}")
logger.info(f"MiniMax API success: {status_code} - {error_msg}")
# 提取 task_id(可能在 id 或 task_id 字段)
task_id = result.get("id") or result.get("task_id", "")
# 提取图像数据
if "data" in result:
data = result["data"]
logger.info(f"Data field keys: {list(data.keys()) if isinstance(data, dict) else 'not a dict'}")
if isinstance(data, dict):
# 尝试多个可能的字段名
urls = None
if "image_urls" in data:
urls = data["image_urls"]
logger.info("Found image_urls field")
elif "url" in data:
urls = data["url"]
logger.info("Found url field")
if urls:
# 如果只生成一张,返回单个 URL
image_url = urls[0] if n == 1 else urls
logger.info(f"Image generation successful. URLs: {urls}")
return {
"url": image_url,
"prompt": prompt,
"task_id": task_id,
"metadata": result.get("metadata", {})
}
# 如果到这里还没有返回,说明响应格式不符合预期
logger.error(f"Could not extract image URLs from response: {json.dumps(result, ensure_ascii=False)}")
raise ImageGenerationError("API 响应格式错误: 无法提取图像 URL")
except (KeyError, IndexError) as e:
error_msg = f"Invalid API response structure: {str(e)}, Response: {json.dumps(result, ensure_ascii=False)}"
logger.error(error_msg)
raise ImageGenerationError(f"图像生成服务不可用: 响应结构无效")
except ImageGenerationError:
# Re-raise ImageGenerationError as-is
raise
except httpx.TimeoutException as e:
error_msg = f"MiniMax API request timeout: {str(e)}"
logger.error(error_msg)
raise ImageGenerationError("图像生成服务不可用: 请求超时")
except httpx.RequestError as e:
error_msg = f"MiniMax API request failed: {str(e)}"
logger.error(error_msg)
raise ImageGenerationError(f"图像生成服务不可用: 网络错误")
except Exception as e:
error_msg = f"Unexpected error in image generation service: {str(e)}"
logger.error(error_msg, exc_info=True)
raise ImageGenerationError(f"图像生成服务不可用: {str(e)}")
async def generate_multiple_images(
self,
color: str = "温暖粉",
personality: str = "温柔",
appearance: str = "无配饰",
role: str = "陪伴式朋友",
count: int = 3,
aspect_ratio: str = "1:1"
) -> List[Dict[str, str]]:
"""Generate multiple cat character images.
This method generates multiple images with the same parameters,
allowing users to choose their favorite one.
Args:
color: Color preference
personality: Personality trait
appearance: Appearance feature
role: Character role
count: Number of images to generate (1-4)
aspect_ratio: Image aspect ratio
Returns:
List of dictionaries, each containing url, prompt, and task_id
Raises:
ImageGenerationError: If any API call fails
"""
if count < 1 or count > 4:
raise ValueError("Count must be between 1 and 4")
try:
# MiniMax 支持一次生成多张图像
result = await self.generate_image(
color=color,
personality=personality,
appearance=appearance,
role=role,
aspect_ratio=aspect_ratio,
n=count
)
# 将结果转换为列表格式
urls = result['url'] if isinstance(result['url'], list) else [result['url']]
images = []
for i, url in enumerate(urls):
images.append({
"url": url,
"prompt": result['prompt'],
"task_id": result['task_id'],
"index": i
})
return images
except ImageGenerationError as e:
logger.error(f"Failed to generate images: {e.message}")
raise