Spaces:

kernel14
/

Nora

Sleeping

Nora / app /image_service.py

GitHub Action

Deploy clean version of Nora

59bd45e 3 months ago

17.5 kB

	"""Image Generation service for Voice Text Processor.

	This module implements the ImageGenerationService class for generating
	cat character images using the MiniMax Text-to-Image API.

	Requirements: PRD - AI形象生成模块 (AI character image generation module)
	"""

	import logging
	import httpx
	from typing import Optional, Dict, List
	import time
	import json
	from pathlib import Path

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)


	class ImageGenerationError(Exception):
	    """Signal that an image generation operation could not be completed.

	    Carries a human-readable description of the failure, both as the
	    regular exception argument and as the ``message`` attribute.
	    """

	    def __init__(self, message: str = "图像生成服务不可用"):
	        """Create the error.

	        Args:
	            message: Description of what went wrong. Defaults to a generic
	                "service unavailable" text.
	        """
	        # Keep the text on the instance so handlers can read ``e.message``.
	        self.message = message
	        super().__init__(message)


	class ImageGenerationService:
	    """Service for generating cat character images using the MiniMax API.

	    The service turns the user's preference labels (color, personality,
	    appearance, role) into an English text-to-image prompt, posts it to the
	    MiniMax image generation endpoint and returns the generated image URL(s)
	    or base64 payload(s). It can also download a generated image to disk.

	    The service can be used as an async context manager so the HTTP client
	    is always closed::

	        async with ImageGenerationService(api_key) as service:
	            result = await service.generate_image()

	    Attributes:
	        api_key: MiniMax API key for authentication.
	        group_id: MiniMax group ID; stored for compatibility, never sent.
	        client: Async HTTP client used for every request.
	        api_url: MiniMax image generation endpoint URL.
	        model: Model identifier sent with each request ("image-01").

	    Requirements: PRD - AI character image generation module.
	    """

	    # Color label -> fur / ambience description used in the prompt.
	    COLOR_MAPPING = {
	        "温暖粉": "soft pastel pink fur, rose-colored aesthetic",
	        "天空蓝": "light sky blue fur, serene blue atmosphere",
	        "薄荷绿": "mint green fur, fresh green ambiance",
	        "奶油黄": "cream yellow fur, warm golden glow",
	        "薰衣草紫": "lavender purple fur, gentle purple tones",
	        "珊瑚橙": "coral orange fur, warm peachy atmosphere",
	        "纯白": "pure white fur, clean minimalist aesthetic",
	        "浅灰": "light gray fur, soft neutral tones",
	    }

	    # Personality label -> expression / posture description.
	    PERSONALITY_MAPPING = {
	        "活泼": "big curious eyes, dynamic paw gesture, energetic aura, playful expression",
	        "温柔": "soft gentle eyes, calm posture, peaceful expression, caring demeanor",
	        "聪明": "intelligent eyes, thoughtful expression, wise appearance, attentive look",
	        "慵懒": "relaxed eyes, lounging posture, comfortable expression, laid-back vibe",
	        "勇敢": "confident eyes, strong posture, determined expression, courageous stance",
	        "害羞": "shy eyes, timid posture, gentle expression, reserved demeanor",
	    }

	    # Appearance label -> accessory description.
	    APPEARANCE_MAPPING = {
	        "戴眼镜": "wearing tiny round glasses, scholarly look",
	        "戴帽子": "wearing a cute small hat, fashionable style",
	        "戴围巾": "wearing a cozy scarf, warm appearance",
	        "戴蝴蝶结": "wearing a cute bow tie, elegant look",
	        "无配饰": "natural appearance, simple and pure",
	    }

	    # Role label -> character role description.
	    ROLE_MAPPING = {
	        "陪伴式朋友": "friendly companion, approachable and warm",
	        "温柔照顾型长辈": "caring elder figure, nurturing and protective",
	        "引导型老师": "wise teacher figure, knowledgeable and patient",
	    }

	    # Base prompt template; the four placeholders are filled by build_prompt().
	    BASE_PROMPT = (
	        "A masterpiece cute stylized cat illustration, {color} theme, "
	        "{personality} facial expression and posture, {appearance}. "
	        "{role}. Japanese watercolor style, clean minimalist background, "
	        "high quality, soft studio lighting, 4k, healing aesthetic, "
	        "adorable and heartwarming"
	    )

	    # Keys of the response "data" object that may carry the generated images.
	    # NOTE(review): "image_base64" is the field name given in the MiniMax
	    # image generation docs for response_format="base64" - confirm against
	    # the deployed API version.
	    _URL_FIELDS = ("image_urls", "url")
	    _BASE64_FIELDS = ("image_base64",)

	    def __init__(self, api_key: str, group_id: Optional[str] = None):
	        """Initialize the image generation service.

	        Args:
	            api_key: MiniMax API key for authentication.
	            group_id: MiniMax group ID (optional, kept for compatibility).
	        """
	        self.api_key = api_key
	        self.group_id = group_id  # kept for compatibility, not used
	        # Image generation is slow, so allow a generous timeout.
	        self.client = httpx.AsyncClient(timeout=120.0)
	        self.api_url = "https://api.minimaxi.com/v1/image_generation"
	        self.model = "image-01"

	    async def __aenter__(self):
	        """Enter an ``async with`` block and return the service itself."""
	        return self

	    async def __aexit__(self, exc_type, exc, tb):
	        """Close the HTTP client when leaving an ``async with`` block."""
	        await self.close()

	    async def close(self):
	        """Close the HTTP client.

	        Call this when the service is no longer needed so the underlying
	        connections are released.
	        """
	        await self.client.aclose()

	    async def download_image(self, url: str, save_path: str) -> str:
	        """Download an image from a URL and save it to a local file.

	        Missing parent directories of ``save_path`` are created.

	        Args:
	            url: Image URL to download.
	            save_path: Local file path to save the image to.

	        Returns:
	            Absolute path of the saved image file.

	        Raises:
	            ImageGenerationError: If the request fails, the server does not
	                answer with HTTP 200, the body is empty, or the file cannot
	                be written.
	        """
	        try:
	            logger.info(f"Downloading image from: {url}")

	            target = Path(save_path)
	            target.parent.mkdir(parents=True, exist_ok=True)

	            response = await self.client.get(url, timeout=60.0)

	            if response.status_code != 200:
	                error_msg = f"Failed to download image: HTTP {response.status_code}"
	                logger.error(error_msg)
	                raise ImageGenerationError(error_msg)

	            # An empty body would otherwise be stored as a zero-byte "image".
	            if not response.content:
	                error_msg = "Failed to download image: empty response body"
	                logger.error(error_msg)
	                raise ImageGenerationError(error_msg)

	            target.write_bytes(response.content)

	            abs_path = str(target.absolute())
	            logger.info(f"Image saved to: {abs_path}")
	            return abs_path

	        except ImageGenerationError:
	            raise
	        except Exception as e:
	            error_msg = f"Failed to download image: {str(e)}"
	            logger.error(error_msg)
	            raise ImageGenerationError(error_msg) from e

	    def build_prompt(
	        self,
	        color: str = "温暖粉",
	        personality: str = "温柔",
	        appearance: str = "无配饰",
	        role: str = "陪伴式朋友"
	    ) -> str:
	        """Build the complete prompt for image generation.

	        Each label is looked up in its mapping table; an unknown label falls
	        back to the description of that parameter's default label.

	        Args:
	            color: Color preference label (a key of COLOR_MAPPING).
	            personality: Personality label (a key of PERSONALITY_MAPPING).
	            appearance: Appearance label (a key of APPEARANCE_MAPPING).
	            role: Character role label (a key of ROLE_MAPPING).

	        Returns:
	            Complete prompt string for the MiniMax image generation API.
	        """
	        color_desc = self.COLOR_MAPPING.get(color, self.COLOR_MAPPING["温暖粉"])
	        personality_desc = self.PERSONALITY_MAPPING.get(
	            personality, self.PERSONALITY_MAPPING["温柔"]
	        )
	        appearance_desc = self.APPEARANCE_MAPPING.get(
	            appearance, self.APPEARANCE_MAPPING["无配饰"]
	        )
	        role_desc = self.ROLE_MAPPING.get(role, self.ROLE_MAPPING["陪伴式朋友"])

	        prompt = self.BASE_PROMPT.format(
	            color=color_desc,
	            personality=personality_desc,
	            appearance=appearance_desc,
	            role=role_desc,
	        )

	        logger.info(f"Generated prompt: {prompt[:100]}...")
	        return prompt

	    @staticmethod
	    def _first_image_list(data: dict, field_names) -> list:
	        """Return the images stored under the first non-empty field as a list.

	        A bare string value is wrapped in a one-element list, so indexing the
	        result never slices a URL into characters. Returns an empty list when
	        none of the fields holds a string, list or tuple with content.
	        """
	        for field in field_names:
	            value = data.get(field)
	            if not value:
	                continue
	            if isinstance(value, str):
	                logger.info(f"Found {field} field")
	                return [value]
	            if isinstance(value, (list, tuple)):
	                logger.info(f"Found {field} field")
	                return list(value)
	        return []

	    async def generate_image(
	        self,
	        color: str = "温暖粉",
	        personality: str = "温柔",
	        appearance: str = "无配饰",
	        role: str = "陪伴式朋友",
	        aspect_ratio: str = "1:1",
	        n: int = 1,
	        response_format: str = "url"
	    ) -> Dict[str, object]:
	        """Generate a cat character image using the MiniMax API.

	        Sends the constructed prompt to the MiniMax API and returns the
	        generated image URL(s) or base64 data.

	        Args:
	            color: Color preference.
	            personality: Personality trait.
	            appearance: Appearance feature.
	            role: Character role.
	            aspect_ratio: Image aspect ratio (1:1, 16:9, 9:16, 4:3, 3:4).
	            n: Number of images to generate (1-4).
	            response_format: Response format ("url" or "base64"); forwarded
	                to the API unchanged.

	        Returns:
	            Dictionary containing:
	                - url: Image URL when URLs were returned (a single string
	                  if n == 1, otherwise a list).
	                - data: Base64 image data instead of ``url`` when
	                  response_format="base64" and the API returned base64
	                  (a single string if n == 1, otherwise a list).
	                - prompt: The prompt that was used.
	                - task_id: Task ID from the response ("" if absent).
	                - metadata: The response's "metadata" object ({} if absent).

	        Raises:
	            ImageGenerationError: If the API call fails or returns an
	                invalid response.
	        """
	        try:
	            prompt = self.build_prompt(color, personality, appearance, role)

	            headers = {
	                "Authorization": f"Bearer {self.api_key.strip()}",
	                "Content-Type": "application/json",
	            }
	            payload = {
	                "model": self.model,
	                "prompt": prompt,
	                "aspect_ratio": aspect_ratio,
	                "response_format": response_format,
	                "n": n,
	                "prompt_optimizer": True,
	            }

	            logger.info(
	                f"Calling MiniMax API for image generation. "
	                f"Aspect ratio: {aspect_ratio}, Count: {n}"
	            )
	            # The API key is deliberately never logged, not even partially.
	            logger.debug(f"API URL: {self.api_url}")
	            logger.debug(f"Payload: {json.dumps(payload, ensure_ascii=False)}")

	            response = await self.client.post(
	                self.api_url,
	                headers=headers,
	                json=payload
	            )

	            if response.status_code != 200:
	                error_msg = f"MiniMax API returned status {response.status_code}"
	                try:
	                    error_msg += f": {json.dumps(response.json(), ensure_ascii=False)}"
	                except Exception:
	                    # Error body is not JSON; fall back to the raw text.
	                    error_msg += f": {response.text}"

	                logger.error(f"Image generation API call failed: {error_msg}")
	                logger.error(f"Request URL: {self.api_url}")
	                logger.error(f"Request payload: {json.dumps(payload, ensure_ascii=False)}")
	                raise ImageGenerationError(f"图像生成服务不可用: {error_msg}")

	            try:
	                result = response.json()
	            except Exception as e:
	                logger.error(f"Failed to parse MiniMax API response: {str(e)}")
	                logger.error(f"Raw response text: {response.text}")
	                raise ImageGenerationError("图像生成服务不可用: 响应格式无效") from e

	            wants_base64 = response_format == "base64"
	            if wants_base64:
	                # A base64 body can be megabytes of image data; keep it out of the log.
	                logger.info("API response received (body omitted from log: base64 image data)")
	            else:
	                logger.info(f"API Response (full): {json.dumps(result, indent=2, ensure_ascii=False)}")

	            if not isinstance(result, dict):
	                logger.error("MiniMax API response is not a JSON object")
	                raise ImageGenerationError("API 响应格式错误: 无法提取图像 URL")

	            # Response shape handled here:
	            # {
	            #   "id": "task_id",
	            #   "data": {"image_urls": [...]},
	            #   "metadata": {...},
	            #   "base_resp": {"status_code": 0, "status_msg": "success"}
	            # }
	            base_resp = result.get("base_resp")
	            if isinstance(base_resp, dict):
	                status_code = base_resp.get("status_code", -1)
	                status_msg = base_resp.get("status_msg", "Unknown error")

	                # status_code 0 means success; anything else is an API-level failure.
	                if status_code != 0:
	                    logger.error(f"MiniMax API error: {status_code} - {status_msg}")
	                    raise ImageGenerationError(f"图像生成失败: {status_msg}")

	                logger.info(f"MiniMax API success: {status_code} - {status_msg}")

	            # The task id may be reported as either "id" or "task_id".
	            task_id = result.get("id") or result.get("task_id", "")

	            data = result.get("data")
	            result_key = "url"
	            images = []
	            if isinstance(data, dict):
	                logger.info(f"Data field keys: {list(data.keys())}")
	                if wants_base64:
	                    images = self._first_image_list(data, self._BASE64_FIELDS)
	                    if images:
	                        result_key = "data"
	                if not images:
	                    # Also covers a base64 request that was answered with URLs.
	                    images = self._first_image_list(data, self._URL_FIELDS)

	            if images:
	                # A single-image request returns a bare value instead of a list.
	                image = images[0] if n == 1 else images
	                if result_key == "url":
	                    logger.info(f"Image generation successful. URLs: {images}")
	                else:
	                    logger.info(
	                        f"Image generation successful. Received {len(images)} base64 image(s)"
	                    )

	                return {
	                    result_key: image,
	                    "prompt": prompt,
	                    "task_id": task_id,
	                    "metadata": result.get("metadata", {}),
	                }

	            # Reaching this point means the response did not have the expected shape.
	            if wants_base64:
	                logger.error("Could not extract image data from response (body omitted from log)")
	            else:
	                logger.error(
	                    f"Could not extract image URLs from response: {json.dumps(result, ensure_ascii=False)}"
	                )
	            raise ImageGenerationError("API 响应格式错误: 无法提取图像 URL")

	        except ImageGenerationError:
	            # Already in its final form; re-raise unchanged.
	            raise

	        except httpx.TimeoutException as e:
	            logger.error(f"MiniMax API request timeout: {str(e)}")
	            raise ImageGenerationError("图像生成服务不可用: 请求超时") from e

	        except httpx.RequestError as e:
	            logger.error(f"MiniMax API request failed: {str(e)}")
	            raise ImageGenerationError("图像生成服务不可用: 网络错误") from e

	        except Exception as e:
	            logger.error(
	                f"Unexpected error in image generation service: {str(e)}",
	                exc_info=True,
	            )
	            raise ImageGenerationError(f"图像生成服务不可用: {str(e)}") from e

	    async def generate_multiple_images(
	        self,
	        color: str = "温暖粉",
	        personality: str = "温柔",
	        appearance: str = "无配饰",
	        role: str = "陪伴式朋友",
	        count: int = 3,
	        aspect_ratio: str = "1:1"
	    ) -> List[Dict[str, object]]:
	        """Generate multiple cat character images with the same parameters.

	        All images are requested in a single generate_image() call.

	        Args:
	            color: Color preference.
	            personality: Personality trait.
	            appearance: Appearance feature.
	            role: Character role.
	            count: Number of images to generate (1-4).
	            aspect_ratio: Image aspect ratio.

	        Returns:
	            List of dictionaries, one per image, each containing url,
	            prompt, task_id and index (position in the response).

	        Raises:
	            ValueError: If count is outside 1-4.
	            ImageGenerationError: If the API call fails.
	        """
	        if count < 1 or count > 4:
	            raise ValueError("Count must be between 1 and 4")

	        try:
	            # One request yields all images (the count is sent as "n").
	            result = await self.generate_image(
	                color=color,
	                personality=personality,
	                appearance=appearance,
	                role=role,
	                aspect_ratio=aspect_ratio,
	                n=count
	            )

	            # generate_image returns a bare URL for a single image; normalize to a list.
	            urls = result['url'] if isinstance(result['url'], list) else [result['url']]

	            return [
	                {
	                    "url": url,
	                    "prompt": result['prompt'],
	                    "task_id": result['task_id'],
	                    "index": i,
	                }
	                for i, url in enumerate(urls)
	            ]

	        except ImageGenerationError as e:
	            logger.error(f"Failed to generate images: {e.message}")
	            raise