Spaces:

oscarzhang
/

Wearable_TimeSeries_Health_Monitor

Runtime error

App Files Files Community

Wearable_TimeSeries_Health_Monitor / utils /formatter.py

oscarzhang

Upload folder using huggingface_hub

76d412c verified 5 months ago

raw

history blame contribute delete

11.3 kB

	"""
	异常检测结果格式化器
	将检测结果格式化为LLM需要的文本格式
	完全基于配置文件，方便扩展和定制
	"""

	import json
	from pathlib import Path
	from typing import Dict, List, Optional, Any


	class AnomalyFormatter:
	    """Render anomaly-detection results as plain text for an LLM prompt.

	    The layout is driven by a JSON configuration with two top-level keys:
	    ``sections`` (which sections exist, their titles and their fields) and
	    ``formatting`` (the prefix/suffix tokens used to build lines). When no
	    usable configuration file is found, a built-in default is used.
	    """

	    def __init__(self, config_path: Optional[Path] = None):
	        """Create a formatter.

	        Args:
	            config_path: Path to the JSON configuration file. When ``None``,
	                ``configs/formatter_config.json`` located one directory above
	                this module's directory is used.
	        """
	        if config_path is None:
	            config_path = Path(__file__).parent.parent / "configs" / "formatter_config.json"

	        self.config_path = Path(config_path)
	        self.config = self._load_config()

	    def _load_config(self) -> Dict:
	        """Load the configuration file, falling back to the default config.

	        Returns:
	            The parsed JSON object, or the default configuration when the
	            file is missing, unreadable, not valid JSON, or its root is not
	            a JSON object.
	        """
	        if self.config_path.exists():
	            try:
	                with open(self.config_path, 'r', encoding='utf-8') as f:
	                    loaded = json.load(f)
	                # Every consumer calls ``self.config.get(...)``, so a list or
	                # scalar root must be rejected here rather than crash later.
	                if not isinstance(loaded, dict):
	                    raise ValueError("配置根节点必须是JSON对象")
	                return loaded
	            except (OSError, ValueError) as e:
	                # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
	                print(f"⚠️ 加载格式化配置失败: {e}，使用默认配置")

	        return self._get_default_config()

	    def _get_default_config(self) -> Dict:
	        """Return the built-in configuration (kept for backward compatibility).

	        The default sections define titles only and no ``fields``, so each
	        enabled section renders as a title followed by a separator.
	        """
	        return {
	            "sections": {
	                "anomaly_overview": {"enabled": True, "title": "异常概览"},
	                "core_indicators": {"enabled": True, "title": "核心指标"},
	                "historical_trend": {"enabled": True, "title": "历史趋势"},
	                "related_indicators": {"enabled": True, "title": "相关健康指标"},
	                "user_profile": {"enabled": True, "title": "用户背景信息"}
	            },
	            "formatting": {
	                "section_prefix": "## ",
	                "section_suffix": "\n",
	                "field_prefix": "- ",
	                "field_suffix": "\n",
	                "line_separator": "\n",
	                "header": "# 健康异常检测结果\n"
	            }
	        }

	    def _formatting_options(self):
	        """Return the line-building tokens from the ``formatting`` config.

	        Returns:
	            A tuple ``(section_prefix, section_suffix, field_prefix,
	            field_suffix, line_separator)``; each falls back to its default
	            when missing from the configuration.
	        """
	        formatting = self.config.get("formatting", {})
	        return (
	            formatting.get("section_prefix", "## "),
	            formatting.get("section_suffix", "\n"),
	            formatting.get("field_prefix", "- "),
	            formatting.get("field_suffix", "\n"),
	            formatting.get("line_separator", "\n"),
	        )

	    def _format_value(self, value: Any, field_config: Dict) -> str:
	        """Format a single field value according to its field configuration.

	        Recognised ``field_config`` keys: ``format`` (``"float"``,
	        ``"integer"``, ``"boolean"``, anything else is treated as string),
	        ``decimal_places``, ``prefix``, ``suffix``, ``default``, ``mapping``,
	        ``true_text`` and ``false_text``.

	        Args:
	            value: The raw value to render.
	            field_config: The configuration dict for this field.

	        Returns:
	            ``prefix + formatted + suffix``. When ``value`` is ``None`` the
	            bare default (or ``""``) is returned without prefix/suffix.
	        """
	        default = field_config.get("default", "")
	        fallback_text = default if default else ""
	        if value is None:
	            return fallback_text

	        format_type = field_config.get("format", "string")
	        prefix = field_config.get("prefix", "")
	        suffix = field_config.get("suffix", "")
	        mapping = field_config.get("mapping", {})

	        # Translate coded values first (e.g. trend: "worsening" -> "持续恶化").
	        if mapping and str(value) in mapping:
	            value = mapping[str(value)]

	        try:
	            if format_type == "float":
	                decimal_places = field_config.get("decimal_places", 2)
	                formatted = f"{float(value):.{decimal_places}f}"
	            elif format_type == "integer":
	                formatted = f"{int(value)}"
	            elif format_type == "boolean":
	                true_text = field_config.get("true_text", "是")
	                false_text = field_config.get("false_text", "否")
	                formatted = true_text if value else false_text
	            elif value or (isinstance(value, (int, float)) and not isinstance(value, bool)):
	                # A numeric zero is a real measurement and must not be
	                # replaced by the default like an empty string would be.
	                formatted = str(value)
	            else:
	                formatted = default
	        except (ValueError, TypeError, OverflowError):
	            # OverflowError: int() of an infinite float.
	            formatted = fallback_text

	        return f"{prefix}{formatted}{suffix}"

	    def _format_nested(self, nested_data: Any, field_config: Dict) -> Optional[str]:
	        """Join the formatted sub-fields of a nested value.

	        Args:
	            nested_data: The value stored under the nested field's key.
	            field_config: The field configuration; its ``sub_fields`` dict
	                lists the sub-keys to render, in order.

	        Returns:
	            The concatenated sub-values, or ``None`` when ``nested_data`` is
	            not a non-empty dict or no enabled sub-field has a value.
	        """
	        if not isinstance(nested_data, dict) or not nested_data:
	            return None

	        sub_values = []
	        for sub_key, sub_config in field_config.get("sub_fields", {}).items():
	            if not sub_config.get("enabled", True):
	                continue
	            sub_value = nested_data.get(sub_key)
	            if sub_value is not None:
	                sub_values.append(self._format_value(sub_value, sub_config))

	        return "".join(sub_values) if sub_values else None

	    def _format_section(
	        self,
	        section_key: str,
	        data: Dict,
	        section_config: Dict
	    ) -> List[str]:
	        """Format one section: a title line, its field lines, a separator.

	        Args:
	            section_key: Section identifier; used as the title when the
	                section configuration has no ``title``.
	            data: Values for the section, keyed by field name. A non-dict
	                value is treated as empty.
	            section_config: Configuration of this section.

	        Returns:
	            The output fragments, or an empty list when the section is
	            disabled.
	        """
	        if not section_config.get("enabled", True):
	            return []

	        if not isinstance(data, dict):
	            data = {}

	        (section_prefix, section_suffix,
	         field_prefix, field_suffix, line_separator) = self._formatting_options()

	        title = section_config.get("title", section_key)
	        lines = [f"{section_prefix}{title}{section_suffix}"]

	        for field_key, field_config in section_config.get("fields", {}).items():
	            if not field_config.get("enabled", True):
	                continue

	            field_label = field_config.get("label", field_key)
	            format_type = field_config.get("format", "string")

	            if format_type == "nested":
	                # Nested field (e.g. activity_level.level).
	                rendered = self._format_nested(data.get(field_key), field_config)
	                if rendered is None:
	                    continue
	            else:
	                value = data.get(field_key)
	                if value is None and format_type == "string_or_nested":
	                    # Direct value missing: try the configured fallback key.
	                    fallback_key = field_config.get("fallback")
	                    if fallback_key:
	                        value = data.get(fallback_key)
	                if value is None:
	                    continue
	                rendered = self._format_value(value, field_config)

	            lines.append(f"{field_prefix}{field_label}：{rendered}{field_suffix}")

	        lines.append(line_separator)
	        return lines

	    def _format_historical_trend(
	        self,
	        daily_results: List[Dict],
	        section_config: Dict
	    ) -> List[str]:
	        """Format the historical-trend section (one line per daily record).

	        Args:
	            daily_results: Daily records; each record's ``date`` value labels
	                its line. Entries that are not dicts are skipped.
	            section_config: Configuration of the historical-trend section.

	        Returns:
	            The output fragments, or an empty list when the section is
	            disabled.
	        """
	        if not section_config.get("enabled", True):
	            return []

	        (section_prefix, section_suffix,
	         field_prefix, field_suffix, line_separator) = self._formatting_options()

	        title = section_config.get("title", "历史趋势")
	        lines = [f"{section_prefix}{title}{section_suffix}"]

	        fields_config = section_config.get("fields", {})
	        for result in daily_results:
	            if not isinstance(result, dict):
	                continue

	            parts = []
	            for field_key, field_config in fields_config.items():
	                if not field_config.get("enabled", True):
	                    continue
	                value = result.get(field_key)
	                if value is not None:
	                    parts.append(self._format_value(value, field_config))

	            # Records with no configured values produce no line at all.
	            if parts:
	                date = result.get("date", "")
	                lines.append(f"{field_prefix}{date}：{''.join(parts)}{field_suffix}")

	        lines.append(line_separator)
	        return lines

	    def format_for_llm(
	        self,
	        anomaly_result: Dict,
	        baseline_info: Optional[Dict] = None,
	        related_indicators: Optional[Dict] = None,
	        user_profile: Optional[Dict] = None,
	        daily_results: Optional[List[Dict]] = None
	    ) -> str:
	        """Format detection results as text for an LLM.

	        Only data is emitted; no judgement is made. Sections are written in
	        a fixed order (overview, core indicators, historical trend, related
	        indicators, user profile) and each is skipped when its input is
	        empty or it is absent from the configuration.

	        Args:
	            anomaly_result: Detection output. If it has an
	                ``anomaly_pattern`` key, that value feeds the overview;
	                otherwise, if it has ``is_anomaly``, the dict itself does.
	                ``None`` is treated as an empty dict.
	            baseline_info: Its ``current_value``, ``baseline_mean`` and
	                ``deviation_pct`` feed the core-indicators section.
	            related_indicators: Values for the related-indicators section.
	            user_profile: Values for the user-profile section.
	            daily_results: Records for the historical-trend section.

	        Returns:
	            The concatenated text, starting with the configured header.
	        """
	        formatting = self.config.get("formatting", {})
	        sections = self.config.get("sections", {})
	        anomaly_result = anomaly_result or {}

	        lines = [formatting.get("header", "# 健康异常检测结果\n")]

	        # Anomaly overview: pattern-style results take precedence over
	        # realtime-style results. _format_section honours "enabled" itself.
	        if "anomaly_overview" in sections:
	            section_config = sections["anomaly_overview"]
	            if "anomaly_pattern" in anomaly_result:
	                lines.extend(self._format_section(
	                    "anomaly_overview", anomaly_result["anomaly_pattern"], section_config))
	            elif "is_anomaly" in anomaly_result:
	                lines.extend(self._format_section(
	                    "anomaly_overview", anomaly_result, section_config))

	        # Core indicators: rename baseline keys to the names the config uses.
	        if baseline_info and "core_indicators" in sections:
	            core_data = {
	                "hrv_rmssd": baseline_info.get("current_value"),
	                "baseline_mean": baseline_info.get("baseline_mean"),
	                "deviation_pct": baseline_info.get("deviation_pct")
	            }
	            lines.extend(self._format_section(
	                "core_indicators", core_data, sections["core_indicators"]))

	        # Historical trend.
	        if daily_results and "historical_trend" in sections:
	            lines.extend(self._format_historical_trend(
	                daily_results, sections["historical_trend"]))

	        # Related health indicators.
	        if related_indicators and "related_indicators" in sections:
	            lines.extend(self._format_section(
	                "related_indicators", related_indicators, sections["related_indicators"]))

	        # User background information.
	        if user_profile and "user_profile" in sections:
	            lines.extend(self._format_section(
	                "user_profile", user_profile, sections["user_profile"]))

	        return "".join(lines)

	    @staticmethod
	    def format_realtime_result(result: Dict, config_path: Optional[Path] = None) -> str:
	        """Format a realtime detection result (static, backward compatible).

	        Builds a formatter from ``config_path`` and returns
	        ``format_for_llm(result)``.
	        """
	        formatter = AnomalyFormatter(config_path)
	        return formatter.format_for_llm(result)