# Source: Hugging Face Hub repository file, uploaded by oscarzhang.
# Commit 76d412c (verified): "Upload folder using huggingface_hub".
"""
异常检测结果格式化器
将检测结果格式化为LLM需要的文本格式
完全基于配置文件,方便扩展和定制
"""
import json
from pathlib import Path
from typing import Dict, List, Optional, Any
class AnomalyFormatter:
    """Render anomaly-detection results as plain text for an LLM prompt.

    Section titles, field labels, prefixes/suffixes and per-field value
    formats are all read from a JSON configuration file, so the output can
    be customised without touching this class. A built-in default
    configuration is used when the file is missing or cannot be loaded.
    """

    def __init__(self, config_path: Optional[Path] = None):
        """Create a formatter.

        Args:
            config_path: Path of the JSON configuration file. When ``None``,
                ``configs/formatter_config.json`` next to this module's
                parent directory is used.
        """
        if config_path is None:
            config_path = (
                Path(__file__).parent.parent / "configs" / "formatter_config.json"
            )
        self.config_path = Path(config_path)
        self.config = self._load_config()

    def _load_config(self) -> Dict:
        """Load the configuration file, falling back to the default config.

        Returns:
            The parsed JSON object, or the built-in default configuration
            when the file does not exist, cannot be read or parsed, or does
            not contain a JSON object at the top level.
        """
        if self.config_path.exists():
            try:
                with open(self.config_path, 'r', encoding='utf-8') as f:
                    loaded = json.load(f)
                # Every consumer calls ``self.config.get(...)``; anything but
                # a JSON object would only fail later with an AttributeError.
                if not isinstance(loaded, dict):
                    raise ValueError(
                        f"expected a JSON object, got {type(loaded).__name__}"
                    )
                return loaded
            except (OSError, ValueError) as e:
                # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
                print(f"⚠️ 加载格式化配置失败: {e},使用默认配置")
        # Missing or unusable file: use the built-in configuration.
        return self._get_default_config()

    def _get_default_config(self) -> Dict:
        """Return the built-in configuration (kept for backward compatibility)."""
        return {
            "sections": {
                "anomaly_overview": {"enabled": True, "title": "异常概览"},
                "core_indicators": {"enabled": True, "title": "核心指标"},
                "historical_trend": {"enabled": True, "title": "历史趋势"},
                "related_indicators": {"enabled": True, "title": "相关健康指标"},
                "user_profile": {"enabled": True, "title": "用户背景信息"}
            },
            "formatting": {
                "section_prefix": "## ",
                "section_suffix": "\n",
                "field_prefix": "- ",
                "field_suffix": "\n",
                "line_separator": "\n",
                "header": "# 健康异常检测结果\n"
            }
        }

    def _formatting(self) -> Dict:
        """Return the layout options, with fallbacks for any missing key."""
        options = {
            "section_prefix": "## ",
            "section_suffix": "\n",
            "field_prefix": "- ",
            "field_suffix": "\n",
            "line_separator": "\n",
        }
        options.update(self.config.get("formatting", {}))
        return options

    def _format_value(self, value: Any, field_config: Dict) -> str:
        """Format a single field value according to its field configuration.

        Recognised ``field_config`` keys: ``format`` (``"float"``,
        ``"integer"``, ``"boolean"``; anything else is treated as a string),
        ``decimal_places``, ``prefix``, ``suffix``, ``default``, ``mapping``,
        ``true_text`` and ``false_text``.

        Args:
            value: Raw value to render.
            field_config: Configuration of the field being rendered.

        Returns:
            ``prefix + formatted + suffix``. When ``value`` is ``None`` the
            configured default is returned without prefix or suffix.
        """
        default = field_config.get("default", "")
        if value is None:
            return default if default else ""

        format_type = field_config.get("format", "string")
        decimal_places = field_config.get("decimal_places", 2)
        prefix = field_config.get("prefix", "")
        suffix = field_config.get("suffix", "")
        mapping = field_config.get("mapping", {})

        # Translate coded values first (e.g. trend "worsening" -> display text).
        if mapping and str(value) in mapping:
            value = mapping[str(value)]

        try:
            if format_type == "float":
                formatted = f"{float(value):.{decimal_places}f}"
            elif format_type == "integer":
                formatted = f"{int(value)}"
            elif format_type == "boolean":
                true_text = field_config.get("true_text", "是")
                false_text = field_config.get("false_text", "否")
                formatted = true_text if value else false_text
            else:
                # A numeric zero is real data and must be shown; only empty
                # values (and a bare False) fall back to the default.
                is_number = (
                    isinstance(value, (int, float))
                    and not isinstance(value, bool)
                )
                formatted = str(value) if (value or is_number) else default
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float("inf")).
            formatted = default if default else ""

        return f"{prefix}{formatted}{suffix}"

    def _format_nested(self, nested_data: Any, field_config: Dict) -> Optional[str]:
        """Render the enabled sub-fields of a nested value as one string.

        Returns ``None`` when ``nested_data`` is not a non-empty dict or when
        none of the configured sub-fields has a value.
        """
        if not isinstance(nested_data, dict) or not nested_data:
            return None
        parts = []
        for sub_key, sub_config in field_config.get("sub_fields", {}).items():
            if not sub_config.get("enabled", True):
                continue
            sub_value = nested_data.get(sub_key)
            if sub_value is not None:
                parts.append(self._format_value(sub_value, sub_config))
        return "".join(parts) if parts else None

    def _format_section(
        self,
        section_key: str,
        data: Dict,
        section_config: Dict
    ) -> List[str]:
        """Format one section: a title line, one line per field, a separator.

        Args:
            section_key: Section identifier, used as title when the section
                configuration has no ``title``.
            data: Values of the section, keyed by field name.
            section_config: Configuration of the section (``enabled``,
                ``title``, ``fields``).

        Returns:
            The text fragments of the section, or an empty list when the
            section is disabled. Fields whose value is missing are skipped.
        """
        if not section_config.get("enabled", True):
            return []

        layout = self._formatting()
        section_prefix = layout["section_prefix"]
        section_suffix = layout["section_suffix"]
        field_prefix = layout["field_prefix"]
        field_suffix = layout["field_suffix"]

        title = section_config.get("title", section_key)
        lines = [f"{section_prefix}{title}{section_suffix}"]

        for field_key, field_config in section_config.get("fields", {}).items():
            if not field_config.get("enabled", True):
                continue
            field_label = field_config.get("label", field_key)
            format_type = field_config.get("format", "string")

            if format_type == "nested":
                # Nested field (e.g. activity_level.level).
                rendered = self._format_nested(data.get(field_key), field_config)
            else:
                value = data.get(field_key)
                if value is None and format_type == "string_or_nested":
                    # Direct value missing: try the configured fallback key.
                    fallback_key = field_config.get("fallback")
                    if fallback_key:
                        value = data.get(fallback_key)
                rendered = (
                    None if value is None
                    else self._format_value(value, field_config)
                )

            if rendered is not None:
                lines.append(f"{field_prefix}{field_label}{rendered}{field_suffix}")

        # Separator after the section.
        lines.append(layout["line_separator"])
        return lines

    def _format_historical_trend(
        self,
        daily_results: List[Dict],
        section_config: Dict
    ) -> List[str]:
        """Format the historical-trend section, one line per daily record.

        Each line starts with the record's ``date`` followed by the
        concatenated formatted values of the enabled fields. Records that are
        not dicts, or in which no configured field has a value, are skipped.

        Args:
            daily_results: Daily records.
            section_config: Configuration of the section.

        Returns:
            The text fragments of the section, or an empty list when the
            section is disabled.
        """
        if not section_config.get("enabled", True):
            return []

        layout = self._formatting()
        section_prefix = layout["section_prefix"]
        section_suffix = layout["section_suffix"]
        field_prefix = layout["field_prefix"]
        field_suffix = layout["field_suffix"]

        title = section_config.get("title", "历史趋势")
        lines = [f"{section_prefix}{title}{section_suffix}"]

        fields_config = section_config.get("fields", {})
        for result in daily_results:
            if not isinstance(result, dict):
                continue
            parts = []
            for field_key, field_config in fields_config.items():
                if not field_config.get("enabled", True):
                    continue
                value = result.get(field_key)
                if value is not None:
                    parts.append(self._format_value(value, field_config))
            if parts:
                date = result.get("date", "")
                lines.append(f"{field_prefix}{date}{''.join(parts)}{field_suffix}")

        lines.append(layout["line_separator"])
        return lines

    def format_for_llm(
        self,
        anomaly_result: Dict,
        baseline_info: Optional[Dict] = None,
        related_indicators: Optional[Dict] = None,
        user_profile: Optional[Dict] = None,
        daily_results: Optional[List[Dict]] = None
    ) -> str:
        """Format a detection result as text for an LLM.

        Only data is reported; no judgement is made. Sections are emitted in
        a fixed order and only when both their input is provided and they
        are present in the configuration.

        Args:
            anomaly_result: Detection result. Its ``anomaly_pattern`` entry
                is used for the overview when present; otherwise the result
                itself is used if it has an ``is_anomaly`` key.
            baseline_info: Source of the core indicators; the keys
                ``current_value``, ``baseline_mean`` and ``deviation_pct``
                are read.
            related_indicators: Values for the related-indicators section.
            user_profile: Values for the user-profile section.
            daily_results: Daily records for the historical-trend section.

        Returns:
            The header followed by the concatenated sections.
        """
        anomaly_result = anomaly_result or {}
        formatting = self.config.get("formatting", {})
        sections = self.config.get("sections", {})

        lines = [formatting.get("header", "# 健康异常检测结果\n")]

        # Anomaly overview.
        if "anomaly_overview" in sections:
            section_config = sections["anomaly_overview"]
            if section_config.get("enabled", True):
                if "anomaly_pattern" in anomaly_result:
                    # Pattern-style result.
                    pattern_data = anomaly_result["anomaly_pattern"] or {}
                    lines.extend(
                        self._format_section("anomaly_overview", pattern_data, section_config)
                    )
                elif "is_anomaly" in anomaly_result:
                    # Real-time detection result.
                    lines.extend(
                        self._format_section("anomaly_overview", anomaly_result, section_config)
                    )

        # Core indicators.
        if baseline_info and "core_indicators" in sections:
            section_config = sections["core_indicators"]
            # Rename keys so they match the field names used in the config.
            core_data = {
                "hrv_rmssd": baseline_info.get("current_value"),
                "baseline_mean": baseline_info.get("baseline_mean"),
                "deviation_pct": baseline_info.get("deviation_pct")
            }
            lines.extend(self._format_section("core_indicators", core_data, section_config))

        # Historical trend.
        if daily_results and "historical_trend" in sections:
            section_config = sections["historical_trend"]
            lines.extend(self._format_historical_trend(daily_results, section_config))

        # Related health indicators.
        if related_indicators and "related_indicators" in sections:
            section_config = sections["related_indicators"]
            lines.extend(
                self._format_section("related_indicators", related_indicators, section_config)
            )

        # User background.
        if user_profile and "user_profile" in sections:
            section_config = sections["user_profile"]
            lines.extend(self._format_section("user_profile", user_profile, section_config))

        return "".join(lines)

    @staticmethod
    def format_realtime_result(result: Dict, config_path: Optional[Path] = None) -> str:
        """Format a real-time detection result (kept for backward compatibility).

        Builds a formatter from ``config_path`` and formats ``result`` alone.
        """
        formatter = AnomalyFormatter(config_path)
        return formatter.format_for_llm(result)