Spaces:
Running
Running
Commit
·
f3d9d2c
1
Parent(s):
d0b86b6
🧹 Clean up temporary analytics enhancement script
Browse files
backend/database/samples/enhance_trace_analytics.py
DELETED
|
@@ -1,225 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python
|
| 2 |
-
"""
|
| 3 |
-
为样本数据添加统计分析信息 (schema_analytics)
|
| 4 |
-
使样本traces能够在AgentGraph界面中显示正确的统计信息,而不是N/A
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import json
|
| 8 |
-
import time
|
| 9 |
-
import random
|
| 10 |
-
from pathlib import Path
|
| 11 |
-
from typing import Dict, List, Any
|
| 12 |
-
|
| 13 |
-
def calculate_analytics_from_observations(observations: List[Dict]) -> Dict[str, Any]:
    """
    Build a ``schema_analytics`` dictionary from a trace's observations.

    Content length, agent names and the success/failure split are derived
    from the observations themselves. Token counts are estimated from the
    content length, and the timing/depth figures are simulated with
    ``random``, so those values differ between calls.

    Args:
        observations: Observation dicts. Only the ``content`` and ``name``
            keys are read; both are optional.

    Returns:
        A dict with the keys ``numerical_overview``, ``prompt_analytics``,
        ``component_hierarchy`` and ``performance_metrics``.
    """
    total_observations = len(observations)
    agent_names = set()
    total_content_length = 0
    failed_interactions = 0

    for obs in observations:
        # Missing/None content counts as empty; other non-string content is
        # stringified so the length and keyword checks cannot crash.
        content = obs.get('content') or ''
        if not isinstance(content, str):
            content = str(content)
        total_content_length += len(content)

        # Observations without a name must not be counted as an agent.
        name = obs.get('name')
        if name:
            agent_names.add(name)

        # Naive success/failure heuristic based on keywords in the content.
        lowered = content.lower()
        if 'error' in lowered or 'failed' in lowered:
            failed_interactions += 1

    successful_interactions = total_observations - failed_interactions

    # Rough token estimate: about 4 characters per token, split 60/40 between
    # prompt and completion. Integer arithmetic keeps prompt + completion
    # equal to the reported total.
    estimated_total_tokens = total_content_length // 4
    estimated_prompt_tokens = estimated_total_tokens * 6 // 10
    estimated_completion_tokens = estimated_total_tokens - estimated_prompt_tokens

    # Simulated timing: 500-2000 ms per observation.
    base_time_per_observation = random.randint(500, 2000)
    total_execution_time_ms = total_observations * base_time_per_observation
    avg_execution_time_ms = total_execution_time_ms // total_observations if total_observations > 0 else 0

    success_rate = (successful_interactions / total_observations * 100) if total_observations > 0 else 0

    def per_observation(value: int) -> int:
        """Return the per-observation average of value, or 0 without observations."""
        return value // total_observations if total_observations > 0 else 0

    # Sorted so the output is stable; key=str tolerates non-string names.
    agents = sorted(agent_names, key=str)

    schema_analytics = {
        "numerical_overview": {
            "token_analytics": {
                "total_tokens": estimated_total_tokens,
                "total_prompt_tokens": estimated_prompt_tokens,
                "total_completion_tokens": estimated_completion_tokens,
                "avg_tokens_per_component": per_observation(estimated_total_tokens),
                "prompt_to_completion_ratio": round(estimated_prompt_tokens / estimated_completion_tokens, 2) if estimated_completion_tokens > 0 else 0,
                "min_prompt_tokens": int(estimated_prompt_tokens * 0.5),
                "max_prompt_tokens": int(estimated_prompt_tokens * 1.5),
                "min_completion_tokens": int(estimated_completion_tokens * 0.3),
                "max_completion_tokens": int(estimated_completion_tokens * 2.0),
                "avg_prompt_tokens": per_observation(estimated_prompt_tokens),
                "avg_completion_tokens": per_observation(estimated_completion_tokens),
            },
            "timing_analytics": {
                "total_execution_time_ms": total_execution_time_ms,
                "total_execution_time_seconds": round(total_execution_time_ms / 1000, 2),
                "avg_execution_time_ms": avg_execution_time_ms,
                "max_execution_time_ms": int(avg_execution_time_ms * 1.8),
                "min_execution_time_ms": int(avg_execution_time_ms * 0.4),
                # Simulated share of time spent in the LLM (65-85%).
                "llm_time_percentage": random.randint(65, 85),
            },
            "component_stats": {
                "total_components": total_observations,
                "unique_component_types": len(agents),
                # Simulated call-stack depth, capped by the observation count.
                "max_depth": min(total_observations, random.randint(3, 7)),
                "success_rate": round(success_rate, 1),
                "agent_count": len(agents),
                "average_component_depth": random.randint(2, 4),
            },
        },
        "prompt_analytics": {
            "prompt_calls_detected": total_observations,
            "successful_calls": successful_interactions,
            "failed_calls": failed_interactions,
            "call_patterns": {
                # Every call after the first is sequential; never negative.
                "sequential_calls": max(total_observations - 1, 0),
                "parallel_calls": 0,
                "recursive_calls": 0,
            },
        },
        "component_hierarchy": {
            "agents": agents,
            "interaction_flow": f"Sequential multi-agent collaboration with {len(agents)} agents",
            "complexity_score": min(10, len(agents) * 2 + (total_observations // 3)),
        },
        "performance_metrics": {
            "total_execution_time_ms": total_execution_time_ms,
            "avg_llm_latency_ms": avg_execution_time_ms,
            "throughput_calls_per_second": round(1000 / avg_execution_time_ms, 2) if avg_execution_time_ms > 0 else 0,
        },
    }

    return schema_analytics
|
| 116 |
-
|
| 117 |
-
def enhance_trace_file(trace_filepath: Path) -> bool:
    """
    Add ``schema_analytics`` to the ``trace_metadata`` of one trace file.

    The file is read as JSON, analytics are computed from
    ``content.observations`` (which may be a list or a JSON-encoded string),
    and the file is rewritten in place.

    Args:
        trace_filepath: Path of the trace JSON file to update.

    Returns:
        True if the file was enhanced or already contained analytics;
        False if it has no usable observations or any error occurred.
    """
    try:
        with open(trace_filepath, 'r', encoding='utf-8') as f:
            trace_data = json.load(f)

        # Skip files that already carry analytics.
        metadata = trace_data.get('trace_metadata')
        if isinstance(metadata, dict) and 'schema_analytics' in metadata:
            print(f" ⚠️ {trace_filepath.name} 已有analytics,跳过")
            return True

        # A missing or non-dict content section simply has no observations.
        content = trace_data.get('content')
        observations = content.get('observations', []) if isinstance(content, dict) else []

        # Observations may be stored as a JSON-encoded string.
        if isinstance(observations, str):
            try:
                observations = json.loads(observations)
            except json.JSONDecodeError:
                print(f" ❌ {trace_filepath.name} observations字符串解析失败")
                return False

        # Anything other than a non-empty list cannot be analysed.
        if not observations or not isinstance(observations, list):
            print(f" ❌ {trace_filepath.name} 没有observations数据")
            return False

        schema_analytics = calculate_analytics_from_observations(observations)

        # A missing or null trace_metadata is replaced with a fresh dict.
        if metadata is None:
            metadata = trace_data['trace_metadata'] = {}
        metadata['schema_analytics'] = schema_analytics

        # Serialize before opening the file for writing: opening with 'w'
        # truncates it, so a serialization error must surface first or the
        # original trace would be lost.
        serialized = json.dumps(trace_data, indent=2, ensure_ascii=False)
        with open(trace_filepath, 'w', encoding='utf-8') as f:
            f.write(serialized)

        # Report a short summary of the generated analytics.
        token_analytics = schema_analytics['numerical_overview']['token_analytics']
        timing_analytics = schema_analytics['numerical_overview']['timing_analytics']
        component_stats = schema_analytics['numerical_overview']['component_stats']

        print(f" ✅ {trace_filepath.name} 增强完成:")
        print(f" 📊 Tokens: {token_analytics['total_tokens']:,}")
        print(f" 🤖 Prompts: {schema_analytics['prompt_analytics']['prompt_calls_detected']}")
        print(f" 🔧 Components: {component_stats['total_components']}")
        print(f" ✅ Success Rate: {component_stats['success_rate']}%")
        print(f" ⏱️ Execution Time: {timing_analytics['total_execution_time_ms']:,}ms")
        print(f" 📏 Call Stack Depth: {component_stats['max_depth']}")

        return True

    except Exception as e:
        # Deliberate best-effort boundary: one bad file is reported and
        # counted as a failure instead of aborting the caller.
        print(f" ❌ 增强 {trace_filepath.name} 失败: {e}")
        return False
|
| 183 |
-
|
| 184 |
-
def enhance_all_trace_files(traces_dir: Path = Path("traces")):
    """
    Add analytics to every ``*.json`` trace file in a directory.

    Each file is passed to ``enhance_trace_file``; progress and a final
    summary are printed. Files are processed in sorted order so the output
    is reproducible.

    Args:
        traces_dir: Directory holding the trace files. Defaults to
            ``traces`` relative to the current working directory.

    Returns:
        None. Returns early if the directory does not exist or contains no
        ``*.json`` files.
    """
    print("🚀 开始为样本trace添加统计分析信息")
    print("=" * 60)

    traces_dir = Path(traces_dir)
    if not traces_dir.exists():
        print("❌ traces目录不存在")
        return

    # glob() order depends on the filesystem; sort for stable processing.
    trace_files = sorted(traces_dir.glob("*.json"))
    if not trace_files:
        print("❌ 未找到任何trace文件")
        return

    print(f"📁 发现 {len(trace_files)} 个trace文件")

    enhanced_count = 0
    failed_count = 0

    for trace_file in trace_files:
        print(f"\n🔄 处理 {trace_file.name}:")
        if enhance_trace_file(trace_file):
            enhanced_count += 1
        else:
            failed_count += 1

    print("\n🎯 处理完成:")
    print(f"✅ 成功增强: {enhanced_count} 个文件")
    print(f"❌ 处理失败: {failed_count} 个文件")

    if enhanced_count > 0:
        print("\n💡 现在样本traces应该在AgentGraph界面中显示正确的统计信息!")
        print(" - Tokens、Prompts、Tools & Functions 将显示具体数值")
        print(" - Success Rate、Execution Time、Call Stack Depth 将显示真实数据")
        print(" - 不再显示 N/A !")
|
| 223 |
-
|
| 224 |
-
# Script entry point: run the batch enhancement only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    enhance_all_trace_files()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|