wu981526092 committed on
Commit
f3d9d2c
·
1 Parent(s): d0b86b6

🧹 Clean up temporary analytics enhancement script

Browse files
backend/database/samples/enhance_trace_analytics.py DELETED
@@ -1,225 +0,0 @@
#!/usr/bin/env python
"""
Add statistical analytics (schema_analytics) to sample trace data.

Enables sample traces to display correct statistics in the AgentGraph UI
instead of showing N/A.
"""

import json
import time  # NOTE(review): appears unused in this file — confirm before removing
import random
from pathlib import Path
from typing import Dict, List, Any
def calculate_analytics_from_observations(observations: List[Dict]) -> Dict[str, Any]:
    """Compute a ``schema_analytics`` dict from a trace's observation list.

    Token counts are rough estimates derived from content length
    (~4 characters per token, split 60/40 prompt/completion), and timing
    figures are simulated via ``random`` — results vary between calls
    unless the caller seeds the RNG.

    Args:
        observations: Observation dicts from a trace; each may provide
            ``content`` and ``name`` keys.

    Returns:
        schema_analytics dict containing every statistic the frontend
        needs. All sections degrade gracefully (zeros) for empty input.
    """
    total_observations = len(observations)
    agent_names = set()
    total_content_length = 0
    successful_interactions = 0
    failed_interactions = 0

    # Single pass over the observations, collecting aggregates.
    for obs in observations:
        content = obs.get('content', '')
        total_content_length += len(content)
        agent_names.add(obs.get('name', ''))

        # Naive keyword heuristic for success/failure classification.
        if 'error' in content.lower() or 'failed' in content.lower():
            failed_interactions += 1
        else:
            successful_interactions += 1

    # Estimate token usage from content length: ~4 chars ≈ 1 token.
    estimated_total_tokens = total_content_length // 4
    estimated_prompt_tokens = estimated_total_tokens * 0.6  # 60% prompt
    estimated_completion_tokens = estimated_total_tokens * 0.4  # 40% completion

    # Simulate realistic processing time: 500-2000 ms per observation.
    base_time_per_observation = random.randint(500, 2000)
    total_execution_time_ms = total_observations * base_time_per_observation
    avg_execution_time_ms = total_execution_time_ms // total_observations if total_observations > 0 else 0

    # Success rate as a percentage (0 for empty input).
    success_rate = (successful_interactions / total_observations * 100) if total_observations > 0 else 0

    # Assemble the full schema_analytics payload.
    schema_analytics = {
        "numerical_overview": {
            "token_analytics": {
                "total_tokens": int(estimated_total_tokens),
                "total_prompt_tokens": int(estimated_prompt_tokens),
                "total_completion_tokens": int(estimated_completion_tokens),
                "avg_tokens_per_component": int(estimated_total_tokens // total_observations) if total_observations > 0 else 0,
                "prompt_to_completion_ratio": round(estimated_prompt_tokens / estimated_completion_tokens, 2) if estimated_completion_tokens > 0 else 0,
                "min_prompt_tokens": int(estimated_prompt_tokens * 0.5),
                "max_prompt_tokens": int(estimated_prompt_tokens * 1.5),
                "min_completion_tokens": int(estimated_completion_tokens * 0.3),
                "max_completion_tokens": int(estimated_completion_tokens * 2.0),
                "avg_prompt_tokens": int(estimated_prompt_tokens // total_observations) if total_observations > 0 else 0,
                "avg_completion_tokens": int(estimated_completion_tokens // total_observations) if total_observations > 0 else 0
            },
            "timing_analytics": {
                "total_execution_time_ms": total_execution_time_ms,
                "total_execution_time_seconds": round(total_execution_time_ms / 1000, 2),
                "avg_execution_time_ms": avg_execution_time_ms,
                "max_execution_time_ms": int(avg_execution_time_ms * 1.8),
                "min_execution_time_ms": int(avg_execution_time_ms * 0.4),
                "llm_time_percentage": random.randint(65, 85)  # LLM typically 65-85% of total time
            },
            "component_stats": {
                "total_components": total_observations,
                "unique_component_types": len(agent_names),
                "max_depth": min(total_observations, random.randint(3, 7)),  # simulated call-stack depth
                "success_rate": round(success_rate, 1),
                "agent_count": len(agent_names),
                "average_component_depth": random.randint(2, 4)
            }
        },
        "prompt_analytics": {
            "prompt_calls_detected": total_observations,
            "successful_calls": successful_interactions,
            "failed_calls": failed_interactions,
            "call_patterns": {
                # All calls after the first are sequential; clamped at 0 so
                # empty input no longer reports -1 (fixed from the original).
                "sequential_calls": max(0, total_observations - 1),
                "parallel_calls": 0,  # sample data is sequential
                "recursive_calls": 0
            }
        },
        "component_hierarchy": {
            "agents": list(agent_names),
            "interaction_flow": f"Sequential multi-agent collaboration with {len(agent_names)} agents",
            "complexity_score": min(10, len(agent_names) * 2 + (total_observations // 3))
        },
        "performance_metrics": {
            "total_execution_time_ms": total_execution_time_ms,
            "avg_llm_latency_ms": avg_execution_time_ms,
            "throughput_calls_per_second": round(1000 / avg_execution_time_ms, 2) if avg_execution_time_ms > 0 else 0
        }
    }

    return schema_analytics
def enhance_trace_file(trace_filepath: Path) -> bool:
    """Attach schema_analytics to one trace file, rewriting it in place.

    Args:
        trace_filepath: Path of the trace JSON file to enhance.

    Returns:
        True on success (or if analytics already present), False otherwise.
    """
    try:
        # Load the trace JSON from disk.
        payload = json.loads(trace_filepath.read_text(encoding='utf-8'))

        # Already enhanced? Skip without touching the file.
        if 'trace_metadata' in payload and 'schema_analytics' in payload['trace_metadata']:
            print(f" ⚠️ {trace_filepath.name} 已有analytics,跳过")
            return True

        raw_obs = payload.get('content', {}).get('observations', [])

        # Observations may be serialized as a JSON string; decode if so.
        if isinstance(raw_obs, str):
            try:
                raw_obs = json.loads(raw_obs)
            except json.JSONDecodeError:
                print(f" ❌ {trace_filepath.name} observations字符串解析失败")
                return False

        if not raw_obs:
            print(f" ❌ {trace_filepath.name} 没有observations数据")
            return False

        # Compute the analytics and attach them under trace_metadata.
        analytics = calculate_analytics_from_observations(raw_obs)
        payload.setdefault('trace_metadata', {})['schema_analytics'] = analytics

        # Persist the enhanced trace back to the same file.
        with open(trace_filepath, 'w', encoding='utf-8') as fh:
            json.dump(payload, fh, indent=2, ensure_ascii=False)

        # Report a short summary of what was written.
        overview = analytics['numerical_overview']
        tokens = overview['token_analytics']
        timing = overview['timing_analytics']
        components = overview['component_stats']

        print(f" ✅ {trace_filepath.name} 增强完成:")
        print(f" 📊 Tokens: {tokens['total_tokens']:,}")
        print(f" 🤖 Prompts: {analytics['prompt_analytics']['prompt_calls_detected']}")
        print(f" 🔧 Components: {components['total_components']}")
        print(f" ✅ Success Rate: {components['success_rate']}%")
        print(f" ⏱️ Execution Time: {timing['total_execution_time_ms']:,}ms")
        print(f" 📏 Call Stack Depth: {components['max_depth']}")

        return True

    except Exception as e:
        # Broad catch: best-effort batch script — report and keep going.
        print(f" ❌ 增强 {trace_filepath.name} 失败: {e}")
        return False
def enhance_all_trace_files(traces_dir="traces"):
    """Enhance the analytics of every sample trace file in a directory.

    Generalized from the original hard-coded ``traces`` directory: callers
    may now pass any directory (str or Path); the default is unchanged,
    so existing call sites behave identically.

    Args:
        traces_dir: Directory containing ``*.json`` trace files.
    """
    print("🚀 开始为样本trace添加统计分析信息")
    print("=" * 60)

    # Locate the trace files to process.
    traces_dir = Path(traces_dir)
    if not traces_dir.exists():
        print("❌ traces目录不存在")
        return

    trace_files = list(traces_dir.glob("*.json"))
    if not trace_files:
        print("❌ 未找到任何trace文件")
        return

    print(f"📁 发现 {len(trace_files)} 个trace文件")

    enhanced_count = 0
    failed_count = 0

    # Enhance each file, tallying successes and failures.
    for trace_file in trace_files:
        print(f"\n🔄 处理 {trace_file.name}:")
        if enhance_trace_file(trace_file):
            enhanced_count += 1
        else:
            failed_count += 1

    print(f"\n🎯 处理完成:")
    print(f"✅ 成功增强: {enhanced_count} 个文件")
    print(f"❌ 处理失败: {failed_count} 个文件")

    if enhanced_count > 0:
        print(f"\n💡 现在样本traces应该在AgentGraph界面中显示正确的统计信息!")
        print(f" - Tokens、Prompts、Tools & Functions 将显示具体数值")
        print(f" - Success Rate、Execution Time、Call Stack Depth 将显示真实数据")
        print(f" - 不再显示 N/A !")
# Script entry point: enhance every sample trace file.
if __name__ == "__main__":
    enhance_all_trace_files()