wu981526092 committed on
Commit
f3d9d2c
·
1 Parent(s): d0b86b6

🧹 Clean up temporary analytics enhancement script

Browse files
backend/database/samples/enhance_trace_analytics.py DELETED
@@ -1,225 +0,0 @@
#!/usr/bin/env python
"""
Add statistical analytics (schema_analytics) to sample trace data.

Enables sample traces to display correct statistics in the AgentGraph UI
instead of showing N/A.
"""

import json
import time  # NOTE(review): appears unused in this file — confirm before removing
import random
from pathlib import Path
from typing import Dict, List, Any
def calculate_analytics_from_observations(observations: List[Dict]) -> Dict[str, Any]:
    """Compute a ``schema_analytics`` dict from a trace's observation list.

    Token counts are rough estimates derived from content length
    (~4 characters per token, split 60/40 prompt/completion), and timing
    figures are simulated via ``random`` — results vary between calls
    unless the caller seeds the RNG.

    Args:
        observations: Observation dicts from a trace; each may provide
            ``content`` and ``name`` keys.

    Returns:
        schema_analytics dict containing every statistic the frontend
        needs. All sections degrade gracefully (zeros) for empty input.
    """
    total_observations = len(observations)
    agent_names = set()
    total_content_length = 0
    successful_interactions = 0
    failed_interactions = 0

    # Single pass over the observations, collecting aggregates.
    for obs in observations:
        content = obs.get('content', '')
        total_content_length += len(content)
        agent_names.add(obs.get('name', ''))

        # Naive keyword heuristic for success/failure classification.
        if 'error' in content.lower() or 'failed' in content.lower():
            failed_interactions += 1
        else:
            successful_interactions += 1

    # Estimate token usage from content length: ~4 chars ≈ 1 token.
    estimated_total_tokens = total_content_length // 4
    estimated_prompt_tokens = estimated_total_tokens * 0.6  # 60% prompt
    estimated_completion_tokens = estimated_total_tokens * 0.4  # 40% completion

    # Simulate realistic processing time: 500-2000 ms per observation.
    base_time_per_observation = random.randint(500, 2000)
    total_execution_time_ms = total_observations * base_time_per_observation
    avg_execution_time_ms = total_execution_time_ms // total_observations if total_observations > 0 else 0

    # Success rate as a percentage (0 for empty input).
    success_rate = (successful_interactions / total_observations * 100) if total_observations > 0 else 0

    # Assemble the full schema_analytics payload.
    schema_analytics = {
        "numerical_overview": {
            "token_analytics": {
                "total_tokens": int(estimated_total_tokens),
                "total_prompt_tokens": int(estimated_prompt_tokens),
                "total_completion_tokens": int(estimated_completion_tokens),
                "avg_tokens_per_component": int(estimated_total_tokens // total_observations) if total_observations > 0 else 0,
                "prompt_to_completion_ratio": round(estimated_prompt_tokens / estimated_completion_tokens, 2) if estimated_completion_tokens > 0 else 0,
                "min_prompt_tokens": int(estimated_prompt_tokens * 0.5),
                "max_prompt_tokens": int(estimated_prompt_tokens * 1.5),
                "min_completion_tokens": int(estimated_completion_tokens * 0.3),
                "max_completion_tokens": int(estimated_completion_tokens * 2.0),
                "avg_prompt_tokens": int(estimated_prompt_tokens // total_observations) if total_observations > 0 else 0,
                "avg_completion_tokens": int(estimated_completion_tokens // total_observations) if total_observations > 0 else 0
            },
            "timing_analytics": {
                "total_execution_time_ms": total_execution_time_ms,
                "total_execution_time_seconds": round(total_execution_time_ms / 1000, 2),
                "avg_execution_time_ms": avg_execution_time_ms,
                "max_execution_time_ms": int(avg_execution_time_ms * 1.8),
                "min_execution_time_ms": int(avg_execution_time_ms * 0.4),
                "llm_time_percentage": random.randint(65, 85)  # LLM typically 65-85% of total time
            },
            "component_stats": {
                "total_components": total_observations,
                "unique_component_types": len(agent_names),
                "max_depth": min(total_observations, random.randint(3, 7)),  # simulated call-stack depth
                "success_rate": round(success_rate, 1),
                "agent_count": len(agent_names),
                "average_component_depth": random.randint(2, 4)
            }
        },
        "prompt_analytics": {
            "prompt_calls_detected": total_observations,
            "successful_calls": successful_interactions,
            "failed_calls": failed_interactions,
            "call_patterns": {
                # All calls after the first are sequential; clamped at 0 so
                # empty input no longer reports -1 (fixed from the original).
                "sequential_calls": max(0, total_observations - 1),
                "parallel_calls": 0,  # sample data is sequential
                "recursive_calls": 0
            }
        },
        "component_hierarchy": {
            "agents": list(agent_names),
            "interaction_flow": f"Sequential multi-agent collaboration with {len(agent_names)} agents",
            "complexity_score": min(10, len(agent_names) * 2 + (total_observations // 3))
        },
        "performance_metrics": {
            "total_execution_time_ms": total_execution_time_ms,
            "avg_llm_latency_ms": avg_execution_time_ms,
            "throughput_calls_per_second": round(1000 / avg_execution_time_ms, 2) if avg_execution_time_ms > 0 else 0
        }
    }

    return schema_analytics
def enhance_trace_file(trace_filepath: Path) -> bool:
    """Attach schema_analytics to one trace file, rewriting it in place.

    Args:
        trace_filepath: Path of the trace JSON file to enhance.

    Returns:
        True on success (or if analytics already present), False otherwise.
    """
    try:
        # Load the trace JSON from disk.
        payload = json.loads(trace_filepath.read_text(encoding='utf-8'))

        # Already enhanced? Skip without touching the file.
        if 'trace_metadata' in payload and 'schema_analytics' in payload['trace_metadata']:
            print(f" ⚠️ {trace_filepath.name} 已有analytics,跳过")
            return True

        raw_obs = payload.get('content', {}).get('observations', [])

        # Observations may be serialized as a JSON string; decode if so.
        if isinstance(raw_obs, str):
            try:
                raw_obs = json.loads(raw_obs)
            except json.JSONDecodeError:
                print(f" ❌ {trace_filepath.name} observations字符串解析失败")
                return False

        if not raw_obs:
            print(f" ❌ {trace_filepath.name} 没有observations数据")
            return False

        # Compute the analytics and attach them under trace_metadata.
        analytics = calculate_analytics_from_observations(raw_obs)
        payload.setdefault('trace_metadata', {})['schema_analytics'] = analytics

        # Persist the enhanced trace back to the same file.
        with open(trace_filepath, 'w', encoding='utf-8') as fh:
            json.dump(payload, fh, indent=2, ensure_ascii=False)

        # Report a short summary of what was written.
        overview = analytics['numerical_overview']
        tokens = overview['token_analytics']
        timing = overview['timing_analytics']
        components = overview['component_stats']

        print(f" ✅ {trace_filepath.name} 增强完成:")
        print(f" 📊 Tokens: {tokens['total_tokens']:,}")
        print(f" 🤖 Prompts: {analytics['prompt_analytics']['prompt_calls_detected']}")
        print(f" 🔧 Components: {components['total_components']}")
        print(f" ✅ Success Rate: {components['success_rate']}%")
        print(f" ⏱️ Execution Time: {timing['total_execution_time_ms']:,}ms")
        print(f" 📏 Call Stack Depth: {components['max_depth']}")

        return True

    except Exception as e:
        # Broad catch: best-effort batch script — report and keep going.
        print(f" ❌ 增强 {trace_filepath.name} 失败: {e}")
        return False
def enhance_all_trace_files(traces_dir="traces"):
    """Enhance the analytics of every sample trace file in a directory.

    Generalized from the original hard-coded ``traces`` directory: callers
    may now pass any directory (str or Path); the default is unchanged,
    so existing call sites behave identically.

    Args:
        traces_dir: Directory containing ``*.json`` trace files.
    """
    print("🚀 开始为样本trace添加统计分析信息")
    print("=" * 60)

    # Locate the trace files to process.
    traces_dir = Path(traces_dir)
    if not traces_dir.exists():
        print("❌ traces目录不存在")
        return

    trace_files = list(traces_dir.glob("*.json"))
    if not trace_files:
        print("❌ 未找到任何trace文件")
        return

    print(f"📁 发现 {len(trace_files)} 个trace文件")

    enhanced_count = 0
    failed_count = 0

    # Enhance each file, tallying successes and failures.
    for trace_file in trace_files:
        print(f"\n🔄 处理 {trace_file.name}:")
        if enhance_trace_file(trace_file):
            enhanced_count += 1
        else:
            failed_count += 1

    print(f"\n🎯 处理完成:")
    print(f"✅ 成功增强: {enhanced_count} 个文件")
    print(f"❌ 处理失败: {failed_count} 个文件")

    if enhanced_count > 0:
        print(f"\n💡 现在样本traces应该在AgentGraph界面中显示正确的统计信息!")
        print(f" - Tokens、Prompts、Tools & Functions 将显示具体数值")
        print(f" - Success Rate、Execution Time、Call Stack Depth 将显示真实数据")
        print(f" - 不再显示 N/A !")
# Script entry point: enhance every sample trace file.
if __name__ == "__main__":
    enhance_all_trace_files()