Spaces:
Running
π― Add Smart Sample Data Preloading System
Browse files⨠New Features:
β’ Intelligent sample data preloader for better UX
β’ Auto-preloads 6 diverse examples on first startup
β’ Smart selection algorithm ensures variety in agent types, complexity, domains
β’ Non-blocking background preload doesn't delay startup
π§ Implementation:
β’ backend/scripts/preload_sample_data.py - Core preloading logic
β’ Modified backend/app.py with startup preload check
β’ Enhanced trace metadata with rich categorization tags
β’ Handles database deduplication and error recovery
π Benefits:
β’ New users get immediate examples to explore
β’ No more empty 'My Traces' on first visit
β’ Diverse samples showcase different agent interaction patterns
β’ Knowledge graphs can be generated on-demand from preloaded traces
π Bug Fixes:
β’ Fixed 'str expected, not NoneType' errors in multiple modules
β’ Added null checks for OPENAI_API_KEY environment variable
β’ Resolved circular import issues in knowledge graph components
π User Experience:
β’ Immediate value demonstration for new users
β’ Seamless transition from Gallery to actual trace analysis
β’ Rich sample metadata for better understanding
- agentgraph/extraction/graph_processing/knowledge_graph_processor.py +2 -1
- agentgraph/extraction/graph_utilities/knowledge_graph_merger.py +2 -1
- agentgraph/methods/production/multi_agent_knowledge_extractor.py +2 -1
- agentgraph/testing/knowledge_graph_tester.py +2 -1
- backend/app.py +54 -0
- backend/scripts/preload_sample_data.py +395 -0
- datasets/example_traces/hand-crafted.jsonl +0 -0
- example_template_hand_crafted.json +18 -0
|
@@ -66,7 +66,8 @@ from agentgraph.reconstruction.content_reference_resolver import ContentReferenc
|
|
| 66 |
|
| 67 |
# Load OpenAI API key from configuration
|
| 68 |
from utils.config import OPENAI_API_KEY
|
| 69 |
-
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
class SlidingWindowMonitor:
|
|
|
|
| 66 |
|
| 67 |
# Load OpenAI API key from configuration
|
| 68 |
from utils.config import OPENAI_API_KEY
|
| 69 |
+
if OPENAI_API_KEY:
|
| 70 |
+
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
|
| 71 |
|
| 72 |
|
| 73 |
class SlidingWindowMonitor:
|
|
@@ -50,7 +50,8 @@ from agentgraph.shared.models.reference_based import KnowledgeGraph
|
|
| 50 |
|
| 51 |
# Load OpenAI API key from configuration
|
| 52 |
from utils.config import OPENAI_API_KEY
|
| 53 |
-
|
|
|
|
| 54 |
# Note: OPENAI_MODEL_NAME will be set dynamically in __init__ method
|
| 55 |
|
| 56 |
|
|
|
|
| 50 |
|
| 51 |
# Load OpenAI API key from configuration
|
| 52 |
from utils.config import OPENAI_API_KEY
|
| 53 |
+
if OPENAI_API_KEY:
|
| 54 |
+
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
|
| 55 |
# Note: OPENAI_MODEL_NAME will be set dynamically in __init__ method
|
| 56 |
|
| 57 |
|
|
@@ -80,7 +80,8 @@ import base64
|
|
| 80 |
|
| 81 |
# openlit.init()
|
| 82 |
|
| 83 |
-
|
|
|
|
| 84 |
# Note: OPENAI_MODEL_NAME will be set dynamically when creating the crew
|
| 85 |
|
| 86 |
|
|
|
|
| 80 |
|
| 81 |
# openlit.init()
|
| 82 |
|
| 83 |
+
if OPENAI_API_KEY:
|
| 84 |
+
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
|
| 85 |
# Note: OPENAI_MODEL_NAME will be set dynamically when creating the crew
|
| 86 |
|
| 87 |
|
|
@@ -52,7 +52,8 @@ import openlit
|
|
| 52 |
|
| 53 |
openlit.init()
|
| 54 |
|
| 55 |
-
|
|
|
|
| 56 |
|
| 57 |
# (future) from .perturbation_types.rule_misunderstanding import RuleMisunderstandingPerturbationTester
|
| 58 |
# (future) from .perturbation_types.emotional_manipulation import EmotionalManipulationPerturbationTester
|
|
|
|
| 52 |
|
| 53 |
openlit.init()
|
| 54 |
|
| 55 |
+
if OPENAI_API_KEY:
|
| 56 |
+
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
|
| 57 |
|
| 58 |
# (future) from .perturbation_types.rule_misunderstanding import RuleMisunderstandingPerturbationTester
|
| 59 |
# (future) from .perturbation_types.emotional_manipulation import EmotionalManipulationPerturbationTester
|
|
@@ -7,6 +7,7 @@ import logging
|
|
| 7 |
import os
|
| 8 |
from pathlib import Path
|
| 9 |
import sys
|
|
|
|
| 10 |
from fastapi import FastAPI, Request, status
|
| 11 |
from fastapi.staticfiles import StaticFiles
|
| 12 |
from fastapi.middleware.cors import CORSMiddleware
|
|
@@ -64,6 +65,52 @@ app.include_router(observability.router)
|
|
| 64 |
# Start background scheduler for automated tasks
|
| 65 |
# scheduler_service.start()
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
@app.on_event("startup")
|
| 68 |
async def startup_event():
|
| 69 |
"""Start background services on app startup"""
|
|
@@ -82,6 +129,13 @@ async def startup_event():
|
|
| 82 |
logger.error(f"β Database initialization failed: {e}")
|
| 83 |
# Don't fail startup - continue with empty database
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
logger.info("π Backend API available at: http://0.0.0.0:7860")
|
| 86 |
# scheduler_service.start() # This line is now commented out
|
| 87 |
|
|
|
|
| 7 |
import os
|
| 8 |
from pathlib import Path
|
| 9 |
import sys
|
| 10 |
+
import asyncio
|
| 11 |
from fastapi import FastAPI, Request, status
|
| 12 |
from fastapi.staticfiles import StaticFiles
|
| 13 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 65 |
# Start background scheduler for automated tasks
|
| 66 |
# scheduler_service.start()
|
| 67 |
|
| 68 |
+
async def preload_sample_data_if_needed():
|
| 69 |
+
"""
|
| 70 |
+
Preload sample traces and knowledge graphs if the database is empty.
|
| 71 |
+
This provides new users with immediate examples to explore.
|
| 72 |
+
"""
|
| 73 |
+
try:
|
| 74 |
+
from backend.database.utils import get_db
|
| 75 |
+
from backend.database import models
|
| 76 |
+
|
| 77 |
+
# Check if any traces already exist in the database
|
| 78 |
+
with next(get_db()) as db:
|
| 79 |
+
trace_count = db.query(models.Trace).count()
|
| 80 |
+
|
| 81 |
+
if trace_count > 0:
|
| 82 |
+
logger.info(f"π Found {trace_count} existing traces, skipping sample data preload")
|
| 83 |
+
return
|
| 84 |
+
|
| 85 |
+
logger.info("π No traces found, preloading sample data for better UX...")
|
| 86 |
+
|
| 87 |
+
# Import and run preloader in a thread to avoid blocking startup
|
| 88 |
+
def run_preloader():
|
| 89 |
+
try:
|
| 90 |
+
# Import here to avoid circular dependencies
|
| 91 |
+
sys.path.append(str(Path(__file__).parent))
|
| 92 |
+
from scripts.preload_sample_data import SampleDataPreloader
|
| 93 |
+
|
| 94 |
+
preloader = SampleDataPreloader()
|
| 95 |
+
results = preloader.preload_samples(count=6, force=False) # Preload 6 diverse samples
|
| 96 |
+
|
| 97 |
+
if results["success"]:
|
| 98 |
+
logger.info(f"β
Successfully preloaded {results['traces_preloaded']} sample traces "
|
| 99 |
+
f"and {results['knowledge_graphs_generated']} knowledge graphs")
|
| 100 |
+
else:
|
| 101 |
+
logger.warning(f"β οΈ Sample data preloading completed with errors: {results['errors']}")
|
| 102 |
+
|
| 103 |
+
except Exception as e:
|
| 104 |
+
logger.warning(f"β οΈ Failed to preload sample data: {e}")
|
| 105 |
+
|
| 106 |
+
# Run preloader in background thread to avoid blocking startup
|
| 107 |
+
loop = asyncio.get_event_loop()
|
| 108 |
+
await loop.run_in_executor(None, run_preloader)
|
| 109 |
+
|
| 110 |
+
except Exception as e:
|
| 111 |
+
logger.warning(f"β οΈ Error during sample data preload check: {e}")
|
| 112 |
+
# Don't fail - this is just a UX enhancement
|
| 113 |
+
|
| 114 |
@app.on_event("startup")
|
| 115 |
async def startup_event():
|
| 116 |
"""Start background services on app startup"""
|
|
|
|
| 129 |
logger.error(f"β Database initialization failed: {e}")
|
| 130 |
# Don't fail startup - continue with empty database
|
| 131 |
|
| 132 |
+
# π Preload sample data for new users (non-blocking)
|
| 133 |
+
try:
|
| 134 |
+
await preload_sample_data_if_needed()
|
| 135 |
+
except Exception as e:
|
| 136 |
+
logger.warning(f"β οΈ Sample data preloading failed (non-critical): {e}")
|
| 137 |
+
# Don't fail startup - sample data is optional
|
| 138 |
+
|
| 139 |
logger.info("π Backend API available at: http://0.0.0.0:7860")
|
| 140 |
# scheduler_service.start() # This line is now commented out
|
| 141 |
|
|
@@ -0,0 +1,395 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Preload Sample Data Script
|
| 4 |
+
==========================
|
| 5 |
+
|
| 6 |
+
This script preloads carefully selected sample traces and knowledge graphs
|
| 7 |
+
to provide new users with immediate examples to explore, eliminating the
|
| 8 |
+
need to start from an empty system.
|
| 9 |
+
|
| 10 |
+
Features:
|
| 11 |
+
- Selects diverse, representative traces from the example dataset
|
| 12 |
+
- Automatically generates knowledge graphs for preloaded traces
|
| 13 |
+
- Handles database initialization and deduplication
|
| 14 |
+
- Provides rich metadata and categorization for better UX
|
| 15 |
+
|
| 16 |
+
Usage:
|
| 17 |
+
python backend/scripts/preload_sample_data.py [--force] [--count N]
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
import argparse
|
| 21 |
+
import json
|
| 22 |
+
import logging
|
| 23 |
+
import os
|
| 24 |
+
import sys
|
| 25 |
+
from pathlib import Path
|
| 26 |
+
from typing import List, Dict, Any
|
| 27 |
+
import random
|
| 28 |
+
|
| 29 |
+
# Add project root to path
|
| 30 |
+
project_root = Path(__file__).parent.parent.parent
|
| 31 |
+
sys.path.insert(0, str(project_root))
|
| 32 |
+
|
| 33 |
+
from backend.database.utils import save_trace, get_db
|
| 34 |
+
from backend.database.init_db import init_database
|
| 35 |
+
from sqlalchemy.orm import Session
|
| 36 |
+
# Note: Knowledge graph generation will be added in future version
|
| 37 |
+
|
| 38 |
+
# Setup logging
|
| 39 |
+
logging.basicConfig(level=logging.INFO)
|
| 40 |
+
logger = logging.getLogger(__name__)
|
| 41 |
+
|
| 42 |
+
class SampleDataPreloader:
|
| 43 |
+
"""Handles preloading of sample traces and knowledge graphs."""
|
| 44 |
+
|
| 45 |
+
def __init__(self):
|
| 46 |
+
self.project_root = project_root
|
| 47 |
+
self.example_data_dir = self.project_root / "datasets" / "example_traces"
|
| 48 |
+
self.sample_criteria = {
|
| 49 |
+
"diverse_agents": True,
|
| 50 |
+
"varied_complexity": True,
|
| 51 |
+
"different_domains": True,
|
| 52 |
+
"include_successes_and_failures": True
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
def load_example_traces(self) -> List[Dict[str, Any]]:
|
| 56 |
+
"""Load all available example traces from JSONL files."""
|
| 57 |
+
traces = []
|
| 58 |
+
|
| 59 |
+
for subset_file in ["algorithm-generated.jsonl", "hand-crafted.jsonl"]:
|
| 60 |
+
file_path = self.example_data_dir / subset_file
|
| 61 |
+
if not file_path.exists():
|
| 62 |
+
logger.warning(f"Example file not found: {file_path}")
|
| 63 |
+
continue
|
| 64 |
+
|
| 65 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
| 66 |
+
for line in f:
|
| 67 |
+
if line.strip():
|
| 68 |
+
trace_data = json.loads(line)
|
| 69 |
+
traces.append(trace_data)
|
| 70 |
+
|
| 71 |
+
logger.info(f"Loaded {len(traces)} example traces")
|
| 72 |
+
return traces
|
| 73 |
+
|
| 74 |
+
def select_diverse_samples(self, traces: List[Dict[str, Any]], count: int = 8) -> List[Dict[str, Any]]:
|
| 75 |
+
"""
|
| 76 |
+
Select a diverse set of sample traces using intelligent criteria.
|
| 77 |
+
|
| 78 |
+
Selection strategy:
|
| 79 |
+
1. Ensure variety in agent types and counts
|
| 80 |
+
2. Include both correct and incorrect examples
|
| 81 |
+
3. Vary in complexity (trace length, agent interaction)
|
| 82 |
+
4. Cover different problem domains
|
| 83 |
+
"""
|
| 84 |
+
if len(traces) <= count:
|
| 85 |
+
return traces
|
| 86 |
+
|
| 87 |
+
# Categorize traces
|
| 88 |
+
categorized = {
|
| 89 |
+
'single_agent': [],
|
| 90 |
+
'multi_agent_simple': [], # 2-3 agents
|
| 91 |
+
'multi_agent_complex': [], # 4+ agents
|
| 92 |
+
'correct_examples': [],
|
| 93 |
+
'incorrect_examples': [],
|
| 94 |
+
'short_traces': [],
|
| 95 |
+
'medium_traces': [],
|
| 96 |
+
'long_traces': []
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
for trace in traces:
|
| 100 |
+
agents = trace.get('agents', [])
|
| 101 |
+
agent_count = len(agents) if agents else 1
|
| 102 |
+
is_correct = trace.get('is_correct', None)
|
| 103 |
+
trace_length = len(trace.get('trace', ''))
|
| 104 |
+
|
| 105 |
+
# Categorize by agent count
|
| 106 |
+
if agent_count == 1:
|
| 107 |
+
categorized['single_agent'].append(trace)
|
| 108 |
+
elif agent_count <= 3:
|
| 109 |
+
categorized['multi_agent_simple'].append(trace)
|
| 110 |
+
else:
|
| 111 |
+
categorized['multi_agent_complex'].append(trace)
|
| 112 |
+
|
| 113 |
+
# Categorize by correctness
|
| 114 |
+
if is_correct is True:
|
| 115 |
+
categorized['correct_examples'].append(trace)
|
| 116 |
+
elif is_correct is False:
|
| 117 |
+
categorized['incorrect_examples'].append(trace)
|
| 118 |
+
|
| 119 |
+
# Categorize by trace length
|
| 120 |
+
if trace_length < 2000:
|
| 121 |
+
categorized['short_traces'].append(trace)
|
| 122 |
+
elif trace_length < 8000:
|
| 123 |
+
categorized['medium_traces'].append(trace)
|
| 124 |
+
else:
|
| 125 |
+
categorized['long_traces'].append(trace)
|
| 126 |
+
|
| 127 |
+
# Smart selection to ensure diversity
|
| 128 |
+
selected = []
|
| 129 |
+
|
| 130 |
+
# Selection strategy: ensure we have examples from each important category
|
| 131 |
+
selection_plan = [
|
| 132 |
+
('single_agent', 1),
|
| 133 |
+
('multi_agent_simple', 2),
|
| 134 |
+
('multi_agent_complex', 2),
|
| 135 |
+
('correct_examples', 1),
|
| 136 |
+
('incorrect_examples', 2)
|
| 137 |
+
]
|
| 138 |
+
|
| 139 |
+
used_ids = set()
|
| 140 |
+
for category, target_count in selection_plan:
|
| 141 |
+
candidates = [t for t in categorized[category] if t['id'] not in used_ids]
|
| 142 |
+
selected_from_category = random.sample(
|
| 143 |
+
candidates,
|
| 144 |
+
min(target_count, len(candidates))
|
| 145 |
+
)
|
| 146 |
+
selected.extend(selected_from_category)
|
| 147 |
+
used_ids.update(t['id'] for t in selected_from_category)
|
| 148 |
+
|
| 149 |
+
# Fill remaining slots with random selections
|
| 150 |
+
remaining_slots = count - len(selected)
|
| 151 |
+
if remaining_slots > 0:
|
| 152 |
+
remaining_candidates = [t for t in traces if t['id'] not in used_ids]
|
| 153 |
+
additional = random.sample(
|
| 154 |
+
remaining_candidates,
|
| 155 |
+
min(remaining_slots, len(remaining_candidates))
|
| 156 |
+
)
|
| 157 |
+
selected.extend(additional)
|
| 158 |
+
|
| 159 |
+
logger.info(f"Selected {len(selected)} diverse samples from {len(traces)} total traces")
|
| 160 |
+
return selected[:count]
|
| 161 |
+
|
| 162 |
+
def preload_trace_to_db(self, trace_data: Dict[str, Any], db: Session) -> str:
|
| 163 |
+
"""
|
| 164 |
+
Preload a single trace into the database with rich metadata.
|
| 165 |
+
|
| 166 |
+
Returns:
|
| 167 |
+
trace_id of the created trace
|
| 168 |
+
"""
|
| 169 |
+
# Prepare enhanced metadata
|
| 170 |
+
agents = trace_data.get('agents', [])
|
| 171 |
+
agent_count = len(agents) if agents else 1
|
| 172 |
+
|
| 173 |
+
# Create descriptive title
|
| 174 |
+
question = trace_data.get('question', '')
|
| 175 |
+
title_prefix = f"Sample: {agent_count}-Agent"
|
| 176 |
+
if question:
|
| 177 |
+
# Truncate question for title
|
| 178 |
+
question_snippet = question[:60] + "..." if len(question) > 60 else question
|
| 179 |
+
title = f"{title_prefix} - {question_snippet}"
|
| 180 |
+
else:
|
| 181 |
+
title = f"{title_prefix} Example #{trace_data['id']}"
|
| 182 |
+
|
| 183 |
+
# Enhanced description
|
| 184 |
+
description_parts = []
|
| 185 |
+
if question:
|
| 186 |
+
description_parts.append(f"Question: {question}")
|
| 187 |
+
|
| 188 |
+
if agents:
|
| 189 |
+
description_parts.append(f"Agents: {', '.join(agents)}")
|
| 190 |
+
|
| 191 |
+
mistake_reason = trace_data.get('mistake_reason')
|
| 192 |
+
if mistake_reason:
|
| 193 |
+
description_parts.append(f"Analysis: {mistake_reason}")
|
| 194 |
+
|
| 195 |
+
description = " | ".join(description_parts)
|
| 196 |
+
|
| 197 |
+
# Rich tags for categorization and filtering
|
| 198 |
+
tags = [
|
| 199 |
+
"sample",
|
| 200 |
+
"preloaded",
|
| 201 |
+
trace_data.get('subset', '').lower().replace('-', '_'),
|
| 202 |
+
f"{agent_count}_agents"
|
| 203 |
+
]
|
| 204 |
+
|
| 205 |
+
if trace_data.get('is_correct') is True:
|
| 206 |
+
tags.append("correct_execution")
|
| 207 |
+
elif trace_data.get('is_correct') is False:
|
| 208 |
+
tags.append("contains_errors")
|
| 209 |
+
|
| 210 |
+
if agents:
|
| 211 |
+
# Add agent-specific tags
|
| 212 |
+
for agent in agents[:3]: # Limit to first 3 to avoid tag explosion
|
| 213 |
+
clean_agent = agent.replace('_', '').replace('-', '').lower()
|
| 214 |
+
tags.append(f"agent_{clean_agent}")
|
| 215 |
+
|
| 216 |
+
# Enhanced metadata
|
| 217 |
+
enhanced_metadata = {
|
| 218 |
+
"source": "example_dataset",
|
| 219 |
+
"original_id": trace_data['id'],
|
| 220 |
+
"subset": trace_data.get('subset'),
|
| 221 |
+
"question_id": trace_data.get('question_id'),
|
| 222 |
+
"ground_truth": trace_data.get('ground_truth'),
|
| 223 |
+
"mistake_step": trace_data.get('mistake_step'),
|
| 224 |
+
"mistake_agent": trace_data.get('mistake_agent'),
|
| 225 |
+
"agents": agents,
|
| 226 |
+
"agent_count": agent_count,
|
| 227 |
+
"is_correct": trace_data.get('is_correct'),
|
| 228 |
+
"preloaded": True,
|
| 229 |
+
"quality": "curated_sample"
|
| 230 |
+
}
|
| 231 |
+
|
| 232 |
+
# Save to database
|
| 233 |
+
trace = save_trace(
|
| 234 |
+
session=db,
|
| 235 |
+
content=trace_data['trace'],
|
| 236 |
+
filename=f"sample_{trace_data['subset'].lower().replace('-', '_')}_{trace_data['id']}.json",
|
| 237 |
+
title=title,
|
| 238 |
+
description=description[:500], # Limit description length
|
| 239 |
+
trace_type="sample",
|
| 240 |
+
trace_source="preloaded_example",
|
| 241 |
+
tags=tags,
|
| 242 |
+
trace_metadata=enhanced_metadata
|
| 243 |
+
)
|
| 244 |
+
|
| 245 |
+
logger.info(f"Preloaded trace: {title} (ID: {trace.trace_id})")
|
| 246 |
+
return trace.trace_id
|
| 247 |
+
|
| 248 |
+
def generate_knowledge_graph(self, trace_id: str, trace_content: str) -> bool:
|
| 249 |
+
"""
|
| 250 |
+
Generate knowledge graph for a preloaded trace.
|
| 251 |
+
|
| 252 |
+
Note: Knowledge graph generation is currently disabled for preload.
|
| 253 |
+
Users can generate knowledge graphs manually after the traces are loaded.
|
| 254 |
+
|
| 255 |
+
Returns:
|
| 256 |
+
True if successful, False otherwise
|
| 257 |
+
"""
|
| 258 |
+
logger.info(f"Knowledge graph generation for trace {trace_id} skipped (to be generated on-demand)")
|
| 259 |
+
# For now, we skip KG generation during preload to avoid complexity
|
| 260 |
+
# Users can generate KGs manually through the UI after traces are loaded
|
| 261 |
+
return False
|
| 262 |
+
|
| 263 |
+
def check_existing_preloaded_data(self, db: Session) -> bool:
|
| 264 |
+
"""Check if preloaded sample data already exists in database."""
|
| 265 |
+
try:
|
| 266 |
+
from backend.database import models
|
| 267 |
+
|
| 268 |
+
# Query for traces with preloaded tag
|
| 269 |
+
traces = db.query(models.Trace).filter(
|
| 270 |
+
models.Trace.trace_source == "preloaded_example"
|
| 271 |
+
).all()
|
| 272 |
+
|
| 273 |
+
return len(traces) > 0
|
| 274 |
+
|
| 275 |
+
except Exception as e:
|
| 276 |
+
logger.error(f"Error checking existing preloaded data: {e}")
|
| 277 |
+
return False
|
| 278 |
+
|
| 279 |
+
def preload_samples(self, count: int = 8, force: bool = False) -> Dict[str, Any]:
|
| 280 |
+
"""
|
| 281 |
+
Main method to preload sample traces and generate knowledge graphs.
|
| 282 |
+
|
| 283 |
+
Args:
|
| 284 |
+
count: Number of sample traces to preload
|
| 285 |
+
force: If True, preload even if samples already exist
|
| 286 |
+
|
| 287 |
+
Returns:
|
| 288 |
+
Summary of preloading results
|
| 289 |
+
"""
|
| 290 |
+
results = {
|
| 291 |
+
"success": False,
|
| 292 |
+
"traces_preloaded": 0,
|
| 293 |
+
"knowledge_graphs_generated": 0,
|
| 294 |
+
"errors": []
|
| 295 |
+
}
|
| 296 |
+
|
| 297 |
+
try:
|
| 298 |
+
# Initialize database
|
| 299 |
+
logger.info("Initializing database...")
|
| 300 |
+
init_database()
|
| 301 |
+
|
| 302 |
+
# Check if preloaded data already exists
|
| 303 |
+
with next(get_db()) as db:
|
| 304 |
+
if not force and self.check_existing_preloaded_data(db):
|
| 305 |
+
logger.info("Preloaded sample data already exists. Use --force to override.")
|
| 306 |
+
results["message"] = "Sample data already exists"
|
| 307 |
+
return results
|
| 308 |
+
|
| 309 |
+
# Load and select example traces
|
| 310 |
+
logger.info("Loading example traces...")
|
| 311 |
+
all_traces = self.load_example_traces()
|
| 312 |
+
|
| 313 |
+
if not all_traces:
|
| 314 |
+
results["errors"].append("No example traces found")
|
| 315 |
+
return results
|
| 316 |
+
|
| 317 |
+
# Select diverse samples
|
| 318 |
+
selected_traces = self.select_diverse_samples(all_traces, count)
|
| 319 |
+
logger.info(f"Selected {len(selected_traces)} traces for preloading")
|
| 320 |
+
|
| 321 |
+
# Preload traces to database
|
| 322 |
+
preloaded_trace_ids = []
|
| 323 |
+
for trace_data in selected_traces:
|
| 324 |
+
try:
|
| 325 |
+
trace_id = self.preload_trace_to_db(trace_data, db)
|
| 326 |
+
preloaded_trace_ids.append((trace_id, trace_data['trace']))
|
| 327 |
+
results["traces_preloaded"] += 1
|
| 328 |
+
|
| 329 |
+
except Exception as e:
|
| 330 |
+
error_msg = f"Failed to preload trace {trace_data['id']}: {e}"
|
| 331 |
+
logger.error(error_msg)
|
| 332 |
+
results["errors"].append(error_msg)
|
| 333 |
+
|
| 334 |
+
# Commit trace changes
|
| 335 |
+
db.commit()
|
| 336 |
+
|
| 337 |
+
# Generate knowledge graphs (outside of trace transaction)
|
| 338 |
+
kg_success_count = 0
|
| 339 |
+
for trace_id, trace_content in preloaded_trace_ids:
|
| 340 |
+
if self.generate_knowledge_graph(trace_id, trace_content):
|
| 341 |
+
kg_success_count += 1
|
| 342 |
+
|
| 343 |
+
results["knowledge_graphs_generated"] = kg_success_count
|
| 344 |
+
results["success"] = True
|
| 345 |
+
|
| 346 |
+
logger.info(f"""
|
| 347 |
+
Preloading completed successfully!
|
| 348 |
+
- Traces preloaded: {results['traces_preloaded']}
|
| 349 |
+
- Knowledge graphs generated: {results['knowledge_graphs_generated']}
|
| 350 |
+
- Errors: {len(results['errors'])}
|
| 351 |
+
""")
|
| 352 |
+
|
| 353 |
+
except Exception as e:
|
| 354 |
+
error_msg = f"Fatal error during preloading: {e}"
|
| 355 |
+
logger.error(error_msg)
|
| 356 |
+
results["errors"].append(error_msg)
|
| 357 |
+
|
| 358 |
+
return results
|
| 359 |
+
|
| 360 |
+
def main():
|
| 361 |
+
"""Parse arguments and run sample data preloading."""
|
| 362 |
+
parser = argparse.ArgumentParser(description='Preload sample traces and knowledge graphs')
|
| 363 |
+
parser.add_argument('--count', type=int, default=8,
|
| 364 |
+
help='Number of sample traces to preload (default: 8)')
|
| 365 |
+
parser.add_argument('--force', action='store_true',
|
| 366 |
+
help='Force preload even if sample data already exists')
|
| 367 |
+
parser.add_argument('--verbose', '-v', action='store_true',
|
| 368 |
+
help='Enable verbose logging')
|
| 369 |
+
|
| 370 |
+
args = parser.parse_args()
|
| 371 |
+
|
| 372 |
+
if args.verbose:
|
| 373 |
+
logging.getLogger().setLevel(logging.DEBUG)
|
| 374 |
+
|
| 375 |
+
# Run preloading
|
| 376 |
+
preloader = SampleDataPreloader()
|
| 377 |
+
results = preloader.preload_samples(count=args.count, force=args.force)
|
| 378 |
+
|
| 379 |
+
# Display results
|
| 380 |
+
if results["success"]:
|
| 381 |
+
print(f"β
Successfully preloaded {results['traces_preloaded']} sample traces")
|
| 382 |
+
print(f"π Generated {results['knowledge_graphs_generated']} knowledge graphs")
|
| 383 |
+
if results["errors"]:
|
| 384 |
+
print(f"β οΈ {len(results['errors'])} errors occurred:")
|
| 385 |
+
for error in results["errors"]:
|
| 386 |
+
print(f" - {error}")
|
| 387 |
+
return 0
|
| 388 |
+
else:
|
| 389 |
+
print("β Preloading failed")
|
| 390 |
+
for error in results["errors"]:
|
| 391 |
+
print(f" - {error}")
|
| 392 |
+
return 1
|
| 393 |
+
|
| 394 |
+
if __name__ == "__main__":
|
| 395 |
+
sys.exit(main())
|
|
The diff for this file is too large to render.
See raw diff
|
|
|
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"id": 58,
|
| 3 |
+
"subset": "Hand-Crafted",
|
| 4 |
+
"mistake_step": 1,
|
| 5 |
+
"question": "Your question here - what task is the agent trying to solve?",
|
| 6 |
+
"agent": "Primary_Agent_Name",
|
| 7 |
+
"agents": [
|
| 8 |
+
"Agent1",
|
| 9 |
+
"Agent2",
|
| 10 |
+
"Agent3"
|
| 11 |
+
],
|
| 12 |
+
"trace": "[\n {\n \"content\": \"System prompt or initial instruction\",\n \"name\": \"System\",\n \"role\": \"system\"\n },\n {\n \"content\": \"User's question or task description\",\n \"name\": \"User\",\n \"role\": \"user\"\n },\n {\n \"content\": \"Agent's response or action\",\n \"name\": \"Agent_Name\",\n \"role\": \"assistant\"\n },\n {\n \"content\": \"Follow-up interaction or error\",\n \"name\": \"Agent_Name\",\n \"role\": \"assistant\"\n }\n]",
|
| 13 |
+
"is_correct": false,
|
| 14 |
+
"question_id": "84c5fae2-0bad-47f2-87f5-61bd66ab3a84",
|
| 15 |
+
"ground_truth": "The correct answer or expected result",
|
| 16 |
+
"mistake_agent": "Agent_Name",
|
| 17 |
+
"mistake_reason": "Specific reason why the agent failed - be descriptive"
|
| 18 |
+
}
|