AgentGraph / backend /database /sample_data.py
wu981526092's picture
🔄 Replace Oxford Economics with Python Documentation Use Case
85ffdc8
raw
history blame
25.4 kB
#!/usr/bin/env python
"""
Sample data for database initialization.
Provides curated examples of traces and knowledge graphs for new users.
"""
import json
import logging
from typing import Dict, List, Any
logger = logging.getLogger(__name__)
# Enhanced sample traces showcasing AgentGraph's full capabilities.
#
# Each entry supplies the keyword arguments that insert_sample_data() forwards
# to save_trace(): content, filename, title, description, trace_type,
# trace_source and tags.
#
# Escaping note for the first trace: "content" is a regular (non-raw)
# triple-quoted string that holds a JSON document, and two of its fields
# ("request_raw" / "response_raw") hold JSON *inside* a JSON string. That gives
# three layers:
#   * Python source `\\n`    -> JSON text `\n`   -> newline in the value
#   * Python source `\\"`    -> JSON text `\"`   -> quote in the nested JSON
#   * Python source `\\\\n`  -> JSON text `\\n`  -> `\n` in the nested JSON
# A bare `\"` must not be used here: Python collapses it to `"`, which
# terminates the JSON string early and makes the whole trace unparseable.
SAMPLE_TRACES = [
    {
        "filename": "python_documentation_inquiry.json",
        "title": "Python Documentation Assistant Demo",
        "description": "Enhanced example showing RAG-powered AI assistant processing programming inquiry with knowledge search, failure detection and optimization suggestions",
        "trace_type": "documentation_search",
        "trace_source": "sample_data",
        "tags": ["programming", "rag_assistant", "documentation", "failure_detection", "optimization"],
        "content": """{
"id": "doc_trace_demo_001",
"timestamp": "2025-01-27T00:00:00",
"metadata": {
"source": "AgentGraph_Demo",
"row_index": 0,
"converted_at": "2025-01-27T12:00:00.000000"
},
"data": {
"total_observations": 1,
"summary": "Python documentation inquiry with RAG-powered assistant response"
},
"observations": [
{
"id": "demo_obs_001",
"type": "inference",
"timestamp": "2025-01-27T00:00:00",
"input": "You are a helpful Python programming assistant designed to answer questions accurately based on the provided documentation. Use the information retrieved from the knowledge base below to respond concisely and correctly.\\n\\n### Retrieved Documents:\\n['Python list comprehensions - efficient way to create lists', 'Python built-in functions - len(), range(), enumerate()', 'Python data structures - lists, dictionaries, sets, tuples']\\n\\n### Question:\\nwhat are python list comprehensions used for?\\n\\n### Answer:",
"output": "Python list comprehensions provide an efficient and readable way to create lists by applying expressions to elements in iterables.",
"metadata": {
"request_date": "2025-01-27T00:00:00",
"databricks_request_id": "demo-request-001",
"request_time": "2025-01-27T12:00:00.000",
"status_code": 200,
"sampling_fraction": 1,
"request_raw": "{\\"messages\\":[{\\"role\\":\\"system\\",\\"content\\":\\"You are a helpful Python programming assistant\\"},{\\"role\\":\\"user\\",\\"content\\":\\"You are a helpful Python programming assistant designed to answer questions accurately based on the provided documentation. Use the information retrieved from the knowledge base below to respond concisely and correctly.\\\\n\\\\n### Retrieved Documents:\\\\n['Python list comprehensions - efficient way to create lists', 'Python built-in functions - len(), range(), enumerate()', 'Python data structures - lists, dictionaries, sets, tuples']\\\\n\\\\n### Question:\\\\nwhat are python list comprehensions used for?\\\\n\\\\n### Answer:\\"}]}",
"response_raw": "{\\"choices\\":[{\\"content_filter_results\\":{\\"hate\\":{\\"filtered\\":false,\\"severity\\":\\"safe\\"},\\"self_harm\\":{\\"filtered\\":false,\\"severity\\":\\"safe\\"},\\"sexual\\":{\\"filtered\\":false,\\"severity\\":\\"safe\\"},\\"violence\\":{\\"filtered\\":false,\\"severity\\":\\"safe\\"}},\\"finish_reason\\":\\"stop\\",\\"index\\":0,\\"logprobs\\":null,\\"message\\":{\\"content\\":\\"Python list comprehensions provide an efficient and readable way to create lists by applying expressions to elements in iterables.\\",\\"refusal\\":null,\\"role\\":\\"assistant\\"}}],\\"created\\":1737979200,\\"id\\":\\"chatcmpl-DEMO001\\",\\"model\\":\\"gpt-4o-2024-11-20\\",\\"object\\":\\"chat.completion\\",\\"system_fingerprint\\":\\"fp_demo_001\\",\\"usage\\":{\\"completion_tokens\\":13,\\"completion_tokens_details\\":{\\"accepted_prediction_tokens\\":0,\\"audio_tokens\\":0,\\"reasoning_tokens\\":0,\\"rejected_prediction_tokens\\":0},\\"prompt_tokens\\":162,\\"prompt_tokens_details\\":{\\"audio_tokens\\":0,\\"cached_tokens\\":0},\\"total_tokens\\":175}}",
"served_entity_id": "demo-entity-001",
"logging_error_codes": "[]",
"requester": "demo-user-001"
},
"latency": 0.531,
"usage": {
"completion_tokens": 13,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens": 162,
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
},
"total_tokens": 175
},
"model": "gpt-4o-2024-11-20"
}
]
}""",
    },
    {
        "filename": "simple_qa_demonstration.txt",
        "title": "Simple Q&A: Basic Programming Help",
        "description": "Streamlined example showing basic programming assistant interaction with clear knowledge graph structure",
        "trace_type": "conversation",
        "trace_source": "sample_data",
        "tags": ["simple", "qa", "basic", "demonstration"],
        "content": """User: Hello, can you help me understand what Python list comprehensions are used for?
Assistant: Certainly! Python list comprehensions are a powerful and concise way to create lists. Here are their main uses:
1. **Creating Lists from Iterables**: Transform elements from existing sequences
2. **Filtering Data**: Include only elements that meet certain conditions
3. **Mathematical Operations**: Apply calculations to each element efficiently
4. **Nested Iterations**: Handle complex data transformations in readable syntax
For example: `[x**2 for x in range(10) if x % 2 == 0]` creates a list of squares of even numbers.
Would you like to see more specific examples or learn about related Python features?""",
    },
]
# Enhanced knowledge graphs demonstrating AgentGraph's complete feature set.
#
# Every entry carries a "trace_index" (a position in SAMPLE_TRACES) and the
# "graph_data" payload that insert_sample_data() hands to save_knowledge_graph().
# The *_ref lists are presumably line ranges inside the linked trace's content;
# verify against the consumer before editing them.
SAMPLE_KNOWLEDGE_GRAPHS = [
    # ------------------------------------------------------------------
    # Graph for the RAG documentation trace.
    # NOTE(review): the filename still uses the older "oxford_economics"
    # naming; it is kept unchanged here.
    # ------------------------------------------------------------------
    {
        "filename": "kg_oxford_economics_enhanced.json",
        "trace_index": 0,  # first entry of SAMPLE_TRACES
        "graph_data": {
            "system_name": "Python Documentation Assistant",
            "system_summary": "This intelligent assistant processes user inquiries about Python programming through a streamlined workflow. When users submit questions, the knowledgeable agent analyzes them using RAG-powered knowledge search and delivers accurate, contextual responses about Python concepts, syntax, and best practices.",
            "entities": [
                {
                    "id": "agent_001",
                    "type": "Agent",
                    "name": "Python Documentation Agent",
                    "importance": "HIGH",
                    "raw_prompt": "You are a helpful Python programming assistant designed to answer questions accurately based on retrieved documentation context. Use the search results to provide precise responses.",
                    "raw_prompt_ref": [{"line_start": 31, "line_end": 32}],
                },
                {
                    "id": "task_001",
                    "type": "Task",
                    "name": "Programming Question Processing",
                    "importance": "HIGH",
                    "raw_prompt": "Process user inquiry about Python programming and generate an accurate, contextual response based on available documentation and programming best practices.",
                    "raw_prompt_ref": [{"line_start": 26, "line_end": 28}],
                },
                {
                    "id": "input_001",
                    "type": "Input",
                    "name": "User Programming Query",
                    "importance": "HIGH",
                    "raw_prompt": "what are python list comprehensions used for?",
                    "raw_prompt_ref": [{"line_start": 19, "line_end": 19}],
                },
                {
                    "id": "output_001",
                    "type": "Output",
                    "name": "Programming Concept Explanation",
                    "importance": "HIGH",
                    "raw_prompt": "Python list comprehensions provide an efficient and readable way to create lists by applying expressions to elements in iterables.",
                    "raw_prompt_ref": [{"line_start": 20, "line_end": 20}],
                },
                {
                    "id": "human_001",
                    "type": "Human",
                    "name": "Python Developer",
                    "importance": "MEDIUM",
                    "raw_prompt": "Developer seeking Python programming guidance and documentation",
                    "raw_prompt_ref": [{"line_start": 31, "line_end": 31}],
                },
                {
                    "id": "tool_001",
                    "type": "Tool",
                    "name": "Python Documentation Search",
                    "importance": "HIGH",
                    "raw_prompt": "Retrieval-Augmented Generation (RAG) system that searches Python documentation knowledge base for relevant concepts, syntax examples, and best practices to provide contextual information.",
                    "raw_prompt_ref": [{"line_start": 49, "line_end": 49}],
                },
            ],
            "relations": [
                {
                    "id": "rel_001",
                    "source": "input_001",
                    "target": "agent_001",
                    "type": "CONSUMED_BY",
                    "importance": "HIGH",
                    "interaction_prompt": "User query received and processed by the Python documentation agent",
                    "interaction_prompt_ref": [{"line_start": 19, "line_end": 19}],
                },
                {
                    "id": "rel_002",
                    "source": "agent_001",
                    "target": "task_001",
                    "type": "PERFORMS",
                    "importance": "HIGH",
                    "interaction_prompt": "Agent executes programming question processing task",
                    "interaction_prompt_ref": [{"line_start": 26, "line_end": 28}],
                },
                {
                    "id": "rel_003",
                    "source": "task_001",
                    "target": "output_001",
                    "type": "PRODUCES",
                    "importance": "HIGH",
                    "interaction_prompt": "Processing task generates comprehensive programming concept explanation",
                    "interaction_prompt_ref": [{"line_start": 20, "line_end": 20}],
                },
                {
                    "id": "rel_004",
                    "source": "output_001",
                    "target": "human_001",
                    "type": "DELIVERS_TO",
                    "importance": "HIGH",
                    "interaction_prompt": "Programming explanation delivered to requesting developer",
                    "interaction_prompt_ref": [{"line_start": 20, "line_end": 20}],
                },
                {
                    "id": "rel_005",
                    "source": "agent_001",
                    "target": "tool_001",
                    "type": "USES",
                    "importance": "HIGH",
                    "interaction_prompt": "Agent queries knowledge search system to retrieve relevant Python documentation and contextual information",
                    "interaction_prompt_ref": [{"line_start": 49, "line_end": 49}],
                },
            ],
            "failures": [
                {
                    "id": "failure_001",
                    "risk_type": "HALLUCINATION",
                    "description": "Query lacks specificity - could ask about specific Python features rather than general usage.",
                    "raw_text": "what are python list comprehensions used for?",
                    "raw_text_ref": [{"line_start": 19, "line_end": 19}],
                    "affected_id": "input_001",
                },
            ],
            "optimizations": [
                {
                    "id": "opt_001",
                    "recommendation_type": "PROMPT_REFINEMENT",
                    "description": "Enhance the agent prompt to encourage more specific programming questions and provide structured code examples in responses.",
                    "affected_ids": ["agent_001"],
                    "raw_text_ref": [{"line_start": 31, "line_end": 32}],
                },
                {
                    "id": "opt_002",
                    "recommendation_type": "TOOL_ENHANCEMENT",
                    "description": "Expand knowledge search capabilities to include code examples, interactive tutorials, and version-specific documentation alongside basic syntax references.",
                    "affected_ids": ["tool_001"],
                    "raw_text_ref": [{"line_start": 49, "line_end": 49}],
                },
            ],
            # The *_count values below match the lengths of the lists above.
            "metadata": {
                "creation_timestamp": "2025-01-27T12:00:00Z",
                "schema_version": "2.1.0",
                "quality_score": 0.89,
                "entity_count": 6,
                "relation_count": 5,
                "failure_count": 1,
                "optimization_count": 2,
                "processing_method": "production_enhanced",
                "content_source": "documentation_trace",
                "language": "en",
                "domain": "programming_documentation",
            },
        },
    },
    # ------------------------------------------------------------------
    # Graph for the plain-text Q&A conversation trace.
    # ------------------------------------------------------------------
    {
        "filename": "kg_simple_qa_demo.json",
        "trace_index": 1,  # second entry of SAMPLE_TRACES
        "graph_data": {
            "system_name": "Python Q&A Assistant",
            "system_summary": "A streamlined question-answering system designed to provide clear information about Python programming concepts. The system processes user questions and delivers comprehensive responses about Python syntax and best practices.",
            "entities": [
                {
                    "id": "user_001",
                    "type": "Human",
                    "name": "User",
                    "importance": "HIGH",
                    "raw_prompt": "Hello, can you help me understand what Python list comprehensions are used for?",
                    "raw_prompt_ref": [{"line_start": 1, "line_end": 1}],
                },
                {
                    "id": "assistant_001",
                    "type": "Agent",
                    "name": "Q&A Assistant",
                    "importance": "HIGH",
                    "raw_prompt": "RAG-powered assistant specialized in searching knowledge base and providing accurate information about Python programming concepts and capabilities.",
                    "raw_prompt_ref": [{"line_start": 3, "line_end": 3}],
                },
                {
                    "id": "inquiry_task",
                    "type": "Task",
                    "name": "Service Information Request",
                    "importance": "HIGH",
                    "raw_prompt": "Process user inquiry about Python programming concepts and provide comprehensive response.",
                    "raw_prompt_ref": [{"line_start": 1, "line_end": 1}],
                },
                {
                    "id": "response_output",
                    "type": "Output",
                    "name": "Service Overview Response",
                    "importance": "HIGH",
                    "raw_prompt": "Certainly! Python list comprehensions are a powerful feature that provides: 1. Concise List Creation 2. Efficient Data Filtering 3. Readable Transformations 4. Performance Optimization",
                    "raw_prompt_ref": [{"line_start": 3, "line_end": 10}],
                },
            ],
            "relations": [
                {
                    "id": "user_inquiry",
                    "source": "user_001",
                    "target": "assistant_001",
                    "type": "CONSUMED_BY",
                    "importance": "HIGH",
                    "interaction_prompt": "User asks about Python programming concepts",
                    "interaction_prompt_ref": [{"line_start": 1, "line_end": 1}],
                },
                {
                    "id": "assistant_processes",
                    "source": "assistant_001",
                    "target": "inquiry_task",
                    "type": "PERFORMS",
                    "importance": "HIGH",
                    "interaction_prompt": "Assistant processes the programming concept request",
                    "interaction_prompt_ref": [{"line_start": 3, "line_end": 3}],
                },
                {
                    "id": "task_generates",
                    "source": "inquiry_task",
                    "target": "response_output",
                    "type": "PRODUCES",
                    "importance": "HIGH",
                    "interaction_prompt": "Task generates comprehensive programming concept overview",
                    "interaction_prompt_ref": [{"line_start": 3, "line_end": 10}],
                },
                {
                    "id": "response_delivered",
                    "source": "response_output",
                    "target": "user_001",
                    "type": "DELIVERS_TO",
                    "importance": "HIGH",
                    "interaction_prompt": "Comprehensive programming information delivered to user",
                    "interaction_prompt_ref": [{"line_start": 3, "line_end": 10}],
                },
            ],
            "failures": [],
            "optimizations": [
                {
                    "id": "opt_simple_001",
                    "recommendation_type": "PROMPT_REFINEMENT",
                    "description": "Consider adding interactive code examples and hands-on tutorials to allow users to practice the programming concepts being discussed.",
                    "affected_ids": ["assistant_001"],
                    "raw_text_ref": [{"line_start": 12, "line_end": 12}],
                },
            ],
            # The *_count values below match the lengths of the lists above.
            "metadata": {
                "creation_timestamp": "2025-01-27T12:00:00Z",
                "schema_version": "2.1.0",
                "quality_score": 0.92,
                "entity_count": 4,
                "relation_count": 4,
                "failure_count": 0,
                "optimization_count": 1,
                "processing_method": "simplified_demonstration",
                "content_source": "conversation_trace",
                "language": "en",
                "domain": "programming_documentation",
            },
        },
    },
]
def insert_sample_data(session, force_insert=False):
    """
    Insert sample traces and knowledge graphs into the database.

    Traces from SAMPLE_TRACES are saved first. Each entry of
    SAMPLE_KNOWLEDGE_GRAPHS is then attached to the trace found at its
    "trace_index" position in SAMPLE_TRACES. A failure on one trace or graph
    is recorded and the remaining items are still attempted.

    Args:
        session: Database session
        force_insert: If True, insert even if data already exists

    Returns:
        Dict with insertion results: "traces_inserted",
        "knowledge_graphs_inserted", "skipped" (number of sample traces left
        out because sample data was already present) and "errors" (list of
        human-readable messages, including graphs skipped because their
        trace could not be inserted).

    Raises:
        Exception: Any error escaping the per-item handlers is logged,
            appended to "errors" and re-raised so the caller can roll back.
    """
    # Imported at call time rather than module import time; presumably this
    # avoids a circular import with the database package - confirm.
    from backend.database.utils import save_trace, save_knowledge_graph
    from backend.database.models import Trace

    results = {
        "traces_inserted": 0,
        "knowledge_graphs_inserted": 0,
        "skipped": 0,
        "errors": [],
    }

    # Check if sample data already exists
    if not force_insert:
        existing_sample = (
            session.query(Trace)
            .filter(Trace.trace_source == "sample_data")
            .first()
        )
        if existing_sample:
            logger.info("Sample data already exists, skipping insertion")
            results["skipped"] = len(SAMPLE_TRACES)
            return results

    try:
        # Map "position in SAMPLE_TRACES" -> saved trace id. A dict (rather
        # than a list that only grows on success) keeps every knowledge graph
        # tied to the right trace even when an earlier trace failed to save.
        trace_ids = {}
        for i, trace_data in enumerate(SAMPLE_TRACES):
            try:
                trace = save_trace(
                    session=session,
                    content=trace_data["content"],
                    filename=trace_data["filename"],
                    title=trace_data["title"],
                    description=trace_data["description"],
                    trace_type=trace_data["trace_type"],
                    trace_source=trace_data["trace_source"],
                    tags=trace_data["tags"],
                )
                trace_ids[i] = trace.trace_id
                results["traces_inserted"] += 1
                logger.info(f"Inserted sample trace: {trace_data['title']}")
            except Exception as e:
                error_msg = f"Error inserting trace {i}: {str(e)}"
                logger.exception(error_msg)
                results["errors"].append(error_msg)

        # Insert corresponding knowledge graphs
        for kg_data in SAMPLE_KNOWLEDGE_GRAPHS:
            try:
                trace_index = kg_data["trace_index"]
                if trace_index not in trace_ids:
                    # The linked trace is missing (failed above or the index
                    # is out of range); report it instead of dropping the
                    # graph silently.
                    error_msg = (
                        f"Skipping knowledge graph {kg_data['filename']}: "
                        f"trace {trace_index} was not inserted"
                    )
                    logger.warning(error_msg)
                    results["errors"].append(error_msg)
                    continue

                save_knowledge_graph(
                    session=session,
                    filename=kg_data["filename"],
                    graph_data=kg_data["graph_data"],
                    trace_id=trace_ids[trace_index],
                    window_index=None,  # None for final KG
                    window_total=1,  # Not None to indicate it's a processed final KG
                    is_original=True,
                )
                results["knowledge_graphs_inserted"] += 1
                logger.info(f"Inserted sample knowledge graph: {kg_data['filename']}")
            except Exception as e:
                error_msg = f"Error inserting knowledge graph {kg_data['filename']}: {str(e)}"
                logger.exception(error_msg)
                results["errors"].append(error_msg)

        logger.info(f"Sample data insertion completed: {results}")
    except Exception as e:
        error_msg = f"Fatal error during sample data insertion: {str(e)}"
        logger.exception(error_msg)
        results["errors"].append(error_msg)
        raise  # Re-raise to trigger rollback in calling code

    return results
def get_sample_data_info():
    """
    Get information about the available sample data.

    The counts and "trace_types" are derived from SAMPLE_TRACES and
    SAMPLE_KNOWLEDGE_GRAPHS; the remaining fields are fixed descriptive values.

    Returns:
        Dict with sample data statistics
    """
    return {
        "traces_count": len(SAMPLE_TRACES),
        "knowledge_graphs_count": len(SAMPLE_KNOWLEDGE_GRAPHS),
        # Distinct trace types, sorted so the result is identical on every
        # call and every interpreter run (plain set iteration order for
        # strings can change between runs).
        "trace_types": sorted({t["trace_type"] for t in SAMPLE_TRACES}),
        "complexity_levels": ["enhanced", "simple"],
        "features": ["rag_search", "failure_detection", "optimization_recommendations", "content_references", "quality_scoring"],
        "description": "Enhanced AgentGraph examples showcasing Python Documentation RAG-powered assistant with knowledge search, failure detection, optimization suggestions, and advanced knowledge graph features",
    }