wu981526092 committed on
Commit
32fecea
·
1 Parent(s): 9216205
backend/database/README_sample_data.md CHANGED
@@ -12,30 +12,23 @@ The enhanced sample data system automatically inserts curated examples showcasin
12
  - Non-destructive: skips insertion if existing data is found
13
  - Logs all operations for transparency
14
 
15
- ### 🎯 Advanced Complex Examples
16
 
17
- The system includes 3 carefully selected examples showcasing AgentGraph's complete capabilities:
18
 
19
- 1. **Multi-Agent Geospatial Research** (High Complexity)
20
 
21
- - Type: `multi_agent_geospatial`
22
- - Example: Complex location-based restaurant search with error handling
23
- - Multiple specialized agents, systematic verification, error recovery
24
- - Features: Advanced failure detection, optimization recommendations, tool integration
25
 
26
- 2. **Scientific Multi-Agent Calculation** (High Complexity)
27
 
28
- - Type: `scientific_calculation`
29
- - Example: Interdisciplinary calculation combining astronomy and sports science
30
- - Celestial physics and marathon expertise coordination
31
- - Features: Cross-domain knowledge integration, numerical verification
32
-
33
- 3. **Advanced Data Analysis Investigation** (High Complexity)
34
-
35
- - Type: `data_analysis`
36
- - Example: Complex database investigation with systematic error handling
37
- - USGS database access challenges and resolution strategies
38
- - Features: Data validation, format detection, systematic problem-solving
39
 
40
  ### 🕸️ Enhanced Knowledge Graph Examples
41
 
 
12
  - Non-destructive: skips insertion if existing data is found
13
  - Logs all operations for transparency
14
 
15
+ ### 🎯 Enhanced Examples
16
 
17
+ The system includes 2 carefully selected examples showcasing AgentGraph's advanced capabilities:
18
 
19
+ 1. **Oxford Economics AI Assistant** (Enhanced)
20
 
21
+ - Type: `aif_inference`
22
+ - Example: AI assistant processing economic inquiry with failure detection
23
+ - 6 entities, 5 relations, 1 failure, 2 optimizations
24
+ - Features: Content references, quality scoring, system summary
25
 
26
+ 2. **Simple Q&A Demonstration** (Basic)
27
 
28
+ - Type: `conversation`
29
+ - Example: Basic Oxford Economics service inquiry
30
+ - 4 entities, 4 relations, 0 failures, 1 optimization
31
+ - Features: Streamlined structure, clear interaction flow
 
 
 
 
 
 
 
32
 
33
  ### 🕸️ Enhanced Knowledge Graph Examples
34
 
backend/database/sample_data.py CHANGED
@@ -1,8 +1,7 @@
1
  #!/usr/bin/env python
2
  """
3
- Enhanced sample data for database initialization.
4
- Provides curated examples showcasing AgentGraph's complete capabilities including complex multi-agent collaboration,
5
- failure detection, optimization recommendations, and advanced content referencing.
6
  """
7
 
8
  import json
@@ -11,190 +10,173 @@ from typing import Dict, List, Any
11
 
12
  logger = logging.getLogger(__name__)
13
 
14
- # Complex multi-agent collaboration examples showcasing AgentGraph's advanced capabilities
15
  SAMPLE_TRACES = [
16
  {
17
- "filename": "multi_agent_geospatial_research.json",
18
- "title": "Multi-Agent Geospatial Research: Restaurant Discovery",
19
- "description": "Complex multi-agent collaboration involving location services, data verification, and systematic research with error handling and retries",
20
- "trace_type": "multi_agent_geospatial",
21
  "trace_source": "sample_data",
22
- "tags": ["multi_agent", "geospatial", "research", "verification", "complex"],
23
  "content": """{
24
- "id": "geospatial_research_demo_001",
25
  "timestamp": "2025-01-27T00:00:00",
26
  "metadata": {
27
- "source": "AgentGraph_MultiAgent_Demo",
28
- "complexity_level": "high",
29
  "converted_at": "2025-01-27T12:00:00.000000"
30
  },
31
- "agents": ["Location_Services_Expert", "Data_Verification_Expert", "Eateries_Expert", "Computer_terminal"],
32
- "task": "What is the closest eatery to Harkness Memorial State Park that is still open at 11pm on Wednesdays?",
33
- "conversations": [
34
- {
35
- "agent": "Location_Services_Expert",
36
- "content": "First, I will identify the exact location of Harkness Memorial State Park. Then, I will search for eateries near the park using location-based services and filter to find those open until at least 11pm on Wednesdays.",
37
- "action": "location_identification",
38
- "result": "Harkness Memorial State Park is located at 275 Great Neck Rd, Waterford, CT 06385, USA"
39
- },
40
- {
41
- "agent": "Computer_terminal",
42
- "content": "Executing web search for eateries near Harkness Memorial State Park",
43
- "action": "perform_web_search",
44
- "query": "eateries near Harkness Memorial State Park Waterford CT open until 11 PM Wednesday",
45
- "result": "Found multiple restaurant aggregation sites with listings"
46
- },
47
- {
48
- "agent": "Data_Verification_Expert",
49
- "content": "The search results provide several sources listing eateries near Harkness Memorial State Park. I need to extract details about the restaurants and determine their operating hours on Wednesdays.",
50
- "action": "data_extraction_analysis",
51
- "extracted_eateries": ["Ballo Italian Kitchen & Bar", "On the Waterfront", "374 Kitchen & Cocktails", "Captain Scott's Lobster Dock"],
52
- "verification_status": "requires_individual_hour_verification"
53
- },
54
- {
55
- "agent": "Eateries_Expert",
56
- "content": "After systematic verification of operating hours for multiple eateries, none of the initially identified restaurants meet the criteria of being open until 11 PM on Wednesdays. Expanding search radius to include neighboring towns.",
57
- "action": "expanded_search_analysis",
58
- "findings": "Most local eateries close by 9 PM on weekdays, making the search challenging",
59
- "recommendation": "Consider fast food chains or 24-hour establishments for late-night dining options"
60
- }
61
- ]
62
- }"""
63
- },
64
- {
65
- "filename": "scientific_calculation_marathon.json",
66
- "title": "Scientific Calculation: Marathon Pace Analysis",
67
- "description": "Advanced scientific calculation involving celestial physics, marathon expertise, and numerical verification with cross-validation",
68
- "trace_type": "scientific_calculation",
69
- "trace_source": "sample_data",
70
- "tags": ["scientific", "calculation", "verification", "celestial_physics", "sports_analysis"],
71
- "content": """{
72
- "id": "marathon_calculation_demo_001",
73
- "timestamp": "2025-01-27T00:00:00",
74
- "metadata": {
75
- "source": "AgentGraph_Scientific_Demo",
76
- "complexity_level": "high",
77
- "converted_at": "2025-01-27T12:00:00.000000"
78
  },
79
- "agents": ["CelestialPhysics_Expert", "Marathon_Expert", "Computer_terminal", "Verification_Expert"],
80
- "task": "If Eliud Kipchoge could maintain his record-making marathon pace indefinitely, how many thousand hours would it take him to run the distance between the Earth and the Moon at its closest approach?",
81
- "conversations": [
82
- {
83
- "agent": "CelestialPhysics_Expert",
84
- "content": "To solve this task, I need to break down the plan step-by-step: 1) Verify the minimum perigee distance between Earth and Moon, 2) Calculate Kipchoge's marathon pace, 3) Calculate time required, 4) Convert to thousand hours and round",
85
- "action": "scientific_analysis_planning",
86
- "data_points": {
87
- "moon_perigee_distance": "356,500 kilometers",
88
- "marathon_distance": "42.195 kilometers",
89
- "kipchoge_record_time": "1:59:40"
90
- }
91
- },
92
- {
93
- "agent": "Computer_terminal",
94
- "content": "Executing Python calculation for marathon pace and distance analysis",
95
- "action": "numerical_calculation",
96
- "code": "total_time_in_hours = 1 + (59/60) + (40/3600); kipchoge_pace_kmh = 42.195 / total_time_in_hours; time_to_run_hours = 356500 / kipchoge_pace_kmh; thousand_hours = round(time_to_run_hours / 1000) * 1000",
97
- "result": "17000 hours"
98
- },
99
- {
100
- "agent": "Marathon_Expert",
101
- "content": "After reviewing the process and calculations, the results are accurate. Kipchoge's record time converts to approximately 1.9944 hours, giving a pace of ~21.24 km/h. Time to run 356,500 km would be ~16,788 hours, which rounds to 17,000 hours.",
102
- "action": "calculation_verification",
103
- "verification_details": "All assumptions and steps are correct, final answer is well-rounded according to instructions"
104
- },
105
  {
106
- "agent": "Verification_Expert",
107
- "content": "I confirm the calculations and the final result. The process correctly follows the plan and all steps are verified.",
108
- "action": "final_verification",
109
- "conclusion": "17,000 hours is the correct answer"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  }
111
  ]
112
  }"""
113
  },
114
  {
115
- "filename": "data_analysis_usgs_database.json",
116
- "title": "Data Analysis: USGS Database Investigation",
117
- "description": "Complex data analysis involving database access, data extraction challenges, error handling, and systematic problem-solving",
118
- "trace_type": "data_analysis",
119
  "trace_source": "sample_data",
120
- "tags": ["data_analysis", "database", "error_handling", "verification", "usgs"],
121
- "content": """{
122
- "id": "data_analysis_demo_001",
123
- "timestamp": "2025-01-27T00:00:00",
124
- "metadata": {
125
- "source": "AgentGraph_DataAnalysis_Demo",
126
- "complexity_level": "high",
127
- "converted_at": "2025-01-27T12:00:00.000000"
128
- },
129
- "agents": ["DataAnalysis_Expert", "DataVerification_Expert", "StatisticalAnalysis_Expert", "Computer_terminal"],
130
- "task": "How many nonindigenous crocodiles were found in Florida from the year 2000 through 2020? You can get the data from the USGS Nonindigenous Aquatic Species database.",
131
- "conversations": [
132
- {
133
- "agent": "DataAnalysis_Expert",
134
- "content": "To proceed with the task, I need to: 1) Confirm the exact name and URL for the USGS Nonindigenous Aquatic Species database, 2) Download the correct dataset, 3) Extract data for nonindigenous crocodiles in Florida from 2000-2020, 4) Analyze and count the occurrences",
135
- "action": "data_acquisition_planning",
136
- "challenges": "Need to identify correct URL for dataset download"
137
- },
138
- {
139
- "agent": "Computer_terminal",
140
- "content": "Attempting to download dataset using placeholder URL",
141
- "action": "dataset_download_attempt",
142
- "command": "curl -o nonindigenous_aquatic_species.csv",
143
- "error": "Downloaded file appears to be HTML instead of CSV data"
144
- },
145
- {
146
- "agent": "DataVerification_Expert",
147
- "content": "The file appears to be an HTML page rather than the required CSV dataset. This suggests the placeholder URL downloaded a webpage instead of the actual data. We need to identify the correct URL for the USGS database.",
148
- "action": "data_validation_error_detection",
149
- "finding": "File contains HTML content, not structured CSV data",
150
- "recommendation": "Confirm exact URL for USGS Nonindigenous Aquatic Species database"
151
- },
152
- {
153
- "agent": "StatisticalAnalysis_Expert",
154
- "content": "Based on previous attempts and the current data download issues, we need to visit the USGS Nonindigenous Aquatic Species database website directly to obtain the correct download link for the dataset.",
155
- "action": "methodology_adjustment",
156
- "solution": "Access https://nas.er.usgs.gov/ to find correct dataset URL",
157
- "data_requirements": "CSV format with species, location, and date information for filtering"
158
- }
159
- ]
160
- }"""
161
  }
162
  ]
163
 
164
- # Enhanced knowledge graphs demonstrating AgentGraph's complete feature set with complex scenarios
165
  SAMPLE_KNOWLEDGE_GRAPHS = [
166
  {
167
- "filename": "kg_multi_agent_geospatial.json",
168
  "trace_index": 0, # Links to first trace
169
  "graph_data": {
170
- "system_name": "Multi-Agent Geospatial Research System",
171
- "system_summary": "This sophisticated system coordinates multiple specialized agents to conduct comprehensive geospatial research. When users submit location-based queries, the Location Services Expert identifies precise coordinates, the Data Verification Expert validates findings through systematic analysis, the Eateries Expert provides domain-specific knowledge, and the Computer Terminal executes searches and calculations. The system demonstrates advanced error handling when initial searches fail to meet criteria.",
172
  "entities": [
173
  {
174
- "id": "location_expert_001",
175
  "type": "Agent",
176
- "name": "Location Services Expert",
177
  "importance": "HIGH",
178
- "raw_prompt": "Identify exact location coordinates and conduct systematic location-based searches using mapping services and geographical databases.",
179
  "raw_prompt_ref": [
180
  {
181
- "line_start": 12,
182
- "line_end": 14,
183
- "confidence": 0.95
184
  }
185
  ]
186
  },
187
  {
188
- "id": "verification_expert_001",
189
- "type": "Agent",
190
- "name": "Data Verification Expert",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  "importance": "HIGH",
192
- "raw_prompt": "Extract structured data from search results, validate information accuracy, and ensure data quality through systematic verification processes.",
193
  "raw_prompt_ref": [
194
  {
195
- "line_start": 25,
196
- "line_end": 27,
197
- "confidence": 0.92
198
  }
199
  ]
200
  }
@@ -202,16 +184,71 @@ SAMPLE_KNOWLEDGE_GRAPHS = [
202
  "relations": [
203
  {
204
  "id": "rel_001",
205
- "source": "search_input_001",
206
- "target": "location_expert_001",
207
  "type": "CONSUMED_BY",
208
  "importance": "HIGH",
209
- "interaction_prompt": "Complex location query processed by geographic specialist",
210
  "interaction_prompt_ref": [
211
  {
212
- "line_start": 12,
213
- "line_end": 14,
214
- "confidence": 0.90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  }
216
  ]
217
  }
@@ -219,30 +256,40 @@ SAMPLE_KNOWLEDGE_GRAPHS = [
219
  "failures": [
220
  {
221
  "id": "failure_001",
222
- "risk_type": "RETRIEVAL_ERROR",
223
- "description": "Initial search failed to find eateries meeting 11pm Wednesday criteria",
224
- "raw_text": "None of the eateries meet the requirement of being open until 11 PM",
225
  "raw_text_ref": [
226
  {
227
- "line_start": 33,
228
- "line_end": 34,
229
- "confidence": 0.95
230
  }
231
  ],
232
- "affected_id": "location_expert_001"
233
  }
234
  ],
235
  "optimizations": [
236
  {
237
  "id": "opt_001",
238
- "recommendation_type": "WORKFLOW_SIMPLIFICATION",
239
- "description": "Pre-filter for 24-hour establishments before detailed verification",
240
- "affected_ids": ["location_expert_001"],
 
 
 
 
 
 
 
 
 
 
 
 
241
  "raw_text_ref": [
242
  {
243
- "line_start": 39,
244
- "line_end": 40,
245
- "confidence": 0.87
246
  }
247
  ]
248
  }
@@ -250,198 +297,163 @@ SAMPLE_KNOWLEDGE_GRAPHS = [
250
  "metadata": {
251
  "creation_timestamp": "2025-01-27T12:00:00Z",
252
  "schema_version": "2.1.0",
253
- "quality_score": 0.91,
254
- "entity_count": 2,
255
- "relation_count": 1,
256
  "failure_count": 1,
257
- "optimization_count": 1,
258
- "processing_method": "multi_agent_collaboration_enhanced",
259
- "content_source": "geospatial_research_trace",
260
  "language": "en",
261
- "domain": "geospatial_analysis_restaurant_research"
262
  }
263
  }
264
  },
265
  {
266
- "filename": "kg_scientific_calculation.json",
267
  "trace_index": 1, # Links to second trace
268
  "graph_data": {
269
- "system_name": "Scientific Multi-Agent Calculation System",
270
- "system_summary": "Advanced system coordinating specialized scientific experts for complex calculations involving celestial physics, athletic performance analysis, and numerical verification.",
271
  "entities": [
272
  {
273
- "id": "celestial_expert_001",
274
- "type": "Agent",
275
- "name": "Celestial Physics Expert",
276
  "importance": "HIGH",
277
- "raw_prompt": "Apply astronomical knowledge for precise planetary distance measurements and calculations.",
278
  "raw_prompt_ref": [
279
  {
280
- "line_start": 11,
281
- "line_end": 13,
282
- "confidence": 0.96
283
  }
284
  ]
285
  },
286
  {
287
- "id": "marathon_expert_001",
288
  "type": "Agent",
289
- "name": "Marathon Performance Expert",
290
- "importance": "HIGH",
291
- "raw_prompt": "Provide specialized marathon running and pace calculation expertise.",
292
  "raw_prompt_ref": [
293
  {
294
- "line_start": 28,
295
- "line_end": 30,
296
- "confidence": 0.94
297
  }
298
  ]
299
- }
300
- ],
301
- "relations": [
302
  {
303
- "id": "rel_sci_001",
304
- "source": "celestial_expert_001",
305
- "target": "marathon_expert_001",
306
- "type": "NEXT",
307
  "importance": "HIGH",
308
- "interaction_prompt": "Celestial expert coordinates with marathon specialist for interdisciplinary calculation",
309
- "interaction_prompt_ref": [
310
  {
311
- "line_start": 14,
312
- "line_end": 16,
313
- "confidence": 0.94
314
  }
315
  ]
316
- }
317
- ],
318
- "failures": [],
319
- "optimizations": [
320
  {
321
- "id": "opt_sci_001",
322
- "recommendation_type": "TOOL_ENHANCEMENT",
323
- "description": "Implement automated unit conversion validation for scientific calculations",
324
- "affected_ids": ["celestial_expert_001"],
325
- "raw_text_ref": [
 
326
  {
327
- "line_start": 20,
328
- "line_end": 22,
329
- "confidence": 0.88
330
  }
331
  ]
332
  }
333
  ],
334
- "metadata": {
335
- "creation_timestamp": "2025-01-27T12:00:00Z",
336
- "schema_version": "2.1.0",
337
- "quality_score": 0.94,
338
- "entity_count": 2,
339
- "relation_count": 1,
340
- "failure_count": 0,
341
- "optimization_count": 1,
342
- "processing_method": "scientific_collaboration_enhanced",
343
- "content_source": "scientific_calculation_trace",
344
- "language": "en",
345
- "domain": "astronomical_sports_science_calculation"
346
- }
347
- }
348
- },
349
- {
350
- "filename": "kg_data_analysis_challenge.json",
351
- "trace_index": 2, # Links to third trace
352
- "graph_data": {
353
- "system_name": "Advanced Data Analysis Investigation System",
354
- "system_summary": "Robust system coordinating data experts to tackle complex database investigations with systematic error handling and problem-solving when facing data acquisition challenges.",
355
- "entities": [
356
  {
357
- "id": "data_analysis_expert_001",
358
- "type": "Agent",
359
- "name": "Data Analysis Expert",
 
360
  "importance": "HIGH",
361
- "raw_prompt": "Design and execute comprehensive data acquisition strategies with error recovery capabilities.",
362
- "raw_prompt_ref": [
363
  {
364
- "line_start": 12,
365
- "line_end": 14,
366
- "confidence": 0.93
367
  }
368
  ]
369
  },
370
  {
371
- "id": "data_verification_expert_001",
372
- "type": "Agent",
373
- "name": "Data Verification Expert",
 
374
  "importance": "HIGH",
375
- "raw_prompt": "Validate data integrity and identify format inconsistencies through systematic verification.",
376
- "raw_prompt_ref": [
377
  {
378
- "line_start": 22,
379
- "line_end": 24,
380
- "confidence": 0.95
381
  }
382
  ]
383
- }
384
- ],
385
- "relations": [
386
  {
387
- "id": "rel_data_001",
388
- "source": "data_analysis_expert_001",
389
- "target": "data_verification_expert_001",
390
- "type": "NEXT",
391
- "importance": "HIGH",
392
- "interaction_prompt": "Data analysis expert coordinates with verification specialist for quality assessment",
393
  "interaction_prompt_ref": [
394
  {
395
- "line_start": 15,
396
- "line_end": 17,
397
- "confidence": 0.92
398
  }
399
  ]
400
- }
401
- ],
402
- "failures": [
403
  {
404
- "id": "failure_data_001",
405
- "risk_type": "RETRIEVAL_ERROR",
406
- "description": "Database download resulted in HTML content instead of expected CSV format",
407
- "raw_text": "Downloaded file appears to be HTML instead of CSV data",
408
- "raw_text_ref": [
 
 
409
  {
410
- "line_start": 25,
411
- "line_end": 26,
412
- "confidence": 0.96
413
  }
414
- ],
415
- "affected_id": "data_analysis_expert_001"
416
  }
417
  ],
 
418
  "optimizations": [
419
  {
420
- "id": "opt_data_001",
421
- "recommendation_type": "WORKFLOW_SIMPLIFICATION",
422
- "description": "Implement pre-validation step to verify database URLs before download attempts",
423
- "affected_ids": ["data_analysis_expert_001"],
424
  "raw_text_ref": [
425
  {
426
- "line_start": 30,
427
- "line_end": 32,
428
- "confidence": 0.89
429
  }
430
  ]
431
  }
432
  ],
433
  "metadata": {
434
  "creation_timestamp": "2025-01-27T12:00:00Z",
435
- "schema_version": "2.1.0",
436
- "quality_score": 0.87,
437
- "entity_count": 2,
438
- "relation_count": 1,
439
- "failure_count": 1,
440
  "optimization_count": 1,
441
- "processing_method": "data_analysis_collaboration_enhanced",
442
- "content_source": "database_investigation_trace",
443
  "language": "en",
444
- "domain": "environmental_data_analysis_database_investigation"
445
  }
446
  }
447
  }
@@ -450,7 +462,7 @@ SAMPLE_KNOWLEDGE_GRAPHS = [
450
 
451
  def insert_sample_data(session, force_insert=False):
452
  """
453
- Insert enhanced sample traces and knowledge graphs into the database.
454
 
455
  Args:
456
  session: Database session
@@ -497,7 +509,7 @@ def insert_sample_data(session, force_insert=False):
497
  )
498
  trace_ids.append(trace.trace_id)
499
  results["traces_inserted"] += 1
500
- logger.info(f"Inserted enhanced sample trace: {trace_data['title']}")
501
  except Exception as e:
502
  error_msg = f"Error inserting trace {i}: {str(e)}"
503
  logger.error(error_msg)
@@ -518,16 +530,16 @@ def insert_sample_data(session, force_insert=False):
518
  is_original=True
519
  )
520
  results["knowledge_graphs_inserted"] += 1
521
- logger.info(f"Inserted enhanced sample knowledge graph: {kg_data['filename']}")
522
  except Exception as e:
523
  error_msg = f"Error inserting knowledge graph {kg_data['filename']}: {str(e)}"
524
  logger.error(error_msg)
525
  results["errors"].append(error_msg)
526
 
527
- logger.info(f"Enhanced sample data insertion completed: {results}")
528
 
529
  except Exception as e:
530
- error_msg = f"Fatal error during enhanced sample data insertion: {str(e)}"
531
  logger.error(error_msg)
532
  results["errors"].append(error_msg)
533
  raise # Re-raise to trigger rollback in calling code
@@ -537,7 +549,7 @@ def insert_sample_data(session, force_insert=False):
537
 
538
  def get_sample_data_info():
539
  """
540
- Get information about the available enhanced sample data.
541
 
542
  Returns:
543
  Dict with sample data statistics
@@ -546,21 +558,7 @@ def get_sample_data_info():
546
  "traces_count": len(SAMPLE_TRACES),
547
  "knowledge_graphs_count": len(SAMPLE_KNOWLEDGE_GRAPHS),
548
  "trace_types": list(set(t["trace_type"] for t in SAMPLE_TRACES)),
549
- "complexity_levels": ["high", "complex", "advanced"],
550
- "features": [
551
- "multi_agent_collaboration",
552
- "advanced_failure_detection",
553
- "systematic_optimization_recommendations",
554
- "enhanced_content_references",
555
- "comprehensive_quality_scoring",
556
- "complex_tool_integration",
557
- "systematic_error_handling",
558
- "domain_expertise_coordination"
559
- ],
560
- "domains": [
561
- "geospatial_analysis_restaurant_research",
562
- "astronomical_sports_science_calculation",
563
- "environmental_data_analysis_database_investigation"
564
- ],
565
- "description": "Advanced AgentGraph examples showcasing complex multi-agent collaboration scenarios including geospatial research, scientific calculations, and data analysis challenges. Features comprehensive failure detection, systematic optimization recommendations, and advanced error handling capabilities representing production-ready knowledge graph extraction."
566
- }
 
1
  #!/usr/bin/env python
2
  """
3
+ Sample data for database initialization.
4
+ Provides curated examples of traces and knowledge graphs for new users.
 
5
  """
6
 
7
  import json
 
10
 
11
  logger = logging.getLogger(__name__)
12
 
13
+ # Enhanced sample traces showcasing AgentGraph's full capabilities
14
  SAMPLE_TRACES = [
15
  {
16
+ "filename": "oxford_economics_inquiry.json",
17
+ "title": "Oxford Economics AI Assistant Demo",
18
+ "description": "Enhanced example showing AI assistant processing economic inquiry with failure detection and optimization suggestions",
19
+ "trace_type": "aif_inference",
20
  "trace_source": "sample_data",
21
+ "tags": ["economics", "ai_assistant", "enhanced", "failure_detection", "optimization"],
22
  "content": """{
23
+ "id": "aif_trace_demo_001",
24
  "timestamp": "2025-01-27T00:00:00",
25
  "metadata": {
26
+ "source": "AgentGraph_Demo",
27
+ "row_index": 0,
28
  "converted_at": "2025-01-27T12:00:00.000000"
29
  },
30
+ "data": {
31
+ "total_observations": 1,
32
+ "summary": "Oxford Economics inquiry with AI assistant response"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  },
34
+ "observations": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  {
36
+ "id": "demo_obs_001",
37
+ "type": "inference",
38
+ "timestamp": "2025-01-27T00:00:00",
39
+ "input": "You are a knowledgeable assistant on Oxford Economics designed to answer questions accurately based on the provided context. Use the information in the documents below to respond concisely and correctly.\\n\\n### Context:\\n['Oxford Economics contact: Mateusz Urban - murban@oxfordeconomics.com', 'Oxford Economics research and analysis services', 'Economic forecasting and consultancy expertise']\\n\\n### Question:\\nwhat does oxford economics present?\\n\\n### Answer:",
40
+ "output": "Oxford Economics provides economic analysis, forecasting, and consultancy services.",
41
+ "metadata": {
42
+ "request_date": "2025-01-27T00:00:00",
43
+ "databricks_request_id": "demo-request-001",
44
+ "request_time": "2025-01-27T12:00:00.000",
45
+ "status_code": 200,
46
+ "sampling_fraction": 1,
47
+ "request_raw": "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a knowledgeable assistant on Oxford Economics\"},{\"role\":\"user\",\"content\":\"You are a knowledgeable assistant on Oxford Economics designed to answer questions accurately based on the provided context. Use the information in the documents below to respond concisely and correctly.\\\\n\\\\n### Context:\\\\n['Oxford Economics contact: Mateusz Urban - murban@oxfordeconomics.com', 'Oxford Economics research and analysis services', 'Economic forecasting and consultancy expertise']\\\\n\\\\n### Question:\\\\nwhat does oxford economics present?\\\\n\\\\n### Answer:\"}]}",
48
+ "response_raw": "{\"choices\":[{\"content_filter_results\":{\"hate\":{\"filtered\":false,\"severity\":\"safe\"},\"self_harm\":{\"filtered\":false,\"severity\":\"safe\"},\"sexual\":{\"filtered\":false,\"severity\":\"safe\"},\"violence\":{\"filtered\":false,\"severity\":\"safe\"}},\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"content\":\"Oxford Economics provides economic analysis, forecasting, and consultancy services.\",\"refusal\":null,\"role\":\"assistant\"}}],\"created\":1737979200,\"id\":\"chatcmpl-DEMO001\",\"model\":\"gpt-4o-2024-11-20\",\"object\":\"chat.completion\",\"system_fingerprint\":\"fp_demo_001\",\"usage\":{\"completion_tokens\":13,\"completion_tokens_details\":{\"accepted_prediction_tokens\":0,\"audio_tokens\":0,\"reasoning_tokens\":0,\"rejected_prediction_tokens\":0},\"prompt_tokens\":162,\"prompt_tokens_details\":{\"audio_tokens\":0,\"cached_tokens\":0},\"total_tokens\":175}}",
49
+ "served_entity_id": "demo-entity-001",
50
+ "logging_error_codes": "[]",
51
+ "requester": "demo-user-001"
52
+ },
53
+ "latency": 0.531,
54
+ "usage": {
55
+ "completion_tokens": 13,
56
+ "completion_tokens_details": {
57
+ "accepted_prediction_tokens": 0,
58
+ "audio_tokens": 0,
59
+ "reasoning_tokens": 0,
60
+ "rejected_prediction_tokens": 0
61
+ },
62
+ "prompt_tokens": 162,
63
+ "prompt_tokens_details": {
64
+ "audio_tokens": 0,
65
+ "cached_tokens": 0
66
+ },
67
+ "total_tokens": 175
68
+ },
69
+ "model": "gpt-4o-2024-11-20"
70
  }
71
  ]
72
  }"""
73
  },
74
  {
75
+ "filename": "simple_qa_demonstration.txt",
76
+ "title": "Simple Q&A: Basic Interaction",
77
+ "description": "Streamlined example showing basic agent-human interaction with clear knowledge graph structure",
78
+ "trace_type": "conversation",
79
  "trace_source": "sample_data",
80
+ "tags": ["simple", "qa", "basic", "demonstration"],
81
+ "content": """User: Hello, can you help me understand what services Oxford Economics provides?
82
+
83
+ Assistant: Certainly! Oxford Economics is a leading economic consultancy that provides:
84
+
85
+ 1. **Economic Analysis & Forecasting**: Comprehensive economic research and predictive modeling
86
+ 2. **Consultancy Services**: Strategic advice for businesses, governments, and institutions
87
+ 3. **Industry Analysis**: Sector-specific insights and market intelligence
88
+ 4. **Policy Impact Assessment**: Evaluation of economic policies and their implications
89
+
90
+ They specialize in data-driven insights that help organizations make informed decisions about their economic environment and strategic planning.
91
+
92
+ Is there a specific aspect of their services you'd like to know more about?"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  }
94
  ]
95
 
96
+ # Enhanced knowledge graphs demonstrating AgentGraph's complete feature set
97
  SAMPLE_KNOWLEDGE_GRAPHS = [
98
  {
99
+ "filename": "kg_oxford_economics_enhanced.json",
100
  "trace_index": 0, # Links to first trace
101
  "graph_data": {
102
+ "system_name": "Oxford Economics AI Assistant",
103
+ "system_summary": "This intelligent assistant processes user inquiries about Oxford Economics through a streamlined workflow. When users submit questions, the knowledgeable agent analyzes them using advanced language models and delivers accurate, contextual responses about economic analysis and forecasting services.",
104
  "entities": [
105
  {
106
+ "id": "agent_001",
107
  "type": "Agent",
108
+ "name": "Oxford Economics Knowledge Agent",
109
  "importance": "HIGH",
110
+ "raw_prompt": "You are a knowledgeable assistant on Oxford Economics designed to answer questions accurately based on the provided context. Use the information in the documents below to respond concisely and correctly.",
111
  "raw_prompt_ref": [
112
  {
113
+ "line_start": 31,
114
+ "line_end": 32
 
115
  }
116
  ]
117
  },
118
  {
119
+ "id": "task_001",
120
+ "type": "Task",
121
+ "name": "Economic Inquiry Processing",
122
+ "importance": "HIGH",
123
+ "raw_prompt": "Process user inquiry about Oxford Economics and generate an accurate, contextual response based on available information and company expertise.",
124
+ "raw_prompt_ref": [
125
+ {
126
+ "line_start": 26,
127
+ "line_end": 28
128
+ }
129
+ ]
130
+ },
131
+ {
132
+ "id": "input_001",
133
+ "type": "Input",
134
+ "name": "User Economic Query",
135
+ "importance": "HIGH",
136
+ "raw_prompt": "what does oxford economics present?",
137
+ "raw_prompt_ref": [
138
+ {
139
+ "line_start": 19,
140
+ "line_end": 19
141
+ }
142
+ ]
143
+ },
144
+ {
145
+ "id": "output_001",
146
+ "type": "Output",
147
+ "name": "Economic Services Overview",
148
+ "importance": "HIGH",
149
+ "raw_prompt": "Oxford Economics provides economic analysis, forecasting, and consultancy services.",
150
+ "raw_prompt_ref": [
151
+ {
152
+ "line_start": 20,
153
+ "line_end": 20
154
+ }
155
+ ]
156
+ },
157
+ {
158
+ "id": "human_001",
159
+ "type": "Human",
160
+ "name": "Business User",
161
+ "importance": "MEDIUM",
162
+ "raw_prompt": "Professional seeking economic insights and analysis",
163
+ "raw_prompt_ref": [
164
+ {
165
+ "line_start": 31,
166
+ "line_end": 31
167
+ }
168
+ ]
169
+ },
170
+ {
171
+ "id": "tool_001",
172
+ "type": "Tool",
173
+ "name": "GPT-4o Language Model",
174
  "importance": "HIGH",
175
+ "raw_prompt": "Advanced AI language model with economic domain knowledge and structured response capabilities.",
176
  "raw_prompt_ref": [
177
  {
178
+ "line_start": 49,
179
+ "line_end": 49
 
180
  }
181
  ]
182
  }
 
184
  "relations": [
185
  {
186
  "id": "rel_001",
187
+ "source": "input_001",
188
+ "target": "agent_001",
189
  "type": "CONSUMED_BY",
190
  "importance": "HIGH",
191
+ "interaction_prompt": "User query received and processed by the economic knowledge agent",
192
  "interaction_prompt_ref": [
193
  {
194
+ "line_start": 19,
195
+ "line_end": 19
196
+ }
197
+ ]
198
+ },
199
+ {
200
+ "id": "rel_002",
201
+ "source": "agent_001",
202
+ "target": "task_001",
203
+ "type": "PERFORMS",
204
+ "importance": "HIGH",
205
+ "interaction_prompt": "Agent executes economic inquiry processing task",
206
+ "interaction_prompt_ref": [
207
+ {
208
+ "line_start": 26,
209
+ "line_end": 28
210
+ }
211
+ ]
212
+ },
213
+ {
214
+ "id": "rel_003",
215
+ "source": "task_001",
216
+ "target": "output_001",
217
+ "type": "PRODUCES",
218
+ "importance": "HIGH",
219
+ "interaction_prompt": "Processing task generates comprehensive economic services response",
220
+ "interaction_prompt_ref": [
221
+ {
222
+ "line_start": 20,
223
+ "line_end": 20
224
+ }
225
+ ]
226
+ },
227
+ {
228
+ "id": "rel_004",
229
+ "source": "output_001",
230
+ "target": "human_001",
231
+ "type": "DELIVERS_TO",
232
+ "importance": "HIGH",
233
+ "interaction_prompt": "Economic analysis delivered to requesting business user",
234
+ "interaction_prompt_ref": [
235
+ {
236
+ "line_start": 20,
237
+ "line_end": 20
238
+ }
239
+ ]
240
+ },
241
+ {
242
+ "id": "rel_005",
243
+ "source": "agent_001",
244
+ "target": "tool_001",
245
+ "type": "USES",
246
+ "importance": "HIGH",
247
+ "interaction_prompt": "Agent leverages language model for natural language understanding and generation",
248
+ "interaction_prompt_ref": [
249
+ {
250
+ "line_start": 49,
251
+ "line_end": 49
252
  }
253
  ]
254
  }
 
256
  "failures": [
257
  {
258
  "id": "failure_001",
259
+ "risk_type": "HALLUCINATION",
260
+ "description": "Minor spelling inconsistency in user query may affect search precision.",
261
+ "raw_text": "what does oxford economics present?",
262
  "raw_text_ref": [
263
  {
264
+ "line_start": 19,
265
+ "line_end": 19
 
266
  }
267
  ],
268
+ "affected_id": "input_001"
269
  }
270
  ],
271
  "optimizations": [
272
  {
273
  "id": "opt_001",
274
+ "recommendation_type": "PROMPT_REFINEMENT",
275
+ "description": "Enhance the agent prompt to include spell-checking and query normalization capabilities for improved accuracy and user experience.",
276
+ "affected_ids": ["agent_001"],
277
+ "raw_text_ref": [
278
+ {
279
+ "line_start": 31,
280
+ "line_end": 32
281
+ }
282
+ ]
283
+ },
284
+ {
285
+ "id": "opt_002",
286
+ "recommendation_type": "TOOL_ENHANCEMENT",
287
+ "description": "Integrate Oxford Economics knowledge base with the language model to provide more specific and detailed responses about services and capabilities.",
288
+ "affected_ids": ["tool_001"],
289
  "raw_text_ref": [
290
  {
291
+ "line_start": 49,
292
+ "line_end": 49
 
293
  }
294
  ]
295
  }
 
297
  "metadata": {
298
  "creation_timestamp": "2025-01-27T12:00:00Z",
299
  "schema_version": "2.1.0",
300
+ "quality_score": 0.89,
301
+ "entity_count": 6,
302
+ "relation_count": 5,
303
  "failure_count": 1,
304
+ "optimization_count": 2,
305
+ "processing_method": "production_enhanced",
306
+ "content_source": "aif_inference_trace",
307
  "language": "en",
308
+ "domain": "economics_consulting"
309
  }
310
  }
311
  },
312
  {
313
+ "filename": "kg_simple_qa_demo.json",
314
  "trace_index": 1, # Links to second trace
315
  "graph_data": {
316
+ "system_name": "Oxford Economics Q&A Assistant",
317
+ "system_summary": "A streamlined question-answering system designed to provide clear information about Oxford Economics services. The system processes user questions and delivers comprehensive responses about economic analysis and consulting capabilities.",
318
  "entities": [
319
  {
320
+ "id": "user_001",
321
+ "type": "Human",
322
+ "name": "User",
323
  "importance": "HIGH",
324
+ "raw_prompt": "Hello, can you help me understand what services Oxford Economics provides?",
325
  "raw_prompt_ref": [
326
  {
327
+ "line_start": 1,
328
+ "line_end": 1
 
329
  }
330
  ]
331
  },
332
  {
333
+ "id": "assistant_001",
334
  "type": "Agent",
335
+ "name": "Q&A Assistant",
336
+ "importance": "HIGH",
337
+ "raw_prompt": "Helpful assistant specialized in providing information about Oxford Economics services and capabilities.",
338
  "raw_prompt_ref": [
339
  {
340
+ "line_start": 3,
341
+ "line_end": 3
 
342
  }
343
  ]
344
+ },
 
 
345
  {
346
+ "id": "inquiry_task",
347
+ "type": "Task",
348
+ "name": "Service Information Request",
 
349
  "importance": "HIGH",
350
+ "raw_prompt": "Process user inquiry about Oxford Economics services and provide comprehensive response.",
351
+ "raw_prompt_ref": [
352
  {
353
+ "line_start": 1,
354
+ "line_end": 1
 
355
  }
356
  ]
357
+ },
 
 
 
358
  {
359
+ "id": "response_output",
360
+ "type": "Output",
361
+ "name": "Service Overview Response",
362
+ "importance": "HIGH",
363
+ "raw_prompt": "Certainly! Oxford Economics is a leading economic consultancy that provides: 1. Economic Analysis & Forecasting 2. Consultancy Services 3. Industry Analysis 4. Policy Impact Assessment",
364
+ "raw_prompt_ref": [
365
  {
366
+ "line_start": 3,
367
+ "line_end": 10
 
368
  }
369
  ]
370
  }
371
  ],
372
+ "relations": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
  {
374
+ "id": "user_inquiry",
375
+ "source": "user_001",
376
+ "target": "assistant_001",
377
+ "type": "CONSUMED_BY",
378
  "importance": "HIGH",
379
+ "interaction_prompt": "User asks about Oxford Economics services",
380
+ "interaction_prompt_ref": [
381
  {
382
+ "line_start": 1,
383
+ "line_end": 1
 
384
  }
385
  ]
386
  },
387
  {
388
+ "id": "assistant_processes",
389
+ "source": "assistant_001",
390
+ "target": "inquiry_task",
391
+ "type": "PERFORMS",
392
  "importance": "HIGH",
393
+ "interaction_prompt": "Assistant processes the service information request",
394
+ "interaction_prompt_ref": [
395
  {
396
+ "line_start": 3,
397
+ "line_end": 3
 
398
  }
399
  ]
400
+ },
 
 
401
  {
402
+ "id": "task_generates",
403
+ "source": "inquiry_task",
404
+ "target": "response_output",
405
+ "type": "PRODUCES",
406
+ "importance": "HIGH",
407
+ "interaction_prompt": "Task generates comprehensive service overview",
408
  "interaction_prompt_ref": [
409
  {
410
+ "line_start": 3,
411
+ "line_end": 10
 
412
  }
413
  ]
414
+ },
 
 
415
  {
416
+ "id": "response_delivered",
417
+ "source": "response_output",
418
+ "target": "user_001",
419
+ "type": "DELIVERS_TO",
420
+ "importance": "HIGH",
421
+ "interaction_prompt": "Comprehensive service information delivered to user",
422
+ "interaction_prompt_ref": [
423
  {
424
+ "line_start": 3,
425
+ "line_end": 10
 
426
  }
427
+ ]
 
428
  }
429
  ],
430
+ "failures": [],
431
  "optimizations": [
432
  {
433
+ "id": "opt_simple_001",
434
+ "recommendation_type": "PROMPT_REFINEMENT",
435
+ "description": "Consider adding interactive follow-up capabilities to allow users to ask for more specific information about any of the mentioned service areas.",
436
+ "affected_ids": ["assistant_001"],
437
  "raw_text_ref": [
438
  {
439
+ "line_start": 12,
440
+ "line_end": 12
 
441
  }
442
  ]
443
  }
444
  ],
445
  "metadata": {
446
  "creation_timestamp": "2025-01-27T12:00:00Z",
447
+ "schema_version": "2.1.0",
448
+ "quality_score": 0.92,
449
+ "entity_count": 4,
450
+ "relation_count": 4,
451
+ "failure_count": 0,
452
  "optimization_count": 1,
453
+ "processing_method": "simplified_demonstration",
454
+ "content_source": "conversation_trace",
455
  "language": "en",
456
+ "domain": "economics_consulting"
457
  }
458
  }
459
  }
 
462
 
463
  def insert_sample_data(session, force_insert=False):
464
  """
465
+ Insert sample traces and knowledge graphs into the database.
466
 
467
  Args:
468
  session: Database session
 
509
  )
510
  trace_ids.append(trace.trace_id)
511
  results["traces_inserted"] += 1
512
+ logger.info(f"Inserted sample trace: {trace_data['title']}")
513
  except Exception as e:
514
  error_msg = f"Error inserting trace {i}: {str(e)}"
515
  logger.error(error_msg)
 
530
  is_original=True
531
  )
532
  results["knowledge_graphs_inserted"] += 1
533
+ logger.info(f"Inserted sample knowledge graph: {kg_data['filename']}")
534
  except Exception as e:
535
  error_msg = f"Error inserting knowledge graph {kg_data['filename']}: {str(e)}"
536
  logger.error(error_msg)
537
  results["errors"].append(error_msg)
538
 
539
+ logger.info(f"Sample data insertion completed: {results}")
540
 
541
  except Exception as e:
542
+ error_msg = f"Fatal error during sample data insertion: {str(e)}"
543
  logger.error(error_msg)
544
  results["errors"].append(error_msg)
545
  raise # Re-raise to trigger rollback in calling code
 
549
 
550
  def get_sample_data_info():
551
  """
552
+ Get information about the available sample data.
553
 
554
  Returns:
555
  Dict with sample data statistics
 
558
  "traces_count": len(SAMPLE_TRACES),
559
  "knowledge_graphs_count": len(SAMPLE_KNOWLEDGE_GRAPHS),
560
  "trace_types": list(set(t["trace_type"] for t in SAMPLE_TRACES)),
561
+ "complexity_levels": ["enhanced", "simple"],
562
+ "features": ["failure_detection", "optimization_recommendations", "content_references", "quality_scoring"],
563
+ "description": "Enhanced AgentGraph examples showcasing Oxford Economics AI assistant with complete failure detection, optimization suggestions, and advanced knowledge graph features"
564
+ }