wu981526092 committed on
Commit
0845df6
·
1 Parent(s): b450ab8

🔧 Fix Sample Data Loading for HF Spaces

Browse files

✅ Root Cause Identified:
• Database inconsistency: sample data was added to the file DB, but the app used an in-memory DB
• In init_db.py: used a sqlite3 connection to the file database
• In __init__.py: used SQLAlchemy with an in-memory database for HF Spaces
• Result: sample data was invisible to the application

⚡ Complete Fix Implementation:
• Created add_sample_data_for_hf() using SQLAlchemy models
• Uses the same in-memory database engine as the application
• Proper session management with rollback on errors
• Only runs on HF Spaces (SPACE_ID environment variable)
• Integrated into the app startup sequence after database initialization

🎯 Technical improvements:
• Consistent database access patterns across the codebase
• Proper SQLAlchemy model usage (Trace, KnowledgeGraph, Entity, Relation)
• Transaction safety with commit/rollback handling
• Clean separation: file DB for local dev, in-memory DB + samples for HF

🚀 Expected result:
• Dashboard should show 1 trace and 1 knowledge graph on HF Spaces
• 15 entities and 18 relations demonstrating multi-agent coordination
• Complete customer service scenario with error handling examples
• Users immediately see AgentGraph's capabilities

Files changed (2) hide show
  1. backend/app.py +6 -0
  2. backend/database/__init__.py +248 -0
backend/app.py CHANGED
@@ -76,12 +76,18 @@ async def startup_event():
76
  # πŸ—„οΈ Initialize database on startup
77
  try:
78
  from backend.database.init_db import init_database
 
 
79
  init_database(reset=False, force=False)
 
80
 
81
  # Show database type info
82
  if os.getenv("SPACE_ID"):
83
  logger.info("πŸ”’ HF Spaces: Using in-memory database for user privacy")
84
  logger.info("πŸ“ Note: Data will be cleared when container restarts")
 
 
 
85
  else:
86
  logger.info("πŸ’Ύ Local development: Using persistent database")
87
 
 
76
  # πŸ—„οΈ Initialize database on startup
77
  try:
78
  from backend.database.init_db import init_database
79
+ from backend.database import init_db, add_sample_data_for_hf
80
+
81
  init_database(reset=False, force=False)
82
+ init_db() # Create tables using SQLAlchemy
83
 
84
  # Show database type info
85
  if os.getenv("SPACE_ID"):
86
  logger.info("πŸ”’ HF Spaces: Using in-memory database for user privacy")
87
  logger.info("πŸ“ Note: Data will be cleared when container restarts")
88
+
89
+ # Add sample data for HF Spaces
90
+ add_sample_data_for_hf()
91
  else:
92
  logger.info("πŸ’Ύ Local development: Using persistent database")
93
 
backend/database/__init__.py CHANGED
@@ -60,10 +60,258 @@ def init_db():
60
  """Initialize the database by creating all tables."""
61
  Base.metadata.create_all(bind=engine)
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  __all__ = [
64
  'get_db',
65
  'models',
66
  'init_db',
 
67
  'save_knowledge_graph',
68
  'update_knowledge_graph_status',
69
  'get_knowledge_graph',
 
60
  """Initialize the database by creating all tables."""
61
  Base.metadata.create_all(bind=engine)
62
 
63
# Namespace shared by the demo knowledge graph and every entity/relation in it.
_SAMPLE_NAMESPACE = "customer_service_demo"

# Sample trace content - multi-agent customer service scenario.
# A JSON array of messages. Every object must have unique keys: the
# "add_benefit" action previously repeated "type", which JSON parsers
# silently collapse to the last value.
_SAMPLE_TRACE_CONTENT = '''[
  {
    "role": "user",
    "content": "I need help with my order #12345. It was supposed to arrive yesterday but I still haven't received it. This is very frustrating as I need it for an important meeting tomorrow.",
    "timestamp": "2024-08-31T10:00:00Z",
    "metadata": {"priority": "high", "emotion": "frustrated"}
  },
  {
    "role": "assistant",
    "name": "RouterAgent",
    "content": "I understand your frustration about the delayed order. Let me route this to our order tracking specialist who can provide detailed information about your shipment.",
    "timestamp": "2024-08-31T10:00:15Z",
    "actions": [{"type": "route", "target": "OrderTrackingAgent", "priority": "high"}]
  },
  {
    "role": "assistant",
    "name": "OrderTrackingAgent",
    "content": "I'll look up order #12345 immediately. Checking our tracking system... I found the issue - there was a delay at the distribution center due to weather conditions. Your package is currently in transit and should arrive by 2 PM today.",
    "timestamp": "2024-08-31T10:01:00Z",
    "actions": [
      {"type": "database_query", "query": "SELECT * FROM orders WHERE order_id = '12345'"},
      {"type": "api_call", "endpoint": "shipping_tracker", "response": "in_transit"}
    ],
    "findings": {
      "issue": "Weather delay at distribution center",
      "current_status": "In transit",
      "estimated_delivery": "2024-08-31T14:00:00Z"
    }
  },
  {
    "role": "assistant",
    "name": "CompensationAgent",
    "content": "Given this delay caused inconvenience, I'm authorizing a $10 credit to your account and providing free expedited shipping on your next order. I'm also escalating this to ensure better communication about delays in the future.",
    "timestamp": "2024-08-31T10:02:00Z",
    "actions": [
      {"type": "credit_account", "amount": 10, "reason": "delivery_delay"},
      {"type": "add_benefit", "benefit": "free_expedited_shipping"},
      {"type": "escalate", "department": "operations", "issue": "delayed_delivery_communication"}
    ],
    "error": "Failed to process account credit - payment system temporarily unavailable"
  },
  {
    "role": "assistant",
    "name": "SupervisorAgent",
    "content": "I see there was an error processing the account credit. I'm manually flagging your account for the $10 credit and expedited shipping benefit. Our technical team will resolve the payment system issue. You should see the credit within 24 hours.",
    "timestamp": "2024-08-31T10:03:00Z",
    "actions": [
      {"type": "manual_flag", "account_credit": 10},
      {"type": "technical_escalation", "system": "payment_processor"},
      {"type": "follow_up_schedule", "timeframe": "24_hours"}
    ],
    "optimization_opportunity": "Implement payment system redundancy to prevent credit processing failures"
  },
  {
    "role": "user",
    "content": "Thank you for the quick response and compensation. I appreciate you taking care of this issue promptly.",
    "timestamp": "2024-08-31T10:04:00Z",
    "sentiment": "satisfied"
  },
  {
    "role": "assistant",
    "name": "QualityAgent",
    "content": "Session completed successfully. Customer satisfaction restored. Identified system improvement: need better proactive communication about shipping delays and payment system reliability backup.",
    "timestamp": "2024-08-31T10:05:00Z",
    "session_analysis": {
      "resolution_time": "5 minutes",
      "customer_satisfaction": "high",
      "agents_involved": 4,
      "system_errors": 1,
      "optimization_recommendations": [
        "Implement proactive delay notifications",
        "Add payment system redundancy",
        "Improve agent handoff protocols"
      ]
    }
  }
]'''

# Sample entities with rich properties: (entity_id, type, name, properties).
_SAMPLE_ENTITIES = (
    # Agents
    ("agent_1", "agent", "RouterAgent", {"role": "traffic_routing", "specialization": "request_classification", "priority_handling": True}),
    ("agent_2", "agent", "OrderTrackingAgent", {"role": "order_management", "specialization": "shipping_tracking", "data_sources": ["internal_db", "shipping_apis"]}),
    ("agent_3", "agent", "CompensationAgent", {"role": "customer_retention", "specialization": "compensation_authorization", "max_credit_limit": 50}),
    ("agent_4", "agent", "SupervisorAgent", {"role": "escalation_handling", "specialization": "system_error_recovery", "override_authority": True}),
    ("agent_5", "agent", "QualityAgent", {"role": "quality_assurance", "specialization": "session_analysis", "improvement_tracking": True}),

    # Systems and Tools
    ("system_1", "system", "OrderDatabase", {"type": "database", "function": "order_storage", "performance": "high"}),
    ("system_2", "system", "ShippingTracker", {"type": "external_api", "function": "package_tracking", "reliability": "99.5%"}),
    ("system_3", "system", "PaymentProcessor", {"type": "financial_system", "function": "account_credits", "status": "temporarily_unavailable"}),

    # Issues and Problems
    ("issue_1", "issue", "DeliveryDelay", {"severity": "medium", "cause": "weather_conditions", "impact": "customer_satisfaction"}),
    ("issue_2", "issue", "PaymentSystemFailure", {"severity": "high", "cause": "system_unavailability", "impact": "compensation_processing"}),

    # Actions and Processes
    ("action_1", "action", "RouteRequest", {"type": "traffic_management", "success_rate": "98%"}),
    ("action_2", "action", "TrackPackage", {"type": "information_retrieval", "data_sources": 2}),
    ("action_3", "action", "AuthorizeCredit", {"type": "financial_transaction", "approval_required": True}),
    ("action_4", "action", "EscalateIssue", {"type": "process_escalation", "department": "operations"}),

    # Improvements and Optimizations
    ("improvement_1", "improvement", "ProactiveNotifications", {"priority": "high", "implementation_effort": "medium", "expected_impact": "reduce_complaints_by_30%"}),
)

# Sample relations showing complex interactions:
# (relation_id, from_entity_id, relation_type, to_entity_id, properties).
_SAMPLE_RELATIONS = (
    # Agent interactions
    ("rel_1", "agent_1", "routes_to", "agent_2", {"context": "order_inquiry", "priority": "high", "success": True}),
    ("rel_2", "agent_2", "collaborates_with", "agent_3", {"context": "customer_compensation", "coordination": "automated"}),
    ("rel_3", "agent_3", "escalates_to", "agent_4", {"context": "system_error", "escalation_reason": "payment_failure"}),
    ("rel_4", "agent_4", "coordinates_with", "agent_5", {"context": "quality_improvement", "outcome": "optimization_identified"}),

    # System interactions
    ("rel_5", "agent_2", "queries", "system_1", {"query_type": "order_lookup", "response_time": "0.5s", "success": True}),
    ("rel_6", "agent_2", "calls", "system_2", {"api_endpoint": "track_package", "response_time": "1.2s", "success": True}),
    ("rel_7", "agent_3", "attempts_transaction", "system_3", {"transaction_type": "credit", "amount": 10, "success": False}),

    # Problem identification and resolution
    ("rel_8", "agent_2", "identifies", "issue_1", {"detection_method": "system_query", "severity_assessed": "medium"}),
    ("rel_9", "agent_3", "encounters", "issue_2", {"error_handling": "automatic_escalation", "recovery_action": "manual_override"}),
    ("rel_10", "agent_4", "resolves", "issue_2", {"resolution_method": "manual_flag", "permanent_fix": False}),

    # Action execution
    ("rel_11", "agent_1", "executes", "action_1", {"execution_time": "15s", "outcome": "successful_routing"}),
    ("rel_12", "agent_2", "performs", "action_2", {"data_retrieved": True, "accuracy": "100%"}),
    ("rel_13", "agent_3", "initiates", "action_3", {"authorization_level": "standard", "blocked_by": "system_error"}),
    ("rel_14", "agent_4", "triggers", "action_4", {"escalation_department": "operations", "follow_up_required": True}),

    # Improvement opportunities
    ("rel_15", "agent_5", "identifies", "improvement_1", {"analysis_method": "session_review", "confidence": "high"}),
    ("rel_16", "issue_1", "leads_to", "improvement_1", {"causal_relationship": "direct", "prevention_potential": "high"}),
    ("rel_17", "issue_2", "exposes", "system_3", {"vulnerability_type": "single_point_of_failure", "risk_level": "high"}),
    ("rel_18", "improvement_1", "would_prevent", "issue_1", {"prevention_mechanism": "early_warning", "effectiveness": "85%"}),
)


def add_sample_data_for_hf():
    """Add sample data for HF Spaces using SQLAlchemy.

    Inserts one demo ``Trace``, one ``KnowledgeGraph`` linked to it, and the
    graph's entities and relations, all in a single transaction on a session
    obtained from this module's ``Session`` factory.

    The function is a no-op when:

    * the ``SPACE_ID`` environment variable is unset or empty, or
    * at least one trace or knowledge graph row already exists.

    Counts stored on the rows (``turn_count``, ``entity_count``,
    ``relation_count``) are derived from the sample data itself so they
    cannot drift from it.

    Returns:
        None. Errors raised while seeding are caught, the transaction is
        rolled back, and a message is printed; they are not re-raised.
    """
    if not os.getenv("SPACE_ID"):
        return  # Only run on HF Spaces

    import hashlib
    import json
    import uuid
    # Imported lazily, as in the original implementation.
    # NOTE(review): presumably to avoid a circular import with the models
    # module - confirm before hoisting to module level.
    from backend.database.models import Trace, KnowledgeGraph, Entity, Relation

    session = Session()
    try:
        # Seed only an empty database so repeated startups don't duplicate rows.
        if session.query(Trace).count() > 0 or session.query(KnowledgeGraph).count() > 0:
            return  # Data already exists

        print("🎯 HF Spaces: Adding sample data to in-memory database...")

        # Generate trace metadata
        trace_id = str(uuid.uuid4())
        content_hash = hashlib.sha256(_SAMPLE_TRACE_CONTENT.encode()).hexdigest()
        # One turn per message in the sample conversation; parsing also
        # validates that the embedded sample is well-formed JSON.
        turn_count = len(json.loads(_SAMPLE_TRACE_CONTENT))

        # Create and save sample trace
        sample_trace = Trace(
            trace_id=trace_id,
            filename="sample_customer_service.json",
            title="Multi-Agent Customer Service Resolution",
            description="Demonstration of multi-agent system handling customer complaint with error handling and optimization opportunities",
            content=_SAMPLE_TRACE_CONTENT,
            content_hash=content_hash,
            uploader="AgentGraph Demo",
            trace_type="multi_agent",
            trace_source="sample",
            character_count=len(_SAMPLE_TRACE_CONTENT),
            turn_count=turn_count,
            status="processed",
            tags=json.dumps(["sample", "customer_service", "multi_agent", "error_handling", "optimization"]),
            trace_metadata=json.dumps({
                "scenario": "customer_service",
                "agents": ["RouterAgent", "OrderTrackingAgent", "CompensationAgent", "SupervisorAgent", "QualityAgent"],
                "domain": "e_commerce",
                "complexity": "high",
            }),
        )
        session.add(sample_trace)
        session.flush()  # Send the trace INSERT before the graph that references it

        # Create sample knowledge graph
        sample_kg = KnowledgeGraph(
            filename="sample_customer_service_kg.json",
            creator="AgentGraph Demo",
            entity_count=len(_SAMPLE_ENTITIES),
            relation_count=len(_SAMPLE_RELATIONS),
            namespace=_SAMPLE_NAMESPACE,
            system_name="Multi-Agent Customer Service System",
            system_summary="An intelligent customer service system featuring multiple specialized agents working together to resolve customer issues, handle errors, and identify optimization opportunities. The system demonstrates sophisticated agent coordination, error recovery mechanisms, and continuous improvement processes.",
            status="completed",
            trace_id=trace_id,
            window_index=0,
            window_total=1,
            processing_run_id="sample_demo_run",
        )
        session.add(sample_kg)
        session.flush()  # Get the KG ID (sample_kg.id is used below)

        # Add entities
        session.add_all(
            Entity(
                graph_id=sample_kg.id,
                entity_id=entity_id,
                type=entity_type,
                name=name,
                properties=json.dumps(properties),
                knowledge_graph_namespace=_SAMPLE_NAMESPACE,
            )
            for entity_id, entity_type, name, properties in _SAMPLE_ENTITIES
        )

        # Add relations
        session.add_all(
            Relation(
                graph_id=sample_kg.id,
                relation_id=relation_id,
                from_entity_id=from_entity,
                relation_type=relation_type,
                to_entity_id=to_entity,
                properties=json.dumps(properties),
                knowledge_graph_namespace=_SAMPLE_NAMESPACE,
            )
            for relation_id, from_entity, relation_type, to_entity, properties in _SAMPLE_RELATIONS
        )

        # Commit all data
        session.commit()

        print("✅ Sample data added successfully to HF Spaces in-memory database!")
        print("   • 1 sample trace: Multi-Agent Customer Service Resolution")
        print(f"   • 1 knowledge graph with {len(_SAMPLE_ENTITIES)} entities and {len(_SAMPLE_RELATIONS)} relations")
        print("   • Demonstrates: Multi-agent coordination, error handling, optimization opportunities")

    except Exception as e:
        # Roll back first so a failure while reporting (e.g. an encoding
        # error on stdout) cannot leave the transaction open.
        session.rollback()
        print(f"❌ Failed to add sample data: {e}")
    finally:
        session.close()
309
+
310
  __all__ = [
311
  'get_db',
312
  'models',
313
  'init_db',
314
+ 'add_sample_data_for_hf',
315
  'save_knowledge_graph',
316
  'update_knowledge_graph_status',
317
  'get_knowledge_graph',