Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| """ | |
| Sample data for database initialization. | |
| Provides curated examples of traces and knowledge graphs for new users. | |
| """ | |
| import json | |
| import logging | |
| from typing import Dict, List, Any | |
| logger = logging.getLogger(__name__) | |
| # Enhanced sample traces showcasing AgentGraph's full capabilities | |
| SAMPLE_TRACES = [ | |
| { | |
| "filename": "python_documentation_inquiry.json", | |
| "title": "Python Documentation Assistant Demo", | |
| "description": "Enhanced example showing RAG-powered AI assistant processing programming inquiry with knowledge search, failure detection and optimization suggestions", | |
| "trace_type": "documentation_search", | |
| "trace_source": "sample_data", | |
| "tags": ["programming", "rag_assistant", "documentation", "failure_detection", "optimization"], | |
| "content": """{ | |
| "id": "doc_trace_demo_001", | |
| "timestamp": "2025-01-27T00:00:00", | |
| "metadata": { | |
| "source": "AgentGraph_Demo", | |
| "row_index": 0, | |
| "converted_at": "2025-01-27T12:00:00.000000" | |
| }, | |
| "data": { | |
| "total_observations": 1, | |
| "summary": "Python documentation inquiry with RAG-powered assistant response" | |
| }, | |
| "observations": [ | |
| { | |
| "id": "demo_obs_001", | |
| "type": "inference", | |
| "timestamp": "2025-01-27T00:00:00", | |
| "input": "You are a helpful Python programming assistant designed to answer questions accurately based on the provided documentation. Use the information retrieved from the knowledge base below to respond concisely and correctly.\\n\\n### Retrieved Documents:\\n['Python list comprehensions - efficient way to create lists', 'Python built-in functions - len(), range(), enumerate()', 'Python data structures - lists, dictionaries, sets, tuples']\\n\\n### Question:\\nwhat are python list comprehensions used for?\\n\\n### Answer:", | |
| "output": "Python list comprehensions provide an efficient and readable way to create lists by applying expressions to elements in iterables.", | |
| "metadata": { | |
| "request_date": "2025-01-27T00:00:00", | |
| "databricks_request_id": "demo-request-001", | |
| "request_time": "2025-01-27T12:00:00.000", | |
| "status_code": 200, | |
| "sampling_fraction": 1, | |
| "request_raw": "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a knowledgeable assistant on Oxford Economics\"},{\"role\":\"user\",\"content\":\"You are a knowledgeable assistant on Oxford Economics designed to answer questions accurately based on the provided context. Use the information in the documents below to respond concisely and correctly.\\\\n\\\\n### Context:\\\\n['Oxford Economics contact: Mateusz Urban - murban@oxfordeconomics.com', 'Oxford Economics research and analysis services', 'Economic forecasting and consultancy expertise']\\\\n\\\\n### Question:\\\\nwhat does oxford economics present?\\\\n\\\\n### Answer:\"}]}", | |
| "response_raw": "{\"choices\":[{\"content_filter_results\":{\"hate\":{\"filtered\":false,\"severity\":\"safe\"},\"self_harm\":{\"filtered\":false,\"severity\":\"safe\"},\"sexual\":{\"filtered\":false,\"severity\":\"safe\"},\"violence\":{\"filtered\":false,\"severity\":\"safe\"}},\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"content\":\"Oxford Economics provides economic analysis, forecasting, and consultancy services.\",\"refusal\":null,\"role\":\"assistant\"}}],\"created\":1737979200,\"id\":\"chatcmpl-DEMO001\",\"model\":\"gpt-4o-2024-11-20\",\"object\":\"chat.completion\",\"system_fingerprint\":\"fp_demo_001\",\"usage\":{\"completion_tokens\":13,\"completion_tokens_details\":{\"accepted_prediction_tokens\":0,\"audio_tokens\":0,\"reasoning_tokens\":0,\"rejected_prediction_tokens\":0},\"prompt_tokens\":162,\"prompt_tokens_details\":{\"audio_tokens\":0,\"cached_tokens\":0},\"total_tokens\":175}}", | |
| "served_entity_id": "demo-entity-001", | |
| "logging_error_codes": "[]", | |
| "requester": "demo-user-001" | |
| }, | |
| "latency": 0.531, | |
| "usage": { | |
| "completion_tokens": 13, | |
| "completion_tokens_details": { | |
| "accepted_prediction_tokens": 0, | |
| "audio_tokens": 0, | |
| "reasoning_tokens": 0, | |
| "rejected_prediction_tokens": 0 | |
| }, | |
| "prompt_tokens": 162, | |
| "prompt_tokens_details": { | |
| "audio_tokens": 0, | |
| "cached_tokens": 0 | |
| }, | |
| "total_tokens": 175 | |
| }, | |
| "model": "gpt-4o-2024-11-20" | |
| } | |
| ] | |
| }""" | |
| }, | |
| { | |
| "filename": "simple_qa_demonstration.txt", | |
| "title": "Simple Q&A: Basic Programming Help", | |
| "description": "Streamlined example showing basic programming assistant interaction with clear knowledge graph structure", | |
| "trace_type": "conversation", | |
| "trace_source": "sample_data", | |
| "tags": ["simple", "qa", "basic", "demonstration"], | |
| "content": """User: Hello, can you help me understand what Python list comprehensions are used for? | |
| Assistant: Certainly! Python list comprehensions are a powerful and concise way to create lists. Here are their main uses: | |
| 1. **Creating Lists from Iterables**: Transform elements from existing sequences | |
| 2. **Filtering Data**: Include only elements that meet certain conditions | |
| 3. **Mathematical Operations**: Apply calculations to each element efficiently | |
| 4. **Nested Iterations**: Handle complex data transformations in readable syntax | |
| For example: `[x**2 for x in range(10) if x % 2 == 0]` creates a list of squares of even numbers. | |
| Would you like to see more specific examples or learn about related Python features?""" | |
| } | |
| ] | |
# Enhanced knowledge graphs demonstrating AgentGraph's complete feature set.
#
# Each entry pairs a graph with one sample trace through "trace_index" (the
# position of that trace in SAMPLE_TRACES). The "*_ref" lists hold
# line_start/line_end spans; presumably these index into the linked trace's
# content.
SAMPLE_KNOWLEDGE_GRAPHS = [
    # --- Graph for the first trace (documentation search demo) ---
    {
        # NOTE(review): the filename still carries a legacy
        # "oxford_economics" name although the graph describes the Python
        # documentation demo. Confirm nothing looks records up by this name
        # before renaming.
        "filename": "kg_oxford_economics_enhanced.json",
        "trace_index": 0,  # Links to first trace
        "graph_data": {
            "system_name": "Python Documentation Assistant",
            "system_summary": "This intelligent assistant processes user inquiries about Python programming through a streamlined workflow. When users submit questions, the knowledgeable agent analyzes them using RAG-powered knowledge search and delivers accurate, contextual responses about Python concepts, syntax, and best practices.",
            "entities": [
                {
                    "id": "agent_001",
                    "type": "Agent",
                    "name": "Python Documentation Agent",
                    "importance": "HIGH",
                    "raw_prompt": "You are a helpful Python programming assistant designed to answer questions accurately based on retrieved documentation context. Use the search results to provide precise responses.",
                    "raw_prompt_ref": [{"line_start": 31, "line_end": 32}],
                },
                {
                    "id": "task_001",
                    "type": "Task",
                    "name": "Programming Question Processing",
                    "importance": "HIGH",
                    "raw_prompt": "Process user inquiry about Python programming and generate an accurate, contextual response based on available documentation and programming best practices.",
                    "raw_prompt_ref": [{"line_start": 26, "line_end": 28}],
                },
                {
                    "id": "input_001",
                    "type": "Input",
                    "name": "User Programming Query",
                    "importance": "HIGH",
                    "raw_prompt": "what are python list comprehensions used for?",
                    "raw_prompt_ref": [{"line_start": 19, "line_end": 19}],
                },
                {
                    "id": "output_001",
                    "type": "Output",
                    "name": "Programming Concept Explanation",
                    "importance": "HIGH",
                    "raw_prompt": "Python list comprehensions provide an efficient and readable way to create lists by applying expressions to elements in iterables.",
                    "raw_prompt_ref": [{"line_start": 20, "line_end": 20}],
                },
                {
                    "id": "human_001",
                    "type": "Human",
                    "name": "Python Developer",
                    "importance": "MEDIUM",
                    "raw_prompt": "Developer seeking Python programming guidance and documentation",
                    "raw_prompt_ref": [{"line_start": 31, "line_end": 31}],
                },
                {
                    "id": "tool_001",
                    "type": "Tool",
                    "name": "Python Documentation Search",
                    "importance": "HIGH",
                    "raw_prompt": "Retrieval-Augmented Generation (RAG) system that searches Python documentation knowledge base for relevant concepts, syntax examples, and best practices to provide contextual information.",
                    "raw_prompt_ref": [{"line_start": 49, "line_end": 49}],
                },
            ],
            "relations": [
                {
                    "id": "rel_001",
                    "source": "input_001",
                    "target": "agent_001",
                    "type": "CONSUMED_BY",
                    "importance": "HIGH",
                    "interaction_prompt": "User query received and processed by the Python documentation agent",
                    "interaction_prompt_ref": [{"line_start": 19, "line_end": 19}],
                },
                {
                    "id": "rel_002",
                    "source": "agent_001",
                    "target": "task_001",
                    "type": "PERFORMS",
                    "importance": "HIGH",
                    "interaction_prompt": "Agent executes programming question processing task",
                    "interaction_prompt_ref": [{"line_start": 26, "line_end": 28}],
                },
                {
                    "id": "rel_003",
                    "source": "task_001",
                    "target": "output_001",
                    "type": "PRODUCES",
                    "importance": "HIGH",
                    "interaction_prompt": "Processing task generates comprehensive programming concept explanation",
                    "interaction_prompt_ref": [{"line_start": 20, "line_end": 20}],
                },
                {
                    "id": "rel_004",
                    "source": "output_001",
                    "target": "human_001",
                    "type": "DELIVERS_TO",
                    "importance": "HIGH",
                    "interaction_prompt": "Programming explanation delivered to requesting developer",
                    "interaction_prompt_ref": [{"line_start": 20, "line_end": 20}],
                },
                {
                    "id": "rel_005",
                    "source": "agent_001",
                    "target": "tool_001",
                    "type": "USES",
                    "importance": "HIGH",
                    "interaction_prompt": "Agent queries knowledge search system to retrieve relevant Python documentation and contextual information",
                    "interaction_prompt_ref": [{"line_start": 49, "line_end": 49}],
                },
            ],
            "failures": [
                {
                    "id": "failure_001",
                    "risk_type": "HALLUCINATION",
                    "description": "Query lacks specificity - could ask about specific Python features rather than general usage.",
                    "raw_text": "what are python list comprehensions used for?",
                    "raw_text_ref": [{"line_start": 19, "line_end": 19}],
                    "affected_id": "input_001",
                },
            ],
            "optimizations": [
                {
                    "id": "opt_001",
                    "recommendation_type": "PROMPT_REFINEMENT",
                    "description": "Enhance the agent prompt to encourage more specific programming questions and provide structured code examples in responses.",
                    "affected_ids": ["agent_001"],
                    "raw_text_ref": [{"line_start": 31, "line_end": 32}],
                },
                {
                    "id": "opt_002",
                    "recommendation_type": "TOOL_ENHANCEMENT",
                    "description": "Expand knowledge search capabilities to include code examples, interactive tutorials, and version-specific documentation alongside basic syntax references.",
                    "affected_ids": ["tool_001"],
                    "raw_text_ref": [{"line_start": 49, "line_end": 49}],
                },
            ],
            # The *_count values mirror the lengths of the lists above.
            "metadata": {
                "creation_timestamp": "2025-01-27T12:00:00Z",
                "schema_version": "2.1.0",
                "quality_score": 0.89,
                "entity_count": 6,
                "relation_count": 5,
                "failure_count": 1,
                "optimization_count": 2,
                "processing_method": "production_enhanced",
                "content_source": "documentation_trace",
                "language": "en",
                "domain": "programming_documentation",
            },
        },
    },
    # --- Graph for the second trace (simple Q&A conversation) ---
    {
        "filename": "kg_simple_qa_demo.json",
        "trace_index": 1,  # Links to second trace
        "graph_data": {
            "system_name": "Python Q&A Assistant",
            "system_summary": "A streamlined question-answering system designed to provide clear information about Python programming concepts. The system processes user questions and delivers comprehensive responses about Python syntax and best practices.",
            "entities": [
                {
                    "id": "user_001",
                    "type": "Human",
                    "name": "User",
                    "importance": "HIGH",
                    "raw_prompt": "Hello, can you help me understand what Python list comprehensions are used for?",
                    "raw_prompt_ref": [{"line_start": 1, "line_end": 1}],
                },
                {
                    "id": "assistant_001",
                    "type": "Agent",
                    "name": "Q&A Assistant",
                    "importance": "HIGH",
                    "raw_prompt": "RAG-powered assistant specialized in searching knowledge base and providing accurate information about Python programming concepts and capabilities.",
                    "raw_prompt_ref": [{"line_start": 3, "line_end": 3}],
                },
                {
                    "id": "inquiry_task",
                    "type": "Task",
                    "name": "Service Information Request",
                    "importance": "HIGH",
                    "raw_prompt": "Process user inquiry about Python programming concepts and provide comprehensive response.",
                    "raw_prompt_ref": [{"line_start": 1, "line_end": 1}],
                },
                {
                    "id": "response_output",
                    "type": "Output",
                    "name": "Service Overview Response",
                    "importance": "HIGH",
                    "raw_prompt": "Certainly! Python list comprehensions are a powerful feature that provides: 1. Concise List Creation 2. Efficient Data Filtering 3. Readable Transformations 4. Performance Optimization",
                    "raw_prompt_ref": [{"line_start": 3, "line_end": 10}],
                },
            ],
            "relations": [
                {
                    "id": "user_inquiry",
                    "source": "user_001",
                    "target": "assistant_001",
                    "type": "CONSUMED_BY",
                    "importance": "HIGH",
                    "interaction_prompt": "User asks about Python programming concepts",
                    "interaction_prompt_ref": [{"line_start": 1, "line_end": 1}],
                },
                {
                    "id": "assistant_processes",
                    "source": "assistant_001",
                    "target": "inquiry_task",
                    "type": "PERFORMS",
                    "importance": "HIGH",
                    "interaction_prompt": "Assistant processes the programming concept request",
                    "interaction_prompt_ref": [{"line_start": 3, "line_end": 3}],
                },
                {
                    "id": "task_generates",
                    "source": "inquiry_task",
                    "target": "response_output",
                    "type": "PRODUCES",
                    "importance": "HIGH",
                    "interaction_prompt": "Task generates comprehensive programming concept overview",
                    "interaction_prompt_ref": [{"line_start": 3, "line_end": 10}],
                },
                {
                    "id": "response_delivered",
                    "source": "response_output",
                    "target": "user_001",
                    "type": "DELIVERS_TO",
                    "importance": "HIGH",
                    "interaction_prompt": "Comprehensive programming information delivered to user",
                    "interaction_prompt_ref": [{"line_start": 3, "line_end": 10}],
                },
            ],
            "failures": [],
            "optimizations": [
                {
                    "id": "opt_simple_001",
                    "recommendation_type": "PROMPT_REFINEMENT",
                    "description": "Consider adding interactive code examples and hands-on tutorials to allow users to practice the programming concepts being discussed.",
                    "affected_ids": ["assistant_001"],
                    "raw_text_ref": [{"line_start": 12, "line_end": 12}],
                },
            ],
            "metadata": {
                "creation_timestamp": "2025-01-27T12:00:00Z",
                "schema_version": "2.1.0",
                "quality_score": 0.92,
                "entity_count": 4,
                "relation_count": 4,
                "failure_count": 0,
                "optimization_count": 1,
                "processing_method": "simplified_demonstration",
                "content_source": "conversation_trace",
                "language": "en",
                "domain": "programming_documentation",
            },
        },
    },
]
def insert_sample_data(session, force_insert=False):
    """
    Insert sample traces and knowledge graphs into the database.

    Every trace and every knowledge graph is saved independently: a failure
    on one item is logged, recorded under ``errors`` and the remaining items
    are still attempted. A knowledge graph is only saved when the trace it
    points at (``trace_index``) was itself inserted successfully.

    Args:
        session: Database session
        force_insert: If True, insert even if data already exists

    Returns:
        Dict with insertion results: ``traces_inserted``,
        ``knowledge_graphs_inserted``, ``skipped`` and ``errors`` (a list of
        human-readable messages).

    Raises:
        Exception: Any error that escapes the per-item handlers is logged,
            appended to ``errors`` and re-raised so the caller can roll back.
    """
    # Imported at call time, as in the rest of this module's usage
    # (presumably to avoid an import cycle with the database package).
    from backend.database.utils import save_trace, save_knowledge_graph
    from backend.database.models import Trace

    results = {
        "traces_inserted": 0,
        "knowledge_graphs_inserted": 0,
        "skipped": 0,
        "errors": [],
    }

    # Check if sample data already exists
    if not force_insert:
        existing_sample = (
            session.query(Trace)
            .filter(Trace.trace_source == "sample_data")
            .first()
        )
        if existing_sample:
            logger.info("Sample data already exists, skipping insertion")
            results["skipped"] = len(SAMPLE_TRACES)
            return results

    try:
        # Insert sample traces.
        # Keyed by position in SAMPLE_TRACES rather than appended to a list:
        # if one trace fails, later traces must keep their own index so each
        # knowledge graph's "trace_index" still resolves to the right trace.
        trace_ids = {}
        for i, trace_data in enumerate(SAMPLE_TRACES):
            try:
                trace = save_trace(
                    session=session,
                    content=trace_data["content"],
                    filename=trace_data["filename"],
                    title=trace_data["title"],
                    description=trace_data["description"],
                    trace_type=trace_data["trace_type"],
                    trace_source=trace_data["trace_source"],
                    tags=trace_data["tags"],
                )
                trace_ids[i] = trace.trace_id
                results["traces_inserted"] += 1
                logger.info(f"Inserted sample trace: {trace_data['title']}")
            except Exception as e:
                error_msg = f"Error inserting trace {i}: {str(e)}"
                logger.exception(error_msg)
                results["errors"].append(error_msg)

        # Insert corresponding knowledge graphs
        for kg_data in SAMPLE_KNOWLEDGE_GRAPHS:
            try:
                trace_index = kg_data["trace_index"]
                if trace_index not in trace_ids:
                    # The linked trace failed to insert (or the index is
                    # wrong); report it instead of dropping the graph
                    # silently.
                    error_msg = (
                        f"Skipping knowledge graph {kg_data['filename']}: "
                        f"trace {trace_index} was not inserted"
                    )
                    logger.warning(error_msg)
                    results["errors"].append(error_msg)
                    continue

                save_knowledge_graph(
                    session=session,
                    filename=kg_data["filename"],
                    graph_data=kg_data["graph_data"],
                    trace_id=trace_ids[trace_index],
                    window_index=None,  # None for final KG
                    window_total=1,  # Not None to indicate it's a processed final KG
                    is_original=True,
                )
                results["knowledge_graphs_inserted"] += 1
                logger.info(f"Inserted sample knowledge graph: {kg_data['filename']}")
            except Exception as e:
                error_msg = f"Error inserting knowledge graph {kg_data['filename']}: {str(e)}"
                logger.exception(error_msg)
                results["errors"].append(error_msg)

        logger.info(f"Sample data insertion completed: {results}")
    except Exception as e:
        error_msg = f"Fatal error during sample data insertion: {str(e)}"
        logger.exception(error_msg)
        results["errors"].append(error_msg)
        raise  # Re-raise to trigger rollback in calling code

    return results
def get_sample_data_info():
    """
    Get information about the available sample data.

    Returns:
        Dict with sample data statistics: the number of sample traces and
        knowledge graphs, the distinct trace types, and static descriptive
        fields (complexity levels, feature list, description).
    """
    # Deduplicate via a set, then sort: bare set iteration order for strings
    # varies between interpreter runs, which would make this output unstable.
    trace_types = sorted({trace["trace_type"] for trace in SAMPLE_TRACES})

    return {
        "traces_count": len(SAMPLE_TRACES),
        "knowledge_graphs_count": len(SAMPLE_KNOWLEDGE_GRAPHS),
        "trace_types": trace_types,
        "complexity_levels": ["enhanced", "simple"],
        "features": ["rag_search", "failure_detection", "optimization_recommendations", "content_references", "quality_scoring"],
        "description": "Enhanced AgentGraph examples showcasing Python Documentation RAG-powered assistant with knowledge search, failure detection, optimization suggestions, and advanced knowledge graph features",
    }