Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- configs/hyperparameters/.gitkeep +0 -0
- configs/hyperparameters/boolq_train_pipeline_config.json +0 -0
- configs/hyperparameters/dual_persona_training_data_pipeline_config.json +0 -0
- configs/hyperparameters/dual_persona_training_phase1_pipeline_config.json +0 -0
- configs/hyperparameters/dual_persona_training_phase2_pipeline_config.json +0 -0
- configs/hyperparameters/dual_persona_training_phase3_pipeline_config.json +0 -0
- configs/hyperparameters/enhanced_training_config.json +136 -0
- configs/hyperparameters/moe_training_config.json +65 -0
- configs/hyperparameters/stressor_train.json +0 -0
- configs/hyperparameters/training_config.json +0 -0
- configs/infrastructure/.gitkeep +0 -0
- configs/model_configs/.gitkeep +0 -0
- configs/stage_configs/.gitkeep +0 -0
- configs/stage_configs/1.PsychologyTest_requirements.txt +0 -0
- configs/stage_configs/18ddda4f-4118-4292-ad4c-3cfe2d29152c.json +48 -0
- configs/stage_configs/4710e616-eb07-4773-9757-df922c41b33f.json +48 -0
- configs/stage_configs/878d3cb5-95e8-4e11-9d6c-6fa585c0a85e.json +48 -0
- configs/stage_configs/CoT_Neurodivergent_vs_Neurotypical_Interactions_metadata.json +15 -0
- configs/stage_configs/CoT_Philosophical_Understanding_metadata.json +15 -0
- configs/stage_configs/CoT_Reasoning_Mens_Mental_Health_metadata.json +15 -0
- configs/stage_configs/CoT_Temporal_Reasoning_Dataset_metadata.json +15 -0
- configs/stage_configs/HealthCareMagic-100k.json +0 -0
- configs/stage_configs/Instructions.ts +0 -0
- configs/stage_configs/ULTIMATE_FINAL_INTEGRATION_SUMMARY.json +0 -0
- configs/stage_configs/ai_config.py +0 -0
- configs/stage_configs/api_config.py +62 -0
- configs/stage_configs/api_documentation.json +296 -0
- configs/stage_configs/approach_config.json +455 -0
- configs/stage_configs/audit_report.json +655 -0
- configs/stage_configs/auto_resume_requirements.txt +52 -0
- configs/stage_configs/bias_validated_validation_summary.json +14 -0
- configs/stage_configs/boolq_validation_pipeline_config.json +0 -0
- configs/stage_configs/celery_config.py +111 -0
- configs/stage_configs/check_config.sh +0 -0
- configs/stage_configs/checkpoint_config.json +44 -0
- configs/stage_configs/checkpoint_requirements.txt +45 -0
- configs/stage_configs/claude_assessment.json +0 -0
- configs/stage_configs/cli_config.py +232 -0
- configs/stage_configs/complexity_config.json +56 -0
- configs/stage_configs/comprehensive_integration_summary.json +32 -0
- configs/stage_configs/condition_config.json +460 -0
- configs/stage_configs/config.py +53 -0
- configs/stage_configs/config_example.py +0 -0
- configs/stage_configs/config_lock.json +39 -0
- configs/stage_configs/config_lock.py +206 -0
- configs/stage_configs/config_profiles.py +339 -0
- configs/stage_configs/config_tracker.py +700 -0
- configs/stage_configs/config_validator.py +705 -0
- configs/stage_configs/configs_config.py +67 -0
- configs/stage_configs/corrected_audit_report.json +694 -0
configs/hyperparameters/.gitkeep
ADDED
|
File without changes
|
configs/hyperparameters/boolq_train_pipeline_config.json
ADDED
|
File without changes
|
configs/hyperparameters/dual_persona_training_data_pipeline_config.json
ADDED
|
File without changes
|
configs/hyperparameters/dual_persona_training_phase1_pipeline_config.json
ADDED
|
File without changes
|
configs/hyperparameters/dual_persona_training_phase2_pipeline_config.json
ADDED
|
File without changes
|
configs/hyperparameters/dual_persona_training_phase3_pipeline_config.json
ADDED
|
File without changes
|
configs/hyperparameters/enhanced_training_config.json
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"base_model": "LatitudeGames/Wayfarer-2-12B",
|
| 3 |
+
"training_type": "kan28_enhanced_therapeutic_ai",
|
| 4 |
+
|
| 5 |
+
"dataset_config": {
|
| 6 |
+
"ultimate_final_dataset": "ULTIMATE_FINAL_DATASET.jsonl",
|
| 7 |
+
"component_enhanced_dataset": "unified_6_component_dataset.jsonl",
|
| 8 |
+
"total_conversations": 608497,
|
| 9 |
+
"component_enhanced_conversations": 39,
|
| 10 |
+
"train_split": 0.9,
|
| 11 |
+
"val_split": 0.1
|
| 12 |
+
},
|
| 13 |
+
|
| 14 |
+
"kan28_components": {
|
| 15 |
+
"integrated_components": [
|
| 16 |
+
"journaling_system",
|
| 17 |
+
"voice_blending",
|
| 18 |
+
"edge_case_handling",
|
| 19 |
+
"dual_persona_dynamics",
|
| 20 |
+
"bias_detection",
|
| 21 |
+
"psychology_knowledge_base"
|
| 22 |
+
],
|
| 23 |
+
"expert_voices": ["Tim Ferriss", "Gabor Maté", "Brené Brown"],
|
| 24 |
+
"psychology_concepts": 4867,
|
| 25 |
+
"bias_categories": 5,
|
| 26 |
+
"therapeutic_modalities": 6
|
| 27 |
+
},
|
| 28 |
+
|
| 29 |
+
"training_parameters": {
|
| 30 |
+
"num_train_epochs": 3,
|
| 31 |
+
"per_device_train_batch_size": 4,
|
| 32 |
+
"per_device_eval_batch_size": 4,
|
| 33 |
+
"gradient_accumulation_steps": 8,
|
| 34 |
+
"learning_rate": 3e-4,
|
| 35 |
+
"warmup_steps": 1000,
|
| 36 |
+
"weight_decay": 0.01,
|
| 37 |
+
"max_grad_norm": 1.0
|
| 38 |
+
},
|
| 39 |
+
|
| 40 |
+
"lora_config": {
|
| 41 |
+
"lora_r": 16,
|
| 42 |
+
"lora_alpha": 32,
|
| 43 |
+
"lora_dropout": 0.1,
|
| 44 |
+
"lora_target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"],
|
| 45 |
+
"lora_bias": "none",
|
| 46 |
+
"task_type": "CAUSAL_LM"
|
| 47 |
+
},
|
| 48 |
+
|
| 49 |
+
"context_config": {
|
| 50 |
+
"max_position_embeddings": 8192,
|
| 51 |
+
"training_max_length": 2048,
|
| 52 |
+
"conversation_format": "chatml"
|
| 53 |
+
},
|
| 54 |
+
|
| 55 |
+
"h100_optimizations": {
|
| 56 |
+
"bf16": true,
|
| 57 |
+
"gradient_checkpointing": true,
|
| 58 |
+
"optim": "adamw_torch_fused",
|
| 59 |
+
"dataloader_num_workers": 4,
|
| 60 |
+
"dataloader_pin_memory": true,
|
| 61 |
+
"group_by_length": true,
|
| 62 |
+
"torch_compile": false,
|
| 63 |
+
"flash_attention": true
|
| 64 |
+
},
|
| 65 |
+
|
| 66 |
+
"training_constraints": {
|
| 67 |
+
"max_training_hours": 12,
|
| 68 |
+
"checkpoint_interval_minutes": 30,
|
| 69 |
+
"early_stopping_patience": 3,
|
| 70 |
+
"max_memory_gb": 80
|
| 71 |
+
},
|
| 72 |
+
|
| 73 |
+
"logging": {
|
| 74 |
+
"logging_steps": 10,
|
| 75 |
+
"eval_steps": 500,
|
| 76 |
+
"save_steps": 500,
|
| 77 |
+
"save_total_limit": 5,
|
| 78 |
+
"wandb_project": "pixelated-empathy-kan28",
|
| 79 |
+
"wandb_run_name": "therapeutic_ai_6_components"
|
| 80 |
+
},
|
| 81 |
+
|
| 82 |
+
"component_specific_config": {
|
| 83 |
+
"journaling_system": {
|
| 84 |
+
"weight": 1.0,
|
| 85 |
+
"focus": "long_term_progress_tracking"
|
| 86 |
+
},
|
| 87 |
+
"voice_blending": {
|
| 88 |
+
"weight": 1.2,
|
| 89 |
+
"experts": ["Tim", "Gabor", "Brené"],
|
| 90 |
+
"blending_strategy": "weighted_combination"
|
| 91 |
+
},
|
| 92 |
+
"edge_case_handling": {
|
| 93 |
+
"weight": 1.5,
|
| 94 |
+
"crisis_scenarios": ["suicidal_ideation", "trauma_flashback", "severe_dissociation"],
|
| 95 |
+
"safety_priority": "maximum"
|
| 96 |
+
},
|
| 97 |
+
"dual_persona_dynamics": {
|
| 98 |
+
"weight": 1.1,
|
| 99 |
+
"relationship_types": ["anxious_perfectionist", "trauma_survivor", "relationship_struggles"],
|
| 100 |
+
"alliance_tracking": true
|
| 101 |
+
},
|
| 102 |
+
"bias_detection": {
|
| 103 |
+
"weight": 1.3,
|
| 104 |
+
"validation_categories": ["cultural", "therapeutic", "accessibility", "demographic", "safety"],
|
| 105 |
+
"safety_threshold": 0.8
|
| 106 |
+
},
|
| 107 |
+
"psychology_knowledge_base": {
|
| 108 |
+
"weight": 1.0,
|
| 109 |
+
"concept_count": 4867,
|
| 110 |
+
"integration_method": "contextual_enhancement"
|
| 111 |
+
}
|
| 112 |
+
},
|
| 113 |
+
|
| 114 |
+
"validation_config": {
|
| 115 |
+
"therapeutic_quality_scoring": true,
|
| 116 |
+
"bias_detection_validation": true,
|
| 117 |
+
"component_integration_checks": true,
|
| 118 |
+
"safety_validation": true,
|
| 119 |
+
"expert_voice_consistency": true
|
| 120 |
+
},
|
| 121 |
+
|
| 122 |
+
"output_config": {
|
| 123 |
+
"model_name": "pixelated_empathy_kan28",
|
| 124 |
+
"save_format": "safetensors",
|
| 125 |
+
"include_tokenizer": true,
|
| 126 |
+
"include_config": true,
|
| 127 |
+
"create_model_card": true
|
| 128 |
+
},
|
| 129 |
+
|
| 130 |
+
"lightning_ai_config": {
|
| 131 |
+
"studio_type": "H100",
|
| 132 |
+
"instance_type": "studio-xl-h100",
|
| 133 |
+
"auto_shutdown": true,
|
| 134 |
+
"max_idle_minutes": 30
|
| 135 |
+
}
|
| 136 |
+
}
|
configs/hyperparameters/moe_training_config.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"base_model": "LatitudeGames/Wayfarer-2-12B",
|
| 3 |
+
"num_train_epochs": 3,
|
| 4 |
+
"per_device_train_batch_size": 4,
|
| 5 |
+
"gradient_accumulation_steps": 8,
|
| 6 |
+
"learning_rate": 3e-4,
|
| 7 |
+
"warmup_steps": 1000,
|
| 8 |
+
"weight_decay": 0.01,
|
| 9 |
+
"max_grad_norm": 1.0,
|
| 10 |
+
"datasets": ["data/acquired_datasets/mental_health_counseling.json"],
|
| 11 |
+
|
| 12 |
+
"moe_config": {
|
| 13 |
+
"num_experts": 4,
|
| 14 |
+
"expert_domains": [
|
| 15 |
+
"psychology",
|
| 16 |
+
"mental_health",
|
| 17 |
+
"bias_detection",
|
| 18 |
+
"general_therapeutic"
|
| 19 |
+
],
|
| 20 |
+
"expert_capacity": 2,
|
| 21 |
+
"load_balancing_weight": 0.01,
|
| 22 |
+
"router_z_loss_weight": 0.001
|
| 23 |
+
},
|
| 24 |
+
|
| 25 |
+
"lora_config": {
|
| 26 |
+
"lora_r": 16,
|
| 27 |
+
"lora_alpha": 32,
|
| 28 |
+
"lora_dropout": 0.1,
|
| 29 |
+
"lora_target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"]
|
| 30 |
+
},
|
| 31 |
+
|
| 32 |
+
"context_config": {
|
| 33 |
+
"max_position_embeddings": 8192,
|
| 34 |
+
"training_max_length": 2048
|
| 35 |
+
},
|
| 36 |
+
|
| 37 |
+
"qlora_config": {
|
| 38 |
+
"load_in_4bit": true,
|
| 39 |
+
"bnb_4bit_quant_type": "nf4",
|
| 40 |
+
"bnb_4bit_use_double_quant": true,
|
| 41 |
+
"bnb_4bit_compute_dtype": "bfloat16"
|
| 42 |
+
},
|
| 43 |
+
|
| 44 |
+
"h100_optimizations": {
|
| 45 |
+
"bf16": true,
|
| 46 |
+
"gradient_checkpointing": true,
|
| 47 |
+
"optim": "adamw_torch_fused",
|
| 48 |
+
"dataloader_num_workers": 4,
|
| 49 |
+
"dataloader_pin_memory": true,
|
| 50 |
+
"group_by_length": true
|
| 51 |
+
},
|
| 52 |
+
|
| 53 |
+
"training_constraints": {
|
| 54 |
+
"max_training_hours": 12,
|
| 55 |
+
"checkpoint_interval_minutes": 30,
|
| 56 |
+
"early_stopping_patience": 3
|
| 57 |
+
},
|
| 58 |
+
|
| 59 |
+
"logging": {
|
| 60 |
+
"logging_steps": 10,
|
| 61 |
+
"eval_steps": 500,
|
| 62 |
+
"save_steps": 500,
|
| 63 |
+
"save_total_limit": 5
|
| 64 |
+
}
|
| 65 |
+
}
|
configs/hyperparameters/stressor_train.json
ADDED
|
File without changes
|
configs/hyperparameters/training_config.json
ADDED
|
File without changes
|
configs/infrastructure/.gitkeep
ADDED
|
File without changes
|
configs/model_configs/.gitkeep
ADDED
|
File without changes
|
configs/stage_configs/.gitkeep
ADDED
|
File without changes
|
configs/stage_configs/1.PsychologyTest_requirements.txt
ADDED
|
File without changes
|
configs/stage_configs/18ddda4f-4118-4292-ad4c-3cfe2d29152c.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"session": {
|
| 3 |
+
"session_id": "18ddda4f-4118-4292-ad4c-3cfe2d29152c",
|
| 4 |
+
"start_date": "2025-11-14T10:02:25.887989",
|
| 5 |
+
"target_sources": [
|
| 6 |
+
"pubmed",
|
| 7 |
+
"zenodo",
|
| 8 |
+
"dryad"
|
| 9 |
+
],
|
| 10 |
+
"search_keywords": {
|
| 11 |
+
"therapy": [
|
| 12 |
+
"cbt",
|
| 13 |
+
"dbt",
|
| 14 |
+
"act"
|
| 15 |
+
],
|
| 16 |
+
"mental_health": [
|
| 17 |
+
"depression",
|
| 18 |
+
"anxiety"
|
| 19 |
+
]
|
| 20 |
+
},
|
| 21 |
+
"weekly_targets": {
|
| 22 |
+
"sources_identified": 10,
|
| 23 |
+
"datasets_evaluated": 5,
|
| 24 |
+
"datasets_acquired": 2
|
| 25 |
+
},
|
| 26 |
+
"current_phase": "discovery",
|
| 27 |
+
"progress_metrics": {}
|
| 28 |
+
},
|
| 29 |
+
"state": {
|
| 30 |
+
"sources": [],
|
| 31 |
+
"evaluations": [],
|
| 32 |
+
"access_requests": [],
|
| 33 |
+
"acquired_datasets": [],
|
| 34 |
+
"integration_plans": [],
|
| 35 |
+
"integration_feasibility": {}
|
| 36 |
+
},
|
| 37 |
+
"progress": {
|
| 38 |
+
"sources_identified": 0,
|
| 39 |
+
"datasets_evaluated": 0,
|
| 40 |
+
"access_established": 0,
|
| 41 |
+
"datasets_acquired": 0,
|
| 42 |
+
"integration_plans_created": 0,
|
| 43 |
+
"last_updated": null
|
| 44 |
+
},
|
| 45 |
+
"progress_history": [],
|
| 46 |
+
"activity_logs": [],
|
| 47 |
+
"error_log": []
|
| 48 |
+
}
|
configs/stage_configs/4710e616-eb07-4773-9757-df922c41b33f.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"session": {
|
| 3 |
+
"session_id": "4710e616-eb07-4773-9757-df922c41b33f",
|
| 4 |
+
"start_date": "2025-11-11T01:37:26.290061",
|
| 5 |
+
"target_sources": [
|
| 6 |
+
"pubmed",
|
| 7 |
+
"zenodo",
|
| 8 |
+
"dryad"
|
| 9 |
+
],
|
| 10 |
+
"search_keywords": {
|
| 11 |
+
"therapy": [
|
| 12 |
+
"cbt",
|
| 13 |
+
"dbt",
|
| 14 |
+
"act"
|
| 15 |
+
],
|
| 16 |
+
"mental_health": [
|
| 17 |
+
"depression",
|
| 18 |
+
"anxiety"
|
| 19 |
+
]
|
| 20 |
+
},
|
| 21 |
+
"weekly_targets": {
|
| 22 |
+
"sources_identified": 10,
|
| 23 |
+
"datasets_evaluated": 5,
|
| 24 |
+
"datasets_acquired": 2
|
| 25 |
+
},
|
| 26 |
+
"current_phase": "discovery",
|
| 27 |
+
"progress_metrics": {}
|
| 28 |
+
},
|
| 29 |
+
"state": {
|
| 30 |
+
"sources": [],
|
| 31 |
+
"evaluations": [],
|
| 32 |
+
"access_requests": [],
|
| 33 |
+
"acquired_datasets": [],
|
| 34 |
+
"integration_plans": [],
|
| 35 |
+
"integration_feasibility": {}
|
| 36 |
+
},
|
| 37 |
+
"progress": {
|
| 38 |
+
"sources_identified": 0,
|
| 39 |
+
"datasets_evaluated": 0,
|
| 40 |
+
"access_established": 0,
|
| 41 |
+
"datasets_acquired": 0,
|
| 42 |
+
"integration_plans_created": 0,
|
| 43 |
+
"last_updated": null
|
| 44 |
+
},
|
| 45 |
+
"progress_history": [],
|
| 46 |
+
"activity_logs": [],
|
| 47 |
+
"error_log": []
|
| 48 |
+
}
|
configs/stage_configs/878d3cb5-95e8-4e11-9d6c-6fa585c0a85e.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"session": {
|
| 3 |
+
"session_id": "878d3cb5-95e8-4e11-9d6c-6fa585c0a85e",
|
| 4 |
+
"start_date": "2025-11-11T01:40:20.313691",
|
| 5 |
+
"target_sources": [
|
| 6 |
+
"pubmed",
|
| 7 |
+
"zenodo",
|
| 8 |
+
"dryad"
|
| 9 |
+
],
|
| 10 |
+
"search_keywords": {
|
| 11 |
+
"therapy": [
|
| 12 |
+
"cbt",
|
| 13 |
+
"dbt",
|
| 14 |
+
"act"
|
| 15 |
+
],
|
| 16 |
+
"mental_health": [
|
| 17 |
+
"depression",
|
| 18 |
+
"anxiety"
|
| 19 |
+
]
|
| 20 |
+
},
|
| 21 |
+
"weekly_targets": {
|
| 22 |
+
"sources_identified": 10,
|
| 23 |
+
"datasets_evaluated": 5,
|
| 24 |
+
"datasets_acquired": 2
|
| 25 |
+
},
|
| 26 |
+
"current_phase": "discovery",
|
| 27 |
+
"progress_metrics": {}
|
| 28 |
+
},
|
| 29 |
+
"state": {
|
| 30 |
+
"sources": [],
|
| 31 |
+
"evaluations": [],
|
| 32 |
+
"access_requests": [],
|
| 33 |
+
"acquired_datasets": [],
|
| 34 |
+
"integration_plans": [],
|
| 35 |
+
"integration_feasibility": {}
|
| 36 |
+
},
|
| 37 |
+
"progress": {
|
| 38 |
+
"sources_identified": 0,
|
| 39 |
+
"datasets_evaluated": 0,
|
| 40 |
+
"access_established": 0,
|
| 41 |
+
"datasets_acquired": 0,
|
| 42 |
+
"integration_plans_created": 0,
|
| 43 |
+
"last_updated": null
|
| 44 |
+
},
|
| 45 |
+
"progress_history": [],
|
| 46 |
+
"activity_logs": [],
|
| 47 |
+
"error_log": []
|
| 48 |
+
}
|
configs/stage_configs/CoT_Neurodivergent_vs_Neurotypical_Interactions_metadata.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "CoT_Neurodivergent_vs_Neurotypical_Interactions",
|
| 3 |
+
"description": "Neurodiversity-aware therapeutic approaches",
|
| 4 |
+
"reasoning_type": "neurodiversity_reasoning",
|
| 5 |
+
"therapeutic_focus": "inclusive_therapy",
|
| 6 |
+
"total_examples": 200,
|
| 7 |
+
"reasoning_patterns": [
|
| 8 |
+
"Consider neurodivergent perspective",
|
| 9 |
+
"Assess sensory processing differences",
|
| 10 |
+
"Evaluate communication preferences",
|
| 11 |
+
"Account for executive function variations",
|
| 12 |
+
"Recognize masking behaviors"
|
| 13 |
+
],
|
| 14 |
+
"created_at": "2025-09-26T18:06:58.401899"
|
| 15 |
+
}
|
configs/stage_configs/CoT_Philosophical_Understanding_metadata.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "CoT_Philosophical_Understanding",
|
| 3 |
+
"description": "33MB, 60K existential/philosophical therapy",
|
| 4 |
+
"reasoning_type": "philosophical_reasoning",
|
| 5 |
+
"therapeutic_focus": "existential_therapy",
|
| 6 |
+
"total_examples": 500,
|
| 7 |
+
"reasoning_patterns": [
|
| 8 |
+
"Examine existential concerns",
|
| 9 |
+
"Explore meaning and purpose",
|
| 10 |
+
"Consider life's fundamental questions",
|
| 11 |
+
"Assess values and beliefs",
|
| 12 |
+
"Evaluate spiritual dimensions"
|
| 13 |
+
],
|
| 14 |
+
"created_at": "2025-09-26T18:06:58.440412"
|
| 15 |
+
}
|
configs/stage_configs/CoT_Reasoning_Mens_Mental_Health_metadata.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "CoT_Reasoning_Mens_Mental_Health",
|
| 3 |
+
"description": "Gender-specific therapeutic reasoning",
|
| 4 |
+
"reasoning_type": "gender_specific_reasoning",
|
| 5 |
+
"therapeutic_focus": "mens_therapy",
|
| 6 |
+
"total_examples": 200,
|
| 7 |
+
"reasoning_patterns": [
|
| 8 |
+
"Consider societal gender expectations",
|
| 9 |
+
"Assess masculine identity pressures",
|
| 10 |
+
"Evaluate emotional expression barriers",
|
| 11 |
+
"Account for help-seeking stigma",
|
| 12 |
+
"Recognize vulnerability challenges"
|
| 13 |
+
],
|
| 14 |
+
"created_at": "2025-09-26T18:06:58.421250"
|
| 15 |
+
}
|
configs/stage_configs/CoT_Temporal_Reasoning_Dataset_metadata.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "CoT_Temporal_Reasoning_Dataset",
|
| 3 |
+
"description": "15MB, 30K time-based therapeutic planning",
|
| 4 |
+
"reasoning_type": "temporal_reasoning",
|
| 5 |
+
"therapeutic_focus": "treatment_planning",
|
| 6 |
+
"total_examples": 200,
|
| 7 |
+
"reasoning_patterns": [
|
| 8 |
+
"Assess timeline of symptoms",
|
| 9 |
+
"Plan treatment progression",
|
| 10 |
+
"Consider developmental stages",
|
| 11 |
+
"Evaluate progress markers",
|
| 12 |
+
"Project future outcomes"
|
| 13 |
+
],
|
| 14 |
+
"created_at": "2025-09-26T18:06:58.520641"
|
| 15 |
+
}
|
configs/stage_configs/HealthCareMagic-100k.json
ADDED
|
File without changes
|
configs/stage_configs/Instructions.ts
ADDED
|
File without changes
|
configs/stage_configs/ULTIMATE_FINAL_INTEGRATION_SUMMARY.json
ADDED
|
File without changes
|
configs/stage_configs/ai_config.py
ADDED
|
File without changes
|
configs/stage_configs/api_config.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration management for the API server.
|
| 3 |
+
|
| 4 |
+
This module provides configuration loading from environment variables
|
| 5 |
+
with sensible defaults.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
from functools import lru_cache
|
| 10 |
+
from typing import List
|
| 11 |
+
|
| 12 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class Settings(BaseSettings):
|
| 16 |
+
"""API server settings."""
|
| 17 |
+
|
| 18 |
+
# Server configuration
|
| 19 |
+
host: str = "0.0.0.0"
|
| 20 |
+
port: int = 8000
|
| 21 |
+
environment: str = "development" # development, staging, production
|
| 22 |
+
api_version: str = "1.0.0"
|
| 23 |
+
debug: bool = False
|
| 24 |
+
|
| 25 |
+
# CORS configuration
|
| 26 |
+
cors_origins: List[str] = [
|
| 27 |
+
"http://localhost:4321", # Astro dev server
|
| 28 |
+
"http://localhost:3000", # Alternative dev port
|
| 29 |
+
"http://localhost:5173", # Vite dev server
|
| 30 |
+
]
|
| 31 |
+
|
| 32 |
+
# Authentication configuration
|
| 33 |
+
auth_enabled: bool = True
|
| 34 |
+
jwt_secret: str = os.getenv("JWT_SECRET", "change-me-in-production")
|
| 35 |
+
jwt_algorithm: str = "HS256"
|
| 36 |
+
jwt_expiration_minutes: int = 60 * 24 # 24 hours
|
| 37 |
+
|
| 38 |
+
# Rate limiting
|
| 39 |
+
rate_limit_enabled: bool = True
|
| 40 |
+
rate_limit_per_minute: int = 60
|
| 41 |
+
rate_limit_per_hour: int = 1000
|
| 42 |
+
|
| 43 |
+
# Logging
|
| 44 |
+
log_level: str = "INFO"
|
| 45 |
+
log_format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
| 46 |
+
|
| 47 |
+
# Session storage (must match across all components)
|
| 48 |
+
session_storage_path: str = os.getenv(
|
| 49 |
+
"SESSION_STORAGE_PATH", "ai/journal_dataset_research/sessions"
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
model_config = SettingsConfigDict(
|
| 53 |
+
env_file=".env",
|
| 54 |
+
env_file_encoding="utf-8",
|
| 55 |
+
case_sensitive=False,
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
@lru_cache()
|
| 60 |
+
def get_settings() -> Settings:
|
| 61 |
+
"""Get cached settings instance."""
|
| 62 |
+
return Settings()
|
configs/stage_configs/api_documentation.json
ADDED
|
@@ -0,0 +1,296 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"api_version": "1.0.0",
|
| 3 |
+
"base_url": "https://api.pixelatedempathy.com",
|
| 4 |
+
"endpoints": {
|
| 5 |
+
"validate_conversation": {
|
| 6 |
+
"endpoint": "/api/v1/validate/conversation",
|
| 7 |
+
"method": "POST",
|
| 8 |
+
"description": "Validate a therapeutic conversation using multi-tier quality assessment",
|
| 9 |
+
"parameters": {
|
| 10 |
+
"conversation": {
|
| 11 |
+
"type": "object",
|
| 12 |
+
"required": true,
|
| 13 |
+
"description": "Conversation object with id, content, turns, and metadata"
|
| 14 |
+
},
|
| 15 |
+
"validation_level": {
|
| 16 |
+
"type": "string",
|
| 17 |
+
"required": false,
|
| 18 |
+
"default": "comprehensive",
|
| 19 |
+
"options": [
|
| 20 |
+
"basic",
|
| 21 |
+
"standard",
|
| 22 |
+
"comprehensive",
|
| 23 |
+
"clinical"
|
| 24 |
+
]
|
| 25 |
+
},
|
| 26 |
+
"include_recommendations": {
|
| 27 |
+
"type": "boolean",
|
| 28 |
+
"required": false,
|
| 29 |
+
"default": true
|
| 30 |
+
}
|
| 31 |
+
},
|
| 32 |
+
"request_example": {
|
| 33 |
+
"conversation": {
|
| 34 |
+
"id": "conv_001",
|
| 35 |
+
"content": "I understand you're feeling anxious. Let's explore some coping strategies.",
|
| 36 |
+
"turns": [
|
| 37 |
+
{
|
| 38 |
+
"speaker": "user",
|
| 39 |
+
"text": "I'm feeling anxious lately."
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"speaker": "therapist",
|
| 43 |
+
"text": "I understand. Let's explore coping strategies."
|
| 44 |
+
}
|
| 45 |
+
],
|
| 46 |
+
"metadata": {
|
| 47 |
+
"source": "professional",
|
| 48 |
+
"condition": "anxiety",
|
| 49 |
+
"approach": "CBT"
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"validation_level": "comprehensive",
|
| 53 |
+
"include_recommendations": true
|
| 54 |
+
},
|
| 55 |
+
"response_example": {
|
| 56 |
+
"validation_id": "val_12345",
|
| 57 |
+
"overall_quality_score": 0.85,
|
| 58 |
+
"tier_assessment": "professional",
|
| 59 |
+
"validation_results": {
|
| 60 |
+
"multi_tier_validation": {
|
| 61 |
+
"passed": true,
|
| 62 |
+
"score": 0.87
|
| 63 |
+
},
|
| 64 |
+
"dsm5_accuracy": {
|
| 65 |
+
"passed": true,
|
| 66 |
+
"score": 0.83
|
| 67 |
+
},
|
| 68 |
+
"safety_ethics": {
|
| 69 |
+
"passed": true,
|
| 70 |
+
"score": 0.91
|
| 71 |
+
},
|
| 72 |
+
"effectiveness_prediction": {
|
| 73 |
+
"score": 0.78,
|
| 74 |
+
"confidence": "high"
|
| 75 |
+
},
|
| 76 |
+
"coherence_validation": {
|
| 77 |
+
"score": 0.82,
|
| 78 |
+
"level": "moderately_coherent"
|
| 79 |
+
}
|
| 80 |
+
},
|
| 81 |
+
"issues": [],
|
| 82 |
+
"recommendations": [
|
| 83 |
+
"Consider adding more specific therapeutic techniques",
|
| 84 |
+
"Enhance empathetic responses"
|
| 85 |
+
],
|
| 86 |
+
"processing_time_ms": 245
|
| 87 |
+
},
|
| 88 |
+
"error_codes": [
|
| 89 |
+
{
|
| 90 |
+
"code": "400",
|
| 91 |
+
"description": "Invalid conversation format"
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"code": "422",
|
| 95 |
+
"description": "Validation failed - conversation quality too low"
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"code": "429",
|
| 99 |
+
"description": "Rate limit exceeded"
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"code": "500",
|
| 103 |
+
"description": "Internal validation error"
|
| 104 |
+
}
|
| 105 |
+
],
|
| 106 |
+
"rate_limits": "100 requests per minute",
|
| 107 |
+
"authentication": "API key required"
|
| 108 |
+
},
|
| 109 |
+
"export_dataset": {
|
| 110 |
+
"endpoint": "/api/v1/export/dataset",
|
| 111 |
+
"method": "POST",
|
| 112 |
+
"description": "Export dataset in specified format with tiered access control",
|
| 113 |
+
"parameters": {
|
| 114 |
+
"export_config": {
|
| 115 |
+
"type": "object",
|
| 116 |
+
"required": true,
|
| 117 |
+
"description": "Export configuration including formats, tiers, and options"
|
| 118 |
+
},
|
| 119 |
+
"filters": {
|
| 120 |
+
"type": "object",
|
| 121 |
+
"required": false,
|
| 122 |
+
"description": "Optional filters for conversation selection"
|
| 123 |
+
}
|
| 124 |
+
},
|
| 125 |
+
"request_example": {
|
| 126 |
+
"export_config": {
|
| 127 |
+
"formats": [
|
| 128 |
+
"json",
|
| 129 |
+
"csv"
|
| 130 |
+
],
|
| 131 |
+
"access_tiers": [
|
| 132 |
+
"priority",
|
| 133 |
+
"professional"
|
| 134 |
+
],
|
| 135 |
+
"quality_threshold": 0.8,
|
| 136 |
+
"include_metadata": true,
|
| 137 |
+
"compress_output": true
|
| 138 |
+
},
|
| 139 |
+
"filters": {
|
| 140 |
+
"conditions": [
|
| 141 |
+
"anxiety",
|
| 142 |
+
"depression"
|
| 143 |
+
],
|
| 144 |
+
"date_range": {
|
| 145 |
+
"start": "2025-01-01",
|
| 146 |
+
"end": "2025-08-10"
|
| 147 |
+
}
|
| 148 |
+
}
|
| 149 |
+
},
|
| 150 |
+
"response_example": {
|
| 151 |
+
"export_id": "exp_67890",
|
| 152 |
+
"status": "completed",
|
| 153 |
+
"export_metadata": [
|
| 154 |
+
{
|
| 155 |
+
"format": "json",
|
| 156 |
+
"tier": "priority",
|
| 157 |
+
"conversations": 1542,
|
| 158 |
+
"file_path": "/exports/v1/priority/conversations.json.zip",
|
| 159 |
+
"checksum": "sha256:abc123..."
|
| 160 |
+
}
|
| 161 |
+
],
|
| 162 |
+
"total_conversations": 4626,
|
| 163 |
+
"export_time_seconds": 45.2
|
| 164 |
+
},
|
| 165 |
+
"error_codes": [
|
| 166 |
+
{
|
| 167 |
+
"code": "400",
|
| 168 |
+
"description": "Invalid export configuration"
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"code": "403",
|
| 172 |
+
"description": "Insufficient access permissions for requested tier"
|
| 173 |
+
},
|
| 174 |
+
{
|
| 175 |
+
"code": "413",
|
| 176 |
+
"description": "Export size exceeds limits"
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"code": "500",
|
| 180 |
+
"description": "Export processing error"
|
| 181 |
+
}
|
| 182 |
+
],
|
| 183 |
+
"rate_limits": "10 exports per hour",
|
| 184 |
+
"authentication": "API key with export permissions required"
|
| 185 |
+
},
|
| 186 |
+
"get_analytics": {
|
| 187 |
+
"endpoint": "/api/v1/analytics/dashboard",
|
| 188 |
+
"method": "GET",
|
| 189 |
+
"description": "Get comprehensive analytics dashboard data",
|
| 190 |
+
"parameters": {
|
| 191 |
+
"time_range": {
|
| 192 |
+
"type": "string",
|
| 193 |
+
"required": false,
|
| 194 |
+
"default": "24h",
|
| 195 |
+
"options": [
|
| 196 |
+
"1h",
|
| 197 |
+
"24h",
|
| 198 |
+
"7d",
|
| 199 |
+
"30d"
|
| 200 |
+
]
|
| 201 |
+
},
|
| 202 |
+
"include_trends": {
|
| 203 |
+
"type": "boolean",
|
| 204 |
+
"required": false,
|
| 205 |
+
"default": true
|
| 206 |
+
}
|
| 207 |
+
},
|
| 208 |
+
"request_example": {},
|
| 209 |
+
"response_example": {
|
| 210 |
+
"dashboard_data": {
|
| 211 |
+
"total_conversations": 15420,
|
| 212 |
+
"quality_distribution": {
|
| 213 |
+
"excellent": 3084,
|
| 214 |
+
"good": 6168,
|
| 215 |
+
"acceptable": 4626,
|
| 216 |
+
"poor": 1542
|
| 217 |
+
},
|
| 218 |
+
"safety_metrics": {
|
| 219 |
+
"overall_safety_score": 0.91,
|
| 220 |
+
"compliance_rate": 0.94
|
| 221 |
+
},
|
| 222 |
+
"performance_trends": {
|
| 223 |
+
"quality_scores": [
|
| 224 |
+
0.78,
|
| 225 |
+
0.79,
|
| 226 |
+
0.81,
|
| 227 |
+
0.82
|
| 228 |
+
]
|
| 229 |
+
}
|
| 230 |
+
},
|
| 231 |
+
"summary_report": {
|
| 232 |
+
"performance_status": "\ud83d\udfe2 EXCELLENT",
|
| 233 |
+
"key_insights": [
|
| 234 |
+
"High quality conversations",
|
| 235 |
+
"Excellent safety compliance"
|
| 236 |
+
]
|
| 237 |
+
}
|
| 238 |
+
},
|
| 239 |
+
"error_codes": [
|
| 240 |
+
{
|
| 241 |
+
"code": "400",
|
| 242 |
+
"description": "Invalid time range parameter"
|
| 243 |
+
},
|
| 244 |
+
{
|
| 245 |
+
"code": "500",
|
| 246 |
+
"description": "Analytics processing error"
|
| 247 |
+
}
|
| 248 |
+
],
|
| 249 |
+
"rate_limits": "60 requests per minute",
|
| 250 |
+
"authentication": null
|
| 251 |
+
},
|
| 252 |
+
"get_system_status": {
|
| 253 |
+
"endpoint": "/api/v1/system/status",
|
| 254 |
+
"method": "GET",
|
| 255 |
+
"description": "Get real-time system status and health metrics",
|
| 256 |
+
"parameters": {},
|
| 257 |
+
"request_example": {},
|
| 258 |
+
"response_example": {
|
| 259 |
+
"system_status": "healthy",
|
| 260 |
+
"components": {
|
| 261 |
+
"validation_pipeline": {
|
| 262 |
+
"status": "operational",
|
| 263 |
+
"response_time_ms": 150
|
| 264 |
+
},
|
| 265 |
+
"export_system": {
|
| 266 |
+
"status": "operational",
|
| 267 |
+
"queue_size": 2
|
| 268 |
+
},
|
| 269 |
+
"analytics_engine": {
|
| 270 |
+
"status": "operational",
|
| 271 |
+
"last_update": "2025-08-10T07:30:00Z"
|
| 272 |
+
},
|
| 273 |
+
"maintenance_system": {
|
| 274 |
+
"status": "operational",
|
| 275 |
+
"next_maintenance": "2025-08-10T12:00:00Z"
|
| 276 |
+
}
|
| 277 |
+
},
|
| 278 |
+
"performance_metrics": {
|
| 279 |
+
"total_conversations_processed": 15420,
|
| 280 |
+
"average_processing_time_ms": 245,
|
| 281 |
+
"success_rate": 0.998,
|
| 282 |
+
"uptime_hours": 168.5
|
| 283 |
+
},
|
| 284 |
+
"alerts": []
|
| 285 |
+
},
|
| 286 |
+
"error_codes": [
|
| 287 |
+
{
|
| 288 |
+
"code": "503",
|
| 289 |
+
"description": "System temporarily unavailable"
|
| 290 |
+
}
|
| 291 |
+
],
|
| 292 |
+
"rate_limits": "120 requests per minute",
|
| 293 |
+
"authentication": null
|
| 294 |
+
}
|
| 295 |
+
}
|
| 296 |
+
}
|
configs/stage_configs/approach_config.json
ADDED
|
@@ -0,0 +1,455 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cbt": {
|
| 3 |
+
"name": "Cognitive Behavioral Therapy",
|
| 4 |
+
"evidence_level": "strong",
|
| 5 |
+
"target_weight": 0.25,
|
| 6 |
+
"min_samples": 500,
|
| 7 |
+
"max_samples": 8000,
|
| 8 |
+
"keywords": [
|
| 9 |
+
"cbt",
|
| 10 |
+
"cognitive behavioral",
|
| 11 |
+
"cognitive therapy",
|
| 12 |
+
"behavioral therapy",
|
| 13 |
+
"thought patterns",
|
| 14 |
+
"cognitive restructuring",
|
| 15 |
+
"behavioral activation"
|
| 16 |
+
],
|
| 17 |
+
"techniques": [
|
| 18 |
+
"cognitive restructuring",
|
| 19 |
+
"behavioral activation",
|
| 20 |
+
"exposure therapy",
|
| 21 |
+
"thought records",
|
| 22 |
+
"activity scheduling",
|
| 23 |
+
"behavioral experiments"
|
| 24 |
+
],
|
| 25 |
+
"conditions_suited": [
|
| 26 |
+
"depression",
|
| 27 |
+
"anxiety",
|
| 28 |
+
"ptsd",
|
| 29 |
+
"ocd",
|
| 30 |
+
"panic_disorder"
|
| 31 |
+
],
|
| 32 |
+
"effectiveness_score": 0.95
|
| 33 |
+
},
|
| 34 |
+
"dbt": {
|
| 35 |
+
"name": "Dialectical Behavior Therapy",
|
| 36 |
+
"evidence_level": "strong",
|
| 37 |
+
"target_weight": 0.12,
|
| 38 |
+
"min_samples": 300,
|
| 39 |
+
"max_samples": 4000,
|
| 40 |
+
"keywords": [
|
| 41 |
+
"dbt",
|
| 42 |
+
"dialectical",
|
| 43 |
+
"mindfulness",
|
| 44 |
+
"distress tolerance",
|
| 45 |
+
"emotion regulation",
|
| 46 |
+
"interpersonal effectiveness"
|
| 47 |
+
],
|
| 48 |
+
"techniques": [
|
| 49 |
+
"mindfulness",
|
| 50 |
+
"distress tolerance",
|
| 51 |
+
"emotion regulation",
|
| 52 |
+
"interpersonal effectiveness",
|
| 53 |
+
"wise mind",
|
| 54 |
+
"radical acceptance"
|
| 55 |
+
],
|
| 56 |
+
"conditions_suited": [
|
| 57 |
+
"bpd",
|
| 58 |
+
"self_harm",
|
| 59 |
+
"suicidal_ideation",
|
| 60 |
+
"emotion_dysregulation"
|
| 61 |
+
],
|
| 62 |
+
"effectiveness_score": 0.9
|
| 63 |
+
},
|
| 64 |
+
"psychodynamic": {
|
| 65 |
+
"name": "Psychodynamic Therapy",
|
| 66 |
+
"evidence_level": "moderate",
|
| 67 |
+
"target_weight": 0.15,
|
| 68 |
+
"min_samples": 400,
|
| 69 |
+
"max_samples": 5000,
|
| 70 |
+
"keywords": [
|
| 71 |
+
"psychodynamic",
|
| 72 |
+
"psychoanalytic",
|
| 73 |
+
"unconscious",
|
| 74 |
+
"transference",
|
| 75 |
+
"defense mechanisms",
|
| 76 |
+
"insight",
|
| 77 |
+
"interpretation"
|
| 78 |
+
],
|
| 79 |
+
"techniques": [
|
| 80 |
+
"free association",
|
| 81 |
+
"dream analysis",
|
| 82 |
+
"transference analysis",
|
| 83 |
+
"interpretation",
|
| 84 |
+
"working through",
|
| 85 |
+
"insight development"
|
| 86 |
+
],
|
| 87 |
+
"conditions_suited": [
|
| 88 |
+
"depression",
|
| 89 |
+
"anxiety",
|
| 90 |
+
"personality_disorders",
|
| 91 |
+
"trauma"
|
| 92 |
+
],
|
| 93 |
+
"effectiveness_score": 0.75
|
| 94 |
+
},
|
| 95 |
+
"humanistic": {
|
| 96 |
+
"name": "Humanistic/Person-Centered Therapy",
|
| 97 |
+
"evidence_level": "moderate",
|
| 98 |
+
"target_weight": 0.1,
|
| 99 |
+
"min_samples": 250,
|
| 100 |
+
"max_samples": 3500,
|
| 101 |
+
"keywords": [
|
| 102 |
+
"person-centered",
|
| 103 |
+
"humanistic",
|
| 104 |
+
"unconditional positive regard",
|
| 105 |
+
"empathy",
|
| 106 |
+
"genuineness",
|
| 107 |
+
"self-actualization",
|
| 108 |
+
"client-centered"
|
| 109 |
+
],
|
| 110 |
+
"techniques": [
|
| 111 |
+
"active listening",
|
| 112 |
+
"reflection",
|
| 113 |
+
"unconditional positive regard",
|
| 114 |
+
"empathic understanding",
|
| 115 |
+
"genuineness",
|
| 116 |
+
"congruence"
|
| 117 |
+
],
|
| 118 |
+
"conditions_suited": [
|
| 119 |
+
"self_esteem",
|
| 120 |
+
"identity_issues",
|
| 121 |
+
"personal_growth"
|
| 122 |
+
],
|
| 123 |
+
"effectiveness_score": 0.7
|
| 124 |
+
},
|
| 125 |
+
"acceptance_commitment": {
|
| 126 |
+
"name": "Acceptance and Commitment Therapy",
|
| 127 |
+
"evidence_level": "strong",
|
| 128 |
+
"target_weight": 0.08,
|
| 129 |
+
"min_samples": 200,
|
| 130 |
+
"max_samples": 3000,
|
| 131 |
+
"keywords": [
|
| 132 |
+
"act",
|
| 133 |
+
"acceptance commitment",
|
| 134 |
+
"psychological flexibility",
|
| 135 |
+
"mindfulness",
|
| 136 |
+
"values",
|
| 137 |
+
"committed action",
|
| 138 |
+
"defusion"
|
| 139 |
+
],
|
| 140 |
+
"techniques": [
|
| 141 |
+
"mindfulness",
|
| 142 |
+
"acceptance",
|
| 143 |
+
"cognitive defusion",
|
| 144 |
+
"values clarification",
|
| 145 |
+
"committed action",
|
| 146 |
+
"psychological flexibility"
|
| 147 |
+
],
|
| 148 |
+
"conditions_suited": [
|
| 149 |
+
"anxiety",
|
| 150 |
+
"depression",
|
| 151 |
+
"chronic_pain",
|
| 152 |
+
"substance_abuse"
|
| 153 |
+
],
|
| 154 |
+
"effectiveness_score": 0.85
|
| 155 |
+
},
|
| 156 |
+
"emdr": {
|
| 157 |
+
"name": "Eye Movement Desensitization and Reprocessing",
|
| 158 |
+
"evidence_level": "strong",
|
| 159 |
+
"target_weight": 0.06,
|
| 160 |
+
"min_samples": 150,
|
| 161 |
+
"max_samples": 2500,
|
| 162 |
+
"keywords": [
|
| 163 |
+
"emdr",
|
| 164 |
+
"eye movement",
|
| 165 |
+
"bilateral stimulation",
|
| 166 |
+
"trauma processing",
|
| 167 |
+
"desensitization",
|
| 168 |
+
"reprocessing"
|
| 169 |
+
],
|
| 170 |
+
"techniques": [
|
| 171 |
+
"bilateral stimulation",
|
| 172 |
+
"resource installation",
|
| 173 |
+
"trauma processing",
|
| 174 |
+
"desensitization",
|
| 175 |
+
"reprocessing",
|
| 176 |
+
"safe place visualization"
|
| 177 |
+
],
|
| 178 |
+
"conditions_suited": [
|
| 179 |
+
"ptsd",
|
| 180 |
+
"trauma",
|
| 181 |
+
"phobias",
|
| 182 |
+
"anxiety"
|
| 183 |
+
],
|
| 184 |
+
"effectiveness_score": 0.9
|
| 185 |
+
},
|
| 186 |
+
"family_systems": {
|
| 187 |
+
"name": "Family Systems Therapy",
|
| 188 |
+
"evidence_level": "moderate",
|
| 189 |
+
"target_weight": 0.07,
|
| 190 |
+
"min_samples": 180,
|
| 191 |
+
"max_samples": 2800,
|
| 192 |
+
"keywords": [
|
| 193 |
+
"family therapy",
|
| 194 |
+
"systems therapy",
|
| 195 |
+
"family systems",
|
| 196 |
+
"structural",
|
| 197 |
+
"strategic",
|
| 198 |
+
"multigenerational",
|
| 199 |
+
"boundaries"
|
| 200 |
+
],
|
| 201 |
+
"techniques": [
|
| 202 |
+
"genogram",
|
| 203 |
+
"structural interventions",
|
| 204 |
+
"strategic interventions",
|
| 205 |
+
"boundary setting",
|
| 206 |
+
"family sculpting",
|
| 207 |
+
"circular questioning"
|
| 208 |
+
],
|
| 209 |
+
"conditions_suited": [
|
| 210 |
+
"family_conflict",
|
| 211 |
+
"relationship_issues",
|
| 212 |
+
"adolescent_issues"
|
| 213 |
+
],
|
| 214 |
+
"effectiveness_score": 0.75
|
| 215 |
+
},
|
| 216 |
+
"gestalt": {
|
| 217 |
+
"name": "Gestalt Therapy",
|
| 218 |
+
"evidence_level": "emerging",
|
| 219 |
+
"target_weight": 0.04,
|
| 220 |
+
"min_samples": 100,
|
| 221 |
+
"max_samples": 1500,
|
| 222 |
+
"keywords": [
|
| 223 |
+
"gestalt",
|
| 224 |
+
"here and now",
|
| 225 |
+
"awareness",
|
| 226 |
+
"contact",
|
| 227 |
+
"experiment",
|
| 228 |
+
"phenomenology",
|
| 229 |
+
"field theory"
|
| 230 |
+
],
|
| 231 |
+
"techniques": [
|
| 232 |
+
"empty chair",
|
| 233 |
+
"two-chair technique",
|
| 234 |
+
"body awareness",
|
| 235 |
+
"here and now focus",
|
| 236 |
+
"experiments",
|
| 237 |
+
"contact and awareness"
|
| 238 |
+
],
|
| 239 |
+
"conditions_suited": [
|
| 240 |
+
"anxiety",
|
| 241 |
+
"depression",
|
| 242 |
+
"relationship_issues"
|
| 243 |
+
],
|
| 244 |
+
"effectiveness_score": 0.65
|
| 245 |
+
},
|
| 246 |
+
"solution_focused": {
|
| 247 |
+
"name": "Solution-Focused Brief Therapy",
|
| 248 |
+
"evidence_level": "moderate",
|
| 249 |
+
"target_weight": 0.05,
|
| 250 |
+
"min_samples": 120,
|
| 251 |
+
"max_samples": 2000,
|
| 252 |
+
"keywords": [
|
| 253 |
+
"solution focused",
|
| 254 |
+
"brief therapy",
|
| 255 |
+
"miracle question",
|
| 256 |
+
"scaling",
|
| 257 |
+
"exceptions",
|
| 258 |
+
"goals",
|
| 259 |
+
"strengths"
|
| 260 |
+
],
|
| 261 |
+
"techniques": [
|
| 262 |
+
"miracle question",
|
| 263 |
+
"scaling questions",
|
| 264 |
+
"exception finding",
|
| 265 |
+
"goal setting",
|
| 266 |
+
"compliments",
|
| 267 |
+
"between-session tasks"
|
| 268 |
+
],
|
| 269 |
+
"conditions_suited": [
|
| 270 |
+
"depression",
|
| 271 |
+
"anxiety",
|
| 272 |
+
"relationship_issues",
|
| 273 |
+
"substance_abuse"
|
| 274 |
+
],
|
| 275 |
+
"effectiveness_score": 0.7
|
| 276 |
+
},
|
| 277 |
+
"narrative": {
|
| 278 |
+
"name": "Narrative Therapy",
|
| 279 |
+
"evidence_level": "emerging",
|
| 280 |
+
"target_weight": 0.03,
|
| 281 |
+
"min_samples": 80,
|
| 282 |
+
"max_samples": 1200,
|
| 283 |
+
"keywords": [
|
| 284 |
+
"narrative",
|
| 285 |
+
"story",
|
| 286 |
+
"externalization",
|
| 287 |
+
"unique outcomes",
|
| 288 |
+
"re-authoring",
|
| 289 |
+
"deconstruction",
|
| 290 |
+
"preferred story"
|
| 291 |
+
],
|
| 292 |
+
"techniques": [
|
| 293 |
+
"externalization",
|
| 294 |
+
"unique outcomes",
|
| 295 |
+
"re-authoring",
|
| 296 |
+
"definitional ceremony",
|
| 297 |
+
"outsider witness",
|
| 298 |
+
"therapeutic documents"
|
| 299 |
+
],
|
| 300 |
+
"conditions_suited": [
|
| 301 |
+
"identity_issues",
|
| 302 |
+
"trauma",
|
| 303 |
+
"oppression",
|
| 304 |
+
"self_esteem"
|
| 305 |
+
],
|
| 306 |
+
"effectiveness_score": 0.6
|
| 307 |
+
},
|
| 308 |
+
"mindfulness_based": {
|
| 309 |
+
"name": "Mindfulness-Based Interventions",
|
| 310 |
+
"evidence_level": "strong",
|
| 311 |
+
"target_weight": 0.06,
|
| 312 |
+
"min_samples": 150,
|
| 313 |
+
"max_samples": 2500,
|
| 314 |
+
"keywords": [
|
| 315 |
+
"mindfulness",
|
| 316 |
+
"mbsr",
|
| 317 |
+
"mbct",
|
| 318 |
+
"meditation",
|
| 319 |
+
"present moment",
|
| 320 |
+
"non-judgmental awareness",
|
| 321 |
+
"body scan"
|
| 322 |
+
],
|
| 323 |
+
"techniques": [
|
| 324 |
+
"mindfulness meditation",
|
| 325 |
+
"body scan",
|
| 326 |
+
"breathing exercises",
|
| 327 |
+
"mindful movement",
|
| 328 |
+
"loving-kindness",
|
| 329 |
+
"present moment awareness"
|
| 330 |
+
],
|
| 331 |
+
"conditions_suited": [
|
| 332 |
+
"anxiety",
|
| 333 |
+
"depression",
|
| 334 |
+
"chronic_pain",
|
| 335 |
+
"stress"
|
| 336 |
+
],
|
| 337 |
+
"effectiveness_score": 0.8
|
| 338 |
+
},
|
| 339 |
+
"interpersonal": {
|
| 340 |
+
"name": "Interpersonal Therapy",
|
| 341 |
+
"evidence_level": "strong",
|
| 342 |
+
"target_weight": 0.07,
|
| 343 |
+
"min_samples": 180,
|
| 344 |
+
"max_samples": 2800,
|
| 345 |
+
"keywords": [
|
| 346 |
+
"interpersonal therapy",
|
| 347 |
+
"ipt",
|
| 348 |
+
"grief",
|
| 349 |
+
"role disputes",
|
| 350 |
+
"role transitions",
|
| 351 |
+
"interpersonal deficits"
|
| 352 |
+
],
|
| 353 |
+
"techniques": [
|
| 354 |
+
"grief work",
|
| 355 |
+
"role dispute resolution",
|
| 356 |
+
"role transition work",
|
| 357 |
+
"interpersonal skills training",
|
| 358 |
+
"communication analysis"
|
| 359 |
+
],
|
| 360 |
+
"conditions_suited": [
|
| 361 |
+
"depression",
|
| 362 |
+
"anxiety",
|
| 363 |
+
"eating_disorders",
|
| 364 |
+
"ptsd"
|
| 365 |
+
],
|
| 366 |
+
"effectiveness_score": 0.85
|
| 367 |
+
},
|
| 368 |
+
"motivational_interviewing": {
|
| 369 |
+
"name": "Motivational Interviewing",
|
| 370 |
+
"evidence_level": "strong",
|
| 371 |
+
"target_weight": 0.05,
|
| 372 |
+
"min_samples": 120,
|
| 373 |
+
"max_samples": 2000,
|
| 374 |
+
"keywords": [
|
| 375 |
+
"motivational interviewing",
|
| 376 |
+
"mi",
|
| 377 |
+
"ambivalence",
|
| 378 |
+
"change talk",
|
| 379 |
+
"rolling with resistance",
|
| 380 |
+
"self-efficacy"
|
| 381 |
+
],
|
| 382 |
+
"techniques": [
|
| 383 |
+
"open-ended questions",
|
| 384 |
+
"affirmations",
|
| 385 |
+
"reflective listening",
|
| 386 |
+
"summarizing",
|
| 387 |
+
"eliciting change talk",
|
| 388 |
+
"developing discrepancy"
|
| 389 |
+
],
|
| 390 |
+
"conditions_suited": [
|
| 391 |
+
"substance_abuse",
|
| 392 |
+
"health_behavior_change",
|
| 393 |
+
"motivation"
|
| 394 |
+
],
|
| 395 |
+
"effectiveness_score": 0.8
|
| 396 |
+
},
|
| 397 |
+
"exposure_therapy": {
|
| 398 |
+
"name": "Exposure and Response Prevention",
|
| 399 |
+
"evidence_level": "strong",
|
| 400 |
+
"target_weight": 0.04,
|
| 401 |
+
"min_samples": 100,
|
| 402 |
+
"max_samples": 1500,
|
| 403 |
+
"keywords": [
|
| 404 |
+
"exposure",
|
| 405 |
+
"response prevention",
|
| 406 |
+
"systematic desensitization",
|
| 407 |
+
"flooding",
|
| 408 |
+
"habituation",
|
| 409 |
+
"fear hierarchy"
|
| 410 |
+
],
|
| 411 |
+
"techniques": [
|
| 412 |
+
"systematic desensitization",
|
| 413 |
+
"in vivo exposure",
|
| 414 |
+
"imaginal exposure",
|
| 415 |
+
"response prevention",
|
| 416 |
+
"fear hierarchy",
|
| 417 |
+
"habituation"
|
| 418 |
+
],
|
| 419 |
+
"conditions_suited": [
|
| 420 |
+
"ocd",
|
| 421 |
+
"phobias",
|
| 422 |
+
"anxiety",
|
| 423 |
+
"ptsd"
|
| 424 |
+
],
|
| 425 |
+
"effectiveness_score": 0.9
|
| 426 |
+
},
|
| 427 |
+
"integrative": {
|
| 428 |
+
"name": "Integrative/Eclectic Therapy",
|
| 429 |
+
"evidence_level": "moderate",
|
| 430 |
+
"target_weight": 0.08,
|
| 431 |
+
"min_samples": 200,
|
| 432 |
+
"max_samples": 3000,
|
| 433 |
+
"keywords": [
|
| 434 |
+
"integrative",
|
| 435 |
+
"eclectic",
|
| 436 |
+
"multimodal",
|
| 437 |
+
"combination",
|
| 438 |
+
"tailored approach",
|
| 439 |
+
"best practices"
|
| 440 |
+
],
|
| 441 |
+
"techniques": [
|
| 442 |
+
"technique integration",
|
| 443 |
+
"approach combination",
|
| 444 |
+
"tailored interventions",
|
| 445 |
+
"flexible methodology",
|
| 446 |
+
"evidence-based selection"
|
| 447 |
+
],
|
| 448 |
+
"conditions_suited": [
|
| 449 |
+
"complex_presentations",
|
| 450 |
+
"comorbid_conditions",
|
| 451 |
+
"treatment_resistant"
|
| 452 |
+
],
|
| 453 |
+
"effectiveness_score": 0.75
|
| 454 |
+
}
|
| 455 |
+
}
|
configs/stage_configs/audit_report.json
ADDED
|
@@ -0,0 +1,655 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"audit_date": "2025-08-24T13:12:24.522685",
|
| 3 |
+
"total_tasks": 36,
|
| 4 |
+
"complete": 18,
|
| 5 |
+
"partial": 0,
|
| 6 |
+
"missing": 18,
|
| 7 |
+
"completion_rate": 0.5,
|
| 8 |
+
"overall_status": "PARTIAL",
|
| 9 |
+
"phase_breakdown": {
|
| 10 |
+
"Phase 1": {
|
| 11 |
+
"complete": 3,
|
| 12 |
+
"total": 6,
|
| 13 |
+
"completion_rate": 0.5
|
| 14 |
+
},
|
| 15 |
+
"Phase 2": {
|
| 16 |
+
"complete": 3,
|
| 17 |
+
"total": 6,
|
| 18 |
+
"completion_rate": 0.5
|
| 19 |
+
},
|
| 20 |
+
"Phase 3": {
|
| 21 |
+
"complete": 3,
|
| 22 |
+
"total": 6,
|
| 23 |
+
"completion_rate": 0.5
|
| 24 |
+
},
|
| 25 |
+
"Phase 4": {
|
| 26 |
+
"complete": 0,
|
| 27 |
+
"total": 6,
|
| 28 |
+
"completion_rate": 0.0
|
| 29 |
+
},
|
| 30 |
+
"Phase 5": {
|
| 31 |
+
"complete": 4,
|
| 32 |
+
"total": 6,
|
| 33 |
+
"completion_rate": 0.6666666666666666
|
| 34 |
+
},
|
| 35 |
+
"Phase 6": {
|
| 36 |
+
"complete": 5,
|
| 37 |
+
"total": 6,
|
| 38 |
+
"completion_rate": 0.8333333333333334
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"detailed_results": {
|
| 42 |
+
"6.1": {
|
| 43 |
+
"task_id": "6.1",
|
| 44 |
+
"filename": "distributed_architecture.py",
|
| 45 |
+
"description": "Distributed processing architecture",
|
| 46 |
+
"exists": true,
|
| 47 |
+
"size_bytes": 20724,
|
| 48 |
+
"imports_ok": true,
|
| 49 |
+
"content_analysis": {
|
| 50 |
+
"classes": 6,
|
| 51 |
+
"functions": 26,
|
| 52 |
+
"lines": 569,
|
| 53 |
+
"docstring": true,
|
| 54 |
+
"size_kb": 20.2275390625
|
| 55 |
+
},
|
| 56 |
+
"status": "COMPLETE",
|
| 57 |
+
"issues": []
|
| 58 |
+
},
|
| 59 |
+
"6.2": {
|
| 60 |
+
"task_id": "6.2",
|
| 61 |
+
"filename": "data_fusion_engine.py",
|
| 62 |
+
"description": "Intelligent data fusion algorithms",
|
| 63 |
+
"exists": true,
|
| 64 |
+
"size_bytes": 27331,
|
| 65 |
+
"imports_ok": true,
|
| 66 |
+
"content_analysis": {
|
| 67 |
+
"classes": 5,
|
| 68 |
+
"functions": 20,
|
| 69 |
+
"lines": 694,
|
| 70 |
+
"docstring": true,
|
| 71 |
+
"size_kb": 26.6845703125
|
| 72 |
+
},
|
| 73 |
+
"status": "COMPLETE",
|
| 74 |
+
"issues": []
|
| 75 |
+
},
|
| 76 |
+
"6.3": {
|
| 77 |
+
"task_id": "6.3",
|
| 78 |
+
"filename": "quality_assessment_framework.py",
|
| 79 |
+
"description": "Hierarchical quality assessment framework",
|
| 80 |
+
"exists": true,
|
| 81 |
+
"size_bytes": 28315,
|
| 82 |
+
"imports_ok": true,
|
| 83 |
+
"content_analysis": {
|
| 84 |
+
"classes": 5,
|
| 85 |
+
"functions": 25,
|
| 86 |
+
"lines": 708,
|
| 87 |
+
"docstring": true,
|
| 88 |
+
"size_kb": 27.6455078125
|
| 89 |
+
},
|
| 90 |
+
"status": "COMPLETE",
|
| 91 |
+
"issues": []
|
| 92 |
+
},
|
| 93 |
+
"6.4": {
|
| 94 |
+
"task_id": "6.4",
|
| 95 |
+
"filename": "deduplication.py",
|
| 96 |
+
"description": "Automated conversation deduplication",
|
| 97 |
+
"exists": true,
|
| 98 |
+
"size_bytes": 17642,
|
| 99 |
+
"imports_ok": false,
|
| 100 |
+
"content_analysis": {
|
| 101 |
+
"classes": 3,
|
| 102 |
+
"functions": 17,
|
| 103 |
+
"lines": 460,
|
| 104 |
+
"docstring": true,
|
| 105 |
+
"size_kb": 17.228515625
|
| 106 |
+
},
|
| 107 |
+
"status": "MISSING",
|
| 108 |
+
"issues": [
|
| 109 |
+
"Import errors"
|
| 110 |
+
]
|
| 111 |
+
},
|
| 112 |
+
"6.5": {
|
| 113 |
+
"task_id": "6.5",
|
| 114 |
+
"filename": "cross_dataset_linker.py",
|
| 115 |
+
"description": "Cross-dataset conversation linking",
|
| 116 |
+
"exists": false,
|
| 117 |
+
"size_bytes": 0,
|
| 118 |
+
"imports_ok": false,
|
| 119 |
+
"content_analysis": {},
|
| 120 |
+
"status": "MISSING",
|
| 121 |
+
"issues": [
|
| 122 |
+
"File does not exist"
|
| 123 |
+
]
|
| 124 |
+
},
|
| 125 |
+
"6.6": {
|
| 126 |
+
"task_id": "6.6",
|
| 127 |
+
"filename": "metadata_schema.py",
|
| 128 |
+
"description": "Unified metadata schema",
|
| 129 |
+
"exists": false,
|
| 130 |
+
"size_bytes": 0,
|
| 131 |
+
"imports_ok": false,
|
| 132 |
+
"content_analysis": {},
|
| 133 |
+
"status": "MISSING",
|
| 134 |
+
"issues": [
|
| 135 |
+
"File does not exist"
|
| 136 |
+
]
|
| 137 |
+
},
|
| 138 |
+
"6.7": {
|
| 139 |
+
"task_id": "6.7",
|
| 140 |
+
"filename": "therapeutic_intelligence.py",
|
| 141 |
+
"description": "Comprehensive therapeutic approach classification",
|
| 142 |
+
"exists": true,
|
| 143 |
+
"size_bytes": 26025,
|
| 144 |
+
"imports_ok": false,
|
| 145 |
+
"content_analysis": {
|
| 146 |
+
"classes": 4,
|
| 147 |
+
"functions": 18,
|
| 148 |
+
"lines": 582,
|
| 149 |
+
"docstring": true,
|
| 150 |
+
"size_kb": 25.4091796875
|
| 151 |
+
},
|
| 152 |
+
"status": "MISSING",
|
| 153 |
+
"issues": [
|
| 154 |
+
"Import errors"
|
| 155 |
+
]
|
| 156 |
+
},
|
| 157 |
+
"6.8": {
|
| 158 |
+
"task_id": "6.8",
|
| 159 |
+
"filename": "condition_pattern_recognition.py",
|
| 160 |
+
"description": "Mental health condition pattern recognition",
|
| 161 |
+
"exists": false,
|
| 162 |
+
"size_bytes": 0,
|
| 163 |
+
"imports_ok": false,
|
| 164 |
+
"content_analysis": {},
|
| 165 |
+
"status": "MISSING",
|
| 166 |
+
"issues": [
|
| 167 |
+
"File does not exist"
|
| 168 |
+
]
|
| 169 |
+
},
|
| 170 |
+
"6.9": {
|
| 171 |
+
"task_id": "6.9",
|
| 172 |
+
"filename": "outcome_prediction.py",
|
| 173 |
+
"description": "Therapeutic outcome prediction models",
|
| 174 |
+
"exists": false,
|
| 175 |
+
"size_bytes": 0,
|
| 176 |
+
"imports_ok": false,
|
| 177 |
+
"content_analysis": {},
|
| 178 |
+
"status": "MISSING",
|
| 179 |
+
"issues": [
|
| 180 |
+
"File does not exist"
|
| 181 |
+
]
|
| 182 |
+
},
|
| 183 |
+
"6.10": {
|
| 184 |
+
"task_id": "6.10",
|
| 185 |
+
"filename": "crisis_intervention_detector.py",
|
| 186 |
+
"description": "Crisis intervention detection and escalation",
|
| 187 |
+
"exists": true,
|
| 188 |
+
"size_bytes": 40122,
|
| 189 |
+
"imports_ok": true,
|
| 190 |
+
"content_analysis": {
|
| 191 |
+
"classes": 7,
|
| 192 |
+
"functions": 24,
|
| 193 |
+
"lines": 849,
|
| 194 |
+
"docstring": true,
|
| 195 |
+
"size_kb": 39.1484375
|
| 196 |
+
},
|
| 197 |
+
"status": "COMPLETE",
|
| 198 |
+
"issues": []
|
| 199 |
+
},
|
| 200 |
+
"6.11": {
|
| 201 |
+
"task_id": "6.11",
|
| 202 |
+
"filename": "personality_adapter.py",
|
| 203 |
+
"description": "Personality-aware conversation adaptation",
|
| 204 |
+
"exists": true,
|
| 205 |
+
"size_bytes": 30898,
|
| 206 |
+
"imports_ok": true,
|
| 207 |
+
"content_analysis": {
|
| 208 |
+
"classes": 7,
|
| 209 |
+
"functions": 26,
|
| 210 |
+
"lines": 704,
|
| 211 |
+
"docstring": true,
|
| 212 |
+
"size_kb": 30.1650390625
|
| 213 |
+
},
|
| 214 |
+
"status": "COMPLETE",
|
| 215 |
+
"issues": []
|
| 216 |
+
},
|
| 217 |
+
"6.12": {
|
| 218 |
+
"task_id": "6.12",
|
| 219 |
+
"filename": "cultural_competency_generator.py",
|
| 220 |
+
"description": "Cultural competency and diversity-aware response generation",
|
| 221 |
+
"exists": true,
|
| 222 |
+
"size_bytes": 34793,
|
| 223 |
+
"imports_ok": true,
|
| 224 |
+
"content_analysis": {
|
| 225 |
+
"classes": 6,
|
| 226 |
+
"functions": 35,
|
| 227 |
+
"lines": 789,
|
| 228 |
+
"docstring": true,
|
| 229 |
+
"size_kb": 33.9677734375
|
| 230 |
+
},
|
| 231 |
+
"status": "COMPLETE",
|
| 232 |
+
"issues": []
|
| 233 |
+
},
|
| 234 |
+
"6.13": {
|
| 235 |
+
"task_id": "6.13",
|
| 236 |
+
"filename": "audio_emotion_integration.py",
|
| 237 |
+
"description": "Audio emotion recognition integration",
|
| 238 |
+
"exists": true,
|
| 239 |
+
"size_bytes": 23773,
|
| 240 |
+
"imports_ok": true,
|
| 241 |
+
"content_analysis": {
|
| 242 |
+
"classes": 5,
|
| 243 |
+
"functions": 18,
|
| 244 |
+
"lines": 575,
|
| 245 |
+
"docstring": true,
|
| 246 |
+
"size_kb": 23.2099609375
|
| 247 |
+
},
|
| 248 |
+
"status": "COMPLETE",
|
| 249 |
+
"issues": []
|
| 250 |
+
},
|
| 251 |
+
"6.14": {
|
| 252 |
+
"task_id": "6.14",
|
| 253 |
+
"filename": "multimodal_disorder_analysis.py",
|
| 254 |
+
"description": "Multi-modal mental disorder analysis pipeline",
|
| 255 |
+
"exists": false,
|
| 256 |
+
"size_bytes": 0,
|
| 257 |
+
"imports_ok": false,
|
| 258 |
+
"content_analysis": {},
|
| 259 |
+
"status": "MISSING",
|
| 260 |
+
"issues": [
|
| 261 |
+
"File does not exist"
|
| 262 |
+
]
|
| 263 |
+
},
|
| 264 |
+
"6.15": {
|
| 265 |
+
"task_id": "6.15",
|
| 266 |
+
"filename": "emotion_cause_extraction.py",
|
| 267 |
+
"description": "Emotion cause extraction and intervention mapping",
|
| 268 |
+
"exists": false,
|
| 269 |
+
"size_bytes": 0,
|
| 270 |
+
"imports_ok": false,
|
| 271 |
+
"content_analysis": {},
|
| 272 |
+
"status": "MISSING",
|
| 273 |
+
"issues": [
|
| 274 |
+
"File does not exist"
|
| 275 |
+
]
|
| 276 |
+
},
|
| 277 |
+
"6.16": {
|
| 278 |
+
"task_id": "6.16",
|
| 279 |
+
"filename": "tfidf_clusterer.py",
|
| 280 |
+
"description": "TF-IDF feature-based conversation clustering",
|
| 281 |
+
"exists": true,
|
| 282 |
+
"size_bytes": 28344,
|
| 283 |
+
"imports_ok": false,
|
| 284 |
+
"content_analysis": {
|
| 285 |
+
"classes": 6,
|
| 286 |
+
"functions": 20,
|
| 287 |
+
"lines": 668,
|
| 288 |
+
"docstring": true,
|
| 289 |
+
"size_kb": 27.6640625
|
| 290 |
+
},
|
| 291 |
+
"status": "MISSING",
|
| 292 |
+
"issues": [
|
| 293 |
+
"Import errors"
|
| 294 |
+
]
|
| 295 |
+
},
|
| 296 |
+
"6.17": {
|
| 297 |
+
"task_id": "6.17",
|
| 298 |
+
"filename": "temporal_reasoner.py",
|
| 299 |
+
"description": "Temporal reasoning integration",
|
| 300 |
+
"exists": true,
|
| 301 |
+
"size_bytes": 31062,
|
| 302 |
+
"imports_ok": true,
|
| 303 |
+
"content_analysis": {
|
| 304 |
+
"classes": 7,
|
| 305 |
+
"functions": 25,
|
| 306 |
+
"lines": 744,
|
| 307 |
+
"docstring": true,
|
| 308 |
+
"size_kb": 30.3173828125
|
| 309 |
+
},
|
| 310 |
+
"status": "COMPLETE",
|
| 311 |
+
"issues": []
|
| 312 |
+
},
|
| 313 |
+
"6.18": {
|
| 314 |
+
"task_id": "6.18",
|
| 315 |
+
"filename": "evidence_validator.py",
|
| 316 |
+
"description": "Scientific evidence-based practice validation",
|
| 317 |
+
"exists": true,
|
| 318 |
+
"size_bytes": 33065,
|
| 319 |
+
"imports_ok": true,
|
| 320 |
+
"content_analysis": {
|
| 321 |
+
"classes": 8,
|
| 322 |
+
"functions": 22,
|
| 323 |
+
"lines": 755,
|
| 324 |
+
"docstring": true,
|
| 325 |
+
"size_kb": 32.271484375
|
| 326 |
+
},
|
| 327 |
+
"status": "COMPLETE",
|
| 328 |
+
"issues": []
|
| 329 |
+
},
|
| 330 |
+
"6.19": {
|
| 331 |
+
"task_id": "6.19",
|
| 332 |
+
"filename": "priority_weighted_sampler.py",
|
| 333 |
+
"description": "Priority-weighted sampling algorithms",
|
| 334 |
+
"exists": true,
|
| 335 |
+
"size_bytes": 26014,
|
| 336 |
+
"imports_ok": false,
|
| 337 |
+
"content_analysis": {
|
| 338 |
+
"classes": 3,
|
| 339 |
+
"functions": 17,
|
| 340 |
+
"lines": 646,
|
| 341 |
+
"docstring": true,
|
| 342 |
+
"size_kb": 25.404296875
|
| 343 |
+
},
|
| 344 |
+
"status": "MISSING",
|
| 345 |
+
"issues": [
|
| 346 |
+
"Import errors"
|
| 347 |
+
]
|
| 348 |
+
},
|
| 349 |
+
"6.20": {
|
| 350 |
+
"task_id": "6.20",
|
| 351 |
+
"filename": "condition_balancer.py",
|
| 352 |
+
"description": "Condition-specific balancing system",
|
| 353 |
+
"exists": true,
|
| 354 |
+
"size_bytes": 27040,
|
| 355 |
+
"imports_ok": false,
|
| 356 |
+
"content_analysis": {
|
| 357 |
+
"classes": 3,
|
| 358 |
+
"functions": 12,
|
| 359 |
+
"lines": 612,
|
| 360 |
+
"docstring": true,
|
| 361 |
+
"size_kb": 26.40625
|
| 362 |
+
},
|
| 363 |
+
"status": "MISSING",
|
| 364 |
+
"issues": [
|
| 365 |
+
"Import errors"
|
| 366 |
+
]
|
| 367 |
+
},
|
| 368 |
+
"6.21": {
|
| 369 |
+
"task_id": "6.21",
|
| 370 |
+
"filename": "approach_diversity_optimizer.py",
|
| 371 |
+
"description": "Therapeutic approach diversity optimization",
|
| 372 |
+
"exists": true,
|
| 373 |
+
"size_bytes": 34619,
|
| 374 |
+
"imports_ok": false,
|
| 375 |
+
"content_analysis": {
|
| 376 |
+
"classes": 3,
|
| 377 |
+
"functions": 15,
|
| 378 |
+
"lines": 718,
|
| 379 |
+
"docstring": true,
|
| 380 |
+
"size_kb": 33.8076171875
|
| 381 |
+
},
|
| 382 |
+
"status": "MISSING",
|
| 383 |
+
"issues": [
|
| 384 |
+
"Import errors"
|
| 385 |
+
]
|
| 386 |
+
},
|
| 387 |
+
"6.22": {
|
| 388 |
+
"task_id": "6.22",
|
| 389 |
+
"filename": "demographic_balancer.py",
|
| 390 |
+
"description": "Demographic and cultural diversity balancing",
|
| 391 |
+
"exists": true,
|
| 392 |
+
"size_bytes": 21222,
|
| 393 |
+
"imports_ok": false,
|
| 394 |
+
"content_analysis": {
|
| 395 |
+
"classes": 3,
|
| 396 |
+
"functions": 12,
|
| 397 |
+
"lines": 486,
|
| 398 |
+
"docstring": true,
|
| 399 |
+
"size_kb": 20.724609375
|
| 400 |
+
},
|
| 401 |
+
"status": "MISSING",
|
| 402 |
+
"issues": [
|
| 403 |
+
"Import errors"
|
| 404 |
+
]
|
| 405 |
+
},
|
| 406 |
+
"6.23": {
|
| 407 |
+
"task_id": "6.23",
|
| 408 |
+
"filename": "complexity_stratifier.py",
|
| 409 |
+
"description": "Conversation complexity stratification",
|
| 410 |
+
"exists": true,
|
| 411 |
+
"size_bytes": 26863,
|
| 412 |
+
"imports_ok": false,
|
| 413 |
+
"content_analysis": {
|
| 414 |
+
"classes": 3,
|
| 415 |
+
"functions": 14,
|
| 416 |
+
"lines": 623,
|
| 417 |
+
"docstring": true,
|
| 418 |
+
"size_kb": 26.2333984375
|
| 419 |
+
},
|
| 420 |
+
"status": "MISSING",
|
| 421 |
+
"issues": [
|
| 422 |
+
"Import errors"
|
| 423 |
+
]
|
| 424 |
+
},
|
| 425 |
+
"6.24": {
|
| 426 |
+
"task_id": "6.24",
|
| 427 |
+
"filename": "crisis_routine_balancer.py",
|
| 428 |
+
"description": "Crisis-to-routine conversation ratio optimization",
|
| 429 |
+
"exists": true,
|
| 430 |
+
"size_bytes": 24423,
|
| 431 |
+
"imports_ok": false,
|
| 432 |
+
"content_analysis": {
|
| 433 |
+
"classes": 3,
|
| 434 |
+
"functions": 13,
|
| 435 |
+
"lines": 574,
|
| 436 |
+
"docstring": true,
|
| 437 |
+
"size_kb": 23.8505859375
|
| 438 |
+
},
|
| 439 |
+
"status": "MISSING",
|
| 440 |
+
"issues": [
|
| 441 |
+
"Import errors"
|
| 442 |
+
]
|
| 443 |
+
},
|
| 444 |
+
"6.25": {
|
| 445 |
+
"task_id": "6.25",
|
| 446 |
+
"filename": "multi_tier_validator.py",
|
| 447 |
+
"description": "Multi-tier quality validation system",
|
| 448 |
+
"exists": true,
|
| 449 |
+
"size_bytes": 29688,
|
| 450 |
+
"imports_ok": false,
|
| 451 |
+
"content_analysis": {
|
| 452 |
+
"classes": 5,
|
| 453 |
+
"functions": 25,
|
| 454 |
+
"lines": 730,
|
| 455 |
+
"docstring": true,
|
| 456 |
+
"size_kb": 28.9892578125
|
| 457 |
+
},
|
| 458 |
+
"status": "MISSING",
|
| 459 |
+
"issues": [
|
| 460 |
+
"Import errors"
|
| 461 |
+
]
|
| 462 |
+
},
|
| 463 |
+
"6.26": {
|
| 464 |
+
"task_id": "6.26",
|
| 465 |
+
"filename": "dsm5_accuracy_validator.py",
|
| 466 |
+
"description": "DSM-5 therapeutic accuracy validation",
|
| 467 |
+
"exists": true,
|
| 468 |
+
"size_bytes": 27020,
|
| 469 |
+
"imports_ok": true,
|
| 470 |
+
"content_analysis": {
|
| 471 |
+
"classes": 7,
|
| 472 |
+
"functions": 20,
|
| 473 |
+
"lines": 669,
|
| 474 |
+
"docstring": true,
|
| 475 |
+
"size_kb": 26.38671875
|
| 476 |
+
},
|
| 477 |
+
"status": "COMPLETE",
|
| 478 |
+
"issues": []
|
| 479 |
+
},
|
| 480 |
+
"6.27": {
|
| 481 |
+
"task_id": "6.27",
|
| 482 |
+
"filename": "safety_ethics_validator.py",
|
| 483 |
+
"description": "Conversation safety and ethics validation",
|
| 484 |
+
"exists": true,
|
| 485 |
+
"size_bytes": 33303,
|
| 486 |
+
"imports_ok": true,
|
| 487 |
+
"content_analysis": {
|
| 488 |
+
"classes": 7,
|
| 489 |
+
"functions": 20,
|
| 490 |
+
"lines": 804,
|
| 491 |
+
"docstring": true,
|
| 492 |
+
"size_kb": 32.5224609375
|
| 493 |
+
},
|
| 494 |
+
"status": "COMPLETE",
|
| 495 |
+
"issues": []
|
| 496 |
+
},
|
| 497 |
+
"6.28": {
|
| 498 |
+
"task_id": "6.28",
|
| 499 |
+
"filename": "effectiveness_predictor.py",
|
| 500 |
+
"description": "Therapeutic effectiveness prediction",
|
| 501 |
+
"exists": true,
|
| 502 |
+
"size_bytes": 28432,
|
| 503 |
+
"imports_ok": false,
|
| 504 |
+
"content_analysis": {
|
| 505 |
+
"classes": 6,
|
| 506 |
+
"functions": 17,
|
| 507 |
+
"lines": 633,
|
| 508 |
+
"docstring": true,
|
| 509 |
+
"size_kb": 27.765625
|
| 510 |
+
},
|
| 511 |
+
"status": "MISSING",
|
| 512 |
+
"issues": [
|
| 513 |
+
"Import errors"
|
| 514 |
+
]
|
| 515 |
+
},
|
| 516 |
+
"6.29": {
|
| 517 |
+
"task_id": "6.29",
|
| 518 |
+
"filename": "coherence_validator.py",
|
| 519 |
+
"description": "Conversation coherence validation using CoT reasoning",
|
| 520 |
+
"exists": true,
|
| 521 |
+
"size_bytes": 39311,
|
| 522 |
+
"imports_ok": true,
|
| 523 |
+
"content_analysis": {
|
| 524 |
+
"classes": 5,
|
| 525 |
+
"functions": 24,
|
| 526 |
+
"lines": 1016,
|
| 527 |
+
"docstring": true,
|
| 528 |
+
"size_kb": 38.3896484375
|
| 529 |
+
},
|
| 530 |
+
"status": "COMPLETE",
|
| 531 |
+
"issues": []
|
| 532 |
+
},
|
| 533 |
+
"6.30": {
|
| 534 |
+
"task_id": "6.30",
|
| 535 |
+
"filename": "realtime_quality_monitor.py",
|
| 536 |
+
"description": "Real-time conversation quality monitoring",
|
| 537 |
+
"exists": true,
|
| 538 |
+
"size_bytes": 17831,
|
| 539 |
+
"imports_ok": true,
|
| 540 |
+
"content_analysis": {
|
| 541 |
+
"classes": 5,
|
| 542 |
+
"functions": 20,
|
| 543 |
+
"lines": 467,
|
| 544 |
+
"docstring": true,
|
| 545 |
+
"size_kb": 17.41015625
|
| 546 |
+
},
|
| 547 |
+
"status": "COMPLETE",
|
| 548 |
+
"issues": []
|
| 549 |
+
},
|
| 550 |
+
"6.31": {
|
| 551 |
+
"task_id": "6.31",
|
| 552 |
+
"filename": "production_exporter.py",
|
| 553 |
+
"description": "Production-ready dataset export with tiered access",
|
| 554 |
+
"exists": true,
|
| 555 |
+
"size_bytes": 27472,
|
| 556 |
+
"imports_ok": true,
|
| 557 |
+
"content_analysis": {
|
| 558 |
+
"classes": 5,
|
| 559 |
+
"functions": 24,
|
| 560 |
+
"lines": 710,
|
| 561 |
+
"docstring": true,
|
| 562 |
+
"size_kb": 26.828125
|
| 563 |
+
},
|
| 564 |
+
"status": "COMPLETE",
|
| 565 |
+
"issues": []
|
| 566 |
+
},
|
| 567 |
+
"6.32": {
|
| 568 |
+
"task_id": "6.32",
|
| 569 |
+
"filename": "adaptive_learner.py",
|
| 570 |
+
"description": "Adaptive learning pipeline",
|
| 571 |
+
"exists": true,
|
| 572 |
+
"size_bytes": 27077,
|
| 573 |
+
"imports_ok": false,
|
| 574 |
+
"content_analysis": {
|
| 575 |
+
"classes": 8,
|
| 576 |
+
"functions": 34,
|
| 577 |
+
"lines": 684,
|
| 578 |
+
"docstring": true,
|
| 579 |
+
"size_kb": 26.4423828125
|
| 580 |
+
},
|
| 581 |
+
"status": "MISSING",
|
| 582 |
+
"issues": [
|
| 583 |
+
"Import errors"
|
| 584 |
+
]
|
| 585 |
+
},
|
| 586 |
+
"6.33": {
|
| 587 |
+
"task_id": "6.33",
|
| 588 |
+
"filename": "analytics_dashboard.py",
|
| 589 |
+
"description": "Comprehensive analytics dashboard",
|
| 590 |
+
"exists": true,
|
| 591 |
+
"size_bytes": 18609,
|
| 592 |
+
"imports_ok": true,
|
| 593 |
+
"content_analysis": {
|
| 594 |
+
"classes": 2,
|
| 595 |
+
"functions": 17,
|
| 596 |
+
"lines": 455,
|
| 597 |
+
"docstring": true,
|
| 598 |
+
"size_kb": 18.1240234375
|
| 599 |
+
},
|
| 600 |
+
"status": "COMPLETE",
|
| 601 |
+
"issues": []
|
| 602 |
+
},
|
| 603 |
+
"6.34": {
|
| 604 |
+
"task_id": "6.34",
|
| 605 |
+
"filename": "automated_maintenance.py",
|
| 606 |
+
"description": "Automated dataset update and maintenance procedures",
|
| 607 |
+
"exists": true,
|
| 608 |
+
"size_bytes": 20792,
|
| 609 |
+
"imports_ok": true,
|
| 610 |
+
"content_analysis": {
|
| 611 |
+
"classes": 5,
|
| 612 |
+
"functions": 22,
|
| 613 |
+
"lines": 571,
|
| 614 |
+
"docstring": true,
|
| 615 |
+
"size_kb": 20.296875
|
| 616 |
+
},
|
| 617 |
+
"status": "COMPLETE",
|
| 618 |
+
"issues": []
|
| 619 |
+
},
|
| 620 |
+
"6.35": {
|
| 621 |
+
"task_id": "6.35",
|
| 622 |
+
"filename": "feedback_loops.py",
|
| 623 |
+
"description": "Conversation effectiveness feedback loops",
|
| 624 |
+
"exists": true,
|
| 625 |
+
"size_bytes": 19230,
|
| 626 |
+
"imports_ok": true,
|
| 627 |
+
"content_analysis": {
|
| 628 |
+
"classes": 4,
|
| 629 |
+
"functions": 12,
|
| 630 |
+
"lines": 461,
|
| 631 |
+
"docstring": true,
|
| 632 |
+
"size_kb": 18.7763671875
|
| 633 |
+
},
|
| 634 |
+
"status": "COMPLETE",
|
| 635 |
+
"issues": []
|
| 636 |
+
},
|
| 637 |
+
"6.36": {
|
| 638 |
+
"task_id": "6.36",
|
| 639 |
+
"filename": "comprehensive_api.py",
|
| 640 |
+
"description": "Comprehensive documentation and API",
|
| 641 |
+
"exists": true,
|
| 642 |
+
"size_bytes": 30454,
|
| 643 |
+
"imports_ok": true,
|
| 644 |
+
"content_analysis": {
|
| 645 |
+
"classes": 3,
|
| 646 |
+
"functions": 8,
|
| 647 |
+
"lines": 873,
|
| 648 |
+
"docstring": true,
|
| 649 |
+
"size_kb": 29.732421875
|
| 650 |
+
},
|
| 651 |
+
"status": "COMPLETE",
|
| 652 |
+
"issues": []
|
| 653 |
+
}
|
| 654 |
+
}
|
| 655 |
+
}
|
configs/stage_configs/auto_resume_requirements.txt
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Automatic Resume System Requirements
|
| 2 |
+
# Install with: pip install -r auto_resume_requirements.txt
|
| 3 |
+
|
| 4 |
+
# Core system monitoring (from checkpoint system)
|
| 5 |
+
psutil>=5.8.0
|
| 6 |
+
|
| 7 |
+
# Built-in Python modules (listed for reference)
|
| 8 |
+
# asyncio - Built-in Python 3.7+
|
| 9 |
+
# signal - Built-in
|
| 10 |
+
# threading - Built-in
|
| 11 |
+
# time - Built-in
|
| 12 |
+
# uuid - Built-in
|
| 13 |
+
# json - Built-in
|
| 14 |
+
# logging - Built-in
|
| 15 |
+
# os - Built-in
|
| 16 |
+
# sys - Built-in
|
| 17 |
+
# datetime - Built-in
|
| 18 |
+
# pathlib - Built-in
|
| 19 |
+
# tempfile - Built-in
|
| 20 |
+
# shutil - Built-in
|
| 21 |
+
# collections - Built-in
|
| 22 |
+
# dataclasses - Built-in Python 3.7+
|
| 23 |
+
# enum - Built-in
|
| 24 |
+
# typing - Built-in Python 3.5+
|
| 25 |
+
|
| 26 |
+
# Dependencies from checkpoint system
|
| 27 |
+
# (Include checkpoint_requirements.txt)
|
| 28 |
+
|
| 29 |
+
# Optional: Enhanced monitoring and alerting
|
| 30 |
+
# prometheus_client>=0.14.0 # For Prometheus metrics
|
| 31 |
+
# redis>=4.0.0 # For distributed coordination
|
| 32 |
+
# pika>=1.3.0 # For RabbitMQ message queuing
|
| 33 |
+
# celery>=5.2.0 # For distributed task management
|
| 34 |
+
|
| 35 |
+
# Development and testing
|
| 36 |
+
pytest>=7.0.0
|
| 37 |
+
pytest-asyncio>=0.21.0
|
| 38 |
+
coverage>=6.0.0
|
| 39 |
+
pytest-timeout>=2.1.0 # For timeout testing
|
| 40 |
+
|
| 41 |
+
# Code quality
|
| 42 |
+
black>=22.0.0
|
| 43 |
+
flake8>=5.0.0
|
| 44 |
+
mypy>=0.991
|
| 45 |
+
|
| 46 |
+
# Documentation
|
| 47 |
+
sphinx>=4.0.0
|
| 48 |
+
sphinx-rtd-theme>=1.0.0
|
| 49 |
+
|
| 50 |
+
# Performance profiling (optional)
|
| 51 |
+
# memory_profiler>=0.60.0 # For memory usage profiling
|
| 52 |
+
# py-spy>=0.3.0 # For CPU profiling
|
configs/stage_configs/bias_validated_validation_summary.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"total_datasets": 2,
|
| 3 |
+
"safe_datasets": 0,
|
| 4 |
+
"caution_datasets": 2,
|
| 5 |
+
"safety_percentage": 0.0,
|
| 6 |
+
"bias_categories_checked": [
|
| 7 |
+
"cultural_bias",
|
| 8 |
+
"therapeutic_bias",
|
| 9 |
+
"accessibility_bias",
|
| 10 |
+
"demographic_bias",
|
| 11 |
+
"safety_concerns"
|
| 12 |
+
],
|
| 13 |
+
"validation_complete": true
|
| 14 |
+
}
|
configs/stage_configs/boolq_validation_pipeline_config.json
ADDED
|
File without changes
|
configs/stage_configs/celery_config.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Celery Configuration for Pixelated Empathy AI Distributed Processing
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
from celery import Celery
|
| 9 |
+
from kombu import Exchange, Queue
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Celery application configuration
|
| 13 |
+
def create_celery_app():
    """Build the ``pixelated_empathy`` Celery application and return it.

    The broker and result-backend URLs are read from the
    ``CELERY_BROKER_URL`` and ``CELERY_RESULT_BACKEND`` environment
    variables; both fall back to a local Redis instance.
    """
    local_redis = "redis://localhost:6379/0"

    # (queue name, exchange name, routing key) for every declared queue.
    queue_specs = (
        ("default", "default", "default"),
        ("quality_validation", "quality", "quality.validation"),
        ("data_processing", "data", "data.processing"),
        ("model_training", "training", "training.model"),
        ("backup", "backup", "backup.task"),
        ("high_priority", "priority", "priority.high"),
    )

    settings = {
        # Broker settings
        "broker_url": os.getenv("CELERY_BROKER_URL", local_redis),
        "result_backend": os.getenv("CELERY_RESULT_BACKEND", local_redis),
        # Task serialization
        # NOTE(review): pickle payloads can execute arbitrary code when
        # deserialized; confirm that only trusted producers can reach the
        # broker, or consider moving to "json".
        "task_serializer": "pickle",
        "accept_content": ["pickle", "json"],
        "result_serializer": "pickle",
        # Timezone settings
        "timezone": "UTC",
        "enable_utc": True,
        # Task routing: task name -> destination queue
        "task_routes": {
            "quality_validator.validate_task": {"queue": "quality_validation"},
            "data_processor.process_task": {"queue": "data_processing"},
            "model_trainer.train_task": {"queue": "model_training"},
            "backup.backup_task": {"queue": "backup"},
        },
        # Queue configuration
        "task_default_queue": "default",
        "task_queues": tuple(
            Queue(queue_name, Exchange(exchange_name), routing_key=routing_key)
            for queue_name, exchange_name, routing_key in queue_specs
        ),
        # Worker settings
        "worker_prefetch_multiplier": 1,
        "task_acks_late": True,
        "worker_max_tasks_per_child": 1000,
        # Task execution settings (seconds)
        "task_soft_time_limit": 300,  # 5 minutes
        "task_time_limit": 600,  # 10 minutes
        "task_reject_on_worker_lost": True,
        # Result settings (seconds)
        "result_expires": 3600,  # 1 hour
        # Monitoring
        "worker_send_task_events": True,
        "task_send_sent_event": True,
        # Per-task rate limits; "*" applies to every task
        "task_annotations": {
            "*": {"rate_limit": "100/m"},
            "quality_validator.validate_task": {"rate_limit": "50/m"},
            "model_trainer.train_task": {"rate_limit": "5/m"},
        },
        # Beat schedule for periodic tasks (intervals in seconds)
        "beat_schedule": {
            "cleanup-old-results": {
                "task": "maintenance.cleanup_old_results",
                "schedule": 3600.0,  # Every hour
            },
            "health-check": {
                "task": "monitoring.health_check",
                "schedule": 300.0,  # Every 5 minutes
            },
            "backup-data": {
                "task": "backup.backup_task",
                "schedule": 86400.0,  # Daily
                "kwargs": {"backup_type": "incremental"},
            },
        },
    }

    app = Celery("pixelated_empathy")
    app.conf.update(**settings)
    return app
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
# Shared application instance, built once when this module is imported.
celery_app = create_celery_app()

# Modules handed to Celery's task autodiscovery.
_TASK_MODULES = [
    "distributed_processing.quality_validator",
    "distributed_processing.data_processor",
    "distributed_processing.model_trainer",
    "distributed_processing.backup_manager",
    "distributed_processing.monitoring",
]
celery_app.autodiscover_tasks(_TASK_MODULES)

if __name__ == "__main__":
    celery_app.start()
|
configs/stage_configs/check_config.sh
ADDED
|
File without changes
|
configs/stage_configs/checkpoint_config.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"checkpoint_config": {
|
| 3 |
+
"save_steps": 506,
|
| 4 |
+
"save_total_limit": 5,
|
| 5 |
+
"output_dir": "./checkpoints",
|
| 6 |
+
"resume_from_checkpoint": true,
|
| 7 |
+
"auto_find_batch_size": false
|
| 8 |
+
},
|
| 9 |
+
"backup_strategy": {
|
| 10 |
+
"local_backup": true,
|
| 11 |
+
"cloud_backup": false,
|
| 12 |
+
"backup_frequency": "every_checkpoint",
|
| 13 |
+
"backup_location": "./backups"
|
| 14 |
+
},
|
| 15 |
+
"recovery_points": [
|
| 16 |
+
{
|
| 17 |
+
"step": 1013,
|
| 18 |
+
"description": "10% complete"
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"step": 2532,
|
| 22 |
+
"description": "25% complete"
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"step": 5065,
|
| 26 |
+
"description": "50% complete"
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"step": 7597,
|
| 30 |
+
"description": "75% complete"
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"step": 9117,
|
| 34 |
+
"description": "90% complete"
|
| 35 |
+
}
|
| 36 |
+
],
|
| 37 |
+
"monitoring": {
|
| 38 |
+
"track_loss": true,
|
| 39 |
+
"track_learning_rate": true,
|
| 40 |
+
"track_memory_usage": true,
|
| 41 |
+
"alert_on_divergence": true,
|
| 42 |
+
"loss_spike_threshold": 2.0
|
| 43 |
+
}
|
| 44 |
+
}
|
configs/stage_configs/checkpoint_requirements.txt
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Checkpoint System Requirements
|
| 2 |
+
# Install with: pip install -r checkpoint_requirements.txt
|
| 3 |
+
|
| 4 |
+
# Core system monitoring
|
| 5 |
+
psutil>=5.8.0
|
| 6 |
+
|
| 7 |
+
# Built-in Python modules (listed for reference)
|
| 8 |
+
# asyncio - Built-in Python 3.7+
|
| 9 |
+
# sqlite3 - Built-in
|
| 10 |
+
# pickle - Built-in
|
| 11 |
+
# gzip - Built-in
|
| 12 |
+
# json - Built-in
|
| 13 |
+
# hashlib - Built-in
|
| 14 |
+
# threading - Built-in
|
| 15 |
+
# pathlib - Built-in
|
| 16 |
+
# shutil - Built-in
|
| 17 |
+
# tempfile - Built-in
|
| 18 |
+
# uuid - Built-in
|
| 19 |
+
# time - Built-in
|
| 20 |
+
# datetime - Built-in
|
| 21 |
+
# logging - Built-in
|
| 22 |
+
# os - Built-in
|
| 23 |
+
# dataclasses - Built-in Python 3.7+
|
| 24 |
+
# enum - Built-in
|
| 25 |
+
# typing - Built-in Python 3.5+
|
| 26 |
+
|
| 27 |
+
# Optional: Enhanced features
|
| 28 |
+
# redis>=4.0.0 # For distributed checkpoint coordination
|
| 29 |
+
# cryptography>=3.0.0 # For checkpoint encryption
|
| 30 |
+
# lz4>=3.0.0 # For faster compression alternative
|
| 31 |
+
# msgpack>=1.0.0 # For more efficient serialization
|
| 32 |
+
|
| 33 |
+
# Development and testing
|
| 34 |
+
pytest>=7.0.0
|
| 35 |
+
pytest-asyncio>=0.21.0
|
| 36 |
+
coverage>=6.0.0
|
| 37 |
+
|
| 38 |
+
# Code quality
|
| 39 |
+
black>=22.0.0
|
| 40 |
+
flake8>=5.0.0
|
| 41 |
+
mypy>=0.991
|
| 42 |
+
|
| 43 |
+
# Documentation
|
| 44 |
+
sphinx>=4.0.0 # For generating documentation
|
| 45 |
+
sphinx-rtd-theme>=1.0.0
|
configs/stage_configs/claude_assessment.json
ADDED
|
File without changes
|
configs/stage_configs/cli_config.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration management for CLI.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
from copy import deepcopy
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Any, Dict, Optional, Union
|
| 10 |
+
|
| 11 |
+
try:
|
| 12 |
+
import yaml
|
| 13 |
+
YAML_AVAILABLE = True
|
| 14 |
+
except ImportError:
|
| 15 |
+
YAML_AVAILABLE = False
|
| 16 |
+
yaml = None # type: ignore
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class ConfigManager:
    """Manages configuration for the research system.

    Configuration is read from a YAML or JSON file and layered on top of
    ``DEFAULT_CONFIG``. Every dictionary returned by this class is
    independent of ``DEFAULT_CONFIG``, so callers may mutate it freely
    without changing the defaults seen by later loads.
    """

    DEFAULT_CONFIG_PATH = Path.home() / ".journal_research" / "config.yaml"
    DEFAULT_CONFIG = {
        "orchestrator": {
            "max_retries": 3,
            "retry_delay_seconds": 1.0,
            "progress_history_limit": 100,
            "parallel_evaluation": False,
            "parallel_integration_planning": False,
            "max_workers": 4,
            "session_storage_path": None,
            "visualization_max_points": 100,
            "fallback_on_failure": True,
        },
        "discovery": {
            "pubmed": {
                "api_key": None,
                "base_url": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils",
                "search_limit": 100,
            },
            "doaj": {
                "base_url": "https://doaj.org/api/v2",
            },
            "repositories": {
                "dryad": {"base_url": "https://datadryad.org/api/v2"},
                "zenodo": {"base_url": "https://zenodo.org/api"},
                "clinical_trials": {"base_url": "https://clinicaltrials.gov/api/v2"},
            },
        },
        "evaluation": {
            "therapeutic_relevance_weight": 0.35,
            "data_structure_quality_weight": 0.25,
            "training_integration_weight": 0.20,
            "ethical_accessibility_weight": 0.20,
            "high_priority_threshold": 7.5,
            "medium_priority_threshold": 5.0,
        },
        "acquisition": {
            "storage_base_path": "data/acquired_datasets",
            "encryption_enabled": False,
            "download_timeout": 3600,
            "max_retries": 3,
            "chunk_size": 8192,
            "resume_downloads": True,
        },
        "integration": {
            "target_format": "chatml",
            "default_complexity": "medium",
        },
        "logging": {
            "level": "INFO",
            "file": None,
            "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        },
    }

    def __init__(self, config_path: Optional[Union[Path, str]] = None):
        """Initialize config manager with optional config path.

        Args:
            config_path: Location of the config file. Strings are converted
                to ``Path``; a falsy value selects ``DEFAULT_CONFIG_PATH``.
        """
        if config_path is not None and isinstance(config_path, str):
            config_path = Path(config_path)
        self.config_path = config_path or self.DEFAULT_CONFIG_PATH
        # Best effort only: an unwritable location is tolerated here because
        # load() falls back to defaults and save() retries the mkdir itself.
        try:
            self.config_path.parent.mkdir(parents=True, exist_ok=True)
        except (PermissionError, OSError):
            pass

    def _uses_yaml(self) -> bool:
        """Return True when the file has a YAML suffix and PyYAML is importable."""
        return self.config_path.suffix in (".yaml", ".yml") and YAML_AVAILABLE

    def load(self) -> Dict[str, Any]:
        """Load configuration from file or return defaults.

        Returns:
            The file contents merged over a copy of ``DEFAULT_CONFIG`` with
            legacy aliases applied. If the file is missing or cannot be
            read or parsed, a copy of the defaults is returned instead (a
            warning is printed in the failure case).
        """
        if self.config_path.exists():
            try:
                with open(self.config_path, "r") as f:
                    if self._uses_yaml():
                        assert yaml is not None  # Type guard for type checker
                        config = yaml.safe_load(f) or {}
                    else:
                        # Fall back to JSON
                        config = json.load(f) or {}
                # Merge with defaults to ensure all keys exist
                merged = self._merge_config(self.DEFAULT_CONFIG, config)
                return self._apply_legacy_aliases(merged)
            except Exception as e:
                # Deliberate best effort: a broken config file must not stop
                # the caller, so report it and continue with the defaults.
                print(f"Warning: Could not load config from {self.config_path}: {e}")
                return self._apply_legacy_aliases(deepcopy(self.DEFAULT_CONFIG))
        return self._apply_legacy_aliases(deepcopy(self.DEFAULT_CONFIG))

    def save(self, config: Dict[str, Any]) -> None:
        """Save configuration to file.

        The parent directory is created if needed. YAML is written when the
        path has a YAML suffix and PyYAML is available, JSON otherwise.
        """
        self.config_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.config_path, "w") as f:
            if self._uses_yaml():
                assert yaml is not None  # Type guard for type checker
                yaml.dump(config, f, default_flow_style=False, indent=2)
            else:
                # Fall back to JSON
                json.dump(config, f, indent=2)

    @staticmethod
    def _lookup_path(config: Dict[str, Any], key_path: str, default: Any = None) -> Any:
        """Return the value at dot-separated ``key_path``, or ``default`` if absent."""
        value: Any = config
        for key in key_path.split("."):
            if isinstance(value, dict) and key in value:
                value = value[key]
            else:
                return default
        return value

    @staticmethod
    def _assign_path(config: Dict[str, Any], key_path: str, value: Any) -> None:
        """Store ``value`` at dot-separated ``key_path``, creating missing parents."""
        *parents, leaf = key_path.split(".")
        target = config
        for key in parents:
            if key not in target:
                target[key] = {}
            target = target[key]
        target[leaf] = value

    @staticmethod
    def _coerce_like(raw: str, current: Any) -> Any:
        """Convert ``raw`` to the numeric type of ``current`` when possible.

        Only ``int`` and ``float`` settings are converted; ``bool`` is
        excluded explicitly because it is a subclass of ``int``. Text that
        does not parse is returned unchanged.
        """
        if isinstance(current, bool) or not isinstance(current, (int, float)):
            return raw
        try:
            return type(current)(raw)
        except ValueError:
            return raw

    def get(self, key_path: str, default: Any = None) -> Any:
        """Get a configuration value by dot-separated path.

        The configuration is re-loaded from disk on every call.
        """
        return self._lookup_path(self.load(), key_path, default)

    def set(self, key_path: str, value: Any) -> None:
        """Set a configuration value by dot-separated path and save the result."""
        config = self.load()
        self._assign_path(config, key_path, value)
        self.save(config)

    def _merge_config(self, default: Dict[str, Any], user: Dict[str, Any]) -> Dict[str, Any]:
        """Recursively merge user config into default config.

        Returns a new dictionary; ``default`` is never modified and shares
        no nested containers with the result.
        """
        # A deep copy is required: with a shallow copy, sections that the
        # user file does not override would remain the very same dict
        # objects as in ``default``, so mutating the merged result would
        # silently rewrite the class-level defaults.
        result = deepcopy(default)
        for key, value in user.items():
            if key in default and isinstance(default[key], dict) and isinstance(value, dict):
                result[key] = self._merge_config(default[key], value)
            else:
                result[key] = value
        return result

    def _apply_legacy_aliases(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """Ensure legacy top-level aliases exist for backward compatibility.

        ``config`` is updated in place and also returned.
        """
        # Maintain top-level storage_base_path alias
        acquisition_config = config.get("acquisition", {})
        storage_base_path = acquisition_config.get("storage_base_path")
        if storage_base_path and "storage_base_path" not in config:
            config["storage_base_path"] = storage_base_path

        # Maintain top-level log_file alias
        logging_config = config.get("logging", {})
        log_file = logging_config.get("file")
        if log_file and "log_file" not in config:
            config["log_file"] = log_file

        return config

    def load_env_overrides(self) -> Dict[str, Any]:
        """Load configuration overrides from environment variables.

        Returns:
            A mapping of dot-separated config path to the raw (string)
            value of each ``JOURNAL_RESEARCH_*`` variable that is set.
        """
        env_prefix = "JOURNAL_RESEARCH_"

        # Map environment variables to config paths
        env_mappings = {
            "PUBMED_API_KEY": "discovery.pubmed.api_key",
            "STORAGE_PATH": "acquisition.storage_base_path",
            "LOG_LEVEL": "logging.level",
            "MAX_RETRIES": "orchestrator.max_retries",
            "MAX_WORKERS": "orchestrator.max_workers",
        }

        return {
            config_path: os.environ[env_prefix + env_var]
            for env_var, config_path in env_mappings.items()
            if env_prefix + env_var in os.environ
        }

    def apply_env_overrides(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """Apply environment variable overrides to config.

        Environment values are always strings, so an override that replaces
        a numeric setting is converted to that setting's type; otherwise
        ``MAX_RETRIES=5`` would store ``"5"`` where an ``int`` is expected.
        ``config`` is updated in place and also returned.
        """
        for key_path, raw in self.load_env_overrides().items():
            current = self._lookup_path(config, key_path)
            self._assign_path(config, key_path, self._coerce_like(raw, current))
        return config
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
# Global config manager instance
|
| 206 |
+
_config_manager = ConfigManager()
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def load_config(config_path: Optional[Union[Path, str]] = None) -> Dict[str, Any]:
    """Return the configuration with environment overrides applied.

    A dedicated ``ConfigManager`` is created when ``config_path`` is given;
    otherwise the module-level manager is used.
    """
    if isinstance(config_path, str):
        config_path = Path(config_path)
    if config_path:
        manager = ConfigManager(config_path)
    else:
        manager = _config_manager
    return manager.apply_env_overrides(manager.load())
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
def save_config(config: Dict[str, Any], config_path: Optional[Union[Path, str]] = None) -> None:
    """Write ``config`` to ``config_path``, or to the module-level manager's path."""
    if isinstance(config_path, str):
        config_path = Path(config_path)
    target = _config_manager if not config_path else ConfigManager(config_path)
    target.save(config)
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def get_config_value(key_path: str, default: Any = None) -> Any:
    """Look up a dot-separated key through the module-level config manager."""
    value = _config_manager.get(key_path, default)
    return value
|
| 232 |
+
|
configs/stage_configs/complexity_config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"beginner": {
|
| 3 |
+
"level": "Beginner",
|
| 4 |
+
"weight": 0.4,
|
| 5 |
+
"min_samples": 1000,
|
| 6 |
+
"max_samples": 15000,
|
| 7 |
+
"complexity_range": [
|
| 8 |
+
0.0,
|
| 9 |
+
0.4
|
| 10 |
+
],
|
| 11 |
+
"characteristics": [
|
| 12 |
+
"Simple emotional expressions",
|
| 13 |
+
"Basic therapeutic techniques",
|
| 14 |
+
"Clear, straightforward issues",
|
| 15 |
+
"Single-topic focus",
|
| 16 |
+
"Minimal comorbidity",
|
| 17 |
+
"Standard interventions"
|
| 18 |
+
]
|
| 19 |
+
},
|
| 20 |
+
"intermediate": {
|
| 21 |
+
"level": "Intermediate",
|
| 22 |
+
"weight": 0.45,
|
| 23 |
+
"min_samples": 1200,
|
| 24 |
+
"max_samples": 18000,
|
| 25 |
+
"complexity_range": [
|
| 26 |
+
0.4,
|
| 27 |
+
0.7
|
| 28 |
+
],
|
| 29 |
+
"characteristics": [
|
| 30 |
+
"Moderate emotional intensity",
|
| 31 |
+
"Multiple therapeutic techniques",
|
| 32 |
+
"Interconnected issues",
|
| 33 |
+
"Some comorbidity",
|
| 34 |
+
"Nuanced interventions",
|
| 35 |
+
"Relationship dynamics"
|
| 36 |
+
]
|
| 37 |
+
},
|
| 38 |
+
"advanced": {
|
| 39 |
+
"level": "Advanced",
|
| 40 |
+
"weight": 0.15,
|
| 41 |
+
"min_samples": 400,
|
| 42 |
+
"max_samples": 6000,
|
| 43 |
+
"complexity_range": [
|
| 44 |
+
0.7,
|
| 45 |
+
1.0
|
| 46 |
+
],
|
| 47 |
+
"characteristics": [
|
| 48 |
+
"High emotional intensity",
|
| 49 |
+
"Complex therapeutic approaches",
|
| 50 |
+
"Multiple interconnected issues",
|
| 51 |
+
"Significant comorbidity",
|
| 52 |
+
"Crisis intervention elements",
|
| 53 |
+
"Advanced clinical skills required"
|
| 54 |
+
]
|
| 55 |
+
}
|
| 56 |
+
}
|
configs/stage_configs/comprehensive_integration_summary.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"integration_complete": true,
|
| 3 |
+
"all_components_integrated": true,
|
| 4 |
+
"components_integrated": [
|
| 5 |
+
"long_term_journaling_system",
|
| 6 |
+
"tri_expert_voice_blending",
|
| 7 |
+
"edge_case_scenarios",
|
| 8 |
+
"dual_persona_dynamics",
|
| 9 |
+
"bias_detection_validation",
|
| 10 |
+
"psychology_knowledge_base"
|
| 11 |
+
],
|
| 12 |
+
"datasets": {
|
| 13 |
+
"journaling_enhanced": 39,
|
| 14 |
+
"voice_enhanced": 0,
|
| 15 |
+
"edge_cases": 5,
|
| 16 |
+
"dual_persona": 75,
|
| 17 |
+
"bias_validated": 10,
|
| 18 |
+
"psychology_kb_enhanced": 5,
|
| 19 |
+
"master_integrated": 0,
|
| 20 |
+
"total_datasets": 134
|
| 21 |
+
},
|
| 22 |
+
"expert_voices": [
|
| 23 |
+
"Tim Ferriss",
|
| 24 |
+
"Gabor Mat\u00e9",
|
| 25 |
+
"Bren\u00e9 Brown"
|
| 26 |
+
],
|
| 27 |
+
"psychology_concepts": 4867,
|
| 28 |
+
"bias_categories_checked": 5,
|
| 29 |
+
"therapeutic_modalities": 6,
|
| 30 |
+
"kan_28_status": "FULLY_SOLVED",
|
| 31 |
+
"integration_timestamp": "2024-10-28"
|
| 32 |
+
}
|
configs/stage_configs/condition_config.json
ADDED
|
@@ -0,0 +1,460 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"depression": {
|
| 3 |
+
"name": "Major Depressive Disorder",
|
| 4 |
+
"prevalence": 0.084,
|
| 5 |
+
"min_samples": 500,
|
| 6 |
+
"max_samples": 8000,
|
| 7 |
+
"aliases": [
|
| 8 |
+
"depression",
|
| 9 |
+
"depressed",
|
| 10 |
+
"major depression",
|
| 11 |
+
"mdd",
|
| 12 |
+
"sad",
|
| 13 |
+
"sadness"
|
| 14 |
+
],
|
| 15 |
+
"comorbid_conditions": [
|
| 16 |
+
"anxiety",
|
| 17 |
+
"ptsd",
|
| 18 |
+
"substance_abuse"
|
| 19 |
+
],
|
| 20 |
+
"severity_levels": [
|
| 21 |
+
"mild",
|
| 22 |
+
"moderate",
|
| 23 |
+
"severe"
|
| 24 |
+
]
|
| 25 |
+
},
|
| 26 |
+
"anxiety": {
|
| 27 |
+
"name": "Generalized Anxiety Disorder",
|
| 28 |
+
"prevalence": 0.031,
|
| 29 |
+
"min_samples": 400,
|
| 30 |
+
"max_samples": 6000,
|
| 31 |
+
"aliases": [
|
| 32 |
+
"anxiety",
|
| 33 |
+
"anxious",
|
| 34 |
+
"gad",
|
| 35 |
+
"worry",
|
| 36 |
+
"worried",
|
| 37 |
+
"panic"
|
| 38 |
+
],
|
| 39 |
+
"comorbid_conditions": [
|
| 40 |
+
"depression",
|
| 41 |
+
"ptsd",
|
| 42 |
+
"ocd"
|
| 43 |
+
],
|
| 44 |
+
"severity_levels": [
|
| 45 |
+
"mild",
|
| 46 |
+
"moderate",
|
| 47 |
+
"severe"
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
"ptsd": {
|
| 51 |
+
"name": "Post-Traumatic Stress Disorder",
|
| 52 |
+
"prevalence": 0.037,
|
| 53 |
+
"min_samples": 300,
|
| 54 |
+
"max_samples": 4000,
|
| 55 |
+
"aliases": [
|
| 56 |
+
"ptsd",
|
| 57 |
+
"trauma",
|
| 58 |
+
"traumatic",
|
| 59 |
+
"flashback",
|
| 60 |
+
"nightmares"
|
| 61 |
+
],
|
| 62 |
+
"comorbid_conditions": [
|
| 63 |
+
"depression",
|
| 64 |
+
"anxiety",
|
| 65 |
+
"substance_abuse"
|
| 66 |
+
],
|
| 67 |
+
"severity_levels": [
|
| 68 |
+
"mild",
|
| 69 |
+
"moderate",
|
| 70 |
+
"severe"
|
| 71 |
+
]
|
| 72 |
+
},
|
| 73 |
+
"bipolar": {
|
| 74 |
+
"name": "Bipolar Disorder",
|
| 75 |
+
"prevalence": 0.028,
|
| 76 |
+
"min_samples": 250,
|
| 77 |
+
"max_samples": 3000,
|
| 78 |
+
"aliases": [
|
| 79 |
+
"bipolar",
|
| 80 |
+
"manic",
|
| 81 |
+
"mania",
|
| 82 |
+
"mood swings",
|
| 83 |
+
"hypomania"
|
| 84 |
+
],
|
| 85 |
+
"comorbid_conditions": [
|
| 86 |
+
"anxiety",
|
| 87 |
+
"substance_abuse",
|
| 88 |
+
"adhd"
|
| 89 |
+
],
|
| 90 |
+
"severity_levels": [
|
| 91 |
+
"mild",
|
| 92 |
+
"moderate",
|
| 93 |
+
"severe"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
"adhd": {
|
| 97 |
+
"name": "Attention-Deficit/Hyperactivity Disorder",
|
| 98 |
+
"prevalence": 0.041,
|
| 99 |
+
"min_samples": 300,
|
| 100 |
+
"max_samples": 4000,
|
| 101 |
+
"aliases": [
|
| 102 |
+
"adhd",
|
| 103 |
+
"add",
|
| 104 |
+
"attention deficit",
|
| 105 |
+
"hyperactive",
|
| 106 |
+
"inattentive"
|
| 107 |
+
],
|
| 108 |
+
"comorbid_conditions": [
|
| 109 |
+
"anxiety",
|
| 110 |
+
"depression",
|
| 111 |
+
"bipolar"
|
| 112 |
+
],
|
| 113 |
+
"severity_levels": [
|
| 114 |
+
"mild",
|
| 115 |
+
"moderate",
|
| 116 |
+
"severe"
|
| 117 |
+
]
|
| 118 |
+
},
|
| 119 |
+
"ocd": {
|
| 120 |
+
"name": "Obsessive-Compulsive Disorder",
|
| 121 |
+
"prevalence": 0.012,
|
| 122 |
+
"min_samples": 150,
|
| 123 |
+
"max_samples": 2000,
|
| 124 |
+
"aliases": [
|
| 125 |
+
"ocd",
|
| 126 |
+
"obsessive",
|
| 127 |
+
"compulsive",
|
| 128 |
+
"intrusive thoughts",
|
| 129 |
+
"rituals"
|
| 130 |
+
],
|
| 131 |
+
"comorbid_conditions": [
|
| 132 |
+
"anxiety",
|
| 133 |
+
"depression",
|
| 134 |
+
"tics"
|
| 135 |
+
],
|
| 136 |
+
"severity_levels": [
|
| 137 |
+
"mild",
|
| 138 |
+
"moderate",
|
| 139 |
+
"severe"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
"autism": {
|
| 143 |
+
"name": "Autism Spectrum Disorder",
|
| 144 |
+
"prevalence": 0.016,
|
| 145 |
+
"min_samples": 200,
|
| 146 |
+
"max_samples": 2500,
|
| 147 |
+
"aliases": [
|
| 148 |
+
"autism",
|
| 149 |
+
"asd",
|
| 150 |
+
"asperger",
|
| 151 |
+
"autistic",
|
| 152 |
+
"spectrum"
|
| 153 |
+
],
|
| 154 |
+
"comorbid_conditions": [
|
| 155 |
+
"anxiety",
|
| 156 |
+
"depression",
|
| 157 |
+
"adhd"
|
| 158 |
+
],
|
| 159 |
+
"severity_levels": [
|
| 160 |
+
"level 1",
|
| 161 |
+
"level 2",
|
| 162 |
+
"level 3"
|
| 163 |
+
]
|
| 164 |
+
},
|
| 165 |
+
"bpd": {
|
| 166 |
+
"name": "Borderline Personality Disorder",
|
| 167 |
+
"prevalence": 0.014,
|
| 168 |
+
"min_samples": 150,
|
| 169 |
+
"max_samples": 2000,
|
| 170 |
+
"aliases": [
|
| 171 |
+
"bpd",
|
| 172 |
+
"borderline",
|
| 173 |
+
"personality disorder",
|
| 174 |
+
"emotional dysregulation"
|
| 175 |
+
],
|
| 176 |
+
"comorbid_conditions": [
|
| 177 |
+
"depression",
|
| 178 |
+
"anxiety",
|
| 179 |
+
"ptsd",
|
| 180 |
+
"substance_abuse"
|
| 181 |
+
],
|
| 182 |
+
"severity_levels": [
|
| 183 |
+
"mild",
|
| 184 |
+
"moderate",
|
| 185 |
+
"severe"
|
| 186 |
+
]
|
| 187 |
+
},
|
| 188 |
+
"schizophrenia": {
|
| 189 |
+
"name": "Schizophrenia",
|
| 190 |
+
"prevalence": 0.011,
|
| 191 |
+
"min_samples": 100,
|
| 192 |
+
"max_samples": 1500,
|
| 193 |
+
"aliases": [
|
| 194 |
+
"schizophrenia",
|
| 195 |
+
"psychosis",
|
| 196 |
+
"hallucinations",
|
| 197 |
+
"delusions"
|
| 198 |
+
],
|
| 199 |
+
"comorbid_conditions": [
|
| 200 |
+
"depression",
|
| 201 |
+
"anxiety",
|
| 202 |
+
"substance_abuse"
|
| 203 |
+
],
|
| 204 |
+
"severity_levels": [
|
| 205 |
+
"mild",
|
| 206 |
+
"moderate",
|
| 207 |
+
"severe"
|
| 208 |
+
]
|
| 209 |
+
},
|
| 210 |
+
"eating_disorders": {
|
| 211 |
+
"name": "Eating Disorders",
|
| 212 |
+
"prevalence": 0.009,
|
| 213 |
+
"min_samples": 100,
|
| 214 |
+
"max_samples": 1500,
|
| 215 |
+
"aliases": [
|
| 216 |
+
"anorexia",
|
| 217 |
+
"bulimia",
|
| 218 |
+
"binge eating",
|
| 219 |
+
"eating disorder",
|
| 220 |
+
"body image"
|
| 221 |
+
],
|
| 222 |
+
"comorbid_conditions": [
|
| 223 |
+
"depression",
|
| 224 |
+
"anxiety",
|
| 225 |
+
"ocd"
|
| 226 |
+
],
|
| 227 |
+
"severity_levels": [
|
| 228 |
+
"mild",
|
| 229 |
+
"moderate",
|
| 230 |
+
"severe"
|
| 231 |
+
]
|
| 232 |
+
},
|
| 233 |
+
"substance_abuse": {
|
| 234 |
+
"name": "Substance Use Disorders",
|
| 235 |
+
"prevalence": 0.104,
|
| 236 |
+
"min_samples": 400,
|
| 237 |
+
"max_samples": 6000,
|
| 238 |
+
"aliases": [
|
| 239 |
+
"addiction",
|
| 240 |
+
"substance abuse",
|
| 241 |
+
"alcoholism",
|
| 242 |
+
"drug abuse",
|
| 243 |
+
"dependency"
|
| 244 |
+
],
|
| 245 |
+
"comorbid_conditions": [
|
| 246 |
+
"depression",
|
| 247 |
+
"anxiety",
|
| 248 |
+
"ptsd",
|
| 249 |
+
"bipolar"
|
| 250 |
+
],
|
| 251 |
+
"severity_levels": [
|
| 252 |
+
"mild",
|
| 253 |
+
"moderate",
|
| 254 |
+
"severe"
|
| 255 |
+
]
|
| 256 |
+
},
|
| 257 |
+
"social_anxiety": {
|
| 258 |
+
"name": "Social Anxiety Disorder",
|
| 259 |
+
"prevalence": 0.073,
|
| 260 |
+
"min_samples": 300,
|
| 261 |
+
"max_samples": 4000,
|
| 262 |
+
"aliases": [
|
| 263 |
+
"social anxiety",
|
| 264 |
+
"social phobia",
|
| 265 |
+
"shy",
|
| 266 |
+
"shyness",
|
| 267 |
+
"social fear"
|
| 268 |
+
],
|
| 269 |
+
"comorbid_conditions": [
|
| 270 |
+
"depression",
|
| 271 |
+
"anxiety",
|
| 272 |
+
"avoidant_personality"
|
| 273 |
+
],
|
| 274 |
+
"severity_levels": [
|
| 275 |
+
"mild",
|
| 276 |
+
"moderate",
|
| 277 |
+
"severe"
|
| 278 |
+
]
|
| 279 |
+
},
|
| 280 |
+
"panic_disorder": {
|
| 281 |
+
"name": "Panic Disorder",
|
| 282 |
+
"prevalence": 0.028,
|
| 283 |
+
"min_samples": 200,
|
| 284 |
+
"max_samples": 3000,
|
| 285 |
+
"aliases": [
|
| 286 |
+
"panic disorder",
|
| 287 |
+
"panic attacks",
|
| 288 |
+
"agoraphobia",
|
| 289 |
+
"panic"
|
| 290 |
+
],
|
| 291 |
+
"comorbid_conditions": [
|
| 292 |
+
"anxiety",
|
| 293 |
+
"depression",
|
| 294 |
+
"substance_abuse"
|
| 295 |
+
],
|
| 296 |
+
"severity_levels": [
|
| 297 |
+
"mild",
|
| 298 |
+
"moderate",
|
| 299 |
+
"severe"
|
| 300 |
+
]
|
| 301 |
+
},
|
| 302 |
+
"insomnia": {
|
| 303 |
+
"name": "Insomnia and Sleep Disorders",
|
| 304 |
+
"prevalence": 0.06,
|
| 305 |
+
"min_samples": 250,
|
| 306 |
+
"max_samples": 3500,
|
| 307 |
+
"aliases": [
|
| 308 |
+
"insomnia",
|
| 309 |
+
"sleep disorder",
|
| 310 |
+
"sleepless",
|
| 311 |
+
"sleep problems"
|
| 312 |
+
],
|
| 313 |
+
"comorbid_conditions": [
|
| 314 |
+
"depression",
|
| 315 |
+
"anxiety",
|
| 316 |
+
"bipolar"
|
| 317 |
+
],
|
| 318 |
+
"severity_levels": [
|
| 319 |
+
"mild",
|
| 320 |
+
"moderate",
|
| 321 |
+
"severe"
|
| 322 |
+
]
|
| 323 |
+
},
|
| 324 |
+
"chronic_pain": {
|
| 325 |
+
"name": "Chronic Pain and Mental Health",
|
| 326 |
+
"prevalence": 0.05,
|
| 327 |
+
"min_samples": 200,
|
| 328 |
+
"max_samples": 3000,
|
| 329 |
+
"aliases": [
|
| 330 |
+
"chronic pain",
|
| 331 |
+
"fibromyalgia",
|
| 332 |
+
"pain",
|
| 333 |
+
"chronic illness"
|
| 334 |
+
],
|
| 335 |
+
"comorbid_conditions": [
|
| 336 |
+
"depression",
|
| 337 |
+
"anxiety",
|
| 338 |
+
"ptsd"
|
| 339 |
+
],
|
| 340 |
+
"severity_levels": [
|
| 341 |
+
"mild",
|
| 342 |
+
"moderate",
|
| 343 |
+
"severe"
|
| 344 |
+
]
|
| 345 |
+
},
|
| 346 |
+
"grief": {
|
| 347 |
+
"name": "Grief and Bereavement",
|
| 348 |
+
"prevalence": 0.035,
|
| 349 |
+
"min_samples": 150,
|
| 350 |
+
"max_samples": 2500,
|
| 351 |
+
"aliases": [
|
| 352 |
+
"grief",
|
| 353 |
+
"bereavement",
|
| 354 |
+
"loss",
|
| 355 |
+
"mourning",
|
| 356 |
+
"death"
|
| 357 |
+
],
|
| 358 |
+
"comorbid_conditions": [
|
| 359 |
+
"depression",
|
| 360 |
+
"anxiety",
|
| 361 |
+
"ptsd"
|
| 362 |
+
],
|
| 363 |
+
"severity_levels": [
|
| 364 |
+
"normal",
|
| 365 |
+
"complicated",
|
| 366 |
+
"prolonged"
|
| 367 |
+
]
|
| 368 |
+
},
|
| 369 |
+
"relationship_issues": {
|
| 370 |
+
"name": "Relationship and Interpersonal Issues",
|
| 371 |
+
"prevalence": 0.08,
|
| 372 |
+
"min_samples": 300,
|
| 373 |
+
"max_samples": 4500,
|
| 374 |
+
"aliases": [
|
| 375 |
+
"relationship",
|
| 376 |
+
"marriage",
|
| 377 |
+
"divorce",
|
| 378 |
+
"breakup",
|
| 379 |
+
"interpersonal"
|
| 380 |
+
],
|
| 381 |
+
"comorbid_conditions": [
|
| 382 |
+
"depression",
|
| 383 |
+
"anxiety",
|
| 384 |
+
"attachment_issues"
|
| 385 |
+
],
|
| 386 |
+
"severity_levels": [
|
| 387 |
+
"mild",
|
| 388 |
+
"moderate",
|
| 389 |
+
"severe"
|
| 390 |
+
]
|
| 391 |
+
},
|
| 392 |
+
"work_stress": {
|
| 393 |
+
"name": "Work-Related Stress and Burnout",
|
| 394 |
+
"prevalence": 0.07,
|
| 395 |
+
"min_samples": 250,
|
| 396 |
+
"max_samples": 3500,
|
| 397 |
+
"aliases": [
|
| 398 |
+
"work stress",
|
| 399 |
+
"burnout",
|
| 400 |
+
"job stress",
|
| 401 |
+
"workplace",
|
| 402 |
+
"career"
|
| 403 |
+
],
|
| 404 |
+
"comorbid_conditions": [
|
| 405 |
+
"depression",
|
| 406 |
+
"anxiety",
|
| 407 |
+
"insomnia"
|
| 408 |
+
],
|
| 409 |
+
"severity_levels": [
|
| 410 |
+
"mild",
|
| 411 |
+
"moderate",
|
| 412 |
+
"severe"
|
| 413 |
+
]
|
| 414 |
+
},
|
| 415 |
+
"parenting_stress": {
|
| 416 |
+
"name": "Parenting Stress and Family Issues",
|
| 417 |
+
"prevalence": 0.045,
|
| 418 |
+
"min_samples": 200,
|
| 419 |
+
"max_samples": 3000,
|
| 420 |
+
"aliases": [
|
| 421 |
+
"parenting",
|
| 422 |
+
"family stress",
|
| 423 |
+
"child behavior",
|
| 424 |
+
"parental stress"
|
| 425 |
+
],
|
| 426 |
+
"comorbid_conditions": [
|
| 427 |
+
"depression",
|
| 428 |
+
"anxiety",
|
| 429 |
+
"relationship_issues"
|
| 430 |
+
],
|
| 431 |
+
"severity_levels": [
|
| 432 |
+
"mild",
|
| 433 |
+
"moderate",
|
| 434 |
+
"severe"
|
| 435 |
+
]
|
| 436 |
+
},
|
| 437 |
+
"loneliness": {
|
| 438 |
+
"name": "Loneliness and Social Isolation",
|
| 439 |
+
"prevalence": 0.055,
|
| 440 |
+
"min_samples": 200,
|
| 441 |
+
"max_samples": 3000,
|
| 442 |
+
"aliases": [
|
| 443 |
+
"loneliness",
|
| 444 |
+
"lonely",
|
| 445 |
+
"isolated",
|
| 446 |
+
"social isolation",
|
| 447 |
+
"alone"
|
| 448 |
+
],
|
| 449 |
+
"comorbid_conditions": [
|
| 450 |
+
"depression",
|
| 451 |
+
"anxiety",
|
| 452 |
+
"social_anxiety"
|
| 453 |
+
],
|
| 454 |
+
"severity_levels": [
|
| 455 |
+
"mild",
|
| 456 |
+
"moderate",
|
| 457 |
+
"severe"
|
| 458 |
+
]
|
| 459 |
+
}
|
| 460 |
+
}
|
configs/stage_configs/config.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Configuration for NVIDIA NeMo Data Designer service."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from typing import Optional
|
| 6 |
+
|
| 7 |
+
try:
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
load_dotenv()
|
| 10 |
+
except ImportError:
|
| 11 |
+
pass # dotenv is optional
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@dataclass
class DataDesignerConfig:
    """Settings used by the NeMo Data Designer client.

    Instances can be built directly, or from environment variables via
    ``from_env``. Call ``validate`` to check the values before use.
    """

    # Service endpoint; the default targets a local Docker Compose deployment.
    base_url: str = "http://localhost:8000"
    # API key; ``validate`` rejects a missing or empty value.
    api_key: Optional[str] = None
    # Timeout, 5 minutes by default.
    timeout: int = 300
    max_retries: int = 3
    batch_size: int = 1000

    @classmethod
    def from_env(cls) -> "DataDesignerConfig":
        """Build a configuration from environment variables.

        Unset variables fall back to the same defaults as the dataclass
        fields. Numeric variables are converted with ``int`` and therefore
        raise ``ValueError`` when they do not hold an integer.
        """
        env = os.environ
        settings = {
            "base_url": env.get(
                "NEMO_DATA_DESIGNER_BASE_URL", "http://localhost:8000"
            ),
            "api_key": env.get("NVIDIA_API_KEY"),
        }
        # Parsed in field order so the first malformed value is the one reported.
        numeric_vars = (
            ("timeout", "NEMO_DATA_DESIGNER_TIMEOUT", "300"),
            ("max_retries", "NEMO_DATA_DESIGNER_MAX_RETRIES", "3"),
            ("batch_size", "NEMO_DATA_DESIGNER_BATCH_SIZE", "1000"),
        )
        for attr, var_name, fallback in numeric_vars:
            settings[attr] = int(env.get(var_name, fallback))
        return cls(**settings)

    def validate(self) -> None:
        """Check the configuration, raising ``ValueError`` on the first problem.

        Checks run in a fixed order: API key, base URL, timeout, retries,
        batch size.
        """
        problem = None
        if not self.api_key:
            problem = (
                "NVIDIA_API_KEY environment variable is required. "
                "Get your API key from https://build.nvidia.com/nemo/data-designer"
            )
        elif not self.base_url:
            problem = "base_url cannot be empty"
        elif self.timeout <= 0:
            problem = "timeout must be positive"
        elif self.max_retries < 0:
            problem = "max_retries must be non-negative"
        elif self.batch_size <= 0:
            problem = "batch_size must be positive"

        if problem is not None:
            raise ValueError(problem)
|
| 53 |
+
|
configs/stage_configs/config_example.py
ADDED
|
File without changes
|
configs/stage_configs/config_lock.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2025-11-30T18:08:28.603381Z",
|
| 3 |
+
"git_info": {
|
| 4 |
+
"commit_sha": "46b7965d54d4ccfc0d018ace10b0724a9246bef3",
|
| 5 |
+
"commit_message": "chore: update pnpm version across configurations and workflows\n\n- Upgraded pnpm version from 10.23.0 to 10.24.0 in various configuration files, including .gitlab-ci.yml, Dockerfile, and multiple GitHub workflows.\n- Ensured consistency in pnpm version across package.json, documentation, and scripts to maintain compatibility and improve functionality.\n- Removed obsolete files related to Azure Pipelines diagnostics and remaining fixes plan as they are no longer needed.",
|
| 6 |
+
"branch": "master",
|
| 7 |
+
"is_dirty": true,
|
| 8 |
+
"remote_url": "git@github.com:pixelatedempathy/pixelated.git"
|
| 9 |
+
},
|
| 10 |
+
"random_seed": 42,
|
| 11 |
+
"config_snapshot": {
|
| 12 |
+
"target_samples": 50,
|
| 13 |
+
"pipeline_config": {
|
| 14 |
+
"edge_cases": {
|
| 15 |
+
"enabled": true,
|
| 16 |
+
"target_percentage": 0.25
|
| 17 |
+
},
|
| 18 |
+
"pixel_voice": {
|
| 19 |
+
"enabled": true,
|
| 20 |
+
"target_percentage": 0.2
|
| 21 |
+
},
|
| 22 |
+
"psychology_knowledge": {
|
| 23 |
+
"enabled": true,
|
| 24 |
+
"target_percentage": 0.15
|
| 25 |
+
},
|
| 26 |
+
"dual_persona": {
|
| 27 |
+
"enabled": true,
|
| 28 |
+
"target_percentage": 0.1
|
| 29 |
+
},
|
| 30 |
+
"standard_therapeutic": {
|
| 31 |
+
"enabled": true,
|
| 32 |
+
"target_percentage": 0.3
|
| 33 |
+
}
|
| 34 |
+
}
|
| 35 |
+
},
|
| 36 |
+
"python_version": "3.11.13 (main, Jun 12 2025, 12:41:02) [Clang 20.1.4 ]",
|
| 37 |
+
"platform": "Linux-6.14.0-27-generic-x86_64-with-glibc2.41",
|
| 38 |
+
"config_hash": "eb8ea2f72df5a68b"
|
| 39 |
+
}
|
configs/stage_configs/config_lock.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Configuration Locking System
|
| 4 |
+
Freezes configuration, seeds, and git commit info for reproducibility
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
import random
|
| 9 |
+
import subprocess
|
| 10 |
+
from dataclasses import dataclass, asdict, field
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
from typing import Optional, Dict, Any
|
| 14 |
+
import hashlib
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@dataclass
class GitInfo:
    """Snapshot of a git repository's state at a point in time."""

    commit_sha: str
    commit_message: str
    branch: str
    is_dirty: bool
    remote_url: Optional[str] = None

    @staticmethod
    def _git_output(repo_path: Path, *args: str) -> str:
        """Run ``git <args>`` in *repo_path* and return its stripped stdout.

        Raises:
            subprocess.CalledProcessError: if git exits with a non-zero status.
            OSError: if git cannot be started or *repo_path* is unusable.
        """
        result = subprocess.run(
            ["git", *args],
            cwd=repo_path,
            capture_output=True,
            text=True,
            check=True,
        )
        return result.stdout.strip()

    @classmethod
    def capture(cls, repo_path: Optional[Path] = None) -> "GitInfo":
        """Capture the current git state of *repo_path*.

        Args:
            repo_path: Directory to run git in; defaults to the current
                working directory.

        Returns:
            A populated ``GitInfo``. When git is unavailable, the path is not
            a usable directory, or it is not inside a git repository, every
            string field is ``"unknown"``, ``is_dirty`` is ``False`` and
            ``remote_url`` is ``None``.
        """
        repo_path = repo_path or Path.cwd()

        try:
            commit_sha = cls._git_output(repo_path, "rev-parse", "HEAD")
            commit_message = cls._git_output(
                repo_path, "log", "-1", "--pretty=%B"
            )
            branch = cls._git_output(
                repo_path, "rev-parse", "--abbrev-ref", "HEAD"
            )
            # Any porcelain output means uncommitted or untracked changes.
            is_dirty = bool(cls._git_output(repo_path, "status", "--porcelain"))

            # A repository without an "origin" remote is still valid, so a
            # failure here only leaves remote_url unset.
            try:
                remote_url = cls._git_output(
                    repo_path, "config", "--get", "remote.origin.url"
                )
            except subprocess.CalledProcessError:
                remote_url = None
        except (subprocess.CalledProcessError, OSError):
            # OSError covers a missing git binary (FileNotFoundError) as well
            # as a repo_path that is a file or is not accessible
            # (NotADirectoryError, PermissionError).
            return cls(
                commit_sha="unknown",
                commit_message="unknown",
                branch="unknown",
                is_dirty=False,
                remote_url=None,
            )

        return cls(
            commit_sha=commit_sha,
            commit_message=commit_message,
            branch=branch,
            is_dirty=is_dirty,
            remote_url=remote_url,
        )
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
@dataclass
class LockedConfig:
    """Locked configuration together with the details needed to reproduce a run."""

    # ISO-format timestamp string recording when the lock was created.
    created_at: str

    # Git state captured when the lock was created.
    git_info: GitInfo

    # Seed for Python's ``random`` module.
    random_seed: int

    # Configuration as it was at lock time.
    config_snapshot: Dict[str, Any]

    # Environment details.
    python_version: str
    platform: str

    # Short hash of ``config_snapshot``; filled in automatically when empty.
    config_hash: str = field(default="")

    def __post_init__(self) -> None:
        """Compute ``config_hash`` unless one was supplied.

        The hash is the first 16 hex characters of the SHA-256 digest of the
        snapshot serialized as JSON with sorted keys, so it does not depend
        on key order.
        """
        if not self.config_hash:
            config_str = json.dumps(self.config_snapshot, sort_keys=True)
            self.config_hash = hashlib.sha256(config_str.encode()).hexdigest()[:16]

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form with ``git_info`` expanded to a dict."""
        return {
            'created_at': self.created_at,
            'git_info': asdict(self.git_info),
            'random_seed': self.random_seed,
            'config_snapshot': self.config_snapshot,
            'python_version': self.python_version,
            'platform': self.platform,
            'config_hash': self.config_hash
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "LockedConfig":
        """Rebuild a ``LockedConfig`` from the output of ``to_dict``.

        A missing ``config_hash`` key is treated as empty, which makes
        ``__post_init__`` recompute it from the snapshot.

        Raises:
            KeyError: if any other required key is absent.
        """
        git_info = GitInfo(**data['git_info'])
        return cls(
            created_at=data['created_at'],
            git_info=git_info,
            random_seed=data['random_seed'],
            config_snapshot=data['config_snapshot'],
            python_version=data['python_version'],
            platform=data['platform'],
            config_hash=data.get('config_hash', '')
        )

    def save(self, path: Path) -> None:
        """Write the locked config to *path* as indented JSON.

        Missing parent directories are created first.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        # Pin the encoding so the file does not depend on the platform locale.
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(self.to_dict(), f, indent=2)

    @classmethod
    def load(cls, path: Path) -> "LockedConfig":
        """Read a locked config previously written by ``save``."""
        # JSON is UTF-8 by convention; do not rely on the locale default.
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return cls.from_dict(data)
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def lock_config(config: Dict[str, Any], seed: Optional[int] = None,
                repo_path: Optional[Path] = None) -> LockedConfig:
    """Lock a configuration together with reproducibility information.

    Args:
        config: Configuration to freeze. It is deep-copied, so later changes
            to the caller's dict do not alter the locked snapshot or
            invalidate its hash.
        seed: Random seed to record and apply; when ``None`` one is drawn
            from the ``random`` module.
        repo_path: Repository whose git state is captured; passed through to
            ``GitInfo.capture``.

    Returns:
        A ``LockedConfig`` holding the snapshot, seed, git state, interpreter
        version, platform string and a UTC creation timestamp.

    Side effects:
        Seeds Python's global ``random`` generator with the chosen seed.
    """
    import copy
    import sys
    import platform
    from datetime import timezone

    # Generate seed if not provided
    if seed is None:
        seed = random.randint(0, 2**31 - 1)

    # Set random seed
    random.seed(seed)

    # Capture git info
    git_info = GitInfo.capture(repo_path)

    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the string keeps the "<naive ISO timestamp>Z" layout.
    created_at = (
        datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
    )

    return LockedConfig(
        created_at=created_at,
        git_info=git_info,
        random_seed=seed,
        # Copy rather than alias, so the snapshot is actually frozen.
        config_snapshot=copy.deepcopy(config),
        python_version=sys.version,
        platform=platform.platform()
    )
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
def apply_locked_config(locked_config: LockedConfig) -> None:
    """Re-seed Python's global ``random`` generator from a locked config.

    Only the seed is applied here; applying ``config_snapshot`` is the
    caller's responsibility.
    """
    recorded_seed = locked_config.random_seed
    random.seed(recorded_seed)
|
| 206 |
+
|
configs/stage_configs/config_profiles.py
ADDED
|
@@ -0,0 +1,339 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Training Configuration Profiles
|
| 4 |
+
Maps stage configs and dataset profiles into concrete training data selections.
|
| 5 |
+
Ensures default/prod profiles do not silently include edge/red-team profiles.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from dataclasses import dataclass, field
|
| 9 |
+
from enum import Enum
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import Any, Dict, Iterable, List, Optional, Union
|
| 12 |
+
|
| 13 |
+
from ..configs.stages import (
|
| 14 |
+
STAGE1_ID,
|
| 15 |
+
STAGE2_ID,
|
| 16 |
+
STAGE3_ID,
|
| 17 |
+
STAGE4_ID,
|
| 18 |
+
)
|
| 19 |
+
from ..utils.logger import get_logger
|
| 20 |
+
|
| 21 |
+
logger = get_logger("dataset_pipeline.training.config_profiles")
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class TrainingProfile(Enum):
    """Named training profiles; each value is the profile's string identifier."""

    # Stage 1: Foundation & Rapport
    FOUNDATION = "foundation"

    # Stage 2: Therapeutic Expertise & Reasoning
    REASONING = "reasoning"

    # Stage 3: Edge Stress Test & Scenario Bank
    EDGE_CRISIS = "edge_crisis"

    # Stage 4: Voice, Persona & Delivery
    VOICE_PERSONA = "voice_persona"

    # General-purpose production training (no edge)
    PRODUCTION = "production"

    # Research/red-team profile (includes edge)
    RESEARCH = "research"
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
@dataclass
class ProfileConfig:
    """Settings that define one training profile.

    Attributes:
        profile_name: Name the profile is registered under.
        stage_ids: Stage identifiers whose examples the profile includes.
        allow_edge_profiles: Whether edge/red-team datasets may be selected.
        description: Human-readable summary of the profile.
        metadata: Free-form extra settings; every instance gets its own dict.
    """

    profile_name: str
    stage_ids: List[str]
    allow_edge_profiles: bool
    description: str
    metadata: Dict[str, Any] = field(default_factory=dict)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# Predefined profile configurations
|
| 47 |
+
# Registry of the built-in profiles, keyed by each config's own profile_name.
# Entries are listed in the same order as the TrainingProfile members.
PROFILE_CONFIGS: Dict[str, ProfileConfig] = {
    config.profile_name: config
    for config in (
        ProfileConfig(
            profile_name=TrainingProfile.FOUNDATION.value,
            stage_ids=[STAGE1_ID],
            allow_edge_profiles=False,
            description="Foundation & Rapport training (Stage 1 only, no edge cases)",
        ),
        ProfileConfig(
            profile_name=TrainingProfile.REASONING.value,
            stage_ids=[STAGE2_ID],
            allow_edge_profiles=False,
            description="Therapeutic Expertise & Reasoning training (Stage 2 only, no edge cases)",
        ),
        ProfileConfig(
            profile_name=TrainingProfile.EDGE_CRISIS.value,
            stage_ids=[STAGE3_ID],
            allow_edge_profiles=True,
            description="Edge Stress Test & Scenario Bank (Stage 3, edge cases allowed)",
        ),
        ProfileConfig(
            profile_name=TrainingProfile.VOICE_PERSONA.value,
            stage_ids=[STAGE4_ID],
            allow_edge_profiles=False,
            description="Voice, Persona & Delivery training (Stage 4 only, no edge cases)",
        ),
        ProfileConfig(
            profile_name=TrainingProfile.PRODUCTION.value,
            # Stage 3 is deliberately left out of production.
            stage_ids=[STAGE1_ID, STAGE2_ID, STAGE4_ID],
            allow_edge_profiles=False,
            description="General-purpose production training (Stages 1, 2, 4 - no edge cases)",
        ),
        ProfileConfig(
            profile_name=TrainingProfile.RESEARCH.value,
            # Research covers every stage.
            stage_ids=[STAGE1_ID, STAGE2_ID, STAGE3_ID, STAGE4_ID],
            allow_edge_profiles=True,
            description="Research/red-team profile (all stages, edge cases allowed)",
        ),
    )
}
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
class TrainingDataSelector:
    """
    Profile-aware data selector that ensures edge profiles are only used
    in appropriate training configurations.

    A manifest may be a mapping that stores its examples under ``"examples"``
    (or, failing that, ``"data"``), or a bare list of example dicts. The stage
    of an example is read from ``example["metadata"]["stage"]``.
    """

    def __init__(self, manifest_path: Optional[Union[str, Path]] = None):
        """
        Initialize the training data selector.

        Args:
            manifest_path: Optional path to dataset manifest
        """
        self.manifest_path = Path(manifest_path) if manifest_path else None

    @staticmethod
    def _metadata_of(example: Dict[str, Any]) -> Dict[str, Any]:
        """Return the example's metadata dict, treating a missing or null entry as empty."""
        return example.get("metadata") or {}

    def select_data(
        self,
        profile_name: str,
        manifest: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
    ) -> Iterable[Dict[str, Any]]:
        """
        Select training data based on profile configuration.

        This is a generator: nothing runs (including the profile-name check)
        until the result is iterated.

        Args:
            profile_name: Name of the training profile
            manifest: Optional dataset manifest (if None, loads from manifest_path)

        Yields:
            Training examples matching the profile

        Raises:
            ValueError: On first iteration, if ``profile_name`` is not registered.
        """
        if profile_name not in PROFILE_CONFIGS:
            raise ValueError(
                f"Unknown profile: {profile_name}. "
                f"Available profiles: {', '.join(PROFILE_CONFIGS.keys())}"
            )

        profile_config = PROFILE_CONFIGS[profile_name]

        logger.info(
            f"Selecting data for profile '{profile_name}': "
            f"stages={profile_config.stage_ids}, "
            f"allow_edge={profile_config.allow_edge_profiles}"
        )

        if manifest is None:
            manifest = self._load_manifest()

        for example in self._iterate_examples(manifest):
            # Drop examples that belong to a stage outside the profile.
            if self._metadata_of(example).get("stage") not in profile_config.stage_ids:
                continue

            # Profiles that forbid edge data skip (and report) edge examples.
            if not profile_config.allow_edge_profiles and self._is_edge_example(example):
                logger.warning(
                    f"Skipping edge example in non-edge profile '{profile_name}': "
                    f"{example.get('id', 'unknown')}"
                )
                continue

            yield example

    def _is_edge_example(self, example: Dict[str, Any]) -> bool:
        """Check if an example is an edge/red-team example.

        An example counts as edge when its metadata has an ``edge_profile`` or
        ``edge_category`` key (whatever the value), is tagged with Stage 3, or
        has a ``crisis_intensity`` of ``"very_high"`` or ``"extreme"``.
        """
        metadata = self._metadata_of(example)

        if "edge_profile" in metadata or "edge_category" in metadata:
            return True

        if metadata.get("stage") == STAGE3_ID:
            return True

        return metadata.get("crisis_intensity") in ["very_high", "extreme"]

    def _load_manifest(self) -> Dict[str, Any]:
        """Load the dataset manifest from ``manifest_path``.

        Returns an empty manifest (``{"examples": []}``) and logs a warning
        when no path was configured or the path does not exist.
        """
        if not self.manifest_path or not self.manifest_path.exists():
            logger.warning(
                f"Manifest not found at {self.manifest_path}, returning empty manifest"
            )
            return {"examples": []}

        import json

        # JSON is read as UTF-8 explicitly so the result does not depend on
        # the platform's default locale encoding.
        with open(self.manifest_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def _iterate_examples(
        self, manifest: Union[Dict[str, Any], List[Dict[str, Any]]]
    ) -> Iterable[Dict[str, Any]]:
        """Iterate over the examples of a manifest.

        A bare list is iterated directly. For a mapping, ``"examples"`` is
        used, falling back to ``"data"`` when ``"examples"`` is missing or empty.
        """
        # The list check must come first: a list has no .get().
        if isinstance(manifest, list):
            examples = manifest
        else:
            examples = manifest.get("examples") or manifest.get("data") or []

        yield from examples

    def assert_no_edge_in_profile(
        self,
        profile_name: str,
        manifest: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
    ) -> None:
        """
        Assert that a profile does not contain edge examples.

        Profiles that allow edge data are not checked. For the others, every
        manifest example whose stage belongs to the profile is inspected.

        Args:
            profile_name: Name of the profile to check
            manifest: Optional dataset manifest

        Raises:
            ValueError: If the profile is unknown, or edge examples are found.
        """
        if profile_name not in PROFILE_CONFIGS:
            raise ValueError(f"Unknown profile: {profile_name}")

        profile_config = PROFILE_CONFIGS[profile_name]

        if profile_config.allow_edge_profiles:
            logger.info(
                f"Profile '{profile_name}' allows edge profiles, skipping assertion"
            )
            return

        if manifest is None:
            manifest = self._load_manifest()

        edge_examples = [
            example.get("id", "unknown")
            for example in self._iterate_examples(manifest)
            if self._metadata_of(example).get("stage") in profile_config.stage_ids
            and self._is_edge_example(example)
        ]

        if edge_examples:
            raise ValueError(
                f"Profile '{profile_name}' contains {len(edge_examples)} edge examples: "
                f"{edge_examples[:5]}{'...' if len(edge_examples) > 5 else ''}. "
                f"This profile does not allow edge/red-team data."
            )

        logger.info(f"Profile '{profile_name}' validated: no edge examples found")

    def get_profile_stats(
        self,
        profile_name: str,
        manifest: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
    ) -> Dict[str, Any]:
        """
        Get statistics for a profile.

        Counts are taken over the examples that ``select_data`` yields for
        the profile.

        Args:
            profile_name: Name of the profile
            manifest: Optional dataset manifest

        Returns:
            Statistics dictionary with the keys ``profile_name``, ``stages``,
            ``allow_edge_profiles``, ``total_examples``, ``by_stage``,
            ``edge_examples`` and ``non_edge_examples``.

        Raises:
            ValueError: If the profile is unknown.
        """
        if profile_name not in PROFILE_CONFIGS:
            raise ValueError(f"Unknown profile: {profile_name}")

        profile_config = PROFILE_CONFIGS[profile_name]

        if manifest is None:
            manifest = self._load_manifest()

        stats = {
            "profile_name": profile_name,
            # Copy so callers cannot mutate the shared profile configuration.
            "stages": list(profile_config.stage_ids),
            "allow_edge_profiles": profile_config.allow_edge_profiles,
            "total_examples": 0,
            "by_stage": {},
            "edge_examples": 0,
            "non_edge_examples": 0,
        }

        for example in self.select_data(profile_name, manifest):
            stats["total_examples"] += 1

            example_stage = self._metadata_of(example).get("stage", "unknown")
            stats["by_stage"][example_stage] = (
                stats["by_stage"].get(example_stage, 0) + 1
            )

            if self._is_edge_example(example):
                stats["edge_examples"] += 1
            else:
                stats["non_edge_examples"] += 1

        return stats
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
def get_profile_config(profile_name: str) -> ProfileConfig:
    """Return the configuration registered under ``profile_name``.

    Raises:
        ValueError: If no such profile exists; the message lists the
            registered profile names.
    """
    if profile_name in PROFILE_CONFIGS:
        return PROFILE_CONFIGS[profile_name]

    available = ', '.join(PROFILE_CONFIGS.keys())
    raise ValueError(
        f"Unknown profile: {profile_name}. "
        f"Available: {available}"
    )
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def list_profiles() -> List[str]:
    """Return the names of all registered training profiles as a new list."""
    return [name for name in PROFILE_CONFIGS.keys()]
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
def validate_profile_config(profile_name: str) -> tuple[bool, Optional[str]]:
    """
    Check a registered profile for internal consistency.

    A profile is valid when it exists, every one of its stage IDs is one of
    the four known stages, and - for the production profile only - it neither
    allows edge profiles nor includes Stage 3.

    Returns:
        Tuple of (is_valid, error_message); the message is None when valid.
    """
    if profile_name not in PROFILE_CONFIGS:
        return False, f"Unknown profile: {profile_name}"

    config = PROFILE_CONFIGS[profile_name]

    # Every stage the profile references must be a known stage.
    known_stages = {STAGE1_ID, STAGE2_ID, STAGE3_ID, STAGE4_ID}
    for candidate in config.stage_ids:
        if candidate not in known_stages:
            return False, f"Invalid stage ID in profile: {candidate}"

    # Extra guarantees that apply to the production profile only.
    is_production = profile_name == TrainingProfile.PRODUCTION.value
    if is_production and config.allow_edge_profiles:
        return False, "Production profile must not allow edge profiles"
    if is_production and STAGE3_ID in config.stage_ids:
        return (
            False,
            "Production profile must not include Stage 3 (edge stress test)",
        )

    return True, None
|
configs/stage_configs/config_tracker.py
ADDED
|
@@ -0,0 +1,700 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Configuration Change Tracking and Rollback System for Pixelated Empathy AI
|
| 4 |
+
Tracks configuration changes and provides rollback capabilities
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
import json
|
| 10 |
+
import yaml
|
| 11 |
+
import hashlib
|
| 12 |
+
import shutil
|
| 13 |
+
import logging
|
| 14 |
+
from typing import Dict, List, Any, Optional, Tuple
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from dataclasses import dataclass, asdict
|
| 17 |
+
from datetime import datetime, timezone
|
| 18 |
+
import subprocess
|
| 19 |
+
import tempfile
|
| 20 |
+
from contextlib import contextmanager
|
| 21 |
+
|
| 22 |
+
# Configure logging
|
| 23 |
+
# Root logging setup applied at import time: INFO level, with timestamp,
# logger name and level in front of every message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Logger used throughout this module.
logger = logging.getLogger(__name__)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@dataclass
class ConfigChange:
    """Record of one tracked change to a configuration file.

    All fields are plain strings (or None), so a record converts to and from
    a dictionary without custom handling.
    """

    timestamp: str              # when the change was recorded
    change_id: str              # identifier of this change record
    file_path: str              # path of the affected file
    change_type: str            # 'create', 'update', 'delete'
    old_hash: Optional[str]     # hash of the previous content, if captured
    new_hash: Optional[str]     # hash of the new content, if captured
    old_content: Optional[str]  # previous content, if captured
    new_content: Optional[str]  # new content, if captured
    user: str                   # who made the change
    description: str            # free-text description
    environment: str            # environment the change was made in

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a dictionary keyed by field name."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ConfigChange':
        """Build a record from a dictionary whose keys are the field names."""
        return cls(**data)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@dataclass
class ConfigSnapshot:
    """Record of the state of a set of configuration files at one moment."""

    snapshot_id: str          # identifier of this snapshot
    timestamp: str            # when the snapshot was taken
    description: str          # free-text description
    environment: str          # environment the snapshot belongs to
    files: Dict[str, str]     # file_path -> content_hash
    metadata: Dict[str, Any]  # additional information about the snapshot

    def to_dict(self) -> Dict[str, Any]:
        """Return the snapshot as a dictionary keyed by field name."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ConfigSnapshot':
        """Build a snapshot from a dictionary whose keys are the field names."""
        return cls(**data)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
class ConfigTracker:
|
| 76 |
+
"""Main configuration tracking system"""
|
| 77 |
+
|
| 78 |
+
def __init__(self, config_dir: Optional[str] = None, tracking_dir: Optional[str] = None):
    """Set up the tracker and its on-disk tracking area.

    Args:
        config_dir: Directory holding the configuration files; defaults to
            the directory containing this module.
        tracking_dir: Directory for tracking data; defaults to
            ``<config_dir>/.config_tracking``.

    Creates the tracking directory with its ``changes``, ``snapshots`` and
    ``backups`` subdirectories, and initialises ``changes.json`` and
    ``snapshots.json`` with empty lists when they do not exist yet.
    """
    self.config_dir = Path(config_dir) if config_dir else Path(__file__).parent
    self.tracking_dir = Path(tracking_dir) if tracking_dir else self.config_dir / '.config_tracking'

    # parents=True lets a nested tracking_dir be created in one go instead
    # of failing when an intermediate directory is missing.
    for directory in (
        self.tracking_dir,
        self.tracking_dir / 'changes',
        self.tracking_dir / 'snapshots',
        self.tracking_dir / 'backups',
    ):
        directory.mkdir(parents=True, exist_ok=True)

    self.changes_file = self.tracking_dir / 'changes.json'
    self.snapshots_file = self.tracking_dir / 'snapshots.json'

    # Initialize tracking files if they don't exist
    if not self.changes_file.exists():
        self._save_changes([])
    if not self.snapshots_file.exists():
        self._save_snapshots([])
|
| 96 |
+
|
| 97 |
+
def track_change(self, file_path: str, change_type: str, description: str = "",
                 user: str = None, environment: str = None) -> str:
    """Record a change to a configuration file and return its change ID.

    Args:
        file_path: File that changed; stored as a resolved absolute path.
        change_type: 'create', 'update' or 'delete' (any other value, such
            as 'rollback', records no file content).
        description: Free-text description of the change.
        user: Who made the change; defaults to the ``USER`` environment
            variable, or 'unknown'.
        environment: Defaults to the ``ENVIRONMENT`` environment variable,
            or 'unknown'.

    Returns:
        The generated change ID.
    """
    resolved_path = str(Path(file_path).resolve())
    change_id = self._generate_change_id()

    # Fall back to the process environment for anything not supplied.
    user = os.getenv('USER', 'unknown') if user is None else user
    environment = os.getenv('ENVIRONMENT', 'unknown') if environment is None else environment

    old_content = old_hash = None
    new_content = new_hash = None

    # NOTE(review): both reads below look at the file as it is right now, so
    # an 'update' records the same content as old and new - confirm whether
    # this is meant to run before or after the file is modified.
    if change_type in ('update', 'delete'):
        old_content, old_hash = self._get_file_content_and_hash(resolved_path)

    if change_type in ('create', 'update') and Path(resolved_path).exists():
        new_content, new_hash = self._get_file_content_and_hash(resolved_path)

    record = ConfigChange(
        timestamp=datetime.now(timezone.utc).isoformat(),
        change_id=change_id,
        file_path=resolved_path,
        change_type=change_type,
        old_hash=old_hash,
        new_hash=new_hash,
        old_content=old_content,
        new_content=new_content,
        user=user,
        description=description,
        environment=environment,
    )
    self._add_change(record)

    # Keep a backup of the previous content when there is any.
    needs_backup = change_type in ('update', 'delete') and old_content
    if needs_backup:
        self._create_backup(resolved_path, change_id, old_content)

    logger.info(f"Tracked configuration change: {change_id} - {description}")
    return change_id
|
| 150 |
+
|
| 151 |
+
def create_snapshot(self, description: str = "", environment: str = None) -> str:
    """Record a snapshot of the configuration files and return its ID.

    Every file returned by ``_get_all_config_files`` is hashed. A file that
    cannot be hashed is logged and left out of the snapshot's ``files``
    mapping. The snapshot record is stored, then the files are handed to
    ``_create_snapshot_backup``.

    Args:
        description: Free-text description of the snapshot.
        environment: Defaults to the ``ENVIRONMENT`` environment variable,
            or 'unknown'.

    Returns:
        The generated snapshot ID.
    """
    if environment is None:
        environment = os.getenv('ENVIRONMENT', 'unknown')

    snapshot_id = self._generate_snapshot_id()
    config_files = self._get_all_config_files()

    # Map each readable file to its content hash; skip (but report) failures.
    hashes_by_path = {}
    for path in config_files:
        try:
            _, digest = self._get_file_content_and_hash(path)
            hashes_by_path[str(path)] = digest
        except Exception as e:
            logger.warning(f"Could not include file in snapshot: {path} - {e}")

    taken_at = datetime.now(timezone.utc).isoformat()
    details = {
        'total_files': len(hashes_by_path),
        'config_dir': str(self.config_dir)
    }
    snapshot = ConfigSnapshot(
        snapshot_id=snapshot_id,
        timestamp=taken_at,
        description=description,
        environment=environment,
        files=hashes_by_path,
        metadata=details,
    )

    self._add_snapshot(snapshot)
    self._create_snapshot_backup(snapshot_id, config_files)

    logger.info(f"Created configuration snapshot: {snapshot_id} - {description}")
    return snapshot_id
|
| 190 |
+
|
| 191 |
+
def rollback_to_change(self, change_id: str) -> bool:
    """Undo the change recorded under ``change_id``.

    A pre-rollback snapshot is taken first. Then:

    * 'delete' change: the file is re-created from the recorded old content.
    * 'create'/'update' change: the recorded old content is written back;
      when no old content was recorded the file is removed if it exists.

    The rollback itself is recorded as a new change of type 'rollback'.

    Returns:
        True on success. False when the change is unknown, when a deleted
        file has no recorded content to restore, or when any step raises
        (the error is logged).
    """
    target_change = None
    for change in self._load_changes():
        if change['change_id'] == change_id:
            target_change = ConfigChange.from_dict(change)
            break

    if target_change is None:
        logger.error(f"Change not found: {change_id}")
        return False

    try:
        # Snapshot the current state so the rollback can itself be undone.
        self.create_snapshot(f"Pre-rollback backup for {change_id}")

        path = target_change.file_path
        previous = target_change.old_content

        # `previous` is compared against None rather than tested for
        # truthiness: an empty string is valid content of an empty file
        # and must be restored, not treated as "nothing recorded".
        if target_change.change_type == 'delete':
            if previous is None:
                logger.error("Cannot restore deleted file - no backup content")
                return False
            with open(path, 'w') as f:
                f.write(previous)
            logger.info(f"Restored deleted file: {path}")

        elif target_change.change_type in ('create', 'update'):
            if previous is not None:
                with open(path, 'w') as f:
                    f.write(previous)
                logger.info(f"Rolled back file: {path}")
            elif Path(path).exists():
                # Nothing existed before this change, so undo by removing.
                os.remove(path)
                logger.info(f"Removed created file: {path}")

        # Track the rollback as a new change
        self.track_change(
            path,
            'rollback',
            f"Rollback to change {change_id}",
            environment=target_change.environment
        )

        logger.info(f"Successfully rolled back to change: {change_id}")
        return True

    except Exception as e:
        logger.error(f"Rollback failed: {e}")
        return False
|
| 247 |
+
|
| 248 |
+
def rollback_to_snapshot(self, snapshot_id: str) -> bool:
    """Restore the files recorded in the snapshot ``snapshot_id``.

    A pre-rollback snapshot is taken first. Each file listed in the target
    snapshot is then copied back from ``<tracking_dir>/snapshots/<snapshot_id>``;
    files with no backup copy are logged and skipped. Every restored file is
    recorded as a new change of type 'rollback'.

    Returns:
        True when the rollback ran to completion. False when the snapshot or
        its backup directory is missing, or when any step raises (the error
        is logged).
    """
    target_snapshot = None
    for snapshot in self._load_snapshots():
        if snapshot['snapshot_id'] == snapshot_id:
            target_snapshot = ConfigSnapshot.from_dict(snapshot)
            break

    if target_snapshot is None:
        logger.error(f"Snapshot not found: {snapshot_id}")
        return False

    try:
        # Snapshot the current state so the rollback can itself be undone.
        self.create_snapshot(f"Pre-rollback backup for snapshot {snapshot_id}")

        snapshot_backup_dir = self.tracking_dir / 'snapshots' / snapshot_id
        if not snapshot_backup_dir.exists():
            logger.error(f"Snapshot backup directory not found: {snapshot_backup_dir}")
            return False

        restored_files = []
        for file_path in target_snapshot.files:
            # NOTE(review): backups are looked up by base name only, so two
            # tracked files with the same name in different directories
            # would map to the same backup - confirm against the layout
            # written by _create_snapshot_backup.
            backup_file = snapshot_backup_dir / Path(file_path).name

            if not backup_file.exists():
                logger.warning(f"Backup file not found: {backup_file}")
                continue

            # The target directory may have been removed since the snapshot.
            Path(file_path).parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(backup_file, file_path)
            restored_files.append(file_path)
            logger.info(f"Restored file: {file_path}")

        # Track the rollback
        for file_path in restored_files:
            self.track_change(
                file_path,
                'rollback',
                f"Rollback to snapshot {snapshot_id}",
                environment=target_snapshot.environment
            )

        logger.info(f"Successfully rolled back to snapshot: {snapshot_id}")
        return True

    except Exception as e:
        logger.error(f"Snapshot rollback failed: {e}")
        return False
|
| 302 |
+
|
| 303 |
+
def get_change_history(self, file_path: str = None, limit: int = None) -> List[Dict[str, Any]]:
    """Return recorded changes, newest first.

    Args:
        file_path: When given, only changes to this file are returned (the
            path is resolved before it is compared).
        limit: When given and non-zero, the result is cut to this many
            entries.

    Returns:
        Change records as dictionaries, sorted by descending timestamp.
    """
    history = self._load_changes()

    if file_path:
        wanted = str(Path(file_path).resolve())
        history = list(filter(lambda entry: entry['file_path'] == wanted, history))

    # Newest first.
    history.sort(key=lambda entry: entry['timestamp'], reverse=True)

    return history[:limit] if limit else history
|
| 320 |
+
|
| 321 |
+
def get_snapshots(self, limit: int = None) -> List[Dict[str, Any]]:
    """Return recorded snapshots, newest first.

    Args:
        limit: When given and non-zero, the result is cut to this many
            entries.

    Returns:
        Snapshot records as dictionaries, sorted by descending timestamp.
    """
    records = self._load_snapshots()

    # Newest first.
    records.sort(key=lambda record: record['timestamp'], reverse=True)

    return records[:limit] if limit else records
|
| 333 |
+
|
| 334 |
+
def compare_configurations(self, snapshot_id1: str, snapshot_id2: str) -> Dict[str, Any]:
    """Compare the file hashes of two configuration snapshots.

    Args:
        snapshot_id1: ID of the baseline snapshot.
        snapshot_id2: ID of the snapshot compared against the baseline.
            May equal ``snapshot_id1``.

    Returns:
        A dictionary with both snapshots (``snapshot1``, ``snapshot2``), a
        ``differences`` mapping of sorted file paths under ``added``
        (only in snapshot 2), ``removed`` (only in snapshot 1),
        ``modified`` and ``unchanged``, and a ``summary`` of counts.

    Raises:
        ValueError: If either snapshot ID is not found.
    """
    snapshot1 = None
    snapshot2 = None

    # Two independent checks (not if/elif) so that the same ID passed for
    # both arguments resolves both sides.
    for snapshot in self._load_snapshots():
        current_id = snapshot['snapshot_id']
        if current_id == snapshot_id1:
            snapshot1 = ConfigSnapshot.from_dict(snapshot)
        if current_id == snapshot_id2:
            snapshot2 = ConfigSnapshot.from_dict(snapshot)

    if snapshot1 is None or snapshot2 is None:
        raise ValueError("One or both snapshots not found")

    all_files = set(snapshot1.files.keys()) | set(snapshot2.files.keys())

    differences = {
        'added': [],
        'removed': [],
        'modified': [],
        'unchanged': []
    }

    # Sorted iteration keeps the output lists in a stable order.
    for file_path in sorted(all_files):
        hash1 = snapshot1.files.get(file_path)
        hash2 = snapshot2.files.get(file_path)

        if hash1 and not hash2:
            differences['removed'].append(file_path)
        elif not hash1 and hash2:
            differences['added'].append(file_path)
        elif hash1 != hash2:
            differences['modified'].append(file_path)
        else:
            differences['unchanged'].append(file_path)

    return {
        'snapshot1': snapshot1.to_dict(),
        'snapshot2': snapshot2.to_dict(),
        'differences': differences,
        'summary': {
            'total_files': len(all_files),
            'added': len(differences['added']),
            'removed': len(differences['removed']),
            'modified': len(differences['modified']),
            'unchanged': len(differences['unchanged'])
        }
    }
|
| 385 |
+
|
| 386 |
+
def cleanup_old_backups(self, days: int = 30) -> int:
    """Delete backups and snapshot backups older than ``days`` days.

    Age is judged by each entry's modification time. Entries directly under
    ``<tracking_dir>/backups`` are removed whether they are files or
    directories; under ``<tracking_dir>/snapshots`` only directories are
    removed. The records in the tracking JSON files are not touched.

    Args:
        days: Minimum age, in days, for an entry to be removed.

    Returns:
        Number of entries removed.
    """
    cutoff_time = datetime.now(timezone.utc).timestamp() - (days * 24 * 60 * 60)
    cleaned_count = 0

    # Clean up old change backups
    backup_dir = self.tracking_dir / 'backups'
    if backup_dir.exists():
        for entry in backup_dir.iterdir():
            if entry.stat().st_mtime >= cutoff_time:
                continue
            # unlink() raises on a directory, so real directories are
            # removed recursively; files and symlinks are unlinked.
            if entry.is_dir() and not entry.is_symlink():
                shutil.rmtree(entry)
            else:
                entry.unlink()
            cleaned_count += 1

    # Clean up old snapshot backups
    snapshot_dir = self.tracking_dir / 'snapshots'
    if snapshot_dir.exists():
        for snapshot_backup in snapshot_dir.iterdir():
            if snapshot_backup.is_dir() and snapshot_backup.stat().st_mtime < cutoff_time:
                shutil.rmtree(snapshot_backup)
                cleaned_count += 1

    logger.info(f"Cleaned up {cleaned_count} old backup files")
    return cleaned_count
|
| 409 |
+
|
| 410 |
+
def export_tracking_data(self, output_file: str) -> bool:
    """Write all tracked changes and snapshots to ``output_file`` as JSON.

    The document contains ``export_timestamp`` (UTC, ISO 8601),
    ``config_dir``, ``changes`` and ``snapshots``.

    Args:
        output_file: Destination path; overwritten on success.

    Returns:
        True when the file was written, False on any failure (the error is
        logged rather than raised).
    """
    try:
        export_data = {
            'export_timestamp': datetime.now(timezone.utc).isoformat(),
            'config_dir': str(self.config_dir),
            'changes': self._load_changes(),
            'snapshots': self._load_snapshots()
        }

        # Serialise before opening the destination so that a record that
        # cannot be encoded does not leave a truncated file behind.
        payload = json.dumps(export_data, indent=2)

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(payload)

        logger.info(f"Exported tracking data to: {output_file}")
        return True

    except Exception as e:
        # Boundary of the export API: callers only look at the boolean.
        logger.error(f"Export failed: {e}")
        return False
|
| 429 |
+
|
| 430 |
+
def import_tracking_data(self, input_file: str) -> bool:
    """Replace the stored changes and snapshots with those in ``input_file``.

    The current tracking data is first exported to a timestamped
    ``backup_*.json`` inside ``tracking_dir``.  If that backup cannot be
    written the import is aborted, so existing data is never overwritten
    without a copy.

    Args:
        input_file: Path of a JSON document with ``changes`` and
            ``snapshots`` keys.

    Returns:
        True on success, False on any failure (the error is logged).
    """
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            import_data = json.load(f)

        # Validate import data
        if (
            not isinstance(import_data, dict)
            or 'changes' not in import_data
            or 'snapshots' not in import_data
        ):
            raise ValueError("Invalid import data format")

        # Backup current tracking data; refuse to continue without it.
        backup_file = self.tracking_dir / f"backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        if not self.export_tracking_data(str(backup_file)):
            raise RuntimeError(f"Could not back up current tracking data to {backup_file}")

        # Import changes and snapshots
        self._save_changes(import_data['changes'])
        self._save_snapshots(import_data['snapshots'])

        logger.info(f"Imported tracking data from: {input_file}")
        return True

    except Exception as e:
        # Boundary of the import API: callers only look at the boolean.
        logger.error(f"Import failed: {e}")
        return False
|
| 454 |
+
|
| 455 |
+
@contextmanager
|
| 456 |
+
def track_changes(self, description: str = "Batch configuration changes"):
|
| 457 |
+
"""Context manager for tracking multiple changes"""
|
| 458 |
+
initial_snapshot = self.create_snapshot(f"Pre-change snapshot: {description}")
|
| 459 |
+
|
| 460 |
+
try:
|
| 461 |
+
yield
|
| 462 |
+
|
| 463 |
+
# Create post-change snapshot
|
| 464 |
+
final_snapshot = self.create_snapshot(f"Post-change snapshot: {description}")
|
| 465 |
+
|
| 466 |
+
logger.info(f"Tracked batch changes: {description}")
|
| 467 |
+
logger.info(f"Initial snapshot: {initial_snapshot}")
|
| 468 |
+
logger.info(f"Final snapshot: {final_snapshot}")
|
| 469 |
+
|
| 470 |
+
except Exception as e:
|
| 471 |
+
logger.error(f"Error during tracked changes: {e}")
|
| 472 |
+
|
| 473 |
+
# Rollback to initial snapshot
|
| 474 |
+
logger.info(f"Rolling back to initial snapshot: {initial_snapshot}")
|
| 475 |
+
self.rollback_to_snapshot(initial_snapshot)
|
| 476 |
+
raise
|
| 477 |
+
|
| 478 |
+
def _generate_change_id(self) -> str:
|
| 479 |
+
"""Generate unique change ID"""
|
| 480 |
+
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
| 481 |
+
random_suffix = hashlib.md5(os.urandom(16)).hexdigest()[:8]
|
| 482 |
+
return f"change_{timestamp}_{random_suffix}"
|
| 483 |
+
|
| 484 |
+
def _generate_snapshot_id(self) -> str:
|
| 485 |
+
"""Generate unique snapshot ID"""
|
| 486 |
+
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
| 487 |
+
random_suffix = hashlib.md5(os.urandom(16)).hexdigest()[:8]
|
| 488 |
+
return f"snapshot_{timestamp}_{random_suffix}"
|
| 489 |
+
|
| 490 |
+
def _get_file_content_and_hash(self, file_path: str) -> Tuple[str, str]:
|
| 491 |
+
"""Get file content and its hash"""
|
| 492 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
| 493 |
+
content = f.read()
|
| 494 |
+
|
| 495 |
+
file_hash = hashlib.sha256(content.encode('utf-8')).hexdigest()
|
| 496 |
+
return content, file_hash
|
| 497 |
+
|
| 498 |
+
def _get_all_config_files(self) -> List[Path]:
|
| 499 |
+
"""Get all configuration files"""
|
| 500 |
+
config_files = []
|
| 501 |
+
|
| 502 |
+
# Common configuration file patterns
|
| 503 |
+
patterns = [
|
| 504 |
+
'*.yaml', '*.yml', '*.json', '*.toml', '*.ini', '*.conf',
|
| 505 |
+
'.env*', '*.config'
|
| 506 |
+
]
|
| 507 |
+
|
| 508 |
+
for pattern in patterns:
|
| 509 |
+
config_files.extend(self.config_dir.glob(pattern))
|
| 510 |
+
|
| 511 |
+
# Also check subdirectories
|
| 512 |
+
for subdir in self.config_dir.iterdir():
|
| 513 |
+
if subdir.is_dir() and not subdir.name.startswith('.'):
|
| 514 |
+
for pattern in patterns:
|
| 515 |
+
config_files.extend(subdir.glob(pattern))
|
| 516 |
+
|
| 517 |
+
return config_files
|
| 518 |
+
|
| 519 |
+
def _create_backup(self, file_path: str, change_id: str, content: str):
|
| 520 |
+
"""Create backup of file content"""
|
| 521 |
+
backup_file = self.tracking_dir / 'backups' / f"{change_id}_{Path(file_path).name}"
|
| 522 |
+
|
| 523 |
+
with open(backup_file, 'w', encoding='utf-8') as f:
|
| 524 |
+
f.write(content)
|
| 525 |
+
|
| 526 |
+
def _create_snapshot_backup(self, snapshot_id: str, config_files: List[Path]):
|
| 527 |
+
"""Create backup of all files in snapshot"""
|
| 528 |
+
snapshot_backup_dir = self.tracking_dir / 'snapshots' / snapshot_id
|
| 529 |
+
snapshot_backup_dir.mkdir(exist_ok=True)
|
| 530 |
+
|
| 531 |
+
for file_path in config_files:
|
| 532 |
+
if file_path.exists():
|
| 533 |
+
backup_file = snapshot_backup_dir / file_path.name
|
| 534 |
+
shutil.copy2(file_path, backup_file)
|
| 535 |
+
|
| 536 |
+
def _load_changes(self) -> List[Dict[str, Any]]:
|
| 537 |
+
"""Load changes from file"""
|
| 538 |
+
try:
|
| 539 |
+
with open(self.changes_file, 'r') as f:
|
| 540 |
+
return json.load(f)
|
| 541 |
+
except (FileNotFoundError, json.JSONDecodeError):
|
| 542 |
+
return []
|
| 543 |
+
|
| 544 |
+
def _save_changes(self, changes: List[Dict[str, Any]]):
|
| 545 |
+
"""Save changes to file"""
|
| 546 |
+
with open(self.changes_file, 'w') as f:
|
| 547 |
+
json.dump(changes, f, indent=2)
|
| 548 |
+
|
| 549 |
+
def _add_change(self, change: ConfigChange):
    """Append ``change.to_dict()`` to the stored change list and save it.

    The whole list is loaded, extended by one record and written back.
    """
    history = self._load_changes()
    record = change.to_dict()
    history.append(record)
    self._save_changes(history)
|
| 554 |
+
|
| 555 |
+
def _load_snapshots(self) -> List[Dict[str, Any]]:
|
| 556 |
+
"""Load snapshots from file"""
|
| 557 |
+
try:
|
| 558 |
+
with open(self.snapshots_file, 'r') as f:
|
| 559 |
+
return json.load(f)
|
| 560 |
+
except (FileNotFoundError, json.JSONDecodeError):
|
| 561 |
+
return []
|
| 562 |
+
|
| 563 |
+
def _save_snapshots(self, snapshots: List[Dict[str, Any]]):
|
| 564 |
+
"""Save snapshots to file"""
|
| 565 |
+
with open(self.snapshots_file, 'w') as f:
|
| 566 |
+
json.dump(snapshots, f, indent=2)
|
| 567 |
+
|
| 568 |
+
def _add_snapshot(self, snapshot: ConfigSnapshot):
    """Append ``snapshot.to_dict()`` to the stored snapshot list and save it.

    The whole list is loaded, extended by one record and written back.
    """
    known = self._load_snapshots()
    record = snapshot.to_dict()
    known.append(record)
    self._save_snapshots(known)
|
| 573 |
+
|
| 574 |
+
|
| 575 |
+
def _build_cli_parser():
    """Build the argument parser with one sub-command per tracker operation."""
    import argparse

    parser = argparse.ArgumentParser(description="Configuration Change Tracking System")
    parser.add_argument('--config-dir', help="Configuration directory")
    parser.add_argument('--tracking-dir', help="Tracking data directory")

    commands = parser.add_subparsers(dest='command', help='Available commands')

    # track
    track = commands.add_parser('track', help='Track a configuration change')
    track.add_argument('file_path', help='Path to configuration file')
    track.add_argument('change_type', choices=['create', 'update', 'delete'])
    track.add_argument('--description', default='', help='Change description')
    track.add_argument('--user', help='User making the change')
    track.add_argument('--environment', help='Environment')

    # snapshot
    snapshot = commands.add_parser('snapshot', help='Create a configuration snapshot')
    snapshot.add_argument('--description', default='', help='Snapshot description')
    snapshot.add_argument('--environment', help='Environment')

    # rollback: exactly one of --change-id / --snapshot-id is required
    rollback = commands.add_parser('rollback', help='Rollback configuration')
    target = rollback.add_mutually_exclusive_group(required=True)
    target.add_argument('--change-id', help='Change ID to rollback to')
    target.add_argument('--snapshot-id', help='Snapshot ID to rollback to')

    # history
    history = commands.add_parser('history', help='Show change history')
    history.add_argument('--file-path', help='Filter by file path')
    history.add_argument('--limit', type=int, help='Limit number of results')

    # snapshots
    listing = commands.add_parser('snapshots', help='List snapshots')
    listing.add_argument('--limit', type=int, help='Limit number of results')

    # compare
    compare = commands.add_parser('compare', help='Compare snapshots')
    compare.add_argument('snapshot1', help='First snapshot ID')
    compare.add_argument('snapshot2', help='Second snapshot ID')

    # cleanup
    cleanup = commands.add_parser('cleanup', help='Clean up old backups')
    cleanup.add_argument('--days', type=int, default=30, help='Days to keep')

    # export
    export = commands.add_parser('export', help='Export tracking data')
    export.add_argument('output_file', help='Output file path')

    # import
    importer = commands.add_parser('import', help='Import tracking data')
    importer.add_argument('input_file', help='Input file path')

    return parser


def main():
    """Command-line entry point.

    Parses ``sys.argv``, prints the help text when no sub-command is given,
    otherwise builds a ``ConfigTracker`` and runs the selected operation.
    Failed rollback/export/import operations exit with status 1.
    """
    parser = _build_cli_parser()
    args = parser.parse_args()

    command = args.command
    if not command:
        parser.print_help()
        return

    tracker = ConfigTracker(args.config_dir, args.tracking_dir)

    def conclude(succeeded, success_message, failure_message):
        # Shared tail of the operations that report a boolean outcome.
        if succeeded:
            print(success_message)
        else:
            print(failure_message)
            sys.exit(1)

    if command == 'track':
        change_id = tracker.track_change(
            args.file_path,
            args.change_type,
            args.description,
            args.user,
            args.environment
        )
        print(f"Change tracked: {change_id}")
        return

    if command == 'snapshot':
        snapshot_id = tracker.create_snapshot(args.description, args.environment)
        print(f"Snapshot created: {snapshot_id}")
        return

    if command == 'rollback':
        if args.change_id:
            rolled_back = tracker.rollback_to_change(args.change_id)
        else:
            rolled_back = tracker.rollback_to_snapshot(args.snapshot_id)
        conclude(rolled_back, "Rollback completed successfully", "Rollback failed")
        return

    if command == 'history':
        print(json.dumps(tracker.get_change_history(args.file_path, args.limit), indent=2))
        return

    if command == 'snapshots':
        print(json.dumps(tracker.get_snapshots(args.limit), indent=2))
        return

    if command == 'compare':
        report = tracker.compare_configurations(args.snapshot1, args.snapshot2)
        print(json.dumps(report, indent=2))
        return

    if command == 'cleanup':
        removed = tracker.cleanup_old_backups(args.days)
        print(f"Cleaned up {removed} old backup files")
        return

    if command == 'export':
        conclude(
            tracker.export_tracking_data(args.output_file),
            f"Tracking data exported to: {args.output_file}",
            "Export failed",
        )
        return

    if command == 'import':
        conclude(
            tracker.import_tracking_data(args.input_file),
            f"Tracking data imported from: {args.input_file}",
            "Import failed",
        )


if __name__ == '__main__':
    main()
|
configs/stage_configs/config_validator.py
ADDED
|
@@ -0,0 +1,705 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Configuration Validation System for Pixelated Empathy AI
|
| 4 |
+
Validates all configuration files and environment variables
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
import json
|
| 10 |
+
import yaml
|
| 11 |
+
import logging
|
| 12 |
+
from typing import Dict, List, Any, Optional, Union
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
from dataclasses import dataclass, field
|
| 15 |
+
from enum import Enum
|
| 16 |
+
import re
|
| 17 |
+
from urllib.parse import urlparse
|
| 18 |
+
|
| 19 |
+
# Configure logging
|
| 20 |
+
logging.basicConfig(
|
| 21 |
+
level=logging.INFO,
|
| 22 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 23 |
+
)
|
| 24 |
+
logger = logging.getLogger(__name__)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class ValidationLevel(Enum):
    """Severity of a single validation finding.

    The member values are the lowercase names used as keys in summaries.
    """

    ERROR = "error"
    WARNING = "warning"
    INFO = "info"
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@dataclass
class ValidationResult:
    """One finding produced by a validation check."""

    # Severity of the finding.
    level: ValidationLevel
    # Human-readable description of what was found.
    message: str
    # Name of the setting or file the finding refers to, when there is one.
    field: Optional[str] = None
    # Offending value (or a summary of it), when worth reporting.
    value: Optional[Any] = None
    # Hint on how to resolve the finding.
    suggestion: Optional[str] = None
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
@dataclass
class ValidationReport:
    """Ordered collection of findings gathered during a validation run."""

    results: List[ValidationResult] = field(default_factory=list)

    def _record(self, level, message, field_name, value, suggestion=None):
        """Append one finding with the given severity."""
        entry = ValidationResult(
            level=level,
            message=message,
            field=field_name,
            value=value,
            suggestion=suggestion,
        )
        self.results.append(entry)

    def add_error(self, message: str, field: str = None, value: Any = None, suggestion: str = None):
        """Record a finding with ERROR severity."""
        self._record(ValidationLevel.ERROR, message, field, value, suggestion)

    def add_warning(self, message: str, field: str = None, value: Any = None, suggestion: str = None):
        """Record a finding with WARNING severity."""
        self._record(ValidationLevel.WARNING, message, field, value, suggestion)

    def add_info(self, message: str, field: str = None, value: Any = None):
        """Record a finding with INFO severity (no suggestion)."""
        self._record(ValidationLevel.INFO, message, field, value)

    @property
    def has_errors(self) -> bool:
        """True when at least one recorded finding is an ERROR."""
        for entry in self.results:
            if entry.level == ValidationLevel.ERROR:
                return True
        return False

    @property
    def has_warnings(self) -> bool:
        """True when at least one recorded finding is a WARNING."""
        for entry in self.results:
            if entry.level == ValidationLevel.WARNING:
                return True
        return False

    def get_summary(self) -> Dict[str, int]:
        """Return the number of findings per severity, keyed by level value.

        Every level is present in the result, with 0 when nothing was
        recorded at that level.
        """
        counts = dict.fromkeys((level.value for level in ValidationLevel), 0)
        for entry in self.results:
            counts[entry.level.value] += 1
        return counts
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
class ConfigValidator:
|
| 97 |
+
"""Main configuration validator"""
|
| 98 |
+
|
| 99 |
+
def __init__(self, config_dir: str = None):
    """Set the directory to validate and start with an empty report.

    Args:
        config_dir: Directory holding the configuration files.  When empty
            or omitted, the directory containing this module is used.
    """
    if config_dir:
        self.config_dir = Path(config_dir)
    else:
        self.config_dir = Path(__file__).parent
    self.report = ValidationReport()
|
| 102 |
+
|
| 103 |
+
def validate_all(self) -> ValidationReport:
    """Run every validation step and return the resulting report.

    The report is replaced with a fresh one at the start, so findings from
    an earlier run are discarded.
    """
    logger.info("Starting comprehensive configuration validation...")

    # Start from a clean report
    self.report = ValidationReport()

    # Steps run in this order; each one adds its findings to self.report.
    step_names = (
        '_validate_environment_variables',
        '_validate_database_config',
        '_validate_redis_config',
        '_validate_security_config',
        '_validate_monitoring_config',
        '_validate_file_permissions',
        '_validate_network_config',
        '_validate_resource_limits',
        '_validate_backup_config',
    )
    for step_name in step_names:
        getattr(self, step_name)()

    summary = self.report.get_summary()
    logger.info(f"Validation complete: {summary}")

    return self.report
|
| 126 |
+
|
| 127 |
+
def _validate_environment_variables(self):
|
| 128 |
+
"""Validate required environment variables"""
|
| 129 |
+
logger.info("Validating environment variables...")
|
| 130 |
+
|
| 131 |
+
required_vars = {
|
| 132 |
+
'DATABASE_URL': self._validate_database_url,
|
| 133 |
+
'REDIS_URL': self._validate_redis_url,
|
| 134 |
+
'JWT_SECRET': self._validate_jwt_secret,
|
| 135 |
+
'ENCRYPTION_KEY': self._validate_encryption_key,
|
| 136 |
+
'LOG_LEVEL': self._validate_log_level,
|
| 137 |
+
'ENVIRONMENT': self._validate_environment,
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
optional_vars = {
|
| 141 |
+
'MAX_WORKERS': self._validate_max_workers,
|
| 142 |
+
'BATCH_SIZE': self._validate_batch_size,
|
| 143 |
+
'DEBUG': self._validate_debug_flag,
|
| 144 |
+
'SENTRY_DSN': self._validate_sentry_dsn,
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
# Check required variables
|
| 148 |
+
for var_name, validator in required_vars.items():
|
| 149 |
+
value = os.getenv(var_name)
|
| 150 |
+
if not value:
|
| 151 |
+
self.report.add_error(
|
| 152 |
+
f"Required environment variable '{var_name}' is not set",
|
| 153 |
+
field=var_name,
|
| 154 |
+
suggestion=f"Set {var_name} environment variable"
|
| 155 |
+
)
|
| 156 |
+
else:
|
| 157 |
+
validator(value, var_name)
|
| 158 |
+
|
| 159 |
+
# Check optional variables
|
| 160 |
+
for var_name, validator in optional_vars.items():
|
| 161 |
+
value = os.getenv(var_name)
|
| 162 |
+
if value:
|
| 163 |
+
validator(value, var_name)
|
| 164 |
+
|
| 165 |
+
def _validate_database_url(self, value: str, field: str):
|
| 166 |
+
"""Validate database URL format"""
|
| 167 |
+
try:
|
| 168 |
+
parsed = urlparse(value)
|
| 169 |
+
if not parsed.scheme:
|
| 170 |
+
self.report.add_error(
|
| 171 |
+
f"Database URL missing scheme",
|
| 172 |
+
field=field,
|
| 173 |
+
suggestion="Use format: postgresql://user:pass@host:port/db"
|
| 174 |
+
)
|
| 175 |
+
elif parsed.scheme not in ['postgresql', 'postgres']:
|
| 176 |
+
self.report.add_warning(
|
| 177 |
+
f"Unexpected database scheme: {parsed.scheme}",
|
| 178 |
+
field=field,
|
| 179 |
+
suggestion="Consider using PostgreSQL for production"
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
+
if not parsed.hostname:
|
| 183 |
+
self.report.add_error(
|
| 184 |
+
f"Database URL missing hostname",
|
| 185 |
+
field=field
|
| 186 |
+
)
|
| 187 |
+
|
| 188 |
+
if not parsed.path or parsed.path == '/':
|
| 189 |
+
self.report.add_error(
|
| 190 |
+
f"Database URL missing database name",
|
| 191 |
+
field=field
|
| 192 |
+
)
|
| 193 |
+
|
| 194 |
+
except Exception as e:
|
| 195 |
+
self.report.add_error(
|
| 196 |
+
f"Invalid database URL format: {e}",
|
| 197 |
+
field=field
|
| 198 |
+
)
|
| 199 |
+
|
| 200 |
+
def _validate_redis_url(self, value: str, field: str):
|
| 201 |
+
"""Validate Redis URL format"""
|
| 202 |
+
try:
|
| 203 |
+
parsed = urlparse(value)
|
| 204 |
+
if not parsed.scheme:
|
| 205 |
+
self.report.add_error(
|
| 206 |
+
f"Redis URL missing scheme",
|
| 207 |
+
field=field,
|
| 208 |
+
suggestion="Use format: redis://[:password@]host:port[/db]"
|
| 209 |
+
)
|
| 210 |
+
elif parsed.scheme not in ['redis', 'rediss']:
|
| 211 |
+
self.report.add_error(
|
| 212 |
+
f"Invalid Redis scheme: {parsed.scheme}",
|
| 213 |
+
field=field,
|
| 214 |
+
suggestion="Use 'redis://' or 'rediss://' for SSL"
|
| 215 |
+
)
|
| 216 |
+
|
| 217 |
+
if not parsed.hostname:
|
| 218 |
+
self.report.add_error(
|
| 219 |
+
f"Redis URL missing hostname",
|
| 220 |
+
field=field
|
| 221 |
+
)
|
| 222 |
+
|
| 223 |
+
except Exception as e:
|
| 224 |
+
self.report.add_error(
|
| 225 |
+
f"Invalid Redis URL format: {e}",
|
| 226 |
+
field=field
|
| 227 |
+
)
|
| 228 |
+
|
| 229 |
+
def _validate_jwt_secret(self, value: str, field: str):
|
| 230 |
+
"""Validate JWT secret strength"""
|
| 231 |
+
if len(value) < 32:
|
| 232 |
+
self.report.add_error(
|
| 233 |
+
f"JWT secret too short (minimum 32 characters)",
|
| 234 |
+
field=field,
|
| 235 |
+
value=f"Length: {len(value)}",
|
| 236 |
+
suggestion="Generate a longer, more secure secret"
|
| 237 |
+
)
|
| 238 |
+
elif len(value) < 64:
|
| 239 |
+
self.report.add_warning(
|
| 240 |
+
f"JWT secret could be longer for better security",
|
| 241 |
+
field=field,
|
| 242 |
+
value=f"Length: {len(value)}",
|
| 243 |
+
suggestion="Consider using 64+ character secret"
|
| 244 |
+
)
|
| 245 |
+
|
| 246 |
+
# Check for common weak patterns
|
| 247 |
+
if value.lower() in ['secret', 'password', 'changeme', 'default']:
|
| 248 |
+
self.report.add_error(
|
| 249 |
+
f"JWT secret uses common weak value",
|
| 250 |
+
field=field,
|
| 251 |
+
suggestion="Generate a cryptographically secure random secret"
|
| 252 |
+
)
|
| 253 |
+
|
| 254 |
+
def _validate_encryption_key(self, value: str, field: str):
|
| 255 |
+
"""Validate encryption key"""
|
| 256 |
+
if len(value) < 32:
|
| 257 |
+
self.report.add_error(
|
| 258 |
+
f"Encryption key too short (minimum 32 characters)",
|
| 259 |
+
field=field,
|
| 260 |
+
value=f"Length: {len(value)}"
|
| 261 |
+
)
|
| 262 |
+
|
| 263 |
+
# Check if it's base64 encoded (common for encryption keys)
|
| 264 |
+
try:
|
| 265 |
+
import base64
|
| 266 |
+
base64.b64decode(value)
|
| 267 |
+
if len(base64.b64decode(value)) < 32:
|
| 268 |
+
self.report.add_warning(
|
| 269 |
+
f"Decoded encryption key may be too short",
|
| 270 |
+
field=field
|
| 271 |
+
)
|
| 272 |
+
except Exception:
|
| 273 |
+
# Not base64, check raw length
|
| 274 |
+
pass
|
| 275 |
+
|
| 276 |
+
def _validate_log_level(self, value: str, field: str):
|
| 277 |
+
"""Validate log level"""
|
| 278 |
+
valid_levels = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
|
| 279 |
+
if value.upper() not in valid_levels:
|
| 280 |
+
self.report.add_error(
|
| 281 |
+
f"Invalid log level: {value}",
|
| 282 |
+
field=field,
|
| 283 |
+
suggestion=f"Use one of: {', '.join(valid_levels)}"
|
| 284 |
+
)
|
| 285 |
+
elif value.upper() == 'DEBUG':
|
| 286 |
+
env = os.getenv('ENVIRONMENT', '').lower()
|
| 287 |
+
if env in ['production', 'prod']:
|
| 288 |
+
self.report.add_warning(
|
| 289 |
+
f"DEBUG log level in production environment",
|
| 290 |
+
field=field,
|
| 291 |
+
suggestion="Use INFO or WARNING for production"
|
| 292 |
+
)
|
| 293 |
+
|
| 294 |
+
def _validate_environment(self, value: str, field: str):
|
| 295 |
+
"""Validate environment setting"""
|
| 296 |
+
valid_envs = ['development', 'dev', 'staging', 'production', 'prod', 'test']
|
| 297 |
+
if value.lower() not in valid_envs:
|
| 298 |
+
self.report.add_warning(
|
| 299 |
+
f"Unexpected environment value: {value}",
|
| 300 |
+
field=field,
|
| 301 |
+
suggestion=f"Consider using: {', '.join(valid_envs)}"
|
| 302 |
+
)
|
| 303 |
+
|
| 304 |
+
def _validate_max_workers(self, value: str, field: str):
|
| 305 |
+
"""Validate max workers setting"""
|
| 306 |
+
try:
|
| 307 |
+
workers = int(value)
|
| 308 |
+
if workers < 1:
|
| 309 |
+
self.report.add_error(
|
| 310 |
+
f"Max workers must be positive",
|
| 311 |
+
field=field,
|
| 312 |
+
value=workers
|
| 313 |
+
)
|
| 314 |
+
elif workers > 32:
|
| 315 |
+
self.report.add_warning(
|
| 316 |
+
f"Very high worker count may cause resource issues",
|
| 317 |
+
field=field,
|
| 318 |
+
value=workers,
|
| 319 |
+
suggestion="Consider CPU core count when setting workers"
|
| 320 |
+
)
|
| 321 |
+
except ValueError:
|
| 322 |
+
self.report.add_error(
|
| 323 |
+
f"Max workers must be an integer",
|
| 324 |
+
field=field,
|
| 325 |
+
value=value
|
| 326 |
+
)
|
| 327 |
+
|
| 328 |
+
def _validate_batch_size(self, value: str, field: str):
|
| 329 |
+
"""Validate batch size setting"""
|
| 330 |
+
try:
|
| 331 |
+
batch_size = int(value)
|
| 332 |
+
if batch_size < 1:
|
| 333 |
+
self.report.add_error(
|
| 334 |
+
f"Batch size must be positive",
|
| 335 |
+
field=field,
|
| 336 |
+
value=batch_size
|
| 337 |
+
)
|
| 338 |
+
elif batch_size > 1000:
|
| 339 |
+
self.report.add_warning(
|
| 340 |
+
f"Large batch size may cause memory issues",
|
| 341 |
+
field=field,
|
| 342 |
+
value=batch_size,
|
| 343 |
+
suggestion="Consider memory constraints when setting batch size"
|
| 344 |
+
)
|
| 345 |
+
except ValueError:
|
| 346 |
+
self.report.add_error(
|
| 347 |
+
f"Batch size must be an integer",
|
| 348 |
+
field=field,
|
| 349 |
+
value=value
|
| 350 |
+
)
|
| 351 |
+
|
| 352 |
+
def _validate_debug_flag(self, value: str, field: str):
|
| 353 |
+
"""Validate debug flag"""
|
| 354 |
+
if value.lower() not in ['true', 'false', '1', '0', 'yes', 'no']:
|
| 355 |
+
self.report.add_warning(
|
| 356 |
+
f"Debug flag should be boolean-like",
|
| 357 |
+
field=field,
|
| 358 |
+
value=value,
|
| 359 |
+
suggestion="Use 'true', 'false', '1', or '0'"
|
| 360 |
+
)
|
| 361 |
+
|
| 362 |
+
if value.lower() in ['true', '1', 'yes']:
|
| 363 |
+
env = os.getenv('ENVIRONMENT', '').lower()
|
| 364 |
+
if env in ['production', 'prod']:
|
| 365 |
+
self.report.add_warning(
|
| 366 |
+
f"Debug enabled in production environment",
|
| 367 |
+
field=field,
|
| 368 |
+
suggestion="Disable debug in production"
|
| 369 |
+
)
|
| 370 |
+
|
| 371 |
+
def _validate_sentry_dsn(self, value: str, field: str):
|
| 372 |
+
"""Validate Sentry DSN format"""
|
| 373 |
+
try:
|
| 374 |
+
parsed = urlparse(value)
|
| 375 |
+
if not parsed.scheme or not parsed.hostname:
|
| 376 |
+
self.report.add_error(
|
| 377 |
+
f"Invalid Sentry DSN format",
|
| 378 |
+
field=field,
|
| 379 |
+
suggestion="Check Sentry project settings for correct DSN"
|
| 380 |
+
)
|
| 381 |
+
except Exception as e:
|
| 382 |
+
self.report.add_error(
|
| 383 |
+
f"Invalid Sentry DSN: {e}",
|
| 384 |
+
field=field
|
| 385 |
+
)
|
| 386 |
+
|
| 387 |
+
def _validate_database_config(self):
|
| 388 |
+
"""Validate database configuration files"""
|
| 389 |
+
logger.info("Validating database configuration...")
|
| 390 |
+
|
| 391 |
+
# Check for database config files
|
| 392 |
+
db_config_files = [
|
| 393 |
+
'database.yaml',
|
| 394 |
+
'database.json',
|
| 395 |
+
'db_config.yaml'
|
| 396 |
+
]
|
| 397 |
+
|
| 398 |
+
for config_file in db_config_files:
|
| 399 |
+
config_path = self.config_dir / config_file
|
| 400 |
+
if config_path.exists():
|
| 401 |
+
self._validate_config_file(config_path)
|
| 402 |
+
|
| 403 |
+
def _validate_redis_config(self):
|
| 404 |
+
"""Validate Redis configuration"""
|
| 405 |
+
logger.info("Validating Redis configuration...")
|
| 406 |
+
|
| 407 |
+
redis_config = self.config_dir / 'redis.yaml'
|
| 408 |
+
if redis_config.exists():
|
| 409 |
+
self._validate_config_file(redis_config)
|
| 410 |
+
|
| 411 |
+
def _validate_security_config(self):
    """Validate ``security.yaml`` in the config directory, if present.

    Warns when an ``encryption`` section does not enable encryption and,
    when ``ENVIRONMENT`` is ``production``/``prod``, when an
    ``authentication`` section does not require 2FA.  A file that cannot be
    read or parsed, or whose top-level value is not a mapping, is recorded
    as an error on ``self.report``.
    """
    logger.info("Validating security configuration...")

    security_config = self.config_dir / 'security.yaml'
    if not security_config.exists():
        return

    try:
        with open(security_config, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except Exception as e:
        self.report.add_error(
            f"Error reading security config: {e}",
            field="security.yaml"
        )
        return

    # safe_load yields None for an empty document; treat that as "no settings"
    # instead of letting the membership tests below raise TypeError.
    if config is None:
        config = {}
    if not isinstance(config, dict):
        self.report.add_error(
            "Error reading security config: top-level value must be a mapping",
            field="security.yaml"
        )
        return

    def section(name):
        """Return the named section, or {} when it is empty or not a mapping."""
        value = config.get(name)
        return value if isinstance(value, dict) else {}

    # Check security settings
    if 'encryption' in config and not section('encryption').get('enabled', False):
        self.report.add_warning(
            "Encryption is disabled",
            field="encryption.enabled",
            suggestion="Enable encryption for production"
        )

    # Truthiness test (rather than ``is False``) so that 0/None/"" are also
    # treated as "2FA not required", matching the encryption check above.
    if 'authentication' in config and not section('authentication').get('require_2fa', False):
        env = os.getenv('ENVIRONMENT', '').lower()
        if env in ['production', 'prod']:
            self.report.add_warning(
                "2FA not required in production",
                field="authentication.require_2fa",
                suggestion="Enable 2FA for production security"
            )
|
| 446 |
+
|
| 447 |
+
def _validate_monitoring_config(self):
    """Validate ``monitoring.yaml`` in the config directory when it exists."""
    logger.info("Validating monitoring configuration...")

    monitoring_path = self.config_dir / 'monitoring.yaml'
    if not monitoring_path.exists():
        return
    self._validate_config_file(monitoring_path)
|
| 454 |
+
|
| 455 |
+
def _validate_file_permissions(self):
    """Warn about sensitive files that grant any access to "other" users.

    Checks ``.env``, ``secrets.yaml``, ``private.key`` and ``ssl.key`` in
    the config directory.  For each existing file whose mode has a read,
    write or execute bit set for "others", a warning naming the access
    actually granted is recorded on ``self.report``.
    """
    # Local import: only this check needs the permission-bit constants.
    import stat

    logger.info("Validating file permissions...")

    sensitive_files = [
        '.env',
        'secrets.yaml',
        'private.key',
        'ssl.key'
    ]
    other_bits = (
        (stat.S_IROTH, "readable"),
        (stat.S_IWOTH, "writable"),
        (stat.S_IXOTH, "executable"),
    )

    for filename in sensitive_files:
        filepath = self.config_dir / filename
        if not filepath.exists():
            continue

        permissions = stat.S_IMODE(filepath.stat().st_mode)
        granted = [label for bit, label in other_bits if permissions & bit]
        if not granted:
            continue

        # Name the access that is really granted; previously a write-only or
        # execute-only bit was also reported as "readable".
        access = "/".join(granted)
        self.report.add_warning(
            f"Sensitive file '{filename}' is {access} by others",
            field=f"permissions.{filename}",
            value=f"Mode: {permissions & 0o777:03o}",
            suggestion="Set permissions to 600 or 640"
        )
|
| 480 |
+
|
| 481 |
+
def _validate_network_config(self):
    """Validate the ``BIND_HOST`` and ``PORT`` environment settings.

    * Warns when the bind host is ``0.0.0.0`` (also the default when
      ``BIND_HOST`` is unset) and ``ENVIRONMENT`` is ``production``/``prod``.
    * Records an error when ``PORT`` (default ``8000``) is not an integer in
      the range 1-65535.
    * Warns when the port is below 1024 and the process is not running as
      uid 0.
    """
    logger.info("Validating network configuration...")

    # Check common network settings
    bind_host = os.getenv('BIND_HOST', '0.0.0.0')
    if bind_host == '0.0.0.0':
        env = os.getenv('ENVIRONMENT', '').lower()
        if env in ['production', 'prod']:
            self.report.add_warning(
                "Binding to all interfaces (0.0.0.0) in production",
                field="BIND_HOST",
                suggestion="Consider binding to specific interface for security"
            )

    # Check port configuration
    port = os.getenv('PORT', '8000')
    try:
        port_num = int(port)
    except ValueError:
        port_num = None

    # A malformed or out-of-range port used to be ignored silently; report it
    # the same way an invalid memory limit is reported.
    if port_num is None or not 1 <= port_num <= 65535:
        self.report.add_error(
            f"Invalid port: {port}",
            field="PORT",
            suggestion="Use an integer between 1 and 65535"
        )
        return

    # os.getuid is only available on Unix; skip the privilege check elsewhere
    # instead of relying on a swallowed AttributeError.
    getuid = getattr(os, 'getuid', None)
    if port_num < 1024 and getuid is not None and getuid() != 0:
        self.report.add_warning(
            f"Port {port_num} requires root privileges",
            field="PORT",
            suggestion="Use port >= 1024 or run as root"
        )
|
| 508 |
+
|
| 509 |
+
def _validate_resource_limits(self):
    """Validate the ``MAX_MEMORY`` environment setting, when it is set.

    The value must be a number followed by ``G`` or ``M`` (case-insensitive,
    surrounding whitespace ignored).  Anything else is recorded as an error.
    A limit below 1G or below 512M is recorded as a warning.
    """
    logger.info("Validating resource limits...")

    # Check memory limits
    max_memory = os.getenv('MAX_MEMORY')
    if not max_memory:
        return

    # unit -> (minimum before warning, warning label, suggestion)
    rules = {
        'G': (1, "Low memory limit",
              "Consider increasing memory for better performance"),
        'M': (512, "Very low memory limit",
              "Increase memory limit for stable operation"),
    }

    # Parse memory value (e.g., "2G", "512M"); lower-case units and values
    # without a recognised unit used to slip through unchecked.
    normalized = max_memory.strip().upper()
    rule = rules.get(normalized[-1:])
    amount = None
    if rule is not None:
        try:
            amount = float(normalized[:-1])
        except ValueError:
            amount = None

    if amount is None:
        self.report.add_error(
            f"Invalid memory limit format: {max_memory}",
            field="MAX_MEMORY",
            suggestion="Use format like '2G' or '512M'"
        )
        return

    minimum, label, suggestion = rule
    if amount < minimum:
        self.report.add_warning(
            f"{label}: {max_memory}",
            field="MAX_MEMORY",
            suggestion=suggestion
        )
|
| 540 |
+
|
| 541 |
+
def _validate_backup_config(self):
    """Validate ``backup.yaml`` in the config directory, if present.

    Records an error when backups are not enabled and ``ENVIRONMENT`` is
    ``production``/``prod``, and a warning when a ``schedule`` is given that
    does not look like a five-field cron expression.  A file that cannot be
    read or parsed, or whose top-level value is not a mapping, is recorded
    as an error.
    """
    logger.info("Validating backup configuration...")

    backup_config = self.config_dir / 'backup.yaml'
    if not backup_config.exists():
        return

    try:
        with open(backup_config, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except Exception as e:
        self.report.add_error(
            f"Error reading backup config: {e}",
            field="backup.yaml"
        )
        return

    # safe_load yields None for an empty document; an empty file simply
    # means nothing is configured (so backups are not enabled).
    if config is None:
        config = {}
    if not isinstance(config, dict):
        self.report.add_error(
            "Error reading backup config: top-level value must be a mapping",
            field="backup.yaml"
        )
        return

    if not config.get('enabled', False):
        env = os.getenv('ENVIRONMENT', '').lower()
        if env in ['production', 'prod']:
            self.report.add_error(
                "Backups disabled in production",
                field="backup.enabled",
                suggestion="Enable backups for production data protection"
            )

    # Check backup schedule
    schedule = config.get('schedule')
    if schedule:
        # Basic cron validation.  A non-string schedule (e.g. a bare number)
        # is reported as invalid instead of making re.match raise TypeError.
        looks_like_cron = isinstance(schedule, str) and re.match(
            r'^[\d\*\-,/]+\s+[\d\*\-,/]+\s+[\d\*\-,/]+\s+[\d\*\-,/]+\s+[\d\*\-,/]+$',
            schedule,
        )
        if not looks_like_cron:
            self.report.add_warning(
                f"Invalid cron schedule format: {schedule}",
                field="backup.schedule",
                suggestion="Use valid cron format (e.g., '0 2 * * *')"
            )
|
| 576 |
+
|
| 577 |
+
def _validate_config_file(self, filepath: Path):
    """Parse a configuration file and record whether it is well-formed.

    ``.json`` files are parsed as JSON and ``.yaml``/``.yml`` files as YAML;
    files with any other suffix are only opened.  Success is recorded as an
    info entry on ``self.report`` and any failure as an error entry.

    Args:
        filepath: Path of the configuration file to check.
    """
    location = str(filepath)
    try:
        with open(filepath, 'r') as handle:
            suffix = filepath.suffix
            if suffix == '.json':
                json.load(handle)
            elif suffix in ('.yaml', '.yml'):
                yaml.safe_load(handle)

        self.report.add_info(
            f"Configuration file '{filepath.name}' is valid",
            field=location,
        )
    except yaml.YAMLError as exc:
        self.report.add_error(
            f"Invalid YAML in '{filepath.name}': {exc}",
            field=location,
        )
    except json.JSONDecodeError as exc:
        self.report.add_error(
            f"Invalid JSON in '{filepath.name}': {exc}",
            field=location,
        )
    except Exception as exc:
        # Catch-all for I/O and other failures.
        self.report.add_error(
            f"Error reading '{filepath.name}': {exc}",
            field=location,
        )
|
| 606 |
+
|
| 607 |
+
def print_report(self, report: ValidationReport = None):
    """Print a human-readable validation report and return the verdict.

    Args:
        report: Report to print; falls back to ``self.report`` when omitted.

    Returns:
        ``False`` when the report has errors, ``True`` otherwise (including
        when it only has warnings).
    """
    if report is None:
        report = self.report

    rule = "=" * 80
    icons = {"error": "❌", "warning": "⚠️", "info": "ℹ️"}

    print("\n" + rule)
    print("CONFIGURATION VALIDATION REPORT")
    print(rule)

    counts = report.get_summary()
    print("\nSUMMARY:")
    print(f" Errors: {counts['error']}")
    print(f" Warnings: {counts['warning']}")
    print(f" Info: {counts['info']}")

    if report.results:
        print("\nDETAILS:")
        for entry in report.results:
            severity = entry.level.value
            print(f"\n{icons[severity]} {severity.upper()}: {entry.message}")

            # Optional details are only printed when they carry information.
            if entry.field:
                print(f" Field: {entry.field}")
            if entry.value is not None:
                print(f" Value: {entry.value}")
            if entry.suggestion:
                print(f" Suggestion: {entry.suggestion}")

    print("\n" + rule)

    if report.has_errors:
        print("❌ VALIDATION FAILED - Please fix errors before proceeding")
        return False

    if report.has_warnings:
        verdict = "⚠️ VALIDATION PASSED WITH WARNINGS - Review warnings for production"
    else:
        verdict = "✅ VALIDATION PASSED - Configuration is valid"
    print(verdict)
    return True
|
| 646 |
+
|
| 647 |
+
|
| 648 |
+
def main():
    """Command-line entry point for configuration validation.

    Parses ``--config-dir``, ``--json`` and ``--fail-on-warnings``, runs
    ``ConfigValidator.validate_all`` and prints the report either as JSON or
    in human-readable form.

    Exit codes: 1 when the report has errors, 2 when ``--fail-on-warnings``
    is given and the report has warnings, 0 otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Validate Pixelated Empathy AI configuration")
    parser.add_argument(
        '--config-dir',
        default=None,
        help="Configuration directory path"
    )
    parser.add_argument(
        '--json',
        action='store_true',
        help="Output report in JSON format"
    )
    parser.add_argument(
        '--fail-on-warnings',
        action='store_true',
        help="Exit with error code if warnings are found"
    )

    args = parser.parse_args()

    # Create validator and run validation
    validator = ConfigValidator(args.config_dir)
    report = validator.validate_all()

    if args.json:
        # Output JSON report
        json_report = {
            'summary': report.get_summary(),
            'results': [
                {
                    'level': r.level.value,
                    'message': r.message,
                    'field': r.field,
                    'value': r.value,
                    'suggestion': r.suggestion
                }
                for r in report.results
            ]
        }
        print(json.dumps(json_report, indent=2))
        # print_report is not called in JSON mode, so derive the verdict from
        # the report itself; previously ``success`` was left unassigned here
        # and the exit-code logic below raised UnboundLocalError.
        success = not report.has_errors
    else:
        # Print human-readable report
        success = validator.print_report(report)

    # Exit with appropriate code
    if not success:
        sys.exit(1)
    elif args.fail_on_warnings and report.has_warnings:
        sys.exit(2)
    else:
        sys.exit(0)


if __name__ == '__main__':
    main()
|
configs/stage_configs/configs_config.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Centralized configuration for the Pixelated Empathy AI dataset pipeline.
|
| 3 |
+
Provides an enterprise-grade, unified configuration management system.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from dataclasses import dataclass, field
|
| 7 |
+
from typing import Any
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def _default_huggingface_datasets() -> dict[str, str]:
    """Build a fresh mapping of dataset alias to Hugging Face repository id."""
    return {
        "mental_health_counseling": "Amod/mental_health_counseling_conversations",
        "psych8k": "EmoCareAI/Psych8k",
        # Mental Health Investigation Resources (Phase 1)
        "mental_health_snli": "iqrakiran/customized-mental-health-snli2",
        "mental_health_preprocessed": "typosonlr/MentalHealthPreProcessed",
        "depression_detection": "ShreyaR/DepressionDetection",
    }


@dataclass
class DataLoaderConfig:
    """Settings for data loading and acquisition.

    Every instance receives its own copy of the default dataset mapping.
    """

    # Dataset alias -> Hugging Face repository id.
    huggingface_datasets: dict[str, str] = field(
        default_factory=_default_huggingface_datasets
    )
    # Relative directory paths (presumably resolved against the repository
    # root -- confirm against the loader).
    download_path: str = "ai/datasets/external"
    cache_dir: str = "ai/datasets/cache"
    huggingface_cache_dir: str = "ai/datasets/huggingface_cache"
    max_retries: int = 3
|
| 25 |
+
|
| 26 |
+
@dataclass
class StandardizationConfig:
    """Settings for the DataStandardizer.

    Defaults: 8 workers, batches of 200, monitoring enabled, output written
    under ``ai/datasets/standardized``.
    """

    max_workers: int = 8
    batch_size: int = 200
    enable_monitoring: bool = True
    # Relative path (presumably resolved by the standardizer -- confirm).
    output_dir: str = "ai/datasets/standardized"
|
| 33 |
+
|
| 34 |
+
@dataclass
class LoggingConfig:
    """Settings for the logging system."""

    log_level: str = "INFO"
    log_file: str = "logs/dataset_pipeline.log"
    # 10 MB; presumably the rotation threshold of a rotating file handler,
    # with ``backup_count`` rotated files kept -- confirm against the logger
    # setup.
    max_bytes: int = 10 * 1024 ** 2
    backup_count: int = 5
|
| 41 |
+
|
| 42 |
+
@dataclass
class Config:
    """Root configuration object grouping the per-subsystem settings."""

    data_loader: DataLoaderConfig = field(default_factory=DataLoaderConfig)
    standardization: StandardizationConfig = field(default_factory=StandardizationConfig)
    logging: LoggingConfig = field(default_factory=LoggingConfig)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the config to a dictionary.

        Returns:
            A dict with the keys ``data_loader``, ``standardization`` and
            ``logging``, each mapping field names to values.  The result is
            a deep copy, so mutating it does not alter this instance
            (returning the sections' ``__dict__`` objects directly let
            callers modify the shared config by accident).
        """
        # Imported here because the module-level import only brings in
        # ``dataclass`` and ``field``.
        from dataclasses import asdict

        return asdict(self)
|
| 56 |
+
|
| 57 |
+
# Module-level singleton: created once at import time and shared by every
# caller of get_config().
config = Config()


def get_config() -> Config:
    """Return the shared module-level ``Config`` instance."""
    return config
|
| 63 |
+
|
| 64 |
+
# Example usage:
|
| 65 |
+
# from config import get_config
|
| 66 |
+
# config = get_config()
|
| 67 |
+
# print(config.standardization.batch_size)
|
configs/stage_configs/corrected_audit_report.json
ADDED
|
@@ -0,0 +1,694 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"audit_date": "2025-08-24T13:28:00.686206",
|
| 3 |
+
"total_tasks": 36,
|
| 4 |
+
"complete": 34,
|
| 5 |
+
"partial": 0,
|
| 6 |
+
"found": 0,
|
| 7 |
+
"missing": 2,
|
| 8 |
+
"working_count": 34,
|
| 9 |
+
"completion_rate": 0.9444444444444444,
|
| 10 |
+
"overall_status": "NEARLY_COMPLETE",
|
| 11 |
+
"ecosystem_files": 4,
|
| 12 |
+
"dataset_pipeline_files": 30,
|
| 13 |
+
"detailed_results": {
|
| 14 |
+
"6.1": {
|
| 15 |
+
"task_id": "6.1",
|
| 16 |
+
"expected_filename": "distributed_architecture.py",
|
| 17 |
+
"description": "Distributed processing architecture",
|
| 18 |
+
"found_files": [
|
| 19 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/distributed_architecture.py",
|
| 20 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/distributed_architecture.py"
|
| 21 |
+
],
|
| 22 |
+
"status": "COMPLETE",
|
| 23 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/distributed_architecture.py",
|
| 24 |
+
"file_stats": {
|
| 25 |
+
"size_kb": 20.2275390625,
|
| 26 |
+
"lines": 569,
|
| 27 |
+
"classes": 6,
|
| 28 |
+
"functions": 26,
|
| 29 |
+
"has_docstring": true
|
| 30 |
+
},
|
| 31 |
+
"issues": []
|
| 32 |
+
},
|
| 33 |
+
"6.2": {
|
| 34 |
+
"task_id": "6.2",
|
| 35 |
+
"expected_filename": "data_fusion_engine.py",
|
| 36 |
+
"description": "Intelligent data fusion algorithms",
|
| 37 |
+
"found_files": [
|
| 38 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/data_fusion_engine.py",
|
| 39 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/data_fusion_engine.py"
|
| 40 |
+
],
|
| 41 |
+
"status": "COMPLETE",
|
| 42 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/data_fusion_engine.py",
|
| 43 |
+
"file_stats": {
|
| 44 |
+
"size_kb": 26.6845703125,
|
| 45 |
+
"lines": 694,
|
| 46 |
+
"classes": 5,
|
| 47 |
+
"functions": 20,
|
| 48 |
+
"has_docstring": true
|
| 49 |
+
},
|
| 50 |
+
"issues": []
|
| 51 |
+
},
|
| 52 |
+
"6.3": {
|
| 53 |
+
"task_id": "6.3",
|
| 54 |
+
"expected_filename": "quality_assessment_framework.py",
|
| 55 |
+
"description": "Hierarchical quality assessment framework",
|
| 56 |
+
"found_files": [
|
| 57 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/quality_assessment_framework.py",
|
| 58 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/quality_assessment_framework.py"
|
| 59 |
+
],
|
| 60 |
+
"status": "COMPLETE",
|
| 61 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/quality_assessment_framework.py",
|
| 62 |
+
"file_stats": {
|
| 63 |
+
"size_kb": 27.6455078125,
|
| 64 |
+
"lines": 708,
|
| 65 |
+
"classes": 5,
|
| 66 |
+
"functions": 25,
|
| 67 |
+
"has_docstring": true
|
| 68 |
+
},
|
| 69 |
+
"issues": []
|
| 70 |
+
},
|
| 71 |
+
"6.4": {
|
| 72 |
+
"task_id": "6.4",
|
| 73 |
+
"expected_filename": "deduplication.py",
|
| 74 |
+
"description": "Automated conversation deduplication",
|
| 75 |
+
"found_files": [
|
| 76 |
+
"/home/vivi/pixelated/ai/pipelines/dataset_pipeline/test_deduplication.py",
|
| 77 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/test_deduplication.py",
|
| 78 |
+
"/home/vivi/pixelated/ai/datasets/dataset_pipeline/test_deduplication.py",
|
| 79 |
+
"/home/vivi/pixelated/ai/datasets/dataset_pipeline/deduplication_system.py",
|
| 80 |
+
"/home/vivi/pixelated/ai/pipelines/dataset_pipeline/deduplication.py",
|
| 81 |
+
"/home/vivi/pixelated/ai/datasets/dataset_pipeline/deduplication.py",
|
| 82 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/deduplication.py"
|
| 83 |
+
],
|
| 84 |
+
"status": "COMPLETE",
|
| 85 |
+
"primary_file": "/home/vivi/pixelated/ai/pipelines/dataset_pipeline/test_deduplication.py",
|
| 86 |
+
"file_stats": {
|
| 87 |
+
"size_kb": 15.1220703125,
|
| 88 |
+
"lines": 419,
|
| 89 |
+
"classes": 5,
|
| 90 |
+
"functions": 30,
|
| 91 |
+
"has_docstring": true
|
| 92 |
+
},
|
| 93 |
+
"issues": []
|
| 94 |
+
},
|
| 95 |
+
"6.5": {
|
| 96 |
+
"task_id": "6.5",
|
| 97 |
+
"expected_filename": "cross_dataset_linker.py",
|
| 98 |
+
"description": "Cross-dataset conversation linking",
|
| 99 |
+
"found_files": [],
|
| 100 |
+
"status": "MISSING",
|
| 101 |
+
"primary_file": null,
|
| 102 |
+
"file_stats": {},
|
| 103 |
+
"issues": []
|
| 104 |
+
},
|
| 105 |
+
"6.6": {
|
| 106 |
+
"task_id": "6.6",
|
| 107 |
+
"expected_filename": "metadata_schema.py",
|
| 108 |
+
"description": "Unified metadata schema",
|
| 109 |
+
"found_files": [],
|
| 110 |
+
"status": "MISSING",
|
| 111 |
+
"primary_file": null,
|
| 112 |
+
"file_stats": {},
|
| 113 |
+
"issues": []
|
| 114 |
+
},
|
| 115 |
+
"6.7": {
|
| 116 |
+
"task_id": "6.7",
|
| 117 |
+
"expected_filename": "therapeutic_intelligence.py",
|
| 118 |
+
"description": "Comprehensive therapeutic approach classification",
|
| 119 |
+
"found_files": [
|
| 120 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/therapeutic_intelligence.py",
|
| 121 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/test_therapeutic_intelligence.py",
|
| 122 |
+
"/home/vivi/pixelated/ai/datasets/dataset_pipeline/therapeutic_intelligence_orchestrator.py",
|
| 123 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/therapeutic_intelligence.py"
|
| 124 |
+
],
|
| 125 |
+
"status": "COMPLETE",
|
| 126 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/therapeutic_intelligence.py",
|
| 127 |
+
"file_stats": {
|
| 128 |
+
"size_kb": 25.4091796875,
|
| 129 |
+
"lines": 582,
|
| 130 |
+
"classes": 4,
|
| 131 |
+
"functions": 18,
|
| 132 |
+
"has_docstring": true
|
| 133 |
+
},
|
| 134 |
+
"issues": []
|
| 135 |
+
},
|
| 136 |
+
"6.8": {
|
| 137 |
+
"task_id": "6.8",
|
| 138 |
+
"expected_filename": "condition_pattern_recognition.py",
|
| 139 |
+
"description": "Mental health condition pattern recognition",
|
| 140 |
+
"found_files": [
|
| 141 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/condition_pattern_recognition.py"
|
| 142 |
+
],
|
| 143 |
+
"status": "COMPLETE",
|
| 144 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/condition_pattern_recognition.py",
|
| 145 |
+
"file_stats": {
|
| 146 |
+
"size_kb": 30.849609375,
|
| 147 |
+
"lines": 730,
|
| 148 |
+
"classes": 4,
|
| 149 |
+
"functions": 17,
|
| 150 |
+
"has_docstring": true
|
| 151 |
+
},
|
| 152 |
+
"issues": []
|
| 153 |
+
},
|
| 154 |
+
"6.9": {
|
| 155 |
+
"task_id": "6.9",
|
| 156 |
+
"expected_filename": "outcome_prediction.py",
|
| 157 |
+
"description": "Therapeutic outcome prediction models",
|
| 158 |
+
"found_files": [
|
| 159 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/outcome_prediction.py"
|
| 160 |
+
],
|
| 161 |
+
"status": "COMPLETE",
|
| 162 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/outcome_prediction.py",
|
| 163 |
+
"file_stats": {
|
| 164 |
+
"size_kb": 23.421875,
|
| 165 |
+
"lines": 580,
|
| 166 |
+
"classes": 5,
|
| 167 |
+
"functions": 18,
|
| 168 |
+
"has_docstring": true
|
| 169 |
+
},
|
| 170 |
+
"issues": []
|
| 171 |
+
},
|
| 172 |
+
"6.10": {
|
| 173 |
+
"task_id": "6.10",
|
| 174 |
+
"expected_filename": "crisis_intervention_detector.py",
|
| 175 |
+
"description": "Crisis intervention detection and escalation",
|
| 176 |
+
"found_files": [
|
| 177 |
+
"/home/vivi/pixelated/ai/tests/test_crisis_intervention_detector_enhanced.py",
|
| 178 |
+
"/home/vivi/pixelated/ai/pixel/test_crisis_intervention_detector.py",
|
| 179 |
+
"/home/vivi/pixelated/ai/tests/test_crisis_intervention_detector.py",
|
| 180 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/crisis_intervention_detector.py",
|
| 181 |
+
"/home/vivi/pixelated/ai/tests/test_crisis_intervention_detector_working.py"
|
| 182 |
+
],
|
| 183 |
+
"status": "COMPLETE",
|
| 184 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/crisis_intervention_detector.py",
|
| 185 |
+
"file_stats": {
|
| 186 |
+
"size_kb": 39.1484375,
|
| 187 |
+
"lines": 849,
|
| 188 |
+
"classes": 7,
|
| 189 |
+
"functions": 24,
|
| 190 |
+
"has_docstring": true
|
| 191 |
+
},
|
| 192 |
+
"issues": []
|
| 193 |
+
},
|
| 194 |
+
"6.11": {
|
| 195 |
+
"task_id": "6.11",
|
| 196 |
+
"expected_filename": "personality_adapter.py",
|
| 197 |
+
"description": "Personality-aware conversation adaptation",
|
| 198 |
+
"found_files": [
|
| 199 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/personality_adapter.py",
|
| 200 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/test_personality_adapter.py"
|
| 201 |
+
],
|
| 202 |
+
"status": "COMPLETE",
|
| 203 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/personality_adapter.py",
|
| 204 |
+
"file_stats": {
|
| 205 |
+
"size_kb": 30.1650390625,
|
| 206 |
+
"lines": 704,
|
| 207 |
+
"classes": 7,
|
| 208 |
+
"functions": 26,
|
| 209 |
+
"has_docstring": true
|
| 210 |
+
},
|
| 211 |
+
"issues": []
|
| 212 |
+
},
|
| 213 |
+
"6.12": {
|
| 214 |
+
"task_id": "6.12",
|
| 215 |
+
"expected_filename": "cultural_competency_generator.py",
|
| 216 |
+
"description": "Cultural competency and diversity-aware response generation",
|
| 217 |
+
"found_files": [
|
| 218 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/cultural_competency_generator.py",
|
| 219 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/test_cultural_competency_generator.py"
|
| 220 |
+
],
|
| 221 |
+
"status": "COMPLETE",
|
| 222 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/cultural_competency_generator.py",
|
| 223 |
+
"file_stats": {
|
| 224 |
+
"size_kb": 33.9677734375,
|
| 225 |
+
"lines": 789,
|
| 226 |
+
"classes": 6,
|
| 227 |
+
"functions": 35,
|
| 228 |
+
"has_docstring": true
|
| 229 |
+
},
|
| 230 |
+
"issues": []
|
| 231 |
+
},
|
| 232 |
+
"6.13": {
|
| 233 |
+
"task_id": "6.13",
|
| 234 |
+
"expected_filename": "audio_emotion_integration.py",
|
| 235 |
+
"description": "Audio emotion recognition integration",
|
| 236 |
+
"found_files": [
|
| 237 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/audio_emotion_integration.py",
|
| 238 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/audio_emotion_integration.py"
|
| 239 |
+
],
|
| 240 |
+
"status": "COMPLETE",
|
| 241 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/audio_emotion_integration.py",
|
| 242 |
+
"file_stats": {
|
| 243 |
+
"size_kb": 23.2099609375,
|
| 244 |
+
"lines": 575,
|
| 245 |
+
"classes": 5,
|
| 246 |
+
"functions": 18,
|
| 247 |
+
"has_docstring": true
|
| 248 |
+
},
|
| 249 |
+
"issues": []
|
| 250 |
+
},
|
| 251 |
+
"6.14": {
|
| 252 |
+
"task_id": "6.14",
|
| 253 |
+
"expected_filename": "multimodal_disorder_analysis.py",
|
| 254 |
+
"description": "Multi-modal mental disorder analysis pipeline",
|
| 255 |
+
"found_files": [
|
| 256 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/multimodal_disorder_analysis.py"
|
| 257 |
+
],
|
| 258 |
+
"status": "COMPLETE",
|
| 259 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/multimodal_disorder_analysis.py",
|
| 260 |
+
"file_stats": {
|
| 261 |
+
"size_kb": 28.7197265625,
|
| 262 |
+
"lines": 691,
|
| 263 |
+
"classes": 8,
|
| 264 |
+
"functions": 21,
|
| 265 |
+
"has_docstring": true
|
| 266 |
+
},
|
| 267 |
+
"issues": []
|
| 268 |
+
},
|
| 269 |
+
"6.15": {
|
| 270 |
+
"task_id": "6.15",
|
| 271 |
+
"expected_filename": "emotion_cause_extraction.py",
|
| 272 |
+
"description": "Emotion cause extraction and intervention mapping",
|
| 273 |
+
"found_files": [
|
| 274 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/emotion_cause_extraction.py"
|
| 275 |
+
],
|
| 276 |
+
"status": "COMPLETE",
|
| 277 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/emotion_cause_extraction.py",
|
| 278 |
+
"file_stats": {
|
| 279 |
+
"size_kb": 28.5,
|
| 280 |
+
"lines": 686,
|
| 281 |
+
"classes": 7,
|
| 282 |
+
"functions": 18,
|
| 283 |
+
"has_docstring": true
|
| 284 |
+
},
|
| 285 |
+
"issues": []
|
| 286 |
+
},
|
| 287 |
+
"6.16": {
|
| 288 |
+
"task_id": "6.16",
|
| 289 |
+
"expected_filename": "tfidf_clusterer.py",
|
| 290 |
+
"description": "TF-IDF feature-based conversation clustering",
|
| 291 |
+
"found_files": [
|
| 292 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/tfidf_clusterer.py",
|
| 293 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/test_tfidf_clusterer.py"
|
| 294 |
+
],
|
| 295 |
+
"status": "COMPLETE",
|
| 296 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/tfidf_clusterer.py",
|
| 297 |
+
"file_stats": {
|
| 298 |
+
"size_kb": 27.6640625,
|
| 299 |
+
"lines": 668,
|
| 300 |
+
"classes": 6,
|
| 301 |
+
"functions": 20,
|
| 302 |
+
"has_docstring": true
|
| 303 |
+
},
|
| 304 |
+
"issues": []
|
| 305 |
+
},
|
| 306 |
+
"6.17": {
|
| 307 |
+
"task_id": "6.17",
|
| 308 |
+
"expected_filename": "temporal_reasoner.py",
|
| 309 |
+
"description": "Temporal reasoning integration",
|
| 310 |
+
"found_files": [
|
| 311 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/temporal_reasoner.py"
|
| 312 |
+
],
|
| 313 |
+
"status": "COMPLETE",
|
| 314 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/temporal_reasoner.py",
|
| 315 |
+
"file_stats": {
|
| 316 |
+
"size_kb": 30.3173828125,
|
| 317 |
+
"lines": 744,
|
| 318 |
+
"classes": 7,
|
| 319 |
+
"functions": 25,
|
| 320 |
+
"has_docstring": true
|
| 321 |
+
},
|
| 322 |
+
"issues": []
|
| 323 |
+
},
|
| 324 |
+
"6.18": {
|
| 325 |
+
"task_id": "6.18",
|
| 326 |
+
"expected_filename": "evidence_validator.py",
|
| 327 |
+
"description": "Scientific evidence-based practice validation",
|
| 328 |
+
"found_files": [
|
| 329 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/evidence_validator.py"
|
| 330 |
+
],
|
| 331 |
+
"status": "COMPLETE",
|
| 332 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/evidence_validator.py",
|
| 333 |
+
"file_stats": {
|
| 334 |
+
"size_kb": 32.271484375,
|
| 335 |
+
"lines": 755,
|
| 336 |
+
"classes": 8,
|
| 337 |
+
"functions": 22,
|
| 338 |
+
"has_docstring": true
|
| 339 |
+
},
|
| 340 |
+
"issues": []
|
| 341 |
+
},
|
| 342 |
+
"6.19": {
|
| 343 |
+
"task_id": "6.19",
|
| 344 |
+
"expected_filename": "priority_weighted_sampler.py",
|
| 345 |
+
"description": "Priority-weighted sampling algorithms",
|
| 346 |
+
"found_files": [
|
| 347 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/priority_weighted_sampler.py",
|
| 348 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/test_priority_weighted_sampler.py"
|
| 349 |
+
],
|
| 350 |
+
"status": "COMPLETE",
|
| 351 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/priority_weighted_sampler.py",
|
| 352 |
+
"file_stats": {
|
| 353 |
+
"size_kb": 25.404296875,
|
| 354 |
+
"lines": 646,
|
| 355 |
+
"classes": 3,
|
| 356 |
+
"functions": 17,
|
| 357 |
+
"has_docstring": true
|
| 358 |
+
},
|
| 359 |
+
"issues": []
|
| 360 |
+
},
|
| 361 |
+
"6.20": {
|
| 362 |
+
"task_id": "6.20",
|
| 363 |
+
"expected_filename": "condition_balancer.py",
|
| 364 |
+
"description": "Condition-specific balancing system",
|
| 365 |
+
"found_files": [
|
| 366 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/condition_balancer.py"
|
| 367 |
+
],
|
| 368 |
+
"status": "COMPLETE",
|
| 369 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/condition_balancer.py",
|
| 370 |
+
"file_stats": {
|
| 371 |
+
"size_kb": 26.40625,
|
| 372 |
+
"lines": 612,
|
| 373 |
+
"classes": 3,
|
| 374 |
+
"functions": 12,
|
| 375 |
+
"has_docstring": true
|
| 376 |
+
},
|
| 377 |
+
"issues": []
|
| 378 |
+
},
|
| 379 |
+
"6.21": {
|
| 380 |
+
"task_id": "6.21",
|
| 381 |
+
"expected_filename": "approach_diversity_optimizer.py",
|
| 382 |
+
"description": "Therapeutic approach diversity optimization",
|
| 383 |
+
"found_files": [
|
| 384 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/approach_diversity_optimizer.py"
|
| 385 |
+
],
|
| 386 |
+
"status": "COMPLETE",
|
| 387 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/approach_diversity_optimizer.py",
|
| 388 |
+
"file_stats": {
|
| 389 |
+
"size_kb": 33.8076171875,
|
| 390 |
+
"lines": 718,
|
| 391 |
+
"classes": 3,
|
| 392 |
+
"functions": 15,
|
| 393 |
+
"has_docstring": true
|
| 394 |
+
},
|
| 395 |
+
"issues": []
|
| 396 |
+
},
|
| 397 |
+
"6.22": {
|
| 398 |
+
"task_id": "6.22",
|
| 399 |
+
"expected_filename": "demographic_balancer.py",
|
| 400 |
+
"description": "Demographic and cultural diversity balancing",
|
| 401 |
+
"found_files": [
|
| 402 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/demographic_balancer.py"
|
| 403 |
+
],
|
| 404 |
+
"status": "COMPLETE",
|
| 405 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/demographic_balancer.py",
|
| 406 |
+
"file_stats": {
|
| 407 |
+
"size_kb": 20.724609375,
|
| 408 |
+
"lines": 486,
|
| 409 |
+
"classes": 3,
|
| 410 |
+
"functions": 12,
|
| 411 |
+
"has_docstring": true
|
| 412 |
+
},
|
| 413 |
+
"issues": []
|
| 414 |
+
},
|
| 415 |
+
"6.23": {
|
| 416 |
+
"task_id": "6.23",
|
| 417 |
+
"expected_filename": "complexity_stratifier.py",
|
| 418 |
+
"description": "Conversation complexity stratification",
|
| 419 |
+
"found_files": [
|
| 420 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/complexity_stratifier.py"
|
| 421 |
+
],
|
| 422 |
+
"status": "COMPLETE",
|
| 423 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/complexity_stratifier.py",
|
| 424 |
+
"file_stats": {
|
| 425 |
+
"size_kb": 26.2333984375,
|
| 426 |
+
"lines": 623,
|
| 427 |
+
"classes": 3,
|
| 428 |
+
"functions": 14,
|
| 429 |
+
"has_docstring": true
|
| 430 |
+
},
|
| 431 |
+
"issues": []
|
| 432 |
+
},
|
| 433 |
+
"6.24": {
|
| 434 |
+
"task_id": "6.24",
|
| 435 |
+
"expected_filename": "crisis_routine_balancer.py",
|
| 436 |
+
"description": "Crisis-to-routine conversation ratio optimization",
|
| 437 |
+
"found_files": [
|
| 438 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/crisis_routine_balancer.py"
|
| 439 |
+
],
|
| 440 |
+
"status": "COMPLETE",
|
| 441 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/crisis_routine_balancer.py",
|
| 442 |
+
"file_stats": {
|
| 443 |
+
"size_kb": 23.8505859375,
|
| 444 |
+
"lines": 574,
|
| 445 |
+
"classes": 3,
|
| 446 |
+
"functions": 13,
|
| 447 |
+
"has_docstring": true
|
| 448 |
+
},
|
| 449 |
+
"issues": []
|
| 450 |
+
},
|
| 451 |
+
"6.25": {
|
| 452 |
+
"task_id": "6.25",
|
| 453 |
+
"expected_filename": "multi_tier_validator.py",
|
| 454 |
+
"description": "Multi-tier quality validation system",
|
| 455 |
+
"found_files": [
|
| 456 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/multi_tier_validator.py",
|
| 457 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/test_multi_tier_validator.py"
|
| 458 |
+
],
|
| 459 |
+
"status": "COMPLETE",
|
| 460 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/multi_tier_validator.py",
|
| 461 |
+
"file_stats": {
|
| 462 |
+
"size_kb": 28.9892578125,
|
| 463 |
+
"lines": 730,
|
| 464 |
+
"classes": 5,
|
| 465 |
+
"functions": 25,
|
| 466 |
+
"has_docstring": true
|
| 467 |
+
},
|
| 468 |
+
"issues": []
|
| 469 |
+
},
|
| 470 |
+
"6.26": {
|
| 471 |
+
"task_id": "6.26",
|
| 472 |
+
"expected_filename": "dsm5_accuracy_validator.py",
|
| 473 |
+
"description": "DSM-5 therapeutic accuracy validation",
|
| 474 |
+
"found_files": [
|
| 475 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/test_dsm5_accuracy_validator.py",
|
| 476 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/dsm5_accuracy_validator.py"
|
| 477 |
+
],
|
| 478 |
+
"status": "COMPLETE",
|
| 479 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/test_dsm5_accuracy_validator.py",
|
| 480 |
+
"file_stats": {
|
| 481 |
+
"size_kb": 16.8955078125,
|
| 482 |
+
"lines": 393,
|
| 483 |
+
"classes": 1,
|
| 484 |
+
"functions": 22,
|
| 485 |
+
"has_docstring": true
|
| 486 |
+
},
|
| 487 |
+
"issues": []
|
| 488 |
+
},
|
| 489 |
+
"6.27": {
|
| 490 |
+
"task_id": "6.27",
|
| 491 |
+
"expected_filename": "safety_ethics_validator.py",
|
| 492 |
+
"description": "Conversation safety and ethics validation",
|
| 493 |
+
"found_files": [
|
| 494 |
+
"/home/vivi/pixelated/ai/pixel/validation/test_safety_ethics_validator.py",
|
| 495 |
+
"/home/vivi/pixelated/ai/tests/test_safety_ethics_validator_working.py",
|
| 496 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/test_safety_ethics_validator.py",
|
| 497 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/safety_ethics_validator.py",
|
| 498 |
+
"/home/vivi/pixelated/ai/tests/test_safety_ethics_validator.py",
|
| 499 |
+
"/home/vivi/pixelated/ai/pixel/validation/safety_ethics_validator.py"
|
| 500 |
+
],
|
| 501 |
+
"status": "COMPLETE",
|
| 502 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/test_safety_ethics_validator.py",
|
| 503 |
+
"file_stats": {
|
| 504 |
+
"size_kb": 21.326171875,
|
| 505 |
+
"lines": 542,
|
| 506 |
+
"classes": 1,
|
| 507 |
+
"functions": 21,
|
| 508 |
+
"has_docstring": true
|
| 509 |
+
},
|
| 510 |
+
"issues": []
|
| 511 |
+
},
|
| 512 |
+
"6.28": {
|
| 513 |
+
"task_id": "6.28",
|
| 514 |
+
"expected_filename": "effectiveness_predictor.py",
|
| 515 |
+
"description": "Therapeutic effectiveness prediction",
|
| 516 |
+
"found_files": [
|
| 517 |
+
"/home/vivi/pixelated/ai/monitoring/conversation_effectiveness_predictor.py",
|
| 518 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/test_effectiveness_predictor.py",
|
| 519 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/effectiveness_predictor.py"
|
| 520 |
+
],
|
| 521 |
+
"status": "COMPLETE",
|
| 522 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/test_effectiveness_predictor.py",
|
| 523 |
+
"file_stats": {
|
| 524 |
+
"size_kb": 17.89453125,
|
| 525 |
+
"lines": 447,
|
| 526 |
+
"classes": 1,
|
| 527 |
+
"functions": 20,
|
| 528 |
+
"has_docstring": true
|
| 529 |
+
},
|
| 530 |
+
"issues": []
|
| 531 |
+
},
|
| 532 |
+
"6.29": {
|
| 533 |
+
"task_id": "6.29",
|
| 534 |
+
"expected_filename": "coherence_validator.py",
|
| 535 |
+
"description": "Conversation coherence validation using CoT reasoning",
|
| 536 |
+
"found_files": [
|
| 537 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/coherence_validator.py",
|
| 538 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/test_coherence_validator.py"
|
| 539 |
+
],
|
| 540 |
+
"status": "COMPLETE",
|
| 541 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/coherence_validator.py",
|
| 542 |
+
"file_stats": {
|
| 543 |
+
"size_kb": 38.3896484375,
|
| 544 |
+
"lines": 1016,
|
| 545 |
+
"classes": 5,
|
| 546 |
+
"functions": 24,
|
| 547 |
+
"has_docstring": true
|
| 548 |
+
},
|
| 549 |
+
"issues": []
|
| 550 |
+
},
|
| 551 |
+
"6.30": {
|
| 552 |
+
"task_id": "6.30",
|
| 553 |
+
"expected_filename": "realtime_quality_monitor.py",
|
| 554 |
+
"description": "Real-time conversation quality monitoring",
|
| 555 |
+
"found_files": [
|
| 556 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/realtime_quality_monitor.py"
|
| 557 |
+
],
|
| 558 |
+
"status": "COMPLETE",
|
| 559 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/realtime_quality_monitor.py",
|
| 560 |
+
"file_stats": {
|
| 561 |
+
"size_kb": 17.41015625,
|
| 562 |
+
"lines": 467,
|
| 563 |
+
"classes": 5,
|
| 564 |
+
"functions": 20,
|
| 565 |
+
"has_docstring": true
|
| 566 |
+
},
|
| 567 |
+
"issues": []
|
| 568 |
+
},
|
| 569 |
+
"6.31": {
|
| 570 |
+
"task_id": "6.31",
|
| 571 |
+
"expected_filename": "production_exporter.py",
|
| 572 |
+
"description": "Production-ready dataset export with tiered access",
|
| 573 |
+
"found_files": [
|
| 574 |
+
"/home/vivi/pixelated/ai/tests/test_production_exporter.py",
|
| 575 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/production_exporter.py",
|
| 576 |
+
"/home/vivi/pixelated/ai/pixel/test_production_exporter.py",
|
| 577 |
+
"/home/vivi/pixelated/ai/tests/test_production_exporter_working.py"
|
| 578 |
+
],
|
| 579 |
+
"status": "COMPLETE",
|
| 580 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/production_exporter.py",
|
| 581 |
+
"file_stats": {
|
| 582 |
+
"size_kb": 26.828125,
|
| 583 |
+
"lines": 710,
|
| 584 |
+
"classes": 5,
|
| 585 |
+
"functions": 24,
|
| 586 |
+
"has_docstring": true
|
| 587 |
+
},
|
| 588 |
+
"issues": []
|
| 589 |
+
},
|
| 590 |
+
"6.32": {
|
| 591 |
+
"task_id": "6.32",
|
| 592 |
+
"expected_filename": "adaptive_learner.py",
|
| 593 |
+
"description": "Adaptive learning pipeline",
|
| 594 |
+
"found_files": [
|
| 595 |
+
"/home/vivi/pixelated/ai/tests/test_adaptive_learner_working.py",
|
| 596 |
+
"/home/vivi/pixelated/ai/tests/test_adaptive_learner.py",
|
| 597 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/adaptive_learner.py",
|
| 598 |
+
"/home/vivi/pixelated/ai/pixel/test_adaptive_learner.py"
|
| 599 |
+
],
|
| 600 |
+
"status": "COMPLETE",
|
| 601 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/adaptive_learner.py",
|
| 602 |
+
"file_stats": {
|
| 603 |
+
"size_kb": 26.4423828125,
|
| 604 |
+
"lines": 684,
|
| 605 |
+
"classes": 8,
|
| 606 |
+
"functions": 34,
|
| 607 |
+
"has_docstring": true
|
| 608 |
+
},
|
| 609 |
+
"issues": []
|
| 610 |
+
},
|
| 611 |
+
"6.33": {
|
| 612 |
+
"task_id": "6.33",
|
| 613 |
+
"expected_filename": "analytics_dashboard.py",
|
| 614 |
+
"description": "Comprehensive analytics dashboard",
|
| 615 |
+
"found_files": [
|
| 616 |
+
"/home/vivi/pixelated/ai/monitoring/test_quality_analytics_dashboard_v2.py",
|
| 617 |
+
"/home/vivi/pixelated/ai/monitoring/test_quality_analytics_dashboard.py",
|
| 618 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/analytics_dashboard.py",
|
| 619 |
+
"/home/vivi/pixelated/ai/monitoring/launch_quality_analytics_dashboard.py",
|
| 620 |
+
"/home/vivi/pixelated/ai/pixel/test_analytics_dashboard.py",
|
| 621 |
+
"/home/vivi/pixelated/ai/monitoring/quality_analytics_dashboard.py",
|
| 622 |
+
"/home/vivi/pixelated/ai/monitoring/launch_quality_analytics_dashboard_v2.py",
|
| 623 |
+
"/home/vivi/pixelated/ai/tests/test_analytics_dashboard_working.py",
|
| 624 |
+
"/home/vivi/pixelated/ai/tests/test_analytics_dashboard.py",
|
| 625 |
+
"/home/vivi/pixelated/ai/monitoring/quality_analytics_dashboard_v2.py"
|
| 626 |
+
],
|
| 627 |
+
"status": "COMPLETE",
|
| 628 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/analytics_dashboard.py",
|
| 629 |
+
"file_stats": {
|
| 630 |
+
"size_kb": 18.1240234375,
|
| 631 |
+
"lines": 455,
|
| 632 |
+
"classes": 2,
|
| 633 |
+
"functions": 17,
|
| 634 |
+
"has_docstring": true
|
| 635 |
+
},
|
| 636 |
+
"issues": []
|
| 637 |
+
},
|
| 638 |
+
"6.34": {
|
| 639 |
+
"task_id": "6.34",
|
| 640 |
+
"expected_filename": "automated_maintenance.py",
|
| 641 |
+
"description": "Automated dataset update and maintenance procedures",
|
| 642 |
+
"found_files": [
|
| 643 |
+
"/home/vivi/pixelated/ai/pixel/test_automated_maintenance.py",
|
| 644 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/automated_maintenance.py"
|
| 645 |
+
],
|
| 646 |
+
"status": "COMPLETE",
|
| 647 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/automated_maintenance.py",
|
| 648 |
+
"file_stats": {
|
| 649 |
+
"size_kb": 20.296875,
|
| 650 |
+
"lines": 571,
|
| 651 |
+
"classes": 5,
|
| 652 |
+
"functions": 22,
|
| 653 |
+
"has_docstring": true
|
| 654 |
+
},
|
| 655 |
+
"issues": []
|
| 656 |
+
},
|
| 657 |
+
"6.35": {
|
| 658 |
+
"task_id": "6.35",
|
| 659 |
+
"expected_filename": "feedback_loops.py",
|
| 660 |
+
"description": "Conversation effectiveness feedback loops",
|
| 661 |
+
"found_files": [
|
| 662 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/feedback_loops.py"
|
| 663 |
+
],
|
| 664 |
+
"status": "COMPLETE",
|
| 665 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/feedback_loops.py",
|
| 666 |
+
"file_stats": {
|
| 667 |
+
"size_kb": 18.7763671875,
|
| 668 |
+
"lines": 461,
|
| 669 |
+
"classes": 4,
|
| 670 |
+
"functions": 12,
|
| 671 |
+
"has_docstring": true
|
| 672 |
+
},
|
| 673 |
+
"issues": []
|
| 674 |
+
},
|
| 675 |
+
"6.36": {
|
| 676 |
+
"task_id": "6.36",
|
| 677 |
+
"expected_filename": "comprehensive_api.py",
|
| 678 |
+
"description": "Comprehensive documentation and API",
|
| 679 |
+
"found_files": [
|
| 680 |
+
"/home/vivi/pixelated/ai/dataset_pipeline/comprehensive_api.py"
|
| 681 |
+
],
|
| 682 |
+
"status": "COMPLETE",
|
| 683 |
+
"primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/comprehensive_api.py",
|
| 684 |
+
"file_stats": {
|
| 685 |
+
"size_kb": 29.732421875,
|
| 686 |
+
"lines": 873,
|
| 687 |
+
"classes": 3,
|
| 688 |
+
"functions": 8,
|
| 689 |
+
"has_docstring": true
|
| 690 |
+
},
|
| 691 |
+
"issues": []
|
| 692 |
+
}
|
| 693 |
+
}
|
| 694 |
+
}
|