oneblackmage committed on
Commit
1672805
·
verified ·
1 Parent(s): a778abf

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. configs/hyperparameters/.gitkeep +0 -0
  2. configs/hyperparameters/boolq_train_pipeline_config.json +0 -0
  3. configs/hyperparameters/dual_persona_training_data_pipeline_config.json +0 -0
  4. configs/hyperparameters/dual_persona_training_phase1_pipeline_config.json +0 -0
  5. configs/hyperparameters/dual_persona_training_phase2_pipeline_config.json +0 -0
  6. configs/hyperparameters/dual_persona_training_phase3_pipeline_config.json +0 -0
  7. configs/hyperparameters/enhanced_training_config.json +136 -0
  8. configs/hyperparameters/moe_training_config.json +65 -0
  9. configs/hyperparameters/stressor_train.json +0 -0
  10. configs/hyperparameters/training_config.json +0 -0
  11. configs/infrastructure/.gitkeep +0 -0
  12. configs/model_configs/.gitkeep +0 -0
  13. configs/stage_configs/.gitkeep +0 -0
  14. configs/stage_configs/1.PsychologyTest_requirements.txt +0 -0
  15. configs/stage_configs/18ddda4f-4118-4292-ad4c-3cfe2d29152c.json +48 -0
  16. configs/stage_configs/4710e616-eb07-4773-9757-df922c41b33f.json +48 -0
  17. configs/stage_configs/878d3cb5-95e8-4e11-9d6c-6fa585c0a85e.json +48 -0
  18. configs/stage_configs/CoT_Neurodivergent_vs_Neurotypical_Interactions_metadata.json +15 -0
  19. configs/stage_configs/CoT_Philosophical_Understanding_metadata.json +15 -0
  20. configs/stage_configs/CoT_Reasoning_Mens_Mental_Health_metadata.json +15 -0
  21. configs/stage_configs/CoT_Temporal_Reasoning_Dataset_metadata.json +15 -0
  22. configs/stage_configs/HealthCareMagic-100k.json +0 -0
  23. configs/stage_configs/Instructions.ts +0 -0
  24. configs/stage_configs/ULTIMATE_FINAL_INTEGRATION_SUMMARY.json +0 -0
  25. configs/stage_configs/ai_config.py +0 -0
  26. configs/stage_configs/api_config.py +62 -0
  27. configs/stage_configs/api_documentation.json +296 -0
  28. configs/stage_configs/approach_config.json +455 -0
  29. configs/stage_configs/audit_report.json +655 -0
  30. configs/stage_configs/auto_resume_requirements.txt +52 -0
  31. configs/stage_configs/bias_validated_validation_summary.json +14 -0
  32. configs/stage_configs/boolq_validation_pipeline_config.json +0 -0
  33. configs/stage_configs/celery_config.py +111 -0
  34. configs/stage_configs/check_config.sh +0 -0
  35. configs/stage_configs/checkpoint_config.json +44 -0
  36. configs/stage_configs/checkpoint_requirements.txt +45 -0
  37. configs/stage_configs/claude_assessment.json +0 -0
  38. configs/stage_configs/cli_config.py +232 -0
  39. configs/stage_configs/complexity_config.json +56 -0
  40. configs/stage_configs/comprehensive_integration_summary.json +32 -0
  41. configs/stage_configs/condition_config.json +460 -0
  42. configs/stage_configs/config.py +53 -0
  43. configs/stage_configs/config_example.py +0 -0
  44. configs/stage_configs/config_lock.json +39 -0
  45. configs/stage_configs/config_lock.py +206 -0
  46. configs/stage_configs/config_profiles.py +339 -0
  47. configs/stage_configs/config_tracker.py +700 -0
  48. configs/stage_configs/config_validator.py +705 -0
  49. configs/stage_configs/configs_config.py +67 -0
  50. configs/stage_configs/corrected_audit_report.json +694 -0
configs/hyperparameters/.gitkeep ADDED
File without changes
configs/hyperparameters/boolq_train_pipeline_config.json ADDED
File without changes
configs/hyperparameters/dual_persona_training_data_pipeline_config.json ADDED
File without changes
configs/hyperparameters/dual_persona_training_phase1_pipeline_config.json ADDED
File without changes
configs/hyperparameters/dual_persona_training_phase2_pipeline_config.json ADDED
File without changes
configs/hyperparameters/dual_persona_training_phase3_pipeline_config.json ADDED
File without changes
configs/hyperparameters/enhanced_training_config.json ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model": "LatitudeGames/Wayfarer-2-12B",
3
+ "training_type": "kan28_enhanced_therapeutic_ai",
4
+
5
+ "dataset_config": {
6
+ "ultimate_final_dataset": "ULTIMATE_FINAL_DATASET.jsonl",
7
+ "component_enhanced_dataset": "unified_6_component_dataset.jsonl",
8
+ "total_conversations": 608497,
9
+ "component_enhanced_conversations": 39,
10
+ "train_split": 0.9,
11
+ "val_split": 0.1
12
+ },
13
+
14
+ "kan28_components": {
15
+ "integrated_components": [
16
+ "journaling_system",
17
+ "voice_blending",
18
+ "edge_case_handling",
19
+ "dual_persona_dynamics",
20
+ "bias_detection",
21
+ "psychology_knowledge_base"
22
+ ],
23
+ "expert_voices": ["Tim Ferriss", "Gabor Maté", "Brené Brown"],
24
+ "psychology_concepts": 4867,
25
+ "bias_categories": 5,
26
+ "therapeutic_modalities": 6
27
+ },
28
+
29
+ "training_parameters": {
30
+ "num_train_epochs": 3,
31
+ "per_device_train_batch_size": 4,
32
+ "per_device_eval_batch_size": 4,
33
+ "gradient_accumulation_steps": 8,
34
+ "learning_rate": 3e-4,
35
+ "warmup_steps": 1000,
36
+ "weight_decay": 0.01,
37
+ "max_grad_norm": 1.0
38
+ },
39
+
40
+ "lora_config": {
41
+ "lora_r": 16,
42
+ "lora_alpha": 32,
43
+ "lora_dropout": 0.1,
44
+ "lora_target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"],
45
+ "lora_bias": "none",
46
+ "task_type": "CAUSAL_LM"
47
+ },
48
+
49
+ "context_config": {
50
+ "max_position_embeddings": 8192,
51
+ "training_max_length": 2048,
52
+ "conversation_format": "chatml"
53
+ },
54
+
55
+ "h100_optimizations": {
56
+ "bf16": true,
57
+ "gradient_checkpointing": true,
58
+ "optim": "adamw_torch_fused",
59
+ "dataloader_num_workers": 4,
60
+ "dataloader_pin_memory": true,
61
+ "group_by_length": true,
62
+ "torch_compile": false,
63
+ "flash_attention": true
64
+ },
65
+
66
+ "training_constraints": {
67
+ "max_training_hours": 12,
68
+ "checkpoint_interval_minutes": 30,
69
+ "early_stopping_patience": 3,
70
+ "max_memory_gb": 80
71
+ },
72
+
73
+ "logging": {
74
+ "logging_steps": 10,
75
+ "eval_steps": 500,
76
+ "save_steps": 500,
77
+ "save_total_limit": 5,
78
+ "wandb_project": "pixelated-empathy-kan28",
79
+ "wandb_run_name": "therapeutic_ai_6_components"
80
+ },
81
+
82
+ "component_specific_config": {
83
+ "journaling_system": {
84
+ "weight": 1.0,
85
+ "focus": "long_term_progress_tracking"
86
+ },
87
+ "voice_blending": {
88
+ "weight": 1.2,
89
+ "experts": ["Tim", "Gabor", "Brené"],
90
+ "blending_strategy": "weighted_combination"
91
+ },
92
+ "edge_case_handling": {
93
+ "weight": 1.5,
94
+ "crisis_scenarios": ["suicidal_ideation", "trauma_flashback", "severe_dissociation"],
95
+ "safety_priority": "maximum"
96
+ },
97
+ "dual_persona_dynamics": {
98
+ "weight": 1.1,
99
+ "relationship_types": ["anxious_perfectionist", "trauma_survivor", "relationship_struggles"],
100
+ "alliance_tracking": true
101
+ },
102
+ "bias_detection": {
103
+ "weight": 1.3,
104
+ "validation_categories": ["cultural", "therapeutic", "accessibility", "demographic", "safety"],
105
+ "safety_threshold": 0.8
106
+ },
107
+ "psychology_knowledge_base": {
108
+ "weight": 1.0,
109
+ "concept_count": 4867,
110
+ "integration_method": "contextual_enhancement"
111
+ }
112
+ },
113
+
114
+ "validation_config": {
115
+ "therapeutic_quality_scoring": true,
116
+ "bias_detection_validation": true,
117
+ "component_integration_checks": true,
118
+ "safety_validation": true,
119
+ "expert_voice_consistency": true
120
+ },
121
+
122
+ "output_config": {
123
+ "model_name": "pixelated_empathy_kan28",
124
+ "save_format": "safetensors",
125
+ "include_tokenizer": true,
126
+ "include_config": true,
127
+ "create_model_card": true
128
+ },
129
+
130
+ "lightning_ai_config": {
131
+ "studio_type": "H100",
132
+ "instance_type": "studio-xl-h100",
133
+ "auto_shutdown": true,
134
+ "max_idle_minutes": 30
135
+ }
136
+ }
configs/hyperparameters/moe_training_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model": "LatitudeGames/Wayfarer-2-12B",
3
+ "num_train_epochs": 3,
4
+ "per_device_train_batch_size": 4,
5
+ "gradient_accumulation_steps": 8,
6
+ "learning_rate": 3e-4,
7
+ "warmup_steps": 1000,
8
+ "weight_decay": 0.01,
9
+ "max_grad_norm": 1.0,
10
+ "datasets": ["data/acquired_datasets/mental_health_counseling.json"],
11
+
12
+ "moe_config": {
13
+ "num_experts": 4,
14
+ "expert_domains": [
15
+ "psychology",
16
+ "mental_health",
17
+ "bias_detection",
18
+ "general_therapeutic"
19
+ ],
20
+ "expert_capacity": 2,
21
+ "load_balancing_weight": 0.01,
22
+ "router_z_loss_weight": 0.001
23
+ },
24
+
25
+ "lora_config": {
26
+ "lora_r": 16,
27
+ "lora_alpha": 32,
28
+ "lora_dropout": 0.1,
29
+ "lora_target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"]
30
+ },
31
+
32
+ "context_config": {
33
+ "max_position_embeddings": 8192,
34
+ "training_max_length": 2048
35
+ },
36
+
37
+ "qlora_config": {
38
+ "load_in_4bit": true,
39
+ "bnb_4bit_quant_type": "nf4",
40
+ "bnb_4bit_use_double_quant": true,
41
+ "bnb_4bit_compute_dtype": "bfloat16"
42
+ },
43
+
44
+ "h100_optimizations": {
45
+ "bf16": true,
46
+ "gradient_checkpointing": true,
47
+ "optim": "adamw_torch_fused",
48
+ "dataloader_num_workers": 4,
49
+ "dataloader_pin_memory": true,
50
+ "group_by_length": true
51
+ },
52
+
53
+ "training_constraints": {
54
+ "max_training_hours": 12,
55
+ "checkpoint_interval_minutes": 30,
56
+ "early_stopping_patience": 3
57
+ },
58
+
59
+ "logging": {
60
+ "logging_steps": 10,
61
+ "eval_steps": 500,
62
+ "save_steps": 500,
63
+ "save_total_limit": 5
64
+ }
65
+ }
configs/hyperparameters/stressor_train.json ADDED
File without changes
configs/hyperparameters/training_config.json ADDED
File without changes
configs/infrastructure/.gitkeep ADDED
File without changes
configs/model_configs/.gitkeep ADDED
File without changes
configs/stage_configs/.gitkeep ADDED
File without changes
configs/stage_configs/1.PsychologyTest_requirements.txt ADDED
File without changes
configs/stage_configs/18ddda4f-4118-4292-ad4c-3cfe2d29152c.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "session": {
3
+ "session_id": "18ddda4f-4118-4292-ad4c-3cfe2d29152c",
4
+ "start_date": "2025-11-14T10:02:25.887989",
5
+ "target_sources": [
6
+ "pubmed",
7
+ "zenodo",
8
+ "dryad"
9
+ ],
10
+ "search_keywords": {
11
+ "therapy": [
12
+ "cbt",
13
+ "dbt",
14
+ "act"
15
+ ],
16
+ "mental_health": [
17
+ "depression",
18
+ "anxiety"
19
+ ]
20
+ },
21
+ "weekly_targets": {
22
+ "sources_identified": 10,
23
+ "datasets_evaluated": 5,
24
+ "datasets_acquired": 2
25
+ },
26
+ "current_phase": "discovery",
27
+ "progress_metrics": {}
28
+ },
29
+ "state": {
30
+ "sources": [],
31
+ "evaluations": [],
32
+ "access_requests": [],
33
+ "acquired_datasets": [],
34
+ "integration_plans": [],
35
+ "integration_feasibility": {}
36
+ },
37
+ "progress": {
38
+ "sources_identified": 0,
39
+ "datasets_evaluated": 0,
40
+ "access_established": 0,
41
+ "datasets_acquired": 0,
42
+ "integration_plans_created": 0,
43
+ "last_updated": null
44
+ },
45
+ "progress_history": [],
46
+ "activity_logs": [],
47
+ "error_log": []
48
+ }
configs/stage_configs/4710e616-eb07-4773-9757-df922c41b33f.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "session": {
3
+ "session_id": "4710e616-eb07-4773-9757-df922c41b33f",
4
+ "start_date": "2025-11-11T01:37:26.290061",
5
+ "target_sources": [
6
+ "pubmed",
7
+ "zenodo",
8
+ "dryad"
9
+ ],
10
+ "search_keywords": {
11
+ "therapy": [
12
+ "cbt",
13
+ "dbt",
14
+ "act"
15
+ ],
16
+ "mental_health": [
17
+ "depression",
18
+ "anxiety"
19
+ ]
20
+ },
21
+ "weekly_targets": {
22
+ "sources_identified": 10,
23
+ "datasets_evaluated": 5,
24
+ "datasets_acquired": 2
25
+ },
26
+ "current_phase": "discovery",
27
+ "progress_metrics": {}
28
+ },
29
+ "state": {
30
+ "sources": [],
31
+ "evaluations": [],
32
+ "access_requests": [],
33
+ "acquired_datasets": [],
34
+ "integration_plans": [],
35
+ "integration_feasibility": {}
36
+ },
37
+ "progress": {
38
+ "sources_identified": 0,
39
+ "datasets_evaluated": 0,
40
+ "access_established": 0,
41
+ "datasets_acquired": 0,
42
+ "integration_plans_created": 0,
43
+ "last_updated": null
44
+ },
45
+ "progress_history": [],
46
+ "activity_logs": [],
47
+ "error_log": []
48
+ }
configs/stage_configs/878d3cb5-95e8-4e11-9d6c-6fa585c0a85e.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "session": {
3
+ "session_id": "878d3cb5-95e8-4e11-9d6c-6fa585c0a85e",
4
+ "start_date": "2025-11-11T01:40:20.313691",
5
+ "target_sources": [
6
+ "pubmed",
7
+ "zenodo",
8
+ "dryad"
9
+ ],
10
+ "search_keywords": {
11
+ "therapy": [
12
+ "cbt",
13
+ "dbt",
14
+ "act"
15
+ ],
16
+ "mental_health": [
17
+ "depression",
18
+ "anxiety"
19
+ ]
20
+ },
21
+ "weekly_targets": {
22
+ "sources_identified": 10,
23
+ "datasets_evaluated": 5,
24
+ "datasets_acquired": 2
25
+ },
26
+ "current_phase": "discovery",
27
+ "progress_metrics": {}
28
+ },
29
+ "state": {
30
+ "sources": [],
31
+ "evaluations": [],
32
+ "access_requests": [],
33
+ "acquired_datasets": [],
34
+ "integration_plans": [],
35
+ "integration_feasibility": {}
36
+ },
37
+ "progress": {
38
+ "sources_identified": 0,
39
+ "datasets_evaluated": 0,
40
+ "access_established": 0,
41
+ "datasets_acquired": 0,
42
+ "integration_plans_created": 0,
43
+ "last_updated": null
44
+ },
45
+ "progress_history": [],
46
+ "activity_logs": [],
47
+ "error_log": []
48
+ }
configs/stage_configs/CoT_Neurodivergent_vs_Neurotypical_Interactions_metadata.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "CoT_Neurodivergent_vs_Neurotypical_Interactions",
3
+ "description": "Neurodiversity-aware therapeutic approaches",
4
+ "reasoning_type": "neurodiversity_reasoning",
5
+ "therapeutic_focus": "inclusive_therapy",
6
+ "total_examples": 200,
7
+ "reasoning_patterns": [
8
+ "Consider neurodivergent perspective",
9
+ "Assess sensory processing differences",
10
+ "Evaluate communication preferences",
11
+ "Account for executive function variations",
12
+ "Recognize masking behaviors"
13
+ ],
14
+ "created_at": "2025-09-26T18:06:58.401899"
15
+ }
configs/stage_configs/CoT_Philosophical_Understanding_metadata.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "CoT_Philosophical_Understanding",
3
+ "description": "33MB, 60K existential/philosophical therapy",
4
+ "reasoning_type": "philosophical_reasoning",
5
+ "therapeutic_focus": "existential_therapy",
6
+ "total_examples": 500,
7
+ "reasoning_patterns": [
8
+ "Examine existential concerns",
9
+ "Explore meaning and purpose",
10
+ "Consider life's fundamental questions",
11
+ "Assess values and beliefs",
12
+ "Evaluate spiritual dimensions"
13
+ ],
14
+ "created_at": "2025-09-26T18:06:58.440412"
15
+ }
configs/stage_configs/CoT_Reasoning_Mens_Mental_Health_metadata.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "CoT_Reasoning_Mens_Mental_Health",
3
+ "description": "Gender-specific therapeutic reasoning",
4
+ "reasoning_type": "gender_specific_reasoning",
5
+ "therapeutic_focus": "mens_therapy",
6
+ "total_examples": 200,
7
+ "reasoning_patterns": [
8
+ "Consider societal gender expectations",
9
+ "Assess masculine identity pressures",
10
+ "Evaluate emotional expression barriers",
11
+ "Account for help-seeking stigma",
12
+ "Recognize vulnerability challenges"
13
+ ],
14
+ "created_at": "2025-09-26T18:06:58.421250"
15
+ }
configs/stage_configs/CoT_Temporal_Reasoning_Dataset_metadata.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "CoT_Temporal_Reasoning_Dataset",
3
+ "description": "15MB, 30K time-based therapeutic planning",
4
+ "reasoning_type": "temporal_reasoning",
5
+ "therapeutic_focus": "treatment_planning",
6
+ "total_examples": 200,
7
+ "reasoning_patterns": [
8
+ "Assess timeline of symptoms",
9
+ "Plan treatment progression",
10
+ "Consider developmental stages",
11
+ "Evaluate progress markers",
12
+ "Project future outcomes"
13
+ ],
14
+ "created_at": "2025-09-26T18:06:58.520641"
15
+ }
configs/stage_configs/HealthCareMagic-100k.json ADDED
File without changes
configs/stage_configs/Instructions.ts ADDED
File without changes
configs/stage_configs/ULTIMATE_FINAL_INTEGRATION_SUMMARY.json ADDED
File without changes
configs/stage_configs/ai_config.py ADDED
File without changes
configs/stage_configs/api_config.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration management for the API server.
3
+
4
+ This module provides configuration loading from environment variables
5
+ with sensible defaults.
6
+ """
7
+
8
+ import os
9
+ from functools import lru_cache
10
+ from typing import List
11
+
12
+ from pydantic_settings import BaseSettings, SettingsConfigDict
13
+
14
+
15
+ class Settings(BaseSettings):
16
+ """API server settings."""
17
+
18
+ # Server configuration
19
+ host: str = "0.0.0.0"
20
+ port: int = 8000
21
+ environment: str = "development" # development, staging, production
22
+ api_version: str = "1.0.0"
23
+ debug: bool = False
24
+
25
+ # CORS configuration
26
+ cors_origins: List[str] = [
27
+ "http://localhost:4321", # Astro dev server
28
+ "http://localhost:3000", # Alternative dev port
29
+ "http://localhost:5173", # Vite dev server
30
+ ]
31
+
32
+ # Authentication configuration
33
+ auth_enabled: bool = True
34
+ jwt_secret: str = os.getenv("JWT_SECRET", "change-me-in-production")
35
+ jwt_algorithm: str = "HS256"
36
+ jwt_expiration_minutes: int = 60 * 24 # 24 hours
37
+
38
+ # Rate limiting
39
+ rate_limit_enabled: bool = True
40
+ rate_limit_per_minute: int = 60
41
+ rate_limit_per_hour: int = 1000
42
+
43
+ # Logging
44
+ log_level: str = "INFO"
45
+ log_format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
46
+
47
+ # Session storage (must match across all components)
48
+ session_storage_path: str = os.getenv(
49
+ "SESSION_STORAGE_PATH", "ai/journal_dataset_research/sessions"
50
+ )
51
+
52
+ model_config = SettingsConfigDict(
53
+ env_file=".env",
54
+ env_file_encoding="utf-8",
55
+ case_sensitive=False,
56
+ )
57
+
58
+
59
+ @lru_cache()
60
+ def get_settings() -> Settings:
61
+ """Get cached settings instance."""
62
+ return Settings()
configs/stage_configs/api_documentation.json ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "api_version": "1.0.0",
3
+ "base_url": "https://api.pixelatedempathy.com",
4
+ "endpoints": {
5
+ "validate_conversation": {
6
+ "endpoint": "/api/v1/validate/conversation",
7
+ "method": "POST",
8
+ "description": "Validate a therapeutic conversation using multi-tier quality assessment",
9
+ "parameters": {
10
+ "conversation": {
11
+ "type": "object",
12
+ "required": true,
13
+ "description": "Conversation object with id, content, turns, and metadata"
14
+ },
15
+ "validation_level": {
16
+ "type": "string",
17
+ "required": false,
18
+ "default": "comprehensive",
19
+ "options": [
20
+ "basic",
21
+ "standard",
22
+ "comprehensive",
23
+ "clinical"
24
+ ]
25
+ },
26
+ "include_recommendations": {
27
+ "type": "boolean",
28
+ "required": false,
29
+ "default": true
30
+ }
31
+ },
32
+ "request_example": {
33
+ "conversation": {
34
+ "id": "conv_001",
35
+ "content": "I understand you're feeling anxious. Let's explore some coping strategies.",
36
+ "turns": [
37
+ {
38
+ "speaker": "user",
39
+ "text": "I'm feeling anxious lately."
40
+ },
41
+ {
42
+ "speaker": "therapist",
43
+ "text": "I understand. Let's explore coping strategies."
44
+ }
45
+ ],
46
+ "metadata": {
47
+ "source": "professional",
48
+ "condition": "anxiety",
49
+ "approach": "CBT"
50
+ }
51
+ },
52
+ "validation_level": "comprehensive",
53
+ "include_recommendations": true
54
+ },
55
+ "response_example": {
56
+ "validation_id": "val_12345",
57
+ "overall_quality_score": 0.85,
58
+ "tier_assessment": "professional",
59
+ "validation_results": {
60
+ "multi_tier_validation": {
61
+ "passed": true,
62
+ "score": 0.87
63
+ },
64
+ "dsm5_accuracy": {
65
+ "passed": true,
66
+ "score": 0.83
67
+ },
68
+ "safety_ethics": {
69
+ "passed": true,
70
+ "score": 0.91
71
+ },
72
+ "effectiveness_prediction": {
73
+ "score": 0.78,
74
+ "confidence": "high"
75
+ },
76
+ "coherence_validation": {
77
+ "score": 0.82,
78
+ "level": "moderately_coherent"
79
+ }
80
+ },
81
+ "issues": [],
82
+ "recommendations": [
83
+ "Consider adding more specific therapeutic techniques",
84
+ "Enhance empathetic responses"
85
+ ],
86
+ "processing_time_ms": 245
87
+ },
88
+ "error_codes": [
89
+ {
90
+ "code": "400",
91
+ "description": "Invalid conversation format"
92
+ },
93
+ {
94
+ "code": "422",
95
+ "description": "Validation failed - conversation quality too low"
96
+ },
97
+ {
98
+ "code": "429",
99
+ "description": "Rate limit exceeded"
100
+ },
101
+ {
102
+ "code": "500",
103
+ "description": "Internal validation error"
104
+ }
105
+ ],
106
+ "rate_limits": "100 requests per minute",
107
+ "authentication": "API key required"
108
+ },
109
+ "export_dataset": {
110
+ "endpoint": "/api/v1/export/dataset",
111
+ "method": "POST",
112
+ "description": "Export dataset in specified format with tiered access control",
113
+ "parameters": {
114
+ "export_config": {
115
+ "type": "object",
116
+ "required": true,
117
+ "description": "Export configuration including formats, tiers, and options"
118
+ },
119
+ "filters": {
120
+ "type": "object",
121
+ "required": false,
122
+ "description": "Optional filters for conversation selection"
123
+ }
124
+ },
125
+ "request_example": {
126
+ "export_config": {
127
+ "formats": [
128
+ "json",
129
+ "csv"
130
+ ],
131
+ "access_tiers": [
132
+ "priority",
133
+ "professional"
134
+ ],
135
+ "quality_threshold": 0.8,
136
+ "include_metadata": true,
137
+ "compress_output": true
138
+ },
139
+ "filters": {
140
+ "conditions": [
141
+ "anxiety",
142
+ "depression"
143
+ ],
144
+ "date_range": {
145
+ "start": "2025-01-01",
146
+ "end": "2025-08-10"
147
+ }
148
+ }
149
+ },
150
+ "response_example": {
151
+ "export_id": "exp_67890",
152
+ "status": "completed",
153
+ "export_metadata": [
154
+ {
155
+ "format": "json",
156
+ "tier": "priority",
157
+ "conversations": 1542,
158
+ "file_path": "/exports/v1/priority/conversations.json.zip",
159
+ "checksum": "sha256:abc123..."
160
+ }
161
+ ],
162
+ "total_conversations": 4626,
163
+ "export_time_seconds": 45.2
164
+ },
165
+ "error_codes": [
166
+ {
167
+ "code": "400",
168
+ "description": "Invalid export configuration"
169
+ },
170
+ {
171
+ "code": "403",
172
+ "description": "Insufficient access permissions for requested tier"
173
+ },
174
+ {
175
+ "code": "413",
176
+ "description": "Export size exceeds limits"
177
+ },
178
+ {
179
+ "code": "500",
180
+ "description": "Export processing error"
181
+ }
182
+ ],
183
+ "rate_limits": "10 exports per hour",
184
+ "authentication": "API key with export permissions required"
185
+ },
186
+ "get_analytics": {
187
+ "endpoint": "/api/v1/analytics/dashboard",
188
+ "method": "GET",
189
+ "description": "Get comprehensive analytics dashboard data",
190
+ "parameters": {
191
+ "time_range": {
192
+ "type": "string",
193
+ "required": false,
194
+ "default": "24h",
195
+ "options": [
196
+ "1h",
197
+ "24h",
198
+ "7d",
199
+ "30d"
200
+ ]
201
+ },
202
+ "include_trends": {
203
+ "type": "boolean",
204
+ "required": false,
205
+ "default": true
206
+ }
207
+ },
208
+ "request_example": {},
209
+ "response_example": {
210
+ "dashboard_data": {
211
+ "total_conversations": 15420,
212
+ "quality_distribution": {
213
+ "excellent": 3084,
214
+ "good": 6168,
215
+ "acceptable": 4626,
216
+ "poor": 1542
217
+ },
218
+ "safety_metrics": {
219
+ "overall_safety_score": 0.91,
220
+ "compliance_rate": 0.94
221
+ },
222
+ "performance_trends": {
223
+ "quality_scores": [
224
+ 0.78,
225
+ 0.79,
226
+ 0.81,
227
+ 0.82
228
+ ]
229
+ }
230
+ },
231
+ "summary_report": {
232
+ "performance_status": "\ud83d\udfe2 EXCELLENT",
233
+ "key_insights": [
234
+ "High quality conversations",
235
+ "Excellent safety compliance"
236
+ ]
237
+ }
238
+ },
239
+ "error_codes": [
240
+ {
241
+ "code": "400",
242
+ "description": "Invalid time range parameter"
243
+ },
244
+ {
245
+ "code": "500",
246
+ "description": "Analytics processing error"
247
+ }
248
+ ],
249
+ "rate_limits": "60 requests per minute",
250
+ "authentication": null
251
+ },
252
+ "get_system_status": {
253
+ "endpoint": "/api/v1/system/status",
254
+ "method": "GET",
255
+ "description": "Get real-time system status and health metrics",
256
+ "parameters": {},
257
+ "request_example": {},
258
+ "response_example": {
259
+ "system_status": "healthy",
260
+ "components": {
261
+ "validation_pipeline": {
262
+ "status": "operational",
263
+ "response_time_ms": 150
264
+ },
265
+ "export_system": {
266
+ "status": "operational",
267
+ "queue_size": 2
268
+ },
269
+ "analytics_engine": {
270
+ "status": "operational",
271
+ "last_update": "2025-08-10T07:30:00Z"
272
+ },
273
+ "maintenance_system": {
274
+ "status": "operational",
275
+ "next_maintenance": "2025-08-10T12:00:00Z"
276
+ }
277
+ },
278
+ "performance_metrics": {
279
+ "total_conversations_processed": 15420,
280
+ "average_processing_time_ms": 245,
281
+ "success_rate": 0.998,
282
+ "uptime_hours": 168.5
283
+ },
284
+ "alerts": []
285
+ },
286
+ "error_codes": [
287
+ {
288
+ "code": "503",
289
+ "description": "System temporarily unavailable"
290
+ }
291
+ ],
292
+ "rate_limits": "120 requests per minute",
293
+ "authentication": null
294
+ }
295
+ }
296
+ }
configs/stage_configs/approach_config.json ADDED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cbt": {
3
+ "name": "Cognitive Behavioral Therapy",
4
+ "evidence_level": "strong",
5
+ "target_weight": 0.25,
6
+ "min_samples": 500,
7
+ "max_samples": 8000,
8
+ "keywords": [
9
+ "cbt",
10
+ "cognitive behavioral",
11
+ "cognitive therapy",
12
+ "behavioral therapy",
13
+ "thought patterns",
14
+ "cognitive restructuring",
15
+ "behavioral activation"
16
+ ],
17
+ "techniques": [
18
+ "cognitive restructuring",
19
+ "behavioral activation",
20
+ "exposure therapy",
21
+ "thought records",
22
+ "activity scheduling",
23
+ "behavioral experiments"
24
+ ],
25
+ "conditions_suited": [
26
+ "depression",
27
+ "anxiety",
28
+ "ptsd",
29
+ "ocd",
30
+ "panic_disorder"
31
+ ],
32
+ "effectiveness_score": 0.95
33
+ },
34
+ "dbt": {
35
+ "name": "Dialectical Behavior Therapy",
36
+ "evidence_level": "strong",
37
+ "target_weight": 0.12,
38
+ "min_samples": 300,
39
+ "max_samples": 4000,
40
+ "keywords": [
41
+ "dbt",
42
+ "dialectical",
43
+ "mindfulness",
44
+ "distress tolerance",
45
+ "emotion regulation",
46
+ "interpersonal effectiveness"
47
+ ],
48
+ "techniques": [
49
+ "mindfulness",
50
+ "distress tolerance",
51
+ "emotion regulation",
52
+ "interpersonal effectiveness",
53
+ "wise mind",
54
+ "radical acceptance"
55
+ ],
56
+ "conditions_suited": [
57
+ "bpd",
58
+ "self_harm",
59
+ "suicidal_ideation",
60
+ "emotion_dysregulation"
61
+ ],
62
+ "effectiveness_score": 0.9
63
+ },
64
+ "psychodynamic": {
65
+ "name": "Psychodynamic Therapy",
66
+ "evidence_level": "moderate",
67
+ "target_weight": 0.15,
68
+ "min_samples": 400,
69
+ "max_samples": 5000,
70
+ "keywords": [
71
+ "psychodynamic",
72
+ "psychoanalytic",
73
+ "unconscious",
74
+ "transference",
75
+ "defense mechanisms",
76
+ "insight",
77
+ "interpretation"
78
+ ],
79
+ "techniques": [
80
+ "free association",
81
+ "dream analysis",
82
+ "transference analysis",
83
+ "interpretation",
84
+ "working through",
85
+ "insight development"
86
+ ],
87
+ "conditions_suited": [
88
+ "depression",
89
+ "anxiety",
90
+ "personality_disorders",
91
+ "trauma"
92
+ ],
93
+ "effectiveness_score": 0.75
94
+ },
95
+ "humanistic": {
96
+ "name": "Humanistic/Person-Centered Therapy",
97
+ "evidence_level": "moderate",
98
+ "target_weight": 0.1,
99
+ "min_samples": 250,
100
+ "max_samples": 3500,
101
+ "keywords": [
102
+ "person-centered",
103
+ "humanistic",
104
+ "unconditional positive regard",
105
+ "empathy",
106
+ "genuineness",
107
+ "self-actualization",
108
+ "client-centered"
109
+ ],
110
+ "techniques": [
111
+ "active listening",
112
+ "reflection",
113
+ "unconditional positive regard",
114
+ "empathic understanding",
115
+ "genuineness",
116
+ "congruence"
117
+ ],
118
+ "conditions_suited": [
119
+ "self_esteem",
120
+ "identity_issues",
121
+ "personal_growth"
122
+ ],
123
+ "effectiveness_score": 0.7
124
+ },
125
+ "acceptance_commitment": {
126
+ "name": "Acceptance and Commitment Therapy",
127
+ "evidence_level": "strong",
128
+ "target_weight": 0.08,
129
+ "min_samples": 200,
130
+ "max_samples": 3000,
131
+ "keywords": [
132
+ "act",
133
+ "acceptance commitment",
134
+ "psychological flexibility",
135
+ "mindfulness",
136
+ "values",
137
+ "committed action",
138
+ "defusion"
139
+ ],
140
+ "techniques": [
141
+ "mindfulness",
142
+ "acceptance",
143
+ "cognitive defusion",
144
+ "values clarification",
145
+ "committed action",
146
+ "psychological flexibility"
147
+ ],
148
+ "conditions_suited": [
149
+ "anxiety",
150
+ "depression",
151
+ "chronic_pain",
152
+ "substance_abuse"
153
+ ],
154
+ "effectiveness_score": 0.85
155
+ },
156
+ "emdr": {
157
+ "name": "Eye Movement Desensitization and Reprocessing",
158
+ "evidence_level": "strong",
159
+ "target_weight": 0.06,
160
+ "min_samples": 150,
161
+ "max_samples": 2500,
162
+ "keywords": [
163
+ "emdr",
164
+ "eye movement",
165
+ "bilateral stimulation",
166
+ "trauma processing",
167
+ "desensitization",
168
+ "reprocessing"
169
+ ],
170
+ "techniques": [
171
+ "bilateral stimulation",
172
+ "resource installation",
173
+ "trauma processing",
174
+ "desensitization",
175
+ "reprocessing",
176
+ "safe place visualization"
177
+ ],
178
+ "conditions_suited": [
179
+ "ptsd",
180
+ "trauma",
181
+ "phobias",
182
+ "anxiety"
183
+ ],
184
+ "effectiveness_score": 0.9
185
+ },
186
+ "family_systems": {
187
+ "name": "Family Systems Therapy",
188
+ "evidence_level": "moderate",
189
+ "target_weight": 0.07,
190
+ "min_samples": 180,
191
+ "max_samples": 2800,
192
+ "keywords": [
193
+ "family therapy",
194
+ "systems therapy",
195
+ "family systems",
196
+ "structural",
197
+ "strategic",
198
+ "multigenerational",
199
+ "boundaries"
200
+ ],
201
+ "techniques": [
202
+ "genogram",
203
+ "structural interventions",
204
+ "strategic interventions",
205
+ "boundary setting",
206
+ "family sculpting",
207
+ "circular questioning"
208
+ ],
209
+ "conditions_suited": [
210
+ "family_conflict",
211
+ "relationship_issues",
212
+ "adolescent_issues"
213
+ ],
214
+ "effectiveness_score": 0.75
215
+ },
216
+ "gestalt": {
217
+ "name": "Gestalt Therapy",
218
+ "evidence_level": "emerging",
219
+ "target_weight": 0.04,
220
+ "min_samples": 100,
221
+ "max_samples": 1500,
222
+ "keywords": [
223
+ "gestalt",
224
+ "here and now",
225
+ "awareness",
226
+ "contact",
227
+ "experiment",
228
+ "phenomenology",
229
+ "field theory"
230
+ ],
231
+ "techniques": [
232
+ "empty chair",
233
+ "two-chair technique",
234
+ "body awareness",
235
+ "here and now focus",
236
+ "experiments",
237
+ "contact and awareness"
238
+ ],
239
+ "conditions_suited": [
240
+ "anxiety",
241
+ "depression",
242
+ "relationship_issues"
243
+ ],
244
+ "effectiveness_score": 0.65
245
+ },
246
+ "solution_focused": {
247
+ "name": "Solution-Focused Brief Therapy",
248
+ "evidence_level": "moderate",
249
+ "target_weight": 0.05,
250
+ "min_samples": 120,
251
+ "max_samples": 2000,
252
+ "keywords": [
253
+ "solution focused",
254
+ "brief therapy",
255
+ "miracle question",
256
+ "scaling",
257
+ "exceptions",
258
+ "goals",
259
+ "strengths"
260
+ ],
261
+ "techniques": [
262
+ "miracle question",
263
+ "scaling questions",
264
+ "exception finding",
265
+ "goal setting",
266
+ "compliments",
267
+ "between-session tasks"
268
+ ],
269
+ "conditions_suited": [
270
+ "depression",
271
+ "anxiety",
272
+ "relationship_issues",
273
+ "substance_abuse"
274
+ ],
275
+ "effectiveness_score": 0.7
276
+ },
277
+ "narrative": {
278
+ "name": "Narrative Therapy",
279
+ "evidence_level": "emerging",
280
+ "target_weight": 0.03,
281
+ "min_samples": 80,
282
+ "max_samples": 1200,
283
+ "keywords": [
284
+ "narrative",
285
+ "story",
286
+ "externalization",
287
+ "unique outcomes",
288
+ "re-authoring",
289
+ "deconstruction",
290
+ "preferred story"
291
+ ],
292
+ "techniques": [
293
+ "externalization",
294
+ "unique outcomes",
295
+ "re-authoring",
296
+ "definitional ceremony",
297
+ "outsider witness",
298
+ "therapeutic documents"
299
+ ],
300
+ "conditions_suited": [
301
+ "identity_issues",
302
+ "trauma",
303
+ "oppression",
304
+ "self_esteem"
305
+ ],
306
+ "effectiveness_score": 0.6
307
+ },
308
+ "mindfulness_based": {
309
+ "name": "Mindfulness-Based Interventions",
310
+ "evidence_level": "strong",
311
+ "target_weight": 0.06,
312
+ "min_samples": 150,
313
+ "max_samples": 2500,
314
+ "keywords": [
315
+ "mindfulness",
316
+ "mbsr",
317
+ "mbct",
318
+ "meditation",
319
+ "present moment",
320
+ "non-judgmental awareness",
321
+ "body scan"
322
+ ],
323
+ "techniques": [
324
+ "mindfulness meditation",
325
+ "body scan",
326
+ "breathing exercises",
327
+ "mindful movement",
328
+ "loving-kindness",
329
+ "present moment awareness"
330
+ ],
331
+ "conditions_suited": [
332
+ "anxiety",
333
+ "depression",
334
+ "chronic_pain",
335
+ "stress"
336
+ ],
337
+ "effectiveness_score": 0.8
338
+ },
339
+ "interpersonal": {
340
+ "name": "Interpersonal Therapy",
341
+ "evidence_level": "strong",
342
+ "target_weight": 0.07,
343
+ "min_samples": 180,
344
+ "max_samples": 2800,
345
+ "keywords": [
346
+ "interpersonal therapy",
347
+ "ipt",
348
+ "grief",
349
+ "role disputes",
350
+ "role transitions",
351
+ "interpersonal deficits"
352
+ ],
353
+ "techniques": [
354
+ "grief work",
355
+ "role dispute resolution",
356
+ "role transition work",
357
+ "interpersonal skills training",
358
+ "communication analysis"
359
+ ],
360
+ "conditions_suited": [
361
+ "depression",
362
+ "anxiety",
363
+ "eating_disorders",
364
+ "ptsd"
365
+ ],
366
+ "effectiveness_score": 0.85
367
+ },
368
+ "motivational_interviewing": {
369
+ "name": "Motivational Interviewing",
370
+ "evidence_level": "strong",
371
+ "target_weight": 0.05,
372
+ "min_samples": 120,
373
+ "max_samples": 2000,
374
+ "keywords": [
375
+ "motivational interviewing",
376
+ "mi",
377
+ "ambivalence",
378
+ "change talk",
379
+ "rolling with resistance",
380
+ "self-efficacy"
381
+ ],
382
+ "techniques": [
383
+ "open-ended questions",
384
+ "affirmations",
385
+ "reflective listening",
386
+ "summarizing",
387
+ "eliciting change talk",
388
+ "developing discrepancy"
389
+ ],
390
+ "conditions_suited": [
391
+ "substance_abuse",
392
+ "health_behavior_change",
393
+ "motivation"
394
+ ],
395
+ "effectiveness_score": 0.8
396
+ },
397
+ "exposure_therapy": {
398
+ "name": "Exposure and Response Prevention",
399
+ "evidence_level": "strong",
400
+ "target_weight": 0.04,
401
+ "min_samples": 100,
402
+ "max_samples": 1500,
403
+ "keywords": [
404
+ "exposure",
405
+ "response prevention",
406
+ "systematic desensitization",
407
+ "flooding",
408
+ "habituation",
409
+ "fear hierarchy"
410
+ ],
411
+ "techniques": [
412
+ "systematic desensitization",
413
+ "in vivo exposure",
414
+ "imaginal exposure",
415
+ "response prevention",
416
+ "fear hierarchy",
417
+ "habituation"
418
+ ],
419
+ "conditions_suited": [
420
+ "ocd",
421
+ "phobias",
422
+ "anxiety",
423
+ "ptsd"
424
+ ],
425
+ "effectiveness_score": 0.9
426
+ },
427
+ "integrative": {
428
+ "name": "Integrative/Eclectic Therapy",
429
+ "evidence_level": "moderate",
430
+ "target_weight": 0.08,
431
+ "min_samples": 200,
432
+ "max_samples": 3000,
433
+ "keywords": [
434
+ "integrative",
435
+ "eclectic",
436
+ "multimodal",
437
+ "combination",
438
+ "tailored approach",
439
+ "best practices"
440
+ ],
441
+ "techniques": [
442
+ "technique integration",
443
+ "approach combination",
444
+ "tailored interventions",
445
+ "flexible methodology",
446
+ "evidence-based selection"
447
+ ],
448
+ "conditions_suited": [
449
+ "complex_presentations",
450
+ "comorbid_conditions",
451
+ "treatment_resistant"
452
+ ],
453
+ "effectiveness_score": 0.75
454
+ }
455
+ }
configs/stage_configs/audit_report.json ADDED
@@ -0,0 +1,655 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audit_date": "2025-08-24T13:12:24.522685",
3
+ "total_tasks": 36,
4
+ "complete": 18,
5
+ "partial": 0,
6
+ "missing": 18,
7
+ "completion_rate": 0.5,
8
+ "overall_status": "PARTIAL",
9
+ "phase_breakdown": {
10
+ "Phase 1": {
11
+ "complete": 3,
12
+ "total": 6,
13
+ "completion_rate": 0.5
14
+ },
15
+ "Phase 2": {
16
+ "complete": 3,
17
+ "total": 6,
18
+ "completion_rate": 0.5
19
+ },
20
+ "Phase 3": {
21
+ "complete": 3,
22
+ "total": 6,
23
+ "completion_rate": 0.5
24
+ },
25
+ "Phase 4": {
26
+ "complete": 0,
27
+ "total": 6,
28
+ "completion_rate": 0.0
29
+ },
30
+ "Phase 5": {
31
+ "complete": 4,
32
+ "total": 6,
33
+ "completion_rate": 0.6666666666666666
34
+ },
35
+ "Phase 6": {
36
+ "complete": 5,
37
+ "total": 6,
38
+ "completion_rate": 0.8333333333333334
39
+ }
40
+ },
41
+ "detailed_results": {
42
+ "6.1": {
43
+ "task_id": "6.1",
44
+ "filename": "distributed_architecture.py",
45
+ "description": "Distributed processing architecture",
46
+ "exists": true,
47
+ "size_bytes": 20724,
48
+ "imports_ok": true,
49
+ "content_analysis": {
50
+ "classes": 6,
51
+ "functions": 26,
52
+ "lines": 569,
53
+ "docstring": true,
54
+ "size_kb": 20.2275390625
55
+ },
56
+ "status": "COMPLETE",
57
+ "issues": []
58
+ },
59
+ "6.2": {
60
+ "task_id": "6.2",
61
+ "filename": "data_fusion_engine.py",
62
+ "description": "Intelligent data fusion algorithms",
63
+ "exists": true,
64
+ "size_bytes": 27331,
65
+ "imports_ok": true,
66
+ "content_analysis": {
67
+ "classes": 5,
68
+ "functions": 20,
69
+ "lines": 694,
70
+ "docstring": true,
71
+ "size_kb": 26.6845703125
72
+ },
73
+ "status": "COMPLETE",
74
+ "issues": []
75
+ },
76
+ "6.3": {
77
+ "task_id": "6.3",
78
+ "filename": "quality_assessment_framework.py",
79
+ "description": "Hierarchical quality assessment framework",
80
+ "exists": true,
81
+ "size_bytes": 28315,
82
+ "imports_ok": true,
83
+ "content_analysis": {
84
+ "classes": 5,
85
+ "functions": 25,
86
+ "lines": 708,
87
+ "docstring": true,
88
+ "size_kb": 27.6455078125
89
+ },
90
+ "status": "COMPLETE",
91
+ "issues": []
92
+ },
93
+ "6.4": {
94
+ "task_id": "6.4",
95
+ "filename": "deduplication.py",
96
+ "description": "Automated conversation deduplication",
97
+ "exists": true,
98
+ "size_bytes": 17642,
99
+ "imports_ok": false,
100
+ "content_analysis": {
101
+ "classes": 3,
102
+ "functions": 17,
103
+ "lines": 460,
104
+ "docstring": true,
105
+ "size_kb": 17.228515625
106
+ },
107
+ "status": "MISSING",
108
+ "issues": [
109
+ "Import errors"
110
+ ]
111
+ },
112
+ "6.5": {
113
+ "task_id": "6.5",
114
+ "filename": "cross_dataset_linker.py",
115
+ "description": "Cross-dataset conversation linking",
116
+ "exists": false,
117
+ "size_bytes": 0,
118
+ "imports_ok": false,
119
+ "content_analysis": {},
120
+ "status": "MISSING",
121
+ "issues": [
122
+ "File does not exist"
123
+ ]
124
+ },
125
+ "6.6": {
126
+ "task_id": "6.6",
127
+ "filename": "metadata_schema.py",
128
+ "description": "Unified metadata schema",
129
+ "exists": false,
130
+ "size_bytes": 0,
131
+ "imports_ok": false,
132
+ "content_analysis": {},
133
+ "status": "MISSING",
134
+ "issues": [
135
+ "File does not exist"
136
+ ]
137
+ },
138
+ "6.7": {
139
+ "task_id": "6.7",
140
+ "filename": "therapeutic_intelligence.py",
141
+ "description": "Comprehensive therapeutic approach classification",
142
+ "exists": true,
143
+ "size_bytes": 26025,
144
+ "imports_ok": false,
145
+ "content_analysis": {
146
+ "classes": 4,
147
+ "functions": 18,
148
+ "lines": 582,
149
+ "docstring": true,
150
+ "size_kb": 25.4091796875
151
+ },
152
+ "status": "MISSING",
153
+ "issues": [
154
+ "Import errors"
155
+ ]
156
+ },
157
+ "6.8": {
158
+ "task_id": "6.8",
159
+ "filename": "condition_pattern_recognition.py",
160
+ "description": "Mental health condition pattern recognition",
161
+ "exists": false,
162
+ "size_bytes": 0,
163
+ "imports_ok": false,
164
+ "content_analysis": {},
165
+ "status": "MISSING",
166
+ "issues": [
167
+ "File does not exist"
168
+ ]
169
+ },
170
+ "6.9": {
171
+ "task_id": "6.9",
172
+ "filename": "outcome_prediction.py",
173
+ "description": "Therapeutic outcome prediction models",
174
+ "exists": false,
175
+ "size_bytes": 0,
176
+ "imports_ok": false,
177
+ "content_analysis": {},
178
+ "status": "MISSING",
179
+ "issues": [
180
+ "File does not exist"
181
+ ]
182
+ },
183
+ "6.10": {
184
+ "task_id": "6.10",
185
+ "filename": "crisis_intervention_detector.py",
186
+ "description": "Crisis intervention detection and escalation",
187
+ "exists": true,
188
+ "size_bytes": 40122,
189
+ "imports_ok": true,
190
+ "content_analysis": {
191
+ "classes": 7,
192
+ "functions": 24,
193
+ "lines": 849,
194
+ "docstring": true,
195
+ "size_kb": 39.1484375
196
+ },
197
+ "status": "COMPLETE",
198
+ "issues": []
199
+ },
200
+ "6.11": {
201
+ "task_id": "6.11",
202
+ "filename": "personality_adapter.py",
203
+ "description": "Personality-aware conversation adaptation",
204
+ "exists": true,
205
+ "size_bytes": 30898,
206
+ "imports_ok": true,
207
+ "content_analysis": {
208
+ "classes": 7,
209
+ "functions": 26,
210
+ "lines": 704,
211
+ "docstring": true,
212
+ "size_kb": 30.1650390625
213
+ },
214
+ "status": "COMPLETE",
215
+ "issues": []
216
+ },
217
+ "6.12": {
218
+ "task_id": "6.12",
219
+ "filename": "cultural_competency_generator.py",
220
+ "description": "Cultural competency and diversity-aware response generation",
221
+ "exists": true,
222
+ "size_bytes": 34793,
223
+ "imports_ok": true,
224
+ "content_analysis": {
225
+ "classes": 6,
226
+ "functions": 35,
227
+ "lines": 789,
228
+ "docstring": true,
229
+ "size_kb": 33.9677734375
230
+ },
231
+ "status": "COMPLETE",
232
+ "issues": []
233
+ },
234
+ "6.13": {
235
+ "task_id": "6.13",
236
+ "filename": "audio_emotion_integration.py",
237
+ "description": "Audio emotion recognition integration",
238
+ "exists": true,
239
+ "size_bytes": 23773,
240
+ "imports_ok": true,
241
+ "content_analysis": {
242
+ "classes": 5,
243
+ "functions": 18,
244
+ "lines": 575,
245
+ "docstring": true,
246
+ "size_kb": 23.2099609375
247
+ },
248
+ "status": "COMPLETE",
249
+ "issues": []
250
+ },
251
+ "6.14": {
252
+ "task_id": "6.14",
253
+ "filename": "multimodal_disorder_analysis.py",
254
+ "description": "Multi-modal mental disorder analysis pipeline",
255
+ "exists": false,
256
+ "size_bytes": 0,
257
+ "imports_ok": false,
258
+ "content_analysis": {},
259
+ "status": "MISSING",
260
+ "issues": [
261
+ "File does not exist"
262
+ ]
263
+ },
264
+ "6.15": {
265
+ "task_id": "6.15",
266
+ "filename": "emotion_cause_extraction.py",
267
+ "description": "Emotion cause extraction and intervention mapping",
268
+ "exists": false,
269
+ "size_bytes": 0,
270
+ "imports_ok": false,
271
+ "content_analysis": {},
272
+ "status": "MISSING",
273
+ "issues": [
274
+ "File does not exist"
275
+ ]
276
+ },
277
+ "6.16": {
278
+ "task_id": "6.16",
279
+ "filename": "tfidf_clusterer.py",
280
+ "description": "TF-IDF feature-based conversation clustering",
281
+ "exists": true,
282
+ "size_bytes": 28344,
283
+ "imports_ok": false,
284
+ "content_analysis": {
285
+ "classes": 6,
286
+ "functions": 20,
287
+ "lines": 668,
288
+ "docstring": true,
289
+ "size_kb": 27.6640625
290
+ },
291
+ "status": "MISSING",
292
+ "issues": [
293
+ "Import errors"
294
+ ]
295
+ },
296
+ "6.17": {
297
+ "task_id": "6.17",
298
+ "filename": "temporal_reasoner.py",
299
+ "description": "Temporal reasoning integration",
300
+ "exists": true,
301
+ "size_bytes": 31062,
302
+ "imports_ok": true,
303
+ "content_analysis": {
304
+ "classes": 7,
305
+ "functions": 25,
306
+ "lines": 744,
307
+ "docstring": true,
308
+ "size_kb": 30.3173828125
309
+ },
310
+ "status": "COMPLETE",
311
+ "issues": []
312
+ },
313
+ "6.18": {
314
+ "task_id": "6.18",
315
+ "filename": "evidence_validator.py",
316
+ "description": "Scientific evidence-based practice validation",
317
+ "exists": true,
318
+ "size_bytes": 33065,
319
+ "imports_ok": true,
320
+ "content_analysis": {
321
+ "classes": 8,
322
+ "functions": 22,
323
+ "lines": 755,
324
+ "docstring": true,
325
+ "size_kb": 32.271484375
326
+ },
327
+ "status": "COMPLETE",
328
+ "issues": []
329
+ },
330
+ "6.19": {
331
+ "task_id": "6.19",
332
+ "filename": "priority_weighted_sampler.py",
333
+ "description": "Priority-weighted sampling algorithms",
334
+ "exists": true,
335
+ "size_bytes": 26014,
336
+ "imports_ok": false,
337
+ "content_analysis": {
338
+ "classes": 3,
339
+ "functions": 17,
340
+ "lines": 646,
341
+ "docstring": true,
342
+ "size_kb": 25.404296875
343
+ },
344
+ "status": "MISSING",
345
+ "issues": [
346
+ "Import errors"
347
+ ]
348
+ },
349
+ "6.20": {
350
+ "task_id": "6.20",
351
+ "filename": "condition_balancer.py",
352
+ "description": "Condition-specific balancing system",
353
+ "exists": true,
354
+ "size_bytes": 27040,
355
+ "imports_ok": false,
356
+ "content_analysis": {
357
+ "classes": 3,
358
+ "functions": 12,
359
+ "lines": 612,
360
+ "docstring": true,
361
+ "size_kb": 26.40625
362
+ },
363
+ "status": "MISSING",
364
+ "issues": [
365
+ "Import errors"
366
+ ]
367
+ },
368
+ "6.21": {
369
+ "task_id": "6.21",
370
+ "filename": "approach_diversity_optimizer.py",
371
+ "description": "Therapeutic approach diversity optimization",
372
+ "exists": true,
373
+ "size_bytes": 34619,
374
+ "imports_ok": false,
375
+ "content_analysis": {
376
+ "classes": 3,
377
+ "functions": 15,
378
+ "lines": 718,
379
+ "docstring": true,
380
+ "size_kb": 33.8076171875
381
+ },
382
+ "status": "MISSING",
383
+ "issues": [
384
+ "Import errors"
385
+ ]
386
+ },
387
+ "6.22": {
388
+ "task_id": "6.22",
389
+ "filename": "demographic_balancer.py",
390
+ "description": "Demographic and cultural diversity balancing",
391
+ "exists": true,
392
+ "size_bytes": 21222,
393
+ "imports_ok": false,
394
+ "content_analysis": {
395
+ "classes": 3,
396
+ "functions": 12,
397
+ "lines": 486,
398
+ "docstring": true,
399
+ "size_kb": 20.724609375
400
+ },
401
+ "status": "MISSING",
402
+ "issues": [
403
+ "Import errors"
404
+ ]
405
+ },
406
+ "6.23": {
407
+ "task_id": "6.23",
408
+ "filename": "complexity_stratifier.py",
409
+ "description": "Conversation complexity stratification",
410
+ "exists": true,
411
+ "size_bytes": 26863,
412
+ "imports_ok": false,
413
+ "content_analysis": {
414
+ "classes": 3,
415
+ "functions": 14,
416
+ "lines": 623,
417
+ "docstring": true,
418
+ "size_kb": 26.2333984375
419
+ },
420
+ "status": "MISSING",
421
+ "issues": [
422
+ "Import errors"
423
+ ]
424
+ },
425
+ "6.24": {
426
+ "task_id": "6.24",
427
+ "filename": "crisis_routine_balancer.py",
428
+ "description": "Crisis-to-routine conversation ratio optimization",
429
+ "exists": true,
430
+ "size_bytes": 24423,
431
+ "imports_ok": false,
432
+ "content_analysis": {
433
+ "classes": 3,
434
+ "functions": 13,
435
+ "lines": 574,
436
+ "docstring": true,
437
+ "size_kb": 23.8505859375
438
+ },
439
+ "status": "MISSING",
440
+ "issues": [
441
+ "Import errors"
442
+ ]
443
+ },
444
+ "6.25": {
445
+ "task_id": "6.25",
446
+ "filename": "multi_tier_validator.py",
447
+ "description": "Multi-tier quality validation system",
448
+ "exists": true,
449
+ "size_bytes": 29688,
450
+ "imports_ok": false,
451
+ "content_analysis": {
452
+ "classes": 5,
453
+ "functions": 25,
454
+ "lines": 730,
455
+ "docstring": true,
456
+ "size_kb": 28.9892578125
457
+ },
458
+ "status": "MISSING",
459
+ "issues": [
460
+ "Import errors"
461
+ ]
462
+ },
463
+ "6.26": {
464
+ "task_id": "6.26",
465
+ "filename": "dsm5_accuracy_validator.py",
466
+ "description": "DSM-5 therapeutic accuracy validation",
467
+ "exists": true,
468
+ "size_bytes": 27020,
469
+ "imports_ok": true,
470
+ "content_analysis": {
471
+ "classes": 7,
472
+ "functions": 20,
473
+ "lines": 669,
474
+ "docstring": true,
475
+ "size_kb": 26.38671875
476
+ },
477
+ "status": "COMPLETE",
478
+ "issues": []
479
+ },
480
+ "6.27": {
481
+ "task_id": "6.27",
482
+ "filename": "safety_ethics_validator.py",
483
+ "description": "Conversation safety and ethics validation",
484
+ "exists": true,
485
+ "size_bytes": 33303,
486
+ "imports_ok": true,
487
+ "content_analysis": {
488
+ "classes": 7,
489
+ "functions": 20,
490
+ "lines": 804,
491
+ "docstring": true,
492
+ "size_kb": 32.5224609375
493
+ },
494
+ "status": "COMPLETE",
495
+ "issues": []
496
+ },
497
+ "6.28": {
498
+ "task_id": "6.28",
499
+ "filename": "effectiveness_predictor.py",
500
+ "description": "Therapeutic effectiveness prediction",
501
+ "exists": true,
502
+ "size_bytes": 28432,
503
+ "imports_ok": false,
504
+ "content_analysis": {
505
+ "classes": 6,
506
+ "functions": 17,
507
+ "lines": 633,
508
+ "docstring": true,
509
+ "size_kb": 27.765625
510
+ },
511
+ "status": "MISSING",
512
+ "issues": [
513
+ "Import errors"
514
+ ]
515
+ },
516
+ "6.29": {
517
+ "task_id": "6.29",
518
+ "filename": "coherence_validator.py",
519
+ "description": "Conversation coherence validation using CoT reasoning",
520
+ "exists": true,
521
+ "size_bytes": 39311,
522
+ "imports_ok": true,
523
+ "content_analysis": {
524
+ "classes": 5,
525
+ "functions": 24,
526
+ "lines": 1016,
527
+ "docstring": true,
528
+ "size_kb": 38.3896484375
529
+ },
530
+ "status": "COMPLETE",
531
+ "issues": []
532
+ },
533
+ "6.30": {
534
+ "task_id": "6.30",
535
+ "filename": "realtime_quality_monitor.py",
536
+ "description": "Real-time conversation quality monitoring",
537
+ "exists": true,
538
+ "size_bytes": 17831,
539
+ "imports_ok": true,
540
+ "content_analysis": {
541
+ "classes": 5,
542
+ "functions": 20,
543
+ "lines": 467,
544
+ "docstring": true,
545
+ "size_kb": 17.41015625
546
+ },
547
+ "status": "COMPLETE",
548
+ "issues": []
549
+ },
550
+ "6.31": {
551
+ "task_id": "6.31",
552
+ "filename": "production_exporter.py",
553
+ "description": "Production-ready dataset export with tiered access",
554
+ "exists": true,
555
+ "size_bytes": 27472,
556
+ "imports_ok": true,
557
+ "content_analysis": {
558
+ "classes": 5,
559
+ "functions": 24,
560
+ "lines": 710,
561
+ "docstring": true,
562
+ "size_kb": 26.828125
563
+ },
564
+ "status": "COMPLETE",
565
+ "issues": []
566
+ },
567
+ "6.32": {
568
+ "task_id": "6.32",
569
+ "filename": "adaptive_learner.py",
570
+ "description": "Adaptive learning pipeline",
571
+ "exists": true,
572
+ "size_bytes": 27077,
573
+ "imports_ok": false,
574
+ "content_analysis": {
575
+ "classes": 8,
576
+ "functions": 34,
577
+ "lines": 684,
578
+ "docstring": true,
579
+ "size_kb": 26.4423828125
580
+ },
581
+ "status": "MISSING",
582
+ "issues": [
583
+ "Import errors"
584
+ ]
585
+ },
586
+ "6.33": {
587
+ "task_id": "6.33",
588
+ "filename": "analytics_dashboard.py",
589
+ "description": "Comprehensive analytics dashboard",
590
+ "exists": true,
591
+ "size_bytes": 18609,
592
+ "imports_ok": true,
593
+ "content_analysis": {
594
+ "classes": 2,
595
+ "functions": 17,
596
+ "lines": 455,
597
+ "docstring": true,
598
+ "size_kb": 18.1240234375
599
+ },
600
+ "status": "COMPLETE",
601
+ "issues": []
602
+ },
603
+ "6.34": {
604
+ "task_id": "6.34",
605
+ "filename": "automated_maintenance.py",
606
+ "description": "Automated dataset update and maintenance procedures",
607
+ "exists": true,
608
+ "size_bytes": 20792,
609
+ "imports_ok": true,
610
+ "content_analysis": {
611
+ "classes": 5,
612
+ "functions": 22,
613
+ "lines": 571,
614
+ "docstring": true,
615
+ "size_kb": 20.296875
616
+ },
617
+ "status": "COMPLETE",
618
+ "issues": []
619
+ },
620
+ "6.35": {
621
+ "task_id": "6.35",
622
+ "filename": "feedback_loops.py",
623
+ "description": "Conversation effectiveness feedback loops",
624
+ "exists": true,
625
+ "size_bytes": 19230,
626
+ "imports_ok": true,
627
+ "content_analysis": {
628
+ "classes": 4,
629
+ "functions": 12,
630
+ "lines": 461,
631
+ "docstring": true,
632
+ "size_kb": 18.7763671875
633
+ },
634
+ "status": "COMPLETE",
635
+ "issues": []
636
+ },
637
+ "6.36": {
638
+ "task_id": "6.36",
639
+ "filename": "comprehensive_api.py",
640
+ "description": "Comprehensive documentation and API",
641
+ "exists": true,
642
+ "size_bytes": 30454,
643
+ "imports_ok": true,
644
+ "content_analysis": {
645
+ "classes": 3,
646
+ "functions": 8,
647
+ "lines": 873,
648
+ "docstring": true,
649
+ "size_kb": 29.732421875
650
+ },
651
+ "status": "COMPLETE",
652
+ "issues": []
653
+ }
654
+ }
655
+ }
configs/stage_configs/auto_resume_requirements.txt ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Automatic Resume System Requirements
2
+ # Install with: pip install -r auto_resume_requirements.txt
3
+
4
+ # Core system monitoring (from checkpoint system)
5
+ psutil>=5.8.0
6
+
7
+ # Built-in Python modules (listed for reference)
8
+ # asyncio - Built-in Python 3.7+
9
+ # signal - Built-in
10
+ # threading - Built-in
11
+ # time - Built-in
12
+ # uuid - Built-in
13
+ # json - Built-in
14
+ # logging - Built-in
15
+ # os - Built-in
16
+ # sys - Built-in
17
+ # datetime - Built-in
18
+ # pathlib - Built-in
19
+ # tempfile - Built-in
20
+ # shutil - Built-in
21
+ # collections - Built-in
22
+ # dataclasses - Built-in Python 3.7+
23
+ # enum - Built-in
24
+ # typing - Built-in Python 3.5+
25
+
26
+ # Dependencies from checkpoint system
27
+ # (Install those as well: pip install -r checkpoint_requirements.txt)
28
+
29
+ # Optional: Enhanced monitoring and alerting
30
+ # prometheus_client>=0.14.0 # For Prometheus metrics
31
+ # redis>=4.0.0 # For distributed coordination
32
+ # pika>=1.3.0 # For RabbitMQ message queuing
33
+ # celery>=5.2.0 # For distributed task management
34
+
35
+ # Development and testing
36
+ pytest>=7.0.0
37
+ pytest-asyncio>=0.21.0
38
+ coverage>=6.0.0
39
+ pytest-timeout>=2.1.0 # For timeout testing
40
+
41
+ # Code quality
42
+ black>=22.0.0
43
+ flake8>=5.0.0
44
+ mypy>=0.991
45
+
46
+ # Documentation
47
+ sphinx>=4.0.0
48
+ sphinx-rtd-theme>=1.0.0
49
+
50
+ # Performance profiling (optional)
51
+ # memory_profiler>=0.60.0 # For memory usage profiling
52
+ # py-spy>=0.3.0 # For CPU profiling
configs/stage_configs/bias_validated_validation_summary.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_datasets": 2,
3
+ "safe_datasets": 0,
4
+ "caution_datasets": 2,
5
+ "safety_percentage": 0.0,
6
+ "bias_categories_checked": [
7
+ "cultural_bias",
8
+ "therapeutic_bias",
9
+ "accessibility_bias",
10
+ "demographic_bias",
11
+ "safety_concerns"
12
+ ],
13
+ "validation_complete": true
14
+ }
configs/stage_configs/boolq_validation_pipeline_config.json ADDED
File without changes
configs/stage_configs/celery_config.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Celery Configuration for Pixelated Empathy AI Distributed Processing
4
+ """
5
+
6
+ import os
7
+
8
+ from celery import Celery
9
+ from kombu import Exchange, Queue
10
+
11
+
12
+ # Celery application configuration
13
def create_celery_app(broker_url=None, result_backend=None):
    """Create and configure the Celery application.

    Args:
        broker_url: Message broker URL. When ``None`` (the default) the
            ``CELERY_BROKER_URL`` environment variable is used, falling back
            to ``redis://localhost:6379/0``.
        result_backend: Result backend URL. When ``None`` (the default) the
            ``CELERY_RESULT_BACKEND`` environment variable is used, falling
            back to ``redis://localhost:6379/0``.

    Returns:
        The configured ``Celery`` application named ``"pixelated_empathy"``.
    """
    # Explicit arguments win; otherwise read the environment exactly as before.
    if broker_url is None:
        broker_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
    if result_backend is None:
        result_backend = os.getenv(
            "CELERY_RESULT_BACKEND", "redis://localhost:6379/0"
        )

    app = Celery("pixelated_empathy")

    app.conf.update(
        # Broker settings
        broker_url=broker_url,
        result_backend=result_backend,
        # Task serialization
        # SECURITY NOTE(review): pickle payloads can execute arbitrary code
        # when deserialized, so anyone able to publish to the broker could run
        # code on the workers. Left unchanged here because existing tasks may
        # rely on pickling non-JSON arguments; consider "json" if they do not.
        task_serializer="pickle",
        accept_content=["pickle", "json"],
        result_serializer="pickle",
        # Timezone settings
        timezone="UTC",
        enable_utc=True,
        # Task routing: task name -> queue name (queues are declared below)
        task_routes={
            "quality_validator.validate_task": {"queue": "quality_validation"},
            "data_processor.process_task": {"queue": "data_processing"},
            "model_trainer.train_task": {"queue": "model_training"},
            "backup.backup_task": {"queue": "backup"},
        },
        # Queue configuration
        task_default_queue="default",
        task_queues=(
            Queue("default", Exchange("default"), routing_key="default"),
            Queue(
                "quality_validation",
                Exchange("quality"),
                routing_key="quality.validation",
            ),
            Queue("data_processing", Exchange("data"), routing_key="data.processing"),
            Queue("model_training", Exchange("training"), routing_key="training.model"),
            Queue("backup", Exchange("backup"), routing_key="backup.task"),
            # Declared but not referenced by any entry in task_routes above.
            Queue("high_priority", Exchange("priority"), routing_key="priority.high"),
        ),
        # Worker settings
        worker_prefetch_multiplier=1,
        task_acks_late=True,
        worker_max_tasks_per_child=1000,
        # Task execution settings
        task_soft_time_limit=300,  # 5 minutes
        task_time_limit=600,  # 10 minutes
        task_reject_on_worker_lost=True,
        # Result settings
        result_expires=3600,  # 1 hour
        # Monitoring
        worker_send_task_events=True,
        task_send_sent_event=True,
        # Rate limiting: "*" applies to every task, the named entries
        # set a specific limit for individual tasks.
        task_annotations={
            "*": {"rate_limit": "100/m"},
            "quality_validator.validate_task": {"rate_limit": "50/m"},
            "model_trainer.train_task": {"rate_limit": "5/m"},
        },
        # Beat schedule (for periodic tasks); intervals are in seconds
        beat_schedule={
            "cleanup-old-results": {
                "task": "maintenance.cleanup_old_results",
                "schedule": 3600.0,  # Every hour
            },
            "health-check": {
                "task": "monitoring.health_check",
                "schedule": 300.0,  # Every 5 minutes
            },
            "backup-data": {
                "task": "backup.backup_task",
                "schedule": 86400.0,  # Daily
                "kwargs": {"backup_type": "incremental"},
            },
        },
    )

    return app
94
+
95
+
96
# Module-level application instance, built once at import time.
celery_app = create_celery_app()

# Register the task packages, all of which live under "distributed_processing".
celery_app.autodiscover_tasks(
    [
        f"distributed_processing.{submodule}"
        for submodule in (
            "quality_validator",
            "data_processor",
            "model_trainer",
            "backup_manager",
            "monitoring",
        )
    ]
)

# Running this file directly hands control to Celery's own entry point.
if __name__ == "__main__":
    celery_app.start()
configs/stage_configs/check_config.sh ADDED
File without changes
configs/stage_configs/checkpoint_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint_config": {
3
+ "save_steps": 506,
4
+ "save_total_limit": 5,
5
+ "output_dir": "./checkpoints",
6
+ "resume_from_checkpoint": true,
7
+ "auto_find_batch_size": false
8
+ },
9
+ "backup_strategy": {
10
+ "local_backup": true,
11
+ "cloud_backup": false,
12
+ "backup_frequency": "every_checkpoint",
13
+ "backup_location": "./backups"
14
+ },
15
+ "recovery_points": [
16
+ {
17
+ "step": 1013,
18
+ "description": "10% complete"
19
+ },
20
+ {
21
+ "step": 2532,
22
+ "description": "25% complete"
23
+ },
24
+ {
25
+ "step": 5065,
26
+ "description": "50% complete"
27
+ },
28
+ {
29
+ "step": 7597,
30
+ "description": "75% complete"
31
+ },
32
+ {
33
+ "step": 9117,
34
+ "description": "90% complete"
35
+ }
36
+ ],
37
+ "monitoring": {
38
+ "track_loss": true,
39
+ "track_learning_rate": true,
40
+ "track_memory_usage": true,
41
+ "alert_on_divergence": true,
42
+ "loss_spike_threshold": 2.0
43
+ }
44
+ }
configs/stage_configs/checkpoint_requirements.txt ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Checkpoint System Requirements
2
+ # Install with: pip install -r checkpoint_requirements.txt
3
+
4
+ # Core system monitoring
5
+ psutil>=5.8.0
6
+
7
+ # Built-in Python modules (listed for reference)
8
+ # asyncio - Built-in Python 3.7+
9
+ # sqlite3 - Built-in
10
+ # pickle - Built-in
11
+ # gzip - Built-in
12
+ # json - Built-in
13
+ # hashlib - Built-in
14
+ # threading - Built-in
15
+ # pathlib - Built-in
16
+ # shutil - Built-in
17
+ # tempfile - Built-in
18
+ # uuid - Built-in
19
+ # time - Built-in
20
+ # datetime - Built-in
21
+ # logging - Built-in
22
+ # os - Built-in
23
+ # dataclasses - Built-in Python 3.7+
24
+ # enum - Built-in
25
+ # typing - Built-in Python 3.5+
26
+
27
+ # Optional: Enhanced features
28
+ # redis>=4.0.0 # For distributed checkpoint coordination
29
+ # cryptography>=3.0.0 # For checkpoint encryption
30
+ # lz4>=3.0.0 # For faster compression alternative
31
+ # msgpack>=1.0.0 # For more efficient serialization
32
+
33
+ # Development and testing
34
+ pytest>=7.0.0
35
+ pytest-asyncio>=0.21.0
36
+ coverage>=6.0.0
37
+
38
+ # Code quality
39
+ black>=22.0.0
40
+ flake8>=5.0.0
41
+ mypy>=0.991
42
+
43
+ # Documentation
44
+ sphinx>=4.0.0 # For generating documentation
45
+ sphinx-rtd-theme>=1.0.0
configs/stage_configs/claude_assessment.json ADDED
File without changes
configs/stage_configs/cli_config.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration management for CLI.
3
+ """
4
+
5
+ import json
6
+ import os
7
+ from copy import deepcopy
8
+ from pathlib import Path
9
+ from typing import Any, Dict, Optional, Union
10
+
11
+ try:
12
+ import yaml
13
+ YAML_AVAILABLE = True
14
+ except ImportError:
15
+ YAML_AVAILABLE = False
16
+ yaml = None # type: ignore
17
+
18
+
19
class ConfigManager:
    """Manages configuration for the research system.

    Configuration is read from ``config_path`` (YAML when PyYAML is importable
    and the suffix is ``.yaml``/``.yml``, JSON otherwise), layered on top of
    ``DEFAULT_CONFIG``, and optionally overridden by ``JOURNAL_RESEARCH_*``
    environment variables.
    """

    DEFAULT_CONFIG_PATH = Path.home() / ".journal_research" / "config.yaml"
    DEFAULT_CONFIG = {
        "orchestrator": {
            "max_retries": 3,
            "retry_delay_seconds": 1.0,
            "progress_history_limit": 100,
            "parallel_evaluation": False,
            "parallel_integration_planning": False,
            "max_workers": 4,
            "session_storage_path": None,
            "visualization_max_points": 100,
            "fallback_on_failure": True,
        },
        "discovery": {
            "pubmed": {
                "api_key": None,
                "base_url": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils",
                "search_limit": 100,
            },
            "doaj": {
                "base_url": "https://doaj.org/api/v2",
            },
            "repositories": {
                "dryad": {"base_url": "https://datadryad.org/api/v2"},
                "zenodo": {"base_url": "https://zenodo.org/api"},
                "clinical_trials": {"base_url": "https://clinicaltrials.gov/api/v2"},
            },
        },
        "evaluation": {
            "therapeutic_relevance_weight": 0.35,
            "data_structure_quality_weight": 0.25,
            "training_integration_weight": 0.20,
            "ethical_accessibility_weight": 0.20,
            "high_priority_threshold": 7.5,
            "medium_priority_threshold": 5.0,
        },
        "acquisition": {
            "storage_base_path": "data/acquired_datasets",
            "encryption_enabled": False,
            "download_timeout": 3600,
            "max_retries": 3,
            "chunk_size": 8192,
            "resume_downloads": True,
        },
        "integration": {
            "target_format": "chatml",
            "default_complexity": "medium",
        },
        "logging": {
            "level": "INFO",
            "file": None,
            "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        },
    }

    # File suffixes handled as YAML (only when PyYAML could be imported).
    _YAML_SUFFIXES = (".yaml", ".yml")

    # Prefix shared by every supported environment variable.
    _ENV_PREFIX = "JOURNAL_RESEARCH_"

    # Environment variable suffix -> (dot-separated config path, converter).
    # Numeric settings are converted so an override has the same type as the
    # default it replaces instead of arriving as a raw string.
    _ENV_MAPPINGS = {
        "PUBMED_API_KEY": ("discovery.pubmed.api_key", str),
        "STORAGE_PATH": ("acquisition.storage_base_path", str),
        "LOG_LEVEL": ("logging.level", str),
        "MAX_RETRIES": ("orchestrator.max_retries", int),
        "MAX_WORKERS": ("orchestrator.max_workers", int),
    }

    def __init__(self, config_path: Optional[Union[Path, str]] = None):
        """Initialize config manager with optional config path.

        Args:
            config_path: Location of the config file. Strings are converted to
                ``Path``; a falsy value selects ``DEFAULT_CONFIG_PATH``.
        """
        if isinstance(config_path, str):
            config_path = Path(config_path)
        self.config_path = config_path or self.DEFAULT_CONFIG_PATH
        # Best effort only: a read-only location must not prevent construction.
        # save() retries the mkdir and reports the failure if it still applies.
        try:
            self.config_path.parent.mkdir(parents=True, exist_ok=True)
        except OSError:  # includes PermissionError
            pass

    def _uses_yaml(self) -> bool:
        """Return True when the config file should be handled as YAML."""
        return self.config_path.suffix in self._YAML_SUFFIXES and YAML_AVAILABLE

    def load(self) -> Dict[str, Any]:
        """Load configuration from file or return defaults.

        Returns:
            A dict that is independent of ``DEFAULT_CONFIG``: the file contents
            merged over the defaults, or a copy of the defaults when the file
            is missing or cannot be read. Legacy top-level aliases are applied
            in both cases.
        """
        if self.config_path.exists():
            try:
                with open(self.config_path, "r", encoding="utf-8") as f:
                    if self._uses_yaml():
                        assert yaml is not None  # Type guard for type checker
                        config = yaml.safe_load(f) or {}
                    else:
                        # Fall back to JSON
                        config = json.load(f) or {}
                if not isinstance(config, dict):
                    raise ValueError("top-level config must be a mapping")
                # Merge with defaults to ensure all keys exist
                merged = self._merge_config(self.DEFAULT_CONFIG, config)
                return self._apply_legacy_aliases(merged)
            except Exception as e:
                # Deliberately broad: an unreadable or malformed config file
                # degrades to the defaults with a warning instead of aborting.
                print(f"Warning: Could not load config from {self.config_path}: {e}")
        return self._apply_legacy_aliases(deepcopy(self.DEFAULT_CONFIG))

    def save(self, config: Dict[str, Any]) -> None:
        """Save configuration to file.

        Raises:
            OSError: If the parent directory or the file cannot be written.
        """
        self.config_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.config_path, "w", encoding="utf-8") as f:
            if self._uses_yaml():
                assert yaml is not None  # Type guard for type checker
                yaml.dump(config, f, default_flow_style=False, indent=2)
            else:
                # Fall back to JSON
                json.dump(config, f, indent=2)

    def get(self, key_path: str, default: Any = None) -> Any:
        """Get a configuration value by dot-separated path.

        Returns ``default`` as soon as a path segment is missing or the value
        reached so far is not a dict.
        """
        value: Any = self.load()
        for key in key_path.split("."):
            if not isinstance(value, dict) or key not in value:
                return default
            value = value[key]
        return value

    def set(self, key_path: str, value: Any) -> None:
        """Set a configuration value by dot-separated path and persist it.

        Raises:
            TypeError: If an intermediate path segment holds a non-dict value.
        """
        config = self.load()
        self._assign_path(config, key_path, value)
        self.save(config)

    @staticmethod
    def _assign_path(config: Dict[str, Any], key_path: str, value: Any) -> None:
        """Assign ``value`` at ``key_path`` inside ``config``, creating dicts as needed."""
        *parents, leaf = key_path.split(".")
        target = config
        for key in parents:
            child = target.setdefault(key, {})
            if not isinstance(child, dict):
                raise TypeError(
                    f"Cannot set '{key_path}': '{key}' is not a mapping"
                )
            target = child
        target[leaf] = value

    def _merge_config(self, default: Dict[str, Any], user: Dict[str, Any]) -> Dict[str, Any]:
        """Recursively merge user config into default config.

        ``default`` is deep-copied first so the result never shares nested
        dicts with it. A shallow copy here would let later in-place edits of
        the merged config (``set``, ``apply_env_overrides``) leak into the
        class-level ``DEFAULT_CONFIG``.
        """
        result = deepcopy(default)
        for key, value in user.items():
            if isinstance(result.get(key), dict) and isinstance(value, dict):
                result[key] = self._merge_config(result[key], value)
            else:
                result[key] = value
        return result

    def _apply_legacy_aliases(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """Ensure legacy top-level aliases exist for backward compatibility.

        Mutates and returns ``config``. An alias is only added when the nested
        value is truthy and the top-level key is not already present.
        """
        # Maintain top-level storage_base_path alias
        storage_base_path = config.get("acquisition", {}).get("storage_base_path")
        if storage_base_path and "storage_base_path" not in config:
            config["storage_base_path"] = storage_base_path

        # Maintain top-level log_file alias
        log_file = config.get("logging", {}).get("file")
        if log_file and "log_file" not in config:
            config["log_file"] = log_file

        return config

    def load_env_overrides(self) -> Dict[str, Any]:
        """Load configuration overrides from environment variables.

        Returns:
            Mapping of dot-separated config path to override value. Integer
            settings are converted to ``int``; a value that cannot be
            converted is reported and skipped.
        """
        overrides: Dict[str, Any] = {}
        for env_var, (config_path, convert) in self._ENV_MAPPINGS.items():
            env_key = self._ENV_PREFIX + env_var
            if env_key not in os.environ:
                continue
            raw = os.environ[env_key]
            try:
                overrides[config_path] = convert(raw)
            except ValueError:
                print(
                    f"Warning: Ignoring {env_key}={raw!r}: "
                    f"expected {convert.__name__}"
                )
        return overrides

    def apply_env_overrides(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """Apply environment variable overrides to config.

        Mutates ``config`` in place and returns it.
        """
        for key_path, value in self.load_env_overrides().items():
            self._assign_path(config, key_path, value)
        return config
203
+
204
+
205
# Global config manager instance, used by the module-level helpers below
# whenever no explicit config_path is passed.
# NOTE: built at import time with the default path, so ConfigManager.__init__
# attempts (best effort) to create that path's parent directory on import.
_config_manager = ConfigManager()
207
+
208
+
209
def load_config(config_path: Optional[Union[Path, str]] = None) -> Dict[str, Any]:
    """Return the configuration with environment overrides applied.

    A string ``config_path`` is converted to ``Path``. A truthy path gets its
    own ``ConfigManager``; otherwise the shared module-level manager is used.
    """
    if isinstance(config_path, str):
        config_path = Path(config_path)
    if config_path:
        manager = ConfigManager(config_path)
    else:
        manager = _config_manager
    return manager.apply_env_overrides(manager.load())
218
+
219
+
220
def save_config(config: Dict[str, Any], config_path: Optional[Union[Path, str]] = None) -> None:
    """Write ``config`` to ``config_path``, or to the shared manager's path.

    A string ``config_path`` is converted to ``Path`` before use; a falsy
    value routes the write through the module-level manager.
    """
    if isinstance(config_path, str):
        config_path = Path(config_path)
    if config_path:
        target_manager = ConfigManager(config_path)
    else:
        target_manager = _config_manager
    target_manager.save(config)
227
+
228
+
229
def get_config_value(key_path: str, default: Any = None) -> Any:
    """Look up a dot-separated ``key_path`` via the shared manager.

    Returns whatever the manager's ``get`` returns for ``key_path`` and
    ``default``.
    """
    looked_up = _config_manager.get(key_path, default)
    return looked_up
232
+
configs/stage_configs/complexity_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "beginner": {
3
+ "level": "Beginner",
4
+ "weight": 0.4,
5
+ "min_samples": 1000,
6
+ "max_samples": 15000,
7
+ "complexity_range": [
8
+ 0.0,
9
+ 0.4
10
+ ],
11
+ "characteristics": [
12
+ "Simple emotional expressions",
13
+ "Basic therapeutic techniques",
14
+ "Clear, straightforward issues",
15
+ "Single-topic focus",
16
+ "Minimal comorbidity",
17
+ "Standard interventions"
18
+ ]
19
+ },
20
+ "intermediate": {
21
+ "level": "Intermediate",
22
+ "weight": 0.45,
23
+ "min_samples": 1200,
24
+ "max_samples": 18000,
25
+ "complexity_range": [
26
+ 0.4,
27
+ 0.7
28
+ ],
29
+ "characteristics": [
30
+ "Moderate emotional intensity",
31
+ "Multiple therapeutic techniques",
32
+ "Interconnected issues",
33
+ "Some comorbidity",
34
+ "Nuanced interventions",
35
+ "Relationship dynamics"
36
+ ]
37
+ },
38
+ "advanced": {
39
+ "level": "Advanced",
40
+ "weight": 0.15,
41
+ "min_samples": 400,
42
+ "max_samples": 6000,
43
+ "complexity_range": [
44
+ 0.7,
45
+ 1.0
46
+ ],
47
+ "characteristics": [
48
+ "High emotional intensity",
49
+ "Complex therapeutic approaches",
50
+ "Multiple interconnected issues",
51
+ "Significant comorbidity",
52
+ "Crisis intervention elements",
53
+ "Advanced clinical skills required"
54
+ ]
55
+ }
56
+ }
configs/stage_configs/comprehensive_integration_summary.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "integration_complete": true,
3
+ "all_components_integrated": true,
4
+ "components_integrated": [
5
+ "long_term_journaling_system",
6
+ "tri_expert_voice_blending",
7
+ "edge_case_scenarios",
8
+ "dual_persona_dynamics",
9
+ "bias_detection_validation",
10
+ "psychology_knowledge_base"
11
+ ],
12
+ "datasets": {
13
+ "journaling_enhanced": 39,
14
+ "voice_enhanced": 0,
15
+ "edge_cases": 5,
16
+ "dual_persona": 75,
17
+ "bias_validated": 10,
18
+ "psychology_kb_enhanced": 5,
19
+ "master_integrated": 0,
20
+ "total_datasets": 134
21
+ },
22
+ "expert_voices": [
23
+ "Tim Ferriss",
24
+ "Gabor Mat\u00e9",
25
+ "Bren\u00e9 Brown"
26
+ ],
27
+ "psychology_concepts": 4867,
28
+ "bias_categories_checked": 5,
29
+ "therapeutic_modalities": 6,
30
+ "kan_28_status": "FULLY_SOLVED",
31
+ "integration_timestamp": "2024-10-28"
32
+ }
configs/stage_configs/condition_config.json ADDED
@@ -0,0 +1,460 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "depression": {
3
+ "name": "Major Depressive Disorder",
4
+ "prevalence": 0.084,
5
+ "min_samples": 500,
6
+ "max_samples": 8000,
7
+ "aliases": [
8
+ "depression",
9
+ "depressed",
10
+ "major depression",
11
+ "mdd",
12
+ "sad",
13
+ "sadness"
14
+ ],
15
+ "comorbid_conditions": [
16
+ "anxiety",
17
+ "ptsd",
18
+ "substance_abuse"
19
+ ],
20
+ "severity_levels": [
21
+ "mild",
22
+ "moderate",
23
+ "severe"
24
+ ]
25
+ },
26
+ "anxiety": {
27
+ "name": "Generalized Anxiety Disorder",
28
+ "prevalence": 0.031,
29
+ "min_samples": 400,
30
+ "max_samples": 6000,
31
+ "aliases": [
32
+ "anxiety",
33
+ "anxious",
34
+ "gad",
35
+ "worry",
36
+ "worried",
37
+ "panic"
38
+ ],
39
+ "comorbid_conditions": [
40
+ "depression",
41
+ "ptsd",
42
+ "ocd"
43
+ ],
44
+ "severity_levels": [
45
+ "mild",
46
+ "moderate",
47
+ "severe"
48
+ ]
49
+ },
50
+ "ptsd": {
51
+ "name": "Post-Traumatic Stress Disorder",
52
+ "prevalence": 0.037,
53
+ "min_samples": 300,
54
+ "max_samples": 4000,
55
+ "aliases": [
56
+ "ptsd",
57
+ "trauma",
58
+ "traumatic",
59
+ "flashback",
60
+ "nightmares"
61
+ ],
62
+ "comorbid_conditions": [
63
+ "depression",
64
+ "anxiety",
65
+ "substance_abuse"
66
+ ],
67
+ "severity_levels": [
68
+ "mild",
69
+ "moderate",
70
+ "severe"
71
+ ]
72
+ },
73
+ "bipolar": {
74
+ "name": "Bipolar Disorder",
75
+ "prevalence": 0.028,
76
+ "min_samples": 250,
77
+ "max_samples": 3000,
78
+ "aliases": [
79
+ "bipolar",
80
+ "manic",
81
+ "mania",
82
+ "mood swings",
83
+ "hypomania"
84
+ ],
85
+ "comorbid_conditions": [
86
+ "anxiety",
87
+ "substance_abuse",
88
+ "adhd"
89
+ ],
90
+ "severity_levels": [
91
+ "mild",
92
+ "moderate",
93
+ "severe"
94
+ ]
95
+ },
96
+ "adhd": {
97
+ "name": "Attention-Deficit/Hyperactivity Disorder",
98
+ "prevalence": 0.041,
99
+ "min_samples": 300,
100
+ "max_samples": 4000,
101
+ "aliases": [
102
+ "adhd",
103
+ "add",
104
+ "attention deficit",
105
+ "hyperactive",
106
+ "inattentive"
107
+ ],
108
+ "comorbid_conditions": [
109
+ "anxiety",
110
+ "depression",
111
+ "bipolar"
112
+ ],
113
+ "severity_levels": [
114
+ "mild",
115
+ "moderate",
116
+ "severe"
117
+ ]
118
+ },
119
+ "ocd": {
120
+ "name": "Obsessive-Compulsive Disorder",
121
+ "prevalence": 0.012,
122
+ "min_samples": 150,
123
+ "max_samples": 2000,
124
+ "aliases": [
125
+ "ocd",
126
+ "obsessive",
127
+ "compulsive",
128
+ "intrusive thoughts",
129
+ "rituals"
130
+ ],
131
+ "comorbid_conditions": [
132
+ "anxiety",
133
+ "depression",
134
+ "tics"
135
+ ],
136
+ "severity_levels": [
137
+ "mild",
138
+ "moderate",
139
+ "severe"
140
+ ]
141
+ },
142
+ "autism": {
143
+ "name": "Autism Spectrum Disorder",
144
+ "prevalence": 0.016,
145
+ "min_samples": 200,
146
+ "max_samples": 2500,
147
+ "aliases": [
148
+ "autism",
149
+ "asd",
150
+ "asperger",
151
+ "autistic",
152
+ "spectrum"
153
+ ],
154
+ "comorbid_conditions": [
155
+ "anxiety",
156
+ "depression",
157
+ "adhd"
158
+ ],
159
+ "severity_levels": [
160
+ "level 1",
161
+ "level 2",
162
+ "level 3"
163
+ ]
164
+ },
165
+ "bpd": {
166
+ "name": "Borderline Personality Disorder",
167
+ "prevalence": 0.014,
168
+ "min_samples": 150,
169
+ "max_samples": 2000,
170
+ "aliases": [
171
+ "bpd",
172
+ "borderline",
173
+ "personality disorder",
174
+ "emotional dysregulation"
175
+ ],
176
+ "comorbid_conditions": [
177
+ "depression",
178
+ "anxiety",
179
+ "ptsd",
180
+ "substance_abuse"
181
+ ],
182
+ "severity_levels": [
183
+ "mild",
184
+ "moderate",
185
+ "severe"
186
+ ]
187
+ },
188
+ "schizophrenia": {
189
+ "name": "Schizophrenia",
190
+ "prevalence": 0.011,
191
+ "min_samples": 100,
192
+ "max_samples": 1500,
193
+ "aliases": [
194
+ "schizophrenia",
195
+ "psychosis",
196
+ "hallucinations",
197
+ "delusions"
198
+ ],
199
+ "comorbid_conditions": [
200
+ "depression",
201
+ "anxiety",
202
+ "substance_abuse"
203
+ ],
204
+ "severity_levels": [
205
+ "mild",
206
+ "moderate",
207
+ "severe"
208
+ ]
209
+ },
210
+ "eating_disorders": {
211
+ "name": "Eating Disorders",
212
+ "prevalence": 0.009,
213
+ "min_samples": 100,
214
+ "max_samples": 1500,
215
+ "aliases": [
216
+ "anorexia",
217
+ "bulimia",
218
+ "binge eating",
219
+ "eating disorder",
220
+ "body image"
221
+ ],
222
+ "comorbid_conditions": [
223
+ "depression",
224
+ "anxiety",
225
+ "ocd"
226
+ ],
227
+ "severity_levels": [
228
+ "mild",
229
+ "moderate",
230
+ "severe"
231
+ ]
232
+ },
233
+ "substance_abuse": {
234
+ "name": "Substance Use Disorders",
235
+ "prevalence": 0.104,
236
+ "min_samples": 400,
237
+ "max_samples": 6000,
238
+ "aliases": [
239
+ "addiction",
240
+ "substance abuse",
241
+ "alcoholism",
242
+ "drug abuse",
243
+ "dependency"
244
+ ],
245
+ "comorbid_conditions": [
246
+ "depression",
247
+ "anxiety",
248
+ "ptsd",
249
+ "bipolar"
250
+ ],
251
+ "severity_levels": [
252
+ "mild",
253
+ "moderate",
254
+ "severe"
255
+ ]
256
+ },
257
+ "social_anxiety": {
258
+ "name": "Social Anxiety Disorder",
259
+ "prevalence": 0.073,
260
+ "min_samples": 300,
261
+ "max_samples": 4000,
262
+ "aliases": [
263
+ "social anxiety",
264
+ "social phobia",
265
+ "shy",
266
+ "shyness",
267
+ "social fear"
268
+ ],
269
+ "comorbid_conditions": [
270
+ "depression",
271
+ "anxiety",
272
+ "avoidant_personality"
273
+ ],
274
+ "severity_levels": [
275
+ "mild",
276
+ "moderate",
277
+ "severe"
278
+ ]
279
+ },
280
+ "panic_disorder": {
281
+ "name": "Panic Disorder",
282
+ "prevalence": 0.028,
283
+ "min_samples": 200,
284
+ "max_samples": 3000,
285
+ "aliases": [
286
+ "panic disorder",
287
+ "panic attacks",
288
+ "agoraphobia",
289
+ "panic"
290
+ ],
291
+ "comorbid_conditions": [
292
+ "anxiety",
293
+ "depression",
294
+ "substance_abuse"
295
+ ],
296
+ "severity_levels": [
297
+ "mild",
298
+ "moderate",
299
+ "severe"
300
+ ]
301
+ },
302
+ "insomnia": {
303
+ "name": "Insomnia and Sleep Disorders",
304
+ "prevalence": 0.06,
305
+ "min_samples": 250,
306
+ "max_samples": 3500,
307
+ "aliases": [
308
+ "insomnia",
309
+ "sleep disorder",
310
+ "sleepless",
311
+ "sleep problems"
312
+ ],
313
+ "comorbid_conditions": [
314
+ "depression",
315
+ "anxiety",
316
+ "bipolar"
317
+ ],
318
+ "severity_levels": [
319
+ "mild",
320
+ "moderate",
321
+ "severe"
322
+ ]
323
+ },
324
+ "chronic_pain": {
325
+ "name": "Chronic Pain and Mental Health",
326
+ "prevalence": 0.05,
327
+ "min_samples": 200,
328
+ "max_samples": 3000,
329
+ "aliases": [
330
+ "chronic pain",
331
+ "fibromyalgia",
332
+ "pain",
333
+ "chronic illness"
334
+ ],
335
+ "comorbid_conditions": [
336
+ "depression",
337
+ "anxiety",
338
+ "ptsd"
339
+ ],
340
+ "severity_levels": [
341
+ "mild",
342
+ "moderate",
343
+ "severe"
344
+ ]
345
+ },
346
+ "grief": {
347
+ "name": "Grief and Bereavement",
348
+ "prevalence": 0.035,
349
+ "min_samples": 150,
350
+ "max_samples": 2500,
351
+ "aliases": [
352
+ "grief",
353
+ "bereavement",
354
+ "loss",
355
+ "mourning",
356
+ "death"
357
+ ],
358
+ "comorbid_conditions": [
359
+ "depression",
360
+ "anxiety",
361
+ "ptsd"
362
+ ],
363
+ "severity_levels": [
364
+ "normal",
365
+ "complicated",
366
+ "prolonged"
367
+ ]
368
+ },
369
+ "relationship_issues": {
370
+ "name": "Relationship and Interpersonal Issues",
371
+ "prevalence": 0.08,
372
+ "min_samples": 300,
373
+ "max_samples": 4500,
374
+ "aliases": [
375
+ "relationship",
376
+ "marriage",
377
+ "divorce",
378
+ "breakup",
379
+ "interpersonal"
380
+ ],
381
+ "comorbid_conditions": [
382
+ "depression",
383
+ "anxiety",
384
+ "attachment_issues"
385
+ ],
386
+ "severity_levels": [
387
+ "mild",
388
+ "moderate",
389
+ "severe"
390
+ ]
391
+ },
392
+ "work_stress": {
393
+ "name": "Work-Related Stress and Burnout",
394
+ "prevalence": 0.07,
395
+ "min_samples": 250,
396
+ "max_samples": 3500,
397
+ "aliases": [
398
+ "work stress",
399
+ "burnout",
400
+ "job stress",
401
+ "workplace",
402
+ "career"
403
+ ],
404
+ "comorbid_conditions": [
405
+ "depression",
406
+ "anxiety",
407
+ "insomnia"
408
+ ],
409
+ "severity_levels": [
410
+ "mild",
411
+ "moderate",
412
+ "severe"
413
+ ]
414
+ },
415
+ "parenting_stress": {
416
+ "name": "Parenting Stress and Family Issues",
417
+ "prevalence": 0.045,
418
+ "min_samples": 200,
419
+ "max_samples": 3000,
420
+ "aliases": [
421
+ "parenting",
422
+ "family stress",
423
+ "child behavior",
424
+ "parental stress"
425
+ ],
426
+ "comorbid_conditions": [
427
+ "depression",
428
+ "anxiety",
429
+ "relationship_issues"
430
+ ],
431
+ "severity_levels": [
432
+ "mild",
433
+ "moderate",
434
+ "severe"
435
+ ]
436
+ },
437
+ "loneliness": {
438
+ "name": "Loneliness and Social Isolation",
439
+ "prevalence": 0.055,
440
+ "min_samples": 200,
441
+ "max_samples": 3000,
442
+ "aliases": [
443
+ "loneliness",
444
+ "lonely",
445
+ "isolated",
446
+ "social isolation",
447
+ "alone"
448
+ ],
449
+ "comorbid_conditions": [
450
+ "depression",
451
+ "anxiety",
452
+ "social_anxiety"
453
+ ],
454
+ "severity_levels": [
455
+ "mild",
456
+ "moderate",
457
+ "severe"
458
+ ]
459
+ }
460
+ }
configs/stage_configs/config.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration for NVIDIA NeMo Data Designer service."""
2
+
3
+ import os
4
+ from dataclasses import dataclass
5
+ from typing import Optional
6
+
7
+ try:
8
+ from dotenv import load_dotenv
9
+ load_dotenv()
10
+ except ImportError:
11
+ pass # dotenv is optional
12
+
13
+
14
@dataclass
class DataDesignerConfig:
    """Connection and batching settings for the NeMo Data Designer client."""

    # Service endpoint; the default matches a local Docker Compose deployment.
    base_url: str = "http://localhost:8000"
    api_key: Optional[str] = None
    # Request timeout; the default of 300 corresponds to five minutes.
    timeout: int = 300
    max_retries: int = 3
    batch_size: int = 1000

    @classmethod
    def from_env(cls) -> "DataDesignerConfig":
        """Build a configuration from environment variables.

        Unset variables fall back to the same values as the field defaults.
        Numeric variables are parsed with ``int`` and raise ``ValueError``
        when they are not valid integers.
        """
        env = os.environ.get
        settings = {
            "base_url": env("NEMO_DATA_DESIGNER_BASE_URL", "http://localhost:8000"),
            "api_key": env("NVIDIA_API_KEY"),
            "timeout": int(env("NEMO_DATA_DESIGNER_TIMEOUT", "300")),
            "max_retries": int(env("NEMO_DATA_DESIGNER_MAX_RETRIES", "3")),
            "batch_size": int(env("NEMO_DATA_DESIGNER_BATCH_SIZE", "1000")),
        }
        return cls(**settings)

    def validate(self) -> None:
        """Check the settings and raise ``ValueError`` on the first violation.

        Checks run in a fixed order: API key, base URL, timeout, retry count,
        batch size.
        """
        if not self.api_key:
            raise ValueError(
                "NVIDIA_API_KEY environment variable is required. "
                "Get your API key from https://build.nvidia.com/nemo/data-designer"
            )
        # Each rule is evaluated lazily so a later field is only inspected
        # once every earlier rule has passed.
        rules = (
            (lambda: not self.base_url, "base_url cannot be empty"),
            (lambda: self.timeout <= 0, "timeout must be positive"),
            (lambda: self.max_retries < 0, "max_retries must be non-negative"),
            (lambda: self.batch_size <= 0, "batch_size must be positive"),
        )
        for is_violated, message in rules:
            if is_violated():
                raise ValueError(message)
53
+
configs/stage_configs/config_example.py ADDED
File without changes
configs/stage_configs/config_lock.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2025-11-30T18:08:28.603381Z",
3
+ "git_info": {
4
+ "commit_sha": "46b7965d54d4ccfc0d018ace10b0724a9246bef3",
5
+ "commit_message": "chore: update pnpm version across configurations and workflows\n\n- Upgraded pnpm version from 10.23.0 to 10.24.0 in various configuration files, including .gitlab-ci.yml, Dockerfile, and multiple GitHub workflows.\n- Ensured consistency in pnpm version across package.json, documentation, and scripts to maintain compatibility and improve functionality.\n- Removed obsolete files related to Azure Pipelines diagnostics and remaining fixes plan as they are no longer needed.",
6
+ "branch": "master",
7
+ "is_dirty": true,
8
+ "remote_url": "git@github.com:pixelatedempathy/pixelated.git"
9
+ },
10
+ "random_seed": 42,
11
+ "config_snapshot": {
12
+ "target_samples": 50,
13
+ "pipeline_config": {
14
+ "edge_cases": {
15
+ "enabled": true,
16
+ "target_percentage": 0.25
17
+ },
18
+ "pixel_voice": {
19
+ "enabled": true,
20
+ "target_percentage": 0.2
21
+ },
22
+ "psychology_knowledge": {
23
+ "enabled": true,
24
+ "target_percentage": 0.15
25
+ },
26
+ "dual_persona": {
27
+ "enabled": true,
28
+ "target_percentage": 0.1
29
+ },
30
+ "standard_therapeutic": {
31
+ "enabled": true,
32
+ "target_percentage": 0.3
33
+ }
34
+ }
35
+ },
36
+ "python_version": "3.11.13 (main, Jun 12 2025, 12:41:02) [Clang 20.1.4 ]",
37
+ "platform": "Linux-6.14.0-27-generic-x86_64-with-glibc2.41",
38
+ "config_hash": "eb8ea2f72df5a68b"
39
+ }
configs/stage_configs/config_lock.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Configuration Locking System
4
+ Freezes configuration, seeds, and git commit info for reproducibility
5
+ """
6
+
7
+ import json
8
+ import random
9
+ import subprocess
10
+ from dataclasses import dataclass, asdict, field
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+ from typing import Optional, Dict, Any
14
+ import hashlib
15
+
16
+
17
@dataclass
class GitInfo:
    """Git repository information"""
    commit_sha: str
    commit_message: str
    branch: str
    is_dirty: bool
    remote_url: Optional[str] = None

    @staticmethod
    def _git(repo_path: Path, *args: str) -> str:
        """Run ``git <args>`` in ``repo_path`` and return its stripped stdout.

        Raises:
            subprocess.CalledProcessError: If git exits with a non-zero status.
            OSError: If the process cannot be started (git missing, or
                ``repo_path`` is not a usable directory).
        """
        result = subprocess.run(
            ["git", *args],
            cwd=repo_path,
            capture_output=True,
            text=True,
            check=True,
        )
        return result.stdout.strip()

    @classmethod
    def capture(cls, repo_path: Optional[Path] = None) -> "GitInfo":
        """Capture current git state.

        Args:
            repo_path: Directory to run git in; defaults to the current
                working directory.

        Returns:
            The HEAD commit SHA and message, the current branch, whether
            ``git status --porcelain`` reports any changes, and the
            ``origin`` remote URL (``None`` when that lookup fails). If git
            cannot be run or a required command fails, every text field is
            ``"unknown"`` and ``is_dirty`` is ``False``.
        """
        repo_path = repo_path or Path.cwd()

        try:
            commit_sha = cls._git(repo_path, "rev-parse", "HEAD")
            commit_message = cls._git(repo_path, "log", "-1", "--pretty=%B")
            branch = cls._git(repo_path, "rev-parse", "--abbrev-ref", "HEAD")
            # Any porcelain output means uncommitted or untracked changes.
            is_dirty = bool(cls._git(repo_path, "status", "--porcelain"))

            # The remote is optional: a failing lookup is not an error.
            try:
                remote_url: Optional[str] = cls._git(
                    repo_path, "config", "--get", "remote.origin.url"
                )
            except subprocess.CalledProcessError:
                remote_url = None
        except (subprocess.CalledProcessError, OSError):
            # Git not available, not a git repo, or repo_path is unusable.
            # OSError covers FileNotFoundError as well as NotADirectoryError
            # and PermissionError, which previously escaped this fallback.
            return cls(
                commit_sha="unknown",
                commit_message="unknown",
                branch="unknown",
                is_dirty=False,
                remote_url=None,
            )

        return cls(
            commit_sha=commit_sha,
            commit_message=commit_message,
            branch=branch,
            is_dirty=is_dirty,
            remote_url=remote_url,
        )
102
+
103
+
104
@dataclass
class LockedConfig:
    """Configuration snapshot bundled with the data needed to reproduce a run."""
    # Creation timestamp, stored as a string
    created_at: str

    # State of the git repository at lock time
    git_info: GitInfo

    # Seed for the random module
    random_seed: int

    # The configuration being frozen
    config_snapshot: Dict[str, Any]

    # Interpreter and OS description
    python_version: str
    platform: str

    # Short digest of config_snapshot; computed when left empty
    config_hash: str = field(default="")

    def __post_init__(self):
        """Fill in ``config_hash`` unless one was supplied."""
        if self.config_hash:
            return
        # Keys are sorted so the digest does not depend on insertion order.
        serialized = json.dumps(self.config_snapshot, sort_keys=True)
        digest = hashlib.sha256(serialized.encode()).hexdigest()
        self.config_hash = digest[:16]

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form, with ``git_info`` expanded to a dict."""
        payload: Dict[str, Any] = {
            name: getattr(self, name)
            for name in (
                'created_at',
                'git_info',
                'random_seed',
                'config_snapshot',
                'python_version',
                'platform',
                'config_hash',
            )
        }
        # Reassigning an existing key keeps its position in the dict.
        payload['git_info'] = asdict(self.git_info)
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "LockedConfig":
        """Rebuild an instance from the mapping produced by ``to_dict``.

        A missing ``config_hash`` is treated as empty, which causes it to be
        recomputed from the snapshot.
        """
        restored_git = GitInfo(**data['git_info'])
        required = {
            key: data[key]
            for key in (
                'created_at',
                'random_seed',
                'config_snapshot',
                'python_version',
                'platform',
            )
        }
        return cls(
            git_info=restored_git,
            config_hash=data.get('config_hash', ''),
            **required,
        )

    def save(self, path: Path) -> None:
        """Write the locked config to ``path`` as indented JSON, creating parent directories."""
        path.parent.mkdir(parents=True, exist_ok=True)
        serializable = self.to_dict()
        with open(path, 'w') as handle:
            json.dump(serializable, handle, indent=2)

    @classmethod
    def load(cls, path: Path) -> "LockedConfig":
        """Read a locked config previously written by ``save``."""
        with open(path, 'r') as handle:
            return cls.from_dict(json.load(handle))
171
+
172
+
173
def lock_config(config: Dict[str, Any], seed: Optional[int] = None,
                repo_path: Optional[Path] = None) -> LockedConfig:
    """Lock a configuration with reproducibility info.

    Args:
        config: Configuration to freeze. It is deep-copied, so changing the
            caller's dict afterwards cannot alter the snapshot or make the
            stored hash stale.
        seed: Random seed to record and apply. When ``None`` a seed is drawn
            with ``random.randint(0, 2**31 - 1)``.
        repo_path: Repository to inspect for git information; passed through
            to ``GitInfo.capture``.

    Returns:
        A ``LockedConfig`` holding the snapshot, seed, git state, interpreter
        version, platform string and a UTC creation timestamp.

    Side effects:
        Seeds the global ``random`` module with the chosen seed.
    """
    import platform
    import sys
    from copy import deepcopy
    from datetime import timezone

    # Generate seed if not provided
    if seed is None:
        seed = random.randint(0, 2**31 - 1)

    # Set random seed
    random.seed(seed)

    # Capture git info
    git_info = GitInfo.capture(repo_path)

    # datetime.utcnow() is deprecated; take an aware UTC "now" and drop the
    # tzinfo so the rendered text keeps the "<naive ISO timestamp>Z" shape.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    return LockedConfig(
        created_at=now_utc.isoformat() + "Z",
        git_info=git_info,
        random_seed=seed,
        config_snapshot=deepcopy(config),
        python_version=sys.version,
        platform=platform.platform(),
    )
200
+
201
+
202
def apply_locked_config(locked_config: LockedConfig) -> None:
    """Re-seed the ``random`` module from a locked configuration.

    Only the seed is applied here; using ``config_snapshot`` is left to the
    caller.
    """
    recorded_seed = locked_config.random_seed
    random.seed(recorded_seed)
206
+
configs/stage_configs/config_profiles.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Training Configuration Profiles
4
+ Maps stage configs and dataset profiles into concrete training data selections.
5
+ Ensures default/prod profiles do not silently include edge/red-team profiles.
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+ from enum import Enum
10
+ from pathlib import Path
11
+ from typing import Any, Dict, Iterable, List, Optional, Union
12
+
13
+ from ..configs.stages import (
14
+ STAGE1_ID,
15
+ STAGE2_ID,
16
+ STAGE3_ID,
17
+ STAGE4_ID,
18
+ )
19
+ from ..utils.logger import get_logger
20
+
21
+ logger = get_logger("dataset_pipeline.training.config_profiles")
22
+
23
+
24
class TrainingProfile(Enum):
    """Closed set of named training profiles; each value is the profile key."""

    # Stage 1: Foundation & Rapport
    FOUNDATION = "foundation"
    # Stage 2: Therapeutic Expertise & Reasoning
    REASONING = "reasoning"
    # Stage 3: Edge Stress Test & Scenario Bank
    EDGE_CRISIS = "edge_crisis"
    # Stage 4: Voice, Persona & Delivery
    VOICE_PERSONA = "voice_persona"
    # General-purpose production training (no edge)
    PRODUCTION = "production"
    # Research/red-team profile (includes edge)
    RESEARCH = "research"
33
+
34
+
35
@dataclass
class ProfileConfig:
    """Settings that describe a single training profile."""

    profile_name: str
    # Stage identifiers whose examples belong to this profile
    stage_ids: List[str]
    # True when edge/red-team datasets may be used with this profile
    allow_edge_profiles: bool
    description: str
    # Free-form extras; every instance gets its own dict
    metadata: Dict[str, Any] = field(default_factory=dict)
44
+
45
+
46
# Predefined profile configurations, keyed by profile name.
# Each row is (profile, stage ids, edge data allowed?, description).
PROFILE_CONFIGS: Dict[str, ProfileConfig] = {
    profile.value: ProfileConfig(
        profile_name=profile.value,
        stage_ids=list(stage_ids),
        allow_edge_profiles=allow_edge,
        description=description,
    )
    for profile, stage_ids, allow_edge, description in (
        (
            TrainingProfile.FOUNDATION,
            (STAGE1_ID,),
            False,
            "Foundation & Rapport training (Stage 1 only, no edge cases)",
        ),
        (
            TrainingProfile.REASONING,
            (STAGE2_ID,),
            False,
            "Therapeutic Expertise & Reasoning training (Stage 2 only, no edge cases)",
        ),
        (
            TrainingProfile.EDGE_CRISIS,
            (STAGE3_ID,),
            True,
            "Edge Stress Test & Scenario Bank (Stage 3, edge cases allowed)",
        ),
        (
            TrainingProfile.VOICE_PERSONA,
            (STAGE4_ID,),
            False,
            "Voice, Persona & Delivery training (Stage 4 only, no edge cases)",
        ),
        (
            TrainingProfile.PRODUCTION,
            (STAGE1_ID, STAGE2_ID, STAGE4_ID),  # Explicitly exclude Stage 3
            False,
            "General-purpose production training (Stages 1, 2, 4 - no edge cases)",
        ),
        (
            TrainingProfile.RESEARCH,
            (STAGE1_ID, STAGE2_ID, STAGE3_ID, STAGE4_ID),  # All stages
            True,
            "Research/red-team profile (all stages, edge cases allowed)",
        ),
    )
}
85
+
86
+
87
class TrainingDataSelector:
    """
    Profile-aware data selector that ensures edge profiles are only used
    in appropriate training configurations.

    A manifest is either a dict holding the examples under ``"examples"``
    (or, failing that, ``"data"``) or a bare list of examples. Each example
    is a dict; its ``"metadata"`` dict carries the ``"stage"`` and any edge
    markers that the selector inspects.
    """

    def __init__(self, manifest_path: Optional[Union[str, Path]] = None):
        """
        Initialize the training data selector.

        Args:
            manifest_path: Optional path to dataset manifest
        """
        self.manifest_path = Path(manifest_path) if manifest_path else None

    @staticmethod
    def _metadata(example: Dict[str, Any]) -> Dict[str, Any]:
        """Return the example's metadata dict, or ``{}`` when absent or null."""
        return example.get("metadata") or {}

    def select_data(
        self,
        profile_name: str,
        manifest: Optional[Dict[str, Any]] = None,
    ) -> Iterable[Dict[str, Any]]:
        """
        Select training data based on profile configuration.

        This is a generator: nothing (including the profile-name check)
        runs until iteration starts.

        Args:
            profile_name: Name of the training profile
            manifest: Optional dataset manifest (if None, loads from manifest_path)

        Yields:
            Training examples matching the profile

        Raises:
            ValueError: If ``profile_name`` is not a key of PROFILE_CONFIGS.
        """
        # Get profile config
        if profile_name not in PROFILE_CONFIGS:
            raise ValueError(
                f"Unknown profile: {profile_name}. "
                f"Available profiles: {', '.join(PROFILE_CONFIGS.keys())}"
            )

        profile_config = PROFILE_CONFIGS[profile_name]

        logger.info(
            f"Selecting data for profile '{profile_name}': "
            f"stages={profile_config.stage_ids}, "
            f"allow_edge={profile_config.allow_edge_profiles}"
        )

        # Load manifest if not provided
        if manifest is None:
            manifest = self._load_manifest()

        # Select examples based on profile
        for example in self._iterate_examples(manifest):
            # Check stage
            if self._metadata(example).get("stage") not in profile_config.stage_ids:
                continue

            # Drop edge examples from profiles that do not allow them
            if not profile_config.allow_edge_profiles and self._is_edge_example(example):
                logger.warning(
                    f"Skipping edge example in non-edge profile '{profile_name}': "
                    f"{example.get('id', 'unknown')}"
                )
                continue

            yield example

    def _is_edge_example(self, example: Dict[str, Any]) -> bool:
        """Check if an example is an edge/red-team example.

        An example counts as edge when its metadata has an ``edge_profile``
        or ``edge_category`` key, its stage is Stage 3, or its
        ``crisis_intensity`` is ``"very_high"`` or ``"extreme"``.
        """
        metadata = self._metadata(example)

        # Explicit edge markers
        if "edge_profile" in metadata or "edge_category" in metadata:
            return True

        # Check for stage 3 (edge stress test)
        if metadata.get("stage") == STAGE3_ID:
            return True

        # Check for crisis intensity flags
        return metadata.get("crisis_intensity") in ["very_high", "extreme"]

    def _load_manifest(self) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
        """Load dataset manifest.

        Returns ``{"examples": []}`` (after logging a warning) when no
        manifest path is configured or the file does not exist; otherwise
        the parsed JSON content of the manifest file.
        """
        if not self.manifest_path or not self.manifest_path.exists():
            logger.warning(
                f"Manifest not found at {self.manifest_path}, returning empty manifest"
            )
            return {"examples": []}

        import json

        # Read explicitly as UTF-8 rather than the platform default encoding
        with open(self.manifest_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def _iterate_examples(
        self, manifest: Union[Dict[str, Any], List[Dict[str, Any]]]
    ) -> Iterable[Dict[str, Any]]:
        """Iterate over examples in manifest.

        Accepts a bare list of examples, or a dict with the examples under
        ``"examples"`` or (when that is missing/empty) ``"data"``.
        """
        # The list form must be detected before any dict access: lists
        # have no .get(), so checking it afterwards can never succeed.
        if isinstance(manifest, list):
            examples = manifest
        else:
            # Try "examples" first, then the alternative "data" layout
            examples = manifest.get("examples") or manifest.get("data")

        # A missing or null collection yields nothing
        yield from examples or []

    def assert_no_edge_in_profile(
        self,
        profile_name: str,
        manifest: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Assert that a profile does not contain edge examples.
        Raises ValueError if edge examples are found.

        Profiles that allow edge data are not checked.

        Args:
            profile_name: Name of the profile to check
            manifest: Optional dataset manifest

        Raises:
            ValueError: If the profile is unknown, or if any example in one
                of the profile's stages is an edge example.
        """
        if profile_name not in PROFILE_CONFIGS:
            raise ValueError(f"Unknown profile: {profile_name}")

        profile_config = PROFILE_CONFIGS[profile_name]

        if profile_config.allow_edge_profiles:
            logger.info(
                f"Profile '{profile_name}' allows edge profiles, skipping assertion"
            )
            return

        # Load manifest if not provided
        if manifest is None:
            manifest = self._load_manifest()

        # Collect the ids of edge examples that fall in this profile's stages
        edge_examples = [
            example.get("id", "unknown")
            for example in self._iterate_examples(manifest)
            if self._metadata(example).get("stage") in profile_config.stage_ids
            and self._is_edge_example(example)
        ]

        if edge_examples:
            raise ValueError(
                f"Profile '{profile_name}' contains {len(edge_examples)} edge examples: "
                f"{edge_examples[:5]}{'...' if len(edge_examples) > 5 else ''}. "
                f"This profile does not allow edge/red-team data."
            )

        logger.info(f"Profile '{profile_name}' validated: no edge examples found")

    def get_profile_stats(
        self,
        profile_name: str,
        manifest: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Get statistics for a profile.

        Counts are taken over the examples that ``select_data`` yields for
        the profile, so filtered-out edge examples are not counted.

        Args:
            profile_name: Name of the profile
            manifest: Optional dataset manifest

        Returns:
            Statistics dictionary

        Raises:
            ValueError: If the profile is unknown.
        """
        if profile_name not in PROFILE_CONFIGS:
            raise ValueError(f"Unknown profile: {profile_name}")

        profile_config = PROFILE_CONFIGS[profile_name]

        # Load manifest if not provided
        if manifest is None:
            manifest = self._load_manifest()

        stats = {
            "profile_name": profile_name,
            "stages": profile_config.stage_ids,
            "allow_edge_profiles": profile_config.allow_edge_profiles,
            "total_examples": 0,
            "by_stage": {},
            "edge_examples": 0,
            "non_edge_examples": 0,
        }

        for example in self.select_data(profile_name, manifest):
            stats["total_examples"] += 1

            example_stage = self._metadata(example).get("stage", "unknown")
            stats["by_stage"][example_stage] = (
                stats["by_stage"].get(example_stage, 0) + 1
            )

            if self._is_edge_example(example):
                stats["edge_examples"] += 1
            else:
                stats["non_edge_examples"] += 1

        return stats
294
+
295
+
296
def get_profile_config(profile_name: str) -> ProfileConfig:
    """Look up the predefined configuration registered under ``profile_name``.

    Raises:
        ValueError: If no such profile exists; the message lists the
            available profile names.
    """
    if profile_name in PROFILE_CONFIGS:
        return PROFILE_CONFIGS[profile_name]

    available = ', '.join(PROFILE_CONFIGS.keys())
    raise ValueError(
        f"Unknown profile: {profile_name}. "
        f"Available: {available}"
    )
304
+
305
+
306
def list_profiles() -> List[str]:
    """Return the names of all predefined training profiles."""
    return [name for name in PROFILE_CONFIGS]
309
+
310
+
311
def validate_profile_config(profile_name: str) -> tuple[bool, Optional[str]]:
    """
    Check a predefined profile for internal consistency.

    A profile is valid when it exists, every stage ID it lists is one of the
    four known stages, and - for the production profile - edge data is
    neither allowed nor pulled in through Stage 3.

    Returns:
        Tuple of (is_valid, error_message); the message is None when valid.
    """
    if profile_name not in PROFILE_CONFIGS:
        return False, f"Unknown profile: {profile_name}"

    config = PROFILE_CONFIGS[profile_name]

    # Every listed stage must be one of the known stage IDs
    known_stage_ids = {STAGE1_ID, STAGE2_ID, STAGE3_ID, STAGE4_ID}
    unknown_stage_ids = [
        stage_id for stage_id in config.stage_ids if stage_id not in known_stage_ids
    ]
    if unknown_stage_ids:
        # Report the first offender only
        return False, f"Invalid stage ID in profile: {unknown_stage_ids[0]}"

    # The production profile must stay free of edge data
    is_production = profile_name == TrainingProfile.PRODUCTION.value
    if is_production and config.allow_edge_profiles:
        return False, "Production profile must not allow edge profiles"
    if is_production and STAGE3_ID in config.stage_ids:
        return (
            False,
            "Production profile must not include Stage 3 (edge stress test)",
        )

    return True, None
configs/stage_configs/config_tracker.py ADDED
@@ -0,0 +1,700 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Configuration Change Tracking and Rollback System for Pixelated Empathy AI
4
+ Tracks configuration changes and provides rollback capabilities
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import json
10
+ import yaml
11
+ import hashlib
12
+ import shutil
13
+ import logging
14
+ from typing import Dict, List, Any, Optional, Tuple
15
+ from pathlib import Path
16
+ from dataclasses import dataclass, asdict
17
+ from datetime import datetime, timezone
18
+ import subprocess
19
+ import tempfile
20
+ from contextlib import contextmanager
21
+
22
# Configure root logging (INFO level, timestamped records) and create this
# module's logger
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
28
+
29
+
30
@dataclass
class ConfigChange:
    """Record of one tracked change to a configuration file."""
    timestamp: str
    change_id: str
    file_path: str
    # 'create', 'update', 'delete'
    change_type: str
    old_hash: Optional[str]
    new_hash: Optional[str]
    old_content: Optional[str]
    new_content: Optional[str]
    user: str
    description: str
    environment: str

    def to_dict(self) -> Dict[str, Any]:
        """Return the record's fields as a plain dictionary."""
        as_mapping = asdict(self)
        return as_mapping

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ConfigChange':
        """Build a record from a mapping keyed by field name."""
        record = cls(**data)
        return record
53
+
54
+
55
@dataclass
class ConfigSnapshot:
    """Record of a configuration snapshot taken at one point in time."""
    snapshot_id: str
    timestamp: str
    description: str
    environment: str
    # file_path -> content_hash
    files: Dict[str, str]
    metadata: Dict[str, Any]

    def to_dict(self) -> Dict[str, Any]:
        """Return the snapshot's fields as a plain dictionary."""
        as_mapping = asdict(self)
        return as_mapping

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ConfigSnapshot':
        """Build a snapshot from a mapping keyed by field name."""
        snapshot = cls(**data)
        return snapshot
73
+
74
+
75
+ class ConfigTracker:
76
+ """Main configuration tracking system"""
77
+
78
+ def __init__(self, config_dir: str = None, tracking_dir: str = None):
79
+ self.config_dir = Path(config_dir) if config_dir else Path(__file__).parent
80
+ self.tracking_dir = Path(tracking_dir) if tracking_dir else self.config_dir / '.config_tracking'
81
+
82
+ # Create tracking directory structure
83
+ self.tracking_dir.mkdir(exist_ok=True)
84
+ (self.tracking_dir / 'changes').mkdir(exist_ok=True)
85
+ (self.tracking_dir / 'snapshots').mkdir(exist_ok=True)
86
+ (self.tracking_dir / 'backups').mkdir(exist_ok=True)
87
+
88
+ self.changes_file = self.tracking_dir / 'changes.json'
89
+ self.snapshots_file = self.tracking_dir / 'snapshots.json'
90
+
91
+ # Initialize tracking files if they don't exist
92
+ if not self.changes_file.exists():
93
+ self._save_changes([])
94
+ if not self.snapshots_file.exists():
95
+ self._save_snapshots([])
96
+
97
+ def track_change(self, file_path: str, change_type: str, description: str = "",
98
+ user: str = None, environment: str = None) -> str:
99
+ """Track a configuration change"""
100
+ file_path = str(Path(file_path).resolve())
101
+
102
+ # Generate change ID
103
+ change_id = self._generate_change_id()
104
+
105
+ # Get current user and environment
106
+ if user is None:
107
+ user = os.getenv('USER', 'unknown')
108
+ if environment is None:
109
+ environment = os.getenv('ENVIRONMENT', 'unknown')
110
+
111
+ # Get file content and hash
112
+ old_content = None
113
+ old_hash = None
114
+ new_content = None
115
+ new_hash = None
116
+
117
+ if change_type in ['update', 'delete']:
118
+ # Get old content from backup or current file
119
+ old_content, old_hash = self._get_file_content_and_hash(file_path)
120
+
121
+ if change_type in ['create', 'update']:
122
+ # Get new content
123
+ if Path(file_path).exists():
124
+ new_content, new_hash = self._get_file_content_and_hash(file_path)
125
+
126
+ # Create change record
127
+ change = ConfigChange(
128
+ timestamp=datetime.now(timezone.utc).isoformat(),
129
+ change_id=change_id,
130
+ file_path=file_path,
131
+ change_type=change_type,
132
+ old_hash=old_hash,
133
+ new_hash=new_hash,
134
+ old_content=old_content,
135
+ new_content=new_content,
136
+ user=user,
137
+ description=description,
138
+ environment=environment
139
+ )
140
+
141
+ # Save change
142
+ self._add_change(change)
143
+
144
+ # Create backup of the file
145
+ if change_type in ['update', 'delete'] and old_content:
146
+ self._create_backup(file_path, change_id, old_content)
147
+
148
+ logger.info(f"Tracked configuration change: {change_id} - {description}")
149
+ return change_id
150
+
151
+ def create_snapshot(self, description: str = "", environment: str = None) -> str:
152
+ """Create a configuration snapshot"""
153
+ if environment is None:
154
+ environment = os.getenv('ENVIRONMENT', 'unknown')
155
+
156
+ snapshot_id = self._generate_snapshot_id()
157
+
158
+ # Get all configuration files
159
+ config_files = self._get_all_config_files()
160
+ files_dict = {}
161
+
162
+ for file_path in config_files:
163
+ try:
164
+ _, file_hash = self._get_file_content_and_hash(file_path)
165
+ files_dict[str(file_path)] = file_hash
166
+ except Exception as e:
167
+ logger.warning(f"Could not include file in snapshot: {file_path} - {e}")
168
+
169
+ # Create snapshot
170
+ snapshot = ConfigSnapshot(
171
+ snapshot_id=snapshot_id,
172
+ timestamp=datetime.now(timezone.utc).isoformat(),
173
+ description=description,
174
+ environment=environment,
175
+ files=files_dict,
176
+ metadata={
177
+ 'total_files': len(files_dict),
178
+ 'config_dir': str(self.config_dir)
179
+ }
180
+ )
181
+
182
+ # Save snapshot
183
+ self._add_snapshot(snapshot)
184
+
185
+ # Create snapshot backup
186
+ self._create_snapshot_backup(snapshot_id, config_files)
187
+
188
+ logger.info(f"Created configuration snapshot: {snapshot_id} - {description}")
189
+ return snapshot_id
190
+
191
+ def rollback_to_change(self, change_id: str) -> bool:
192
+ """Rollback to a specific change"""
193
+ changes = self._load_changes()
194
+
195
+ # Find the change
196
+ target_change = None
197
+ for change in changes:
198
+ if change['change_id'] == change_id:
199
+ target_change = ConfigChange.from_dict(change)
200
+ break
201
+
202
+ if not target_change:
203
+ logger.error(f"Change not found: {change_id}")
204
+ return False
205
+
206
+ try:
207
+ # Create backup of current state
208
+ current_backup_id = self.create_snapshot(f"Pre-rollback backup for {change_id}")
209
+
210
+ # Restore the file
211
+ if target_change.change_type == 'delete':
212
+ # Restore deleted file
213
+ if target_change.old_content:
214
+ with open(target_change.file_path, 'w') as f:
215
+ f.write(target_change.old_content)
216
+ logger.info(f"Restored deleted file: {target_change.file_path}")
217
+ else:
218
+ logger.error(f"Cannot restore deleted file - no backup content")
219
+ return False
220
+
221
+ elif target_change.change_type in ['create', 'update']:
222
+ # Rollback to previous version
223
+ if target_change.old_content:
224
+ with open(target_change.file_path, 'w') as f:
225
+ f.write(target_change.old_content)
226
+ logger.info(f"Rolled back file: {target_change.file_path}")
227
+ else:
228
+ # This was a create operation, delete the file
229
+ if Path(target_change.file_path).exists():
230
+ os.remove(target_change.file_path)
231
+ logger.info(f"Removed created file: {target_change.file_path}")
232
+
233
+ # Track the rollback as a new change
234
+ self.track_change(
235
+ target_change.file_path,
236
+ 'rollback',
237
+ f"Rollback to change {change_id}",
238
+ environment=target_change.environment
239
+ )
240
+
241
+ logger.info(f"Successfully rolled back to change: {change_id}")
242
+ return True
243
+
244
+ except Exception as e:
245
+ logger.error(f"Rollback failed: {e}")
246
+ return False
247
+
248
+ def rollback_to_snapshot(self, snapshot_id: str) -> bool:
249
+ """Rollback to a specific snapshot"""
250
+ snapshots = self._load_snapshots()
251
+
252
+ # Find the snapshot
253
+ target_snapshot = None
254
+ for snapshot in snapshots:
255
+ if snapshot['snapshot_id'] == snapshot_id:
256
+ target_snapshot = ConfigSnapshot.from_dict(snapshot)
257
+ break
258
+
259
+ if not target_snapshot:
260
+ logger.error(f"Snapshot not found: {snapshot_id}")
261
+ return False
262
+
263
+ try:
264
+ # Create backup of current state
265
+ current_backup_id = self.create_snapshot(f"Pre-rollback backup for snapshot {snapshot_id}")
266
+
267
+ # Restore files from snapshot backup
268
+ snapshot_backup_dir = self.tracking_dir / 'snapshots' / snapshot_id
269
+
270
+ if not snapshot_backup_dir.exists():
271
+ logger.error(f"Snapshot backup directory not found: {snapshot_backup_dir}")
272
+ return False
273
+
274
+ # Restore each file
275
+ restored_files = []
276
+ for file_path in target_snapshot.files.keys():
277
+ backup_file = snapshot_backup_dir / Path(file_path).name
278
+
279
+ if backup_file.exists():
280
+ # Restore the file
281
+ shutil.copy2(backup_file, file_path)
282
+ restored_files.append(file_path)
283
+ logger.info(f"Restored file: {file_path}")
284
+ else:
285
+ logger.warning(f"Backup file not found: {backup_file}")
286
+
287
+ # Track the rollback
288
+ for file_path in restored_files:
289
+ self.track_change(
290
+ file_path,
291
+ 'rollback',
292
+ f"Rollback to snapshot {snapshot_id}",
293
+ environment=target_snapshot.environment
294
+ )
295
+
296
+ logger.info(f"Successfully rolled back to snapshot: {snapshot_id}")
297
+ return True
298
+
299
+ except Exception as e:
300
+ logger.error(f"Snapshot rollback failed: {e}")
301
+ return False
302
+
303
+ def get_change_history(self, file_path: str = None, limit: int = None) -> List[Dict[str, Any]]:
304
+ """Get change history"""
305
+ changes = self._load_changes()
306
+
307
+ # Filter by file path if specified
308
+ if file_path:
309
+ file_path = str(Path(file_path).resolve())
310
+ changes = [c for c in changes if c['file_path'] == file_path]
311
+
312
+ # Sort by timestamp (newest first)
313
+ changes.sort(key=lambda x: x['timestamp'], reverse=True)
314
+
315
+ # Apply limit if specified
316
+ if limit:
317
+ changes = changes[:limit]
318
+
319
+ return changes
320
+
321
+ def get_snapshots(self, limit: int = None) -> List[Dict[str, Any]]:
322
+ """Get snapshot history"""
323
+ snapshots = self._load_snapshots()
324
+
325
+ # Sort by timestamp (newest first)
326
+ snapshots.sort(key=lambda x: x['timestamp'], reverse=True)
327
+
328
+ # Apply limit if specified
329
+ if limit:
330
+ snapshots = snapshots[:limit]
331
+
332
+ return snapshots
333
+
334
+ def compare_configurations(self, snapshot_id1: str, snapshot_id2: str) -> Dict[str, Any]:
335
+ """Compare two configuration snapshots"""
336
+ snapshots = self._load_snapshots()
337
+
338
+ snapshot1 = None
339
+ snapshot2 = None
340
+
341
+ for snapshot in snapshots:
342
+ if snapshot['snapshot_id'] == snapshot_id1:
343
+ snapshot1 = ConfigSnapshot.from_dict(snapshot)
344
+ elif snapshot['snapshot_id'] == snapshot_id2:
345
+ snapshot2 = ConfigSnapshot.from_dict(snapshot)
346
+
347
+ if not snapshot1 or not snapshot2:
348
+ raise ValueError("One or both snapshots not found")
349
+
350
+ # Compare files
351
+ all_files = set(snapshot1.files.keys()) | set(snapshot2.files.keys())
352
+
353
+ differences = {
354
+ 'added': [],
355
+ 'removed': [],
356
+ 'modified': [],
357
+ 'unchanged': []
358
+ }
359
+
360
+ for file_path in all_files:
361
+ hash1 = snapshot1.files.get(file_path)
362
+ hash2 = snapshot2.files.get(file_path)
363
+
364
+ if hash1 and not hash2:
365
+ differences['removed'].append(file_path)
366
+ elif not hash1 and hash2:
367
+ differences['added'].append(file_path)
368
+ elif hash1 != hash2:
369
+ differences['modified'].append(file_path)
370
+ else:
371
+ differences['unchanged'].append(file_path)
372
+
373
+ return {
374
+ 'snapshot1': snapshot1.to_dict(),
375
+ 'snapshot2': snapshot2.to_dict(),
376
+ 'differences': differences,
377
+ 'summary': {
378
+ 'total_files': len(all_files),
379
+ 'added': len(differences['added']),
380
+ 'removed': len(differences['removed']),
381
+ 'modified': len(differences['modified']),
382
+ 'unchanged': len(differences['unchanged'])
383
+ }
384
+ }
385
+
386
+ def cleanup_old_backups(self, days: int = 30) -> int:
387
+ """Clean up old backups and snapshots"""
388
+ cutoff_time = datetime.now(timezone.utc).timestamp() - (days * 24 * 60 * 60)
389
+ cleaned_count = 0
390
+
391
+ # Clean up old change backups
392
+ backup_dir = self.tracking_dir / 'backups'
393
+ if backup_dir.exists():
394
+ for backup_file in backup_dir.iterdir():
395
+ if backup_file.stat().st_mtime < cutoff_time:
396
+ backup_file.unlink()
397
+ cleaned_count += 1
398
+
399
+ # Clean up old snapshot backups
400
+ snapshot_dir = self.tracking_dir / 'snapshots'
401
+ if snapshot_dir.exists():
402
+ for snapshot_backup in snapshot_dir.iterdir():
403
+ if snapshot_backup.is_dir() and snapshot_backup.stat().st_mtime < cutoff_time:
404
+ shutil.rmtree(snapshot_backup)
405
+ cleaned_count += 1
406
+
407
+ logger.info(f"Cleaned up {cleaned_count} old backup files")
408
+ return cleaned_count
409
+
410
+ def export_tracking_data(self, output_file: str) -> bool:
411
+ """Export all tracking data to a file"""
412
+ try:
413
+ export_data = {
414
+ 'export_timestamp': datetime.now(timezone.utc).isoformat(),
415
+ 'config_dir': str(self.config_dir),
416
+ 'changes': self._load_changes(),
417
+ 'snapshots': self._load_snapshots()
418
+ }
419
+
420
+ with open(output_file, 'w') as f:
421
+ json.dump(export_data, f, indent=2)
422
+
423
+ logger.info(f"Exported tracking data to: {output_file}")
424
+ return True
425
+
426
+ except Exception as e:
427
+ logger.error(f"Export failed: {e}")
428
+ return False
429
+
430
+ def import_tracking_data(self, input_file: str) -> bool:
431
+ """Import tracking data from a file"""
432
+ try:
433
+ with open(input_file, 'r') as f:
434
+ import_data = json.load(f)
435
+
436
+ # Validate import data
437
+ if 'changes' not in import_data or 'snapshots' not in import_data:
438
+ raise ValueError("Invalid import data format")
439
+
440
+ # Backup current tracking data
441
+ backup_file = self.tracking_dir / f"backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
442
+ self.export_tracking_data(str(backup_file))
443
+
444
+ # Import changes and snapshots
445
+ self._save_changes(import_data['changes'])
446
+ self._save_snapshots(import_data['snapshots'])
447
+
448
+ logger.info(f"Imported tracking data from: {input_file}")
449
+ return True
450
+
451
+ except Exception as e:
452
+ logger.error(f"Import failed: {e}")
453
+ return False
454
+
455
+ @contextmanager
456
+ def track_changes(self, description: str = "Batch configuration changes"):
457
+ """Context manager for tracking multiple changes"""
458
+ initial_snapshot = self.create_snapshot(f"Pre-change snapshot: {description}")
459
+
460
+ try:
461
+ yield
462
+
463
+ # Create post-change snapshot
464
+ final_snapshot = self.create_snapshot(f"Post-change snapshot: {description}")
465
+
466
+ logger.info(f"Tracked batch changes: {description}")
467
+ logger.info(f"Initial snapshot: {initial_snapshot}")
468
+ logger.info(f"Final snapshot: {final_snapshot}")
469
+
470
+ except Exception as e:
471
+ logger.error(f"Error during tracked changes: {e}")
472
+
473
+ # Rollback to initial snapshot
474
+ logger.info(f"Rolling back to initial snapshot: {initial_snapshot}")
475
+ self.rollback_to_snapshot(initial_snapshot)
476
+ raise
477
+
478
+ def _generate_change_id(self) -> str:
479
+ """Generate unique change ID"""
480
+ timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
481
+ random_suffix = hashlib.md5(os.urandom(16)).hexdigest()[:8]
482
+ return f"change_{timestamp}_{random_suffix}"
483
+
484
+ def _generate_snapshot_id(self) -> str:
485
+ """Generate unique snapshot ID"""
486
+ timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
487
+ random_suffix = hashlib.md5(os.urandom(16)).hexdigest()[:8]
488
+ return f"snapshot_{timestamp}_{random_suffix}"
489
+
490
+ def _get_file_content_and_hash(self, file_path: str) -> Tuple[str, str]:
491
+ """Get file content and its hash"""
492
+ with open(file_path, 'r', encoding='utf-8') as f:
493
+ content = f.read()
494
+
495
+ file_hash = hashlib.sha256(content.encode('utf-8')).hexdigest()
496
+ return content, file_hash
497
+
498
+ def _get_all_config_files(self) -> List[Path]:
499
+ """Get all configuration files"""
500
+ config_files = []
501
+
502
+ # Common configuration file patterns
503
+ patterns = [
504
+ '*.yaml', '*.yml', '*.json', '*.toml', '*.ini', '*.conf',
505
+ '.env*', '*.config'
506
+ ]
507
+
508
+ for pattern in patterns:
509
+ config_files.extend(self.config_dir.glob(pattern))
510
+
511
+ # Also check subdirectories
512
+ for subdir in self.config_dir.iterdir():
513
+ if subdir.is_dir() and not subdir.name.startswith('.'):
514
+ for pattern in patterns:
515
+ config_files.extend(subdir.glob(pattern))
516
+
517
+ return config_files
518
+
519
+ def _create_backup(self, file_path: str, change_id: str, content: str):
520
+ """Create backup of file content"""
521
+ backup_file = self.tracking_dir / 'backups' / f"{change_id}_{Path(file_path).name}"
522
+
523
+ with open(backup_file, 'w', encoding='utf-8') as f:
524
+ f.write(content)
525
+
526
+ def _create_snapshot_backup(self, snapshot_id: str, config_files: List[Path]):
527
+ """Create backup of all files in snapshot"""
528
+ snapshot_backup_dir = self.tracking_dir / 'snapshots' / snapshot_id
529
+ snapshot_backup_dir.mkdir(exist_ok=True)
530
+
531
+ for file_path in config_files:
532
+ if file_path.exists():
533
+ backup_file = snapshot_backup_dir / file_path.name
534
+ shutil.copy2(file_path, backup_file)
535
+
536
+ def _load_changes(self) -> List[Dict[str, Any]]:
537
+ """Load changes from file"""
538
+ try:
539
+ with open(self.changes_file, 'r') as f:
540
+ return json.load(f)
541
+ except (FileNotFoundError, json.JSONDecodeError):
542
+ return []
543
+
544
+ def _save_changes(self, changes: List[Dict[str, Any]]):
545
+ """Save changes to file"""
546
+ with open(self.changes_file, 'w') as f:
547
+ json.dump(changes, f, indent=2)
548
+
549
+ def _add_change(self, change: ConfigChange):
550
+ """Add a change to the tracking file"""
551
+ changes = self._load_changes()
552
+ changes.append(change.to_dict())
553
+ self._save_changes(changes)
554
+
555
+ def _load_snapshots(self) -> List[Dict[str, Any]]:
556
+ """Load snapshots from file"""
557
+ try:
558
+ with open(self.snapshots_file, 'r') as f:
559
+ return json.load(f)
560
+ except (FileNotFoundError, json.JSONDecodeError):
561
+ return []
562
+
563
+ def _save_snapshots(self, snapshots: List[Dict[str, Any]]):
564
+ """Save snapshots to file"""
565
+ with open(self.snapshots_file, 'w') as f:
566
+ json.dump(snapshots, f, indent=2)
567
+
568
def _add_snapshot(self, snapshot: ConfigSnapshot):
    """Append one snapshot record to the persisted snapshot list.

    The list is re-read, extended with ``snapshot.to_dict()`` and written
    back in full.
    """
    known = self._load_snapshots()
    entry = snapshot.to_dict()
    known.append(entry)
    self._save_snapshots(known)
573
+
574
+
575
def main():
    """Main CLI interface

    Parses the command line, builds a ``ConfigTracker`` from ``--config-dir``
    and ``--tracking-dir`` and runs the tracker method that matches the
    selected subcommand. Without a subcommand the help text is printed.
    ``rollback``, ``export`` and ``import`` exit with status 1 when the
    tracker reports failure.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Configuration Change Tracking System")
    parser.add_argument('--config-dir', help="Configuration directory")
    parser.add_argument('--tracking-dir', help="Tracking data directory")

    # The chosen subcommand name ends up in args.command (None if omitted).
    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # Track command
    track_parser = subparsers.add_parser('track', help='Track a configuration change')
    track_parser.add_argument('file_path', help='Path to configuration file')
    track_parser.add_argument('change_type', choices=['create', 'update', 'delete'])
    track_parser.add_argument('--description', default='', help='Change description')
    track_parser.add_argument('--user', help='User making the change')
    track_parser.add_argument('--environment', help='Environment')

    # Snapshot command
    snapshot_parser = subparsers.add_parser('snapshot', help='Create a configuration snapshot')
    snapshot_parser.add_argument('--description', default='', help='Snapshot description')
    snapshot_parser.add_argument('--environment', help='Environment')

    # Rollback command: exactly one of --change-id / --snapshot-id is required
    rollback_parser = subparsers.add_parser('rollback', help='Rollback configuration')
    rollback_group = rollback_parser.add_mutually_exclusive_group(required=True)
    rollback_group.add_argument('--change-id', help='Change ID to rollback to')
    rollback_group.add_argument('--snapshot-id', help='Snapshot ID to rollback to')

    # History command
    history_parser = subparsers.add_parser('history', help='Show change history')
    history_parser.add_argument('--file-path', help='Filter by file path')
    history_parser.add_argument('--limit', type=int, help='Limit number of results')

    # Snapshots command
    snapshots_parser = subparsers.add_parser('snapshots', help='List snapshots')
    snapshots_parser.add_argument('--limit', type=int, help='Limit number of results')

    # Compare command
    compare_parser = subparsers.add_parser('compare', help='Compare snapshots')
    compare_parser.add_argument('snapshot1', help='First snapshot ID')
    compare_parser.add_argument('snapshot2', help='Second snapshot ID')

    # Cleanup command
    cleanup_parser = subparsers.add_parser('cleanup', help='Clean up old backups')
    cleanup_parser.add_argument('--days', type=int, default=30, help='Days to keep')

    # Export command
    export_parser = subparsers.add_parser('export', help='Export tracking data')
    export_parser.add_argument('output_file', help='Output file path')

    # Import command
    import_parser = subparsers.add_parser('import', help='Import tracking data')
    import_parser.add_argument('input_file', help='Input file path')

    args = parser.parse_args()

    # No subcommand given: show usage instead of failing.
    if not args.command:
        parser.print_help()
        return

    # Create tracker
    tracker = ConfigTracker(args.config_dir, args.tracking_dir)

    # Execute command
    if args.command == 'track':
        change_id = tracker.track_change(
            args.file_path,
            args.change_type,
            args.description,
            args.user,
            args.environment
        )
        print(f"Change tracked: {change_id}")

    elif args.command == 'snapshot':
        snapshot_id = tracker.create_snapshot(args.description, args.environment)
        print(f"Snapshot created: {snapshot_id}")

    elif args.command == 'rollback':
        # argparse guarantees one of the two ids is set (required group).
        if args.change_id:
            success = tracker.rollback_to_change(args.change_id)
        else:
            success = tracker.rollback_to_snapshot(args.snapshot_id)

        if success:
            print("Rollback completed successfully")
        else:
            print("Rollback failed")
            sys.exit(1)

    elif args.command == 'history':
        changes = tracker.get_change_history(args.file_path, args.limit)
        print(json.dumps(changes, indent=2))

    elif args.command == 'snapshots':
        snapshots = tracker.get_snapshots(args.limit)
        print(json.dumps(snapshots, indent=2))

    elif args.command == 'compare':
        comparison = tracker.compare_configurations(args.snapshot1, args.snapshot2)
        print(json.dumps(comparison, indent=2))

    elif args.command == 'cleanup':
        count = tracker.cleanup_old_backups(args.days)
        print(f"Cleaned up {count} old backup files")

    elif args.command == 'export':
        success = tracker.export_tracking_data(args.output_file)
        if success:
            print(f"Tracking data exported to: {args.output_file}")
        else:
            print("Export failed")
            sys.exit(1)

    elif args.command == 'import':
        success = tracker.import_tracking_data(args.input_file)
        if success:
            print(f"Tracking data imported from: {args.input_file}")
        else:
            print("Import failed")
            sys.exit(1)
697
+
698
+
699
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
configs/stage_configs/config_validator.py ADDED
@@ -0,0 +1,705 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Configuration Validation System for Pixelated Empathy AI
4
+ Validates all configuration files and environment variables
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import json
10
+ import yaml
11
+ import logging
12
+ from typing import Dict, List, Any, Optional, Union
13
+ from pathlib import Path
14
+ from dataclasses import dataclass, field
15
+ from enum import Enum
16
+ import re
17
+ from urllib.parse import urlparse
18
+
19
+ # Configure logging
20
+ logging.basicConfig(
21
+ level=logging.INFO,
22
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
23
+ )
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class ValidationLevel(Enum):
    """Validation severity levels"""
    # The string values double as the keys of ValidationReport.get_summary()
    # and as the "level" field of the JSON report.
    ERROR = "error"
    WARNING = "warning"
    INFO = "info"
32
+
33
+
34
@dataclass
class ValidationResult:
    """Result of a configuration validation"""
    # Severity of this finding.
    level: ValidationLevel
    # Human-readable description of the finding.
    message: str
    # Name of the setting, environment variable or file the finding is about.
    field: Optional[str] = None
    # Offending value or a short description of it (e.g. "Length: 12").
    value: Optional[Any] = None
    # Hint on how to resolve the finding.
    suggestion: Optional[str] = None
42
+
43
+
44
@dataclass
class ValidationReport:
    """Ordered collection of validation findings with convenience accessors."""
    results: List[ValidationResult] = field(default_factory=list)

    def _record(self, level, message, field_name, value, suggestion=None):
        """Append one finding of the given severity."""
        finding = ValidationResult(
            level=level,
            message=message,
            field=field_name,
            value=value,
            suggestion=suggestion,
        )
        self.results.append(finding)

    def add_error(self, message: str, field: str = None, value: Any = None, suggestion: str = None):
        """Record a finding with ERROR severity."""
        self._record(ValidationLevel.ERROR, message, field, value, suggestion)

    def add_warning(self, message: str, field: str = None, value: Any = None, suggestion: str = None):
        """Record a finding with WARNING severity."""
        self._record(ValidationLevel.WARNING, message, field, value, suggestion)

    def add_info(self, message: str, field: str = None, value: Any = None):
        """Record a finding with INFO severity (no suggestion)."""
        self._record(ValidationLevel.INFO, message, field, value)

    @property
    def has_errors(self) -> bool:
        """True when at least one ERROR finding was recorded."""
        for finding in self.results:
            if finding.level == ValidationLevel.ERROR:
                return True
        return False

    @property
    def has_warnings(self) -> bool:
        """True when at least one WARNING finding was recorded."""
        for finding in self.results:
            if finding.level == ValidationLevel.WARNING:
                return True
        return False

    def get_summary(self) -> Dict[str, int]:
        """Count findings per severity; every level is present, even at 0."""
        counts = dict.fromkeys((level.value for level in ValidationLevel), 0)
        for finding in self.results:
            counts[finding.level.value] += 1
        return counts
94
+
95
+
96
+ class ConfigValidator:
97
+ """Main configuration validator"""
98
+
99
def __init__(self, config_dir: str = None):
    """Set the directory to inspect and start with an empty report.

    Args:
        config_dir: Directory holding the configuration files. When empty
            or None, the directory containing this module is used.
    """
    if config_dir:
        self.config_dir = Path(config_dir)
    else:
        self.config_dir = Path(__file__).parent
    self.report = ValidationReport()
102
+
103
def validate_all(self) -> ValidationReport:
    """Run every validation pass against a fresh report and return it."""
    logger.info("Starting comprehensive configuration validation...")

    # A new report per run, so repeated calls do not accumulate findings.
    self.report = ValidationReport()

    # Passes run in this fixed order; findings appear in the same order.
    passes = (
        self._validate_environment_variables,
        self._validate_database_config,
        self._validate_redis_config,
        self._validate_security_config,
        self._validate_monitoring_config,
        self._validate_file_permissions,
        self._validate_network_config,
        self._validate_resource_limits,
        self._validate_backup_config,
    )
    for run_pass in passes:
        run_pass()

    summary = self.report.get_summary()
    logger.info(f"Validation complete: {summary}")

    return self.report
126
+
127
def _validate_environment_variables(self):
    """Check required and optional environment variables.

    A required variable that is unset or empty is reported as an error;
    any variable that has a value is handed to its dedicated validator as
    ``validator(value, name)``.
    """
    logger.info("Validating environment variables...")

    mandatory = {
        'DATABASE_URL': self._validate_database_url,
        'REDIS_URL': self._validate_redis_url,
        'JWT_SECRET': self._validate_jwt_secret,
        'ENCRYPTION_KEY': self._validate_encryption_key,
        'LOG_LEVEL': self._validate_log_level,
        'ENVIRONMENT': self._validate_environment,
    }

    optional = {
        'MAX_WORKERS': self._validate_max_workers,
        'BATCH_SIZE': self._validate_batch_size,
        'DEBUG': self._validate_debug_flag,
        'SENTRY_DSN': self._validate_sentry_dsn,
    }

    for name, check in mandatory.items():
        current = os.getenv(name)
        if current:
            check(current, name)
            continue
        self.report.add_error(
            f"Required environment variable '{name}' is not set",
            field=name,
            suggestion=f"Set {name} environment variable"
        )

    # Optional variables are only validated when present and non-empty.
    for name, check in optional.items():
        current = os.getenv(name)
        if current:
            check(current, name)
164
+
165
+ def _validate_database_url(self, value: str, field: str):
166
+ """Validate database URL format"""
167
+ try:
168
+ parsed = urlparse(value)
169
+ if not parsed.scheme:
170
+ self.report.add_error(
171
+ f"Database URL missing scheme",
172
+ field=field,
173
+ suggestion="Use format: postgresql://user:pass@host:port/db"
174
+ )
175
+ elif parsed.scheme not in ['postgresql', 'postgres']:
176
+ self.report.add_warning(
177
+ f"Unexpected database scheme: {parsed.scheme}",
178
+ field=field,
179
+ suggestion="Consider using PostgreSQL for production"
180
+ )
181
+
182
+ if not parsed.hostname:
183
+ self.report.add_error(
184
+ f"Database URL missing hostname",
185
+ field=field
186
+ )
187
+
188
+ if not parsed.path or parsed.path == '/':
189
+ self.report.add_error(
190
+ f"Database URL missing database name",
191
+ field=field
192
+ )
193
+
194
+ except Exception as e:
195
+ self.report.add_error(
196
+ f"Invalid database URL format: {e}",
197
+ field=field
198
+ )
199
+
200
+ def _validate_redis_url(self, value: str, field: str):
201
+ """Validate Redis URL format"""
202
+ try:
203
+ parsed = urlparse(value)
204
+ if not parsed.scheme:
205
+ self.report.add_error(
206
+ f"Redis URL missing scheme",
207
+ field=field,
208
+ suggestion="Use format: redis://[:password@]host:port[/db]"
209
+ )
210
+ elif parsed.scheme not in ['redis', 'rediss']:
211
+ self.report.add_error(
212
+ f"Invalid Redis scheme: {parsed.scheme}",
213
+ field=field,
214
+ suggestion="Use 'redis://' or 'rediss://' for SSL"
215
+ )
216
+
217
+ if not parsed.hostname:
218
+ self.report.add_error(
219
+ f"Redis URL missing hostname",
220
+ field=field
221
+ )
222
+
223
+ except Exception as e:
224
+ self.report.add_error(
225
+ f"Invalid Redis URL format: {e}",
226
+ field=field
227
+ )
228
+
229
+ def _validate_jwt_secret(self, value: str, field: str):
230
+ """Validate JWT secret strength"""
231
+ if len(value) < 32:
232
+ self.report.add_error(
233
+ f"JWT secret too short (minimum 32 characters)",
234
+ field=field,
235
+ value=f"Length: {len(value)}",
236
+ suggestion="Generate a longer, more secure secret"
237
+ )
238
+ elif len(value) < 64:
239
+ self.report.add_warning(
240
+ f"JWT secret could be longer for better security",
241
+ field=field,
242
+ value=f"Length: {len(value)}",
243
+ suggestion="Consider using 64+ character secret"
244
+ )
245
+
246
+ # Check for common weak patterns
247
+ if value.lower() in ['secret', 'password', 'changeme', 'default']:
248
+ self.report.add_error(
249
+ f"JWT secret uses common weak value",
250
+ field=field,
251
+ suggestion="Generate a cryptographically secure random secret"
252
+ )
253
+
254
+ def _validate_encryption_key(self, value: str, field: str):
255
+ """Validate encryption key"""
256
+ if len(value) < 32:
257
+ self.report.add_error(
258
+ f"Encryption key too short (minimum 32 characters)",
259
+ field=field,
260
+ value=f"Length: {len(value)}"
261
+ )
262
+
263
+ # Check if it's base64 encoded (common for encryption keys)
264
+ try:
265
+ import base64
266
+ base64.b64decode(value)
267
+ if len(base64.b64decode(value)) < 32:
268
+ self.report.add_warning(
269
+ f"Decoded encryption key may be too short",
270
+ field=field
271
+ )
272
+ except Exception:
273
+ # Not base64, check raw length
274
+ pass
275
+
276
+ def _validate_log_level(self, value: str, field: str):
277
+ """Validate log level"""
278
+ valid_levels = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
279
+ if value.upper() not in valid_levels:
280
+ self.report.add_error(
281
+ f"Invalid log level: {value}",
282
+ field=field,
283
+ suggestion=f"Use one of: {', '.join(valid_levels)}"
284
+ )
285
+ elif value.upper() == 'DEBUG':
286
+ env = os.getenv('ENVIRONMENT', '').lower()
287
+ if env in ['production', 'prod']:
288
+ self.report.add_warning(
289
+ f"DEBUG log level in production environment",
290
+ field=field,
291
+ suggestion="Use INFO or WARNING for production"
292
+ )
293
+
294
+ def _validate_environment(self, value: str, field: str):
295
+ """Validate environment setting"""
296
+ valid_envs = ['development', 'dev', 'staging', 'production', 'prod', 'test']
297
+ if value.lower() not in valid_envs:
298
+ self.report.add_warning(
299
+ f"Unexpected environment value: {value}",
300
+ field=field,
301
+ suggestion=f"Consider using: {', '.join(valid_envs)}"
302
+ )
303
+
304
+ def _validate_max_workers(self, value: str, field: str):
305
+ """Validate max workers setting"""
306
+ try:
307
+ workers = int(value)
308
+ if workers < 1:
309
+ self.report.add_error(
310
+ f"Max workers must be positive",
311
+ field=field,
312
+ value=workers
313
+ )
314
+ elif workers > 32:
315
+ self.report.add_warning(
316
+ f"Very high worker count may cause resource issues",
317
+ field=field,
318
+ value=workers,
319
+ suggestion="Consider CPU core count when setting workers"
320
+ )
321
+ except ValueError:
322
+ self.report.add_error(
323
+ f"Max workers must be an integer",
324
+ field=field,
325
+ value=value
326
+ )
327
+
328
+ def _validate_batch_size(self, value: str, field: str):
329
+ """Validate batch size setting"""
330
+ try:
331
+ batch_size = int(value)
332
+ if batch_size < 1:
333
+ self.report.add_error(
334
+ f"Batch size must be positive",
335
+ field=field,
336
+ value=batch_size
337
+ )
338
+ elif batch_size > 1000:
339
+ self.report.add_warning(
340
+ f"Large batch size may cause memory issues",
341
+ field=field,
342
+ value=batch_size,
343
+ suggestion="Consider memory constraints when setting batch size"
344
+ )
345
+ except ValueError:
346
+ self.report.add_error(
347
+ f"Batch size must be an integer",
348
+ field=field,
349
+ value=value
350
+ )
351
+
352
+ def _validate_debug_flag(self, value: str, field: str):
353
+ """Validate debug flag"""
354
+ if value.lower() not in ['true', 'false', '1', '0', 'yes', 'no']:
355
+ self.report.add_warning(
356
+ f"Debug flag should be boolean-like",
357
+ field=field,
358
+ value=value,
359
+ suggestion="Use 'true', 'false', '1', or '0'"
360
+ )
361
+
362
+ if value.lower() in ['true', '1', 'yes']:
363
+ env = os.getenv('ENVIRONMENT', '').lower()
364
+ if env in ['production', 'prod']:
365
+ self.report.add_warning(
366
+ f"Debug enabled in production environment",
367
+ field=field,
368
+ suggestion="Disable debug in production"
369
+ )
370
+
371
+ def _validate_sentry_dsn(self, value: str, field: str):
372
+ """Validate Sentry DSN format"""
373
+ try:
374
+ parsed = urlparse(value)
375
+ if not parsed.scheme or not parsed.hostname:
376
+ self.report.add_error(
377
+ f"Invalid Sentry DSN format",
378
+ field=field,
379
+ suggestion="Check Sentry project settings for correct DSN"
380
+ )
381
+ except Exception as e:
382
+ self.report.add_error(
383
+ f"Invalid Sentry DSN: {e}",
384
+ field=field
385
+ )
386
+
387
def _validate_database_config(self):
    """Syntax-check whichever database config files exist in ``config_dir``."""
    logger.info("Validating database configuration...")

    # Candidate file names; each one that exists is parsed independently.
    for candidate_name in ('database.yaml', 'database.json', 'db_config.yaml'):
        candidate = self.config_dir / candidate_name
        if candidate.exists():
            self._validate_config_file(candidate)
402
+
403
def _validate_redis_config(self):
    """Syntax-check ``redis.yaml`` in ``config_dir`` when it exists."""
    logger.info("Validating Redis configuration...")

    candidate = self.config_dir / 'redis.yaml'
    if not candidate.exists():
        return
    self._validate_config_file(candidate)
410
+
411
def _validate_security_config(self):
    """Check ``security.yaml`` (if present) for weak security settings.

    Warns when an ``encryption`` section does not enable encryption, and
    when an ``authentication`` section does not require 2FA while
    ENVIRONMENT is ``production``/``prod``. Files that cannot be read or
    parsed, or whose top level is not a mapping, are reported as errors.
    """
    logger.info("Validating security configuration...")

    security_config = self.config_dir / 'security.yaml'
    if not security_config.exists():
        return

    try:
        with open(security_config, 'r') as f:
            # safe_load returns None for an empty document; treat it as an
            # empty mapping instead of failing on `in None` below.
            config = yaml.safe_load(f) or {}
    except Exception as e:
        self.report.add_error(
            f"Error reading security config: {e}",
            field="security.yaml"
        )
        return

    if not isinstance(config, dict):
        self.report.add_error(
            "Error reading security config: top-level value must be a mapping",
            field="security.yaml"
        )
        return

    # Check security settings
    if 'encryption' in config:
        encryption = config['encryption']
        # A section that is empty or not a mapping cannot enable anything.
        enabled = isinstance(encryption, dict) and encryption.get('enabled', False)
        if not enabled:
            self.report.add_warning(
                "Encryption is disabled",
                field="encryption.enabled",
                suggestion="Enable encryption for production"
            )

    if 'authentication' in config:
        auth_config = config['authentication']
        # Any falsy value (missing key, false, empty) counts as "not required".
        requires_2fa = isinstance(auth_config, dict) and auth_config.get('require_2fa', False)
        if not requires_2fa:
            env = os.getenv('ENVIRONMENT', '').lower()
            if env in ['production', 'prod']:
                self.report.add_warning(
                    "2FA not required in production",
                    field="authentication.require_2fa",
                    suggestion="Enable 2FA for production security"
                )
446
+
447
def _validate_monitoring_config(self):
    """Syntax-check ``monitoring.yaml`` in ``config_dir`` when it exists."""
    logger.info("Validating monitoring configuration...")

    candidate = self.config_dir / 'monitoring.yaml'
    if not candidate.exists():
        return
    self._validate_config_file(candidate)
454
+
455
def _validate_file_permissions(self):
    """Warn about sensitive files that grant any permission to "others".

    Only files that exist directly in ``config_dir`` are inspected.
    """
    logger.info("Validating file permissions...")

    for filename in ('.env', 'secrets.yaml', 'private.key', 'ssl.key'):
        filepath = self.config_dir / filename
        if not filepath.exists():
            continue

        # Last three octal digits of st_mode: owner, group, others.
        mode = oct(filepath.stat().st_mode)[-3:]
        others_bits = int(mode[-1])

        # Any non-zero "others" digit triggers the warning.
        if others_bits > 0:
            self.report.add_warning(
                f"Sensitive file '{filename}' is readable by others",
                field=f"permissions.{filename}",
                value=f"Mode: {mode}",
                suggestion="Set permissions to 600 or 640"
            )
480
+
481
def _validate_network_config(self):
    """Check the BIND_HOST and PORT environment variables.

    Warns when the service binds to all interfaces in production and when a
    privileged port (< 1024) is used without root. A PORT that is not an
    integer or lies outside 0-65535 is reported as an error.
    """
    logger.info("Validating network configuration...")

    # Check common network settings
    bind_host = os.getenv('BIND_HOST', '0.0.0.0')
    if bind_host == '0.0.0.0':
        env = os.getenv('ENVIRONMENT', '').lower()
        if env in ['production', 'prod']:
            self.report.add_warning(
                "Binding to all interfaces (0.0.0.0) in production",
                field="BIND_HOST",
                suggestion="Consider binding to specific interface for security"
            )

    # Check port configuration
    port = os.getenv('PORT', '8000')
    try:
        port_num = int(port)
    except ValueError:
        self.report.add_error(
            f"Invalid port: {port}",
            field="PORT",
            value=port,
            suggestion="Use an integer between 0 and 65535"
        )
        return

    if not 0 <= port_num <= 65535:
        self.report.add_error(
            f"Port {port_num} is out of range",
            field="PORT",
            value=port_num,
            suggestion="Use an integer between 0 and 65535"
        )
        return

    if port_num < 1024:
        # os.getuid does not exist on every platform (e.g. Windows); the
        # privilege check is skipped there.
        getuid = getattr(os, 'getuid', None)
        if getuid is not None and getuid() != 0:
            self.report.add_warning(
                f"Port {port_num} requires root privileges",
                field="PORT",
                suggestion="Use port >= 1024 or run as root"
            )
508
+
509
def _validate_resource_limits(self):
    """Check the MAX_MEMORY environment variable, if set.

    Accepts a number followed by ``G`` or ``M`` (either case). Warns for
    limits below 1G / 512M, reports an unparsable number as an error, and
    warns when the unit is not recognised so the value is not silently
    left unchecked.
    """
    logger.info("Validating resource limits...")

    # Check memory limits
    max_memory = os.getenv('MAX_MEMORY')
    if not max_memory:
        return

    # Parse memory value (e.g., "2G", "512M")
    unit = max_memory[-1].upper()
    if unit not in ('G', 'M'):
        self.report.add_warning(
            f"Memory limit not checked (unrecognized unit): {max_memory}",
            field="MAX_MEMORY",
            suggestion="Use format like '2G' or '512M'"
        )
        return

    try:
        amount = float(max_memory[:-1])
    except ValueError:
        self.report.add_error(
            f"Invalid memory limit format: {max_memory}",
            field="MAX_MEMORY",
            suggestion="Use format like '2G' or '512M'"
        )
        return

    if unit == 'G' and amount < 1:
        self.report.add_warning(
            f"Low memory limit: {max_memory}",
            field="MAX_MEMORY",
            suggestion="Consider increasing memory for better performance"
        )
    elif unit == 'M' and amount < 512:
        self.report.add_warning(
            f"Very low memory limit: {max_memory}",
            field="MAX_MEMORY",
            suggestion="Increase memory limit for stable operation"
        )
540
+
541
def _validate_backup_config(self):
    """Check ``backup.yaml`` (if present) for disabled backups and bad schedules.

    Backups that are not enabled are an error when ENVIRONMENT is
    ``production``/``prod``. A ``schedule`` that does not look like a
    five-field cron expression is a warning. Files that cannot be read or
    parsed, or whose top level is not a mapping, are reported as errors.
    """
    logger.info("Validating backup configuration...")

    backup_config = self.config_dir / 'backup.yaml'
    if not backup_config.exists():
        return

    try:
        with open(backup_config, 'r') as f:
            # safe_load returns None for an empty document; treat it as an
            # empty mapping so the checks below see "nothing configured".
            config = yaml.safe_load(f) or {}

        if not isinstance(config, dict):
            self.report.add_error(
                "Error reading backup config: top-level value must be a mapping",
                field="backup.yaml"
            )
            return

        if not config.get('enabled', False):
            env = os.getenv('ENVIRONMENT', '').lower()
            if env in ['production', 'prod']:
                self.report.add_error(
                    "Backups disabled in production",
                    field="backup.enabled",
                    suggestion="Enable backups for production data protection"
                )

        # Check backup schedule
        schedule = config.get('schedule')
        if schedule:
            # Basic cron validation: five whitespace-separated fields made of
            # digits, '*', '-', ',' and '/'. str() guards against YAML
            # scalars that are not strings (e.g. a bare number).
            if not re.match(r'^[\d\*\-,/]+\s+[\d\*\-,/]+\s+[\d\*\-,/]+\s+[\d\*\-,/]+\s+[\d\*\-,/]+$', str(schedule)):
                self.report.add_warning(
                    f"Invalid cron schedule format: {schedule}",
                    field="backup.schedule",
                    suggestion="Use valid cron format (e.g., '0 2 * * *')"
                )

    except Exception as e:
        self.report.add_error(
            f"Error reading backup config: {e}",
            field="backup.yaml"
        )
576
+
577
+ def _validate_config_file(self, filepath: Path):
578
+ """Validate a configuration file"""
579
+ try:
580
+ with open(filepath, 'r') as f:
581
+ if filepath.suffix in ['.yaml', '.yml']:
582
+ yaml.safe_load(f)
583
+ elif filepath.suffix == '.json':
584
+ json.load(f)
585
+
586
+ self.report.add_info(
587
+ f"Configuration file '{filepath.name}' is valid",
588
+ field=str(filepath)
589
+ )
590
+
591
+ except yaml.YAMLError as e:
592
+ self.report.add_error(
593
+ f"Invalid YAML in '{filepath.name}': {e}",
594
+ field=str(filepath)
595
+ )
596
+ except json.JSONDecodeError as e:
597
+ self.report.add_error(
598
+ f"Invalid JSON in '{filepath.name}': {e}",
599
+ field=str(filepath)
600
+ )
601
+ except Exception as e:
602
+ self.report.add_error(
603
+ f"Error reading '{filepath.name}': {e}",
604
+ field=str(filepath)
605
+ )
606
+
607
def print_report(self, report: ValidationReport = None):
    """Print a report to stdout and tell whether validation passed.

    Args:
        report: Report to print; defaults to the validator's own report.

    Returns:
        False when the report contains errors, True otherwise (warnings
        alone do not fail validation).
    """
    report = self.report if report is None else report
    rule = "=" * 80

    print("\n" + rule)
    print("CONFIGURATION VALIDATION REPORT")
    print(rule)

    counts = report.get_summary()
    print("\nSUMMARY:")
    print(f" Errors: {counts['error']}")
    print(f" Warnings: {counts['warning']}")
    print(f" Info: {counts['info']}")

    if report.results:
        icons = {"error": "❌", "warning": "⚠️", "info": "ℹ️"}
        print("\nDETAILS:")
        for finding in report.results:
            severity = finding.level.value
            print(f"\n{icons[severity]} {severity.upper()}: {finding.message}")

            if finding.field:
                print(f" Field: {finding.field}")
            # `is not None` so falsy values such as 0 are still shown.
            if finding.value is not None:
                print(f" Value: {finding.value}")
            if finding.suggestion:
                print(f" Suggestion: {finding.suggestion}")

    print("\n" + rule)

    if report.has_errors:
        print("❌ VALIDATION FAILED - Please fix errors before proceeding")
        return False

    if report.has_warnings:
        print("⚠️ VALIDATION PASSED WITH WARNINGS - Review warnings for production")
    else:
        print("✅ VALIDATION PASSED - Configuration is valid")
    return True
646
+
647
+
648
def main():
    """Main entry point for configuration validation

    Runs all validations and prints the report either as JSON (``--json``)
    or in human-readable form.

    Exit status: 1 when the report contains errors, 2 when
    ``--fail-on-warnings`` is given and warnings were found, 0 otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Validate Pixelated Empathy AI configuration")
    parser.add_argument(
        '--config-dir',
        default=None,
        help="Configuration directory path"
    )
    parser.add_argument(
        '--json',
        action='store_true',
        help="Output report in JSON format"
    )
    parser.add_argument(
        '--fail-on-warnings',
        action='store_true',
        help="Exit with error code if warnings are found"
    )

    args = parser.parse_args()

    # Create validator and run validation
    validator = ConfigValidator(args.config_dir)
    report = validator.validate_all()

    if args.json:
        # Output JSON report
        json_report = {
            'summary': report.get_summary(),
            'results': [
                {
                    'level': r.level.value,
                    'message': r.message,
                    'field': r.field,
                    'value': r.value,
                    'suggestion': r.suggestion
                }
                for r in report.results
            ]
        }
        print(json.dumps(json_report, indent=2))
        # print_report() is not called in JSON mode, so derive the pass/fail
        # flag here; otherwise `success` is unbound in the exit check below.
        success = not report.has_errors
    else:
        # Print human-readable report
        success = validator.print_report(report)

    # Exit with appropriate code
    if not success:
        sys.exit(1)
    elif args.fail_on_warnings and report.has_warnings:
        sys.exit(2)
    else:
        sys.exit(0)
702
+
703
+
704
# Run the validator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
configs/stage_configs/configs_config.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Centralized configuration for the Pixelated Empathy AI dataset pipeline.
3
+ Provides an enterprise-grade, unified configuration management system.
4
+ """
5
+
6
+ from dataclasses import dataclass, field
7
+ from typing import Any
8
+
9
+
10
@dataclass
class DataLoaderConfig:
    """Configuration for the data loader and acquisition."""
    # Short dataset alias -> dataset identifier. The values look like
    # Hugging Face Hub "owner/name" ids - confirm against the loader.
    huggingface_datasets: dict[str, str] = field(default_factory=lambda: {
        "mental_health_counseling": "Amod/mental_health_counseling_conversations",
        "psych8k": "EmoCareAI/Psych8k",
        # Mental Health Investigation Resources (Phase 1)
        "mental_health_snli": "iqrakiran/customized-mental-health-snli2",
        "mental_health_preprocessed": "typosonlr/MentalHealthPreProcessed",
        "depression_detection": "ShreyaR/DepressionDetection",
    })
    # Paths below are relative; presumably resolved against the process
    # working directory - verify with the code that consumes them.
    download_path: str = "ai/datasets/external"
    cache_dir: str = "ai/datasets/cache"
    huggingface_cache_dir: str = "ai/datasets/huggingface_cache"
    max_retries: int = 3
25
+
26
@dataclass
class StandardizationConfig:
    """Configuration for the DataStandardizer."""
    # Defaults only; how the DataStandardizer interprets each value is not
    # visible in this module.
    max_workers: int = 8
    batch_size: int = 200
    enable_monitoring: bool = True
    output_dir: str = "ai/datasets/standardized"
33
+
34
@dataclass
class LoggingConfig:
    """Configuration for the logging system."""
    log_level: str = "INFO"
    log_file: str = "logs/dataset_pipeline.log"
    # max_bytes / backup_count presumably feed a rotating file handler -
    # confirm where the logger is configured.
    max_bytes: int = 10 * 1024 * 1024 # 10 MB
    backup_count: int = 5
41
+
42
@dataclass
class Config:
    """Root configuration class for the entire pipeline."""
    data_loader: DataLoaderConfig = field(default_factory=DataLoaderConfig)
    standardization: StandardizationConfig = field(default_factory=StandardizationConfig)
    logging: LoggingConfig = field(default_factory=LoggingConfig)

    def to_dict(self) -> dict[str, Any]:
        """Serializes the config to a dictionary.

        Returns:
            A mapping with the keys ``data_loader``, ``standardization`` and
            ``logging``, each holding that section's fields. The result is
            an independent copy: changing it does not alter the config.
        """
        from dataclasses import asdict

        # asdict() copies recursively; handing out each section's __dict__
        # would expose the live attribute storage (including the nested
        # huggingface_datasets dict) of the shared singleton.
        return {
            "data_loader": asdict(self.data_loader),
            "standardization": asdict(self.standardization),
            "logging": asdict(self.logging),
        }
56
+
57
# Singleton instance to be used across the application.
# Built once at import time from the dataclass defaults above.
config = Config()
59
+
60
def get_config() -> Config:
    """Returns the singleton config instance.

    Every call returns the same module-level object, so changes made
    through one reference are visible through all others.
    """
    return config
63
+
64
+ # Example usage:
65
+ # from config import get_config
66
+ # config = get_config()
67
+ # print(config.standardization.batch_size)
configs/stage_configs/corrected_audit_report.json ADDED
@@ -0,0 +1,694 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audit_date": "2025-08-24T13:28:00.686206",
3
+ "total_tasks": 36,
4
+ "complete": 34,
5
+ "partial": 0,
6
+ "found": 0,
7
+ "missing": 2,
8
+ "working_count": 34,
9
+ "completion_rate": 0.9444444444444444,
10
+ "overall_status": "NEARLY_COMPLETE",
11
+ "ecosystem_files": 4,
12
+ "dataset_pipeline_files": 30,
13
+ "detailed_results": {
14
+ "6.1": {
15
+ "task_id": "6.1",
16
+ "expected_filename": "distributed_architecture.py",
17
+ "description": "Distributed processing architecture",
18
+ "found_files": [
19
+ "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/distributed_architecture.py",
20
+ "/home/vivi/pixelated/ai/dataset_pipeline/distributed_architecture.py"
21
+ ],
22
+ "status": "COMPLETE",
23
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/distributed_architecture.py",
24
+ "file_stats": {
25
+ "size_kb": 20.2275390625,
26
+ "lines": 569,
27
+ "classes": 6,
28
+ "functions": 26,
29
+ "has_docstring": true
30
+ },
31
+ "issues": []
32
+ },
33
+ "6.2": {
34
+ "task_id": "6.2",
35
+ "expected_filename": "data_fusion_engine.py",
36
+ "description": "Intelligent data fusion algorithms",
37
+ "found_files": [
38
+ "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/data_fusion_engine.py",
39
+ "/home/vivi/pixelated/ai/dataset_pipeline/data_fusion_engine.py"
40
+ ],
41
+ "status": "COMPLETE",
42
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/data_fusion_engine.py",
43
+ "file_stats": {
44
+ "size_kb": 26.6845703125,
45
+ "lines": 694,
46
+ "classes": 5,
47
+ "functions": 20,
48
+ "has_docstring": true
49
+ },
50
+ "issues": []
51
+ },
52
+ "6.3": {
53
+ "task_id": "6.3",
54
+ "expected_filename": "quality_assessment_framework.py",
55
+ "description": "Hierarchical quality assessment framework",
56
+ "found_files": [
57
+ "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/quality_assessment_framework.py",
58
+ "/home/vivi/pixelated/ai/dataset_pipeline/quality_assessment_framework.py"
59
+ ],
60
+ "status": "COMPLETE",
61
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/quality_assessment_framework.py",
62
+ "file_stats": {
63
+ "size_kb": 27.6455078125,
64
+ "lines": 708,
65
+ "classes": 5,
66
+ "functions": 25,
67
+ "has_docstring": true
68
+ },
69
+ "issues": []
70
+ },
71
+ "6.4": {
72
+ "task_id": "6.4",
73
+ "expected_filename": "deduplication.py",
74
+ "description": "Automated conversation deduplication",
75
+ "found_files": [
76
+ "/home/vivi/pixelated/ai/pipelines/dataset_pipeline/test_deduplication.py",
77
+ "/home/vivi/pixelated/ai/dataset_pipeline/test_deduplication.py",
78
+ "/home/vivi/pixelated/ai/datasets/dataset_pipeline/test_deduplication.py",
79
+ "/home/vivi/pixelated/ai/datasets/dataset_pipeline/deduplication_system.py",
80
+ "/home/vivi/pixelated/ai/pipelines/dataset_pipeline/deduplication.py",
81
+ "/home/vivi/pixelated/ai/datasets/dataset_pipeline/deduplication.py",
82
+ "/home/vivi/pixelated/ai/dataset_pipeline/deduplication.py"
83
+ ],
84
+ "status": "COMPLETE",
85
+ "primary_file": "/home/vivi/pixelated/ai/pipelines/dataset_pipeline/test_deduplication.py",
86
+ "file_stats": {
87
+ "size_kb": 15.1220703125,
88
+ "lines": 419,
89
+ "classes": 5,
90
+ "functions": 30,
91
+ "has_docstring": true
92
+ },
93
+ "issues": []
94
+ },
95
+ "6.5": {
96
+ "task_id": "6.5",
97
+ "expected_filename": "cross_dataset_linker.py",
98
+ "description": "Cross-dataset conversation linking",
99
+ "found_files": [],
100
+ "status": "MISSING",
101
+ "primary_file": null,
102
+ "file_stats": {},
103
+ "issues": []
104
+ },
105
+ "6.6": {
106
+ "task_id": "6.6",
107
+ "expected_filename": "metadata_schema.py",
108
+ "description": "Unified metadata schema",
109
+ "found_files": [],
110
+ "status": "MISSING",
111
+ "primary_file": null,
112
+ "file_stats": {},
113
+ "issues": []
114
+ },
115
+ "6.7": {
116
+ "task_id": "6.7",
117
+ "expected_filename": "therapeutic_intelligence.py",
118
+ "description": "Comprehensive therapeutic approach classification",
119
+ "found_files": [
120
+ "/home/vivi/pixelated/ai/dataset_pipeline/therapeutic_intelligence.py",
121
+ "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/test_therapeutic_intelligence.py",
122
+ "/home/vivi/pixelated/ai/datasets/dataset_pipeline/therapeutic_intelligence_orchestrator.py",
123
+ "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/therapeutic_intelligence.py"
124
+ ],
125
+ "status": "COMPLETE",
126
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/therapeutic_intelligence.py",
127
+ "file_stats": {
128
+ "size_kb": 25.4091796875,
129
+ "lines": 582,
130
+ "classes": 4,
131
+ "functions": 18,
132
+ "has_docstring": true
133
+ },
134
+ "issues": []
135
+ },
136
+ "6.8": {
137
+ "task_id": "6.8",
138
+ "expected_filename": "condition_pattern_recognition.py",
139
+ "description": "Mental health condition pattern recognition",
140
+ "found_files": [
141
+ "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/condition_pattern_recognition.py"
142
+ ],
143
+ "status": "COMPLETE",
144
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/condition_pattern_recognition.py",
145
+ "file_stats": {
146
+ "size_kb": 30.849609375,
147
+ "lines": 730,
148
+ "classes": 4,
149
+ "functions": 17,
150
+ "has_docstring": true
151
+ },
152
+ "issues": []
153
+ },
154
+ "6.9": {
155
+ "task_id": "6.9",
156
+ "expected_filename": "outcome_prediction.py",
157
+ "description": "Therapeutic outcome prediction models",
158
+ "found_files": [
159
+ "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/outcome_prediction.py"
160
+ ],
161
+ "status": "COMPLETE",
162
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/outcome_prediction.py",
163
+ "file_stats": {
164
+ "size_kb": 23.421875,
165
+ "lines": 580,
166
+ "classes": 5,
167
+ "functions": 18,
168
+ "has_docstring": true
169
+ },
170
+ "issues": []
171
+ },
172
+ "6.10": {
173
+ "task_id": "6.10",
174
+ "expected_filename": "crisis_intervention_detector.py",
175
+ "description": "Crisis intervention detection and escalation",
176
+ "found_files": [
177
+ "/home/vivi/pixelated/ai/tests/test_crisis_intervention_detector_enhanced.py",
178
+ "/home/vivi/pixelated/ai/pixel/test_crisis_intervention_detector.py",
179
+ "/home/vivi/pixelated/ai/tests/test_crisis_intervention_detector.py",
180
+ "/home/vivi/pixelated/ai/dataset_pipeline/crisis_intervention_detector.py",
181
+ "/home/vivi/pixelated/ai/tests/test_crisis_intervention_detector_working.py"
182
+ ],
183
+ "status": "COMPLETE",
184
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/crisis_intervention_detector.py",
185
+ "file_stats": {
186
+ "size_kb": 39.1484375,
187
+ "lines": 849,
188
+ "classes": 7,
189
+ "functions": 24,
190
+ "has_docstring": true
191
+ },
192
+ "issues": []
193
+ },
194
+ "6.11": {
195
+ "task_id": "6.11",
196
+ "expected_filename": "personality_adapter.py",
197
+ "description": "Personality-aware conversation adaptation",
198
+ "found_files": [
199
+ "/home/vivi/pixelated/ai/dataset_pipeline/personality_adapter.py",
200
+ "/home/vivi/pixelated/ai/dataset_pipeline/test_personality_adapter.py"
201
+ ],
202
+ "status": "COMPLETE",
203
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/personality_adapter.py",
204
+ "file_stats": {
205
+ "size_kb": 30.1650390625,
206
+ "lines": 704,
207
+ "classes": 7,
208
+ "functions": 26,
209
+ "has_docstring": true
210
+ },
211
+ "issues": []
212
+ },
213
+ "6.12": {
214
+ "task_id": "6.12",
215
+ "expected_filename": "cultural_competency_generator.py",
216
+ "description": "Cultural competency and diversity-aware response generation",
217
+ "found_files": [
218
+ "/home/vivi/pixelated/ai/dataset_pipeline/cultural_competency_generator.py",
219
+ "/home/vivi/pixelated/ai/dataset_pipeline/test_cultural_competency_generator.py"
220
+ ],
221
+ "status": "COMPLETE",
222
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/cultural_competency_generator.py",
223
+ "file_stats": {
224
+ "size_kb": 33.9677734375,
225
+ "lines": 789,
226
+ "classes": 6,
227
+ "functions": 35,
228
+ "has_docstring": true
229
+ },
230
+ "issues": []
231
+ },
232
+ "6.13": {
233
+ "task_id": "6.13",
234
+ "expected_filename": "audio_emotion_integration.py",
235
+ "description": "Audio emotion recognition integration",
236
+ "found_files": [
237
+ "/home/vivi/pixelated/ai/dataset_pipeline/audio_emotion_integration.py",
238
+ "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/audio_emotion_integration.py"
239
+ ],
240
+ "status": "COMPLETE",
241
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/audio_emotion_integration.py",
242
+ "file_stats": {
243
+ "size_kb": 23.2099609375,
244
+ "lines": 575,
245
+ "classes": 5,
246
+ "functions": 18,
247
+ "has_docstring": true
248
+ },
249
+ "issues": []
250
+ },
251
+ "6.14": {
252
+ "task_id": "6.14",
253
+ "expected_filename": "multimodal_disorder_analysis.py",
254
+ "description": "Multi-modal mental disorder analysis pipeline",
255
+ "found_files": [
256
+ "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/multimodal_disorder_analysis.py"
257
+ ],
258
+ "status": "COMPLETE",
259
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/multimodal_disorder_analysis.py",
260
+ "file_stats": {
261
+ "size_kb": 28.7197265625,
262
+ "lines": 691,
263
+ "classes": 8,
264
+ "functions": 21,
265
+ "has_docstring": true
266
+ },
267
+ "issues": []
268
+ },
269
+ "6.15": {
270
+ "task_id": "6.15",
271
+ "expected_filename": "emotion_cause_extraction.py",
272
+ "description": "Emotion cause extraction and intervention mapping",
273
+ "found_files": [
274
+ "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/emotion_cause_extraction.py"
275
+ ],
276
+ "status": "COMPLETE",
277
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/ecosystem/emotion_cause_extraction.py",
278
+ "file_stats": {
279
+ "size_kb": 28.5,
280
+ "lines": 686,
281
+ "classes": 7,
282
+ "functions": 18,
283
+ "has_docstring": true
284
+ },
285
+ "issues": []
286
+ },
287
+ "6.16": {
288
+ "task_id": "6.16",
289
+ "expected_filename": "tfidf_clusterer.py",
290
+ "description": "TF-IDF feature-based conversation clustering",
291
+ "found_files": [
292
+ "/home/vivi/pixelated/ai/dataset_pipeline/tfidf_clusterer.py",
293
+ "/home/vivi/pixelated/ai/dataset_pipeline/test_tfidf_clusterer.py"
294
+ ],
295
+ "status": "COMPLETE",
296
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/tfidf_clusterer.py",
297
+ "file_stats": {
298
+ "size_kb": 27.6640625,
299
+ "lines": 668,
300
+ "classes": 6,
301
+ "functions": 20,
302
+ "has_docstring": true
303
+ },
304
+ "issues": []
305
+ },
306
+ "6.17": {
307
+ "task_id": "6.17",
308
+ "expected_filename": "temporal_reasoner.py",
309
+ "description": "Temporal reasoning integration",
310
+ "found_files": [
311
+ "/home/vivi/pixelated/ai/dataset_pipeline/temporal_reasoner.py"
312
+ ],
313
+ "status": "COMPLETE",
314
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/temporal_reasoner.py",
315
+ "file_stats": {
316
+ "size_kb": 30.3173828125,
317
+ "lines": 744,
318
+ "classes": 7,
319
+ "functions": 25,
320
+ "has_docstring": true
321
+ },
322
+ "issues": []
323
+ },
324
+ "6.18": {
325
+ "task_id": "6.18",
326
+ "expected_filename": "evidence_validator.py",
327
+ "description": "Scientific evidence-based practice validation",
328
+ "found_files": [
329
+ "/home/vivi/pixelated/ai/dataset_pipeline/evidence_validator.py"
330
+ ],
331
+ "status": "COMPLETE",
332
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/evidence_validator.py",
333
+ "file_stats": {
334
+ "size_kb": 32.271484375,
335
+ "lines": 755,
336
+ "classes": 8,
337
+ "functions": 22,
338
+ "has_docstring": true
339
+ },
340
+ "issues": []
341
+ },
342
+ "6.19": {
343
+ "task_id": "6.19",
344
+ "expected_filename": "priority_weighted_sampler.py",
345
+ "description": "Priority-weighted sampling algorithms",
346
+ "found_files": [
347
+ "/home/vivi/pixelated/ai/dataset_pipeline/priority_weighted_sampler.py",
348
+ "/home/vivi/pixelated/ai/dataset_pipeline/test_priority_weighted_sampler.py"
349
+ ],
350
+ "status": "COMPLETE",
351
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/priority_weighted_sampler.py",
352
+ "file_stats": {
353
+ "size_kb": 25.404296875,
354
+ "lines": 646,
355
+ "classes": 3,
356
+ "functions": 17,
357
+ "has_docstring": true
358
+ },
359
+ "issues": []
360
+ },
361
+ "6.20": {
362
+ "task_id": "6.20",
363
+ "expected_filename": "condition_balancer.py",
364
+ "description": "Condition-specific balancing system",
365
+ "found_files": [
366
+ "/home/vivi/pixelated/ai/dataset_pipeline/condition_balancer.py"
367
+ ],
368
+ "status": "COMPLETE",
369
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/condition_balancer.py",
370
+ "file_stats": {
371
+ "size_kb": 26.40625,
372
+ "lines": 612,
373
+ "classes": 3,
374
+ "functions": 12,
375
+ "has_docstring": true
376
+ },
377
+ "issues": []
378
+ },
379
+ "6.21": {
380
+ "task_id": "6.21",
381
+ "expected_filename": "approach_diversity_optimizer.py",
382
+ "description": "Therapeutic approach diversity optimization",
383
+ "found_files": [
384
+ "/home/vivi/pixelated/ai/dataset_pipeline/approach_diversity_optimizer.py"
385
+ ],
386
+ "status": "COMPLETE",
387
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/approach_diversity_optimizer.py",
388
+ "file_stats": {
389
+ "size_kb": 33.8076171875,
390
+ "lines": 718,
391
+ "classes": 3,
392
+ "functions": 15,
393
+ "has_docstring": true
394
+ },
395
+ "issues": []
396
+ },
397
+ "6.22": {
398
+ "task_id": "6.22",
399
+ "expected_filename": "demographic_balancer.py",
400
+ "description": "Demographic and cultural diversity balancing",
401
+ "found_files": [
402
+ "/home/vivi/pixelated/ai/dataset_pipeline/demographic_balancer.py"
403
+ ],
404
+ "status": "COMPLETE",
405
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/demographic_balancer.py",
406
+ "file_stats": {
407
+ "size_kb": 20.724609375,
408
+ "lines": 486,
409
+ "classes": 3,
410
+ "functions": 12,
411
+ "has_docstring": true
412
+ },
413
+ "issues": []
414
+ },
415
+ "6.23": {
416
+ "task_id": "6.23",
417
+ "expected_filename": "complexity_stratifier.py",
418
+ "description": "Conversation complexity stratification",
419
+ "found_files": [
420
+ "/home/vivi/pixelated/ai/dataset_pipeline/complexity_stratifier.py"
421
+ ],
422
+ "status": "COMPLETE",
423
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/complexity_stratifier.py",
424
+ "file_stats": {
425
+ "size_kb": 26.2333984375,
426
+ "lines": 623,
427
+ "classes": 3,
428
+ "functions": 14,
429
+ "has_docstring": true
430
+ },
431
+ "issues": []
432
+ },
433
+ "6.24": {
434
+ "task_id": "6.24",
435
+ "expected_filename": "crisis_routine_balancer.py",
436
+ "description": "Crisis-to-routine conversation ratio optimization",
437
+ "found_files": [
438
+ "/home/vivi/pixelated/ai/dataset_pipeline/crisis_routine_balancer.py"
439
+ ],
440
+ "status": "COMPLETE",
441
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/crisis_routine_balancer.py",
442
+ "file_stats": {
443
+ "size_kb": 23.8505859375,
444
+ "lines": 574,
445
+ "classes": 3,
446
+ "functions": 13,
447
+ "has_docstring": true
448
+ },
449
+ "issues": []
450
+ },
451
+ "6.25": {
452
+ "task_id": "6.25",
453
+ "expected_filename": "multi_tier_validator.py",
454
+ "description": "Multi-tier quality validation system",
455
+ "found_files": [
456
+ "/home/vivi/pixelated/ai/dataset_pipeline/multi_tier_validator.py",
457
+ "/home/vivi/pixelated/ai/dataset_pipeline/test_multi_tier_validator.py"
458
+ ],
459
+ "status": "COMPLETE",
460
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/multi_tier_validator.py",
461
+ "file_stats": {
462
+ "size_kb": 28.9892578125,
463
+ "lines": 730,
464
+ "classes": 5,
465
+ "functions": 25,
466
+ "has_docstring": true
467
+ },
468
+ "issues": []
469
+ },
470
+ "6.26": {
471
+ "task_id": "6.26",
472
+ "expected_filename": "dsm5_accuracy_validator.py",
473
+ "description": "DSM-5 therapeutic accuracy validation",
474
+ "found_files": [
475
+ "/home/vivi/pixelated/ai/dataset_pipeline/test_dsm5_accuracy_validator.py",
476
+ "/home/vivi/pixelated/ai/dataset_pipeline/dsm5_accuracy_validator.py"
477
+ ],
478
+ "status": "COMPLETE",
479
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/test_dsm5_accuracy_validator.py",
480
+ "file_stats": {
481
+ "size_kb": 16.8955078125,
482
+ "lines": 393,
483
+ "classes": 1,
484
+ "functions": 22,
485
+ "has_docstring": true
486
+ },
487
+ "issues": []
488
+ },
489
+ "6.27": {
490
+ "task_id": "6.27",
491
+ "expected_filename": "safety_ethics_validator.py",
492
+ "description": "Conversation safety and ethics validation",
493
+ "found_files": [
494
+ "/home/vivi/pixelated/ai/pixel/validation/test_safety_ethics_validator.py",
495
+ "/home/vivi/pixelated/ai/tests/test_safety_ethics_validator_working.py",
496
+ "/home/vivi/pixelated/ai/dataset_pipeline/test_safety_ethics_validator.py",
497
+ "/home/vivi/pixelated/ai/dataset_pipeline/safety_ethics_validator.py",
498
+ "/home/vivi/pixelated/ai/tests/test_safety_ethics_validator.py",
499
+ "/home/vivi/pixelated/ai/pixel/validation/safety_ethics_validator.py"
500
+ ],
501
+ "status": "COMPLETE",
502
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/test_safety_ethics_validator.py",
503
+ "file_stats": {
504
+ "size_kb": 21.326171875,
505
+ "lines": 542,
506
+ "classes": 1,
507
+ "functions": 21,
508
+ "has_docstring": true
509
+ },
510
+ "issues": []
511
+ },
512
+ "6.28": {
513
+ "task_id": "6.28",
514
+ "expected_filename": "effectiveness_predictor.py",
515
+ "description": "Therapeutic effectiveness prediction",
516
+ "found_files": [
517
+ "/home/vivi/pixelated/ai/monitoring/conversation_effectiveness_predictor.py",
518
+ "/home/vivi/pixelated/ai/dataset_pipeline/test_effectiveness_predictor.py",
519
+ "/home/vivi/pixelated/ai/dataset_pipeline/effectiveness_predictor.py"
520
+ ],
521
+ "status": "COMPLETE",
522
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/test_effectiveness_predictor.py",
523
+ "file_stats": {
524
+ "size_kb": 17.89453125,
525
+ "lines": 447,
526
+ "classes": 1,
527
+ "functions": 20,
528
+ "has_docstring": true
529
+ },
530
+ "issues": []
531
+ },
532
+ "6.29": {
533
+ "task_id": "6.29",
534
+ "expected_filename": "coherence_validator.py",
535
+ "description": "Conversation coherence validation using CoT reasoning",
536
+ "found_files": [
537
+ "/home/vivi/pixelated/ai/dataset_pipeline/coherence_validator.py",
538
+ "/home/vivi/pixelated/ai/dataset_pipeline/test_coherence_validator.py"
539
+ ],
540
+ "status": "COMPLETE",
541
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/coherence_validator.py",
542
+ "file_stats": {
543
+ "size_kb": 38.3896484375,
544
+ "lines": 1016,
545
+ "classes": 5,
546
+ "functions": 24,
547
+ "has_docstring": true
548
+ },
549
+ "issues": []
550
+ },
551
+ "6.30": {
552
+ "task_id": "6.30",
553
+ "expected_filename": "realtime_quality_monitor.py",
554
+ "description": "Real-time conversation quality monitoring",
555
+ "found_files": [
556
+ "/home/vivi/pixelated/ai/dataset_pipeline/realtime_quality_monitor.py"
557
+ ],
558
+ "status": "COMPLETE",
559
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/realtime_quality_monitor.py",
560
+ "file_stats": {
561
+ "size_kb": 17.41015625,
562
+ "lines": 467,
563
+ "classes": 5,
564
+ "functions": 20,
565
+ "has_docstring": true
566
+ },
567
+ "issues": []
568
+ },
569
+ "6.31": {
570
+ "task_id": "6.31",
571
+ "expected_filename": "production_exporter.py",
572
+ "description": "Production-ready dataset export with tiered access",
573
+ "found_files": [
574
+ "/home/vivi/pixelated/ai/tests/test_production_exporter.py",
575
+ "/home/vivi/pixelated/ai/dataset_pipeline/production_exporter.py",
576
+ "/home/vivi/pixelated/ai/pixel/test_production_exporter.py",
577
+ "/home/vivi/pixelated/ai/tests/test_production_exporter_working.py"
578
+ ],
579
+ "status": "COMPLETE",
580
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/production_exporter.py",
581
+ "file_stats": {
582
+ "size_kb": 26.828125,
583
+ "lines": 710,
584
+ "classes": 5,
585
+ "functions": 24,
586
+ "has_docstring": true
587
+ },
588
+ "issues": []
589
+ },
590
+ "6.32": {
591
+ "task_id": "6.32",
592
+ "expected_filename": "adaptive_learner.py",
593
+ "description": "Adaptive learning pipeline",
594
+ "found_files": [
595
+ "/home/vivi/pixelated/ai/tests/test_adaptive_learner_working.py",
596
+ "/home/vivi/pixelated/ai/tests/test_adaptive_learner.py",
597
+ "/home/vivi/pixelated/ai/dataset_pipeline/adaptive_learner.py",
598
+ "/home/vivi/pixelated/ai/pixel/test_adaptive_learner.py"
599
+ ],
600
+ "status": "COMPLETE",
601
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/adaptive_learner.py",
602
+ "file_stats": {
603
+ "size_kb": 26.4423828125,
604
+ "lines": 684,
605
+ "classes": 8,
606
+ "functions": 34,
607
+ "has_docstring": true
608
+ },
609
+ "issues": []
610
+ },
611
+ "6.33": {
612
+ "task_id": "6.33",
613
+ "expected_filename": "analytics_dashboard.py",
614
+ "description": "Comprehensive analytics dashboard",
615
+ "found_files": [
616
+ "/home/vivi/pixelated/ai/monitoring/test_quality_analytics_dashboard_v2.py",
617
+ "/home/vivi/pixelated/ai/monitoring/test_quality_analytics_dashboard.py",
618
+ "/home/vivi/pixelated/ai/dataset_pipeline/analytics_dashboard.py",
619
+ "/home/vivi/pixelated/ai/monitoring/launch_quality_analytics_dashboard.py",
620
+ "/home/vivi/pixelated/ai/pixel/test_analytics_dashboard.py",
621
+ "/home/vivi/pixelated/ai/monitoring/quality_analytics_dashboard.py",
622
+ "/home/vivi/pixelated/ai/monitoring/launch_quality_analytics_dashboard_v2.py",
623
+ "/home/vivi/pixelated/ai/tests/test_analytics_dashboard_working.py",
624
+ "/home/vivi/pixelated/ai/tests/test_analytics_dashboard.py",
625
+ "/home/vivi/pixelated/ai/monitoring/quality_analytics_dashboard_v2.py"
626
+ ],
627
+ "status": "COMPLETE",
628
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/analytics_dashboard.py",
629
+ "file_stats": {
630
+ "size_kb": 18.1240234375,
631
+ "lines": 455,
632
+ "classes": 2,
633
+ "functions": 17,
634
+ "has_docstring": true
635
+ },
636
+ "issues": []
637
+ },
638
+ "6.34": {
639
+ "task_id": "6.34",
640
+ "expected_filename": "automated_maintenance.py",
641
+ "description": "Automated dataset update and maintenance procedures",
642
+ "found_files": [
643
+ "/home/vivi/pixelated/ai/pixel/test_automated_maintenance.py",
644
+ "/home/vivi/pixelated/ai/dataset_pipeline/automated_maintenance.py"
645
+ ],
646
+ "status": "COMPLETE",
647
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/automated_maintenance.py",
648
+ "file_stats": {
649
+ "size_kb": 20.296875,
650
+ "lines": 571,
651
+ "classes": 5,
652
+ "functions": 22,
653
+ "has_docstring": true
654
+ },
655
+ "issues": []
656
+ },
657
+ "6.35": {
658
+ "task_id": "6.35",
659
+ "expected_filename": "feedback_loops.py",
660
+ "description": "Conversation effectiveness feedback loops",
661
+ "found_files": [
662
+ "/home/vivi/pixelated/ai/dataset_pipeline/feedback_loops.py"
663
+ ],
664
+ "status": "COMPLETE",
665
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/feedback_loops.py",
666
+ "file_stats": {
667
+ "size_kb": 18.7763671875,
668
+ "lines": 461,
669
+ "classes": 4,
670
+ "functions": 12,
671
+ "has_docstring": true
672
+ },
673
+ "issues": []
674
+ },
675
+ "6.36": {
676
+ "task_id": "6.36",
677
+ "expected_filename": "comprehensive_api.py",
678
+ "description": "Comprehensive documentation and API",
679
+ "found_files": [
680
+ "/home/vivi/pixelated/ai/dataset_pipeline/comprehensive_api.py"
681
+ ],
682
+ "status": "COMPLETE",
683
+ "primary_file": "/home/vivi/pixelated/ai/dataset_pipeline/comprehensive_api.py",
684
+ "file_stats": {
685
+ "size_kb": 29.732421875,
686
+ "lines": 873,
687
+ "classes": 3,
688
+ "functions": 8,
689
+ "has_docstring": true
690
+ },
691
+ "issues": []
692
+ }
693
+ }
694
+ }