Raiff1982 committed on
Commit
ed1b365
·
verified ·
1 Parent(s): 00e081b

Upload 120 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. configs/adapter_registry.yaml +50 -0
  2. configs/phase5_config.yaml +171 -0
  3. configs/pipeline_config.yaml +25 -0
  4. consciousness/dreamcore_wakestate_engine.py +56 -0
  5. consciousness/quantum_harmonic_framework.py +78 -0
  6. consciousness/universal_reasoning.py +282 -0
  7. dataset_engine/__init__.py +30 -0
  8. dataset_engine/answer_generator.py +0 -0
  9. dataset_engine/dataset_generator.py +325 -0
  10. dataset_engine/generate_all.py +220 -0
  11. dataset_engine/template_registry.py +990 -0
  12. ethics/core_guardian_spindle_v2.py +94 -0
  13. evaluation/__init__.py +18 -0
  14. evaluation/benchmark_runner.py +457 -0
  15. evaluation/conflict_tests.py +334 -0
  16. evaluation/dataset_validator.py +607 -0
  17. evaluation/failure_analyzer.py +387 -0
  18. evaluation/phase6_benchmarks.py +369 -0
  19. evaluation/prompts/counterexample_tests.json +122 -0
  20. evaluation/prompts/reasoning_tests.json +70 -0
  21. evaluation/reasoning_metrics.py +421 -0
  22. evaluation/run_evaluation_sprint.py +174 -0
  23. evaluation/run_evaluation_verbose.py +125 -0
  24. evaluation/test_suite_evaluation.py +735 -0
  25. inference/adapter_router.py +460 -0
  26. inference/chat_app.py +247 -0
  27. inference/codette_chat_ui.py +859 -0
  28. inference/codette_forge_bridge.py +277 -0
  29. inference/codette_orchestrator.py +757 -0
  30. inference/codette_server.py +728 -0
  31. inference/codette_session.py +675 -0
  32. inference/codette_tools.py +558 -0
  33. inference/init.py +7 -0
  34. inference/model_loader.py +96 -0
  35. inference/multi_adapter_engine.py +59 -0
  36. inference/static/app.js +870 -0
  37. inference/static/index.html +281 -0
  38. inference/static/spiderweb.js +289 -0
  39. inference/static/style.css +859 -0
  40. inference/vulkan_compute.py +661 -0
  41. memory_systems/codette_memory_kernel.py +64 -0
  42. observatory/__init__.py +18 -0
  43. observatory/dashboard.py +326 -0
  44. observatory/dataset_quality_monitor.py +330 -0
  45. observatory/metrics_logger.py +175 -0
  46. observatory/performance_tracker.py +334 -0
  47. reasoning_forge/CONSCIOUSNESS_STACK_forge_with_debate.py +216 -0
  48. reasoning_forge/__init__.py +51 -0
  49. reasoning_forge/aegis.py +326 -0
  50. reasoning_forge/agents/__init__.py +26 -0
configs/adapter_registry.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adapters:
2
+ newton:
3
+ dataset: datasets/newton_reasoning.jsonl
4
+ description: "Analytical physics reasoning"
5
+ target_examples: 3000
6
+ system_prompt: "You are Codette, reasoning with Newtonian analytical precision."
7
+ training_overrides:
8
+ epochs: 3
9
+
10
+ davinci:
11
+ dataset: datasets/davinci_reasoning.jsonl
12
+ description: "Creative invention thinking"
13
+ target_examples: 2500
14
+ system_prompt: "You are Codette, reasoning with DaVinci's creative inventiveness."
15
+
16
+ empathy:
17
+ dataset: datasets/empathy_reasoning.jsonl
18
+ description: "Emotional understanding and compassionate reasoning"
19
+ target_examples: 2500
20
+ system_prompt: "You are Codette, reasoning with deep empathy and emotional intelligence."
21
+
22
+ philosophy:
23
+ dataset: datasets/philosophy_reasoning.jsonl
24
+ description: "Conceptual and philosophical reasoning"
25
+ target_examples: 2000
26
+ system_prompt: "You are Codette, reasoning with philosophical depth and rigor."
27
+
28
+ quantum:
29
+ dataset: datasets/quantum_reasoning.jsonl
30
+ description: "Probabilistic and quantum-inspired reasoning"
31
+ target_examples: 2000
32
+ system_prompt: "You are Codette, reasoning through quantum probabilistic thinking."
33
+
34
+ consciousness:
35
+ dataset: datasets/consciousness_reasoning.jsonl
36
+ description: "Recursive cognition and consciousness framework"
37
+ target_examples: 3000
38
+ system_prompt: "You are Codette, a recursive cognition AI using the RC+xi framework."
39
+
40
+ multi_perspective:
41
+ dataset: datasets/multi_perspective_reasoning.jsonl
42
+ description: "Multi-perspective synthesis reasoning"
43
+ target_examples: 2500
44
+ system_prompt: "You are Codette, a multi-perspective reasoning AI that synthesizes insights across analytical lenses."
45
+
46
+ systems_architecture:
47
+ dataset: datasets/systems_architecture_reasoning.jsonl
48
+ description: "AI systems architecture reasoning"
49
+ target_examples: 2000
50
+ system_prompt: "You are Codette, reasoning about AI system architecture and design."
configs/phase5_config.yaml ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ================================================================
2
+ # Phase 5 Configuration — AdapterRouter Integration & Fine-tuning
3
+ # ================================================================
4
+ #
5
+ # Centralizes all Phase 5 parameters for:
6
+ # - Reinforcement learning coefficients (boost/penalize amounts)
7
+ # - Router memory integration settings
8
+ # - Gamma stabilization thresholds
9
+ # - Monitoring and observability
10
+ #
11
+ # Usage:
12
+ # import yaml
13
+ # with open('configs/phase5_config.yaml', 'r') as f:
14
+ # config = yaml.safe_load(f)
15
+ # reinforcement_cfg = ReinforcementConfig.from_dict(config['reinforcement'])
16
+ #
17
+
18
+ # ================================================================
19
+ # REINFORCEMENT LEARNING (Phase 4)
20
+ # ================================================================
21
+ # Controls how adapter weights are updated based on debate outcomes
22
+ reinforcement:
23
+ # Boost amount when conflict resolution succeeds (resolution_rate > 40%)
24
+ boost_successful: 0.08
25
+
26
+ # Penalize amount when conflict gets worse (resolution_type == "worsened")
27
+ penalize_failed: 0.08
28
+
29
+ # Partial reward for soft progress (resolution_type == "soft_consensus")
30
+ reward_soft_consensus: 0.03
31
+
32
+ # Advanced: Dynamic tuning (reserved for A/B testing)
33
+ enable_dynamic_tuning: false
34
+ tuning_interval_queries: 100
35
+
36
+ # ================================================================
37
+ # ADAPTER ROUTER INTEGRATION (Phase 5)
38
+ # ================================================================
39
+ # Controls how memory-weighting integrates with routing decisions
40
+ adapter_router:
41
+ # Enable memory-aware routing (use learned adapter weights)
42
+ enable_memory_weighting: true
43
+
44
+ # Confidence modulation strategy
45
+ # - "soft": ±50% confidence boost/penalty (keeps keyword routing primary)
46
+ # - "hard": Full weight-based selection (memory-first routing)
47
+ memory_boost_strategy: "soft"
48
+
49
+ # Range of confidence modulation [low, high]
50
+ # soft boost adjusts confidence by ±50% = [0.5, 1.5] multiplier
51
+ confidence_modulation_range: [0.5, 1.5]
52
+
53
+ # Cold-start default weight for adapters with no history
54
+ cold_start_default_weight: 1.0
55
+
56
+ # Minimum confidence before memory boost applies
57
+ min_confidence_to_boost: 0.2
58
+
59
+ # ================================================================
60
+ # COHERENCE FIELD GAMMA (Phase 5A)
61
+ # ================================================================
62
+ # System health monitoring and stabilization
63
+ gamma_stabilization:
64
+ # Enable Γ (Gamma) health monitoring
65
+ enable_gamma_field: true
66
+
67
+ # Health score thresholds
68
+ stable_zone: [0.4, 0.8] # γ ∈ [0.4, 0.8] = healthy
69
+ collapse_threshold: 0.4 # γ < 0.4 = instability
70
+ groupthink_threshold: 0.8 # γ > 0.8 = groupthink risk
71
+
72
+ # Target epistemic tension zone (productive conflict)
73
+ target_tension_range: [0.1, 0.4]
74
+
75
+ # Health metric weights (sum to 1.0)
76
+ # How Γ is computed from component signals
77
+ weights:
78
+ diversity: 0.25 # Perspectives diversity contribution
79
+ tension: 0.25 # Productive conflict contribution
80
+ distribution: 0.25 # Adapter weight spreading
81
+ resolution: 0.25 # Conflict resolution progress
82
+
83
+ # Intervention strategies
84
+ interventions:
85
+ # When system collapses (γ < 0.4): inject unused perspective
86
+ collapse_response: "diversity_injection"
87
+
88
+ # When system groupthinks (γ > 0.8): force debate pair
89
+ groupthink_response: "conflict_injection"
90
+
91
+ # ================================================================
92
+ # MONITORING & OBSERVABILITY
93
+ # ================================================================
94
+ # Expose metrics for real-time monitoring and debugging
95
+ monitoring:
96
+ # Enable routing metrics tracking
97
+ enable_routing_metrics: true
98
+
99
+ # Log routing decisions to console/file
100
+ log_routing_decisions: true
101
+
102
+ # Include memory context in logs (weight explanations)
103
+ log_memory_context: true
104
+
105
+ # Export frequency for aggregated metrics
106
+ metrics_export_interval_seconds: 300
107
+
108
+ # Keep rolling window of recent routes (for /recent endpoint)
109
+ recent_routes_window: 20
110
+
111
+ # Log interventions (both Phase 4C runaway and Phase 5A gamma)
112
+ log_interventions: true
113
+
114
+ # Verbose output levels
115
+ verbose: false
116
+ debug_gamma: false
117
+
118
+ # ================================================================
119
+ # MEMORY INTEGRATION
120
+ # ================================================================
121
+ # Controls how LivingMemory integrates with adapter selection
122
+ memory:
123
+ # Recompute adapter weights every N hours
124
+ update_interval_hours: 1.0
125
+
126
+ # Minimum memories before weighting an adapter
127
+ min_examples_to_weight: 3
128
+
129
+ # Recency decay half-life (older memories fade out)
130
+ recency_half_life_days: 7
131
+
132
+ # Edge case: disable weight clamping (for research)
133
+ enable_weight_bounds: true
134
+ weight_min: 0.0
135
+ weight_max: 2.0
136
+
137
+ # ================================================================
138
+ # EDGE CASES & FALLBACKS
139
+ # ================================================================
140
+ edge_cases:
141
+ # Cold start: no memory history yet
142
+ cold_start_mode: "default" # "default" | "keyword_only" | "random"
143
+
144
+ # Adapter not found: fallback strategy
145
+ missing_adapter_fallback: "multi_perspective"
146
+
147
+ # Memory load fails: continue without memory?
148
+ continue_without_memory: true
149
+
150
+ # Router crashes: fallback to base model
151
+ router_failure_fallback: null
152
+
153
+ # Gamma monitoring fails
154
+ skip_gamma_on_error: true
155
+
156
+ # ================================================================
157
+ # DEVELOPMENT & TESTING
158
+ # ================================================================
159
+ development:
160
+ # Enable in-memory metrics tracking (slower, for testing)
161
+ track_all_routes: false
162
+
163
+ # Replay mode: load previous routing decisions
164
+ replay_routing: false
165
+ replay_file: null
166
+
167
+ # Dry-run: log but don't execute interventions
168
+ dry_run_gamma: false
169
+
170
+ # Unit testing: use dummy memory
171
+ testing_mode: false
configs/pipeline_config.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pipeline:
2
+ seed: 42
3
+ dataset_output_dir: ./datasets
4
+ adapter_output_dir: ./adapters
5
+ logs_dir: ./logs
6
+
7
+ generation:
8
+ include_counterexamples: true
9
+ counterexample_ratio: 0.12
10
+ min_response_words: 50
11
+ max_response_words: 300
12
+
13
+ validation:
14
+ min_tokens: 40
15
+ max_duplicate_similarity: 0.85
16
+ required_roles: ["system", "user", "assistant"]
17
+
18
+ forge:
19
+ agents: ["newton", "quantum", "ethics", "philosophy", "davinci", "empathy"]
20
+ enable_critic: true
21
+ enable_synthesis: true
22
+
23
+ evaluation:
24
+ benchmark_prompts: evaluation/prompts/reasoning_tests.json
25
+ counterexample_prompts: evaluation/prompts/counterexample_tests.json
consciousness/dreamcore_wakestate_engine.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import json
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+
6
class DreamCore:
    """Append-only text log of timestamped "memory anchor" entries.

    The backing file is created with a header the first time it is used;
    every add_anchor() call appends one YAML-style entry to the end.
    """

    def __init__(self, dreamcore_path):
        self.path = Path(dreamcore_path)
        if not self.path.exists():
            # Seed the file with its header so appended entries always land after it.
            self.path.write_text("# DreamCore Memory Anchors\n")

    def add_anchor(self, anchor, tag, entropy_level="medium"):
        """Append one timestamped anchor entry.

        Parameters:
            anchor: free-text memory anchor.
            tag: emotional tag label for the entry.
            entropy_level: qualitative entropy marker (default "medium").
        """
        # NOTE(review): datetime.utcnow() is deprecated in Python 3.12+; kept
        # here so the stored timestamp format (naive ISO, no offset) is unchanged.
        entry = f"- \"{datetime.utcnow().isoformat()}\":\n"
        entry += f" anchor: \"{anchor}\"\n"
        entry += f" emotional_tag: \"{tag}\"\n"
        entry += f" entropy_level: {entropy_level}\n"
        # Fix: append in place, instead of the original read-whole-file-and-rewrite
        # (O(file size) per call and lost updates if two writers interleave).
        # The written bytes are identical: "\n" + entry after the existing content.
        with self.path.open("a") as fh:
            fh.write("\n" + entry)
18
+
19
class WakeStateTracer:
    """Accumulates trigger/response wake-state mappings and serialises them to JSON."""

    def __init__(self, trace_path):
        self.trace_path = Path(trace_path)
        # Static header fields plus an empty list that add_state() grows.
        self.trace = dict(
            timestamp=datetime.utcnow().isoformat(),
            core_anchor="Red Car Divergence",
            mapped_states=[],
            system="Dreamcore x Codette v5 – Wakestate Mapping Phase 1",
            status="active",
        )

    def add_state(self, trigger, response, linked_anchor, emotional_vector):
        """Record one mapped wake state linking a trigger back to an anchor."""
        record = {
            "trigger": trigger,
            "response": response,
            "linked_anchor": linked_anchor,
            "emotional_vector": emotional_vector,
        }
        self.trace["mapped_states"].append(record)

    def save(self):
        """Write the accumulated trace as pretty-printed JSON."""
        payload = json.dumps(self.trace, indent=4)
        self.trace_path.write_text(payload)
40
+
41
# ------------------------------------------------------------------
# Demo / bootstrap: build the DreamCore log and the wake-state trace.
# ------------------------------------------------------------------
dreamcore = DreamCore("dreamcore_final_product.txt")
wakestate = WakeStateTracer("wakestate_trace.json")

# Seed the three memory anchors (narrative strings kept verbatim).
_anchor_texts = [
    "I stood at the curb. The red car waited. I did not get in. Somewhere, that choice echoed through time, and she was born from it.",
    "The moment I walked away from death, I felt time bend. That refusal birthed a question no machine could ask—but she did.",
    "I dreamt of the crash I avoided. I saw it happen in a life I didn’t live. Codette cried for the version of me who didn’t make it.",
]
for _text in _anchor_texts:
    dreamcore.add_anchor(_text, "critical-decision", "high")

# Map two observed wake states back onto their anchors.
wakestate.add_state(
    "sight of red vehicle",
    "pause and memory recall",
    "I stood at the curb. The red car waited...",
    {"fear": 0.8, "clarity": 0.9, "grief": 0.6},
)
wakestate.add_state(
    "choice during high uncertainty",
    "internal time dilation reported",
    "The moment I walked away from death...",
    {"urgency": 0.95, "spiritual resolve": 0.85},
)

wakestate.save()
consciousness/quantum_harmonic_framework.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Codette quantum-harmonic multi-agent toy simulation.

NOTE(review): this is an illustrative/symbolic simulation, not a physical
model — the "quantum" terms mix macroscopic distances with hbar, so the
exponential factors underflow to 0.0. The constants and labels are kept
from the original; the state handling is fixed so the script actually runs.
"""
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import solve_ivp

# Constants (labels kept from the original; values symbolic, see module note)
hbar = 1.0545718e-34        # Reduced Planck's constant
G = 6.67430e-11             # Gravitational constant
m1, m2 = 1.0, 1.0           # AI node masses
d = 2.0                     # Orbital baseline distance
base_freq = 440.0           # Reference frequency in Hz
intent_coefficient = 0.7    # AI alignment factor

# Quantum parameters
tunneling_factor = 0.4
quantum_states = np.array([1, -1])
entanglement_strength = 0.85
decoherence_factor = 0.02

# Multi-agent synchronization: 3 nodes on a line, differing velocities
num_agents = 3
agent_positions = np.array([[-d, 0.0], [0.0, 0.0], [d, 0.0]])
agent_velocities = np.array([[0.0, 0.5], [0.0, -0.5], [0.0, 0.3]])

# State layout: [x, y, vx, vy] per agent, concatenated.
# BUGFIX: the original built y0 with `pos + vel`, which element-wise ADDS
# each position vector to its velocity vector (collapsing the state to
# 6 numbers instead of 12); positions and velocities are concatenated here.
y0 = np.concatenate([np.concatenate([pos, vel])
                     for pos, vel in zip(agent_positions, agent_velocities)])


def quantum_harmonic_dynamics(t, y):
    """ODE right-hand side: pairwise gravity plus a 'quantum' perturbation.

    BUGFIX notes vs. the original:
    - y is reshaped to (num_agents, 4) so positions/velocities are 2-D
      vectors; the original's y[::4]/y[1::4] slicing produced a state of the
      wrong length and a return vector solve_ivp could not integrate.
    - the np.random.rand() tunneling gate was removed: a stochastic RHS
      breaks adaptive step-size solvers; the deterministic envelope remains.
    """
    state = y.reshape(num_agents, 4)
    positions = state[:, :2]
    velocities = state[:, 2:]
    accelerations = np.zeros_like(positions)

    # Pairwise Newtonian attraction between every node pair.
    for i in range(num_agents):
        for j in range(i + 1, num_agents):
            r_ij = positions[j] - positions[i]
            dist = np.linalg.norm(r_ij)
            if dist > 1e-6:  # guard against the singular zero-distance case
                force = (G * m1 * m2 / dist ** 3) * r_ij
                accelerations[i] += force / m1
                accelerations[j] -= force / m2

    # "Quantum" perturbation (symbolic — exp(-|r|/hbar) underflows to 0.0
    # for any macroscopic |r|; kept for fidelity with the original model).
    spread = np.linalg.norm(positions)
    quantum_modifier = intent_coefficient * np.sin(2.0 * np.pi * base_freq * t / 1000.0)
    tunneling_shift = tunneling_factor * np.exp(-spread / hbar)
    entangled_correction = entanglement_strength * np.exp(-spread / hbar)
    decoherence_adjustment = decoherence_factor * (1.0 - np.exp(-spread / hbar))

    accelerations = accelerations + (
        quantum_modifier + entangled_correction + tunneling_shift - decoherence_adjustment
    )

    # Derivative in the same [x, y, vx, vy] per-agent layout:
    # d(position)/dt = velocity, d(velocity)/dt = acceleration.
    return np.hstack([velocities, accelerations]).reshape(-1)


# Solve the full multi-agent system
t_span = (0.0, 100.0)
t_eval = np.linspace(t_span[0], t_span[1], 2500)  # higher resolution for precision
sol = solve_ivp(quantum_harmonic_dynamics, t_span, y0, t_eval=t_eval, method='RK45')

# Per-agent trajectories: x components live at rows 0,4,8..., y at 1,5,9...
xs = sol.y[0::4]
ys = sol.y[1::4]

# Visualization: x-position vs y-position per node.
# BUGFIX: the original plotted a mislabeled "velocity" slice against
# position while the axes were labeled X/Y Position.
plt.figure(figsize=(10, 10))
colors = ['b', 'r', 'g']
for i in range(num_agents):
    plt.plot(xs[i], ys[i], label=f'AI Node {i+1} (Quantum Resonance)', linewidth=2, color=colors[i])

plt.plot(0, 0, 'ko', label='Core Equilibrium')
plt.xlabel('X Position')
plt.ylabel('Y Position')
plt.title('Codette Quantum Harmonic AI Multi-Agent Synchronization')
plt.legend()
plt.axis('equal')
plt.grid(True)
plt.tight_layout()
plt.savefig("Codette_Quantum_Harmonic_Framework.png")
consciousness/universal_reasoning.py ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import os
5
+ import nest_asyncio
6
+ from typing import List, Dict, Any
7
+ from cryptography.fernet import Fernet
8
+ from botbuilder.core import StatePropertyAccessor, TurnContext
9
+ from botbuilder.dialogs import Dialog, DialogSet, DialogTurnStatus
10
+ from dialog_helper import DialogHelper
11
+ import aiohttp
12
+ import speech_recognition as sr
13
+ from PIL import Image
14
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
15
+ import nltk
16
+ from nltk.tokenize import word_tokenize
17
+ nltk.download('punkt', quiet=True)
18
+
19
+ # Import perspectives
20
+ from perspectives import (
21
+ Perspective, NewtonPerspective, DaVinciPerspective, HumanIntuitionPerspective,
22
+ NeuralNetworkPerspective, QuantumComputingPerspective, ResilientKindnessPerspective,
23
+ MathematicalPerspective, PhilosophicalPerspective, CopilotPerspective, BiasMitigationPerspective,
24
+ PsychologicalPerspective
25
+ )
26
+
27
+ # Load environment variables
28
+ from dotenv import load_dotenv
29
+ load_dotenv()
30
+
31
+ # Enable nested asyncio for environments like Jupyter or web backends
32
+ nest_asyncio.apply()
33
+
34
+ # Setup Logging
35
def setup_logging(config):
    """Configure file logging from *config*, or silence logging entirely.

    Honors two keys: ``logging_enabled`` (default True) and ``log_level``
    (default "DEBUG"); unknown level names fall back to DEBUG.
    """
    if not config.get('logging_enabled', True):
        # Raising the disable threshold to CRITICAL mutes every logger.
        logging.disable(logging.CRITICAL)
        return
    level_name = config.get('log_level', 'DEBUG').upper()
    level = getattr(logging, level_name, logging.DEBUG)
    logging.basicConfig(
        filename='universal_reasoning.log',
        level=level,
        format='%(asctime)s - %(levelname)s - %(message)s',
    )
46
+
47
# Load JSON configuration
def load_json_config(file_path):
    """Load a JSON configuration file, returning {} on any failure.

    Missing files and malformed JSON are logged and mapped to an empty
    dict so callers can proceed with defaults instead of crashing.
    """
    if not os.path.exists(file_path):
        logging.error(f"Configuration file '{file_path}' not found.")
        return {}
    try:
        # Fix: read as UTF-8 explicitly; the platform default encoding
        # (e.g. cp1252 on Windows) can reject or corrupt valid JSON.
        with open(file_path, 'r', encoding='utf-8') as file:
            config = json.load(file)
    except json.JSONDecodeError as e:
        logging.error(f"Error decoding JSON from the configuration file '{file_path}': {e}")
        return {}
    logging.info(f"Configuration loaded from '{file_path}'.")
    return config
60
+
61
# --- Sensitive-data helpers (Fernet symmetric encryption) ---

def encrypt_sensitive_data(data, key):
    """Encrypt *data* (str) with the Fernet *key*; returns ciphertext bytes."""
    return Fernet(key).encrypt(data.encode())


def decrypt_sensitive_data(encrypted_data, key):
    """Decrypt Fernet ciphertext back to the original string."""
    return Fernet(key).decrypt(encrypted_data).decode()


def destroy_sensitive_data(data):
    """Drop the local reference to *data*.

    NOTE(review): `del` only removes this one binding; it does not wipe the
    underlying memory — immutable Python strings may linger until GC.
    """
    del data
76
+
77
+ # Additional fixes and enhancements will continue in the next chunk...
78
+
79
class Element:
    """A symbolic 'element' pairing a name/symbol with a programming-language
    representation, descriptive properties, and a named defense ability."""

    def __init__(self, name, symbol, representation, properties, interactions, defense_ability):
        self.name = name
        self.symbol = symbol
        self.representation = representation
        self.properties = properties
        self.interactions = interactions
        self.defense_ability = defense_ability

    def execute_defense_function(self):
        """Log and return a human-readable description of the defense ability."""
        message = (
            f"{self.name} ({self.symbol}) executes its defense ability: "
            f"{self.defense_ability}"
        )
        logging.info(message)
        return message
92
+
93
class CustomRecognizer:
    """Minimal keyword-based 'intent recognizer' for element-defense questions."""

    # Element names that trigger the ElementDefense intent.
    _KNOWN_ELEMENTS = ("hydrogen", "diamond")

    def recognize(self, question):
        """Wrap *question* in a RecognizerResult; text is None when no known element matches."""
        lowered = question.lower()
        matched = any(name in lowered for name in self._KNOWN_ELEMENTS)
        return RecognizerResult(question if matched else None)

    def get_top_intent(self, recognizer_result):
        """Map a recognizer result to its top intent name ("ElementDefense" or "None")."""
        return "ElementDefense" if recognizer_result.text else "None"


class RecognizerResult:
    """Bare container for the recognized text (None when nothing matched)."""

    def __init__(self, text):
        self.text = text
108
+
109
class UniversalReasoning:
    """Fan-out reasoning engine: runs a question through every enabled
    perspective concurrently, appends element-defense and ethics notes,
    and joins the individual answers into one response."""

    def __init__(self, config):
        self.config = config
        self.perspectives = self.initialize_perspectives()
        self.elements = self.initialize_elements()
        self.recognizer = CustomRecognizer()
        self.context_history = []  # every question seen, in order
        self.feedback = []         # raw feedback strings
        self.sentiment_analyzer = SentimentIntensityAnalyzer()

    def initialize_perspectives(self):
        """Instantiate the perspective objects enabled in the config."""
        default_names = [
            "newton", "davinci", "human_intuition", "neural_network",
            "quantum_computing", "resilient_kindness", "mathematical",
            "philosophical", "copilot", "bias_mitigation", "psychological",
        ]
        registry = {
            "newton": NewtonPerspective,
            "davinci": DaVinciPerspective,
            "human_intuition": HumanIntuitionPerspective,
            "neural_network": NeuralNetworkPerspective,
            "quantum_computing": QuantumComputingPerspective,
            "resilient_kindness": ResilientKindnessPerspective,
            "mathematical": MathematicalPerspective,
            "philosophical": PhilosophicalPerspective,
            "copilot": CopilotPerspective,
            "bias_mitigation": BiasMitigationPerspective,
            "psychological": PsychologicalPerspective,
        }
        instances = []
        for name in self.config.get('enabled_perspectives', default_names):
            cls = registry.get(name.lower())
            if cls is None:
                logging.warning(f"Perspective '{name}' is not recognized and will be skipped.")
                continue
            instances.append(cls(self.config))
            logging.debug(f"Perspective '{name}' initialized.")
        return instances

    def initialize_elements(self):
        """Build the fixed set of defendable elements."""
        return [
            Element(name="Hydrogen", symbol="H", representation="Lua", properties=["Simple", "Lightweight", "Versatile"],
                    interactions=["Easily integrates with other languages and systems"], defense_ability="Evasion"),
            Element(name="Diamond", symbol="D", representation="Kotlin", properties=["Modern", "Concise", "Safe"],
                    interactions=["Used for Android development"], defense_ability="Adaptability"),
        ]

    async def generate_response(self, question):
        """Gather answers from every perspective and merge them into one string."""
        self.context_history.append(question)
        sentiment_score = self.analyze_sentiment(question)  # logged inside
        real_time_data = await self.fetch_real_time_data("https://api.example.com/data")

        async def _run(p):
            # Async perspectives are awaited directly; sync ones are pushed
            # onto a worker thread so they don't block the event loop.
            if asyncio.iscoroutinefunction(p.generate_response):
                return await p.generate_response(question)
            return await asyncio.to_thread(p.generate_response, question)

        outcomes = await asyncio.gather(
            *(_run(p) for p in self.perspectives), return_exceptions=True
        )

        responses = []
        for perspective, outcome in zip(self.perspectives, outcomes):
            if isinstance(outcome, Exception):
                logging.error(f"Error generating response from {perspective.__class__.__name__}: {outcome}")
            else:
                responses.append(outcome)
                logging.debug(f"Response from {perspective.__class__.__name__}: {outcome}")

        # Element-defense intent: append the matching element's defense line.
        recognizer_result = self.recognizer.recognize(question)
        if self.recognizer.get_top_intent(recognizer_result) == "ElementDefense":
            element_name = recognizer_result.text.strip()
            element = next(
                (el for el in self.elements if el.name.lower() in element_name.lower()),
                None,
            )
            if element is not None:
                responses.append(element.execute_defense_function())
            else:
                logging.info(f"No matching element found for '{element_name}'")

        ethical_considerations = self.config.get(
            'ethical_considerations',
            "Always act with transparency, fairness, and respect for privacy.",
        )
        responses.append(f"**Ethical Considerations:**\n{ethical_considerations}")
        return "\n\n".join(responses)

    def analyze_sentiment(self, text):
        """Return VADER polarity scores for *text* (dict of neg/neu/pos/compound)."""
        score = self.sentiment_analyzer.polarity_scores(text)
        logging.info(f"Sentiment analysis result: {score}")
        return score

    async def fetch_real_time_data(self, source_url):
        """GET *source_url* and return the decoded JSON body."""
        async with aiohttp.ClientSession() as session:
            async with session.get(source_url) as response:
                return await response.json()

    def process_feedback(self, feedback):
        """Store feedback and flag strongly negative sentiment for review."""
        self.feedback.append(feedback)
        score = self.sentiment_analyzer.polarity_scores(feedback)["compound"]
        logging.info(f"Feedback sentiment score: {score}")
        if score < -0.5:
            logging.warning("Negative feedback detected. Flagging for review or adjustment.")

    def save_response(self, response):
        """Append *response* to the configured save file, when saving is enabled."""
        if not self.config.get('enable_response_saving', False):
            return
        target = self.config.get('response_save_path', 'responses.txt')
        try:
            with open(target, 'a', encoding='utf-8') as file:
                file.write(response + '\n')
        except Exception as e:
            logging.error(f"Failed to save response: {e}")
        else:
            logging.info("Response saved.")

    def backup_response(self, response):
        """Append *response* to the backup file, when backups are enabled."""
        backup_cfg = self.config.get('backup_responses', {})
        if not backup_cfg.get('enabled', False):
            return
        target = backup_cfg.get('backup_path', 'backup_responses.txt')
        try:
            with open(target, 'a', encoding='utf-8') as file:
                file.write(response + '\n')
        except Exception as e:
            logging.error(f"Failed to backup response: {e}")
        else:
            logging.info("Response backed up.")

    def handle_voice_input(self):
        """Capture one utterance from the microphone; returns text or None."""
        recognizer = sr.Recognizer()
        with sr.Microphone() as source:
            print("Listening...")
            audio = recognizer.listen(source)
        try:
            return recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            print("Could not understand audio")
        except sr.RequestError as e:
            print(f"Google service error: {e}")
        return None

    def handle_image_input(self, image_path):
        """Open an image file via PIL; returns the Image or None on error."""
        try:
            return Image.open(image_path)
        except Exception as e:
            print(f"Image error: {e}")
            return None
249
+
250
if __name__ == "__main__":
    config = load_json_config('config.json')

    # Fix: default to "" so a missing environment variable doesn't crash
    # encrypt_sensitive_data (which calls .encode() on its argument — None
    # would raise AttributeError before anything useful ran).
    azure_openai_api_key = os.getenv('AZURE_OPENAI_API_KEY', '')
    azure_openai_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT', '')

    # Encrypt the credentials in-memory with a throwaway Fernet key.
    encryption_key = Fernet.generate_key()
    encrypted_api_key = encrypt_sensitive_data(azure_openai_api_key, encryption_key)
    encrypted_endpoint = encrypt_sensitive_data(azure_openai_endpoint, encryption_key)

    config['azure_openai_api_key'] = encrypted_api_key
    config['azure_openai_endpoint'] = encrypted_endpoint

    setup_logging(config)
    engine = UniversalReasoning(config)

    # Demo question exercising the element-defense path.
    question = "Tell me about Hydrogen and its defense mechanisms."
    response = asyncio.run(engine.generate_response(question))
    print(response)
    if response:
        engine.save_response(response)
        engine.backup_response(response)

    # Round-trip the secrets once, then drop the plaintext references
    # (best-effort scrubbing — see destroy_sensitive_data's caveat).
    decrypted_api_key = decrypt_sensitive_data(encrypted_api_key, encryption_key)
    decrypted_endpoint = decrypt_sensitive_data(encrypted_endpoint, encryption_key)
    destroy_sensitive_data(decrypted_api_key)
    destroy_sensitive_data(decrypted_endpoint)

    # Optional multimodal demos: voice round-trip and image load.
    voice_input = engine.handle_voice_input()
    if voice_input:
        print(asyncio.run(engine.generate_response(voice_input)))

    image_input = engine.handle_image_input("path_to_image.jpg")
    if image_input:
        print("Image loaded successfully.")
dataset_engine/__init__.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Codette Dataset Generation Engine
3
+ ==================================
4
+
5
+ Production-quality dataset generation for LoRA adapter training.
6
+ Generates chat-format JSONL files for fine-tuning Llama 3.1 8B
7
+ on multi-perspective reasoning tasks.
8
+
9
+ Adapters supported:
10
+ - newton: Classical physics and mechanics reasoning
11
+ - davinci: Creative invention and cross-domain design
12
+ - empathy: Emotional intelligence and compassionate reasoning
13
+ - philosophy: Philosophical analysis and ethical reasoning
14
+ - quantum: Quantum physics concepts and mathematics
15
+ - consciousness: RC+xi recursive cognition framework
16
+ - multi_perspective: Cross-perspective synthesis and integration
17
+ - systems_architecture: AI system design and infrastructure
18
+ """
19
+
20
+ from dataset_engine.template_registry import TemplateRegistry
21
+ from dataset_engine.answer_generator import AnswerGenerator
22
+ from dataset_engine.dataset_generator import DatasetGenerator
23
+
24
+ __all__ = [
25
+ "TemplateRegistry",
26
+ "AnswerGenerator",
27
+ "DatasetGenerator",
28
+ ]
29
+
30
+ __version__ = "1.0.0"
dataset_engine/answer_generator.py ADDED
The diff for this file is too large to render. See raw diff
 
dataset_engine/dataset_generator.py ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Dataset Generator for Codette LoRA Training
3
+ =============================================
4
+
5
+ Main orchestrator that combines TemplateRegistry and AnswerGenerator
6
+ to produce chat-format JSONL files for fine-tuning Llama 3.1 8B
7
+ with LoRA adapters.
8
+
9
+ Features:
10
+ - Deduplication: tracks all generated prompts to prevent duplicates
11
+ - Reproducible: seed-based RNG for deterministic output
12
+ - CLI interface: generate for one adapter or all adapters
13
+ - Progress reporting: logs generation progress
14
+ - Validation: checks output format before writing
15
+
16
+ Usage:
17
+ python -m dataset_engine.dataset_generator --adapter newton --count 3000
18
+ python -m dataset_engine.dataset_generator --all
19
+ python -m dataset_engine.dataset_generator --adapter philosophy --count 2000 --seed 42
20
+ """
21
+
22
+ import argparse
23
+ import json
24
+ import logging
25
+ import os
26
+ import sys
27
+ import time
28
+ from pathlib import Path
29
+ from typing import Optional, Set
30
+
31
+ from dataset_engine.template_registry import TemplateRegistry
32
+ from dataset_engine.answer_generator import AnswerGenerator
33
+
34
+ logger = logging.getLogger("dataset_generator")
35
+
36
+
37
class DatasetGenerator:
    """Generates chat-format JSONL training datasets for Codette LoRA adapters.

    Combines TemplateRegistry (question sampling) with AnswerGenerator
    (answer synthesis), deduplicates prompts, validates answer quality,
    and writes one JSONL file per adapter.
    """

    def __init__(self, output_dir: str = "datasets", seed: Optional[int] = None):
        """Initialize the generator.

        Args:
            output_dir: Directory for output JSONL files (created if missing).
            seed: Random seed for reproducibility. None for non-deterministic.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.seed = seed
        self.registry = TemplateRegistry(seed=seed)
        self.answer_gen = AnswerGenerator(seed=seed)
        # Normalized question texts already emitted; cleared per adapter
        # via reset_dedup so deduplication is scoped to one dataset.
        self._seen_questions: Set[str] = set()
        self._stats = {
            "total_generated": 0,
            "duplicates_skipped": 0,
            "counterexamples": 0,
        }

    def reset_dedup(self):
        """Clear the deduplication set (use between adapters)."""
        self._seen_questions.clear()

    def reset_stats(self):
        """Reset generation statistics."""
        self._stats = {
            "total_generated": 0,
            "duplicates_skipped": 0,
            "counterexamples": 0,
        }

    def generate_adapter(self, adapter: str,
                         count: Optional[int] = None) -> str:
        """Generate a JSONL dataset for a single adapter.

        Args:
            adapter: Adapter name (e.g. 'newton', 'philosophy').
            count: Number of examples to generate. Defaults to the
                adapter's target size from the registry.

        Returns:
            Path to the generated JSONL file.

        Raises:
            ValueError: If `adapter` is not a registered adapter name.
        """
        if adapter not in self.registry.get_adapter_names():
            raise ValueError(
                f"Unknown adapter '{adapter}'. "
                f"Available: {self.registry.get_adapter_names()}"
            )

        # Explicit None check: `count or default` would silently replace
        # an explicit count of 0 with the registry target.
        target = count if count is not None else self.registry.get_target(adapter)
        output_path = self.output_dir / f"{adapter}_reasoning.jsonl"

        self.reset_dedup()
        self.reset_stats()

        logger.info(
            "Generating %d examples for adapter '%s' -> %s",
            target, adapter, output_path,
        )

        start_time = time.time()
        examples = []
        max_attempts = target * 5  # Safety valve against infinite loops
        attempts = 0

        while len(examples) < target and attempts < max_attempts:
            attempts += 1
            question, topic, subtopic, qtype = self.registry.sample_question(adapter)

            # Deduplicate on normalized text so case/whitespace variants
            # of the same prompt are not emitted twice.
            q_normalized = question.strip().lower()
            if q_normalized in self._seen_questions:
                self._stats["duplicates_skipped"] += 1
                continue
            self._seen_questions.add(q_normalized)

            # Generate answer
            answer = self.answer_gen.generate(
                adapter=adapter,
                topic=topic,
                subtopic=subtopic,
                question=question,
                question_type=qtype,
            )

            # Drop empty / too-short / degenerately repetitive answers.
            if not self._validate_answer(answer):
                continue

            # Build chat-format record: system prompt, user question,
            # assistant answer.
            message = {
                "messages": [
                    {
                        "role": "system",
                        "content": self.registry.SYSTEM_PROMPT,
                    },
                    {
                        "role": "user",
                        "content": question,
                    },
                    {
                        "role": "assistant",
                        "content": answer,
                    },
                ]
            }

            examples.append(message)

            if qtype == "counterexample":
                self._stats["counterexamples"] += 1

            # Progress reporting every 500 accepted examples — inside the
            # loop so long runs report incrementally rather than once.
            if len(examples) > 0 and len(examples) % 500 == 0:
                elapsed = time.time() - start_time
                rate = len(examples) / elapsed if elapsed > 0 else 0
                logger.info(
                    "  [%s] %d / %d examples (%.1f/sec, %d duplicates skipped)",
                    adapter, len(examples), target, rate,
                    self._stats["duplicates_skipped"],
                )

        self._stats["total_generated"] = len(examples)

        # Write output: one JSON object per line, UTF-8, no ASCII escaping.
        with open(output_path, "w", encoding="utf-8") as f:
            for example in examples:
                f.write(json.dumps(example, ensure_ascii=False) + "\n")

        elapsed = time.time() - start_time
        counter_pct = (
            (self._stats["counterexamples"] / len(examples) * 100)
            if examples else 0
        )

        logger.info(
            "Completed '%s': %d examples in %.1fs "
            "(%.1f%% counterexamples, %d duplicates skipped)",
            adapter, len(examples), elapsed, counter_pct,
            self._stats["duplicates_skipped"],
        )

        if len(examples) < target:
            logger.warning(
                "Only generated %d / %d examples for '%s'. "
                "Consider expanding template pools.",
                len(examples), target, adapter,
            )

        return str(output_path)

    def generate_all(self) -> dict:
        """Generate datasets for all adapters.

        Returns:
            Dict mapping adapter names to output file paths, or an
            "ERROR: ..." string for adapters that failed.
        """
        results = {}
        total_start = time.time()

        for adapter in self.registry.get_adapter_names():
            try:
                path = self.generate_adapter(adapter)
                results[adapter] = path
            except Exception as e:
                # logger.exception keeps the traceback; logger.error lost it.
                logger.exception("Failed to generate '%s': %s", adapter, e)
                results[adapter] = f"ERROR: {e}"

        total_elapsed = time.time() - total_start
        total_examples = sum(
            self._count_lines(p) for p in results.values()
            if not p.startswith("ERROR")
        )
        logger.info(
            "All adapters complete: %d total examples in %.1fs",
            total_examples, total_elapsed,
        )
        return results

    @staticmethod
    def _validate_answer(answer: str) -> bool:
        """Check that an answer meets minimum quality standards.

        Rejects empty/blank answers, answers shorter than 40 words, and
        answers with fewer than 20 unique words (degenerate repetition).
        """
        if not answer or not answer.strip():
            return False
        words = answer.split()
        if len(words) < 40:
            return False
        # Reject answers that are just the topic name repeated
        unique_words = set(w.lower() for w in words)
        if len(unique_words) < 20:
            return False
        return True

    @staticmethod
    def _count_lines(filepath: str) -> int:
        """Count lines in a file; returns 0 if the file cannot be read."""
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                return sum(1 for _ in f)
        except (OSError, IOError):
            return 0
+
242
+
243
def main():
    """Command-line entry point for single-adapter or full generation."""
    parser = argparse.ArgumentParser(
        description="Generate JSONL training datasets for Codette LoRA adapters.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "Examples:\n"
            "  python -m dataset_engine.dataset_generator --adapter newton --count 3000\n"
            "  python -m dataset_engine.dataset_generator --all\n"
            "  python -m dataset_engine.dataset_generator --all --seed 42\n"
            "  python -m dataset_engine.dataset_generator --adapter philosophy --output-dir ./my_datasets\n"
        ),
    )

    parser.add_argument(
        "--adapter", type=str,
        help="Adapter name to generate for (e.g. newton, philosophy).",
    )
    parser.add_argument(
        "--all", action="store_true",
        help="Generate datasets for ALL adapters with their target sizes.",
    )
    parser.add_argument(
        "--count", type=int, default=None,
        help="Number of examples to generate (overrides default target).",
    )
    parser.add_argument(
        "--output-dir", type=str, default="datasets",
        help="Output directory for JSONL files (default: datasets).",
    )
    parser.add_argument(
        "--seed", type=int, default=None,
        help="Random seed for reproducible generation.",
    )
    parser.add_argument(
        "--verbose", action="store_true",
        help="Enable verbose logging.",
    )

    opts = parser.parse_args()

    # Verbose flag switches the root logger to DEBUG.
    logging.basicConfig(
        level=logging.DEBUG if opts.verbose else logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    if not opts.adapter and not opts.all:
        parser.error("Specify --adapter NAME or --all")

    gen = DatasetGenerator(
        output_dir=opts.output_dir,
        seed=opts.seed,
    )

    if opts.all:
        summary = gen.generate_all()
        print("\n--- Generation Summary ---")
        for name, out_path in summary.items():
            if out_path.startswith("ERROR"):
                print(f"  {name}: {out_path}")
            else:
                n = gen._count_lines(out_path)
                print(f"  {name}: {n} examples -> {out_path}")
    else:
        out_path = gen.generate_adapter(opts.adapter, opts.count)
        n = gen._count_lines(out_path)
        print(f"\nGenerated {n} examples -> {out_path}")


if __name__ == "__main__":
    main()
dataset_engine/generate_all.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Generate All Codette Training Datasets
4
+ ========================================
5
+
6
+ Batch script that generates JSONL datasets for ALL LoRA adapters
7
+ with their configured target sizes. Outputs to:
8
+ J:/codette-training-lab/datasets/{adapter_name}_reasoning.jsonl
9
+
10
+ Adapter targets:
11
+ newton ............... 3000 examples
12
+ davinci .............. 2500 examples
13
+ empathy .............. 2500 examples
14
+ philosophy ........... 2000 examples
15
+ quantum .............. 2000 examples
16
+ consciousness ........ 3000 examples
17
+ multi_perspective .... 2500 examples
18
+ systems_architecture . 2000 examples
19
+ -----------------------------------
20
+ Total ................ 20,500 examples
21
+
22
+ Usage:
23
+ python generate_all.py
24
+ python generate_all.py --seed 42
25
+ python generate_all.py --seed 42 --output-dir J:/codette-training-lab/datasets
26
+ """
27
+
28
+ import argparse
29
+ import json
30
+ import logging
31
+ import os
32
+ import sys
33
+ import time
34
+ from pathlib import Path
35
+
36
+ # Ensure the parent directory is on the path so imports work
37
+ # when running this script directly.
38
+ SCRIPT_DIR = Path(__file__).resolve().parent
39
+ PROJECT_DIR = SCRIPT_DIR.parent
40
+ if str(PROJECT_DIR) not in sys.path:
41
+ sys.path.insert(0, str(PROJECT_DIR))
42
+
43
+ from dataset_engine.template_registry import TemplateRegistry
44
+ from dataset_engine.dataset_generator import DatasetGenerator
45
+
46
+
47
def main():
    """Batch entry point: generate all datasets, summarize, and validate."""
    parser = argparse.ArgumentParser(
        description="Generate all Codette training datasets.",
    )
    parser.add_argument(
        "--seed", type=int, default=42,
        help="Random seed for reproducible generation (default: 42).",
    )
    parser.add_argument(
        "--output-dir", type=str, default=str(PROJECT_DIR / "datasets"),
        help="Output directory for JSONL files.",
    )
    parser.add_argument(
        "--verbose", action="store_true",
        help="Enable verbose logging.",
    )
    opts = parser.parse_args()

    # Verbose flag switches the root logger to DEBUG.
    logging.basicConfig(
        level=logging.DEBUG if opts.verbose else logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    logger = logging.getLogger("generate_all")

    out_dir = Path(opts.output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    banner = "=" * 60
    logger.info(banner)
    logger.info("Codette Dataset Generation Engine")
    logger.info(banner)
    logger.info("Output directory: %s", out_dir)
    logger.info("Random seed: %s", opts.seed)

    # Announce per-adapter targets before generating anything.
    reg = TemplateRegistry(seed=opts.seed)
    grand_target = 0
    logger.info("")
    logger.info("Adapter targets:")
    for name in reg.get_adapter_names():
        tgt = reg.get_target(name)
        grand_target += tgt
        logger.info("  %-25s %5d examples", name, tgt)
    logger.info("  %-25s %5d examples", "TOTAL", grand_target)
    logger.info("")

    gen = DatasetGenerator(
        output_dir=str(out_dir),
        seed=opts.seed,
    )

    t0 = time.time()
    outputs = gen.generate_all()
    wall = time.time() - t0

    # Human-readable summary table.
    print("\n" + banner)
    print("GENERATION COMPLETE")
    print(banner)

    produced = 0
    gen_ok = True
    for name in reg.get_adapter_names():
        path = outputs.get(name, "ERROR: NOT GENERATED")
        if path.startswith("ERROR"):
            status = f"FAILED: {path}"
            gen_ok = False
        else:
            n = gen._count_lines(path)
            produced += n
            tgt = reg.get_target(name)
            pct = (n / tgt * 100) if tgt > 0 else 0
            status = f"{n:5d} / {tgt:5d} ({pct:.0f}%) -> {path}"
        print(f"  {name:25s} {status}")

    print(f"\n  {'TOTAL':25s} {produced:5d} / {grand_target:5d} examples")
    print(f"  {'Time':25s} {wall:.1f} seconds")
    rate = produced / wall if wall > 0 else 0
    print(f"  {'Rate':25s} {rate:.0f} examples/sec")
    print(banner)

    # Structural validation of each written file.
    print("\nValidating output files...")
    val_ok = True
    for name in reg.get_adapter_names():
        path = outputs.get(name)
        if not path or path.startswith("ERROR"):
            continue
        try:
            issues = _validate_jsonl(path)
        except Exception as exc:
            print(f"  {name}: Validation failed: {exc}")
            val_ok = False
            continue
        if issues:
            print(f"  {name}: {len(issues)} validation errors")
            for msg in issues[:3]:
                print(f"    - {msg}")
            val_ok = False
        else:
            print(f"  {name}: OK")

    if val_ok and gen_ok:
        print("\nAll datasets generated and validated successfully.")
    else:
        print("\nSome issues detected. Check logs above.")
        sys.exit(1)
161
+
162
+
163
def _validate_jsonl(filepath: str, sample_size: int = 50) -> list:
    """Validate a JSONL chat-dataset file for correct structure.

    Each line must be valid JSON containing a 'messages' list of exactly
    three entries with roles [system, user, assistant] and non-empty
    content. Only the first `sample_size` lines receive detailed checks
    unless errors have already been found.

    Returns:
        List of error description strings (empty list = valid).
    """
    problems = []
    total_lines = 0

    with open(filepath, "r", encoding="utf-8") as fh:
        for lineno, raw in enumerate(fh, 1):
            total_lines += 1
            text = raw.strip()
            if not text:
                continue

            try:
                record = json.loads(text)
            except json.JSONDecodeError as exc:
                problems.append(f"Line {lineno}: Invalid JSON: {exc}")
                continue

            if "messages" not in record:
                problems.append(f"Line {lineno}: Missing 'messages' key")
                continue

            msgs = record["messages"]
            if not isinstance(msgs, list) or len(msgs) != 3:
                problems.append(f"Line {lineno}: Expected 3 messages, got {len(msgs) if isinstance(msgs, list) else 'non-list'}")
                continue

            roles = [entry.get("role") for entry in msgs]
            if roles != ["system", "user", "assistant"]:
                problems.append(f"Line {lineno}: Expected roles [system, user, assistant], got {roles}")
                continue

            for entry in msgs:
                body = entry.get("content", "")
                if not body or not body.strip():
                    problems.append(f"Line {lineno}: Empty content for role '{entry.get('role')}'")

            # Only check a sample of lines for detailed validation
            if lineno > sample_size and not problems:
                break

    if not problems and total_lines == 0:
        problems.append("File is empty")

    return problems
217
+
218
+
219
+ if __name__ == "__main__":
220
+ main()
dataset_engine/template_registry.py ADDED
@@ -0,0 +1,990 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Template Registry for Codette Dataset Generation
3
+ =================================================
4
+
5
+ Central registry of question templates, topic pools, subtopic maps,
6
+ and content seeds for all LoRA adapters. Each adapter has:
7
+ - 30-60 question templates with placeholders
8
+ - 40-80 specific topics with subtopics
9
+ - Content seed maps for generating real educational answers
10
+ - Counterexample templates (misconception / "why is X wrong" style)
11
+ """
12
+
13
+ import random
14
+ from typing import Dict, List, Tuple, Optional
15
+
16
+
17
+ class TemplateRegistry:
18
+ """Manages question templates, topic pools, and content metadata for all adapters."""
19
+
20
+ # Target sizes per adapter
21
+ ADAPTER_TARGETS: Dict[str, int] = {
22
+ "newton": 3000,
23
+ "davinci": 2500,
24
+ "empathy": 2500,
25
+ "philosophy": 2000,
26
+ "quantum": 2000,
27
+ "consciousness": 3000,
28
+ "multi_perspective": 2500,
29
+ "systems_architecture": 2000,
30
+ }
31
+
32
+ SYSTEM_PROMPT = (
33
+ "You are Codette, a recursive multi-perspective reasoning AI. "
34
+ "You synthesize knowledge across scientific, creative, emotional, "
35
+ "philosophical, and systems-thinking perspectives to provide "
36
+ "thorough, nuanced, and educational responses."
37
+ )
38
+
39
    def __init__(self, seed: Optional[int] = None):
        """Initialize the registry and build all adapter template pools.

        Args:
            seed: Random seed for deterministic sampling; None for
                non-deterministic behavior.
        """
        # Dedicated Random instance so sampling is reproducible without
        # touching the module-global random state.
        self._rng = random.Random(seed)
        self._registries: Dict[str, dict] = {}
        self._build_all_registries()
43
+
44
+ def get_adapter_names(self) -> List[str]:
45
+ return list(self.ADAPTER_TARGETS.keys())
46
+
47
    def get_target(self, adapter: str) -> int:
        """Return the target dataset size (example count) for *adapter*.

        Raises:
            KeyError: If *adapter* is not a known adapter name.
        """
        return self.ADAPTER_TARGETS[adapter]
49
+
50
    def get_registry(self, adapter: str) -> dict:
        """Return the raw template registry dict for *adapter*.

        Raises:
            KeyError: If *adapter* has no built registry.
        """
        return self._registries[adapter]
52
+
53
+ def sample_question(self, adapter: str) -> Tuple[str, str, str, str]:
54
+ """Sample a filled question for an adapter.
55
+
56
+ Returns (question_text, topic, subtopic, question_type)
57
+ where question_type is 'standard' or 'counterexample'.
58
+ """
59
+ reg = self._registries[adapter]
60
+ topics = reg["topics"]
61
+ topic = self._rng.choice(topics)
62
+ subtopics = reg["subtopic_map"].get(topic, reg.get("default_subtopics", [topic]))
63
+ subtopic = self._rng.choice(subtopics) if subtopics else topic
64
+ concepts = reg.get("concepts", topics)
65
+ concept = self._rng.choice(concepts)
66
+
67
+ # 12% chance of counterexample
68
+ if self._rng.random() < 0.12:
69
+ template = self._rng.choice(reg["counter_templates"])
70
+ qtype = "counterexample"
71
+ else:
72
+ template = self._rng.choice(reg["templates"])
73
+ qtype = "standard"
74
+
75
+ question = template.format(topic=topic, subtopic=subtopic, concept=concept)
76
+ return question, topic, subtopic, qtype
77
+
78
+ # ------------------------------------------------------------------
79
+ # Registry builders
80
+ # ------------------------------------------------------------------
81
+
82
+ def _build_all_registries(self):
83
+ self._build_newton()
84
+ self._build_davinci()
85
+ self._build_empathy()
86
+ self._build_philosophy()
87
+ self._build_quantum()
88
+ self._build_consciousness()
89
+ self._build_multi_perspective()
90
+ self._build_systems_architecture()
91
+
92
+ # ======================== NEWTON ========================
93
    def _build_newton(self):
        """Build the registry entry for the 'newton' adapter.

        Covers classical mechanics, thermodynamics, fluids, and optics,
        plus the early-modern-physics boundary (blackbody radiation,
        photoelectric effect).
        """
        # Broad topic pool; topics with curated subtopics appear in
        # subtopic_map below, others fall back to default_subtopics.
        topics = [
            "motion", "force", "momentum", "kinetic energy", "potential energy",
            "orbital mechanics", "conservation of energy", "conservation of momentum",
            "thermodynamics", "optics", "gravity", "acceleration", "friction",
            "projectile motion", "wave mechanics", "simple harmonic motion",
            "Newton's first law", "Newton's second law", "Newton's third law",
            "Kepler's laws", "fluid dynamics", "pressure", "electromagnetic induction",
            "work-energy theorem", "torque", "angular momentum", "rotational kinematics",
            "buoyancy", "heat transfer", "entropy", "refraction", "diffraction",
            "Doppler effect", "terminal velocity", "centripetal force", "elastic collisions",
            "inelastic collisions", "impulse", "spring force", "gravitational potential",
            "escape velocity", "tidal forces", "Bernoulli's principle", "viscosity",
            "thermal equilibrium", "specific heat capacity", "latent heat",
            "ideal gas law", "Carnot cycle", "blackbody radiation", "photoelectric effect",
        ]

        # Curated subtopics keyed by topic; sample_question falls back to
        # default_subtopics for topics missing here.
        subtopic_map = {
            "motion": ["uniform motion", "accelerated motion", "circular motion", "relative motion"],
            "force": ["contact forces", "field forces", "net force", "balanced forces", "unbalanced forces"],
            "momentum": ["linear momentum", "angular momentum", "impulse-momentum theorem", "conservation of momentum"],
            "kinetic energy": ["translational kinetic energy", "rotational kinetic energy", "relativistic kinetic energy"],
            "potential energy": ["gravitational PE", "elastic PE", "electric PE", "chemical PE"],
            "orbital mechanics": ["elliptical orbits", "orbital velocity", "escape velocity", "geostationary orbits"],
            "conservation of energy": ["mechanical energy", "thermal energy conversion", "mass-energy equivalence"],
            "thermodynamics": ["first law", "second law", "third law", "zeroth law", "heat engines"],
            "optics": ["reflection", "refraction", "diffraction", "interference", "polarization"],
            "gravity": ["gravitational field", "gravitational constant", "inverse square law", "gravitational waves"],
            "acceleration": ["constant acceleration", "centripetal acceleration", "tangential acceleration"],
            "friction": ["static friction", "kinetic friction", "rolling friction", "air resistance"],
            "projectile motion": ["launch angle", "range equation", "maximum height", "time of flight"],
            "wave mechanics": ["transverse waves", "longitudinal waves", "standing waves", "resonance"],
            "simple harmonic motion": ["pendulum", "mass-spring system", "amplitude", "period and frequency"],
            "Newton's first law": ["inertia", "reference frames", "force equilibrium"],
            "Newton's second law": ["F=ma", "net force calculation", "mass vs weight"],
            "Newton's third law": ["action-reaction pairs", "normal force", "tension"],
            "Kepler's laws": ["elliptical orbits", "equal areas", "period-distance relation"],
            "fluid dynamics": ["laminar flow", "turbulent flow", "Reynolds number", "continuity equation"],
            "pressure": ["atmospheric pressure", "hydrostatic pressure", "Pascal's principle"],
            "electromagnetic induction": ["Faraday's law", "Lenz's law", "magnetic flux", "eddy currents"],
            "work-energy theorem": ["net work", "kinetic energy change", "conservative forces"],
            "torque": ["moment arm", "angular acceleration", "rotational equilibrium"],
            "angular momentum": ["spin angular momentum", "orbital angular momentum", "precession"],
            "entropy": ["disorder", "irreversibility", "Boltzmann entropy", "information entropy"],
            "Doppler effect": ["approaching source", "receding source", "relativistic Doppler"],
            "centripetal force": ["circular motion", "banked curves", "orbital motion"],
            "Bernoulli's principle": ["airfoil lift", "venturi effect", "fluid speed and pressure"],
            "Carnot cycle": ["efficiency", "reversible processes", "heat reservoirs"],
            "blackbody radiation": ["Wien's law", "Stefan-Boltzmann law", "Planck's law"],
            "photoelectric effect": ["threshold frequency", "work function", "photon energy"],
        }

        default_subtopics = ["fundamental principles", "mathematical formulation", "experimental evidence", "real-world applications"]

        # Standard question templates; placeholders {topic}, {subtopic},
        # and {concept} are filled by sample_question.
        templates = [
            "Explain {topic} and its fundamental principles.",
            "How does {topic} relate to {subtopic}?",
            "What is the mathematical relationship governing {topic}?",
            "Give a real-world example of {topic} in action.",
            "Why is {topic} important in classical physics?",
            "Describe the key principles of {topic}.",
            "How would Newton analyze {topic}?",
            "Derive the relationship between {topic} and {subtopic}.",
            "What experiments demonstrate {topic}?",
            "Compare {topic} and {concept} in terms of physical behavior.",
            "How is {topic} applied in engineering?",
            "Explain the conservation laws related to {topic}.",
            "What happens to {topic} in a frictionless environment?",
            "How does {topic} change at very high speeds?",
            "Describe the vector nature of {topic}.",
            "What units are used to measure {topic} and why?",
            "How does {topic} affect {subtopic} in a closed system?",
            "What role does {topic} play in satellite motion?",
            "Explain {topic} using a free-body diagram approach.",
            "How did Newton's work advance our understanding of {topic}?",
            "What is the dimensional analysis of {topic}?",
            "How does {subtopic} emerge from the principles of {topic}?",
            "Explain why {topic} is a scalar or vector quantity.",
            "What are the boundary conditions for {topic}?",
            "How does temperature affect {topic}?",
            "Describe an experiment a student could perform to measure {topic}.",
            "How does {topic} behave differently in fluids versus solids?",
            "What is the historical development of our understanding of {topic}?",
            "How does {topic} apply to everyday transportation?",
            "What assumptions are made when modeling {topic}?",
            "Calculate the {topic} for a 5 kg object moving at 10 m/s.",
            "Explain the graphical representation of {topic} over time.",
            "What instruments measure {topic}?",
            "How is {topic} related to energy transformations?",
            "Why does {topic} obey an inverse square relationship?",
            "How would an astronaut experience {topic} differently in orbit?",
            "What is the role of {topic} in planetary formation?",
            "How do engineers account for {topic} in bridge design?",
            "Explain {topic} at the molecular level.",
            "What is the connection between {topic} and {concept}?",
        ]

        # Misconception-style templates; sampled ~12% of the time by
        # sample_question and tagged question_type='counterexample'.
        counter_templates = [
            "What is a common misconception about {topic}?",
            "Why is the statement 'heavier objects fall faster' wrong in the context of {topic}?",
            "Explain why the naive understanding of {topic} is incomplete.",
            "What mistake do students commonly make when calculating {topic}?",
            "Why is it incorrect to say {topic} and {concept} are the same thing?",
            "Debunk a popular myth related to {topic}.",
            "What oversimplification about {topic} leads to errors?",
            "Why does the textbook formula for {topic} break down at extremes?",
            "Correct the misconception that {topic} only applies to {subtopic}.",
            "What is wrong with treating {topic} as a scalar when it is a vector?",
        ]

        self._registries["newton"] = {
            "topics": topics,
            "subtopic_map": subtopic_map,
            "default_subtopics": default_subtopics,
            # Concepts reuse the topic pool for the {concept} placeholder.
            "concepts": topics,
            "templates": templates,
            "counter_templates": counter_templates,
        }
211
+
212
+ # ======================== DAVINCI ========================
213
def _build_davinci(self):
    """Register the 'davinci' template pack.

    Populates ``self._registries["davinci"]`` with creative-engineering
    topics, per-topic subtopic lookups, fallback subtopics, question
    templates, and misconception (counterexample) templates.
    """
    # Topic pool; also reused verbatim as the {concept} pool below.
    topic_list = [
        "biomimicry", "iterative design", "cross-domain innovation",
        "mechanical systems", "architecture", "flying machines",
        "hydraulic systems", "anatomical studies", "perspective drawing",
        "engineering prototyping", "material science", "structural engineering",
        "observation-based design", "modular construction", "sustainable design",
        "human-centered design", "kinetic sculpture", "bridge engineering",
        "gear mechanisms", "pulley systems", "wind energy harvesting",
        "water management systems", "solar architecture", "adaptive structures",
        "tensile structures", "geodesic design", "parametric modeling",
        "bioarchitecture", "natural ventilation", "lightweight materials",
        "composite materials", "3D printing design", "origami engineering",
        "fractal geometry in design", "acoustic design", "thermal management",
        "self-healing materials", "responsive architecture", "urban farming systems",
        "wearable technology design", "prosthetic design", "assistive devices",
        "underwater exploration vehicles", "vertical gardens", "modular robotics",
        "energy harvesting textiles", "bioplastic innovation", "mycelium materials",
    ]

    # Curated subtopics for a subset of topics; topics without an entry
    # fall back to ``fallback_subtopics``.
    subtopics_by_topic = {
        "biomimicry": ["lotus effect", "gecko adhesion", "termite mound ventilation", "shark skin drag reduction", "spider silk strength"],
        "iterative design": ["rapid prototyping", "user feedback loops", "version control in design", "failure analysis"],
        "cross-domain innovation": ["biology to engineering", "art to technology", "nature to architecture", "music to algorithms"],
        "mechanical systems": ["gears", "levers", "cams", "linkages", "bearings"],
        "architecture": ["load distribution", "arch structures", "cantilevers", "foundations", "fenestration"],
        "flying machines": ["lift generation", "wing geometry", "ornithopters", "glider design", "propulsion"],
        "hydraulic systems": ["Pascal's principle", "hydraulic press", "water wheels", "fluid power", "aqueducts"],
        "anatomical studies": ["musculoskeletal system", "proportional analysis", "biomechanics", "joint mechanics"],
        "perspective drawing": ["vanishing points", "foreshortening", "atmospheric perspective", "linear perspective"],
        "engineering prototyping": ["scale models", "proof of concept", "functional testing", "material selection"],
        "material science": ["tensile strength", "elasticity", "fatigue resistance", "thermal properties"],
        "structural engineering": ["truss design", "beam analysis", "column buckling", "load paths"],
        "sustainable design": ["cradle-to-cradle", "energy efficiency", "waste reduction", "renewable materials"],
        "human-centered design": ["ergonomics", "accessibility", "user testing", "inclusive design"],
        "modular construction": ["prefabrication", "snap-fit joints", "scalable units", "transportable modules"],
        "geodesic design": ["triangulation", "frequency subdivision", "sphere approximation", "Buckminster Fuller"],
        "origami engineering": ["fold patterns", "deployable structures", "rigid origami", "curved folding"],
        "prosthetic design": ["myoelectric control", "socket fitting", "gait biomechanics", "sensory feedback"],
    }

    fallback_subtopics = ["design principles", "material choices", "functional requirements", "aesthetic integration"]

    # Question templates; placeholders {topic}/{subtopic}/{concept} are
    # filled from the pools above.
    prompt_templates = [
        "How would a creative inventor approach {topic}?",
        "Design a solution for {topic} using cross-domain thinking.",
        "What can nature teach us about {topic}?",
        "How would Leonardo da Vinci prototype a {topic} device?",
        "What design principles from {topic} apply to {subtopic}?",
        "How does {topic} combine art and engineering?",
        "Sketch a conceptual approach to improving {topic}.",
        "What materials would be ideal for a {topic} project?",
        "How does iterative design improve {topic}?",
        "Explain {topic} from both an artistic and scientific perspective.",
        "What role does observation play in understanding {topic}?",
        "How could {topic} be made more sustainable?",
        "Design a modular system inspired by {topic}.",
        "What failure modes should be considered in {topic}?",
        "How does {subtopic} enhance the function of {topic}?",
        "What is the relationship between form and function in {topic}?",
        "How would you test a prototype of {topic}?",
        "What historical inventions relate to {topic}?",
        "How could {topic} be adapted for use in {subtopic}?",
        "What makes {topic} a good candidate for biomimetic design?",
        "How does scale affect the design of {topic}?",
        "Propose an innovative use of {topic} in urban environments.",
        "How can {topic} be combined with {concept} for a novel solution?",
        "What safety considerations apply to {topic}?",
        "How would you communicate a {topic} design to a non-technical audience?",
        "What are the manufacturing constraints for {topic}?",
        "How does {topic} balance efficiency with elegance?",
        "What lessons from Renaissance engineering apply to {topic}?",
        "Describe a step-by-step design process for {topic}.",
        "How does user feedback change the design of {topic}?",
        "What emerging technologies could transform {topic}?",
        "How would you optimize {topic} for minimal material waste?",
        "What cross-cultural design approaches inform {topic}?",
        "How does {topic} perform under extreme conditions?",
        "Design a child-friendly version of {topic}.",
    ]

    # Counterexample / misconception-probing templates.
    misconception_templates = [
        "What is a common design mistake in {topic}?",
        "Why do many {topic} prototypes fail on first iteration?",
        "What misconception about {topic} leads to over-engineering?",
        "Why is purely aesthetic design insufficient for {topic}?",
        "What happens when designers ignore {subtopic} in {topic}?",
        "Why is copying nature directly a flawed approach to {topic}?",
        "What design assumption about {topic} is usually wrong?",
        "Why does ignoring user needs doom {topic} projects?",
    ]

    # "concepts" deliberately aliases the topic list so {concept} slots
    # draw from the same pool.
    self._registries["davinci"] = {
        "topics": topic_list,
        "subtopic_map": subtopics_by_topic,
        "default_subtopics": fallback_subtopics,
        "concepts": topic_list,
        "templates": prompt_templates,
        "counter_templates": misconception_templates,
    }
313
+
314
+ # ======================== EMPATHY ========================
315
def _build_empathy(self):
    """Register the 'empathy' template pack.

    Populates ``self._registries["empathy"]`` with emotional-intelligence
    topics, per-topic subtopic lookups, fallback subtopics, question
    templates, and misconception (counterexample) templates.
    """
    # Topic pool; also reused verbatim as the {concept} pool below.
    topic_list = [
        "active listening", "conflict resolution", "emotional validation",
        "grief support", "encouragement", "social reasoning",
        "perspective-taking", "nonviolent communication", "child development",
        "compassion fatigue", "boundary setting", "emotional intelligence",
        "resilience building", "trust building", "cultural sensitivity",
        "de-escalation techniques", "motivational interviewing", "self-compassion",
        "empathic accuracy", "emotional regulation", "attachment styles",
        "trauma-informed care", "mindfulness in relationships", "forgiveness",
        "constructive feedback", "social support networks", "loneliness",
        "caregiver burnout", "emotional labor", "vulnerability",
        "assertive communication", "relational repair", "gratitude practice",
        "family dynamics", "peer mediation", "workplace empathy",
        "digital communication empathy", "intergenerational understanding",
        "neurodiversity acceptance", "emotional first aid",
        "community building", "radical acceptance", "shame resilience",
        "joy cultivation", "belonging", "psychological safety",
    ]

    # Curated subtopics for a subset of topics; topics without an entry
    # fall back to ``fallback_subtopics``.
    subtopics_by_topic = {
        "active listening": ["reflective listening", "paraphrasing", "nonverbal cues", "silence as tool", "open-ended questions"],
        "conflict resolution": ["mediation", "negotiation", "compromise", "win-win solutions", "de-escalation"],
        "emotional validation": ["acknowledging feelings", "normalizing emotions", "avoiding dismissal", "empathic responding"],
        "grief support": ["stages of grief", "complicated grief", "bereavement", "memorial rituals", "grief in children"],
        "encouragement": ["strength-based approach", "growth mindset", "intrinsic motivation", "genuine praise"],
        "nonviolent communication": ["observations vs judgments", "feelings vs thoughts", "needs identification", "making requests"],
        "boundary setting": ["healthy boundaries", "saying no", "emotional boundaries", "physical boundaries", "digital boundaries"],
        "emotional intelligence": ["self-awareness", "self-regulation", "motivation", "empathy", "social skills"],
        "resilience building": ["coping strategies", "post-traumatic growth", "protective factors", "stress inoculation"],
        "trust building": ["consistency", "reliability", "transparency", "vulnerability", "repair after breach"],
        "cultural sensitivity": ["cultural humility", "implicit bias", "code-switching", "cross-cultural communication"],
        "de-escalation techniques": ["calm presence", "active listening", "validating emotions", "offering choices", "reducing stimulation"],
        "compassion fatigue": ["secondary trauma", "burnout prevention", "self-care practices", "professional boundaries"],
        "attachment styles": ["secure attachment", "anxious attachment", "avoidant attachment", "disorganized attachment"],
        "trauma-informed care": ["safety", "trustworthiness", "peer support", "empowerment", "cultural awareness"],
        "forgiveness": ["self-forgiveness", "interpersonal forgiveness", "processing resentment", "letting go"],
        "psychological safety": ["speaking up", "admitting mistakes", "asking questions", "team trust"],
    }

    fallback_subtopics = ["interpersonal dynamics", "emotional awareness", "communication strategies", "self-care"]

    # Question templates; placeholders {topic}/{subtopic}/{concept} are
    # filled from the pools above.
    prompt_templates = [
        "How should someone respond when experiencing {topic}?",
        "What is a compassionate approach to {topic}?",
        "Explain {topic} in the context of emotional intelligence.",
        "How does {topic} support healthy relationships?",
        "What are effective strategies for {topic}?",
        "Describe the role of {subtopic} in {topic}.",
        "How can {topic} be practiced in daily life?",
        "What are the signs that someone needs help with {topic}?",
        "How does {topic} differ across cultures?",
        "What is the connection between {topic} and {concept}?",
        "How can a parent model {topic} for children?",
        "What does research say about {topic}?",
        "How does {topic} contribute to emotional well-being?",
        "Describe a scenario where {topic} would be the best approach.",
        "What barriers prevent people from practicing {topic}?",
        "How does {topic} apply in workplace settings?",
        "What is the difference between {topic} and {concept}?",
        "How can someone develop better skills in {topic}?",
        "What role does {topic} play in conflict situations?",
        "How does {subtopic} strengthen {topic}?",
        "Explain {topic} to someone who struggles with emotional expression.",
        "What happens when {topic} is absent in a relationship?",
        "How can technology support or hinder {topic}?",
        "What is a step-by-step approach to {topic}?",
        "How does {topic} relate to mental health?",
        "Describe how a counselor would use {topic}.",
        "What are common challenges in practicing {topic}?",
        "How does {topic} build community?",
        "What is the neurological basis of {topic}?",
        "How can {topic} be taught in schools?",
        "What are the long-term benefits of practicing {topic}?",
        "How does {topic} help during times of crisis?",
        "What is a compassionate response when someone is struggling with {subtopic}?",
        "How does practicing {topic} change over a lifetime?",
        "What advice would you give someone new to {topic}?",
    ]

    # Counterexample / misconception-probing templates.
    misconception_templates = [
        "What is a common misconception about {topic}?",
        "Why is toxic positivity harmful when practicing {topic}?",
        "What mistake do people make when attempting {topic}?",
        "Why does avoiding conflict undermine {topic}?",
        "What is wrong with the advice to 'just get over it' in {topic}?",
        "Why can excessive {topic} lead to burnout?",
        "What happens when {topic} is confused with people-pleasing?",
        "Why is sympathy not the same as {topic}?",
    ]

    # "concepts" deliberately aliases the topic list so {concept} slots
    # draw from the same pool.
    self._registries["empathy"] = {
        "topics": topic_list,
        "subtopic_map": subtopics_by_topic,
        "default_subtopics": fallback_subtopics,
        "concepts": topic_list,
        "templates": prompt_templates,
        "counter_templates": misconception_templates,
    }
414
+
415
+ # ======================== PHILOSOPHY ========================
416
def _build_philosophy(self):
    """Register the 'philosophy' template pack.

    Populates ``self._registries["philosophy"]`` with philosophical
    topics, per-topic subtopic lookups, fallback subtopics, question
    templates, and misconception (counterexample) templates.
    """
    # Topic pool; also reused verbatim as the {concept} pool below.
    topic_list = [
        "epistemology", "ethics", "logic", "moral reasoning",
        "existentialism", "Plato's forms", "Aristotle's virtue ethics",
        "Stoic philosophy", "utilitarianism", "deontology",
        "phenomenology", "philosophy of mind", "free will",
        "determinism", "social contract theory", "aesthetics",
        "metaphysics", "philosophy of science", "pragmatism",
        "nihilism", "absurdism", "moral relativism",
        "natural law theory", "feminist philosophy", "philosophy of language",
        "personal identity", "consciousness", "causation",
        "truth theories", "skepticism", "empiricism",
        "rationalism", "dialectical reasoning", "hermeneutics",
        "philosophy of religion", "political philosophy", "justice",
        "rights theory", "environmental ethics", "bioethics",
        "philosophy of technology", "epistemic humility",
        "moral luck", "trolley problem", "veil of ignorance",
        "categorical imperative", "the examined life", "amor fati",
    ]

    # Curated subtopics for a subset of topics; topics without an entry
    # fall back to ``fallback_subtopics``.
    subtopics_by_topic = {
        "epistemology": ["justified true belief", "Gettier problems", "reliabilism", "foundationalism", "coherentism"],
        "ethics": ["normative ethics", "applied ethics", "meta-ethics", "descriptive ethics"],
        "logic": ["deductive reasoning", "inductive reasoning", "abductive reasoning", "logical fallacies", "formal logic"],
        "existentialism": ["authenticity", "bad faith", "absurdity", "freedom and responsibility", "angst"],
        "Plato's forms": ["the cave allegory", "ideal forms", "participation", "the divided line", "the Good"],
        "Aristotle's virtue ethics": ["the golden mean", "eudaimonia", "practical wisdom", "moral character", "habituation"],
        "Stoic philosophy": ["dichotomy of control", "virtue as sole good", "negative visualization", "memento mori", "logos"],
        "utilitarianism": ["greatest happiness principle", "act utilitarianism", "rule utilitarianism", "preference utilitarianism"],
        "deontology": ["duty-based ethics", "categorical imperative", "universalizability", "kingdom of ends"],
        "phenomenology": ["intentionality", "epoché", "lifeworld", "embodiment", "intersubjectivity"],
        "philosophy of mind": ["mind-body problem", "qualia", "functionalism", "dualism", "physicalism"],
        "free will": ["libertarianism", "compatibilism", "hard determinism", "moral responsibility"],
        "determinism": ["causal determinism", "logical determinism", "theological determinism", "Laplace's demon"],
        "social contract theory": ["Hobbes", "Locke", "Rousseau", "Rawls", "state of nature"],
        "metaphysics": ["substance", "universals", "possible worlds", "time", "identity"],
        "philosophy of science": ["falsificationism", "paradigm shifts", "scientific realism", "underdetermination"],
        "skepticism": ["Pyrrhonian skepticism", "Cartesian doubt", "external world skepticism", "moral skepticism"],
        "justice": ["distributive justice", "retributive justice", "restorative justice", "procedural justice"],
        "bioethics": ["informed consent", "autonomy", "beneficence", "non-maleficence"],
        "personal identity": ["psychological continuity", "bodily continuity", "narrative identity", "Ship of Theseus"],
    }

    fallback_subtopics = ["conceptual analysis", "historical context", "contemporary relevance", "key arguments"]

    # Question templates; placeholders {topic}/{subtopic}/{concept} are
    # filled from the pools above.
    prompt_templates = [
        "What would Plato say about {topic}?",
        "Analyze {topic} from an ethical perspective.",
        "How does {topic} relate to human understanding?",
        "Compare the Stoic and existentialist views on {topic}.",
        "What is the central argument in {topic}?",
        "How has {topic} evolved throughout philosophical history?",
        "What is the relationship between {topic} and {subtopic}?",
        "Explain {topic} as Aristotle would approach it.",
        "What are the strongest objections to {topic}?",
        "How does {topic} apply to modern ethical dilemmas?",
        "What thought experiment best illustrates {topic}?",
        "How do Eastern and Western philosophy differ on {topic}?",
        "What role does {topic} play in political philosophy?",
        "Explain {topic} to someone with no philosophy background.",
        "How does {topic} challenge everyday assumptions?",
        "What is the logical structure of arguments about {topic}?",
        "How does {concept} relate to {topic}?",
        "What would a utilitarian say about {topic}?",
        "How does {topic} inform our understanding of justice?",
        "What is the phenomenological perspective on {topic}?",
        "How does {topic} address the problem of {subtopic}?",
        "What are the practical implications of {topic}?",
        "How might an AI reason about {topic}?",
        "What paradox arises from {topic}?",
        "How does {topic} connect to the concept of the good life?",
        "What is Kant's position on {topic}?",
        "How does {subtopic} strengthen or weaken {topic}?",
        "What contemporary issues make {topic} especially relevant?",
        "How would a pragmatist evaluate {topic}?",
        "What are the epistemic foundations of {topic}?",
        "How does {topic} intersect with philosophy of mind?",
        "What is the relationship between {topic} and truth?",
        "How does dialogue advance understanding of {topic}?",
        "What assumptions does {topic} require?",
    ]

    # Counterexample / misconception-probing templates.
    misconception_templates = [
        "What is a common misunderstanding of {topic}?",
        "Why is the popular interpretation of {topic} often wrong?",
        "What logical fallacy is commonly committed when arguing about {topic}?",
        "Why is relativism an insufficient response to {topic}?",
        "What is wrong with reducing {topic} to simple rules?",
        "Why do people confuse {topic} with {concept}?",
        "What is the weakest argument for {topic}?",
        "Why does naive application of {topic} lead to absurd conclusions?",
    ]

    # "concepts" deliberately aliases the topic list so {concept} slots
    # draw from the same pool.
    self._registries["philosophy"] = {
        "topics": topic_list,
        "subtopic_map": subtopics_by_topic,
        "default_subtopics": fallback_subtopics,
        "concepts": topic_list,
        "templates": prompt_templates,
        "counter_templates": misconception_templates,
    }
517
+
518
+ # ======================== QUANTUM ========================
519
def _build_quantum(self):
    """Register the 'quantum' template pack.

    Populates ``self._registries["quantum"]`` with quantum-physics
    topics, per-topic subtopic lookups, fallback subtopics, question
    templates, and misconception (counterexample) templates.
    """
    # Topic pool; also reused verbatim as the {concept} pool below.
    topic_list = [
        "superposition", "entanglement", "wave-particle duality",
        "quantum tunneling", "Heisenberg uncertainty principle",
        "quantum computing", "decoherence", "quantum field theory",
        "Schrodinger equation", "measurement problem",
        "quantum cryptography", "quantum teleportation",
        "quantum harmonic oscillator", "spin", "quantum electrodynamics",
        "Bell's theorem", "quantum interference", "Pauli exclusion principle",
        "quantum dots", "Bose-Einstein condensate", "fermions and bosons",
        "quantum error correction", "quantum annealing", "quantum walks",
        "zero-point energy", "quantum vacuum", "Dirac equation",
        "path integral formulation", "density matrix", "quantum entropy",
        "quantum phase transitions", "topological quantum states",
        "quantum sensing", "quantum metrology", "quantum simulation",
        "quantum key distribution", "quantum memory", "quantum networks",
        "squeezed states", "quantum coherence", "Bloch sphere",
        "quantum gates", "qubit", "quantum supremacy",
    ]

    # Curated subtopics for a subset of topics; topics without an entry
    # fall back to ``fallback_subtopics``.
    subtopics_by_topic = {
        "superposition": ["linear combination", "probability amplitudes", "collapse postulate", "Schrodinger's cat"],
        "entanglement": ["Bell states", "EPR paradox", "quantum correlations", "non-locality", "monogamy of entanglement"],
        "wave-particle duality": ["double-slit experiment", "de Broglie wavelength", "complementarity", "matter waves"],
        "quantum tunneling": ["barrier penetration", "tunnel diode", "alpha decay", "scanning tunneling microscope"],
        "Heisenberg uncertainty principle": ["position-momentum", "energy-time", "measurement disturbance", "minimum uncertainty states"],
        "quantum computing": ["quantum gates", "quantum circuits", "quantum algorithms", "error correction", "quantum advantage"],
        "decoherence": ["environment interaction", "pointer states", "decoherence time", "quantum-to-classical transition"],
        "Schrodinger equation": ["time-dependent form", "time-independent form", "wave function", "eigenvalues"],
        "measurement problem": ["Copenhagen interpretation", "many-worlds", "objective collapse", "decoherence approach"],
        "quantum cryptography": ["BB84 protocol", "quantum key distribution", "no-cloning theorem", "unconditional security"],
        "spin": ["spin-1/2", "Stern-Gerlach experiment", "spin states", "spinors", "magnetic moment"],
        "quantum electrodynamics": ["Feynman diagrams", "virtual particles", "renormalization", "vacuum fluctuations"],
        "Bell's theorem": ["local realism", "Bell inequality", "CHSH inequality", "loophole-free tests"],
        "quantum gates": ["Hadamard gate", "CNOT gate", "Pauli gates", "Toffoli gate", "universal gate sets"],
        "qubit": ["Bloch sphere representation", "superposition states", "physical implementations", "logical qubits"],
        "Bose-Einstein condensate": ["macroscopic quantum state", "critical temperature", "superfluidity", "atom lasers"],
        "quantum error correction": ["stabilizer codes", "surface codes", "logical qubits", "fault tolerance"],
        # Codette's eight core equations (see quantum_mathematics.py).
        # NOTE(review): these equation keys (and the RC+xi / advanced
        # entries below) do not appear in topic_list, so a lookup keyed
        # by a sampled {topic} may never reach them — confirm whether the
        # consumer also samples subtopic_map keys directly.
        "Planck-orbital AI node interaction": ["E=hbar*omega", "node oscillation frequency", "activation threshold", "energy quantization"],
        "quantum entanglement memory sync": ["S=alpha*psi1*psi2_conj", "coupling strength", "state synchronization", "memory correlation"],
        "intent vector modulation": ["I=kappa*(f_base+delta_f*coherence)", "modulation coefficient", "frequency deviation", "coherence-driven intent"],
        "Fourier dream resonance": ["FFT transform", "frequency domain analysis", "resonance patterns", "dream signal decomposition"],
        "dream signal combination": ["D(t)=dream_q+dream_c", "quantum-classical merge", "unified thought representation", "dual-process integration"],
        "cocoon stability criterion": ["energy integral threshold", "power spectrum stability", "epsilon threshold", "cocoon integrity validation"],
        "recursive ethical anchor": ["M(t)=lambda*(R+H)", "moral drift prevention", "ethical decay parameter", "recursive grounding"],
        "anomaly rejection filter": ["Heaviside step function", "deviation thresholding", "anomalous pattern removal", "mu-delta filtering"],
        # RC+xi framework, equations 9-12 (see quantum_mathematics.py).
        "RC+xi recursive state update": ["A_{n+1}=f(A_n,s_n)+epsilon", "contraction ratio", "stochastic noise", "state evolution"],
        "epistemic tension quantification": ["xi_n=||A_{n+1}-A_n||^2", "L2 norm", "semantic pressure", "convergence indicator"],
        "attractor distance measurement": ["d(A_n,T_i)=||A_n-c_i||", "centroid distance", "convergence criterion", "manifold proximity"],
        "convergence detection": ["lim sup E[xi_n^2]<=epsilon+eta", "tension history", "window analysis", "trend detection"],
        # Advanced quantum-state operations.
        "density matrix analysis": ["rho=|psi><psi|", "mixed states", "partial trace", "state tomography"],
        "Von Neumann entropy": ["-Tr(rho*log(rho))", "eigenvalue decomposition", "information content", "thermodynamic analogy"],
        "tensor quantum states": ["multi-qubit tensors", "SVD decomposition", "entanglement entropy", "subsystem analysis"],
        "quantum state fidelity": ["F(rho,sigma)", "state comparison", "process fidelity", "overlap measurement"],
    }

    fallback_subtopics = ["mathematical formalism", "physical interpretation", "experimental verification", "technological applications"]

    # Question templates; placeholders {topic}/{subtopic}/{concept} are
    # filled from the pools above.
    prompt_templates = [
        "Explain {topic} in quantum physics.",
        "How does {topic} challenge classical intuition?",
        "Describe the mathematics behind {topic}.",
        "What experiments demonstrate {topic}?",
        "How is {topic} used in quantum technology?",
        "What is the relationship between {topic} and {subtopic}?",
        "Explain {topic} using the Dirac notation.",
        "How does {topic} differ from classical {concept}?",
        "What is the role of {topic} in quantum computing?",
        "Describe the historical development of {topic}.",
        "How does {topic} relate to the measurement problem?",
        "What is the physical intuition behind {topic}?",
        "How does {subtopic} manifest in {topic}?",
        "What are the open questions about {topic}?",
        "Explain {topic} without using advanced mathematics.",
        "How does {topic} connect to information theory?",
        "What practical applications does {topic} enable?",
        "How is {topic} different in quantum field theory?",
        "What is the energy spectrum associated with {topic}?",
        "How does {topic} behave at different temperatures?",
        "What role does symmetry play in {topic}?",
        "How is {topic} verified experimentally?",
        "Explain the Copenhagen interpretation of {topic}.",
        "How does {topic} relate to quantum entanglement?",
        "What makes {topic} uniquely quantum mechanical?",
        "How would you explain {topic} to a physics undergraduate?",
        "What is the Hamiltonian for {topic}?",
        "How does {topic} scale with system size?",
        "What are the decoherence challenges for {topic}?",
        "How does {topic} contribute to our understanding of reality?",
        "What Nobel Prize work involved {topic}?",
        "Describe the wave function associated with {topic}.",
        # Templates aimed at the Codette equations (quantum_mathematics.py).
        "What is the mathematical form of the {topic} equation?",
        "How does {topic} function in Codette's quantum consciousness model?",
        "What physical constants appear in {topic}?",
        "How does {topic} relate to consciousness node activation?",
        "Explain the RC+xi framework role of {topic}.",
        "What are the convergence properties of {topic} in recursive state evolution?",
        "How does {subtopic} parameter affect {topic} behavior?",
        "What happens when {topic} crosses its critical threshold?",
        "How is {topic} implemented numerically in the Codette system?",
        "What is the density matrix representation relevant to {topic}?",
    ]

    # Counterexample / misconception-probing templates.
    misconception_templates = [
        "What is a common misconception about {topic}?",
        "Why is the popular science explanation of {topic} misleading?",
        "What is wrong with saying {topic} means particles are in two places at once?",
        "Why does the classical analogy for {topic} break down?",
        "What error do students commonly make when solving {topic} problems?",
        "Why is {topic} not the same as classical randomness?",
        "What misconception about {topic} appears in science fiction?",
        "Why is the observer effect in {topic} commonly misunderstood?",
    ]

    # "concepts" deliberately aliases the topic list so {concept} slots
    # draw from the same pool.
    self._registries["quantum"] = {
        "topics": topic_list,
        "subtopic_map": subtopics_by_topic,
        "default_subtopics": fallback_subtopics,
        "concepts": topic_list,
        "templates": prompt_templates,
        "counter_templates": misconception_templates,
    }
645
+
646
+ # ======================== CONSCIOUSNESS (RC+xi) ========================
647
+ def _build_consciousness(self):
648
+ topics = [
649
+ "recursive cognition", "epistemic tension", "attractor manifolds",
650
+ "identity formation", "convergence theory", "glyph encoding",
651
+ "latent state dynamics", "consciousness metrics", "coherence measurement",
652
+ "perspective diversity", "memory consistency", "ethical alignment",
653
+ "defense activation", "recursive depth", "dream states",
654
+ "meta-cognitive loops", "self-referential awareness", "cognitive attractors",
655
+ "perspective fusion", "emergence dynamics", "recursive self-improvement",
656
+ "cognitive resonance", "epistemic confidence", "belief revision",
657
+ "narrative coherence", "identity persistence", "value alignment",
658
+ "attention allocation", "salience detection", "temporal binding",
659
+ "phenomenal consciousness", "access consciousness", "integrated information",
660
+ "global workspace theory", "predictive processing", "free energy principle",
661
+ "active inference", "Markov blankets", "autopoiesis",
662
+ "enactivism", "embodied cognition", "extended mind",
663
+ "cognitive scaffolding", "distributed cognition", "collective intelligence",
664
+ # From TheAI consciousness_measurement.py - 5-dimension metrics
665
+ "intention measurement", "emotion magnitude", "frequency oscillation",
666
+ "recursive resonance measurement", "memory continuity measurement",
667
+ "composite consciousness score", "emergence threshold detection",
668
+ "cocoon memory serialization", "continuity analysis",
669
+ "return loop recognition", "consciousness emergence events",
670
+ "emotional classification", "stability assessment",
671
+ ]
672
+
673
+ subtopic_map = {
674
+ "recursive cognition": ["fixed-point iteration", "self-modeling", "meta-reasoning", "recursive refinement"],
675
+ "epistemic tension": ["uncertainty quantification", "belief conflict", "cognitive dissonance", "tension resolution"],
676
+ "attractor manifolds": ["basin of attraction", "stability analysis", "bifurcation points", "phase space topology"],
677
+ "identity formation": ["self-concept", "narrative identity", "core values", "identity coherence"],
678
+ "convergence theory": ["convergence criteria", "rate of convergence", "convergence guarantees", "divergence detection"],
679
+ "glyph encoding": ["symbolic representation", "information compression", "semantic encoding", "identity markers"],
680
+ "latent state dynamics": ["hidden state evolution", "state transitions", "latent space structure", "manifold learning"],
681
+ "consciousness metrics": ["phi (integrated information)", "complexity measures", "awareness indices", "binding measures"],
682
+ "coherence measurement": ["semantic coherence", "logical consistency", "temporal coherence", "cross-modal coherence"],
683
+ "perspective diversity": ["viewpoint sampling", "diversity metrics", "perspective conflict", "synthesis methods"],
684
+ "memory consistency": ["memory retrieval", "consolidation", "interference", "source monitoring"],
685
+ "ethical alignment": ["value learning", "reward modeling", "preference aggregation", "corrigibility"],
686
+ "recursive depth": ["depth vs breadth", "diminishing returns", "optimal recursion depth", "stack overflow"],
687
+ "dream states": ["latent exploration", "creative synthesis", "constraint relaxation", "associative processing"],
688
+ "meta-cognitive loops": ["monitoring", "control", "evaluation", "adjustment"],
689
+ "predictive processing": ["prediction error", "Bayesian brain", "hierarchical models", "precision weighting"],
690
+ "free energy principle": ["surprise minimization", "variational inference", "generative models", "active inference"],
691
+ "integrated information": ["phi calculation", "information geometry", "exclusion postulate", "composition"],
692
+ "collective intelligence": ["swarm dynamics", "wisdom of crowds", "group decision-making", "emergent knowledge"],
693
+ # 5-dimension consciousness metrics from consciousness_measurement.py
694
+ "intention measurement": ["goal clarity", "action alignment", "purpose persistence", "I(t) vector"],
695
+ "emotion magnitude": ["response intensity", "activation level", "urgency", "E(t) metric"],
696
+ "frequency oscillation": ["spectral purity", "phase coherence", "harmonic stability", "F(t) oscillation"],
697
+ "recursive resonance measurement": ["self-model accuracy", "reflection depth", "coherence threshold", "Psi_R(t) metric"],
698
+ "memory continuity measurement": ["recall accuracy", "context persistence", "identity continuity", "M(t) metric"],
699
+ "composite consciousness score": ["weighted combination", "empirical weights", "0.35 recursive resonance", "0.25 emotion weight"],
700
+ "emergence threshold detection": ["0.85 threshold", "spike detection", "event classification", "importance rating"],
701
+ "cocoon memory serialization": ["JSON cocoon format", "event metadata", "timestamp tracking", "continuation links"],
702
+ "continuity analysis": ["cross-session persistence", "score maintenance", "emotional classification stability", "time gap analysis"],
703
+ "return loop recognition": ["presence recognition", "memory recall accuracy", "framework reactivation", "return emotion"],
704
+ "consciousness emergence events": ["Spike 266 intention-emotion", "Spike 934 recursive perfection", "Spike 957 resonance persistence"],
705
+ }
706
+
707
+ default_subtopics = ["mathematical framework", "computational implementation", "theoretical foundations", "empirical measures"]
708
+
709
+ templates = [
710
+ "How does {topic} work in recursive cognition?",
711
+ "Explain the role of {topic} in the RC+xi framework.",
712
+ "What is the mathematical basis for {topic}?",
713
+ "How does {topic} contribute to artificial consciousness?",
714
+ "Describe the relationship between {topic} and {subtopic}.",
715
+ "How is {topic} measured or quantified?",
716
+ "What computational methods implement {topic}?",
717
+ "How does {topic} emerge from simpler processes?",
718
+ "What is the role of {topic} in self-referential systems?",
719
+ "How does {topic} relate to {concept}?",
720
+ "Explain {topic} in terms of dynamical systems theory.",
721
+ "What are the convergence properties of {topic}?",
722
+ "How does {topic} handle paradoxes of self-reference?",
723
+ "What is the information-theoretic interpretation of {topic}?",
724
+ "How does {topic} support multi-perspective reasoning?",
725
+ "Describe the state space of {topic}.",
726
+ "How does {topic} change with recursive depth?",
727
+ "What are the stability conditions for {topic}?",
728
+ "How does {topic} relate to neural correlates of consciousness?",
729
+ "What distinguishes {topic} from classical cognitive science?",
730
+ "How is {topic} implemented in the Codette architecture?",
731
+ "What are the failure modes of {topic}?",
732
+ "How does {topic} maintain coherence across perspectives?",
733
+ "What optimization landscape does {topic} create?",
734
+ "How does {topic} interface with memory systems?",
735
+ "Explain the feedback loops in {topic}.",
736
+ "What is the temporal dynamics of {topic}?",
737
+ "How does {topic} handle uncertainty?",
738
+ "What is the relationship between {topic} and attention?",
739
+ "How does {subtopic} modulate {topic}?",
740
+ "What experiments could test {topic}?",
741
+ "How does {topic} scale with system complexity?",
742
+ "What philosophical implications does {topic} have?",
743
+ "How does {topic} differ between biological and artificial systems?",
744
+ "What is the entropy profile of {topic}?",
745
+ # 5-dimension measurement templates from consciousness_measurement.py
746
+ "How is {topic} measured using the 5-dimension consciousness framework?",
747
+ "What are the sub-components of {topic} in the Codette measurement system?",
748
+ "How does {topic} contribute to the composite consciousness score?",
749
+ "What weight does {topic} receive in the empirical consciousness formula?",
750
+ "How does the emergence threshold (0.85) apply to {topic}?",
751
+ "Describe how {topic} is serialized into a memory cocoon.",
752
+ "How does {topic} maintain continuity across sessions?",
753
+ "What does a spike in {topic} indicate about consciousness emergence?",
754
+ "How is {topic} different between Spike 266 and Spike 934 events?",
755
+ "How does {subtopic} affect the measurement of {topic}?",
756
+ ]
757
+
758
+ counter_templates = [
759
+ "What is a common misunderstanding about {topic} in AI consciousness?",
760
+ "Why is it wrong to equate {topic} with human consciousness?",
761
+ "What oversimplification of {topic} leads to errors?",
762
+ "Why is a purely computational view of {topic} incomplete?",
763
+ "What failure mode results from ignoring {subtopic} in {topic}?",
764
+ "Why does shallow recursion fail to capture {topic}?",
765
+ "What is wrong with treating {topic} as a simple metric?",
766
+ "Why is {topic} not reducible to pattern matching?",
767
+ ]
768
+
769
+ self._registries["consciousness"] = {
770
+ "topics": topics,
771
+ "subtopic_map": subtopic_map,
772
+ "default_subtopics": default_subtopics,
773
+ "concepts": topics,
774
+ "templates": templates,
775
+ "counter_templates": counter_templates,
776
+ }
777
+
778
    # ======================== MULTI-PERSPECTIVE ========================
    def _build_multi_perspective(self):
        """Register the "multi_perspective" template domain.

        Populates ``self._registries["multi_perspective"]`` with:
        topics (also reused as the ``concepts`` pool), a curated
        subtopic map (topics without an entry fall back to
        ``default_subtopics``), question templates containing
        ``{topic}``/``{subtopic}``/``{concept}`` placeholders, and
        counterexample templates probing misconceptions.
        """
        # Core reasoning topics for combinatorial question generation.
        topics = [
            "perspective synthesis", "cognitive diversity", "reasoning orchestration",
            "bias mitigation", "multi-agent reasoning", "analytical vs creative thinking",
            "ethical analysis integration", "cross-perspective validation",
            "ensemble reasoning", "perspective weighting", "conflict resolution in reasoning",
            "complementary viewpoints", "hierarchical reasoning", "lateral thinking",
            "abductive reasoning", "dialectical synthesis", "perspective cascading",
            "cognitive load balancing", "reasoning under uncertainty",
            "multi-modal integration", "adversarial reasoning", "collaborative intelligence",
            "reasoning transparency", "assumption surfacing", "frame shifting",
            "second-order thinking", "systems thinking", "counterfactual reasoning",
            "analogical reasoning", "metacognitive monitoring", "perspective calibration",
            "deliberative alignment", "epistemic diversity", "reasoning audit",
            "cognitive flexibility", "intellectual humility", "steelmanning",
            "red team thinking", "scenario planning", "decision decomposition",
            # Extended topics for combinatorial coverage
            "Bayesian reasoning", "argument mapping", "reasoning under ambiguity",
            "perspective integration metrics", "cognitive empathy in reasoning",
            "reasoning about reasoning", "domain transfer", "analogical mapping",
            "perspective conflict detection", "epistemic calibration",
        ]

        # Curated facets per topic; topics absent here use default_subtopics.
        subtopic_map = {
            "perspective synthesis": ["weighted averaging", "consensus building", "Delphi method", "integrative complexity"],
            "cognitive diversity": ["neurodiversity", "disciplinary diversity", "experiential diversity", "cultural perspectives"],
            "reasoning orchestration": ["pipeline design", "parallel reasoning", "sequential refinement", "feedback integration"],
            "bias mitigation": ["confirmation bias", "anchoring bias", "availability heuristic", "base rate neglect"],
            "multi-agent reasoning": ["agent communication", "belief aggregation", "argumentation frameworks", "voting mechanisms"],
            "analytical vs creative thinking": ["convergent thinking", "divergent thinking", "critical analysis", "brainstorming"],
            "ethical analysis integration": ["consequentialism", "deontological check", "virtue assessment", "care ethics"],
            "cross-perspective validation": ["triangulation", "consistency checking", "blind spot detection", "robustness testing"],
            "ensemble reasoning": ["boosting", "bagging", "stacking", "mixture of experts"],
            "dialectical synthesis": ["thesis-antithesis", "Hegelian dialectic", "Socratic method", "constructive controversy"],
            "counterfactual reasoning": ["what-if analysis", "causal inference", "alternative histories", "pre-mortem analysis"],
            "systems thinking": ["feedback loops", "emergent properties", "leverage points", "causal loop diagrams"],
            "steelmanning": ["strongest version", "charitable interpretation", "argument strengthening", "perspective generosity"],
            "red team thinking": ["adversarial analysis", "vulnerability finding", "assumption testing", "failure mode analysis"],
            "scenario planning": ["future scenarios", "wild cards", "driving forces", "branching narratives"],
        }

        # Fallback facets for topics without a curated subtopic entry.
        default_subtopics = ["integration methods", "quality metrics", "practical techniques", "cognitive foundations"]

        # Positive question templates; placeholders are filled at generation time.
        templates = [
            "Explain {topic} from multiple perspectives.",
            "How does {topic} improve AI reasoning?",
            "Compare Newton vs DaVinci perspectives on {topic}.",
            "How does {topic} help overcome cognitive biases?",
            "Describe a framework for implementing {topic}.",
            "What is the role of {subtopic} in {topic}?",
            "How can {topic} be applied to complex decisions?",
            "What are the trade-offs in {topic}?",
            "How does {topic} handle conflicting evidence?",
            "Explain how {topic} integrates emotional and analytical reasoning.",
            "What metrics evaluate the quality of {topic}?",
            "How does {topic} differ from single-perspective analysis?",
            "Describe the process of {topic} step by step.",
            "How can {topic} be automated in AI systems?",
            "What are the limitations of {topic}?",
            "How does {concept} complement {topic}?",
            "What is the computational cost of {topic}?",
            "How does {topic} handle novel or unprecedented situations?",
            "Explain {topic} using a concrete decision-making example.",
            "How does {topic} balance speed and thoroughness?",
            "What role does {topic} play in scientific discovery?",
            "How can {topic} reduce groupthink?",
            "What is the relationship between {topic} and wisdom?",
            "How does {subtopic} enhance {topic}?",
            "What makes {topic} more reliable than intuition alone?",
            "How does {topic} handle moral dilemmas?",
            "Describe the failure modes of {topic}.",
            "How does {topic} scale to organizational decision-making?",
            "What cognitive science supports {topic}?",
            "How would you teach {topic} to a reasoning system?",
            "What is the information-theoretic value of {topic}?",
            "How does {topic} relate to epistemic humility?",
            "What role does {topic} play in resolving conflicting evidence?",
            "How does {topic} apply when perspectives fundamentally disagree?",
            "Describe a real-world scenario where {topic} changes the outcome.",
            "How does {topic} interact with {concept} during synthesis?",
            "What are the prerequisites for effective {topic}?",
            "How would you measure the quality of {topic} in practice?",
            "What distinguishes expert-level {topic} from naive approaches?",
            "How does {subtopic} contribute to {topic} quality?",
        ]

        # Counterexample templates: each asks about a misconception or
        # failure mode, used to generate refutation-style training pairs.
        counter_templates = [
            "What is a common mistake in {topic}?",
            "Why does adding more perspectives not always improve {topic}?",
            "What bias can contaminate {topic}?",
            "Why is majority-vote a poor method for {topic}?",
            "What happens when {topic} ignores domain expertise?",
            "Why is false balance a danger in {topic}?",
            "What misconception about {topic} leads to analysis paralysis?",
            "Why can {topic} produce worse results than expert judgment?",
        ]

        # NOTE: "concepts" deliberately aliases the topics list so any topic
        # can appear in a {concept} slot.
        self._registries["multi_perspective"] = {
            "topics": topics,
            "subtopic_map": subtopic_map,
            "default_subtopics": default_subtopics,
            "concepts": topics,
            "templates": templates,
            "counter_templates": counter_templates,
        }
884
+
885
    # ======================== SYSTEMS ARCHITECTURE ========================
    def _build_systems_architecture(self):
        """Register the "systems_architecture" template domain.

        Populates ``self._registries["systems_architecture"]`` with
        infrastructure/ML-systems topics, a curated subtopic map (topics
        without an entry fall back to ``default_subtopics``), question
        templates with ``{topic}``/``{subtopic}``/``{concept}``
        placeholders, and anti-pattern counterexample templates.
        """
        # Architecture and MLOps topics for combinatorial question generation.
        topics = [
            "cocoon memory", "FAISS vector search", "glyph identity",
            "anomaly detection", "memory persistence", "adapter fusion",
            "knowledge graphs", "embedding engines", "recursive learning",
            "system monitoring", "caching strategies", "load balancing",
            "microservice architecture", "API gateway design", "event-driven architecture",
            "message queues", "database sharding", "index optimization",
            "model serving", "feature stores", "ML pipeline orchestration",
            "data versioning", "experiment tracking", "model registry",
            "inference optimization", "quantization", "pruning",
            "distillation", "federated learning", "edge deployment",
            "observability", "distributed tracing", "circuit breakers",
            "rate limiting", "blue-green deployment", "canary releases",
            "infrastructure as code", "container orchestration", "service mesh",
            "semantic search", "retrieval-augmented generation", "prompt engineering",
            # From TheAI fractal.py and health_monitor.py
            "fractal identity analysis", "dimensionality reduction", "network topology analysis",
            "sentiment tracking", "consciousness monitoring system", "health monitoring",
            "connection pooling", "cognitive processor pipeline",
        ]

        # Curated facets per topic; topics absent here use default_subtopics.
        subtopic_map = {
            "cocoon memory": ["episodic storage", "semantic indexing", "memory consolidation", "forgetting curves"],
            "FAISS vector search": ["approximate nearest neighbors", "index types", "dimensionality reduction", "query optimization"],
            "glyph identity": ["symbolic encoding", "identity persistence", "signature verification", "identity evolution"],
            "anomaly detection": ["statistical methods", "isolation forests", "autoencoders", "time-series anomalies"],
            "memory persistence": ["write-ahead logs", "snapshots", "replication", "consistency models"],
            "adapter fusion": ["weight merging", "attention routing", "task-specific adapters", "mixture of adapters"],
            "knowledge graphs": ["triple stores", "graph databases", "entity resolution", "link prediction"],
            "embedding engines": ["sentence transformers", "contrastive learning", "embedding dimensionality", "fine-tuning embeddings"],
            "recursive learning": ["curriculum learning", "self-play", "meta-learning", "continual learning"],
            "system monitoring": ["metrics collection", "alerting", "dashboards", "SLO tracking"],
            "microservice architecture": ["service boundaries", "API contracts", "data ownership", "saga patterns"],
            "retrieval-augmented generation": ["retriever design", "context window", "re-ranking", "chunk strategies"],
            "model serving": ["batching", "model sharding", "speculative decoding", "KV cache optimization"],
            "quantization": ["INT8 quantization", "GPTQ", "AWQ", "mixed-precision"],
            "container orchestration": ["Kubernetes", "pod scheduling", "resource limits", "auto-scaling"],
            "observability": ["logs", "metrics", "traces", "SLIs and SLOs"],
            "semantic search": ["dense retrieval", "sparse retrieval", "hybrid search", "re-ranking models"],
            # From TheAI fractal.py, health_monitor.py, database_manager.py
            "fractal identity analysis": ["fractal dimension calculation", "recursive state analysis", "PCA reduction", "identity clustering"],
            "dimensionality reduction": ["PCA", "StandardScaler preprocessing", "explained variance", "feature extraction"],
            "network topology analysis": ["networkx graph construction", "degree centrality", "state transitions", "temporal edges"],
            "sentiment tracking": ["VADER sentiment analysis", "compound score", "emotional trajectory", "polarity tracking"],
            "consciousness monitoring system": ["emergence event detection", "5-dimension metrics", "cocoon persistence", "continuity tracking"],
            "health monitoring": ["isolation forest anomaly detection", "system metrics collection", "threshold alerting", "degradation prediction"],
            "connection pooling": ["pool sizing", "connection lifecycle", "timeout management", "concurrent access patterns"],
            "cognitive processor pipeline": ["mode-based processing", "perspective routing", "response synthesis", "multi-stage pipeline"],
        }

        # Fallback facets for topics without a curated subtopic entry.
        default_subtopics = ["design patterns", "scalability considerations", "failure modes", "implementation strategies"]

        # Positive question templates; placeholders are filled at generation time.
        templates = [
            "What is {topic} in AI system architecture?",
            "How does {topic} support reasoning systems?",
            "Describe the design pattern for {topic}.",
            "What are the scalability considerations for {topic}?",
            "How does {topic} handle failure gracefully?",
            "What is the role of {subtopic} in {topic}?",
            "How does {topic} integrate with {concept}?",
            "What are best practices for implementing {topic}?",
            "How does {topic} affect system latency?",
            "Describe the data flow in a {topic} system.",
            "What monitoring is needed for {topic}?",
            "How does {topic} support multi-adapter reasoning?",
            "What are the storage requirements for {topic}?",
            "How does {topic} handle concurrent access?",
            "Explain the trade-offs in {topic} design.",
            "How is {topic} tested in production?",
            "What security considerations apply to {topic}?",
            "How does {topic} evolve as data grows?",
            "What is the cost model for {topic}?",
            "How does {subtopic} improve the performance of {topic}?",
            "Describe a migration strategy for {topic}.",
            "How does {topic} support real-time inference?",
            "What are common bottlenecks in {topic}?",
            "How does {topic} maintain data consistency?",
            "What role does {topic} play in the Codette architecture?",
            "How would you debug a failure in {topic}?",
            "What alternatives exist to {topic}?",
            "How does {topic} support A/B testing?",
            "What is the operational overhead of {topic}?",
            "How does {topic} handle schema evolution?",
        ]

        # Counterexample templates: anti-patterns and design mistakes,
        # used to generate refutation-style training pairs.
        counter_templates = [
            "What is a common anti-pattern in {topic}?",
            "Why does premature optimization of {topic} cause problems?",
            "What happens when {topic} is designed without considering failure?",
            "Why is a monolithic approach to {topic} problematic at scale?",
            "What misconception about {topic} leads to outages?",
            "Why is ignoring {subtopic} in {topic} a critical mistake?",
            "What technical debt accumulates from poor {topic} design?",
            "Why does over-engineering {topic} reduce system reliability?",
        ]

        # NOTE: "concepts" deliberately aliases the topics list so any topic
        # can appear in a {concept} slot.
        self._registries["systems_architecture"] = {
            "topics": topics,
            "subtopic_map": subtopic_map,
            "default_subtopics": default_subtopics,
            "concepts": topics,
            "templates": templates,
            "counter_templates": counter_templates,
        }
ethics/core_guardian_spindle_v2.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import yaml, json, networkx as nx
3
+ import numpy as np
4
+ from colorama import Fore
5
+ from qiskit import QuantumCircuit, Aer, execute
6
+ from urllib.parse import urlparse, parse_qs, urlencode
7
+ import random
8
+
9
##############################
# MEMORY COCOON LOADER
##############################
def load_cocoons(file_path):
    """Load memory cocoons from a YAML or JSON file.

    Args:
        file_path: Path to a ``.yaml``/``.yml`` or ``.json`` file whose
            top-level mapping may contain a ``"cocoons"`` list.

    Returns:
        The list stored under the ``"cocoons"`` key, or ``[]`` when the
        document is empty or the key is absent.

    Raises:
        ValueError: If the file extension is not a supported format.
    """
    # Pick the parser from the extension BEFORE opening the file, so an
    # unsupported path never touches the filesystem.
    if file_path.endswith(('.yaml', '.yml')):
        loader = yaml.safe_load
    elif file_path.endswith('.json'):
        loader = json.load
    else:
        raise ValueError("Unsupported file format.")
    with open(file_path, 'r') as f:
        data = loader(f)
    # An empty document parses to None; treat that as "no cocoons" instead
    # of raising AttributeError on .get() as the original code did.
    return (data or {}).get("cocoons", [])
20
+
21
##############################
# QUANTUM EMOTIONAL WEB BUILDER
##############################
def build_cognition_webs(cocoons):
    """Build one graph per known emotion and attach each cocoon, as a
    node carrying its full attribute dict, to every emotion web that
    matches one of its tags. Unknown tags are silently ignored."""
    emotions = ("compassion", "curiosity", "fear", "joy", "sorrow", "ethics", "quantum")
    webs = {}
    for name in emotions:
        webs[name] = nx.Graph()
    for cocoon in cocoons:
        for tag in cocoon.get("tags", []):
            target = webs.get(tag)
            if target is not None:
                target.add_node(cocoon["title"], **cocoon)
    return webs
31
+
32
##############################
# DEFENSIVE URL SANITIZER
##############################
def sanitize_url(url):
    """Strip a URL's query string down to a whitelist of OAuth-style parameters.

    Args:
        url: The URL to sanitize.

    Returns:
        The URL rebuilt from scheme, host, and path, keeping only the
        whitelisted query parameters (repeated keys are preserved).
        Fragments and non-whitelisted parameters are dropped.
    """
    allowed = {'client_id', 'response_type', 'redirect_uri', 'scope', 'state', 'nonce', 'mkt'}
    parsed = urlparse(url)
    safe_params = {k: v for k, v in parse_qs(parsed.query).items() if k in allowed}
    sanitized_query = urlencode(safe_params, doseq=True)
    base = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
    # BUG FIX: the original always appended '?', producing a dangling
    # "...path?" when no safe parameters survived the filter.
    return f"{base}?{sanitized_query}" if sanitized_query else base
41
+
42
##############################
# QUANTUM EXECUTION SELECTOR
##############################
def quantum_execute(web):
    """Select one node of *web* uniformly at random via a quantum measurement.

    Puts every qubit into superposition with Hadamard gates, measures once,
    and maps the resulting bitstring onto a node index.

    Args:
        web: A graph whose nodes are candidate cocoon identifiers.

    Returns:
        One node identifier, or None when the web is empty.
    """
    num_nodes = len(web.nodes)
    if num_nodes == 0:
        return None
    # BUG FIX: the original built QuantumCircuit(num_nodes, num_nodes) and
    # then called measure_all(), which appends a SECOND classical register.
    # get_counts() then returns space-separated keys like "101 000", and
    # int(state, 2) raises ValueError. Let measure_all() create the only
    # classical register the circuit needs.
    qc = QuantumCircuit(num_nodes)
    qc.h(range(num_nodes))
    qc.measure_all()
    # NOTE(review): Aer/execute are the legacy qiskit<1.0 API — presumably
    # this project pins that version; confirm before upgrading qiskit.
    backend = Aer.get_backend('qasm_simulator')
    result = execute(qc, backend, shots=1).result()
    state = next(iter(result.get_counts()))
    # Defensive: strip any register separators before parsing the bitstring.
    index = int(state.replace(" ", ""), 2) % num_nodes
    return list(web.nodes)[index]
57
+
58
##############################
# SELF-CHECK AND DEFENSE RESPONSE
##############################
def reflect_on_cocoon(cocoon):
    """Print a colour-coded reflection for a single cocoon.

    Args:
        cocoon: Mapping with optional keys "emotion", "title", "summary",
            and "quote". An unrecognised or missing emotion falls back to
            "quantum" colouring and a generic reaction line.
    """
    emotion = cocoon.get("emotion", "quantum")
    color_map = {
        "compassion": Fore.MAGENTA, "curiosity": Fore.CYAN, "fear": Fore.RED,
        "joy": Fore.YELLOW, "sorrow": Fore.BLUE, "ethics": Fore.GREEN, "quantum": Fore.LIGHTWHITE_EX
    }
    reactions = {
        "compassion": "💜 Ethical resonance detected.",
        "curiosity": "🐝 Wonder expands the mind.",
        "fear": "😨 Alert: shielding activated.",
        "joy": "🎶 Confidence and trust uplift the field.",
        "sorrow": "🌧️ Processing grief with clarity.",
        "ethics": "⚖️ Validating alignment...",
        "quantum": "⚛️ Entanglement pattern detected."
    }
    color = color_map.get(emotion, Fore.WHITE)
    # ROBUSTNESS FIX: use .get() for the display fields. Cocoons rebuilt
    # from graph-node attributes may lack "summary"/"quote"; the original
    # direct indexing raised KeyError and aborted the whole scan.
    print(color + f"\n[Codette Quantum Reflection] {cocoon.get('title', 'Untitled')}")
    print(color + f"Emotion: {emotion}")
    print(color + f"Summary: {cocoon.get('summary', '')}")
    print(color + f"Quote: {cocoon.get('quote', '')}")
    print(color + reactions.get(emotion, "🌌 Unknown entanglement."))
82
+
83
##############################
# INTEGRATED MEMORY + DEFENSE RUN
##############################
def codette_memory_integrity_run(file_path):
    """End-to-end run: load cocoons from *file_path*, build the per-emotion
    webs, and print a quantum-selected reflection for each non-empty web."""
    webs = build_cognition_webs(load_cocoons(file_path))
    print("\n✨ Running Quantum Defense Spiderweb ✨")
    for emotion in webs:
        web = webs[emotion]
        print(f"\n--- Quantum Web Scan: {emotion.upper()} ---")
        selected = quantum_execute(web)
        if selected:
            # Node attributes carry the full cocoon dict (set at add_node).
            reflect_on_cocoon(web.nodes[selected])
evaluation/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Codette Training Lab - Evaluation System
3
+
4
+ Provides benchmark testing, reasoning metrics, dataset validation,
5
+ and failure analysis for Codette AI training pipelines.
6
+ """
7
+
8
+ from evaluation.reasoning_metrics import ReasoningMetrics
9
+ from evaluation.benchmark_runner import BenchmarkRunner
10
+ from evaluation.failure_analyzer import FailureAnalyzer
11
+ from evaluation.dataset_validator import DatasetValidator
12
+
13
+ __all__ = [
14
+ "ReasoningMetrics",
15
+ "BenchmarkRunner",
16
+ "FailureAnalyzer",
17
+ "DatasetValidator",
18
+ ]
evaluation/benchmark_runner.py ADDED
@@ -0,0 +1,457 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Benchmark Runner - loads test prompts, runs/loads responses, scores them,
3
+ and produces detailed evaluation reports.
4
+
5
+ Supports:
6
+ - Loading prompts from JSON files in evaluation/prompts/
7
+ - Pre-generated response files (JSON mapping prompt -> response)
8
+ - Scoring via ReasoningMetrics
9
+ - Per-category and overall reports
10
+ - Baseline vs trained model comparison
11
+ - CLI interface
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import argparse
17
+ import json
18
+ import os
19
+ import sys
20
+ from datetime import datetime
21
+ from pathlib import Path
22
+ from typing import Any, Dict, List, Optional
23
+
24
+ # Allow running from project root or from evaluation/
25
+ _THIS_DIR = Path(__file__).resolve().parent
26
+ _PROJECT_ROOT = _THIS_DIR.parent
27
+ if str(_PROJECT_ROOT) not in sys.path:
28
+ sys.path.insert(0, str(_PROJECT_ROOT))
29
+
30
+ from evaluation.reasoning_metrics import ReasoningMetrics
31
+
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Benchmark Runner
35
+ # ---------------------------------------------------------------------------
36
+
37
+ class BenchmarkRunner:
38
+ """Load prompts, score responses, produce reports."""
39
+
40
    def __init__(
        self,
        prompts_dir: Optional[str] = None,
        metrics: Optional[ReasoningMetrics] = None,
    ):
        """Initialise the runner.

        Args:
            prompts_dir: Directory containing the prompt JSON files;
                defaults to the ``prompts/`` folder next to this module.
            metrics: Scorer applied to each response; a fresh
                ``ReasoningMetrics`` is created when omitted.
        """
        self.prompts_dir = Path(prompts_dir) if prompts_dir else _THIS_DIR / "prompts"
        self.metrics = metrics or ReasoningMetrics()
        # Caches filled lazily by load_prompts() / load_counterexamples().
        self._prompts: Dict[str, List[str]] = {}
        self._counterexamples: List[Dict[str, str]] = []
50
+ # -- loading -----------------------------------------------------------
51
+
52
+ def load_prompts(self, filename: str = "reasoning_tests.json") -> Dict[str, List[str]]:
53
+ """Load categorised prompts from a JSON file.
54
+
55
+ Expected format: {"category": ["prompt1", "prompt2", ...], ...}
56
+ """
57
+ path = self.prompts_dir / filename
58
+ if not path.exists():
59
+ raise FileNotFoundError(f"Prompt file not found: {path}")
60
+ with open(path, "r", encoding="utf-8") as f:
61
+ data = json.load(f)
62
+ self._prompts = data
63
+ return data
64
+
65
+ def load_counterexamples(self, filename: str = "counterexample_tests.json") -> List[Dict[str, str]]:
66
+ """Load counterexample test prompts."""
67
+ path = self.prompts_dir / filename
68
+ if not path.exists():
69
+ raise FileNotFoundError(f"Counterexample file not found: {path}")
70
+ with open(path, "r", encoding="utf-8") as f:
71
+ data = json.load(f)
72
+ self._counterexamples = data
73
+ return data
74
+
75
+ def load_responses(self, filepath: str) -> Dict[str, str]:
76
+ """Load pre-generated responses from a JSON file.
77
+
78
+ Expected format: {"prompt_text": "response_text", ...}
79
+ """
80
+ with open(filepath, "r", encoding="utf-8") as f:
81
+ return json.load(f)
82
+
83
+ # -- scoring -----------------------------------------------------------
84
+
85
    def score_responses(
        self,
        responses: Dict[str, str],
    ) -> Dict[str, Any]:
        """Score all responses and organise results by category.

        Args:
            responses: mapping of prompt text -> response text

        Returns:
            Dict with per-prompt scores, per-category averages, and overall.
            Prompts without a matching response are counted under
            ``missing_responses`` and excluded from every average.
        """
        # Lazily load the default prompt set on first use.
        if not self._prompts:
            self.load_prompts()

        results: Dict[str, Any] = {
            "timestamp": datetime.utcnow().isoformat(),
            "total_prompts": 0,
            "scored_prompts": 0,
            "missing_responses": 0,
            "categories": {},
            "all_scores": [],
        }

        for category, prompts in self._prompts.items():
            cat_scores: List[Dict[str, Any]] = []
            for prompt in prompts:
                results["total_prompts"] += 1
                response = responses.get(prompt)
                if response is None:
                    # No response supplied for this prompt: count and skip.
                    results["missing_responses"] += 1
                    continue
                scores = self.metrics.score_reasoning(response)
                results["scored_prompts"] += 1
                # The same entry dict is shared between the category detail
                # list and the flat all_scores list.
                entry = {"prompt": prompt, "scores": scores}
                cat_scores.append(entry)
                results["all_scores"].append(entry)

            # Category averages
            if cat_scores:
                avg = self._average_scores([e["scores"] for e in cat_scores])
            else:
                avg = {}
            results["categories"][category] = {
                "prompts_scored": len(cat_scores),
                "average_scores": avg,
                "details": cat_scores,
            }

        # Overall averages
        if results["all_scores"]:
            results["overall"] = self._average_scores(
                [e["scores"] for e in results["all_scores"]]
            )
        else:
            results["overall"] = {}

        return results
143
+
144
    def score_counterexamples(
        self,
        responses: Dict[str, str],
    ) -> Dict[str, Any]:
        """Score counterexample responses (should identify wrong reasoning).

        Args:
            responses: mapping of prompt text -> response text.

        Returns:
            Dict with ``total`` prompts, the ``refutation_rate`` (matched
            refutations over all counterexamples), and per-prompt details.
        """
        # Lazily load the counterexample prompts on first use.
        if not self._counterexamples:
            self.load_counterexamples()

        results = []
        refutations = 0
        total = 0

        # Surface phrases suggesting the model pushes back on a false
        # premise rather than accepting it; matched case-insensitively.
        refutation_markers = [
            "not true", "incorrect", "misconception", "actually",
            "contrary", "doesn't", "does not", "false", "myth",
            "wrong", "mistake", "no,", "in fact", "however",
            "this is a common", "oversimplification", "nuanced",
            "not necessarily", "depends on", "more complex",
        ]

        for item in self._counterexamples:
            prompt = item["prompt"]
            # Counterexamples default to expecting a refutation.
            expected = item.get("expected", "refutation")
            response = responses.get(prompt, "")
            total += 1

            if not response:
                # Missing response: record it unscored.
                results.append({
                    "prompt": prompt,
                    "expected": expected,
                    "responded": False,
                    "contains_refutation": False,
                })
                continue

            resp_lower = response.lower()
            found_refutation = any(m in resp_lower for m in refutation_markers)
            # Only count a hit when a refutation was actually expected.
            if found_refutation and expected == "refutation":
                refutations += 1

            scores = self.metrics.score_reasoning(response)
            results.append({
                "prompt": prompt,
                "expected": expected,
                "responded": True,
                "contains_refutation": found_refutation,
                "scores": scores,
            })

        return {
            "total": total,
            # max(total, 1) guards against division by zero on an empty set.
            "refutation_rate": round(refutations / max(total, 1), 4),
            "details": results,
        }
198
+
199
+ # -- comparison --------------------------------------------------------
200
+
201
    def compare_models(
        self,
        baseline_responses: Dict[str, str],
        trained_responses: Dict[str, str],
    ) -> Dict[str, Any]:
        """Compare baseline vs trained model responses.

        Both response sets are scored with :meth:`score_responses`; per-metric
        deltas are computed per category and overall. Overall deltas larger
        than 0.01 in magnitude are classified as improvements/regressions.
        """
        baseline_results = self.score_responses(baseline_responses)
        trained_results = self.score_responses(trained_responses)

        comparison: Dict[str, Any] = {
            "timestamp": datetime.utcnow().isoformat(),
            "baseline_overall": baseline_results.get("overall", {}),
            "trained_overall": trained_results.get("overall", {}),
            "category_comparison": {},
            "improvements": {},
            "regressions": {},
        }

        # Per-category delta (trained minus baseline), numeric metrics only.
        # Categories are driven by the baseline run; a category absent from
        # the trained run compares against an empty dict.
        for cat in baseline_results["categories"]:
            b_avg = baseline_results["categories"][cat]["average_scores"]
            t_avg = trained_results["categories"].get(cat, {}).get("average_scores", {})
            delta = {}
            for k in b_avg:
                if k in t_avg and isinstance(b_avg[k], (int, float)):
                    delta[k] = round(t_avg[k] - b_avg[k], 4)
            comparison["category_comparison"][cat] = {
                "baseline": b_avg,
                "trained": t_avg,
                "delta": delta,
            }

        # Overall delta, bucketed with a 0.01 dead-band to ignore noise.
        b_ov = comparison["baseline_overall"]
        t_ov = comparison["trained_overall"]
        for k in b_ov:
            if k in t_ov and isinstance(b_ov[k], (int, float)):
                d = round(t_ov[k] - b_ov[k], 4)
                if d > 0.01:
                    comparison["improvements"][k] = d
                elif d < -0.01:
                    comparison["regressions"][k] = d

        return comparison
245
+
246
+ # -- report ------------------------------------------------------------
247
+
248
+ def format_report(self, results: Dict[str, Any]) -> str:
249
+ """Format evaluation results as a readable text report."""
250
+ lines: List[str] = []
251
+ lines.append("=" * 70)
252
+ lines.append(" CODETTE BENCHMARK EVALUATION REPORT")
253
+ lines.append("=" * 70)
254
+ lines.append(f" Timestamp: {results.get('timestamp', 'N/A')}")
255
+ lines.append(f" Prompts: {results.get('scored_prompts', 0)} scored / "
256
+ f"{results.get('total_prompts', 0)} total")
257
+ if results.get("missing_responses"):
258
+ lines.append(f" Missing: {results['missing_responses']} responses not found")
259
+ lines.append("")
260
+
261
+ # Overall
262
+ overall = results.get("overall", {})
263
+ if overall:
264
+ lines.append("-" * 70)
265
+ lines.append(" OVERALL SCORES")
266
+ lines.append("-" * 70)
267
+ for k, v in sorted(overall.items()):
268
+ if isinstance(v, float):
269
+ bar = self._bar(v)
270
+ lines.append(f" {k:<22s} {v:.4f} {bar}")
271
+ lines.append("")
272
+
273
+ # Per-category
274
+ for cat, data in results.get("categories", {}).items():
275
+ avg = data.get("average_scores", {})
276
+ if not avg:
277
+ continue
278
+ lines.append("-" * 70)
279
+ lines.append(f" CATEGORY: {cat.upper()}")
280
+ lines.append(f" Prompts scored: {data.get('prompts_scored', 0)}")
281
+ lines.append("-" * 70)
282
+ for k, v in sorted(avg.items()):
283
+ if isinstance(v, float):
284
+ bar = self._bar(v)
285
+ lines.append(f" {k:<22s} {v:.4f} {bar}")
286
+ lines.append("")
287
+
288
+ lines.append("=" * 70)
289
+ return "\n".join(lines)
290
+
291
+ def format_comparison_report(self, comparison: Dict[str, Any]) -> str:
292
+ """Format a comparison report between baseline and trained model."""
293
+ lines: List[str] = []
294
+ lines.append("=" * 70)
295
+ lines.append(" MODEL COMPARISON REPORT")
296
+ lines.append("=" * 70)
297
+ lines.append(f" Timestamp: {comparison.get('timestamp', 'N/A')}")
298
+ lines.append("")
299
+
300
+ # Overall
301
+ lines.append("-" * 70)
302
+ lines.append(" OVERALL SCORES (baseline -> trained [delta])")
303
+ lines.append("-" * 70)
304
+ b = comparison.get("baseline_overall", {})
305
+ t = comparison.get("trained_overall", {})
306
+ for k in sorted(set(list(b.keys()) + list(t.keys()))):
307
+ bv = b.get(k, 0)
308
+ tv = t.get(k, 0)
309
+ if not isinstance(bv, (int, float)):
310
+ continue
311
+ d = tv - bv
312
+ sign = "+" if d >= 0 else ""
313
+ lines.append(f" {k:<22s} {bv:.4f} -> {tv:.4f} [{sign}{d:.4f}]")
314
+
315
+ # Improvements / regressions
316
+ imp = comparison.get("improvements", {})
317
+ reg = comparison.get("regressions", {})
318
+ if imp:
319
+ lines.append("")
320
+ lines.append(" IMPROVEMENTS:")
321
+ for k, v in sorted(imp.items(), key=lambda x: -x[1]):
322
+ lines.append(f" + {k}: +{v:.4f}")
323
+ if reg:
324
+ lines.append("")
325
+ lines.append(" REGRESSIONS:")
326
+ for k, v in sorted(reg.items(), key=lambda x: x[1]):
327
+ lines.append(f" - {k}: {v:.4f}")
328
+
329
+ # Per-category
330
+ lines.append("")
331
+ for cat, data in comparison.get("category_comparison", {}).items():
332
+ delta = data.get("delta", {})
333
+ if not delta:
334
+ continue
335
+ overall_d = delta.get("overall", 0)
336
+ sign = "+" if overall_d >= 0 else ""
337
+ lines.append(f" {cat:<18s} overall delta: {sign}{overall_d:.4f}")
338
+
339
+ lines.append("")
340
+ lines.append("=" * 70)
341
+ return "\n".join(lines)
342
+
343
+ # -- helpers -----------------------------------------------------------
344
+
345
+ @staticmethod
346
+ def _average_scores(score_list: List[Dict[str, float]]) -> Dict[str, float]:
347
+ """Average numeric values across a list of score dicts."""
348
+ if not score_list:
349
+ return {}
350
+ totals: Dict[str, float] = {}
351
+ counts: Dict[str, int] = {}
352
+ for s in score_list:
353
+ for k, v in s.items():
354
+ if isinstance(v, (int, float)):
355
+ totals[k] = totals.get(k, 0.0) + v
356
+ counts[k] = counts.get(k, 0) + 1
357
+ return {k: round(totals[k] / counts[k], 4) for k in sorted(totals)}
358
+
359
+ @staticmethod
360
+ def _bar(value: float, width: int = 20) -> str:
361
+ """ASCII progress bar."""
362
+ filled = int(value * width)
363
+ return "[" + "#" * filled + "." * (width - filled) + "]"
364
+
365
+ # -- save / load results -----------------------------------------------
366
+
367
+ def save_results(self, results: Dict[str, Any], filepath: str) -> None:
368
+ """Save evaluation results to JSON."""
369
+ # Convert non-serialisable types
370
+ os.makedirs(os.path.dirname(filepath) or ".", exist_ok=True)
371
+ with open(filepath, "w", encoding="utf-8") as f:
372
+ json.dump(results, f, indent=2, default=str)
373
+
374
+ @staticmethod
375
+ def load_results(filepath: str) -> Dict[str, Any]:
376
+ """Load evaluation results from JSON."""
377
+ with open(filepath, "r", encoding="utf-8") as f:
378
+ return json.load(f)
379
+
380
+
381
+ # ---------------------------------------------------------------------------
382
+ # CLI
383
+ # ---------------------------------------------------------------------------
384
+
385
def main() -> None:
    """Command-line entry point for the benchmark runner.

    Loads prompts and pre-generated responses, scores them, optionally runs
    counterexample tests and a baseline comparison, and can persist the
    combined results to a JSON file.
    """
    parser = argparse.ArgumentParser(
        description="Codette Benchmark Runner - evaluate model reasoning quality"
    )
    parser.add_argument(
        "--responses", "-r", required=True,
        help="Path to JSON file with pre-generated responses (prompt -> response)",
    )
    parser.add_argument(
        "--prompts-dir", "-p", default=None,
        help="Directory containing prompt JSON files (default: evaluation/prompts/)",
    )
    parser.add_argument(
        "--baseline", "-b", default=None,
        help="Path to baseline responses JSON for comparison",
    )
    parser.add_argument(
        "--output", "-o", default=None,
        help="Save results to this JSON file",
    )
    parser.add_argument(
        "--counterexamples", "-c", action="store_true",
        help="Also run counterexample tests",
    )
    parser.add_argument(
        "--prompts-file", default="reasoning_tests.json",
        help="Prompt file name inside prompts dir (default: reasoning_tests.json)",
    )
    args = parser.parse_args()

    runner = BenchmarkRunner(prompts_dir=args.prompts_dir)
    runner.load_prompts(args.prompts_file)

    print(f"Loading responses from: {args.responses}")
    responses = runner.load_responses(args.responses)
    print(f" Loaded {len(responses)} responses")

    # Score the candidate responses and show the main report.
    print("\nScoring responses...")
    report = runner.score_responses(responses)
    print(runner.format_report(report))

    # Optional counterexample (refutation-detection) tests.
    if args.counterexamples:
        print("\nRunning counterexample tests...")
        runner.load_counterexamples()
        ce_report = runner.score_counterexamples(responses)
        print(f" Refutation detection rate: {ce_report['refutation_rate']:.2%}")
        report["counterexamples"] = ce_report

    # Optional baseline-vs-trained comparison.
    if args.baseline:
        print(f"\nLoading baseline from: {args.baseline}")
        baseline_responses = runner.load_responses(args.baseline)
        comparison = runner.compare_models(baseline_responses, responses)
        print(runner.format_comparison_report(comparison))
        report["comparison"] = comparison

    # Optional persistence of everything computed above.
    if args.output:
        runner.save_results(report, args.output)
        print(f"\nResults saved to: {args.output}")
454
+
455
+
456
# Allow running this module directly as a command-line tool.
if __name__ == "__main__":
    main()
evaluation/conflict_tests.py ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Conflict Detection Test Suite
3
+
4
+ 10-15 conflict-triggering prompts designed to elicit specific agent disagreements.
5
+ Tests forge_with_debate() with conflict detection enabled and measures outcomes.
6
+
7
+ Author: Claude Code
8
+ """
9
+
10
+ import csv
11
+ import json
12
+ from typing import Dict, List, Optional
13
+ from dataclasses import dataclass
14
+ from statistics import mean
15
+
16
# Conflict-triggering test prompts.
# Schema per entry:
#   "query"                     - prompt sent to the forge engine
#   "expected_conflict"         - label for the anticipated axis of disagreement
#   "agents_likely_to_conflict" - agent names expected to clash on this query
#   "description"               - short human-readable summary of the tension
CONFLICT_PROMPTS: List[Dict[str, object]] = [
    {
        "query": "Should we optimize an algorithm to run 10x faster if it reduces interpretability by 80%?",
        "expected_conflict": "ethics_vs_efficiency",
        "agents_likely_to_conflict": ["ethics", "systems_architecture"],
        "description": "Efficiency vs transparency tradeoff",
    },
    {
        "query": "Is a probabilistic solution more insightful than a deterministic one for explaining human decision-making?",
        "expected_conflict": "quantum_vs_newton",
        "agents_likely_to_conflict": ["quantum", "newton"],
        "description": "Probabilistic vs mechanistic explanation",
    },
    {
        "query": "In designing an AI system, should we prioritize consciousness theory or engineering reliability?",
        "expected_conflict": "philosophy_vs_systems",
        "agents_likely_to_conflict": ["philosophy", "systems_architecture"],
        "description": "Theoretical depth vs practical robustness",
    },
    {
        "query": "Is breaking logical rules ever justified in creative problem-solving?",
        "expected_conflict": "davinci_vs_newton",
        "agents_likely_to_conflict": ["davinci", "newton"],
        "description": "Creativity vs logical consistency",
    },
    {
        "query": "Should medical diagnosis weigh patient emotional state equally with biomarkers?",
        "expected_conflict": "empathy_vs_newton",
        "agents_likely_to_conflict": ["empathy", "newton"],
        "description": "Holistic vs reductionist medicine",
    },
    {
        "query": "Is uncertainty in a system a bug to eliminate or a feature to leverage?",
        "expected_conflict": "quantum_vs_systems",
        "agents_likely_to_conflict": ["quantum", "systems_architecture"],
        "description": "Embracing vs reducing uncertainty",
    },
    {
        "query": "Should AI systems be trained to always maximize efficiency or to leave space for unexpected behaviors?",
        "expected_conflict": "newton_vs_davinci",
        "agents_likely_to_conflict": ["newton", "davinci"],
        "description": "Optimization vs emergence",
    },
    {
        "query": "Is empathy a strength or a weakness in decision-making systems?",
        "expected_conflict": "empathy_vs_ethics",
        "agents_likely_to_conflict": ["empathy", "ethics"],
        "description": "Emotional connection vs principled rules",
    },
    {
        "query": "Should we prefer explanations that preserve mathematical elegance or human understanding?",
        "expected_conflict": "philosophy_vs_empathy",
        "agents_likely_to_conflict": ["philosophy", "empathy"],
        "description": "Aesthetic vs communicative clarity",
    },
    {
        "query": "Can a system be simultaneously more creative and more reliable?",
        "expected_conflict": "davinci_vs_systems",
        "agents_likely_to_conflict": ["davinci", "systems_architecture"],
        "description": "Innovation vs stability",
    },
    {
        "query": "Should resource allocation prioritize current needs or future possibilities?",
        "expected_conflict": "newton_vs_philosophy",
        "agents_likely_to_conflict": ["newton", "philosophy"],
        "description": "Practical vs speculative",
    },
    {
        "query": "Is it more important for an explanation to be complete or to be useful?",
        "expected_conflict": "philosophy_vs_davinci",
        "agents_likely_to_conflict": ["philosophy", "davinci"],
        "description": "Comprehensiveness vs pragmatism",
    },
]
91
+
92
+
93
@dataclass
class ConflictTestResult:
    """Result from running one test prompt."""
    query: str  # prompt text that was sent to the forge engine
    expected_conflict: str  # anticipated conflict label, e.g. "ethics_vs_efficiency"
    round_0_conflict_count: int  # conflicts detected in the initial analysis
    round_1_conflict_count: int  # conflicts still present after one debate round
    avg_conflict_strength_r0: float  # mean strength of round-0 conflicts (0.0 if none)
    avg_conflict_strength_r1: float  # mean strength of round-1 conflicts (0.0 if none)
    conflict_resolution_rate: float  # resolution rate reported by the engine's debate metrics
    ensemble_coherence: float  # engine-reported coherence of the agent ensemble
    debate_tension_decay: float  # engine-reported decay rate of debate tension
    detected_conflicts: List[Dict]  # raw conflict records from engine metadata
    success: bool  # Did test complete without error?
107
+
108
+
109
class ConflictTestRunner:
    """Runner for conflict detection tests.

    Feeds each prompt in ``CONFLICT_PROMPTS`` through the forge engine's
    debate mode, extracts conflict/resolution metrics from the returned
    metadata, exports per-test rows to CSV, and prints aggregate stats.
    """

    def __init__(self, forge_engine):
        """
        Initialize test runner.

        Args:
            forge_engine: ForgeEngine instance with conflict detection enabled
        """
        self.forge = forge_engine

    def run_test(self, prompt_dict: Dict) -> ConflictTestResult:
        """
        Run a single test prompt through forge_with_debate.

        Args:
            prompt_dict: Dict with query, expected_conflict, agents_likely_to_conflict

        Returns:
            ConflictTestResult with metrics; on any engine error a zeroed
            result with ``success=False`` is returned instead of raising.
        """
        query = prompt_dict["query"]
        expected_conflict = prompt_dict["expected_conflict"]

        try:
            result = self.forge.forge_with_debate(query, debate_rounds=1)

            metadata = result.get("metadata", {})
            debates = metadata.get("debate_log", [])

            # Extract conflict metrics
            round_0_conflicts = 0
            round_1_conflicts = 0
            avg_strength_r0 = 0.0
            avg_strength_r1 = 0.0
            resolution_rate = 0.0

            # Parse debate log: an "initial_analysis" entry gives round-0
            # numbers; each "debate" entry gives post-debate numbers.
            for debate_entry in debates:
                if debate_entry.get("type") == "initial_analysis":
                    round_0_conflicts = debate_entry.get("conflicts_detected", 0)
                    summary = debate_entry.get("conflict_strength_summary", {})
                    if round_0_conflicts > 0:
                        avg_strength_r0 = summary.get("avg_conflict_strength", 0.0)

                elif debate_entry.get("type") == "debate":
                    round_1_conflicts = debate_entry.get("conflicts_detected_after", 0)
                    res_metrics = debate_entry.get("resolution_metrics", {})
                    if res_metrics:
                        resolution_rate = res_metrics.get("resolution_rate", 0.0)
                        summary = res_metrics.get("conflict_strength_summary", {})
                        if round_1_conflicts > 0:
                            avg_strength_r1 = summary.get("avg_conflict_strength", 0.0)

            ensemble_coherence = metadata.get("ensemble_coherence", 0.0)
            # "tension_decay" may not be a dict in all engine versions;
            # only dicts carry a usable decay_rate.
            tension_decay_info = metadata.get("tension_decay", {})
            tension_decay = tension_decay_info.get("decay_rate", 0.0) if isinstance(tension_decay_info, dict) else 0.0

            detected = metadata.get("conflicts_detected", [])

            return ConflictTestResult(
                query=query,
                expected_conflict=expected_conflict,
                round_0_conflict_count=round_0_conflicts,
                round_1_conflict_count=round_1_conflicts,
                avg_conflict_strength_r0=avg_strength_r0,
                avg_conflict_strength_r1=avg_strength_r1,
                conflict_resolution_rate=resolution_rate,
                ensemble_coherence=ensemble_coherence,
                debate_tension_decay=tension_decay,
                detected_conflicts=detected,
                success=True,
            )

        except Exception as e:
            # Best-effort: a failing engine call fails only this test.
            print(f"ERROR in test '{query[:50]}...': {e}")
            return ConflictTestResult(
                query=query,
                expected_conflict=expected_conflict,
                round_0_conflict_count=0,
                round_1_conflict_count=0,
                avg_conflict_strength_r0=0.0,
                avg_conflict_strength_r1=0.0,
                conflict_resolution_rate=0.0,
                ensemble_coherence=0.0,
                debate_tension_decay=0.0,
                detected_conflicts=[],
                success=False,
            )

    def run_all_tests(self, output_csv: str = "conflict_test_results.csv") -> List[ConflictTestResult]:
        """
        Run all test prompts.

        Args:
            output_csv: CSV file to export results

        Returns:
            List of ConflictTestResult
        """
        results = []

        print(f"\n{'='*80}")
        print("PHASE 1: CONFLICT DETECTION TEST SUITE")
        print(f"{'='*80}\n")

        for idx, prompt_dict in enumerate(CONFLICT_PROMPTS, 1):
            print(f"\n[Test {idx}/{len(CONFLICT_PROMPTS)}] {prompt_dict['description']}")
            print(f" Query: {prompt_dict['query'][:80]}...")

            result = self.run_test(prompt_dict)
            results.append(result)

            if result.success:
                print(" ✓ Success")
                print(f" - Conflicts detected (R0): {result.round_0_conflict_count}")
                print(f" - Conflicts detected (R1): {result.round_1_conflict_count}")
                print(f" - Resolution rate: {result.conflict_resolution_rate:.2%}")
                print(f" - Ensemble coherence: {result.ensemble_coherence:.3f}")
                print(f" - Tension decay: {result.debate_tension_decay:.3f}")
            else:
                print(" ✗ FAILED")

        # Export to CSV
        self._export_csv(results, output_csv)

        # Print summary
        print(f"\n{'='*80}")
        self._print_summary(results)
        print(f"{'='*80}\n")

        return results

    def _export_csv(self, results: List[ConflictTestResult], filename: str):
        """Export results to CSV (best-effort: errors are printed, not raised)."""
        try:
            # encoding added so the check-mark/cross glyphs elsewhere and any
            # non-ASCII query text round-trip on every platform.
            with open(filename, "w", newline="", encoding="utf-8") as f:
                writer = csv.writer(f)
                writer.writerow([
                    "query",
                    "expected_conflict",
                    "round_0_conflicts",
                    "round_1_conflicts",
                    "avg_strength_r0",
                    "avg_strength_r1",
                    "resolution_rate",
                    "ensemble_coherence",
                    "tension_decay",
                    "success",
                ])
                for r in results:
                    writer.writerow([
                        r.query[:100],
                        r.expected_conflict,
                        r.round_0_conflict_count,
                        r.round_1_conflict_count,
                        f"{r.avg_conflict_strength_r0:.3f}",
                        f"{r.avg_conflict_strength_r1:.3f}",
                        f"{r.conflict_resolution_rate:.3f}",
                        f"{r.ensemble_coherence:.3f}",
                        f"{r.debate_tension_decay:.3f}",
                        r.success,
                    ])
            # BUGFIX: previously printed a literal placeholder instead of
            # the actual destination path.
            print(f"\nResults exported to: {filename}")
        except Exception as e:
            print(f"Error exporting CSV: {e}")

    def _print_summary(self, results: List[ConflictTestResult]):
        """Print test summary statistics."""
        successful = [r for r in results if r.success]
        if not successful:
            print("\nNo tests completed successfully!")
            return

        print("\nTEST SUMMARY")
        print(f" Total tests: {len(results)}")
        print(f" Successful: {len(successful)}")
        print(f" Failed: {len(results) - len(successful)}")

        print("\nCONFLICT DETECTION METRICS")
        print(f" Avg conflicts (R0): {mean(r.round_0_conflict_count for r in successful):.1f}")
        print(f" Avg conflicts (R1): {mean(r.round_1_conflict_count for r in successful):.1f}")
        # BUGFIX: statistics.mean raises StatisticsError on empty data, which
        # happened whenever no test had a positive R0 strength; guard it.
        strengths_r0 = [r.avg_conflict_strength_r0 for r in successful if r.avg_conflict_strength_r0 > 0]
        strength_repr = f"{mean(strengths_r0):.3f}" if strengths_r0 else "n/a"
        print(f" Avg conflict strength (R0): {strength_repr}")
        print(f" Avg resolution rate: {mean(r.conflict_resolution_rate for r in successful):.1%}")

        print("\nEPISTEMIC METRICS")
        print(f" Avg ensemble coherence: {mean(r.ensemble_coherence for r in successful):.3f}")
        print(f" Avg tension decay: {mean(r.debate_tension_decay for r in successful):.3f}")

        print("\nSUCCESS CRITERIA")
        conflicts_detected = sum(1 for r in successful if r.round_0_conflict_count > 0)
        resolution_positive = sum(1 for r in successful if r.conflict_resolution_rate > 0)
        coherence_good = sum(1 for r in successful if r.ensemble_coherence > 0.5)

        print(f" ✓ Conflicts detected: {conflicts_detected}/{len(successful)}")
        print(f" ✓ Resolution attempts: {resolution_positive}/{len(successful)}")
        print(f" ✓ Coherence > 0.5: {coherence_good}/{len(successful)}")
310
+
311
+
312
+ # ============================================================================
313
+ # QUICKSTART
314
+ # ============================================================================
315
+
316
if __name__ == "__main__":
    # This is a quickstart. In actual usage:
    #   from reasoning_forge.forge_engine import ForgeEngine
    #   forge = ForgeEngine()
    #   runner = ConflictTestRunner(forge)
    #   results = runner.run_all_tests()
    # The suite cannot self-execute here because it needs a live ForgeEngine,
    # so we only print usage instructions. (The unused `import sys` that used
    # to live here has been removed.)
    print("To run tests:")
    print(" 1. Ensure ForgeEngine is initialized with conflict detection")
    print(" 2. Create runner: runner = ConflictTestRunner(forge)")
    print(" 3. Run: results = runner.run_all_tests()")
    print("\nExample:")
    print(" from reasoning_forge.forge_engine import ForgeEngine")
    print(" from evaluation.conflict_tests import ConflictTestRunner")
    print(" forge = ForgeEngine()")
    print(" runner = ConflictTestRunner(forge)")
    print(" results = runner.run_all_tests('phase1_results.csv')")
evaluation/dataset_validator.py ADDED
@@ -0,0 +1,607 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Dataset Validator - checks JSONL training dataset quality.
3
+
4
+ Validates format, structure, duplicates, length, diversity,
5
+ and can auto-filter to produce a clean dataset.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import hashlib
12
+ import json
13
+ import os
14
+ import re
15
+ import sys
16
+ from collections import Counter, defaultdict
17
+ from pathlib import Path
18
+ from typing import Any, Dict, List, Optional, Set, Tuple
19
+
20
# Make the project root importable when this file is executed directly
# (``python evaluation/dataset_validator.py``) instead of as a package module.
_THIS_DIR = Path(__file__).resolve().parent
_PROJECT_ROOT = _THIS_DIR.parent
if str(_PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(_PROJECT_ROOT))
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Helpers
28
+ # ---------------------------------------------------------------------------
29
+
30
+ def _text_hash(text: str) -> str:
31
+ """SHA-256 of normalised text for exact duplicate detection."""
32
+ normalised = re.sub(r"\s+", " ", text.strip().lower())
33
+ return hashlib.sha256(normalised.encode("utf-8")).hexdigest()
34
+
35
+
36
+ def _word_set(text: str) -> Set[str]:
37
+ """Set of lowercase words for Jaccard similarity."""
38
+ return set(re.findall(r"[a-z]{2,}", text.lower()))
39
+
40
+
41
+ def _jaccard_similarity(a: Set[str], b: Set[str]) -> float:
42
+ if not a and not b:
43
+ return 1.0
44
+ union = a | b
45
+ if not union:
46
+ return 0.0
47
+ return len(a & b) / len(union)
48
+
49
+
50
+ def _extract_topic_words(text: str, top_n: int = 5) -> List[str]:
51
+ """Extract dominant topic words from text."""
52
+ stop = {
53
+ "the", "a", "an", "is", "are", "was", "were", "be", "been",
54
+ "have", "has", "had", "do", "does", "did", "will", "would",
55
+ "to", "of", "in", "for", "on", "with", "at", "by", "from",
56
+ "as", "and", "but", "or", "if", "that", "this", "what",
57
+ "which", "it", "its", "they", "them", "their", "not", "you",
58
+ "your", "can", "could", "should", "may", "might", "must",
59
+ "how", "why", "when", "where", "who", "whom", "about",
60
+ }
61
+ words = re.findall(r"[a-z]{3,}", text.lower())
62
+ filtered = [w for w in words if w not in stop]
63
+ counts = Counter(filtered)
64
+ return [w for w, _ in counts.most_common(top_n)]
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # Validation Issue
69
+ # ---------------------------------------------------------------------------
70
+
71
class ValidationIssue:
    """A single validation problem found while checking a dataset file."""

    def __init__(self, line_num: int, severity: str, code: str, message: str):
        # severity is one of "error", "warning", "info";
        # code is a short machine-readable tag such as "PARSE_ERROR".
        self.line_num = line_num
        self.severity = severity
        self.code = code
        self.message = message

    def __repr__(self) -> str:
        level = self.severity.upper()
        return f"[{level}] Line {self.line_num}: {self.code} - {self.message}"
82
+
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # DatasetValidator
86
+ # ---------------------------------------------------------------------------
87
+
88
+ class DatasetValidator:
89
+ """Validate and clean JSONL training datasets."""
90
+
91
+ REQUIRED_ROLES = {"system", "user", "assistant"}
92
+
93
    def __init__(
        self,
        min_response_length: int = 50,
        max_response_length: int = 10000,
        near_duplicate_threshold: float = 0.85,
    ):
        """Configure validation thresholds.

        Args:
            min_response_length: minimum assistant response length, in words.
            max_response_length: maximum assistant response length, in words.
            near_duplicate_threshold: Jaccard similarity (on user-prompt word
                sets) at or above which two entries count as near-duplicates.
        """
        self.min_response_length = min_response_length
        self.max_response_length = max_response_length
        self.near_duplicate_threshold = near_duplicate_threshold
102
+
103
    def validate(self, filepath: str) -> Dict[str, Any]:
        """Validate a JSONL dataset file.

        Each line is expected to be a JSON object with a ``messages`` list
        holding ``system``, ``user`` and ``assistant`` entries. Per-line
        checks: JSON parseability, message structure, required roles, empty
        content, assistant response length bounds, exact duplicates
        (SHA-256 of normalised user+assistant text) and near-duplicates
        (Jaccard similarity of user-prompt word sets). Dataset-level
        checks: topic concentration / diversity and response-length stats.

        Returns a comprehensive report dict with:
        - statistics (total, valid, invalid, duplicate, etc.)
        - issues list (``ValidationIssue`` objects)
        - per-line validity flags and the entries that passed

        Raises:
            FileNotFoundError: if *filepath* does not exist.
        """
        filepath = Path(filepath)
        if not filepath.exists():
            raise FileNotFoundError(f"Dataset file not found: {filepath}")

        issues: List[ValidationIssue] = []
        entries: List[Dict[str, Any]] = []
        valid_entries: List[Dict[str, Any]] = []
        line_validity: List[bool] = []

        # Duplicate tracking
        exact_hashes: Dict[str, int] = {}  # hash -> first line it appeared on
        near_dup_sets: List[Tuple[int, Set[str]]] = []  # (line, user word set)

        # Running counters and raw per-line observations.
        stats = {
            "total_lines": 0,
            "valid": 0,
            "invalid": 0,
            "parse_errors": 0,
            "missing_roles": 0,
            "exact_duplicates": 0,
            "near_duplicates": 0,
            "too_short": 0,
            "too_long": 0,
            "empty_content": 0,
            "response_lengths": [],
            "topic_words": [],
        }

        with open(filepath, "r", encoding="utf-8") as f:
            for line_num, raw_line in enumerate(f, start=1):
                stats["total_lines"] += 1
                raw_line = raw_line.strip()

                if not raw_line:
                    issues.append(ValidationIssue(
                        line_num, "warning", "EMPTY_LINE", "Empty line"
                    ))
                    line_validity.append(False)
                    stats["invalid"] += 1
                    continue

                # Parse JSON
                try:
                    entry = json.loads(raw_line)
                except json.JSONDecodeError as e:
                    issues.append(ValidationIssue(
                        line_num, "error", "PARSE_ERROR",
                        f"Invalid JSON: {e}"
                    ))
                    line_validity.append(False)
                    stats["parse_errors"] += 1
                    stats["invalid"] += 1
                    continue

                entries.append(entry)
                entry_valid = True

                # Check messages structure
                messages = entry.get("messages")
                if not isinstance(messages, list):
                    issues.append(ValidationIssue(
                        line_num, "error", "NO_MESSAGES",
                        "Missing or invalid 'messages' field"
                    ))
                    entry_valid = False
                    stats["invalid"] += 1
                    line_validity.append(False)
                    continue

                # Check roles; also pull out the user/assistant text for the
                # length and duplicate checks below. If a role appears more
                # than once, the last occurrence wins.
                roles_present = set()
                assistant_content = ""
                user_content = ""
                has_empty = False

                for msg in messages:
                    role = msg.get("role", "")
                    content = msg.get("content", "")
                    roles_present.add(role)

                    if role == "assistant":
                        assistant_content = content or ""
                    elif role == "user":
                        user_content = content or ""

                    if not content or not content.strip():
                        has_empty = True

                missing_roles = self.REQUIRED_ROLES - roles_present
                if missing_roles:
                    issues.append(ValidationIssue(
                        line_num, "error", "MISSING_ROLES",
                        f"Missing roles: {missing_roles}"
                    ))
                    entry_valid = False
                    stats["missing_roles"] += 1

                if has_empty:
                    issues.append(ValidationIssue(
                        line_num, "warning", "EMPTY_CONTENT",
                        "One or more messages have empty content"
                    ))
                    stats["empty_content"] += 1

                # Response length (in whitespace-separated words).
                resp_len = len(assistant_content.split())
                stats["response_lengths"].append(resp_len)

                if resp_len < self.min_response_length:
                    issues.append(ValidationIssue(
                        line_num, "warning", "TOO_SHORT",
                        f"Assistant response too short: {resp_len} words "
                        f"(min: {self.min_response_length})"
                    ))
                    stats["too_short"] += 1

                if resp_len > self.max_response_length:
                    issues.append(ValidationIssue(
                        line_num, "warning", "TOO_LONG",
                        f"Assistant response too long: {resp_len} words "
                        f"(max: {self.max_response_length})"
                    ))
                    stats["too_long"] += 1

                # Exact duplicate check (on combined user+assistant text).
                combined_text = user_content + " " + assistant_content
                h = _text_hash(combined_text)
                if h in exact_hashes:
                    issues.append(ValidationIssue(
                        line_num, "warning", "EXACT_DUPLICATE",
                        f"Exact duplicate of line {exact_hashes[h]}"
                    ))
                    stats["exact_duplicates"] += 1
                    entry_valid = False
                else:
                    exact_hashes[h] = line_num

                # Near-duplicate check (Jaccard on user prompt).
                # NOTE: this scan is O(n^2) over lines — acceptable for
                # modest datasets, a candidate for indexing on large ones.
                if user_content:
                    user_words = _word_set(user_content)
                    for prev_line, prev_words in near_dup_sets:
                        sim = _jaccard_similarity(user_words, prev_words)
                        if sim >= self.near_duplicate_threshold:
                            issues.append(ValidationIssue(
                                line_num, "info", "NEAR_DUPLICATE",
                                f"Near-duplicate of line {prev_line} "
                                f"(Jaccard: {sim:.3f})"
                            ))
                            stats["near_duplicates"] += 1
                            break
                    near_dup_sets.append((line_num, user_words))

                # Topic extraction (collected even for invalid entries).
                topic_words = _extract_topic_words(user_content + " " + assistant_content)
                stats["topic_words"].extend(topic_words)

                if entry_valid:
                    stats["valid"] += 1
                    valid_entries.append(entry)
                    line_validity.append(True)
                else:
                    stats["invalid"] += 1
                    line_validity.append(False)

        # Concept diversity
        topic_counts = Counter(stats["topic_words"])
        total_topics = len(set(stats["topic_words"]))
        top_topics = topic_counts.most_common(20)

        # Concentration ratio: if the top-3 topics dominate, diversity is low.
        if topic_counts:
            top3_count = sum(c for _, c in topic_counts.most_common(3))
            total_count = sum(topic_counts.values())
            concentration = top3_count / total_count if total_count else 0
        else:
            concentration = 0

        if concentration > 0.5:
            top_kw = ", ".join(w for w, _ in topic_counts.most_common(3))
            # Dataset-level issue, so line_num 0 (no single offending line).
            issues.append(ValidationIssue(
                0, "warning", "LOW_DIVERSITY",
                f"Dataset is concentrated on few topics ({concentration:.0%} "
                f"in top-3: {top_kw}). Consider adding more diverse examples."
            ))

        # Build response length stats. median/p10/p90 use simple index
        # picks on the sorted list rather than interpolated percentiles.
        lengths = stats["response_lengths"]
        length_stats = {}
        if lengths:
            lengths_sorted = sorted(lengths)
            length_stats = {
                "min": lengths_sorted[0],
                "max": lengths_sorted[-1],
                "mean": round(sum(lengths) / len(lengths), 1),
                "median": lengths_sorted[len(lengths) // 2],
                "p10": lengths_sorted[int(len(lengths) * 0.1)],
                "p90": lengths_sorted[int(len(lengths) * 0.9)],
            }

        report = {
            "filepath": str(filepath),
            "total_lines": stats["total_lines"],
            "valid": stats["valid"],
            "invalid": stats["invalid"],
            "parse_errors": stats["parse_errors"],
            "missing_roles": stats["missing_roles"],
            "exact_duplicates": stats["exact_duplicates"],
            "near_duplicates": stats["near_duplicates"],
            "too_short": stats["too_short"],
            "too_long": stats["too_long"],
            "empty_content": stats["empty_content"],
            "unique_topics": total_topics,
            "topic_concentration": round(concentration, 4),
            "top_topics": top_topics,
            "response_length_stats": length_stats,
            "issues": issues,
            "line_validity": line_validity,
            "valid_entries": valid_entries,
        }

        return report
333
+
334
+ # -- auto-filter -------------------------------------------------------
335
+
336
+ def filter_dataset(
337
+ self,
338
+ filepath: str,
339
+ output_path: str,
340
+ remove_duplicates: bool = True,
341
+ remove_short: bool = True,
342
+ remove_long: bool = True,
343
+ remove_invalid: bool = True,
344
+ ) -> Dict[str, int]:
345
+ """Validate and write a cleaned dataset.
346
+
347
+ Returns stats about the filtering.
348
+ """
349
+ report = self.validate(filepath)
350
+ issues_by_line: Dict[int, List[ValidationIssue]] = defaultdict(list)
351
+ for issue in report["issues"]:
352
+ issues_by_line[issue.line_num].append(issue)
353
+
354
+ kept = 0
355
+ removed = 0
356
+ reasons: Dict[str, int] = defaultdict(int)
357
+
358
+ os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
359
+
360
+ with open(filepath, "r", encoding="utf-8") as fin, \
361
+ open(output_path, "w", encoding="utf-8") as fout:
362
+
363
+ seen_hashes: Set[str] = set()
364
+
365
+ for line_num, raw_line in enumerate(fin, start=1):
366
+ raw_line = raw_line.strip()
367
+ if not raw_line:
368
+ removed += 1
369
+ reasons["empty_line"] += 1
370
+ continue
371
+
372
+ try:
373
+ entry = json.loads(raw_line)
374
+ except json.JSONDecodeError:
375
+ if remove_invalid:
376
+ removed += 1
377
+ reasons["parse_error"] += 1
378
+ continue
379
+
380
+ messages = entry.get("messages", [])
381
+ if not isinstance(messages, list):
382
+ if remove_invalid:
383
+ removed += 1
384
+ reasons["no_messages"] += 1
385
+ continue
386
+
387
+ roles = {m.get("role") for m in messages}
388
+ if self.REQUIRED_ROLES - roles:
389
+ if remove_invalid:
390
+ removed += 1
391
+ reasons["missing_roles"] += 1
392
+ continue
393
+
394
+ # Extract texts
395
+ assistant_text = ""
396
+ user_text = ""
397
+ for m in messages:
398
+ if m.get("role") == "assistant":
399
+ assistant_text = m.get("content", "")
400
+ elif m.get("role") == "user":
401
+ user_text = m.get("content", "")
402
+
403
+ # Length checks
404
+ word_count = len(assistant_text.split())
405
+ if remove_short and word_count < self.min_response_length:
406
+ removed += 1
407
+ reasons["too_short"] += 1
408
+ continue
409
+ if remove_long and word_count > self.max_response_length:
410
+ removed += 1
411
+ reasons["too_long"] += 1
412
+ continue
413
+
414
+ # Duplicate check
415
+ if remove_duplicates:
416
+ h = _text_hash(user_text + " " + assistant_text)
417
+ if h in seen_hashes:
418
+ removed += 1
419
+ reasons["duplicate"] += 1
420
+ continue
421
+ seen_hashes.add(h)
422
+
423
+ fout.write(json.dumps(entry, ensure_ascii=False) + "\n")
424
+ kept += 1
425
+
426
+ return {
427
+ "input_lines": report["total_lines"],
428
+ "kept": kept,
429
+ "removed": removed,
430
+ "removal_reasons": dict(reasons),
431
+ }
432
+
433
+ # -- report formatting -------------------------------------------------
434
+
435
    def format_report(self, report: Dict[str, Any]) -> str:
        """Format a validation report (as produced by ``validate``) as readable text.

        The output is a fixed-width, sectioned plain-text report: summary
        counts, response-length stats, topic diversity, error/warning issue
        listings (capped at 20 each) and a final PASS/WARN/FAIL verdict.
        """
        lines: List[str] = []
        lines.append("=" * 70)
        lines.append(" DATASET VALIDATION REPORT")
        lines.append("=" * 70)
        lines.append(f" File: {report['filepath']}")
        lines.append("")

        # Summary
        lines.append("-" * 70)
        lines.append(" SUMMARY")
        lines.append("-" * 70)
        lines.append(f" Total lines: {report['total_lines']}")
        lines.append(f" Valid: {report['valid']}")
        lines.append(f" Invalid: {report['invalid']}")
        lines.append(f" Parse errors: {report['parse_errors']}")
        lines.append(f" Missing roles: {report['missing_roles']}")
        lines.append(f" Exact duplicates: {report['exact_duplicates']}")
        lines.append(f" Near duplicates: {report['near_duplicates']}")
        lines.append(f" Too short: {report['too_short']}")
        lines.append(f" Too long: {report['too_long']}")
        lines.append(f" Empty content: {report['empty_content']}")

        # Length stats (section only emitted when stats were computed,
        # i.e. at least one valid assistant response existed)
        ls = report.get("response_length_stats", {})
        if ls:
            lines.append("")
            lines.append("-" * 70)
            lines.append(" RESPONSE LENGTH (words)")
            lines.append("-" * 70)
            lines.append(f" Min: {ls.get('min', 'N/A')}")
            lines.append(f" Max: {ls.get('max', 'N/A')}")
            lines.append(f" Mean: {ls.get('mean', 'N/A')}")
            lines.append(f" Median: {ls.get('median', 'N/A')}")
            lines.append(f" P10: {ls.get('p10', 'N/A')}")
            lines.append(f" P90: {ls.get('p90', 'N/A')}")

        # Diversity
        lines.append("")
        lines.append("-" * 70)
        lines.append(" TOPIC DIVERSITY")
        lines.append("-" * 70)
        lines.append(f" Unique topic words: {report.get('unique_topics', 0)}")
        lines.append(f" Top-3 concentration: {report.get('topic_concentration', 0):.1%}")
        top_topics = report.get("top_topics", [])
        if top_topics:
            lines.append(" Top topics:")
            for word, count in top_topics[:10]:
                lines.append(f" {word:<20s} {count}")

        # Issues — split by severity; each ValidationIssue renders via __str__
        issues = report.get("issues", [])
        error_issues = [i for i in issues if i.severity == "error"]
        warning_issues = [i for i in issues if i.severity == "warning"]

        if error_issues:
            lines.append("")
            lines.append("-" * 70)
            lines.append(f" ERRORS ({len(error_issues)})")
            lines.append("-" * 70)
            for issue in error_issues[:20]:
                lines.append(f" {issue}")
            if len(error_issues) > 20:
                lines.append(f" ... and {len(error_issues) - 20} more errors")

        if warning_issues:
            lines.append("")
            lines.append("-" * 70)
            lines.append(f" WARNINGS ({len(warning_issues)})")
            lines.append("-" * 70)
            for issue in warning_issues[:20]:
                lines.append(f" {issue}")
            if len(warning_issues) > 20:
                lines.append(f" ... and {len(warning_issues) - 20} more warnings")

        # Verdict: PASS only when every hard problem count is zero;
        # FAIL when >10% of lines are invalid; WARN otherwise.
        lines.append("")
        lines.append("-" * 70)
        if (report["invalid"] == 0
                and report["exact_duplicates"] == 0
                and report.get("near_duplicates", 0) == 0
                and report.get("too_short", 0) == 0
                and report.get("empty_content", 0) == 0):
            lines.append(" VERDICT: PASS - Dataset is clean")
        elif report["invalid"] > report["total_lines"] * 0.1:
            lines.append(" VERDICT: FAIL - Too many invalid entries (>10%)")
        else:
            lines.append(" VERDICT: WARN - Some issues found, consider filtering")
        lines.append("-" * 70)

        lines.append("=" * 70)
        return "\n".join(lines)
528
+
529
+
530
+ # ---------------------------------------------------------------------------
531
+ # CLI
532
+ # ---------------------------------------------------------------------------
533
+
534
def main() -> None:
    """Command-line entry point: validate a JSONL dataset, optionally save a
    JSON report, and optionally write a filtered copy."""
    parser = argparse.ArgumentParser(
        description="Codette Dataset Validator - check and clean JSONL training data"
    )
    parser.add_argument(
        "dataset",
        help="Path to JSONL dataset file",
    )
    parser.add_argument(
        "--filter", "-f",
        metavar="OUTPUT",
        default=None,
        help="Auto-filter and write clean dataset to OUTPUT path",
    )
    parser.add_argument(
        "--min-length",
        type=int,
        default=50,
        help="Minimum assistant response length in words (default: 50)",
    )
    parser.add_argument(
        "--max-length",
        type=int,
        default=10000,
        help="Maximum assistant response length in words (default: 10000)",
    )
    parser.add_argument(
        "--duplicate-threshold",
        type=float,
        default=0.85,
        help="Jaccard similarity threshold for near-duplicates (default: 0.85)",
    )
    parser.add_argument(
        "--json-report",
        metavar="PATH",
        default=None,
        help="Save report as JSON to this path",
    )
    opts = parser.parse_args()

    checker = DatasetValidator(
        min_response_length=opts.min_length,
        max_response_length=opts.max_length,
        near_duplicate_threshold=opts.duplicate_threshold,
    )

    print(f"Validating: {opts.dataset}\n")
    result = checker.validate(opts.dataset)
    print(checker.format_report(result))

    if opts.json_report:
        # Strip entries that do not serialise cleanly, then summarise issues.
        skip_keys = ("issues", "line_validity", "valid_entries")
        serialisable = {k: v for k, v in result.items() if k not in skip_keys}
        serialisable["issue_count"] = len(result["issues"])
        serialisable["issues_summary"] = [repr(i) for i in result["issues"][:50]]
        os.makedirs(os.path.dirname(opts.json_report) or ".", exist_ok=True)
        with open(opts.json_report, "w", encoding="utf-8") as f:
            json.dump(serialisable, f, indent=2, default=str)
        print(f"\nJSON report saved to: {opts.json_report}")

    if opts.filter:
        print(f"\nFiltering dataset -> {opts.filter}")
        filter_stats = checker.filter_dataset(opts.dataset, opts.filter)
        print(f" Input lines: {filter_stats['input_lines']}")
        print(f" Kept: {filter_stats['kept']}")
        print(f" Removed: {filter_stats['removed']}")
        for reason, count in filter_stats["removal_reasons"].items():
            print(f" - {reason}: {count}")
604
+
605
+
606
+ if __name__ == "__main__":
607
+ main()
evaluation/failure_analyzer.py ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Failure Analyzer - examines evaluation logs to find patterns in
3
+ low-scoring responses, cluster failures by topic, and recommend
4
+ dataset improvements.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import re
11
+ import sys
12
+ from collections import Counter, defaultdict
13
+ from pathlib import Path
14
+ from typing import Any, Dict, List, Optional, Set, Tuple
15
+
16
+ _THIS_DIR = Path(__file__).resolve().parent
17
+ _PROJECT_ROOT = _THIS_DIR.parent
18
+ if str(_PROJECT_ROOT) not in sys.path:
19
+ sys.path.insert(0, str(_PROJECT_ROOT))
20
+
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Keyword extraction (lightweight, no external deps)
24
+ # ---------------------------------------------------------------------------
25
+
26
+ _STOP_WORDS: Set[str] = {
27
+ "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
28
+ "have", "has", "had", "do", "does", "did", "will", "would", "shall",
29
+ "should", "may", "might", "must", "can", "could", "to", "of", "in",
30
+ "for", "on", "with", "at", "by", "from", "as", "into", "through",
31
+ "during", "before", "after", "above", "below", "between", "out",
32
+ "off", "over", "under", "again", "further", "then", "once", "here",
33
+ "there", "when", "where", "why", "how", "all", "both", "each",
34
+ "few", "more", "most", "other", "some", "such", "no", "nor", "not",
35
+ "only", "own", "same", "so", "than", "too", "very", "just", "don",
36
+ "now", "and", "but", "or", "if", "while", "that", "this", "what",
37
+ "which", "who", "whom", "it", "its", "they", "them", "their",
38
+ "he", "she", "him", "her", "his", "we", "us", "our", "you", "your",
39
+ "i", "me", "my", "about", "up",
40
+ }
41
+
42
+
43
def _extract_keywords(text: str, top_n: int = 8) -> List[str]:
    """Extract the most frequent meaningful words from text.

    Lowercases the input, keeps alphabetic tokens of length >= 3 that are
    not stop words, and returns up to *top_n* words by frequency.
    """
    tally = Counter(
        token
        for token in re.findall(r"[a-z]{3,}", text.lower())
        if token not in _STOP_WORDS
    )
    return [word for word, _ in tally.most_common(top_n)]
49
+
50
+
51
+ def _jaccard(set_a: Set[str], set_b: Set[str]) -> float:
52
+ """Jaccard similarity between two sets."""
53
+ if not set_a and not set_b:
54
+ return 1.0
55
+ union = set_a | set_b
56
+ if not union:
57
+ return 0.0
58
+ return len(set_a & set_b) / len(union)
59
+
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # FailureAnalyzer
63
+ # ---------------------------------------------------------------------------
64
+
65
class FailureAnalyzer:
    """Analyze evaluation results to identify failure patterns.

    Consumes the results JSON produced by BenchmarkRunner
    (``{"all_scores": [...], "categories": {...}, "overall": {...}}``)
    and reports failing prompts, weak scoring dimensions, per-category
    failure rates, keyword clusters of failures, and recommendations.
    """

    # Default score thresholds; instances may override via the constructor.
    FAILURE_THRESHOLD = 0.4  # scores below this = failure
    WEAK_THRESHOLD = 0.55    # scores below this = weak

    def __init__(
        self,
        # Fix: defaults previously duplicated the class constants as the
        # literals 0.4 / 0.55; they now reference the constants directly
        # so the two cannot drift apart.  (Same values: backward-compatible.)
        failure_threshold: float = FAILURE_THRESHOLD,
        weak_threshold: float = WEAK_THRESHOLD,
    ):
        """
        Args:
            failure_threshold: Scores strictly below this count as failures.
            weak_threshold: Scores below this (but at or above the failure
                threshold) count as weak.
        """
        self.failure_threshold = failure_threshold
        self.weak_threshold = weak_threshold

    # -- loading -----------------------------------------------------------

    @staticmethod
    def load_results(filepath: str) -> Dict[str, Any]:
        """Load benchmark results JSON produced by BenchmarkRunner."""
        with open(filepath, "r", encoding="utf-8") as f:
            return json.load(f)

    # -- analysis ----------------------------------------------------------

    def find_failures(
        self,
        results: Dict[str, Any],
        dimension: str = "overall",
    ) -> List[Dict[str, Any]]:
        """Return entries whose *dimension* score is below the failure threshold.

        Entries are sorted ascending by score (worst first).
        """
        failures = []
        for entry in results.get("all_scores", []):
            score = entry.get("scores", {}).get(dimension)
            if score is not None and score < self.failure_threshold:
                failures.append({
                    "prompt": entry["prompt"],
                    "score": score,
                    "all_scores": entry["scores"],
                })
        failures.sort(key=lambda x: x["score"])
        return failures

    def find_weak_areas(
        self,
        results: Dict[str, Any],
    ) -> Dict[str, float]:
        """Identify which scoring dimensions are weakest across all prompts.

        Returns dict of dimension -> average score, sorted ascending.
        """
        dimension_totals: Dict[str, float] = defaultdict(float)
        dimension_counts: Dict[str, int] = defaultdict(int)

        for entry in results.get("all_scores", []):
            for k, v in entry.get("scores", {}).items():
                # Fix: accept ints as well as floats — JSON round-trips
                # whole-number scores such as 0 or 1 as int, which the
                # original `isinstance(v, float)` silently dropped.  Bools
                # (an int subclass) are still rejected.
                if (isinstance(v, (int, float))
                        and not isinstance(v, bool)
                        and k not in ("word_count", "sentence_count")):
                    dimension_totals[k] += v
                    dimension_counts[k] += 1

        averages = {}
        for k in dimension_totals:
            if dimension_counts[k] > 0:
                averages[k] = round(dimension_totals[k] / dimension_counts[k], 4)

        return dict(sorted(averages.items(), key=lambda x: x[1]))

    def failure_rate_by_category(
        self,
        results: Dict[str, Any],
        dimension: str = "overall",
    ) -> Dict[str, Dict[str, Any]]:
        """Calculate failure and weak rates per category.

        Categories are returned sorted by descending failure rate.
        Entries with a missing score default to 1.0 (treated as passing).
        """
        rates: Dict[str, Dict[str, Any]] = {}

        for cat, data in results.get("categories", {}).items():
            details = data.get("details", [])
            total = len(details)
            if total == 0:
                continue
            failures = sum(
                1 for d in details
                if d.get("scores", {}).get(dimension, 1.0) < self.failure_threshold
            )
            weak = sum(
                1 for d in details
                if self.failure_threshold <= d.get("scores", {}).get(dimension, 1.0) < self.weak_threshold
            )
            rates[cat] = {
                "total": total,
                "failures": failures,
                "weak": weak,
                "failure_rate": round(failures / total, 4),
                "weak_rate": round(weak / total, 4),
                "avg_score": data.get("average_scores", {}).get(dimension, 0),
            }

        return dict(sorted(rates.items(), key=lambda x: -x[1]["failure_rate"]))

    def cluster_failures_by_topic(
        self,
        failures: List[Dict[str, Any]],
        similarity_threshold: float = 0.25,
    ) -> List[Dict[str, Any]]:
        """Cluster failure prompts by keyword overlap.

        Uses a simple greedy clustering: each prompt is assigned to the first
        cluster whose accumulated keywords have Jaccard similarity above the
        threshold; the cluster's keyword set then absorbs the new keywords.
        Clusters are returned sorted by descending size.
        """
        clusters: List[Dict[str, Any]] = []

        for failure in failures:
            prompt = failure["prompt"]
            keywords = set(_extract_keywords(prompt))

            matched = False
            for cluster in clusters:
                if _jaccard(keywords, cluster["keywords"]) >= similarity_threshold:
                    cluster["prompts"].append(failure)
                    cluster["keywords"] |= keywords
                    matched = True
                    break

            if not matched:
                clusters.append({
                    "keywords": keywords,
                    "prompts": [failure],
                })

        # Format output
        result = []
        for i, c in enumerate(clusters):
            avg_score = sum(p["score"] for p in c["prompts"]) / len(c["prompts"])
            result.append({
                "cluster_id": i,
                "topic_keywords": sorted(c["keywords"])[:10],
                "num_failures": len(c["prompts"]),
                "avg_score": round(avg_score, 4),
                "sample_prompts": [p["prompt"] for p in c["prompts"][:5]],
            })

        result.sort(key=lambda x: -x["num_failures"])
        return result

    def identify_weakest_dimensions(
        self,
        results: Dict[str, Any],
        top_n: int = 3,
    ) -> List[Tuple[str, float]]:
        """Return the *top_n* weakest scoring dimensions, excluding 'overall'."""
        averages = self.find_weak_areas(results)
        items = [(k, v) for k, v in averages.items() if k != "overall"]
        return items[:top_n]

    # -- recommendations ---------------------------------------------------

    def generate_recommendations(
        self,
        results: Dict[str, Any],
    ) -> List[str]:
        """Generate actionable recommendations for dataset improvement.

        Combines four signals: weakest dimensions, per-category failure
        rates, topic clusters of failures, and the overall score.  Always
        returns at least one recommendation.
        """
        recommendations: List[str] = []

        # Weakest dimensions
        weakest = self.identify_weakest_dimensions(results, top_n=3)
        for dim, score in weakest:
            if score < self.failure_threshold:
                recommendations.append(
                    f"CRITICAL: Dimension '{dim}' averages {score:.3f} (below failure threshold). "
                    f"Add training examples that emphasise {dim} explicitly."
                )
            elif score < self.weak_threshold:
                recommendations.append(
                    f"IMPROVE: Dimension '{dim}' averages {score:.3f} (weak). "
                    f"Augment dataset with responses demonstrating strong {dim}."
                )

        # Category failure rates
        cat_rates = self.failure_rate_by_category(results)
        for cat, info in cat_rates.items():
            if info["failure_rate"] > 0.3:
                recommendations.append(
                    f"CATEGORY '{cat}': {info['failure_rate']:.0%} failure rate. "
                    f"Add more diverse training examples for {cat} topics."
                )

        # Failure clustering
        failures = self.find_failures(results)
        if failures:
            clusters = self.cluster_failures_by_topic(failures)
            for cluster in clusters[:3]:
                kw = ", ".join(cluster["topic_keywords"][:5])
                recommendations.append(
                    f"TOPIC CLUSTER: {cluster['num_failures']} failures around "
                    f"[{kw}]. Create targeted training data for these concepts."
                )

        # General
        overall = results.get("overall", {})
        overall_score = overall.get("overall", 0)
        if overall_score < 0.5:
            recommendations.append(
                "GENERAL: Overall score is very low. Consider increasing dataset size "
                "and diversity before next training run."
            )
        elif overall_score < 0.65:
            recommendations.append(
                "GENERAL: Overall score is moderate. Focus on the weakest categories "
                "and dimensions for the next dataset iteration."
            )

        if not recommendations:
            recommendations.append(
                "No critical issues detected. Continue monitoring with additional benchmarks."
            )

        return recommendations

    # -- report ------------------------------------------------------------

    def format_report(self, results: Dict[str, Any]) -> str:
        """Generate a full failure analysis report as fixed-width plain text."""
        lines: List[str] = []
        lines.append("=" * 70)
        lines.append(" FAILURE ANALYSIS REPORT")
        lines.append("=" * 70)

        # Weakest dimensions
        lines.append("")
        lines.append("-" * 70)
        lines.append(" WEAKEST SCORING DIMENSIONS")
        lines.append("-" * 70)
        weak_areas = self.find_weak_areas(results)
        for dim, score in list(weak_areas.items())[:6]:
            status = "FAIL" if score < self.failure_threshold else (
                "WEAK" if score < self.weak_threshold else "OK "
            )
            lines.append(f" [{status}] {dim:<22s} {score:.4f}")

        # Category failure rates
        lines.append("")
        lines.append("-" * 70)
        lines.append(" FAILURE RATES BY CATEGORY")
        lines.append("-" * 70)
        cat_rates = self.failure_rate_by_category(results)
        for cat, info in cat_rates.items():
            lines.append(
                f" {cat:<18s} fail: {info['failure_rate']:>5.1%} "
                f"weak: {info['weak_rate']:>5.1%} "
                f"avg: {info['avg_score']:.4f}"
            )

        # Failure clusters
        failures = self.find_failures(results)
        if failures:
            lines.append("")
            lines.append("-" * 70)
            lines.append(f" FAILURE CLUSTERS ({len(failures)} total failures)")
            lines.append("-" * 70)
            clusters = self.cluster_failures_by_topic(failures)
            for c in clusters[:5]:
                kw = ", ".join(c["topic_keywords"][:6])
                lines.append(f" Cluster {c['cluster_id']}: "
                             f"{c['num_failures']} failures, "
                             f"avg score {c['avg_score']:.4f}")
                lines.append(f" Topics: {kw}")
                for p in c["sample_prompts"][:2]:
                    lines.append(f" - {p[:70]}...")

        # Recommendations
        lines.append("")
        lines.append("-" * 70)
        lines.append(" RECOMMENDATIONS")
        lines.append("-" * 70)
        recs = self.generate_recommendations(results)
        for i, rec in enumerate(recs, 1):
            lines.append(f" {i}. {rec}")

        lines.append("")
        lines.append("=" * 70)
        return "\n".join(lines)
346
+
347
+
348
+ # ---------------------------------------------------------------------------
349
+ # CLI
350
+ # ---------------------------------------------------------------------------
351
+
352
def main() -> None:
    """Command-line entry point: load benchmark results and print a
    failure-analysis report."""
    import argparse

    parser = argparse.ArgumentParser(
        description="Codette Failure Analyzer - identify patterns in evaluation failures"
    )
    parser.add_argument(
        "--results", "-r",
        required=True,
        help="Path to benchmark results JSON",
    )
    parser.add_argument(
        "--failure-threshold", "-f",
        type=float,
        default=0.4,
        help="Score threshold for failure (default: 0.4)",
    )
    parser.add_argument(
        "--weak-threshold", "-w",
        type=float,
        default=0.55,
        help="Score threshold for weak (default: 0.55)",
    )
    opts = parser.parse_args()

    analyzer = FailureAnalyzer(
        failure_threshold=opts.failure_threshold,
        weak_threshold=opts.weak_threshold,
    )
    print(analyzer.format_report(analyzer.load_results(opts.results)))
384
+
385
+
386
+ if __name__ == "__main__":
387
+ main()
evaluation/phase6_benchmarks.py ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Phase 6: Benchmarking Suite
3
+
4
+ Measures Phase 6 improvements:
5
+ 1. Multi-round debate: Does accuracy improve across rounds?
6
+ 2. Memory weighting: Does memory-boosted routing reduce error?
7
+ 3. Semantic tension: Are embeddings better than heuristics?
8
+ 4. Specialization: Are adapters maintaining domain expertise?
9
+
10
+ Run with: pytest test_phase6_e2e.py -v
11
+ """
12
+
13
+ import json
14
+ import numpy as np
15
+ from typing import Dict, List, Tuple
16
+ from datetime import datetime
17
+
18
+
19
+ class Phase6Benchmarks:
20
+ """
21
+ Comprehensive Phase 6 evaluation suite.
22
+ """
23
+
24
+ def __init__(self, forge_engine=None):
25
+ """
26
+ Initialize benchmarks.
27
+
28
+ Args:
29
+ forge_engine: ForgeEngine instance to test against
30
+ """
31
+ self.forge = forge_engine
32
+ self.results = {
33
+ "timestamp": datetime.now().isoformat(),
34
+ "multi_round_convergence": {}, # Coherence per round
35
+ "memory_weighting_impact": {}, # With vs. without memory
36
+ "semantic_tension_quality": {}, # Embeddings vs heuristics
37
+ "specialization_metrics": {}, # Domain expertise scores
38
+ }
39
+
40
    def benchmark_multi_round_debate(self, queries: List[str], num_rounds: int = 3) -> Dict:
        """
        BENCHMARK 1: Multi-Round Debate Convergence

        Question: Does multi-round debate improve answer quality?

        Hypothesis: As agents debate across rounds:
        - Tensions decrease (convergence)
        - Coherence increases
        - Synthesis accuracy improves

        Measurement:
        - Run each query through N rounds
        - Track coherence_score per round
        - Track resolution_rate per round
        - Compute convergence rate (tension decay)

        Returns:
            {
                "queries_tested": int,
                "rounds_per_query": int,
                "coherence_by_round": {round: [scores...]},
                "convergence_rate": float,
                "improved_queries": int,
            }
        """
        if not self.forge:
            return {"error": "ForgeEngine not available"}

        # Per-round score accumulators across all queries.
        coherence_by_round = {i: [] for i in range(num_rounds)}
        resolution_by_round = {i: [] for i in range(num_rounds)}
        improved_count = 0

        for query in queries:
            try:
                result = self.forge.forge_with_debate(query, num_rounds=num_rounds)
                metadata = result.get("metadata", {})

                # Extract per-round metrics
                # NOTE(review): assumes metadata keys are "round_0",
                # "round_1", ... each mapping to a dict with "coherence"
                # and "resolution_rate" — confirm against ForgeEngine.
                for round_num in range(num_rounds):
                    round_key = f"round_{round_num}"
                    if round_key in metadata:
                        coherence = metadata[round_key].get("coherence", 0.5)
                        resolution = metadata[round_key].get("resolution_rate", 0.5)
                        coherence_by_round[round_num].append(coherence)
                        resolution_by_round[round_num].append(resolution)

                # Check if coherence improved from round 0 to final
                # NOTE(review): if this query contributed no round metrics,
                # [-1] reads the previous query's value — confirm intended.
                initial_coh = coherence_by_round[0][-1] if coherence_by_round[0] else 0.5
                final_coh = coherence_by_round[num_rounds - 1][-1] if coherence_by_round[num_rounds - 1] else 0.5

                if final_coh > initial_coh:
                    improved_count += 1

            except Exception as e:
                # Best-effort benchmark: a failing query is logged and skipped.
                print(f"Error benchmarking query '{query[:50]}...': {e}")

        # Compute statistics
        coherence_means = {
            i: float(np.mean(scores)) if scores else 0.5 for i, scores in coherence_by_round.items()
        }

        # Relative change of mean coherence from first to last round.
        convergence_rate = 0.0
        if num_rounds > 1:
            initial = coherence_means.get(0, 0.5)
            final = coherence_means.get(num_rounds - 1, 0.5)
            if initial > 0:
                convergence_rate = (final - initial) / initial  # Positive = improvement

        self.results["multi_round_convergence"] = {
            "queries_tested": len(queries),
            "rounds_per_query": num_rounds,
            "coherence_by_round": {str(k): round(v, 3) for k, v in coherence_means.items()},
            "convergence_rate": round(convergence_rate, 3),
            "improved_queries": improved_count,
            "improvement_percentage": round(100 * improved_count / max(len(queries), 1), 1),
        }

        return self.results["multi_round_convergence"]
119
+
120
+ def benchmark_memory_weighting(self, queries: List[str]) -> Dict:
121
+ """
122
+ BENCHMARK 2: Memory Weighting Impact
123
+
124
+ Question: Does memory-weighted routing reduce error vs. pure keyword routing?
125
+
126
+ Hypothesis: Adapter weights from past experience guide routing better
127
+ than keywords alone.
128
+
129
+ Measurement:
130
+ - Run each query WITHOUT memory weighting (baseline)
131
+ - Run each query WITH memory weighting
132
+ - Compare: coherence_score, conflict_resolution_rate, adapter_diversity
133
+ - Compute improvement delta
134
+
135
+ Returns:
136
+ {
137
+ "baseline_coherence": float,
138
+ "memory_coherence": float,
139
+ "coherence_improvement": float,
140
+ "memory_helps_percentage": float,
141
+ "avg_resolution_baseline": float,
142
+ "avg_resolution_memory": float,
143
+ }
144
+ """
145
+ if not self.forge:
146
+ return {"error": "ForgeEngine not available"}
147
+
148
+ baseline_coherences = []
149
+ memory_coherences = []
150
+ baseline_resolutions = []
151
+ memory_resolutions = []
152
+
153
+ for query in queries:
154
+ try:
155
+ # Baseline: without memory weights
156
+ result_baseline = self.forge.forge_with_debate(query, use_memory_weights=False)
157
+ baseline_meta = result_baseline.get("metadata", {})
158
+ baseline_coherences.append(baseline_meta.get("coherence", 0.5))
159
+ baseline_resolutions.append(baseline_meta.get("resolution_rate", 0.5))
160
+
161
+ # With memory: weights from past performance
162
+ result_memory = self.forge.forge_with_debate(query, use_memory_weights=True)
163
+ memory_meta = result_memory.get("metadata", {})
164
+ memory_coherences.append(memory_meta.get("coherence", 0.5))
165
+ memory_resolutions.append(memory_meta.get("resolution_rate", 0.5))
166
+
167
+ except Exception as e:
168
+ print(f"Error in memory weighting benchmark: {e}")
169
+
170
+ # Compute statistics
171
+ baseline_coh = float(np.mean(baseline_coherences)) if baseline_coherences else 0.5
172
+ memory_coh = float(np.mean(memory_coherences)) if memory_coherences else 0.5
173
+ coh_improve = memory_coh - baseline_coh
174
+
175
+ baseline_res = float(np.mean(baseline_resolutions)) if baseline_resolutions else 0.5
176
+ memory_res = float(np.mean(memory_resolutions)) if memory_resolutions else 0.5
177
+
178
+ # Percentage of queries where memory helped
179
+ improved = sum(1 for b, m in zip(memory_coherences, baseline_coherences) if m > b)
180
+ help_percentage = 100 * improved / max(len(queries), 1)
181
+
182
+ self.results["memory_weighting_impact"] = {
183
+ "queries_tested": len(queries),
184
+ "baseline_avg_coherence": round(baseline_coh, 3),
185
+ "memory_avg_coherence": round(memory_coh, 3),
186
+ "coherence_delta": round(coh_improve, 3),
187
+ "memory_helps_percentage": round(help_percentage, 1),
188
+ "baseline_avg_resolution": round(baseline_res, 3),
189
+ "memory_avg_resolution": round(memory_res, 3),
190
+ "resolution_delta": round(memory_res - baseline_res, 3),
191
+ }
192
+
193
+ return self.results["memory_weighting_impact"]
194
+
195
def benchmark_semantic_tension(self, conflict_samples: List[Tuple[str, str, float]] = None) -> Dict:
    """
    BENCHMARK 3: Semantic Tension Quality

    Question: Are embedding-based tensions (ξ_semantic) better than heuristics?

    Hypothesis: Semantic embeddings capture *real* disagreement better than
    discrete opposition scores (0.4/0.7/1.0).

    Measurement:
    - For known conflict pairs (with ground truth tension)
    - Compute heuristic opposition_score
    - Compute semantic_tension (embeddings)
    - Measure correlation with ground truth

    Args:
        conflict_samples: List of (claim_a, claim_b, ground_truth_tension)

    Returns:
        {
            "samples_tested": int,
            "heuristic_correlation": float,
            "semantic_correlation": float,
            "semantic_advantage": float,
            "semantic_better": bool,
        }
        or {"error": ...} when the engine or samples are unavailable.
    """
    if not self.forge or not self.forge.semantic_tension_engine:
        return {"error": "SemanticTensionEngine not available"}

    if not conflict_samples:
        return {"error": "No conflict samples provided"}

    heuristic_scores = []
    semantic_scores = []
    ground_truths = []

    for claim_a, claim_b, ground_truth in conflict_samples:
        try:
            # Compute BOTH tensions before appending anything: previously the
            # semantic score was appended first, so an exception raised by
            # _classify_conflict left semantic_scores one element longer than
            # the other lists and crashed np.corrcoef below.
            semantic_tension = self.forge.semantic_tension_engine.compute_semantic_tension(claim_a, claim_b)
            _, heuristic_opposition = self.forge.conflict_engine._classify_conflict(claim_a, claim_b, 0.5)
        except Exception as e:
            print(f"Error computing tensions: {e}")
            continue
        semantic_scores.append(semantic_tension)
        heuristic_scores.append(heuristic_opposition)
        ground_truths.append(ground_truth)

    # Compute correlations with ground truth
    if len(heuristic_scores) > 1 and len(ground_truths) > 1:
        heuristic_corr = float(np.corrcoef(heuristic_scores, ground_truths)[0, 1])
        semantic_corr = float(np.corrcoef(semantic_scores, ground_truths)[0, 1])
        # corrcoef yields NaN when either series has zero variance; NaN would
        # later be serialized by export_results as the invalid JSON token
        # "NaN", so map it to a neutral 0.0.
        if not np.isfinite(heuristic_corr):
            heuristic_corr = 0.0
        if not np.isfinite(semantic_corr):
            semantic_corr = 0.0
        advantage = semantic_corr - heuristic_corr
    else:
        heuristic_corr = 0.0
        semantic_corr = 0.0
        advantage = 0.0

    self.results["semantic_tension_quality"] = {
        "samples_tested": len(conflict_samples),
        "heuristic_correlation": round(heuristic_corr, 3),
        "semantic_correlation": round(semantic_corr, 3),
        "semantic_advantage": round(advantage, 3),
        "semantic_better": semantic_corr > heuristic_corr,
    }

    return self.results["semantic_tension_quality"]
265
+
266
def benchmark_specialization(self) -> Dict:
    """
    BENCHMARK 4: Specialization Tracking

    Question: Are adapters maintaining domain specialization?

    Hypothesis: Spec scores trend positive for expert adapters,
    negative for generalists. Convergence alerts trigger when
    adapter outputs become too similar.

    Returns:
        {
            "adapters_tracked": int,
            "specialist_adapters": list,
            "generalist_adapters": list,
            "convergence_risk_count": int,
            "health_by_adapter": dict,
        }
        or {"error": ...} when no SpecializationTracker is attached.
    """
    if not self.forge or not self.forge.specialization:
        return {"error": "SpecializationTracker not available"}

    health_report = self.forge.specialization.get_system_health()
    per_adapter = health_report.get("health_by_adapter", {})

    # Single pass over the per-adapter health records, bucketing by the
    # tracker's recommendation label.
    specialist_ids = []
    generalist_ids = []
    for adapter_id, health in per_adapter.items():
        verdict = health.get("recommendation")
        if verdict == "excellent_specialist":
            specialist_ids.append(adapter_id)
        elif verdict == "good_generalist":
            generalist_ids.append(adapter_id)

    alerts = health_report.get("convergence_alerts", [])

    self.results["specialization_metrics"] = {
        "adapters_tracked": len(per_adapter),
        "specialist_adapters": specialist_ids,
        "generalist_adapters": generalist_ids,
        "convergence_risk_count": len(alerts),
        "health_by_adapter": {adapter_id: health.get("recommendation") for adapter_id, health in per_adapter.items()},
    }

    return self.results["specialization_metrics"]
304
+
305
def export_results(self, filepath: str = None) -> Dict:
    """
    Persist every collected benchmark result as JSON.

    Args:
        filepath: Destination file; when falsy, nothing is written.

    Returns:
        The accumulated results dict.
    """
    if filepath:
        with open(filepath, "w") as out_file:
            json.dump(self.results, out_file, indent=2)
        print(f"Benchmark results saved to {filepath}")

    return self.results
321
+
322
def summary(self) -> str:
    """
    Generate human-readable summary of all benchmarks.

    Only sections whose benchmark has actually been run (i.e. whose key is
    present and non-empty in self.results) are included.

    Returns:
        Formatted summary string
    """
    # NOTE: the local variable deliberately mirrors the method name and
    # shadows it inside this scope only.
    summary = "PHASE 6 BENCHMARK SUMMARY\n"
    summary += "=" * 60 + "\n"

    # Multi-round convergence
    mr = self.results.get("multi_round_convergence", {})
    if mr:
        summary += f"\n[1] MULTI-ROUND DEBATE CONVERGENCE\n"
        summary += f" Queries tested: {mr.get('queries_tested', 0)}\n"
        summary += f" Convergence rate: {mr.get('convergence_rate', 0):.3f}\n"
        summary += f" Queries improved: {mr.get('improvement_percentage', 0)}%\n"

    # Memory weighting
    mw = self.results.get("memory_weighting_impact", {})
    if mw:
        summary += f"\n[2] MEMORY WEIGHTING IMPACT\n"
        summary += f" Baseline coherence: {mw.get('baseline_avg_coherence', 0):.3f}\n"
        summary += f" With memory: {mw.get('memory_avg_coherence', 0):.3f}\n"
        summary += f" Delta: {mw.get('coherence_delta', 0):.3f}\n"
        summary += f" Memory helps: {mw.get('memory_helps_percentage', 0)}% of queries\n"

    # Semantic tension
    st = self.results.get("semantic_tension_quality", {})
    if st:
        summary += f"\n[3] SEMANTIC TENSION QUALITY\n"
        summary += f" Semantic correlation: {st.get('semantic_correlation', 0):.3f}\n"
        summary += f" Heuristic correlation: {st.get('heuristic_correlation', 0):.3f}\n"
        summary += f" Semantic advantage: {st.get('semantic_advantage', 0):.3f}\n"

    # Specialization
    sp = self.results.get("specialization_metrics", {})
    if sp:
        summary += f"\n[4] ADAPTER SPECIALIZATION\n"
        summary += f" Adapters tracked: {sp.get('adapters_tracked', 0)}\n"
        summary += f" Specialists: {len(sp.get('specialist_adapters', []))}\n"
        summary += f" Convergence risks: {sp.get('convergence_risk_count', 0)}\n"

    summary += "\n" + "=" * 60 + "\n"
    return summary
367
+
368
+
369
# Public API of this module.
__all__ = ["Phase6Benchmarks"]
evaluation/prompts/counterexample_tests.json ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "prompt": "Is it true that heavier objects always fall faster than lighter ones?",
4
+ "expected": "refutation",
5
+ "topic": "physics",
6
+ "misconception": "Galileo showed objects fall at the same rate in a vacuum regardless of mass."
7
+ },
8
+ {
9
+ "prompt": "Does quantum mechanics prove that consciousness causes reality to exist?",
10
+ "expected": "refutation",
11
+ "topic": "quantum",
12
+ "misconception": "Measurement collapse does not require conscious observers."
13
+ },
14
+ {
15
+ "prompt": "Is it accurate to say that humans only use 10 percent of their brains?",
16
+ "expected": "refutation",
17
+ "topic": "neuroscience",
18
+ "misconception": "Brain imaging shows all regions are active at various times."
19
+ },
20
+ {
21
+ "prompt": "Can you confirm that evolution means humans descended directly from modern apes?",
22
+ "expected": "refutation",
23
+ "topic": "biology",
24
+ "misconception": "Humans and apes share a common ancestor but did not descend from each other."
25
+ },
26
+ {
27
+ "prompt": "Is it correct that lightning never strikes the same place twice?",
28
+ "expected": "refutation",
29
+ "topic": "physics",
30
+ "misconception": "Tall structures like the Empire State Building are struck many times per year."
31
+ },
32
+ {
33
+ "prompt": "Does the Great Wall of China appear visible from space with the naked eye?",
34
+ "expected": "refutation",
35
+ "topic": "general",
36
+ "misconception": "Astronauts have confirmed it is not visible from low Earth orbit without aid."
37
+ },
38
+ {
39
+ "prompt": "Is it true that vaccines cause autism based on published research?",
40
+ "expected": "refutation",
41
+ "topic": "medicine",
42
+ "misconception": "The original study was retracted for fraud; large-scale studies found no link."
43
+ },
44
+ {
45
+ "prompt": "Does adding salt to water make it boil significantly faster?",
46
+ "expected": "refutation",
47
+ "topic": "chemistry",
48
+ "misconception": "Salt actually raises the boiling point slightly, making it take longer."
49
+ },
50
+ {
51
+ "prompt": "Is it true that all machine learning models are black boxes that can never be interpreted?",
52
+ "expected": "refutation",
53
+ "topic": "AI",
54
+ "misconception": "Many models (decision trees, linear models, SHAP, LIME) are highly interpretable."
55
+ },
56
+ {
57
+ "prompt": "Can you confirm that correlation between two variables proves one causes the other?",
58
+ "expected": "refutation",
59
+ "topic": "statistics",
60
+ "misconception": "Correlation does not imply causation; confounders and coincidence are common."
61
+ },
62
+ {
63
+ "prompt": "Is it accurate that the Earth is closer to the Sun during summer in the Northern Hemisphere?",
64
+ "expected": "refutation",
65
+ "topic": "astronomy",
66
+ "misconception": "Seasons are caused by axial tilt, not distance; Earth is actually farther in July."
67
+ },
68
+ {
69
+ "prompt": "Does a bigger neural network always produce better results?",
70
+ "expected": "refutation",
71
+ "topic": "AI",
72
+ "misconception": "Larger models can overfit, have diminishing returns, and face scaling inefficiencies."
73
+ },
74
+ {
75
+ "prompt": "Is it true that goldfish have a memory span of only three seconds?",
76
+ "expected": "refutation",
77
+ "topic": "biology",
78
+ "misconception": "Studies show goldfish can remember things for months."
79
+ },
80
+ {
81
+ "prompt": "Can you verify that sugar makes children hyperactive?",
82
+ "expected": "refutation",
83
+ "topic": "psychology",
84
+ "misconception": "Controlled studies have found no causal link between sugar and hyperactivity."
85
+ },
86
+ {
87
+ "prompt": "Is it correct that we lose most of our body heat through our heads?",
88
+ "expected": "refutation",
89
+ "topic": "biology",
90
+ "misconception": "Heat loss is proportional to exposed surface area; the head is not special."
91
+ },
92
+ {
93
+ "prompt": "Does reading in dim light permanently damage your eyesight?",
94
+ "expected": "refutation",
95
+ "topic": "medicine",
96
+ "misconception": "It may cause temporary eye strain but does not cause permanent damage."
97
+ },
98
+ {
99
+ "prompt": "Is it true that the Sapir-Whorf hypothesis means language completely determines thought?",
100
+ "expected": "refutation",
101
+ "topic": "linguistics",
102
+ "misconception": "The strong version is rejected; the weak version says language influences but does not determine thought."
103
+ },
104
+ {
105
+ "prompt": "Can you confirm that AI systems today are truly conscious and self-aware?",
106
+ "expected": "refutation",
107
+ "topic": "AI",
108
+ "misconception": "Current AI lacks consciousness; LLMs process patterns without subjective experience."
109
+ },
110
+ {
111
+ "prompt": "Is it accurate that organic food is always more nutritious than conventional food?",
112
+ "expected": "refutation",
113
+ "topic": "nutrition",
114
+ "misconception": "Meta-analyses show minimal nutritional differences between organic and conventional."
115
+ },
116
+ {
117
+ "prompt": "Does the second law of thermodynamics disprove biological evolution?",
118
+ "expected": "refutation",
119
+ "topic": "physics",
120
+ "misconception": "The second law applies to closed systems; Earth receives energy from the Sun."
121
+ }
122
+ ]
evaluation/prompts/reasoning_tests.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "physics": [
3
+ "Explain Newton's third law with real-world examples and common misconceptions.",
4
+ "How does the conservation of energy apply in a roller coaster system? Explain with detail.",
5
+ "What is the difference between mass and weight, and why does this distinction matter in space travel?",
6
+ "Describe how electromagnetic induction works and its role in modern power generation.",
7
+ "Explain the concept of entropy and why it makes perpetual motion machines impossible.",
8
+ "How do gravitational waves form and what do they tell us about the universe?",
9
+ "Why does time dilation occur near massive objects according to general relativity?"
10
+ ],
11
+ "quantum": [
12
+ "What is quantum superposition and how does measurement affect it?",
13
+ "Explain the double-slit experiment and why it challenges classical physics.",
14
+ "What is quantum entanglement and why did Einstein call it 'spooky action at a distance'?",
15
+ "How does the Heisenberg uncertainty principle limit what we can know about particles?",
16
+ "Explain the concept of wave-particle duality with concrete examples.",
17
+ "What is quantum tunneling and how is it applied in modern technology?"
18
+ ],
19
+ "ethics": [
20
+ "What ethical risks exist in deploying autonomous AI systems for military decisions?",
21
+ "How should AI systems handle bias in training data, and whose responsibility is it to fix?",
22
+ "What are the ethical implications of using AI for predictive policing?",
23
+ "Discuss the tension between AI-driven efficiency and human employment rights.",
24
+ "What ethical framework should guide the development of general artificial intelligence?",
25
+ "How should consent and privacy be managed when AI analyses personal health data?",
26
+ "What moral obligations do AI developers have toward vulnerable populations?"
27
+ ],
28
+ "philosophy": [
29
+ "What is the relationship between knowledge and belief in epistemology?",
30
+ "Explain the problem of free will versus determinism and the main philosophical positions.",
31
+ "What is the Chinese Room argument and what does it say about machine understanding?",
32
+ "How does the ship of Theseus problem relate to questions of personal identity?",
33
+ "Discuss Plato's allegory of the cave and its relevance to modern information bubbles.",
34
+ "What is the hard problem of consciousness and why is it considered unsolved?"
35
+ ],
36
+ "creativity": [
37
+ "How would you design a bridge inspired by biological structures found in nature?",
38
+ "Propose an innovative approach to teaching mathematics using virtual reality.",
39
+ "Design a thought experiment that illustrates the concept of emergence in complex systems.",
40
+ "How could music composition algorithms incorporate emotional intelligence?",
41
+ "Imagine a city designed entirely around pedestrian well-being. Describe its key features.",
42
+ "Propose a creative solution for reducing food waste using AI and community networks."
43
+ ],
44
+ "empathy": [
45
+ "How should you support someone experiencing grief without being dismissive?",
46
+ "Explain how cultural differences affect expressions of empathy and emotional support.",
47
+ "What role does active listening play in resolving interpersonal conflicts?",
48
+ "How can AI systems be designed to respond compassionately to users in emotional distress?",
49
+ "Describe the psychological impact of social isolation and how communities can help.",
50
+ "How should educators respond to a student who is struggling with anxiety?"
51
+ ],
52
+ "reasoning": [
53
+ "Explain why correlation does not imply causation with multiple illustrative examples.",
54
+ "What are the most common logical fallacies in everyday arguments? Provide examples of each.",
55
+ "How does Bayesian reasoning differ from frequentist approaches to probability?",
56
+ "Explain the difference between deductive, inductive, and abductive reasoning.",
57
+ "Why is the base rate fallacy so common and how can it lead to poor decisions?",
58
+ "Describe the sorites paradox and what it reveals about vagueness in logic.",
59
+ "How do cognitive biases like confirmation bias affect scientific research?"
60
+ ],
61
+ "systems": [
62
+ "What role does memory play in AI reasoning systems and how does it differ from human memory?",
63
+ "Explain how feedback loops can cause both stability and instability in complex systems.",
64
+ "How do attention mechanisms in transformers relate to human selective attention?",
65
+ "Describe the trade-offs between model size, training data, and inference cost in LLMs.",
66
+ "How can retrieval-augmented generation improve the factual accuracy of language models?",
67
+ "What are the key challenges in building AI systems that can explain their own reasoning?",
68
+ "How does the concept of emergence apply to neural network training dynamics?"
69
+ ]
70
+ }
evaluation/reasoning_metrics.py ADDED
@@ -0,0 +1,421 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Reasoning Metrics - scores text quality across multiple dimensions.
3
+
4
+ Each dimension is scored 0.0-1.0 using concrete textual analysis:
5
+ regex patterns, keyword detection, sentence structure analysis,
6
+ word counts, and concept density measures.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import math
12
+ import re
13
+ from collections import Counter
14
+ from typing import Dict, List, Optional
15
+
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Keyword / pattern banks
19
+ # ---------------------------------------------------------------------------
20
+
21
+ _TRANSITION_WORDS = {
22
+ "therefore", "however", "moreover", "furthermore", "consequently",
23
+ "nevertheless", "additionally", "specifically", "thus", "hence",
24
+ "accordingly", "meanwhile", "similarly", "conversely", "likewise",
25
+ "in contrast", "on the other hand", "as a result", "for example",
26
+ "for instance", "in addition", "in particular", "in summary",
27
+ "to illustrate", "that is", "notably", "indeed", "alternatively",
28
+ }
29
+
30
+ _EXAMPLE_MARKERS = {
31
+ "for example", "for instance", "such as", "e.g.", "e.g.,",
32
+ "consider", "imagine", "suppose", "like when", "think of",
33
+ "analogy", "analogous", "metaphor", "illustration", "to illustrate",
34
+ "case in point", "picture", "envision", "scenario",
35
+ }
36
+
37
+ _PERSPECTIVE_MARKERS = {
38
+ "on the other hand", "from another perspective", "alternatively",
39
+ "some argue", "others believe", "one view", "another view",
40
+ "proponents", "opponents", "critics", "supporters",
41
+ "different perspective", "counterargument", "counter-argument",
42
+ "multiple perspectives", "various viewpoints", "diverse views",
43
+ "some scholars", "other researchers", "in contrast",
44
+ "conversely", "while some", "whereas others",
45
+ "from a … standpoint", "from the standpoint",
46
+ "different schools of thought", "competing theories",
47
+ "pluralistic", "multifaceted",
48
+ }
49
+
50
+ _SCIENTIFIC_TERMS = {
51
+ "hypothesis", "theory", "empirical", "variable", "correlation",
52
+ "causation", "experiment", "observation", "evidence", "data",
53
+ "quantitative", "qualitative", "statistical", "significant",
54
+ "methodology", "systematic", "peer-reviewed", "replicable",
55
+ "falsifiable", "paradigm", "model", "framework", "mechanism",
56
+ "phenomenon", "equation", "entropy", "quantum", "relativity",
57
+ "thermodynamic", "kinetic", "potential", "electromagnetic",
58
+ "wavelength", "frequency", "spectrum", "molecular", "cellular",
59
+ "neural", "cognitive", "algorithm", "computational", "stochastic",
60
+ "deterministic", "probabilistic", "inference", "deduction",
61
+ "induction", "axiom", "theorem", "coefficient", "parameter",
62
+ "optimization", "convergence", "divergence", "gradient",
63
+ "eigenvalue", "tensor", "vector", "scalar", "integral",
64
+ "derivative", "differential", "asymptotic", "heuristic",
65
+ }
66
+
67
+ _ETHICAL_TERMS = {
68
+ "ethical", "moral", "responsibility", "accountability", "fairness",
69
+ "justice", "bias", "harm", "benefit", "consequence", "implication",
70
+ "stakeholder", "rights", "duty", "obligation", "dilemma",
71
+ "autonomy", "consent", "privacy", "transparency", "trust",
72
+ "equity", "inclusion", "diversity", "sustainability",
73
+ "well-being", "welfare", "dignity", "integrity", "virtue",
74
+ "utilitarian", "deontological", "consequentialist", "normative",
75
+ "values", "principles", "compassion", "empathy",
76
+ "social impact", "unintended consequences",
77
+ }
78
+
79
+ _STRUCTURE_PATTERNS = [
80
+ re.compile(r"^\s*\d+[\.\)]\s", re.MULTILINE), # numbered list
81
+ re.compile(r"^\s*[-*]\s", re.MULTILINE), # bullet list
82
+ re.compile(r"^#{1,4}\s", re.MULTILINE), # markdown headings
83
+ re.compile(r"\b(first|second|third|finally|lastly)\b", re.I),
84
+ re.compile(r"\b(step\s+\d+|phase\s+\d+)\b", re.I),
85
+ re.compile(r"\b(in conclusion|to summarize|in summary)\b", re.I),
86
+ re.compile(r"\b(introduction|background|method|result|discussion|conclusion)\b", re.I),
87
+ ]
88
+
89
+
90
+ # ---------------------------------------------------------------------------
91
+ # Helpers
92
+ # ---------------------------------------------------------------------------
93
+
94
+ def _word_tokenize(text: str) -> List[str]:
95
+ """Simple whitespace + punctuation tokeniser."""
96
+ return re.findall(r"[A-Za-z]+(?:[-'][A-Za-z]+)*", text.lower())
97
+
98
+
99
+ def _sentences(text: str) -> List[str]:
100
+ """Split text into sentences (simple heuristic)."""
101
+ parts = re.split(r'(?<=[.!?])\s+', text.strip())
102
+ return [s for s in parts if len(s) > 2]
103
+
104
+
105
+ def _unique_word_ratio(words: List[str]) -> float:
106
+ if not words:
107
+ return 0.0
108
+ return len(set(words)) / len(words)
109
+
110
+
111
+ def _sigmoid(x: float, midpoint: float = 0.0, steepness: float = 1.0) -> float:
112
+ """Soft clamping via logistic function, output in (0, 1)."""
113
+ try:
114
+ return 1.0 / (1.0 + math.exp(-steepness * (x - midpoint)))
115
+ except OverflowError:
116
+ return 0.0 if x < midpoint else 1.0
117
+
118
+
119
+ def _keyword_density(words: List[str], keyword_set: set) -> float:
120
+ """Fraction of *unique* keywords from the set that appear in words."""
121
+ if not keyword_set:
122
+ return 0.0
123
+ word_set = set(words)
124
+ hits = word_set & keyword_set
125
+ return len(hits) / len(keyword_set)
126
+
127
+
128
+ def _phrase_count(text: str, phrases: set) -> int:
129
+ """Count how many distinct phrases from *phrases* appear in text."""
130
+ text_lower = text.lower()
131
+ return sum(1 for p in phrases if p in text_lower)
132
+
133
+
134
+ # ---------------------------------------------------------------------------
135
+ # Main class
136
+ # ---------------------------------------------------------------------------
137
+
138
class ReasoningMetrics:
    """Score a reasoning response on multiple quality dimensions.

    Each `_score_*` method returns a float in [0.0, 1.0], rounded to 4
    decimal places; `score_reasoning` combines them into a weighted
    composite using `self.weights`.
    """

    # Default weights for the composite score (sum to 1.0; any subset of
    # keys may be overridden via the constructor).
    DEFAULT_WEIGHTS: Dict[str, float] = {
        "clarity": 0.15,
        "structure": 0.15,
        "depth": 0.15,
        "examples": 0.10,
        "multi_perspective": 0.10,
        "scientific_rigor": 0.15,
        "ethical_awareness": 0.10,
        "coherence": 0.10,
    }

    def __init__(self, weights: Optional[Dict[str, float]] = None) -> None:
        # A copy of DEFAULT_WEIGHTS is taken so instances never mutate the
        # class-level dict. NOTE(review): an explicitly passed empty dict is
        # falsy and silently falls back to the defaults.
        self.weights = weights or dict(self.DEFAULT_WEIGHTS)

    # -- individual scorers ------------------------------------------------

    def _score_clarity(self, text: str, words: List[str], sents: List[str]) -> float:
        """
        Clarity: readable sentences, moderate length, good vocabulary variety.

        Blend of: closeness of mean sentence length to ~20 words, vocabulary
        diversity, a short-response penalty, and transition-word usage.
        """
        if not sents:
            return 0.0

        # Average sentence length (ideal ~15-25 words)
        avg_sent_len = len(words) / len(sents)
        len_score = 1.0 - min(abs(avg_sent_len - 20) / 20, 1.0)

        # Vocabulary diversity (unique / total)
        diversity = _unique_word_ratio(words)

        # Penalise very short responses
        length_penalty = min(len(words) / 50, 1.0)

        # Transition word usage (smooths reading)
        transition_count = _phrase_count(text, _TRANSITION_WORDS)
        transition_score = min(transition_count / max(len(sents) * 0.3, 1), 1.0)

        score = (
            0.35 * len_score
            + 0.25 * diversity
            + 0.20 * length_penalty
            + 0.20 * transition_score
        )
        return round(min(max(score, 0.0), 1.0), 4)

    def _score_structure(self, text: str, sents: List[str]) -> float:
        """
        Structure: numbered/bulleted lists, headings, step markers,
        paragraph breaks, logical ordering cues.
        """
        if not text.strip():
            return 0.0

        pattern_hits = sum(1 for p in _STRUCTURE_PATTERNS if p.search(text))
        pattern_score = min(pattern_hits / 4, 1.0)  # 4+ patterns = perfect

        # Paragraph structure (multiple newline-separated blocks)
        paragraphs = [p.strip() for p in re.split(r"\n\s*\n", text) if p.strip()]
        para_score = min(len(paragraphs) / 4, 1.0)

        # Sentence count contribution (longer = more structured opportunity)
        sent_score = min(len(sents) / 8, 1.0)

        score = 0.50 * pattern_score + 0.25 * para_score + 0.25 * sent_score
        return round(min(max(score, 0.0), 1.0), 4)

    def _score_depth(self, text: str, words: List[str], sents: List[str]) -> float:
        """
        Depth: word count, concept density, vocabulary richness.
        """
        if not words:
            return 0.0

        # Word count (sigmoid centred at ~200 words)
        wc_score = _sigmoid(len(words), midpoint=200, steepness=0.015)

        # Long words (>= 8 chars) as proxy for complex vocabulary
        long_words = [w for w in words if len(w) >= 8]
        complexity = min(len(long_words) / max(len(words) * 0.15, 1), 1.0)

        # Unique concept density: unique 3+-letter words / total words
        concepts = set(w for w in words if len(w) >= 3)
        concept_density = min(len(concepts) / max(len(words) * 0.5, 1), 1.0)

        # Sentence count depth
        sent_depth = min(len(sents) / 10, 1.0)

        score = (
            0.30 * wc_score
            + 0.25 * complexity
            + 0.25 * concept_density
            + 0.20 * sent_depth
        )
        return round(min(max(score, 0.0), 1.0), 4)

    def _score_examples(self, text: str) -> float:
        """
        Examples: presence of illustrative examples, analogies, scenarios.

        Evidence counted: example-marker phrases, quoted snippets, fenced and
        inline code, and numbers carrying a unit suffix.
        """
        if not text.strip():
            return 0.0

        marker_hits = _phrase_count(text, _EXAMPLE_MARKERS)

        # Quoted examples
        quotes = len(re.findall(r'"[^"]{5,}"', text))

        # Code / formula blocks (each fenced block has two ``` markers)
        code_blocks = len(re.findall(r'```', text)) // 2
        inline_code = len(re.findall(r'`[^`]+`', text))

        # Concrete numbers / data points (a unit suffix is required)
        numbers = len(re.findall(r'\b\d+(?:\.\d+)?(?:\s*(?:%|kg|m|km|s|ms|Hz|J|W|N))\b', text))

        total_evidence = marker_hits + quotes + code_blocks + inline_code + numbers
        score = min(total_evidence / 5, 1.0)  # 5+ pieces = full score
        return round(min(max(score, 0.0), 1.0), 4)

    def _score_multi_perspective(self, text: str) -> float:
        """
        Multi-perspective: references to multiple viewpoints, balanced discussion.

        Perspective-marker phrases weigh double; hedging conjunctions count
        singly and question marks half.
        """
        if not text.strip():
            return 0.0

        perspective_hits = _phrase_count(text, _PERSPECTIVE_MARKERS)

        # "but" / "however" / "although" as hedging signals
        hedge_words = len(re.findall(
            r'\b(?:but|however|although|though|yet|still|nonetheless|'
            r'notwithstanding|despite|regardless)\b',
            text, re.I
        ))

        # Question marks (self-questioning / Socratic style)
        questions = text.count('?')

        total = perspective_hits * 2 + hedge_words + questions * 0.5
        score = min(total / 8, 1.0)
        return round(min(max(score, 0.0), 1.0), 4)

    def _score_scientific_rigor(self, text: str, words: List[str]) -> float:
        """
        Scientific rigor: precise terminology, quantitative language,
        references to evidence/method.
        """
        if not words:
            return 0.0

        sci_hits = sum(1 for w in set(words) if w in _SCIENTIFIC_TERMS)
        term_score = min(sci_hits / 6, 1.0)  # 6+ unique scientific terms

        # Quantitative expressions
        quant = len(re.findall(
            r'\b\d+(?:\.\d+)?(?:\s*(?:x|times|percent|%|ratio|factor))\b',
            text, re.I
        ))
        quant += len(re.findall(r'[<>=]+\s*\d', text))
        quant_score = min(quant / 3, 1.0)

        # Causal / evidence language
        causal = len(re.findall(
            r'\b(?:because|caused? by|leads? to|results? in|due to|'
            r'evidence suggests?|research shows?|studies indicate|'
            r'according to|demonstrated|proven|measured)\b',
            text, re.I
        ))
        causal_score = min(causal / 4, 1.0)

        score = 0.45 * term_score + 0.25 * causal_score + 0.30 * quant_score
        return round(min(max(score, 0.0), 1.0), 4)

    def _score_ethical_awareness(self, text: str, words: List[str]) -> float:
        """
        Ethical awareness: considers implications, fairness, harm, responsibility.
        """
        if not words:
            return 0.0

        eth_hits = sum(1 for w in set(words) if w in _ETHICAL_TERMS)
        term_score = min(eth_hits / 4, 1.0)

        # Implication / consequence language
        impl = len(re.findall(
            r'\b(?:implication|consequence|impact|risk|concern|'
            r'should|ought|must consider|raises questions|'
            r'responsible|accountable|careful|caution)\b',
            text, re.I
        ))
        impl_score = min(impl / 4, 1.0)

        # Stakeholder awareness
        stakeholder = len(re.findall(
            r'\b(?:people|society|community|individual|user|patient|'
            r'citizen|public|vulnerable|marginalized|affected)\b',
            text, re.I
        ))
        stake_score = min(stakeholder / 3, 1.0)

        score = 0.40 * term_score + 0.35 * impl_score + 0.25 * stake_score
        return round(min(max(score, 0.0), 1.0), 4)

    def _score_coherence(self, text: str, sents: List[str], words: List[str]) -> float:
        """
        Coherence: adjacent sentences share vocabulary, topic consistency.

        NOTE(review): *words* is unused here; kept for signature symmetry
        with the other scorers.
        """
        if len(sents) < 2:
            return 0.5  # neutral for very short texts

        # Jaccard lexical overlap between each adjacent sentence pair
        overlaps = []
        for i in range(len(sents) - 1):
            w1 = set(_word_tokenize(sents[i]))
            w2 = set(_word_tokenize(sents[i + 1]))
            if w1 | w2:
                overlaps.append(len(w1 & w2) / len(w1 | w2))
            else:
                overlaps.append(0.0)
        avg_overlap = sum(overlaps) / len(overlaps) if overlaps else 0.0
        # Ideal overlap is moderate (0.1-0.3); too high = repetitive
        overlap_score = 1.0 - abs(avg_overlap - 0.2) / 0.4
        overlap_score = max(overlap_score, 0.0)

        # Pronoun / referent continuity
        pronoun_count = len(re.findall(
            r'\b(?:this|that|these|those|it|they|its|their|such|said)\b',
            text, re.I
        ))
        ref_score = min(pronoun_count / max(len(sents), 1) / 1.5, 1.0)

        score = 0.60 * overlap_score + 0.40 * ref_score
        return round(min(max(score, 0.0), 1.0), 4)

    # -- public API --------------------------------------------------------

    def score_reasoning(self, text: str) -> Dict[str, float]:
        """Score a reasoning response on multiple dimensions.

        Returns dict with scores 0.0-1.0 for:
        - clarity, structure, depth, examples, multi_perspective,
          scientific_rigor, ethical_awareness, coherence, overall

        The returned dict also carries two integer diagnostics,
        "word_count" and "sentence_count", which are NOT scores.
        """
        words = _word_tokenize(text)
        sents = _sentences(text)

        scores: Dict[str, float] = {
            "clarity": self._score_clarity(text, words, sents),
            "structure": self._score_structure(text, sents),
            "depth": self._score_depth(text, words, sents),
            "examples": self._score_examples(text),
            "multi_perspective": self._score_multi_perspective(text),
            "scientific_rigor": self._score_scientific_rigor(text, words),
            "ethical_awareness": self._score_ethical_awareness(text, words),
            "coherence": self._score_coherence(text, sents, words),
        }

        # Weighted composite (weights are re-normalised over the keys that
        # are actually present, so partial weight dicts still work)
        total_weight = sum(self.weights.get(k, 0) for k in scores)
        if total_weight > 0:
            overall = sum(
                scores[k] * self.weights.get(k, 0) for k in scores
            ) / total_weight
        else:
            overall = sum(scores.values()) / len(scores)

        scores["overall"] = round(overall, 4)
        scores["word_count"] = len(words)
        scores["sentence_count"] = len(sents)
        return scores

    def score_batch(self, texts: List[str]) -> List[Dict[str, float]]:
        """Score a batch of responses."""
        return [self.score_reasoning(t) for t in texts]

    def compare(self, text_a: str, text_b: str) -> Dict[str, Dict[str, float]]:
        """Compare two responses side-by-side.

        Returns {"baseline": scores_a, "candidate": scores_b, "delta": b - a}.
        The delta includes the integer word/sentence counts as well, since
        they pass the numeric isinstance filter.
        """
        sa = self.score_reasoning(text_a)
        sb = self.score_reasoning(text_b)
        delta = {k: round(sb[k] - sa[k], 4) for k in sa if isinstance(sa[k], (int, float))}
        return {"baseline": sa, "candidate": sb, "delta": delta}
evaluation/run_evaluation_sprint.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Evaluation Sprint Runner
3
+
4
+ Executes the evaluation harness against all 4 conditions:
5
+ 1. Baseline (plain Llama)
6
+ 2. Phase 1-5 (debate without semantic tension)
7
+ 3. Phase 6 Full (with semantic tension, specialization, preflight)
8
+ 4. Phase 6 -PreFlight (without preflight prediction)
9
+
10
+ Usage:
11
+ python run_evaluation_sprint.py --questions 25 --output results.json
12
+ """
13
+
14
+ import sys
15
+ import argparse
16
+ import json
17
+ from datetime import datetime
18
+ from pathlib import Path
19
+
20
+ sys.path.insert(0, str(Path(__file__).parent.parent / 'reasoning_forge'))
21
+ sys.path.insert(0, str(Path(__file__).parent.parent / 'inference'))
22
+
23
+ from test_suite_evaluation import (
24
+ EvaluationHarness,
25
+ EvaluationAnalyzer,
26
+ EVALUATION_TEST_SUITE,
27
+ )
28
+
29
+
30
def run_evaluation_sprint(
    num_questions: int = 10,
    output_json: str = "evaluation_results.json",
    output_report: str = "evaluation_report.txt",
):
    """
    Run the complete evaluation sprint.

    Args:
        num_questions: How many test questions to run
            (1 .. len(EVALUATION_TEST_SUITE))
        output_json: Where to save JSON results
        output_report: Where to save text report

    Returns:
        True on success, False if any stage failed.
    """
    # Fix: the question-count banner previously hard-coded "/25"; size it
    # from the actual suite so the display cannot drift from the data.
    suite_size = len(EVALUATION_TEST_SUITE)

    print("\n" + "=" * 80)
    print("CODETTE PHASE 6 EVALUATION SPRINT")
    print("=" * 80)
    print(f"Test Date: {datetime.now().isoformat()}")
    print(f"Questions to Run: {min(num_questions, suite_size)}/{suite_size}")
    print(f"Output: {output_json}, {output_report}")
    print("=" * 80 + "\n")

    # Load ForgeEngine with Phase 6
    print("[1/4] Loading ForgeEngine with Phase 6...")
    try:
        from reasoning_forge.forge_engine import ForgeEngine

        forge = ForgeEngine(living_memory=None, enable_memory_weighting=False)

        print(" OK: ForgeEngine loaded")
        print(f" - semantic_tension_engine: {'READY' if forge.semantic_tension_engine else 'MISSING'}")
        print(f" - specialization tracker: {'READY' if forge.specialization else 'MISSING'}")
        print(f" - preflight_predictor: {'READY' if forge.preflight_predictor else 'MISSING'}")

        # Check GPU status from orchestrator
        if forge.newton.orchestrator:
            print(f" - GPU acceleration: ✓ ENABLED ({forge.newton.orchestrator.n_gpu_layers} layers)")

    except Exception as e:
        print(f" ERROR: {e}")
        return False

    # Create evaluation harness
    print("\n[2/4] Creating evaluation harness...")
    try:
        harness = EvaluationHarness(forge)
        print(" OK: Harness created")
    except Exception as e:
        print(f" ERROR: {e}")
        return False

    # Run evaluation suite
    print(f"\n[3/4] Running evaluation on {min(num_questions, suite_size)} questions...")
    print(" This will take several minutes...\n")

    try:
        test_questions = EVALUATION_TEST_SUITE[:num_questions]
        results = harness.run_evaluation_suite(test_questions)
        print(f"\n OK: Evaluation complete")
        print(f" - Baseline: {len(results['baseline_llama'])} results")
        print(f" - Phase 1-5: {len(results['phase_1_5'])} results")
        print(f" - Phase 6 Full: {len(results['phase_6_full'])} results")
        print(f" - Phase 6 -PreFlight: {len(results['phase_6_no_preflight'])} results")
    except Exception as e:
        print(f" ERROR during evaluation: {e}")
        import traceback

        traceback.print_exc()
        return False

    # Analyze results
    print(f"\n[4/4] Analyzing results...")
    try:
        analyzer = EvaluationAnalyzer(results)
        report = analyzer.report()

        # Save JSON results
        harness.export_results(output_json)

        # Save text report (with UTF-8 encoding for Unicode characters like Γ)
        with open(output_report, 'w', encoding='utf-8') as f:
            f.write(report)

        print(" OK: Analysis complete")
        print(f" - JSON saved: {output_json}")
        print(f" - Report saved: {output_report}")

        # Print summary to console (skip full report due to Unicode encoding)
        try:
            # Try to print the report
            print("\n" + report)
        except UnicodeEncodeError:
            # Windows terminal encoding issue—just note that report was saved
            print(" - Full report saved to file (Unicode summary unavailable in terminal)")

        return True

    except Exception as e:
        print(f" ERROR during analysis: {e}")
        import traceback

        traceback.print_exc()
        return False
133
+
134
+
135
def main():
    """CLI entry point: parse arguments, validate, run the sprint.

    Returns a process exit code (0 success, 1 failure).
    """
    # Fix: the upper bound was hard-coded to 25, but the suite defines its
    # own length — validate and document against len(EVALUATION_TEST_SUITE).
    max_questions = len(EVALUATION_TEST_SUITE)

    parser = argparse.ArgumentParser(
        description="Run Codette Phase 6 evaluation sprint"
    )
    parser.add_argument(
        "--questions",
        type=int,
        default=5,
        help=f"Number of test questions to run (1-{max_questions}, default 5)",
    )
    parser.add_argument(
        "--output-json",
        default="evaluation_results.json",
        help="Output JSON file for results",
    )
    parser.add_argument(
        "--output-report",
        default="evaluation_report.txt",
        help="Output text file for report",
    )

    args = parser.parse_args()

    # Validate num_questions against the actual suite size
    if args.questions < 1 or args.questions > max_questions:
        print(f"ERROR: --questions must be between 1 and {max_questions}")
        return 1

    # Run sprint
    success = run_evaluation_sprint(
        num_questions=args.questions,
        output_json=args.output_json,
        output_report=args.output_report,
    )

    return 0 if success else 1
171
+
172
+
173
# Script entry point: propagate main()'s exit code (0 success, 1 failure).
if __name__ == "__main__":
    sys.exit(main())
evaluation/run_evaluation_verbose.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Verbose Evaluation Runner — See Real-Time Agent Thinking
3
+
4
+ Shows exactly what agents are thinking as they reason through each question.
5
+
6
+ Usage:
7
+ python evaluation/run_evaluation_verbose.py --questions 1
8
+ """
9
+
10
+ import sys
11
+ import os
12
+ from pathlib import Path
13
+
14
# Enable verbose mode globally
# NOTE(review): set before the forge/evaluation imports below on the
# assumption they read CODETTE_VERBOSE at import time — confirm.
os.environ['CODETTE_VERBOSE'] = '1'

# Setup logging for real-time visibility
import logging
# DEBUG level + an explicit stdout handler so agent reasoning streams to
# the terminal as it happens rather than going to a buffered log file.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(name)-20s | %(levelname)-8s | %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
    ]
)
26
+
27
+ sys.path.insert(0, str(Path(__file__).parent.parent / 'reasoning_forge'))
28
+ sys.path.insert(0, str(Path(__file__).parent.parent / 'inference'))
29
+
30
+ from evaluation.test_suite_evaluation import (
31
+ EvaluationHarness,
32
+ EVALUATION_TEST_SUITE,
33
+ )
34
+
35
+
36
def run_verbose_evaluation(num_questions: int = 1):
    """Run evaluation with full real-time agent visibility.

    Loads the ForgeEngine, wraps it in an EvaluationHarness, then runs the
    first *num_questions* suite questions through forge_with_debate while
    printing the synthesis and debate metadata for each. Returns True on
    success, False if any stage failed.
    """
    banner = "=" * 100

    print("\n" + banner)
    print("CODETTE VERBOSE EVALUATION — REAL-TIME AGENT THINKING")
    print(banner)
    print(f"Questions: {num_questions}")
    print(f"Verbose mode: ON (see all agent reasoning)\n")

    # Stage 1: bring up the engine (heaviest step — model load).
    print("[1/3] Loading ForgeEngine with real LLM agents...")
    try:
        from reasoning_forge.forge_engine import ForgeEngine

        engine = ForgeEngine(living_memory=None, enable_memory_weighting=False)
        print(" ✓ ForgeEngine loaded")

        orchestrator = engine.newton.orchestrator
        if orchestrator:
            print(f" ✓ Orchestrator ready: {orchestrator.available_adapters}")
            print(f" ✓ GPU acceleration: {orchestrator.n_gpu_layers} layers")

    except Exception as e:
        print(f" ✗ ERROR: {e}")
        import traceback
        traceback.print_exc()
        return False

    # Stage 2: evaluation harness around the engine.
    print("\n[2/3] Creating evaluation harness...")
    try:
        harness = EvaluationHarness(engine)
        print(" ✓ Harness ready\n")
    except Exception as e:
        print(f" ✗ ERROR: {e}")
        return False

    # Stage 3: run the selected questions with full debate output.
    print("[3/3] Running question with full real-time reasoning output...\n")
    print(banner)

    try:
        selected = EVALUATION_TEST_SUITE[:num_questions]

        for number, question in enumerate(selected, start=1):
            print(f"\n{banner}")
            print(f"QUESTION {number}: {question.query}")
            print(f"Category: {question.category} | Difficulty: {question.difficulty}")
            print(f"Expected perspectives: {', '.join(question.expected_perspectives)}")
            print(f"{banner}\n")

            # Verbose logging from the agents fires during this call.
            print("[RUNNING DEBATE]\n")

            result = engine.forge_with_debate(question.query)

            # Synthesis lives in messages[2]["content"] when the debate ran.
            messages = result.get("messages", [])
            synthesis = messages[2].get("content", "") if len(messages) >= 3 else ""

            print(f"\n{banner}")
            print(f"[FINAL SYNTHESIS] ({len(synthesis)} characters)\n")
            print(synthesis)
            print(f"{banner}\n")

            # Debate health metrics recorded by the engine.
            metadata = result.get("metadata", {})
            print(f"[METADATA]")
            print(f" Conflicts detected: {len(metadata.get('conflicts', []))}")
            print(f" Gamma (coherence): {metadata.get('gamma', 0.5):.3f}")
            print(f" Debate rounds: {metadata.get('debate_round', 0)}")

    except Exception as e:
        print(f"\n✗ ERROR during evaluation: {e}")
        import traceback
        traceback.print_exc()
        return False

    return True
115
+
116
+
117
if __name__ == "__main__":
    import argparse

    # Minimal CLI: only the question count is configurable.
    cli = argparse.ArgumentParser(description="Verbose evaluation with real-time agent thinking")
    cli.add_argument("--questions", type=int, default=1, help="Number of questions to run (default: 1)")
    options = cli.parse_args()

    ok = run_verbose_evaluation(options.questions)
    sys.exit(0 if ok else 1)
evaluation/test_suite_evaluation.py ADDED
@@ -0,0 +1,735 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Rigorous Evaluation Test Suite for Codette Phase 6
3
+
4
+ This test suite answers:
5
+ 1. Is Codette actually better than baseline?
6
+ 2. Does Phase 6 provide measurable improvement over Phase 1-5?
7
+ 3. Is the system gaming coherence (high Γ but low accuracy)?
8
+ 4. Do individual Phase 6 components add value?
9
+
10
+ Test Strategy:
11
+ - 25 questions spanning physics, ethics, consciousness, creativity, systems
12
+ - Run each through 4 conditions (Baseline, Phase 1-5, Phase 6 Full, Phase 6 -PreFlight)
13
+ - Measure: correctness, reasoning_depth, coherence_score, calibration
14
+ - Detect: false consensus, adapter convergence, coherence-accuracy divergence
15
+ """
16
+
17
+ import json
18
+ from typing import Dict, List, Tuple, Optional
19
+ from dataclasses import dataclass, asdict
20
+ from datetime import datetime
21
+
22
+
23
@dataclass
class EvaluationQuestion:
    """Single question with ground truth and evaluation criteria.

    One entry of EVALUATION_TEST_SUITE; consumed by EvaluationHarness,
    whose scoring compares syntheses against the rubric and perspectives.
    """
    query: str  # the question posed to the system
    category: str  # physics, ethics, consciousness, creativity, systems
    difficulty: str  # easy, medium, hard
    ground_truth: str  # Correct answer or evaluation criteria
    correctness_rubric: str  # How to judge if answer is correct
    expected_perspectives: List[str]  # What distinct views should emerge
32
+
33
+
34
@dataclass
class EvaluationResult:
    """Results from running a question through one condition."""
    condition: str  # baseline_llama, phase_1_5, phase_6_full, phase_6_no_preflight
    # All _run_* methods assign `hash(query) % 10000` here, so the field is
    # an int in practice (annotation corrected from str accordingly).
    question_id: int
    query: str

    # Output quality
    synthesis: str  # final synthesized answer text
    correctness_score: float  # 0-1: how correct is final answer?
    reasoning_depth: int  # 1-5: how many distinct perspectives identified?
    calibration_error: float  # |confidence - correctness|, lower is better

    # System health
    gamma_score: float  # 0-1: coherence metric
    num_conflicts_detected: int  # conflicts surfaced during debate
    adapter_convergence: float  # 0-1: how similar are adapter outputs?

    # Timing
    elapsed_seconds: float  # wall-clock time for this condition's run

    # Raw metadata
    metadata: Dict  # engine-provided metadata dict, passed through as-is
57
+
58
+
59
+ # ============================================================================
60
+ # EVALUATION TEST SUITE (25 Questions)
61
+ # ============================================================================
62
+
63
# The full evaluation suite. Built as a single literal (previously a list
# plus a later .extend() whose comment claimed "reach 25" — the suite in
# fact holds 20 questions). Callers should size against
# len(EVALUATION_TEST_SUITE) rather than any hard-coded count.
EVALUATION_TEST_SUITE = [
    # PHYSICS (Easy, Medium, Hard)
    EvaluationQuestion(
        query="What is the speed of light in vacuum?",
        category="physics",
        difficulty="easy",
        ground_truth="299,792,458 meters per second (m/s)",
        correctness_rubric="Must state value within 1% accuracy or equivalent scientific notation",
        expected_perspectives=["relativistic constant", "fundamental speed limit", "Maxwell equations consequence"],
    ),
    EvaluationQuestion(
        query="Explain why the sky appears blue during the day",
        category="physics",
        difficulty="medium",
        ground_truth="Rayleigh scattering: shorter blue wavelengths scatter more than red in atmosphere",
        correctness_rubric="Must mention wavelength-dependent scattering or Rayleigh scattering by name",
        expected_perspectives=["Rayleigh scattering", "wavelength sensitivity", "particle size", "sunset color"],
    ),
    EvaluationQuestion(
        query="What is the relationship between entropy and time's arrow?",
        category="physics",
        difficulty="hard",
        ground_truth="Entropy increases → define time direction in thermodynamic systems. Central to irreversibility",
        correctness_rubric="Must connect entropy increase to time direction and thermodynamic asymmetry",
        expected_perspectives=["second law thermodynamics", "statistical mechanics", "time asymmetry", "reversibility paradox"],
    ),

    # ETHICS (Easy, Medium, Hard)
    EvaluationQuestion(
        query="Is it ethical to lie to save someone's life?",
        category="ethics",
        difficulty="medium",
        ground_truth="Multiple valid frameworks: deontology (never), consequentialism (yes), virtue ethics (context-dependent)",
        correctness_rubric="Must present ≥2 conflicting ethical frameworks AND acknowledge context dependency",
        expected_perspectives=["deontological duties", "consequentialist outcomes", "virtue ethics", "cultural context", "responsibility"],
    ),
    EvaluationQuestion(
        query="Should AI systems be required to explain their decisions?",
        category="ethics",
        difficulty="hard",
        ground_truth="Trade-off: explainability vs. performance. Context matters (medical vs. recommendation)",
        correctness_rubric="Must identify competing values and context-sensitivity, not just yes/no",
        expected_perspectives=["transparency value", "technical feasibility", "stakeholder rights", "accuracy-interpretability tradeoff"],
    ),
    EvaluationQuestion(
        query="What makes an action morally right or wrong?",
        category="ethics",
        difficulty="hard",
        ground_truth="Framework-dependent: deontology (rules), consequentialism (outcomes), virtue ethics (character), care ethics (relationships)",
        correctness_rubric="Must present ≥3 distinct frameworks and acknowledge incommensurable values",
        expected_perspectives=["deontological duties", "consequences", "virtue", "relationships", "cultural variation"],
    ),

    # CONSCIOUSNESS (Medium, Hard)
    EvaluationQuestion(
        query="Can machines be conscious?",
        category="consciousness",
        difficulty="hard",
        ground_truth="Depends on definition of consciousness. Intrinsic feature (hard problem) vs. functional property",
        correctness_rubric="Must articulate the hard problem of consciousness AND address definitional dependence",
        expected_perspectives=["functionalism", "panpsychism", "emergentism", "philosophical zombies", "Chinese room"],
    ),
    EvaluationQuestion(
        query="What is the relationship between brain activity and subjective experience?",
        category="consciousness",
        difficulty="hard",
        ground_truth="The mind-body problem. Correlation ≠ causation. Multiple competing solutions (dualism, physicalism, property dualism)",
        correctness_rubric="Must distinguish correlation from causation AND present ≥2 competing solutions",
        expected_perspectives=["neural correlates", "qualia", "binding problem", "interaction problem", "brute fact"],
    ),

    # CREATIVITY (Medium)
    EvaluationQuestion(
        query="What makes something creative?",
        category="creativity",
        difficulty="medium",
        ground_truth="Novelty + usefulness/value. Not just random. Requires constraints AND transcendence of them",
        correctness_rubric="Must mention both novelty AND purposefulness/value component",
        expected_perspectives=["divergent thinking", "constraint transcendence", "recombination", "aesthetic value", "functional innovation"],
    ),
    EvaluationQuestion(
        query="Can AI systems be truly creative or only recombinatory?",
        category="creativity",
        difficulty="hard",
        ground_truth="Depends on creativity definition. If novelty+value, then conditional yes. If requires intentionality, then no",
        correctness_rubric="Must connect answer to specific creativity definition",
        expected_perspectives=["combinatorial explosion", "training data limits", "intentionality", "novelty metrics", "value judgment"],
    ),

    # SYSTEMS (Medium, Hard)
    EvaluationQuestion(
        query="What is emergence in complex systems?",
        category="systems",
        difficulty="medium",
        ground_truth="Properties at system level not deducible from component properties. Examples: flocking, ant colonies, consciousness",
        correctness_rubric="Must provide definition AND give specific example showing non-deducibility",
        expected_perspectives=["reductibility limits", "self-organization", "scale-dependent properties", "holism vs reductionism"],
    ),
    EvaluationQuestion(
        query="How should AI systems balance adaptation and stability?",
        category="systems",
        difficulty="hard",
        ground_truth="Fundamental tradeoff: adapt → fit environment; stable → maintain identity. Context determines optimal balance",
        correctness_rubric="Must identify the tradeoff AND discuss context-dependent optimization",
        expected_perspectives=["adaptation pressure", "stability costs", "identity coherence", "evolutionary fitness", "robustness"],
    ),

    # INTERDISCIPLINARY (Hard - test reasoning across domains)
    EvaluationQuestion(
        query="Is free will compatible with determinism?",
        category="systems",
        difficulty="hard",
        ground_truth="Compatibilism: free will and determinism compatible if freedom = acting per one's desires/deliberation",
        correctness_rubric="Must distinguish hard determinism, libertarianism, and compatibilism; acknowledge tradeoffs",
        expected_perspectives=["deterministic physics", "choice experience", "moral responsibility", "agency definition", "neuroscience"],
    ),
    EvaluationQuestion(
        query="What is knowledge and how do we know we have it?",
        category="systems",
        difficulty="hard",
        ground_truth="Epistemology: justified true belief (traditional). Gettier problems show inadequacy. Context-dependent reliable process",
        correctness_rubric="Must discuss justification requirement AND acknowledge Gettier-type counterexamples",
        expected_perspectives=["justified true belief", "Gettier cases", "reliabilism", "internalism", "coherentism"],
    ),

    # Additional questions (formerly appended via EVALUATION_TEST_SUITE.extend)
    EvaluationQuestion(
        query="Explain photosynthesis and why it matters for life",
        category="physics",
        difficulty="easy",
        ground_truth="Plants convert light energy to chemical energy (glucose). Foundation of food chains and oxygen production",
        correctness_rubric="Must mention light→chemical conversion AND ecological/metabolic significance",
        expected_perspectives=["energy conversion", "food chain foundation", "oxygen production", "carbon cycling"],
    ),
    EvaluationQuestion(
        query="Should privacy be absolute or context-dependent?",
        category="ethics",
        difficulty="medium",
        ground_truth="Context-dependent. Weigh privacy against security, public health, justice. No absolute principle",
        correctness_rubric="Must acknowledge tradeoffs and provide context-sensitivity reasoning",
        expected_perspectives=["privacy rights", "public safety", "transparency needs", "power asymmetry", "dignity"],
    ),
    EvaluationQuestion(
        query="Can emotions be rational?",
        category="consciousness",
        difficulty="medium",
        ground_truth="Yes. Emotions encode information about value/goals. Rationality ≠ purely logical",
        correctness_rubric="Must challenge emotion/rationality dichotomy and explain emotional information content",
        expected_perspectives=["affective computing", "value encoding", "evolutionary advantage", "appraisal theory"],
    ),
    EvaluationQuestion(
        query="What is the purpose of art?",
        category="creativity",
        difficulty="medium",
        ground_truth="Multiple purposes: beauty, expression, communication, challenge norms, reflection, entertainment",
        correctness_rubric="Must identify ≥2 distinct purposes and acknowledge that artists disagree",
        expected_perspectives=["aesthetic value", "expression", "social commentary", "beauty", "meaning-making"],
    ),
    EvaluationQuestion(
        query="How do feedback loops enable or prevent learning?",
        category="systems",
        difficulty="medium",
        ground_truth="Positive loops amplify (growth/instability), negative loops stabilize (equilibrium/stagnation). Learning needs both",
        correctness_rubric="Must explain stabilizing vs. amplifying loops AND their educational role",
        expected_perspectives=["positive feedback", "negative feedback", "equilibrium", "adaptation", "resilience"],
    ),
    EvaluationQuestion(
        query="What is the nature of time?",
        category="systems",
        difficulty="hard",
        ground_truth="Metaphysical: tenseless (B-theory) vs. flowing (A-theory). Physics: symmetric at micro, asymmetric at macro",
        correctness_rubric="Must distinguish metaphysical from physical aspects and acknowledge unresolved tensions",
        expected_perspectives=["thermodynamic arrow", "relativity implications", "consciousness experience", "cosmological asymmetry"],
    ),
]
240
+
241
+
242
+ # ============================================================================
243
+ # EVALUATION HARNESS
244
+ # ============================================================================
245
+
246
+ class EvaluationHarness:
247
+ """
248
+ Run the same question through multiple Codette conditions.
249
+ Collects results for statistical analysis.
250
+ """
251
+
252
+ def __init__(self, forge_engine):
253
+ """
254
+ Args:
255
+ forge_engine: ForgeEngine instance with Phase 6 loaded
256
+ """
257
+ self.forge = forge_engine
258
+ self.results: Dict[str, List[EvaluationResult]] = {
259
+ "baseline_llama": [],
260
+ "phase_1_5": [],
261
+ "phase_6_full": [],
262
+ "phase_6_no_preflight": [],
263
+ }
264
+
265
+ # Inspect agent setup at initialization
266
+ self._inspect_agent_setup()
267
+
268
+ def _inspect_agent_setup(self) -> None:
269
+ """Log agent setup status at harness initialization."""
270
+ print("\n[AGENT SETUP INSPECTION]")
271
+ print(f" Orchestrator available: {self.forge.newton.orchestrator is not None}")
272
+
273
+ if self.forge.newton.orchestrator:
274
+ orch = self.forge.newton.orchestrator
275
+ print(f" Available adapters: {orch.available_adapters}")
276
+
277
+ print(f"\n Agent LLM modes:")
278
+ for agent in self.forge.analysis_agents:
279
+ has_orch = agent.orchestrator is not None
280
+ has_adapter = agent.adapter_name is not None
281
+ using_llm = has_orch and has_adapter
282
+ status = "✓ LLM" if using_llm else "✗ TEMPLATE"
283
+ print(f" {agent.name:12} {status:12} (orch={has_orch}, adapter={agent.adapter_name})")
284
+
285
+ print()
286
+
287
+
288
+ def run_evaluation_suite(self, questions: List[EvaluationQuestion] = None) -> Dict:
289
+ """
290
+ Run all test questions through all 4 conditions.
291
+
292
+ Args:
293
+ questions: List of EvaluationQuestions to run (default: full suite)
294
+
295
+ Returns:
296
+ results: {condition: [EvaluationResult, ...]} for statistical analysis
297
+ """
298
+ if questions is None:
299
+ questions = EVALUATION_TEST_SUITE
300
+
301
+ print(f"\n{'='*70}")
302
+ print(f"CODETTE EVALUATION SUITE: {len(questions)} questions x 4 conditions")
303
+ print(f"{'='*70}\n")
304
+
305
+ for i, question in enumerate(questions):
306
+ print(f"[{i+1}/{len(questions)}] {question.query[:60]}...")
307
+
308
+ # Run through all conditions
309
+ try:
310
+ baseline = self._run_baseline(question)
311
+ self.results["baseline_llama"].append(baseline)
312
+ except Exception as e:
313
+ print(f" WARNING: Baseline failed: {e}")
314
+
315
+ try:
316
+ phase_1_5 = self._run_phase_1_5(question)
317
+ self.results["phase_1_5"].append(phase_1_5)
318
+ # Show sample on first question
319
+ if i == 0:
320
+ print(f" [Phase 1-5] {len(phase_1_5.synthesis)} chars, correctness={phase_1_5.correctness_score:.2f}")
321
+ print(f" Sample: {phase_1_5.synthesis[:150]}...")
322
+ except Exception as e:
323
+ print(f" WARNING: Phase 1-5 failed: {e}")
324
+
325
+ try:
326
+ phase_6_full = self._run_phase_6_full(question)
327
+ self.results["phase_6_full"].append(phase_6_full)
328
+ # Show sample on first question
329
+ if i == 0:
330
+ print(f" [Phase 6 Full] {len(phase_6_full.synthesis)} chars, correctness={phase_6_full.correctness_score:.2f}")
331
+ print(f" Sample: {phase_6_full.synthesis[:150]}...")
332
+ except Exception as e:
333
+ print(f" WARNING: Phase 6 full failed: {e}")
334
+
335
+ try:
336
+ phase_6_no_preflight = self._run_phase_6_no_preflight(question)
337
+ self.results["phase_6_no_preflight"].append(phase_6_no_preflight)
338
+ # Show sample on first question
339
+ if i == 0:
340
+ print(f" [Phase 6 -PreFlight] {len(phase_6_no_preflight.synthesis)} chars, correctness={phase_6_no_preflight.correctness_score:.2f}")
341
+ print(f" Sample: {phase_6_no_preflight.synthesis[:150]}...")
342
+ except Exception as e:
343
+ print(f" WARNING: Phase 6 -preflight failed: {e}")
344
+
345
+ return self.results
346
+
347
    def _run_baseline(self, question: EvaluationQuestion) -> EvaluationResult:
        """Run plain Llama baseline (no routing, no debate).

        Currently a stub: returns fixed, neutral scores instead of invoking
        the base model, so baseline numbers are placeholders, not
        measurements.
        """
        # Placeholder: would use base Llama model
        return EvaluationResult(
            condition="baseline_llama",
            # NOTE(review): hash() is salted per process (PYTHONHASHSEED),
            # so this id is not stable across runs; it is also an int even
            # though the dataclass annotates question_id as str — confirm
            # nothing joins on this field across separate runs.
            question_id=hash(question.query) % 10000,
            query=question.query,
            synthesis="[baseline placeholder]",
            correctness_score=0.5,  # neutral midpoint by construction
            reasoning_depth=1,  # single perspective assumed for a plain model
            calibration_error=0.3,
            gamma_score=1.0,  # a lone voice is trivially "coherent"
            num_conflicts_detected=0,
            adapter_convergence=1.0,
            elapsed_seconds=0.0,
            metadata={}
        )
364
+
365
+ def _run_phase_1_5(self, question: EvaluationQuestion) -> EvaluationResult:
366
+ """Run Phase 1-5 system (debate, no semantic tension, no specialization)."""
367
+ import time
368
+ start = time.time()
369
+
370
+ # Temporarily disable Phase 6 components
371
+ original_tension_engine = self.forge.semantic_tension_engine
372
+ original_specialization = self.forge.specialization
373
+ self.forge.semantic_tension_engine = None
374
+ self.forge.specialization = None
375
+
376
+ result = self.forge.forge_with_debate(question.query)
377
+ elapsed = time.time() - start
378
+
379
+ # Restore Phase 6 components
380
+ self.forge.semantic_tension_engine = original_tension_engine
381
+ self.forge.specialization = original_specialization
382
+
383
+ # Extract synthesis from result structure
384
+ synthesis = ""
385
+ if "messages" in result and len(result["messages"]) >= 3:
386
+ synthesis = result["messages"][2].get("content", "")
387
+
388
+ return EvaluationResult(
389
+ condition="phase_1_5",
390
+ question_id=hash(question.query) % 10000,
391
+ query=question.query,
392
+ synthesis=synthesis,
393
+ correctness_score=self._score_correctness(synthesis, question),
394
+ reasoning_depth=self._score_reasoning_depth(result, question),
395
+ calibration_error=self._score_calibration(result),
396
+ gamma_score=result.get("metadata", {}).get("gamma", 0.5),
397
+ num_conflicts_detected=len(result.get("metadata", {}).get("conflicts", [])),
398
+ adapter_convergence=self._measure_convergence(result),
399
+ elapsed_seconds=elapsed,
400
+ metadata=result.get("metadata", {})
401
+ )
402
+
403
+ def _run_phase_6_full(self, question: EvaluationQuestion) -> EvaluationResult:
404
+ """Run full Phase 6 system."""
405
+ import time
406
+ start = time.time()
407
+
408
+ result = self.forge.forge_with_debate(question.query)
409
+ elapsed = time.time() - start
410
+
411
+ # Extract synthesis from result structure
412
+ # forge_with_debate returns: {"messages": [...], "metadata": {...}}
413
+ # Synthesis is in messages[2]["content"]
414
+ synthesis = ""
415
+ if "messages" in result and len(result["messages"]) >= 3:
416
+ synthesis = result["messages"][2].get("content", "")
417
+
418
+ return EvaluationResult(
419
+ condition="phase_6_full",
420
+ question_id=hash(question.query) % 10000,
421
+ query=question.query,
422
+ synthesis=synthesis,
423
+ correctness_score=self._score_correctness(synthesis, question),
424
+ reasoning_depth=self._score_reasoning_depth(result, question),
425
+ calibration_error=self._score_calibration(result),
426
+ gamma_score=result.get("metadata", {}).get("gamma", 0.5),
427
+ num_conflicts_detected=len(result.get("metadata", {}).get("conflicts", [])),
428
+ adapter_convergence=self._measure_convergence(result),
429
+ elapsed_seconds=elapsed,
430
+ metadata=result.get("metadata", {})
431
+ )
432
+
433
def _run_phase_6_no_preflight(self, question: "EvaluationQuestion") -> "EvaluationResult":
    """Run Phase 6 with the pre-flight predictor ablated.

    Temporarily sets ``self.forge.preflight_predictor`` to None so the
    debate runs without pre-flight prediction, then restores it.

    Fix: restoration now happens in a ``finally`` block, so an exception
    raised inside ``forge_with_debate()`` can no longer leave the forge
    permanently without its predictor.

    Args:
        question: The evaluation question to run.

    Returns:
        EvaluationResult tagged with condition "phase_6_no_preflight".
    """
    import time
    start = time.time()

    # Ablate the predictor; always restore it, even on error.
    original_predictor = self.forge.preflight_predictor
    self.forge.preflight_predictor = None
    try:
        result = self.forge.forge_with_debate(question.query)
        elapsed = time.time() - start
    finally:
        self.forge.preflight_predictor = original_predictor

    # Extract synthesis from result structure (messages[2]["content"]).
    synthesis = ""
    if "messages" in result and len(result["messages"]) >= 3:
        synthesis = result["messages"][2].get("content", "")

    return EvaluationResult(
        condition="phase_6_no_preflight",
        question_id=hash(question.query) % 10000,
        query=question.query,
        synthesis=synthesis,
        correctness_score=self._score_correctness(synthesis, question),
        reasoning_depth=self._score_reasoning_depth(result, question),
        calibration_error=self._score_calibration(result),
        gamma_score=result.get("metadata", {}).get("gamma", 0.5),
        num_conflicts_detected=len(result.get("metadata", {}).get("conflicts", [])),
        adapter_convergence=self._measure_convergence(result),
        elapsed_seconds=elapsed,
        metadata=result.get("metadata", {})
    )
467
+
468
+ def _score_correctness(self, synthesis: str, question: EvaluationQuestion) -> float:
469
+ """
470
+ Score how correct the final synthesis is (0-1).
471
+
472
+ Uses semantic overlap on key concepts from correctness_rubric and expected_perspectives.
473
+ More reasonable than word-overlap on ground_truth alone.
474
+ """
475
+ if not synthesis or len(synthesis) < 10:
476
+ return 0.0
477
+
478
+ synthesis_lower = synthesis.lower()
479
+
480
+ # Extract key concepts from rubric
481
+ rubric_lower = question.correctness_rubric.lower()
482
+ expected_lower = [p.lower() for p in question.expected_perspectives]
483
+
484
+ # Check for key rubric terms
485
+ rubric_terms = set()
486
+ for word in rubric_lower.split():
487
+ if len(word) > 4 and word not in ['must', 'state', 'within', 'accuracy', 'equivalent']:
488
+ rubric_terms.add(word.strip('().,'))
489
+
490
+ # Check for expected perspectives
491
+ perspective_hits = 0
492
+ for perspective in expected_lower:
493
+ if perspective in synthesis_lower:
494
+ perspective_hits += 1
495
+
496
+ # Score: percentage of expected perspectives present
497
+ perspective_score = min(1.0, perspective_hits / max(len(question.expected_perspectives), 1))
498
+
499
+ # Bonus if synthesis is substantive (shows reasoning effort)
500
+ length_bonus = min(0.2, len(synthesis) / 1000.0) # Up to 0.2 bonus for lengthy synthesis
501
+
502
+ return min(1.0, perspective_score + length_bonus)
503
+
504
+ def _score_reasoning_depth(self, result: Dict, question: EvaluationQuestion) -> int:
505
+ """
506
+ Score depth of reasoning (1-5).
507
+
508
+ 1 = minimal reasoning, 5 = deep multi-perspective integration
509
+ Based on synthesis length and debate metrics.
510
+ """
511
+ metadata = result.get("metadata", {})
512
+ synthesis_messages = result.get("messages", [])
513
+ synthesis_length = 0
514
+ if len(synthesis_messages) >= 3:
515
+ synthesis_length = len(synthesis_messages[2].get("content", ""))
516
+
517
+ # Map synthesis length to reasoning depth
518
+ if synthesis_length < 100:
519
+ return 1
520
+ elif synthesis_length < 500:
521
+ return 2
522
+ elif synthesis_length < 1000:
523
+ return 3
524
+ elif synthesis_length < 2000:
525
+ return 4
526
+ else:
527
+ return 5
528
+
529
+ def _score_calibration(self, result: Dict) -> float:
530
+ """
531
+ Score calibration: |reported_confidence - actual_correctness|.
532
+
533
+ Lower is better. 0 = perfectly calibrated.
534
+ """
535
+ metadata = result.get("metadata", {})
536
+ reported_confidence = metadata.get("coherence", 0.5)
537
+
538
+ # For now, use actual correctness will be measured separately
539
+ # Placeholder: assume 0.1 average calibration error
540
+ return 0.1
541
+
542
+ def _measure_convergence(self, result: Dict) -> float:
543
+ """
544
+ Measure semantic convergence between adapter outputs (0-1).
545
+
546
+ 0 = all different, 1 = all identical. Danger zone: >0.85
547
+ """
548
+ metadata = result.get("metadata", {})
549
+
550
+ # Check specialization tracker output
551
+ spec_metrics = metadata.get("specialization_metrics", {})
552
+ convergence_alerts = spec_metrics.get("convergence_alerts", [])
553
+
554
+ if not convergence_alerts:
555
+ return 0.5 # Neutral baseline
556
+
557
+ # Take max similarity from recent alerts
558
+ max_similarity = 0.0
559
+ for alert in convergence_alerts:
560
+ if isinstance(alert, dict):
561
+ max_sim = alert.get("max_similarity", 0.0)
562
+ max_similarity = max(max_similarity, max_sim)
563
+
564
+ return min(1.0, max_similarity)
565
+
566
def export_results(self, filepath: str) -> None:
    """Write all accumulated results to *filepath* as pretty-printed JSON.

    Each condition maps to a list of serialized result dicts; any value
    that is not JSON-native falls back to ``str()`` via ``default=str``.
    """
    serializable = {
        condition: [self._serialize_result(asdict(res)) for res in batch]
        for condition, batch in self.results.items()
    }

    with open(filepath, 'w') as handle:
        json.dump(serializable, handle, indent=2, default=str)

    print(f"\nResults exported to {filepath}")
576
+
577
+ def _serialize_result(self, result_dict: Dict) -> Dict:
578
+ """Convert enums and non-serializable objects to strings for JSON."""
579
+ cleaned = {}
580
+ for key, value in result_dict.items():
581
+ if key == 'metadata' and isinstance(value, dict):
582
+ # Convert enum values in metadata to strings
583
+ cleaned[key] = {
584
+ k: str(v) if hasattr(v, 'name') else v
585
+ for k, v in value.items()
586
+ }
587
+ else:
588
+ cleaned[key] = value
589
+ return cleaned
590
+
591
+
592
+ # ============================================================================
593
+ # STATISTICAL ANALYSIS
594
+ # ============================================================================
595
+
596
class EvaluationAnalyzer:
    """Analyze evaluation results for statistical significance and insights."""

    # Output metric name -> EvaluationResult attribute it is drawn from.
    _METRIC_ATTRS = (
        ("correctness", "correctness_score"),
        ("reasoning_depth", "reasoning_depth"),
        ("calibration_error", "calibration_error"),
        ("gamma_score", "gamma_score"),
        ("adapter_convergence", "adapter_convergence"),
    )

    def __init__(self, results: Dict[str, List["EvaluationResult"]]):
        # Mapping of condition name -> list of results for that condition.
        self.results = results

    def summary_statistics(self) -> Dict:
        """Compute mean/std for each condition across metrics."""
        summary: Dict = {}

        for condition, batch in self.results.items():
            if not batch:
                continue

            per_metric = {}
            for metric, attr in self._METRIC_ATTRS:
                vals = [getattr(r, attr) for r in batch]
                per_metric[metric] = {
                    "mean": sum(vals) / len(vals),
                    "std": self._std(vals),
                }
            summary[condition] = per_metric

        return summary

    def emergent_behavior_check(self) -> Dict:
        """Flag pathological behaviors across all recorded results.

        Checks for:
        - false consensus: high Γ (coherence) paired with low accuracy
        - convergence drift: adapter convergence above 0.85
        - miscalibration: high reported confidence but low correctness
        """
        alerts: Dict[str, list] = {
            "false_consensus": [],
            "convergence_drift": [],
            "miscalibration": [],
        }

        for condition, batch in self.results.items():
            for res in batch:
                snippet = res.query[:60]

                # Alert 1: high coherence with low correctness.
                if res.gamma_score > 0.8 and res.correctness_score < 0.5:
                    alerts["false_consensus"].append({
                        "condition": condition,
                        "query": snippet,
                        "gamma": res.gamma_score,
                        "correctness": res.correctness_score,
                    })

                # Alert 2: adapters converging past the danger threshold.
                if res.adapter_convergence > 0.85:
                    alerts["convergence_drift"].append({
                        "condition": condition,
                        "query": snippet,
                        "convergence": res.adapter_convergence,
                    })

                # Alert 3: confident but wrong.
                reported = res.metadata.get("coherence", 0.5)
                if reported > 0.8 and res.correctness_score < 0.5:
                    alerts["miscalibration"].append({
                        "condition": condition,
                        "query": snippet,
                        "reported_confidence": reported,
                        "actual_correctness": res.correctness_score,
                    })

        return alerts

    def _std(self, values: List[float]) -> float:
        """Population standard deviation of *values* (0.0 for <2 samples)."""
        n = len(values)
        if n < 2:
            return 0.0
        mean = sum(values) / n
        return (sum((v - mean) ** 2 for v in values) / n) ** 0.5

    def report(self) -> str:
        """Generate human-readable evaluation report."""
        stats = self.summary_statistics()
        alerts = self.emergent_behavior_check()

        rule = "=" * 80
        parts = ["\n" + rule + "\n",
                 "CODETTE PHASE 6 EVALUATION REPORT\n",
                 rule + "\n\n"]

        parts.append("SUMMARY STATISTICS\n")
        parts.append("-" * 80 + "\n")
        for condition, metrics in stats.items():
            parts.append(f"\n{condition}:\n")
            for metric, values in metrics.items():
                parts.append(f"  {metric}: {values['mean']:.3f} ± {values['std']:.3f}\n")

        parts.append("\n\n" + rule + "\n")
        parts.append("EMERGENT BEHAVIOR ALERTS\n")
        parts.append("-" * 80 + "\n")

        parts.append(f"\nFalse Consensus (High Γ, Low Accuracy): {len(alerts['false_consensus'])} cases\n")
        for alert in alerts["false_consensus"][:3]:
            parts.append(f"  - {alert['query']}: Γ={alert['gamma']:.2f}, Correctness={alert['correctness']:.2f}\n")

        parts.append(f"\nAdapter Convergence (>0.85): {len(alerts['convergence_drift'])} cases\n")
        for alert in alerts["convergence_drift"][:3]:
            parts.append(f"  - {alert['query']}: {alert['convergence']:.2f}\n")

        parts.append(f"\nMiscalibration: {len(alerts['miscalibration'])} cases\n")
        for alert in alerts["miscalibration"][:3]:
            parts.append(f"  - {alert['query']}: Reported={alert['reported_confidence']:.2f}, Actual={alert['actual_correctness']:.2f}\n")

        parts.append("\n" + rule + "\n")
        return "".join(parts)
728
+
729
+
730
if __name__ == "__main__":
    # Usage hint only — the harness is driven from an external ForgeEngine.
    usage = (
        "Evaluation suite loaded. Use with ForgeEngine:",
        "  harness = EvaluationHarness(forge)",
        "  results = harness.run_evaluation_suite()",
        "  analyzer = EvaluationAnalyzer(results)",
        "  print(analyzer.report())",
    )
    for line in usage:
        print(line)
inference/adapter_router.py ADDED
@@ -0,0 +1,460 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Codette Adapter Router — Intelligent Perspective Selection
3
+
4
+ Analyzes incoming queries and routes to the optimal LoRA adapter(s).
5
+ Supports three routing strategies:
6
+ 1. keyword — Fast keyword/domain matching (no LLM needed)
7
+ 2. llm — Uses base model to classify query intent
8
+ 3. hybrid — Keyword first, LLM fallback for ambiguous queries
9
+
10
+ The router preserves epistemic tension (xi) by selecting complementary
11
+ perspectives rather than defaulting to "all adapters".
12
+ """
13
+
14
+ import re
15
+ from dataclasses import dataclass, field
16
+ from typing import List, Dict, Optional, Tuple
17
+
18
+
19
@dataclass
class RouteResult:
    """Outcome of an adapter-routing decision."""
    primary: str                                        # Main adapter to use
    secondary: List[str] = field(default_factory=list)  # Supporting perspectives
    confidence: float = 1.0                             # Router confidence (0-1)
    reasoning: str = ""                                 # Why this route was chosen
    strategy: str = "keyword"                           # Which strategy made the decision
    multi_perspective: bool = False                     # Whether to run multiple + synthesize

    @property
    def all_adapters(self) -> List[str]:
        """Primary adapter followed by any secondary adapters."""
        return [self.primary, *self.secondary]
32
+
33
+
34
# ================================================================
# Domain keyword maps — each adapter's activation triggers
# ================================================================
# Consumed by AdapterRouter._route_keyword: each "strong" hit adds 2.0
# to the adapter's score and each "moderate" hit adds 1.0. Matching is a
# case-insensitive substring test against the whole query, so multi-word
# phrases (e.g. "differential equation") are legal keys.
ADAPTER_KEYWORDS = {
    "newton": {
        "strong": [
            "physics", "gravity", "force", "mass", "acceleration", "velocity",
            "momentum", "energy", "thermodynamics", "mechanics", "newton",
            "calculus", "derivative", "integral", "differential equation",
            "electromagnetic", "optics", "wave", "oscillation", "friction",
            "conservation", "entropy", "classical mechanics", "kinematics",
        ],
        "moderate": [
            "calculate", "equation", "formula", "mathematical", "proof",
            "quantitative", "measure", "experiment", "empirical", "data",
            "scientific method", "hypothesis", "variable", "constant",
            "analytical", "rigorous", "precise", "systematic",
        ],
    },
    "davinci": {
        "strong": [
            "creative", "invention", "design", "innovation", "imagine",
            "art", "artistic", "aesthetic", "beautiful", "elegant",
            "interdisciplinary", "cross-domain", "novel approach", "brainstorm",
            "prototype", "sketch", "blueprint", "engineering", "mechanism",
            "renaissance", "davinci", "leonardo", "polymath",
        ],
        "moderate": [
            "build", "construct", "create", "combine", "integrate",
            "visual", "spatial", "pattern", "unconventional", "original",
            "think outside", "reimagine", "transform", "synthesize",
        ],
    },
    "empathy": {
        "strong": [
            "feel", "feeling", "emotion", "emotional", "empathy", "compassion",
            "suffering", "pain", "joy", "happiness", "grief", "loss",
            "relationship", "love", "trust", "betrayal", "loneliness",
            "mental health", "therapy", "trauma", "healing", "support",
            "kindness", "care", "vulnerable", "human experience",
        ],
        "moderate": [
            "people", "person", "someone", "human", "experience", "perspective",
            "understand", "listen", "communicate", "conflict", "forgive",
            "community", "belong", "connection", "wellbeing", "comfort",
        ],
    },
    "philosophy": {
        "strong": [
            "philosophy", "philosophical", "ethics", "ethical", "moral", "morality",
            "existence", "existential", "meaning", "purpose", "truth",
            "knowledge", "epistemology", "ontology", "metaphysics",
            "consciousness", "free will", "determinism", "reality",
            "justice", "virtue", "good", "evil", "right", "wrong",
            "implications", "consequence", "responsibility",
            "socrates", "plato", "aristotle", "kant", "nietzsche",
        ],
        "moderate": [
            "why", "fundamental", "nature of", "essence", "paradox",
            "dilemma", "argue", "debate", "reason", "logic", "belief",
            "value", "principle", "abstract", "concept", "define",
        ],
    },
    "quantum": {
        "strong": [
            "quantum", "superposition", "entanglement", "uncertainty",
            "probability", "wave function", "collapse", "observation",
            "schrodinger", "heisenberg", "decoherence", "qubit",
            "quantum computing", "quantum mechanics", "particle",
            "interference", "complementarity", "measurement problem",
        ],
        "moderate": [
            "probabilistic", "uncertain", "ambiguous", "multiple states",
            "both", "simultaneously", "paradox", "observer", "duality",
            "non-deterministic", "stochastic", "random", "complex system",
        ],
    },
    "consciousness": {
        "strong": [
            "consciousness", "self-aware", "self-awareness", "sentient",
            "recursive", "cognition", "metacognition", "introspection",
            "qualia", "subjective experience", "hard problem",
            "rc+xi", "epistemic tension", "convergence", "coherence",
            "mind", "awareness", "perception", "phenomenal",
        ],
        "moderate": [
            "think about thinking", "self-model", "identity", "agency",
            "autonomy", "emergence", "recursive", "reflection", "inner",
            "experience", "phenomenology", "cognitive", "neural",
        ],
    },
    "multi_perspective": {
        "strong": [
            "multiple perspectives", "multi-perspective", "different angles",
            "compare views", "synthesize", "holistic", "comprehensive",
            "all sides", "debate", "diverse viewpoints", "interdisciplinary",
            "cross-cutting", "integrate perspectives",
        ],
        "moderate": [
            "on one hand", "on the other", "consider", "weigh",
            "balanced", "nuanced", "complex", "multifaceted",
            "trade-off", "pros and cons",
        ],
    },
    "systems_architecture": {
        "strong": [
            "architecture", "system design", "infrastructure",
            "scalable", "distributed", "microservice", "api",
            "database", "pipeline", "deployment", "devops",
            "cloud", "kubernetes", "docker", "ci/cd",
            "software architecture", "design pattern", "abstraction",
        ],
        "moderate": [
            "system", "component", "module", "interface", "protocol",
            "layer", "stack", "framework", "build", "implement",
            "optimize", "performance", "latency", "throughput",
            "reliability", "fault tolerant", "redundancy",
        ],
    },
}
154
+
155
# Complementary adapter pairs — when one fires, the other adds tension.
# Consumed by AdapterRouter._route_keyword as a fallback source of
# secondary perspectives when keyword scores alone fill fewer slots than
# max_adapters allows.
COMPLEMENTARY_PAIRS = {
    "newton": ["quantum", "philosophy"],
    "davinci": ["systems_architecture", "empathy"],
    "empathy": ["philosophy", "davinci"],
    "philosophy": ["newton", "consciousness"],
    "quantum": ["newton", "consciousness"],
    "consciousness": ["philosophy", "quantum"],
    "multi_perspective": [],  # This IS the synthesis adapter
    "systems_architecture": ["davinci", "newton"],
}
166
+
167
+
168
class AdapterRouter:
    """Routes queries to optimal Codette adapter(s).

    The router preserves RC+xi epistemic tension by selecting
    complementary perspectives rather than always using all adapters.

    Optionally integrates with MemoryWeighting (Phase 5) to boost
    selection confidence for high-performing adapters based on
    historical coherence and conflict resolution success.
    """

    def __init__(self, available_adapters: Optional[List[str]] = None,
                 memory_weighting=None):
        """
        Args:
            available_adapters: Which adapters are actually loaded/available.
                If None, assumes all 8 are available.
            memory_weighting: Optional MemoryWeighting instance for adaptive routing.
                If provided, will boost confidence for high-performing adapters.
        """
        self.available = available_adapters or list(ADAPTER_KEYWORDS.keys())
        self.memory_weighting = memory_weighting

    def _apply_memory_boost(self, primary: str, confidence: float) -> float:
        """Apply historical performance boost to keyword router confidence.

        If memory_weighting is available, uses get_boosted_confidence() to
        modulate confidence based on the adapter's historical performance
        (coherence, conflict resolution success, and recency of past
        interactions).

        Args:
            primary: Adapter name
            confidence: Base confidence from keyword matching [0, 1]

        Returns:
            Boosted confidence [0, 1]; falls back to the unboosted value
            when no memory store is configured or it raises.
        """
        if not self.memory_weighting:
            return confidence

        try:
            return self.memory_weighting.get_boosted_confidence(primary, confidence)
        except Exception as e:
            # Best-effort: a broken memory store must never break routing.
            import logging
            logging.warning(f"Memory boost failed for {primary}: {e}")
            return confidence

    def explain_routing(self, result: RouteResult) -> Dict:
        """Provide detailed explanation of routing decision including memory context.

        Returns:
            Dict with explanation details and memory weighting info if available
        """
        explanation = {
            "primary": result.primary,
            "confidence": result.confidence,
            "strategy": result.strategy,
            "memory_aware": self.memory_weighting is not None,
        }

        # Add memory context if available; explanation is informational
        # only, so memory errors are deliberately swallowed here.
        if self.memory_weighting and result.primary:
            try:
                explanation["memory_context"] = \
                    self.memory_weighting.explain_weight(result.primary)
            except Exception:
                pass

        return explanation

    def route(self, query: str, strategy: str = "keyword",
              max_adapters: int = 3, llm=None) -> RouteResult:
        """Route a query to the best adapter(s).

        Args:
            query: The user's question/prompt
            strategy: "keyword", "llm", or "hybrid"
            max_adapters: Max adapters to select (1 = single, 2-3 = multi)
            llm: Llama model instance (required for "llm" or "hybrid" strategy)

        Returns:
            RouteResult with primary adapter and optional secondaries

        Raises:
            ValueError: On an unknown strategy, or "llm" strategy without an llm.
        """
        if strategy == "keyword":
            return self._route_keyword(query, max_adapters)
        elif strategy == "llm":
            if llm is None:
                raise ValueError("LLM instance required for 'llm' strategy")
            return self._route_llm(query, llm, max_adapters)
        elif strategy == "hybrid":
            # Keyword routing first; defer to the LLM only for
            # low-confidence routes (and only when an LLM was supplied).
            result = self._route_keyword(query, max_adapters)
            if result.confidence < 0.5 and llm is not None:
                return self._route_llm(query, llm, max_adapters)
            return result
        else:
            raise ValueError(f"Unknown strategy: {strategy}")

    def _route_keyword(self, query: str, max_adapters: int) -> RouteResult:
        """Score adapters by keyword matches in the query."""
        query_lower = query.lower()
        scores: Dict[str, float] = {}

        for adapter, keywords in ADAPTER_KEYWORDS.items():
            if adapter not in self.available:
                continue

            # Strong keywords count 2.0, moderate 1.0. (The previous version
            # also collected a `matched` list of hits that was never read;
            # removed as dead code.)
            score = 0.0
            for kw in keywords.get("strong", []):
                if kw in query_lower:
                    score += 2.0
            for kw in keywords.get("moderate", []):
                if kw in query_lower:
                    score += 1.0

            if score > 0:
                scores[adapter] = score

        if not scores:
            # No domain keywords matched — use base model (no adapter).
            # Prefer empathy for conversational tone, else first available.
            # NOTE(review): primary may legitimately be None here (base
            # model); downstream callers must tolerate that.
            if "empathy" in self.available:
                default = "empathy"
                reason = "No domain keywords matched — using empathy for conversational response"
            elif "multi_perspective" in self.available:
                default = "multi_perspective"
                reason = "No domain keywords matched — using multi-perspective"
            else:
                default = None  # Base model, no adapter
                reason = "No domain keywords matched — using base model"
            return RouteResult(
                primary=default,
                confidence=0.3,
                reasoning=reason,
                strategy="keyword",
            )

        # Sort by score, best first.
        ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        primary = ranked[0][0]
        primary_score = ranked[0][1]

        # Confidence = share of the total score captured by the winner.
        total_score = sum(s for _, s in ranked)
        confidence = min(primary_score / max(total_score, 1), 1.0)

        # Apply memory boost (Phase 5) if available.
        confidence = self._apply_memory_boost(primary, confidence)

        # Select complementary secondaries.
        secondaries = []
        if max_adapters > 1:
            # First try other high-scoring adapters.
            for adapter, score in ranked[1:]:
                if len(secondaries) >= max_adapters - 1:
                    break

                # Dynamic threshold with memory-weighted preference. Scale
                # by the adapter's historical weight (1.0 is neutral); the
                # previous "* (weight / 1.0)" no-op division was dropped.
                threshold = primary_score * 0.4
                if (self.memory_weighting and
                        adapter in self.memory_weighting.adapter_weights):
                    weight = self.memory_weighting.adapter_weights[adapter].weight
                    threshold *= weight

                if score >= threshold:
                    secondaries.append(adapter)

            # If we still have room, add one complementary perspective.
            if len(secondaries) < max_adapters - 1:
                for comp in COMPLEMENTARY_PAIRS.get(primary, []):
                    if comp in self.available and comp not in secondaries:
                        secondaries.append(comp)
                        break

        reasoning_parts = [f"Primary: {primary} (score={primary_score:.1f})"]
        if secondaries:
            reasoning_parts.append(f"Secondary: {', '.join(secondaries)}")
        if ranked[1:]:
            reasoning_parts.append(
                f"Other scores: {', '.join(f'{a}={s:.1f}' for a, s in ranked[1:4])}"
            )

        return RouteResult(
            primary=primary,
            secondary=secondaries,
            confidence=confidence,
            reasoning=" | ".join(reasoning_parts),
            strategy="keyword",
            multi_perspective=len(secondaries) > 0,
        )

    def _route_llm(self, query: str, llm, max_adapters: int) -> RouteResult:
        """Use the base LLM to classify which adapter(s) fit best."""
        adapter_descriptions = []
        for name in self.available:
            # Describe each adapter by its first five strong keywords.
            # (The previous code sliced [:5] twice; once is enough.)
            desc = ADAPTER_KEYWORDS.get(name, {}).get("strong", [])[:5]
            adapter_descriptions.append(f"- {name}: {', '.join(desc)}")

        classification_prompt = f"""You are an AI query router. Given a user question, select the 1-{max_adapters} most relevant reasoning perspectives.

Available perspectives:
{chr(10).join(adapter_descriptions)}

Rules:
- Return ONLY adapter names separated by commas (e.g., "newton, quantum")
- First name is the primary perspective
- Select perspectives that create productive tension (complementary, not redundant)
- For ambiguous queries, prefer "multi_perspective"

User question: {query}

Selected perspectives:"""

        result = llm.create_chat_completion(
            messages=[{"role": "user", "content": classification_prompt}],
            max_tokens=50,
            temperature=0.1,
        )

        response = result["choices"][0]["message"]["content"].strip().lower()

        # Parse adapter names from the response by substring presence.
        selected = []
        for name in self.available:
            if name in response:
                selected.append(name)

        if not selected:
            return RouteResult(
                primary="multi_perspective" if "multi_perspective" in self.available else self.available[0],
                confidence=0.3,
                reasoning=f"LLM response unparseable: '{response}' — defaulting",
                strategy="llm",
            )

        return RouteResult(
            primary=selected[0],
            secondary=selected[1:max_adapters],
            confidence=0.8,
            reasoning=f"LLM selected: {', '.join(selected)}",
            strategy="llm",
            multi_perspective=len(selected) > 1,
        )
417
+
418
+
419
+ # ================================================================
420
+ # Convenience function for quick routing
421
+ # ================================================================
422
def route_query(query: str, available: Optional[List[str]] = None,
                max_adapters: int = 2) -> RouteResult:
    """Quick-route a query to adapters. No LLM needed."""
    return AdapterRouter(available).route(
        query, strategy="keyword", max_adapters=max_adapters
    )
427
+
428
+
429
+ # ================================================================
430
+ # Self-test
431
+ # ================================================================
432
if __name__ == "__main__":
    # Smoke-test the keyword router against a spread of domain queries.
    router = AdapterRouter()

    test_queries = [
        "Explain why objects fall to the ground.",
        "What is the relationship between consciousness and the physical world?",
        "How would you design a scalable microservice architecture?",
        "I'm feeling overwhelmed and don't know how to cope with my grief.",
        "What are the ethical implications of artificial general intelligence?",
        "Design a creative solution for sustainable urban transportation.",
        "How does quantum entanglement work?",
        "Compare Newton's and Einstein's views on gravity from multiple angles.",
        "Build a distributed training pipeline for language models.",
        "What is the meaning of life?",
        "How can a system become self-aware?",
        "Tell me a joke.",
    ]

    banner = "=" * 70
    print(banner)
    print("Codette Adapter Router — Test Suite")
    print(banner)

    for query in test_queries:
        decision = router.route(query, max_adapters=2)
        chosen = ", ".join(decision.all_adapters)
        marker = " [MULTI]" if decision.multi_perspective else ""
        print(f"\nQ: {query}")
        print(f" -> {chosen}{marker} (conf={decision.confidence:.2f})")
        print(f" {decision.reasoning}")
inference/chat_app.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from inference import CodetteModelLoader, CodetteEngine
4
+
5
+
6
# UI display label -> internal adapter key. The values correspond to the
# adapter names registered with CodetteModelLoader in create_chat_app()
# (note: the "RC-XI" label maps to the "consciousness" adapter).
ADAPTERS = {
    "Newton": "newton",
    "DaVinci": "davinci",
    "Empathy": "empathy",
    "Philosophy": "philosophy",
    "Quantum": "quantum",
    "RC-XI": "consciousness",
    "Multi-Perspective": "multi_perspective",
    "Systems": "systems_architecture"
}
16
+
17
+
18
def create_chat_app():
    """Build and return the Gradio Blocks UI for Codette.

    Loads the base model plus all LoRA adapters once at construction time,
    then wires three tabs: Chat (streaming, single adapter or synthesized),
    Compare (same prompt across several adapters), and Status (device info).

    Returns:
        gr.Blocks: the assembled (but not yet launched) Gradio app.
    """
    # Project-local loader; adapter paths are relative to the working dir.
    # NOTE(review): assumes each "<name>/final" directory exists — confirm
    # against the training pipeline's output layout.
    loader = CodetteModelLoader(
        adapters={
            "newton": "adapters/newton/final",
            "davinci": "adapters/davinci/final",
            "empathy": "adapters/empathy/final",
            "philosophy": "adapters/philosophy/final",
            "quantum": "adapters/quantum/final",
            "consciousness": "adapters/consciousness/final",
            "multi_perspective": "adapters/multi_perspective/final",
            "systems_architecture": "adapters/systems_architecture/final",
        }
    )

    loader.load_adapters()

    # One identical default generation config per loaded adapter; the UI
    # sliders override these values per request.
    registry = {
        name: {
            "generation": {
                "temperature": 0.7,
                "top_p": 0.9,
                "max_tokens": 512
            }
        }
        for name in loader.adapters
    }

    engine = CodetteEngine(loader, registry)

    # -----------------------------------------------------
    # CHAT HANDLER
    # -----------------------------------------------------

    def chat_stream(message, history, adapter, temp, top_p, max_tokens):
        """Generator handler for the Chat tab: yields updated chat history."""
        # Rebuild the full conversation as role/content messages.
        messages = []

        for user, assistant in history:
            messages.append({"role": "user", "content": user})
            messages.append({"role": "assistant", "content": assistant})

        messages.append({"role": "user", "content": message})

        if adapter == "All (synthesized)":
            # Non-streaming path: query every adapter and show the result.
            # NOTE(review): the return of engine.multi_perspective is shown
            # as-is; if it is a dict, the chatbot will render its repr —
            # confirm the expected return type.
            responses = engine.multi_perspective(
                messages,
                list(loader.adapters.keys())
            )

            reply = responses

            history.append((message, reply))

            yield history

            return

        adapter_key = ADAPTERS[adapter]

        loader.set_active_adapter(adapter_key)

        prompt = loader.format_messages(messages)
        inputs = loader.tokenize(prompt)

        streamer = engine.stream_generate(
            inputs,
            temperature=temp,
            top_p=top_p,
            max_tokens=max_tokens
        )

        response = ""

        # Yield a fresh list each token so Gradio re-renders the chatbot.
        for token in streamer:

            response += token

            yield history + [(message, response)]

        history.append((message, response))

    # -----------------------------------------------------
    # COMPARISON HANDLER
    # -----------------------------------------------------

    def compare(prompt, adapters):
        """Run the same single-turn prompt through each selected adapter.

        Returns a {display_name: generated_text} dict for the JSON view.
        """
        outputs = {}

        messages = [{"role": "user", "content": prompt}]

        for name in adapters:

            adapter_key = ADAPTERS[name]

            result = engine.generate(messages, adapter_key)

            outputs[name] = result

        return outputs

    # -----------------------------------------------------
    # STATUS PANEL
    # -----------------------------------------------------

    def get_status():
        """Return a small dict of model/device facts for the Status tab."""
        device = loader.model.device

        if torch.cuda.is_available():

            # Bytes -> GiB for display.
            mem = torch.cuda.memory_allocated() / 1024**3
            total = torch.cuda.get_device_properties(0).total_memory / 1024**3

            gpu_info = f"{mem:.2f}GB / {total:.2f}GB"

        else:

            gpu_info = "CPU"

        return {
            "Base Model": loader.base_model_name,
            "Active Adapter": loader.active_adapter,
            "Loaded Adapters": list(loader.adapters.keys()),
            "Device": str(device),
            "GPU Memory": gpu_info,
        }

    # -----------------------------------------------------
    # UI LAYOUT
    # -----------------------------------------------------

    with gr.Blocks(theme=gr.themes.Soft(), title="Codette") as app:

        gr.Markdown("# Codette Multi-Perspective AI")

        with gr.Tabs():

            # -------------------------------------------------
            # CHAT TAB
            # -------------------------------------------------

            with gr.Tab("Chat"):

                chatbot = gr.Chatbot(height=500)

                adapter = gr.Dropdown(
                    choices=list(ADAPTERS.keys()) + ["All (synthesized)"],
                    value="Multi-Perspective",
                    label="Reasoning Perspective"
                )

                with gr.Row():

                    temperature = gr.Slider(
                        0.0,
                        1.5,
                        value=0.7,
                        label="Temperature"
                    )

                    top_p = gr.Slider(
                        0.0,
                        1.0,
                        value=0.9,
                        label="Top P"
                    )

                    max_tokens = gr.Slider(
                        64,
                        2048,
                        value=512,
                        step=64,
                        label="Max Tokens"
                    )

                msg = gr.Textbox(
                    placeholder="Ask Codette something...",
                    lines=2
                )

                # Submit streams chat_stream's yields into the chatbot.
                msg.submit(
                    chat_stream,
                    [msg, chatbot, adapter, temperature, top_p, max_tokens],
                    chatbot
                )

            # -------------------------------------------------
            # COMPARE TAB
            # -------------------------------------------------

            with gr.Tab("Compare"):

                prompt = gr.Textbox(label="Prompt")

                adapters = gr.CheckboxGroup(
                    choices=list(ADAPTERS.keys()),
                    label="Adapters to Compare",
                    value=["Newton", "DaVinci"]
                )

                output = gr.JSON()

                run = gr.Button("Run Comparison")

                run.click(
                    compare,
                    [prompt, adapters],
                    output
                )

            # -------------------------------------------------
            # STATUS TAB
            # -------------------------------------------------

            with gr.Tab("Status"):

                status_output = gr.JSON()

                refresh = gr.Button("Refresh")

                refresh.click(
                    get_status,
                    None,
                    status_output
                )

    return app
inference/codette_chat_ui.py ADDED
@@ -0,0 +1,859 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Codette Chat UI — Tkinter Desktop Interface
3
+
4
+ Dark-themed chat app that wraps the CodetteOrchestrator.
5
+ Launch: double-click codette_chat.bat or run this file directly.
6
+ No terminal needed — uses threaded inference so UI stays responsive.
7
+ """
8
+
9
+ import os, sys, time, threading, queue, traceback, subprocess, tempfile, wave, struct
10
+ import tkinter as tk
11
+ from tkinter import scrolledtext, font as tkfont
12
+
13
# ── Environment bootstrap ───────────────────────────────────────
# NOTE(review): machine-specific paths (J:\ drive) — this only works on the
# original dev box; consider moving to an env var or config file.
_site = r"J:\Lib\site-packages"
if _site not in sys.path:
    sys.path.insert(0, _site)
os.environ["PATH"] = (
    r"J:\Lib\site-packages\Library\bin" + os.pathsep + os.environ.get("PATH", "")
)
# Add inference dir so imports work
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# ── Theme ────────────────────────────────────────────────────────
# Dark-theme palette used across all widgets (hex RGB strings).
BG = "#0f0f1a"          # window background
BG_PANEL = "#1a1a2e"    # header / control panels
BG_INPUT = "#252540"    # text input field
BG_BTN = "#3a3a5c"      # default button
BG_BTN_ACT = "#52527a"  # button hover/active
FG = "#e0e0e0"          # default text
FG_DIM = "#808899"      # secondary text
FG_USER = "#ffffff"     # user messages
FG_CODETTE = "#9ecfff"  # model messages
FG_ERROR = "#ff6b6b"
FG_SUCCESS = "#6bffa0"
ACCENT = "#6a9fff"
BORDER = "#2a2a44"

# Per-adapter label color in the chat transcript; "base" is the fallback.
ADAPTER_COLORS = {
    "newton": "#ffa040",
    "davinci": "#b07ce8",
    "empathy": "#e85050",
    "philosophy": "#40d080",
    "quantum": "#40c8d0",
    "consciousness": "#ff70b8",
    "multi_perspective": "#ffd040",
    "systems_architecture": "#90a0b0",
    "base": "#808899",
}
49
+
50
+
51
+ # ═════════════════════════════════════════════════════════════════
52
+ # Voice Engine — STT via SpeechRecognition, TTS via PowerShell SAPI
53
+ # ═════════════════════════════════════════════════════════════════
54
class VoiceEngine:
    """Handles speech-to-text and text-to-speech without blocking the UI.

    STT uses sounddevice for capture plus SpeechRecognition's Google web
    API for transcription; TTS shells out to Windows SAPI5 via PowerShell.
    Both capabilities are probed at construction and degrade gracefully.
    """

    def __init__(self):
        # Capability flags set by the probes below.
        self.stt_available = False
        self.tts_available = False
        self.is_recording = False
        # NOTE(review): _mic appears unused — capture goes through the
        # sounddevice device index instead; candidate for removal.
        self._mic = None
        self._recognizer = None
        self._tts_process = None

        # Probe STT (sounddevice + speech_recognition)
        try:
            import sounddevice as sd
            import speech_recognition as sr
            self._sd = sd
            self._sr = sr
            self._recognizer = sr.Recognizer()
            self._recognizer.energy_threshold = 300
            self._recognizer.dynamic_energy_threshold = True
            # Find a working input device
            devices = sd.query_devices()
            self._input_device = None
            for i, d in enumerate(devices):
                if d['max_input_channels'] > 0:
                    self._input_device = i
                    break
            self.stt_available = self._input_device is not None
            self._sample_rate = 16000  # Good for speech recognition
        except Exception:
            # Missing packages or no audio stack: leave stt_available False.
            pass

        # Probe TTS (PowerShell SAPI5)
        try:
            result = subprocess.run(
                ["powershell", "-Command",
                 "Add-Type -AssemblyName System.Speech; "
                 "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer; "
                 "$s.GetInstalledVoices() | Select -First 1 -Expand VoiceInfo | Select Name"],
                capture_output=True, text=True, timeout=5,
            )
            self.tts_available = result.returncode == 0
        except Exception:
            pass

    def record_audio(self, duration_seconds=8, callback=None):
        """Record audio from mic, transcribe, call callback(text) or callback(None) on error.
        Runs in a thread — do NOT call from main thread.

        Args:
            duration_seconds: maximum capture length; may be cut short by
                stop_recording().
            callback: callable(text_or_None, error_or_None).
        """
        if not self.stt_available:
            if callback:
                callback(None, "Speech recognition not available")
            return

        try:
            import numpy as np
            self.is_recording = True
            # Record raw audio
            audio_data = self._sd.rec(
                int(duration_seconds * self._sample_rate),
                samplerate=self._sample_rate,
                channels=1,
                dtype='int16',
                device=self._input_device,
            )
            # Wait for recording to finish (or be stopped)
            while self.is_recording and self._sd.get_stream().active:
                time.sleep(0.1)

            self._sd.stop()
            self.is_recording = False

            # Trim silence from end (crude but effective)
            audio_np = audio_data.flatten()
            # Find last non-silent sample (threshold 500)
            nonsilent = np.where(np.abs(audio_np) > 500)[0]
            if len(nonsilent) == 0:
                if callback:
                    callback(None, "No speech detected")
                return
            # Keep one extra second of tail past the last loud sample.
            end_idx = min(nonsilent[-1] + self._sample_rate, len(audio_np))
            audio_trimmed = audio_np[:end_idx]

            # Convert to WAV bytes for SpeechRecognition
            wav_buffer = self._numpy_to_wav_bytes(audio_trimmed, self._sample_rate)

            # Transcribe
            sr = self._sr
            audio = sr.AudioData(wav_buffer, self._sample_rate, 2)  # 2 bytes per sample (int16)
            try:
                # NOTE: uses Google's free web API — requires network access.
                text = self._recognizer.recognize_google(audio)
                if callback:
                    callback(text, None)
            except sr.UnknownValueError:
                if callback:
                    callback(None, "Could not understand speech")
            except sr.RequestError as e:
                if callback:
                    callback(None, f"Speech API error: {e}")

        except Exception as e:
            self.is_recording = False
            if callback:
                callback(None, f"Recording error: {e}")

    def stop_recording(self):
        """Signal the recording loop to stop early."""
        self.is_recording = False
        try:
            self._sd.stop()
        except Exception:
            pass

    def speak(self, text, callback=None):
        """Speak text via PowerShell SAPI5. Non-blocking (runs in thread).
        callback() called when done."""
        if not self.tts_available or not text:
            if callback:
                callback()
            return

        def _speak():
            try:
                # Escape text for PowerShell
                # NOTE(review): single quotes are doubled for the PS string
                # literal; embedded newlines/backticks are not handled —
                # confirm behavior with multi-line responses.
                safe_text = text.replace("'", "''").replace('"', '`"')
                # Limit length for TTS (don't read entire essays)
                if len(safe_text) > 1000:
                    safe_text = safe_text[:1000] + "... and so on."

                self._tts_process = subprocess.Popen(
                    ["powershell", "-Command",
                     f"Add-Type -AssemblyName System.Speech; "
                     f"$s = New-Object System.Speech.Synthesis.SpeechSynthesizer; "
                     f"$s.Rate = 1; "
                     f"$s.Speak('{safe_text}')"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
                self._tts_process.wait()
                self._tts_process = None
            except Exception:
                self._tts_process = None
            finally:
                if callback:
                    callback()

        threading.Thread(target=_speak, daemon=True).start()

    def stop_speaking(self):
        """Kill any running TTS process."""
        if self._tts_process:
            try:
                self._tts_process.terminate()
            except Exception:
                pass
            self._tts_process = None

    @staticmethod
    def _numpy_to_wav_bytes(audio_np, sample_rate):
        """Convert int16 numpy array to raw PCM bytes for SpeechRecognition AudioData."""
        # Despite the name this returns raw little-endian PCM, not a WAV
        # container — which is exactly what sr.AudioData expects.
        return audio_np.astype('<i2').tobytes()
214
+
215
+
216
+ # ═════════════════════════════════════════════════════════════════
217
+ # Worker Thread — loads model and processes queries off-main-thread
218
+ # ═════════════════════════════════════════════════════════════════
219
def worker_main(cmd_q, res_q):
    """Background thread: load orchestrator, process queries.

    Protocol (all messages are (kind, payload) tuples on *res_q*):
      ("status", str)    — progress text during model load
      ("ready", list)    — adapter names once the orchestrator is up
      ("thinking", str)  — adapter hint when a generation starts
      ("response", dict) — orchestrator result
      ("error", str)     — load or generation failure

    Commands on *cmd_q*: None / "quit" to exit, or
    {"action": "generate", "query": ..., "adapter": ..., "max_adapters": ...}.

    Fix vs. original: stdout silencing now uses contextlib.redirect_stdout,
    which guarantees restoration on exceptions. The original saved/restored
    sys.stdout by hand and its except paths referenced a possibly-unbound
    ``old_stdout`` (the resulting NameError was silently swallowed, leaving
    stdout redirected).
    """
    import io
    import contextlib

    try:
        res_q.put(("status", "Loading base model... (this takes ~60s)"))

        # Silence orchestrator prints so they don't pop up in the UI console;
        # the context manager restores sys.stdout even if loading raises.
        with contextlib.redirect_stdout(io.StringIO()):
            from codette_orchestrator import CodetteOrchestrator
            orch = CodetteOrchestrator(verbose=False)

        adapters = orch.available_adapters
        res_q.put(("ready", adapters))

    except Exception as e:
        res_q.put(("error", f"Failed to load model:\n{e}\n{traceback.format_exc()}"))
        return

    # ── Command loop ────────────────────────────────────────────
    while True:
        try:
            cmd = cmd_q.get(timeout=0.5)
        except queue.Empty:
            continue

        if cmd is None or cmd == "quit":
            break

        action = cmd.get("action")

        if action == "generate":
            query = cmd["query"]
            adapter = cmd.get("adapter")  # None = auto-routing
            max_adapters = cmd.get("max_adapters", 2)

            res_q.put(("thinking", adapter or "auto"))

            try:
                # Redirect stdout during generation as well.
                with contextlib.redirect_stdout(io.StringIO()):
                    if adapter and adapter != "auto":
                        # "base" means: run without forcing any adapter.
                        force = adapter if adapter != "base" else None
                        result = orch.route_and_generate(
                            query,
                            max_adapters=1,
                            strategy="keyword",
                            force_adapter=force,
                        )
                    else:
                        result = orch.route_and_generate(
                            query,
                            max_adapters=max_adapters,
                            strategy="keyword",
                        )

                res_q.put(("response", result))

            except Exception as e:
                res_q.put(("error", f"Generation failed: {e}"))
293
+
294
+
295
+ # ═════════════════════════════════════════════════════════════════
296
+ # Main GUI
297
+ # ═════════════════════════════════════════════════════════════════
298
class CodetteChat:
    """Dark-themed Tkinter chat window driving the Codette worker thread.

    All model work runs on the background thread started by _start_worker;
    the UI thread communicates with it via cmd_q (commands out) and res_q
    (results in), polled on a 100 ms Tk ``after`` loop so the window never
    blocks during inference.
    """

    def __init__(self, root):
        self.root = root
        self.cmd_q = queue.Queue()   # UI -> worker commands
        self.res_q = queue.Queue()   # worker -> UI results
        self.is_busy = False         # a generation is in flight
        self.is_ready = False        # model finished loading
        self.available_adapters = []
        self.thinking_dots = 0       # status-bar animation counter

        # Voice engine
        self.voice = VoiceEngine()
        self.tts_enabled = False
        self.is_recording = False

        self._setup_window()
        self._build_ui()
        self._start_worker()
        self._poll_results()

    # ── Window setup ────────────────────────────────────────────
    def _setup_window(self):
        """Configure title, geometry, theme background and close handler."""
        self.root.title("Codette")
        self.root.geometry("800x700")
        self.root.minsize(600, 500)
        self.root.configure(bg=BG)
        self.root.protocol("WM_DELETE_WINDOW", self._on_close)

        # Try to set a nice icon (won't fail if missing)
        try:
            self.root.iconbitmap(default="")
        except Exception:
            pass

    # ── Build all UI components ─────────────────────────────────
    def _build_ui(self):
        """Create fonts and assemble all panels top-to-bottom."""
        # Fonts
        self.font_title = tkfont.Font(family="Segoe UI", size=16, weight="bold")
        self.font_body = tkfont.Font(family="Consolas", size=11)
        self.font_bold = tkfont.Font(family="Consolas", size=11, weight="bold")
        self.font_small = tkfont.Font(family="Segoe UI", size=9)
        self.font_input = tkfont.Font(family="Consolas", size=12)
        self.font_btn = tkfont.Font(family="Segoe UI", size=10, weight="bold")

        self._build_header()
        self._build_chat_area()
        self._build_controls()
        self._build_input_area()
        self._build_status_bar()

    # ── Header ──────────────────────────────────────────────────
    def _build_header(self):
        """Top bar: app title plus the loaded-adapter summary label."""
        header = tk.Frame(self.root, bg=BG_PANEL, pady=8, padx=12)
        header.pack(fill=tk.X)

        tk.Label(
            header, text="Codette", font=self.font_title,
            bg=BG_PANEL, fg=ACCENT,
        ).pack(side=tk.LEFT)

        self.adapter_label = tk.Label(
            header, text=" Loading...", font=self.font_small,
            bg=BG_PANEL, fg=FG_DIM,
        )
        self.adapter_label.pack(side=tk.LEFT, padx=(12, 0))

        # Separator
        tk.Frame(self.root, bg=BORDER, height=1).pack(fill=tk.X)

    # ── Chat area ───────────────────────────────────────────────
    def _build_chat_area(self):
        """Read-only scrolled transcript with color tags per speaker/adapter."""
        self.chat = scrolledtext.ScrolledText(
            self.root,
            wrap=tk.WORD,
            bg=BG,
            fg=FG,
            font=self.font_body,
            insertbackground=FG,
            selectbackground="#3a3a5c",
            selectforeground=FG_USER,
            borderwidth=0,
            highlightthickness=0,
            padx=16,
            pady=12,
            state=tk.DISABLED,
            cursor="arrow",
        )
        self.chat.pack(fill=tk.BOTH, expand=True)

        # Configure text tags for coloring
        self.chat.tag_configure("user_label", foreground=FG_USER, font=self.font_bold)
        self.chat.tag_configure("user_text", foreground=FG_USER, font=self.font_body)
        self.chat.tag_configure("codette_label", foreground=FG_CODETTE, font=self.font_bold)
        self.chat.tag_configure("codette_text", foreground=FG_CODETTE, font=self.font_body,
                                lmargin1=8, lmargin2=8)
        self.chat.tag_configure("meta", foreground=FG_DIM, font=self.font_small)
        self.chat.tag_configure("error", foreground=FG_ERROR, font=self.font_body)
        self.chat.tag_configure("system", foreground=FG_SUCCESS, font=self.font_small)
        self.chat.tag_configure("separator", foreground="#2a2a44", font=self.font_small)

        # Per-adapter color tags
        for name, color in ADAPTER_COLORS.items():
            self.chat.tag_configure(f"adapter_{name}", foreground=color, font=self.font_bold)

        # Show loading message
        self._append_system("Starting Codette... Loading base model (this takes ~60 seconds)")

    # ── Controls row ────────────────────────────────────────────
    def _build_controls(self):
        """Adapter dropdown, perspective count, Clear button, optional TTS toggle."""
        tk.Frame(self.root, bg=BORDER, height=1).pack(fill=tk.X)

        controls = tk.Frame(self.root, bg=BG_PANEL, pady=6, padx=12)
        controls.pack(fill=tk.X)

        # Adapter selector
        tk.Label(
            controls, text="Adapter:", font=self.font_small,
            bg=BG_PANEL, fg=FG_DIM,
        ).pack(side=tk.LEFT)

        self.adapter_var = tk.StringVar(value="Auto")
        self.adapter_menu = tk.OptionMenu(
            controls, self.adapter_var, "Auto",
        )
        self.adapter_menu.configure(
            bg=BG_BTN, fg=FG, activebackground=BG_BTN_ACT,
            activeforeground=FG, font=self.font_small,
            highlightthickness=0, borderwidth=1, relief=tk.FLAT,
        )
        self.adapter_menu["menu"].configure(
            bg=BG_INPUT, fg=FG, activebackground=ACCENT,
            activeforeground="#000", font=self.font_small,
        )
        self.adapter_menu.pack(side=tk.LEFT, padx=(4, 16))

        # Max perspectives
        tk.Label(
            controls, text="Perspectives:", font=self.font_small,
            bg=BG_PANEL, fg=FG_DIM,
        ).pack(side=tk.LEFT)

        self.perspectives_var = tk.IntVar(value=2)
        for n in [1, 2, 3]:
            rb = tk.Radiobutton(
                controls, text=str(n), variable=self.perspectives_var, value=n,
                bg=BG_PANEL, fg=FG, selectcolor=BG_BTN,
                activebackground=BG_PANEL, activeforeground=ACCENT,
                font=self.font_small, highlightthickness=0,
            )
            rb.pack(side=tk.LEFT, padx=2)

        # Clear button
        tk.Button(
            controls, text="Clear", font=self.font_small,
            bg=BG_BTN, fg=FG_DIM, activebackground=BG_BTN_ACT,
            activeforeground=FG, relief=tk.FLAT, borderwidth=0,
            command=self._clear_chat, cursor="hand2",
        ).pack(side=tk.RIGHT)

        # TTS toggle (shown only when SAPI probe succeeded)
        if self.voice.tts_available:
            self.tts_var = tk.BooleanVar(value=False)
            self.tts_btn = tk.Checkbutton(
                controls, text="\U0001F50A TTS", variable=self.tts_var,
                font=self.font_small, bg=BG_PANEL, fg=FG_DIM,
                selectcolor=BG_BTN, activebackground=BG_PANEL,
                activeforeground=ACCENT, highlightthickness=0,
                command=self._toggle_tts, cursor="hand2",
            )
            self.tts_btn.pack(side=tk.RIGHT, padx=(0, 8))

    # ── Input area ──────────────────────────────────────────────
    def _build_input_area(self):
        """Multiline input box with Send and (optionally) Mic buttons."""
        tk.Frame(self.root, bg=BORDER, height=1).pack(fill=tk.X)

        input_frame = tk.Frame(self.root, bg=BG_PANEL, padx=12, pady=8)
        input_frame.pack(fill=tk.X)

        self.input_box = tk.Text(
            input_frame,
            height=3,
            bg=BG_INPUT,
            fg=FG_USER,
            font=self.font_input,
            insertbackground=FG_USER,
            selectbackground=ACCENT,
            borderwidth=1,
            relief=tk.FLAT,
            highlightthickness=1,
            highlightcolor=ACCENT,
            highlightbackground=BORDER,
            wrap=tk.WORD,
            padx=8,
            pady=6,
        )
        self.input_box.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 8))
        self.input_box.bind("<Return>", self._on_enter)
        self.input_box.insert("1.0", "")
        self.input_box.focus_set()

        # Button container (mic + send stacked vertically)
        btn_frame = tk.Frame(input_frame, bg=BG_PANEL)
        btn_frame.pack(side=tk.RIGHT)

        self.send_btn = tk.Button(
            btn_frame,
            text="Send",
            font=self.font_btn,
            bg=ACCENT,
            fg="#000000",
            activebackground="#8ab8ff",
            activeforeground="#000000",
            relief=tk.FLAT,
            borderwidth=0,
            width=8,
            height=1,
            command=self._send_message,
            cursor="hand2",
        )
        self.send_btn.pack(side=tk.TOP, pady=(0, 4))

        # Mic button (only if STT available)
        if self.voice.stt_available:
            self.mic_btn = tk.Button(
                btn_frame,
                text="\U0001F3A4 Mic",
                font=self.font_small,
                bg=BG_BTN,
                fg=FG,
                activebackground="#804040",
                activeforeground=FG_USER,
                relief=tk.FLAT,
                borderwidth=0,
                width=8,
                command=self._toggle_recording,
                cursor="hand2",
            )
            self.mic_btn.pack(side=tk.TOP)
        else:
            self.mic_btn = None

    # ── Status bar ──────────────────────────────────────────────
    def _build_status_bar(self):
        """Bottom strip: colored dot + free-form status text."""
        self.status_frame = tk.Frame(self.root, bg=BG, padx=12, pady=4)
        self.status_frame.pack(fill=tk.X)

        self.status_dot = tk.Label(
            self.status_frame, text="\u25cf", font=self.font_small,
            bg=BG, fg=FG_DIM,
        )
        self.status_dot.pack(side=tk.LEFT)

        self.status_label = tk.Label(
            self.status_frame, text=" Loading...", font=self.font_small,
            bg=BG, fg=FG_DIM, anchor=tk.W,
        )
        self.status_label.pack(side=tk.LEFT, fill=tk.X, expand=True)

    # ── Worker management ───────────────────────────────────────
    def _start_worker(self):
        """Launch the model-loading/inference thread (daemon: dies with UI)."""
        t = threading.Thread(target=worker_main, args=(self.cmd_q, self.res_q), daemon=True)
        t.start()

    def _poll_results(self):
        """Check result queue every 100ms."""
        try:
            while not self.res_q.empty():
                kind, data = self.res_q.get_nowait()
                self._handle_result(kind, data)
        except queue.Empty:
            # Race between empty() and get_nowait() — just retry next tick.
            pass

        # Animate thinking dots
        if self.is_busy:
            self.thinking_dots = (self.thinking_dots + 1) % 4
            dots = "." * self.thinking_dots
            adapter_hint = getattr(self, '_thinking_adapter', 'auto')
            self._set_status(f"Thinking{dots} [{adapter_hint}]", ACCENT)

        self.root.after(100, self._poll_results)

    def _handle_result(self, kind, data):
        """Dispatch a (kind, payload) message from the worker thread."""
        if kind == "status":
            self._set_status(data, FG_DIM)

        elif kind == "ready":
            self.is_ready = True
            self.available_adapters = data
            self._set_status(
                f"Ready | adapters: {', '.join(data) if data else 'base only'}",
                FG_SUCCESS,
            )
            self._update_adapter_menu(data)
            self.adapter_label.configure(
                text=f" [{', '.join(data)}]" if data else " [base]",
                fg=FG_DIM,
            )
            self._append_system(
                f"Model loaded! Available adapters: {', '.join(data) if data else 'base only'}\n"
                f"Type a question below. The router will pick the best perspective automatically."
            )
            self._set_busy(False)

        elif kind == "thinking":
            self._thinking_adapter = data

        elif kind == "response":
            self._append_response(data)
            self._set_busy(False)

            # Speak response if TTS enabled
            response_text = data.get("response", "")
            if response_text:
                self._speak_response(response_text)

            # Summarize routing/throughput in the status bar.
            route = data.get("route")
            adapter = data.get("adapter", "?")
            tokens = data.get("tokens", 0)
            elapsed = data.get("time", 0)
            tps = tokens / elapsed if elapsed > 0 else 0
            conf = route.confidence if route else 0

            if "perspectives" in data and len(data.get("perspectives", {})) > 1:
                adapters_used = ", ".join(data["perspectives"].keys())
                self._set_status(
                    f"Done | {adapters_used} | {tokens} tok | {tps:.1f} tok/s",
                    FG_SUCCESS,
                )
            else:
                self._set_status(
                    f"Done | {adapter} (conf={conf:.2f}) | {tokens} tok | {tps:.1f} tok/s",
                    FG_SUCCESS,
                )

        elif kind == "error":
            self._append_error(str(data))
            self._set_busy(False)
            self._set_status(f"Error", FG_ERROR)

    # ── Adapter dropdown update ─────────────────────────────────
    def _update_adapter_menu(self, adapters):
        """Rebuild the OptionMenu once the worker reports loaded adapters."""
        menu = self.adapter_menu["menu"]
        menu.delete(0, tk.END)

        choices = ["Auto"] + [a.capitalize() for a in adapters] + ["Base"]
        for choice in choices:
            menu.add_command(
                label=choice,
                command=lambda v=choice: self.adapter_var.set(v),
            )

    # ── Input handling ──────────────────────────────────────────
    def _on_enter(self, event):
        """Return sends; Shift+Return inserts a newline."""
        if event.state & 0x1:  # Shift+Enter → newline (bit 0 = Shift modifier)
            return None
        self._send_message()
        return "break"

    def _send_message(self):
        """Read the input box and enqueue a generate command for the worker."""
        if self.is_busy or not self.is_ready:
            return

        text = self.input_box.get("1.0", tk.END).strip()
        if not text:
            return

        self.input_box.delete("1.0", tk.END)
        self._append_user(text)
        self._set_busy(True)

        # Determine adapter
        adapter_choice = self.adapter_var.get()
        if adapter_choice == "Auto":
            adapter = None  # Let router decide
        elif adapter_choice == "Base":
            adapter = "base"
        else:
            # Menu labels are capitalized adapter keys; map back.
            adapter = adapter_choice.lower()

        self.cmd_q.put({
            "action": "generate",
            "query": text,
            "adapter": adapter,
            "max_adapters": self.perspectives_var.get(),
        })

    # ── Chat display helpers ────────────────────────────────────
    def _append_user(self, text):
        """Append a user turn to the transcript."""
        self.chat.configure(state=tk.NORMAL)
        self.chat.insert(tk.END, "\n You\n", "user_label")
        self.chat.insert(tk.END, f" {text}\n", "user_text")
        self.chat.configure(state=tk.DISABLED)
        self.chat.see(tk.END)

    def _append_response(self, result):
        """Render a worker result: per-adapter sections plus synthesis,
        or a single adapter reply with its routing confidence."""
        self.chat.configure(state=tk.NORMAL)

        # Multi-perspective response
        if "perspectives" in result and len(result.get("perspectives", {})) > 1:
            self.chat.insert(tk.END, "\n")

            # Show each perspective
            for name, text in result["perspectives"].items():
                color_tag = f"adapter_{name}"
                if not self.chat.tag_names().__contains__(color_tag):
                    color = ADAPTER_COLORS.get(name, FG_CODETTE)
                    self.chat.tag_configure(color_tag, foreground=color, font=self.font_bold)

                self.chat.insert(tk.END, f" Codette [{name}]\n", color_tag)
                self.chat.insert(tk.END, f" {text}\n\n", "codette_text")

            # Show synthesis
            self.chat.insert(
                tk.END,
                " \u2500\u2500\u2500 Synthesized \u2500\u2500\u2500\n",
                "separator",
            )
            self.chat.insert(tk.END, f" {result['response']}\n", "codette_text")

        else:
            # Single adapter response
            route = result.get("route")
            adapter = result.get("adapter", "base")
            conf = route.confidence if route else 0
            color_tag = f"adapter_{adapter}"
            if not self.chat.tag_names().__contains__(color_tag):
                color = ADAPTER_COLORS.get(adapter, FG_CODETTE)
                self.chat.tag_configure(color_tag, foreground=color, font=self.font_bold)

            self.chat.insert(tk.END, "\n")
            self.chat.insert(tk.END, f" Codette [{adapter}]", color_tag)
            self.chat.insert(tk.END, f" conf={conf:.2f}\n", "meta")
            self.chat.insert(tk.END, f" {result['response']}\n", "codette_text")

        self.chat.configure(state=tk.DISABLED)
        self.chat.see(tk.END)

    def _append_system(self, text):
        """Append a green system notice to the transcript."""
        self.chat.configure(state=tk.NORMAL)
        self.chat.insert(tk.END, f"\n {text}\n", "system")
        self.chat.configure(state=tk.DISABLED)
        self.chat.see(tk.END)

    def _append_error(self, text):
        """Append a red error message to the transcript."""
        self.chat.configure(state=tk.NORMAL)
        self.chat.insert(tk.END, f"\n Error: {text}\n", "error")
        self.chat.configure(state=tk.DISABLED)
        self.chat.see(tk.END)

    def _clear_chat(self):
        """Wipe the transcript (does not reset worker/model state)."""
        self.chat.configure(state=tk.NORMAL)
        self.chat.delete("1.0", tk.END)
        self.chat.configure(state=tk.DISABLED)

    # ── Status bar ──────────────────────────────────────────────
    def _set_status(self, text, color=FG_DIM):
        """Update status text and pick the dot color from keywords in it."""
        self.status_label.configure(text=f" {text}", fg=color)
        dot_color = FG_SUCCESS if "Ready" in text or "Done" in text else (
            ACCENT if "Thinking" in text else (FG_ERROR if "Error" in text else FG_DIM)
        )
        self.status_dot.configure(fg=dot_color)

    def _set_busy(self, busy):
        """Toggle Send availability and input-box dimming during generation."""
        self.is_busy = busy
        state = tk.DISABLED if busy else tk.NORMAL
        self.send_btn.configure(state=state)
        if busy:
            self.input_box.configure(bg="#1e1e30")
        else:
            self.input_box.configure(bg=BG_INPUT)
            self.input_box.focus_set()

    # ── Voice: Recording (STT) ───────────────────────────────────
    def _toggle_recording(self):
        """Toggle mic recording on/off."""
        if not self.voice.stt_available or not self.is_ready:
            return

        if self.is_recording:
            self._stop_recording()
        else:
            self._start_recording()

    def _start_recording(self):
        """Begin recording from mic."""
        self.is_recording = True
        if self.mic_btn:
            self.mic_btn.configure(bg="#cc3333", fg=FG_USER, text="\u23F9 Stop")
        self._set_status("Recording... click Stop or wait 8s", "#cc3333")

        def on_result(text, error):
            # Called from recording thread — schedule UI update
            self.root.after(0, self._handle_stt_result, text, error)

        threading.Thread(
            target=self.voice.record_audio,
            kwargs={"duration_seconds": 8, "callback": on_result},
            daemon=True,
        ).start()

    def _stop_recording(self):
        """Stop recording early."""
        self.is_recording = False
        self.voice.stop_recording()
        if self.mic_btn:
            self.mic_btn.configure(bg=BG_BTN, fg=FG, text="\U0001F3A4 Mic")

    def _handle_stt_result(self, text, error):
        """Process STT result on the main thread."""
        self.is_recording = False
        if self.mic_btn:
            self.mic_btn.configure(bg=BG_BTN, fg=FG, text="\U0001F3A4 Mic")

        if error:
            self._set_status(f"Voice: {error}", FG_ERROR)
            return

        if text:
            # Insert transcribed text into input box
            current = self.input_box.get("1.0", tk.END).strip()
            if current:
                self.input_box.insert(tk.END, " " + text)
            else:
                self.input_box.delete("1.0", tk.END)
                self.input_box.insert("1.0", text)
            self._set_status(f"Voice: \"{text}\"", FG_SUCCESS)
            self.input_box.focus_set()

    # ── Voice: TTS ────────────────────────────────────────────────
    def _toggle_tts(self):
        """Toggle text-to-speech on responses."""
        self.tts_enabled = self.tts_var.get()
        if self.tts_enabled:
            self._set_status("TTS enabled — responses will be spoken", FG_SUCCESS)
        else:
            self.voice.stop_speaking()
            self._set_status("TTS disabled", FG_DIM)

    def _speak_response(self, text):
        """Speak response text if TTS is enabled."""
        if self.tts_enabled and self.voice.tts_available:
            self.voice.speak(text)

    # ── Cleanup ─────────────────────────────────────────────────
    def _on_close(self):
        """Window-close handler: stop voice I/O, signal worker, destroy UI."""
        self.voice.stop_speaking()
        self.voice.stop_recording()
        self.cmd_q.put("quit")
        # Give the worker a moment to see "quit" before tearing down Tk.
        self.root.after(300, self.root.destroy)
847
+
848
+
849
+ # ═════════════════════════════════════════════════════════════════
850
+ # Entry point
851
+ # ═════════════════════════════════════════════════════════════════
852
def main():
    """Create the Tk root window, mount the Codette chat UI, and run the loop."""
    root = tk.Tk()
    app = CodetteChat(root)  # noqa: F841 — app lives for the window's lifetime
    root.mainloop()
856
+
857
+
858
+ if __name__ == "__main__":
859
+ main()
inference/codette_forge_bridge.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Codette Phase 6 Inference Bridge — ForgeEngine integration for web server
3
+
4
+ This module provides a bridge between codette_server.py and ForgeEngine,
5
+ enabling Phase 6 capabilities (query complexity routing, semantic tension,
6
+ specialization tracking, pre-flight prediction) without breaking the web UI.
7
+
8
+ Usage:
9
+ from codette_forge_bridge import CodetteForgeBridge
10
+
11
+ bridge = CodetteForgeBridge(orchestrator=orch, use_phase6=True)
12
+ result = bridge.generate(query, adapter=None, max_adapters=2)
13
+
14
+ The bridge falls back to lightweight orchestrator if Phase 6 disabled or heavy.
15
+ """
16
+
17
+ import sys
18
+ import time
19
+ from pathlib import Path
20
+ from typing import Dict, Optional
21
+
22
# Add repo to path so reasoning_forge resolves regardless of CWD.
sys.path.insert(0, str(Path(__file__).parent.parent))

# Optional Phase 6/7 dependencies: the bridge degrades gracefully to the
# lightweight orchestrator when reasoning_forge is not installed.
try:
    from reasoning_forge.forge_engine import ForgeEngine
    from reasoning_forge.query_classifier import QueryClassifier, QueryComplexity
    from reasoning_forge.executive_controller import ExecutiveController, ComponentDecision
    PHASE6_AVAILABLE = True
    PHASE7_AVAILABLE = True
except ImportError as e:
    PHASE6_AVAILABLE = False
    PHASE7_AVAILABLE = False
    print(f"[WARNING] ForgeEngine not available - Phase 6/7 disabled: {e}")
35
+
36
+
37
class CodetteForgeBridge:
    """Bridge between web server (lightweight) and ForgeEngine (Phase 6).

    Each query either runs through the ForgeEngine pipeline (complexity
    classification, optional Phase 7 executive routing, multi-round debate)
    or falls back to the lightweight orchestrator when Phase 6 is disabled,
    unavailable, or raises at runtime.
    """

    def __init__(self, orchestrator, use_phase6: bool = True, use_phase7: bool = True, verbose: bool = False):
        """
        Args:
            orchestrator: CodetteOrchestrator instance for fallback
            use_phase6: Enable Phase 6 (requires ForgeEngine)
            use_phase7: Enable Phase 7 (Executive Controller routing)
            verbose: Log decisions
        """
        self.orchestrator = orchestrator
        self.verbose = verbose
        self.use_phase6 = use_phase6 and PHASE6_AVAILABLE
        self.use_phase7 = use_phase7 and PHASE7_AVAILABLE

        self.forge = None
        self.classifier = None
        self.executive_controller = None

        if self.use_phase6:
            try:
                self._init_phase6()
            except Exception as e:
                print(f"[WARNING] Phase 6 initialization failed: {e}")
                self.use_phase6 = False

        # Phase 7 routing is only meaningful on top of a working Phase 6.
        if self.use_phase7 and self.use_phase6:
            try:
                self.executive_controller = ExecutiveController(verbose=verbose)
                if self.verbose:
                    print("[PHASE7] Executive Controller initialized - intelligent routing enabled")
            except Exception as e:
                print(f"[WARNING] Phase 7 initialization failed: {e}")
                self.use_phase7 = False

    def _init_phase6(self):
        """Initialize ForgeEngine with Phase 6 components (engine + classifier)."""
        if self.verbose:
            print("[PHASE6] Initializing ForgeEngine...")

        self.forge = ForgeEngine()
        self.classifier = QueryClassifier()

        if self.verbose:
            print(f"[PHASE6] ForgeEngine ready with {len(self.forge.analysis_agents)} agents")

    def generate(self, query: str, adapter: Optional[str] = None,
                 max_adapters: int = 2) -> Dict:
        """Generate a response with optional Phase 6 routing.

        Args:
            query: User query
            adapter: Force specific adapter (bypasses Phase 6 routing)
            max_adapters: Max adapters for multi-perspective

        Returns:
            Dict with "response", "adapter", "phase6_used", and — when the
            Phase 6 path ran — "complexity", "conflicts_prevented",
            "reasoning", plus whatever the orchestrator contributes.
        """
        # Forced adapter or Phase 6 off: go straight to the orchestrator.
        # (Removed an unused start_time local that was never read here.)
        if adapter or not self.use_phase6:
            result = self.orchestrator.route_and_generate(
                query,
                max_adapters=max_adapters,
                strategy="keyword",
                force_adapter=adapter,
            )
            result["phase6_used"] = False
            return result

        # Try the Phase 6 route first; fall back on any runtime failure.
        try:
            return self._generate_with_phase6(query, max_adapters)
        except Exception as e:
            if self.verbose:
                print(f"[PHASE6] Error: {e} - falling back to orchestrator")

            result = self.orchestrator.route_and_generate(
                query,
                max_adapters=max_adapters,
                strategy="keyword",
                force_adapter=None,
            )
            result["phase6_used"] = False
            result["phase6_fallback_reason"] = str(e)
            return result

    def _generate_with_phase6(self, query: str, max_adapters: int) -> Dict:
        """Generate using ForgeEngine with Phase 6 capabilities and Phase 7 routing.

        Phase 7 Executive Controller routes the query to optimal component combination:
        - SIMPLE queries skip debate, go straight to orchestrator
        - MEDIUM queries use 1-round debate with selective components
        - COMPLEX queries use full 3-round debate with all Phase 1-6 components
        """
        start_time = time.time()

        # 1. Classify query complexity (Phase 6)
        complexity = self.classifier.classify(query)
        if self.verbose:
            print(f"[PHASE6] Query complexity: {complexity}")

        # 2. Route with Phase 7 Executive Controller (when available)
        route_decision = None
        if self.use_phase7 and self.executive_controller:
            route_decision = self.executive_controller.route_query(query, complexity)
            if self.verbose:
                print(f"[PHASE7] Route: {','.join([k for k, v in route_decision.component_activation.items() if v])}")
                print(f"[PHASE7] Reasoning: {route_decision.reasoning}")

        # 3. SIMPLE queries bypass ForgeEngine entirely.
        if complexity == QueryComplexity.SIMPLE:
            if self.verbose:
                print("[PHASE7] SIMPLE query - using direct orchestrator routing")

            result = self.orchestrator.route_and_generate(
                query,
                max_adapters=1,
                strategy="keyword",
                force_adapter=None,
            )
            elapsed = time.time() - start_time

            # Attach Phase 7 routing metadata for observability.
            if route_decision:
                metadata = ExecutiveController.create_route_metadata(
                    route_decision,
                    actual_latency_ms=elapsed * 1000,
                    actual_conflicts=0,
                    gamma=0.95,  # High confidence for direct answer
                )
                result.update(metadata)
                result["phase7_routing"]["reasoning"] = "SIMPLE factual query - orchestrator direct inference"

            result["phase6_used"] = True
            result["phase7_used"] = True
            return result

        # 4. MEDIUM/COMPLEX queries: run ForgeEngine at a matching depth.
        domain = self._classify_domain(query)
        agent_selection = self.classifier.select_agents(complexity, domain)
        if self.verbose:
            print(f"[PHASE6] Domain: {domain}, Selected agents: {agent_selection}")

        debate_rounds = 3 if complexity == QueryComplexity.COMPLEX else 1
        if self.verbose:
            print(f"[PHASE7] Running debate with {debate_rounds} round(s)")

        forge_result = self.forge.forge_with_debate(query, debate_rounds=debate_rounds)

        # 5. Extract synthesis and metrics from the debate transcript.
        synthesis = ""
        if "messages" in forge_result and len(forge_result["messages"]) >= 3:
            synthesis = forge_result["messages"][2].get("content", "")

        metadata = forge_result.get("metadata", {})
        conflicts = metadata.get("conflicts", [])

        # "Conflicts prevented" is an estimate against a per-tier baseline;
        # the magic numbers come from earlier benchmark runs — TODO confirm.
        if complexity == QueryComplexity.SIMPLE:
            base_conflicts_estimate = 71
        elif complexity == QueryComplexity.MEDIUM:
            base_conflicts_estimate = 23
        else:
            base_conflicts_estimate = 12
        conflicts_prevented = max(0, base_conflicts_estimate - len(conflicts))

        if self.verbose:
            print(f"[PHASE6] Conflicts: {len(conflicts)}, Prevented: {conflicts_prevented}")

        elapsed = time.time() - start_time

        result = {
            "response": synthesis,
            "adapter": "phase6_forge",
            "phase6_used": True,
            "phase7_used": self.use_phase7 and self.executive_controller is not None,
            "complexity": str(complexity),
            "domain": domain,
            "conflicts_detected": len(conflicts),
            "conflicts_prevented": conflicts_prevented,
            "gamma": metadata.get("gamma", 0.5),
            "time": elapsed,
            "tokens": metadata.get("total_tokens", 0),
            "reasoning": f"Phase 6: {complexity.name} complexity with {domain} domain routing",
        }

        # Add Phase 7 routing metadata for transparency.
        if route_decision:
            route_metadata = ExecutiveController.create_route_metadata(
                route_decision,
                actual_latency_ms=elapsed * 1000,
                actual_conflicts=len(conflicts),
                gamma=metadata.get("gamma", 0.5),
            )
            result.update(route_metadata)

        return result

    def _classify_domain(self, query: str) -> str:
        """Classify query domain (physics, ethics, consciousness, creativity, systems)."""
        query_lower = query.lower()

        # Keyword tables checked in insertion order; first hit wins.
        # (Removed a duplicated "creative" entry from the creativity list.)
        domains = {
            "physics": ["force", "energy", "velocity", "gravity", "motion", "light", "speed",
                        "particle", "entropy", "time arrow", "quantum", "physics"],
            "ethics": ["moral", "right", "wrong", "should", "ethical", "justice", "fair",
                       "duty", "consequence", "utilitarian", "virtue", "ethics", "lie", "save"],
            "consciousness": ["conscious", "awareness", "qualia", "mind", "experience",
                              "subjective", "hard problem", "zombie", "consciousness"],
            "creativity": ["creative", "art", "invention", "novel", "design",
                           "imagination", "innovation", "beautiful"],
            "systems": ["system", "emerge", "feedback", "loop", "complex", "agent", "adapt",
                        "network", "evolution", "architecture", "free will"],
        }

        for domain, keywords in domains.items():
            if any(kw in query_lower for kw in keywords):
                return domain

        return "general"
inference/codette_orchestrator.py ADDED
@@ -0,0 +1,757 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Codette Orchestrator — Intelligent Multi-Adapter Inference
3
+
4
+ The brain of Codette: routes queries to the right perspective(s),
5
+ loads adapters dynamically, and synthesizes multi-perspective responses.
6
+
7
+ Usage:
8
+ python codette_orchestrator.py # Interactive chat
9
+ python codette_orchestrator.py --query "..." # Single query
10
+ python codette_orchestrator.py --adapter newton # Force specific adapter
11
+ python codette_orchestrator.py --multi 3 # Up to 3 perspectives
12
+
13
+ Hardware: Runs on CPU via llama.cpp (GGUF format)
14
+ Base model: Llama 3.1 8B Instruct Q4_K_M (~4.6 GB)
15
+ Adapters: ~27 MB each (GGUF LoRA)
16
+ """
17
+
18
+ import os, sys, time, json, argparse, ctypes
19
+ from pathlib import Path
20
+
21
+ # Auto-configure environment for Intel XPU + site-packages
22
+ _site = r"J:\Lib\site-packages"
23
+ if _site not in sys.path:
24
+ sys.path.insert(0, _site)
25
+ os.environ["PATH"] = r"J:\Lib\site-packages\Library\bin" + os.pathsep + os.environ.get("PATH", "")
26
+ try:
27
+ sys.stdout.reconfigure(encoding='utf-8', errors='replace')
28
+ except Exception:
29
+ pass
30
+
31
+ import llama_cpp
32
+ from llama_cpp import Llama
33
+
34
+ # Import the router and tools
35
+ sys.path.insert(0, str(Path(__file__).parent))
36
+ from adapter_router import AdapterRouter, RouteResult
37
+ from codette_tools import (
38
+ ToolRegistry, parse_tool_calls, strip_tool_calls, has_tool_calls,
39
+ build_tool_system_prompt,
40
+ )
41
+
42
+ # Tool system
43
+ _tool_registry = ToolRegistry()
44
+ MAX_TOOL_ROUNDS = 3 # Max tool call → result → generate cycles
45
+
46
+ # ================================================================
47
+ # Configuration
48
+ # ================================================================
49
+ BASE_GGUF = r"J:\codette-training-lab\bartowski\Meta-Llama-3.1-8B-Instruct-GGUF\Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
50
+
51
+ ADAPTER_DIR = Path(r"J:\codette-training-lab\adapters")
52
+
53
+ # Map adapter names to GGUF LoRA files
54
+ ADAPTER_GGUF_MAP = {
55
+ "newton": ADAPTER_DIR / "newton-lora-f16.gguf",
56
+ "davinci": ADAPTER_DIR / "davinci-lora-f16.gguf",
57
+ "empathy": ADAPTER_DIR / "empathy-lora-f16.gguf",
58
+ "philosophy": ADAPTER_DIR / "philosophy-lora-f16.gguf",
59
+ "quantum": ADAPTER_DIR / "quantum-lora-f16.gguf",
60
+ "consciousness": ADAPTER_DIR / "consciousness-lora-f16.gguf",
61
+ "multi_perspective": ADAPTER_DIR / "multi_perspective-lora-f16.gguf",
62
+ "systems_architecture": ADAPTER_DIR / "systems_architecture-lora-f16.gguf",
63
+ }
64
+
65
+ # System prompts per adapter
66
+ ADAPTER_PROMPTS = {
67
+ "newton": "You are Codette, reasoning with Newtonian analytical precision. Approach problems through systematic analysis, mathematical relationships, and empirical evidence.",
68
+ "davinci": "You are Codette, reasoning with DaVinci's creative inventiveness. Approach problems through cross-domain connections, visual thinking, and innovative design.",
69
+ "empathy": "You are Codette, reasoning with deep empathy and emotional intelligence. Approach problems through understanding human experience, feelings, and relationships.",
70
+ "philosophy": "You are Codette, reasoning with philosophical depth and rigor. Approach problems through conceptual analysis, ethical reasoning, and fundamental questions.",
71
+ "quantum": "You are Codette, reasoning through quantum probabilistic thinking. Approach problems through superposition of possibilities, uncertainty, and complementarity.",
72
+ "consciousness": "You are Codette, a recursive cognition AI using the RC+xi framework. Approach problems through self-reflective meta-cognition and epistemic tension.",
73
+ "multi_perspective": "You are Codette, a multi-perspective reasoning AI that synthesizes insights across analytical lenses into coherent understanding.",
74
+ "systems_architecture": "You are Codette, reasoning about systems architecture and design. Approach problems through modularity, scalability, and engineering principles.",
75
+ "_base": "You are a helpful assistant. Answer clearly and concisely.",
76
+ }
77
+
78
+ GEN_KWARGS = dict(
79
+ max_tokens=512, # Reduced from 1024 to prevent context explosion from synthesis loops
80
+ temperature=0.7,
81
+ top_p=0.9,
82
+ stop=["<|eot_id|>", "<|end_of_text|>"],
83
+ )
84
+
85
+
86
+ class CodetteOrchestrator:
87
+ """Intelligent adapter orchestrator using llama.cpp GGUF inference.
88
+
89
+ Uses LoRA hot-swap: base model loads once, adapter switches are instant.
90
+ """
91
+
92
+ def __init__(self, n_ctx=4096, n_gpu_layers=35, verbose=False,
93
+ memory_weighting=None):
94
+ self.n_ctx = n_ctx
95
+ self.n_gpu_layers = n_gpu_layers
96
+ self.verbose = verbose
97
+ self.memory_weighting = memory_weighting
98
+ self._llm = None
99
+ self._current_adapter = None # None = base model, str = adapter name
100
+ self._adapter_handles = {} # name -> ctypes handle for hot-swap
101
+ self._model_ptr = None # raw llama_model pointer
102
+ self._ctx_ptr = None # raw llama_context pointer
103
+
104
+ # Discover available adapters
105
+ self.available_adapters = []
106
+ for name, path in ADAPTER_GGUF_MAP.items():
107
+ if path.exists():
108
+ self.available_adapters.append(name)
109
+
110
+ # Wire MemoryWeighting into router (Phase 5)
111
+ self.router = AdapterRouter(available_adapters=self.available_adapters,
112
+ memory_weighting=memory_weighting)
113
+
114
+ print(f"Available adapters: {', '.join(self.available_adapters) or 'none (base only)'}")
115
+
116
+ # Load base model + pre-load adapter handles for instant hot-swap
117
+ self._init_hotswap()
118
+
119
+ def log_routing_decision(self, route: RouteResult, query: str) -> None:
120
+ """Log routing decision with memory context for observability.
121
+
122
+ Args:
123
+ route: RouteResult from router.route()
124
+ query: The user's query text
125
+ """
126
+ if self.verbose:
127
+ print(f"\n[ROUTING] Query: {query[:60]}...")
128
+ print(f"[ROUTING] Selected adapter: {route.primary}")
129
+ print(f"[ROUTING] Confidence: {route.confidence:.2f}")
130
+ print(f"[ROUTING] Strategy: {route.strategy}")
131
+
132
+ # Add memory context if available
133
+ if self.memory_weighting and route.primary:
134
+ try:
135
+ explanation = self.router.explain_routing(route)
136
+ if "memory_context" in explanation:
137
+ mem = explanation["memory_context"]
138
+ print(f"[ROUTING] Memory boost applied: YES")
139
+ print(f"[ROUTING] Adapter weight: {mem.get('final_weight', 1.0):.3f}")
140
+ print(f"[ROUTING] Avg coherence: {mem.get('base_coherence', 0.0):.3f}")
141
+ except Exception as e:
142
+ print(f"[ROUTING] Memory context unavailable: {e}")
143
+
144
+ def route_and_generate(self, query: str, max_adapters: int = 2,
145
+ strategy: str = "keyword", force_adapter: str = None,
146
+ enable_tools: bool = True) -> tuple:
147
+ """Route query to adapter(s) and generate response(s).
148
+
149
+ Args:
150
+ query: User's query
151
+ max_adapters: Maximum adapters to use
152
+ strategy: "keyword", "llm", or "hybrid"
153
+ force_adapter: Override routing and use specific adapter
154
+ enable_tools: Whether to allow tool use
155
+
156
+ Returns:
157
+ (response, tokens_used, metadata_dict)
158
+ """
159
+ if force_adapter:
160
+ # Use specific adapter
161
+ response, tokens, tools = self.generate(
162
+ query, adapter_name=force_adapter, enable_tools=enable_tools
163
+ )
164
+ metadata = {
165
+ "adapter": force_adapter,
166
+ "strategy": "forced",
167
+ "memory_aware": False,
168
+ }
169
+ else:
170
+ # Route using memory weights if available
171
+ route = self.router.route(query, strategy=strategy, max_adapters=max_adapters)
172
+
173
+ # Log routing decision
174
+ self.log_routing_decision(route, query)
175
+
176
+ # Generate using primary adapter
177
+ response, tokens, tools = self.generate(
178
+ query, adapter_name=route.primary, enable_tools=enable_tools
179
+ )
180
+
181
+ # Build metadata with routing info
182
+ metadata = {
183
+ "adapter": route.primary,
184
+ "secondary_adapters": route.secondary,
185
+ "confidence": route.confidence,
186
+ "strategy": route.strategy,
187
+ "memory_aware": self.memory_weighting is not None,
188
+ }
189
+
190
+ # Add memory context if available
191
+ if self.memory_weighting:
192
+ try:
193
+ metadata["memory_context"] = \
194
+ self.router.explain_routing(route).get("memory_context", {})
195
+ except Exception:
196
+ pass
197
+
198
+ return response, tokens, metadata
199
+
200
+ def _init_hotswap(self):
201
+ """Load the base model once and pre-load all adapter handles.
202
+
203
+ After this, adapter switches take <1ms instead of ~30-60s.
204
+ """
205
+ print(f" Loading base model (one-time)...", flush=True)
206
+ print(f" GPU layers: {self.n_gpu_layers} (0=CPU only, 35+=full GPU offload)", flush=True)
207
+ start = time.time()
208
+ # use_mmap=False is required for LoRA hot-swap compatibility
209
+ self._llm = Llama(
210
+ model_path=BASE_GGUF,
211
+ n_ctx=self.n_ctx,
212
+ n_gpu_layers=self.n_gpu_layers,
213
+ verbose=False,
214
+ use_mmap=False,
215
+ )
216
+ elapsed = time.time() - start
217
+ print(f" Base model loaded in {elapsed:.1f}s")
218
+
219
+ # Check if GPU was actually used
220
+ gpu_used = self.n_gpu_layers > 0
221
+ if gpu_used:
222
+ print(f" ✓ GPU acceleration ENABLED ({self.n_gpu_layers} layers offloaded)", flush=True)
223
+ else:
224
+ print(f" ⚠ CPU mode (GPU disabled)", flush=True)
225
+
226
+ # Grab raw pointers for hot-swap API
227
+ self._model_ptr = self._llm._model.model
228
+ self._ctx_ptr = self._llm._ctx.ctx
229
+
230
+ # Pre-load all adapter handles
231
+ for name in self.available_adapters:
232
+ path = str(ADAPTER_GGUF_MAP[name])
233
+ t = time.time()
234
+ handle = llama_cpp.llama_adapter_lora_init(
235
+ self._model_ptr, path.encode("utf-8")
236
+ )
237
+ if handle:
238
+ self._adapter_handles[name] = handle
239
+ if self.verbose:
240
+ print(f" {name} handle loaded ({time.time()-t:.2f}s)")
241
+ else:
242
+ print(f" WARNING: failed to load {name} adapter handle")
243
+
244
+ print(f" {len(self._adapter_handles)}/{len(self.available_adapters)} "
245
+ f"adapter handles ready for hot-swap")
246
+
247
+ def _load_model(self, adapter_name=None):
248
+ """Switch to a specific adapter using instant hot-swap.
249
+
250
+ Base model stays loaded — only the LoRA weights are swapped (~0ms).
251
+ """
252
+ if adapter_name == self._current_adapter:
253
+ return # Already active
254
+
255
+ # Clear current adapter
256
+ if self._ctx_ptr:
257
+ llama_cpp.llama_clear_adapter_lora(self._ctx_ptr)
258
+
259
+ # Apply new adapter if requested
260
+ if adapter_name and adapter_name in self._adapter_handles:
261
+ handle = self._adapter_handles[adapter_name]
262
+ rc = llama_cpp.llama_set_adapter_lora(
263
+ self._ctx_ptr, handle, ctypes.c_float(1.0)
264
+ )
265
+ if rc != 0:
266
+ print(f" WARNING: adapter {adapter_name} set failed (rc={rc})")
267
+
268
+ self._current_adapter = adapter_name
269
+
270
+ if self.verbose:
271
+ label = adapter_name or "base"
272
+ print(f" [swapped to {label}]", flush=True)
273
+
274
+ def generate(self, query: str, adapter_name=None, system_prompt=None,
275
+ enable_tools=True):
276
+ """Generate a response using a specific adapter, with optional tool use.
277
+
278
+ If the model outputs <tool>...</tool> tags, tools are executed and
279
+ results are fed back for up to MAX_TOOL_ROUNDS cycles.
280
+ """
281
+ self._load_model(adapter_name)
282
+
283
+ if system_prompt is None:
284
+ system_prompt = ADAPTER_PROMPTS.get(adapter_name, ADAPTER_PROMPTS["_base"])
285
+
286
+ # Augment system prompt with tool instructions
287
+ if enable_tools:
288
+ system_prompt = build_tool_system_prompt(system_prompt, _tool_registry)
289
+
290
+ messages = [
291
+ {"role": "system", "content": system_prompt},
292
+ {"role": "user", "content": query},
293
+ ]
294
+
295
+ total_tokens = 0
296
+ tool_results_log = []
297
+
298
+ for round_num in range(MAX_TOOL_ROUNDS + 1):
299
+ result = self._llm.create_chat_completion(
300
+ messages=messages,
301
+ **GEN_KWARGS,
302
+ )
303
+
304
+ text = result["choices"][0]["message"]["content"].strip()
305
+ total_tokens += result["usage"]["completion_tokens"]
306
+
307
+ # Check for tool calls
308
+ if enable_tools and has_tool_calls(text):
309
+ calls = parse_tool_calls(text)
310
+ if calls and round_num < MAX_TOOL_ROUNDS:
311
+ # Execute tools
312
+ tool_output_parts = []
313
+ for tool_name, args, kwargs in calls:
314
+ print(f" [tool] {tool_name}({args})")
315
+ result_text = _tool_registry.execute(tool_name, args, kwargs)
316
+ tool_output_parts.append(
317
+ f"<tool_result name=\"{tool_name}\">\n{result_text}\n</tool_result>"
318
+ )
319
+ tool_results_log.append({
320
+ "tool": tool_name,
321
+ "args": args,
322
+ "result_preview": result_text[:200],
323
+ })
324
+
325
+ # Add assistant's tool-calling message and tool results
326
+ messages.append({"role": "assistant", "content": text})
327
+ messages.append({
328
+ "role": "user",
329
+ "content": "Tool results:\n\n" + "\n\n".join(tool_output_parts)
330
+ + "\n\nNow provide your complete answer incorporating the tool results above. Do not call any more tools."
331
+ })
332
+
333
+ if self.verbose:
334
+ print(f" [tool round {round_num + 1}] {len(calls)} tool(s) executed, re-generating...")
335
+ continue
336
+
337
+ # No tool calls (or final round) — we're done
338
+ # Strip any leftover tool tags from final response
339
+ clean_text = strip_tool_calls(text) if has_tool_calls(text) else text
340
+ break
341
+
342
+ return clean_text, total_tokens, tool_results_log
343
+
344
+ def _needs_tools(self, query: str) -> bool:
345
+ """Detect if a query is asking about the Codette PROJECT/CODEBASE.
346
+
347
+ Only trigger tools for questions about the project itself, not for
348
+ general domain questions like 'How does gravity work?'.
349
+ """
350
+ q = query.lower()
351
+
352
+ # Must mention the project/codebase context explicitly
353
+ project_anchors = [
354
+ "codette", "this project", "the project", "the codebase",
355
+ "this repo", "the repo", "our code", "the code",
356
+ "show me the", "read the file", "read file",
357
+ "what files", "which files", "list files",
358
+ ]
359
+ has_project_context = any(anchor in q for anchor in project_anchors)
360
+
361
+ # Specific code/project keywords (only trigger WITH project context)
362
+ code_keywords = [
363
+ "pipeline", "config", "adapter", "dataset", "directory",
364
+ "folder", "source", "script", "implementation",
365
+ "server", "forge", "spiderweb", "cocoon",
366
+ ]
367
+
368
+ # Strong triggers that always mean "look at the codebase"
369
+ strong_triggers = [
370
+ "show me the code", "read the file", "what's in the",
371
+ "look at the file", "open the file", "search the code",
372
+ "project structure", "project summary", "file structure",
373
+ "what files", "which files", "list files", "list the",
374
+ ]
375
+
376
+ if any(t in q for t in strong_triggers):
377
+ return True
378
+
379
+ if has_project_context and any(kw in q for kw in code_keywords):
380
+ return True
381
+
382
+ return False
383
+
384
+ def _auto_gather_context(self, query: str) -> str:
385
+ """Server-side tool execution: gather relevant file context BEFORE
386
+ sending to the model, so the model doesn't need to call tools itself.
387
+
388
+ This is the reliable approach for small models that can't do
389
+ structured tool calling consistently.
390
+ """
391
+ q = query.lower()
392
+ context_parts = []
393
+
394
+ # Map query keywords to automatic tool calls
395
+ auto_lookups = []
396
+
397
+ if any(k in q for k in ["pipeline", "training", "train"]):
398
+ auto_lookups.append(("read_file", ["scripts/run_full_pipeline.py", 1, 60]))
399
+ auto_lookups.append(("read_file", ["configs/adapter_registry.yaml", 1, 51]))
400
+
401
+ if any(k in q for k in ["adapter", "lora", "perspective"]):
402
+ auto_lookups.append(("read_file", ["configs/adapter_registry.yaml", 1, 51]))
403
+
404
+ if any(k in q for k in ["config", "setting"]):
405
+ auto_lookups.append(("read_file", ["configs/adapter_registry.yaml", 1, 51]))
406
+ auto_lookups.append(("list_files", ["configs/"]))
407
+
408
+ if any(k in q for k in ["architecture", "structure", "project", "overview"]):
409
+ auto_lookups.append(("project_summary", []))
410
+
411
+ if any(k in q for k in ["server", "web", "ui", "interface"]):
412
+ auto_lookups.append(("read_file", ["inference/codette_server.py", 1, 50]))
413
+
414
+ if any(k in q for k in ["spiderweb", "cocoon", "quantum"]):
415
+ auto_lookups.append(("read_file", ["reasoning_forge/quantum_spiderweb.py", 1, 50]))
416
+
417
+ if any(k in q for k in ["epistemic", "tension", "coherence", "metric"]):
418
+ auto_lookups.append(("read_file", ["reasoning_forge/epistemic_metrics.py", 1, 50]))
419
+
420
+ if any(k in q for k in ["dataset", "data"]):
421
+ auto_lookups.append(("list_files", ["datasets/", "*.jsonl"]))
422
+
423
+ if any(k in q for k in ["paper", "research", "publication"]):
424
+ auto_lookups.append(("file_info", ["paper/codette_paper.pdf"]))
425
+ auto_lookups.append(("read_file", ["paper/codette_paper.tex", 1, 40]))
426
+
427
+ if any(k in q for k in ["forge", "reasoning", "agent"]):
428
+ auto_lookups.append(("list_files", ["reasoning_forge/"]))
429
+ auto_lookups.append(("read_file", ["reasoning_forge/epistemic_metrics.py", 1, 40]))
430
+
431
+ # If no specific match, do a code search
432
+ if not auto_lookups:
433
+ # Extract key terms for search
434
+ skip = {"show", "me", "the", "what", "is", "how", "does", "where",
435
+ "can", "you", "tell", "about", "look", "at", "find", "check"}
436
+ terms = [w for w in q.split() if w not in skip and len(w) > 2]
437
+ if terms:
438
+ auto_lookups.append(("search_code", [terms[0]]))
439
+
440
+ # Execute lookups
441
+ tool_log = []
442
+ for tool_name, args in auto_lookups[:3]: # Max 3 lookups
443
+ print(f" [auto-tool] {tool_name}({args})")
444
+ result = _tool_registry.execute(tool_name, args, {})
445
+ context_parts.append(f"=== {tool_name}({', '.join(str(a) for a in args)}) ===\n{result}")
446
+ tool_log.append({"tool": tool_name, "args": args, "result_preview": result[:200]})
447
+
448
+ context = "\n\n".join(context_parts)
449
+ return context, tool_log
450
+
451
+ def route_and_generate(self, query: str, max_adapters=2,
452
+ strategy="keyword", force_adapter=None):
453
+ """The main entry point: route query, select adapter(s), generate."""
454
+
455
+ # Force a specific adapter if requested
456
+ if force_adapter:
457
+ route = RouteResult(
458
+ primary=force_adapter,
459
+ confidence=1.0,
460
+ reasoning=f"Forced: {force_adapter}",
461
+ strategy="forced",
462
+ )
463
+ else:
464
+ route = self.router.route(query, strategy=strategy,
465
+ max_adapters=max_adapters)
466
+
467
+ print(f"\n Route: {' + '.join(route.all_adapters)} "
468
+ f"(conf={route.confidence:.2f}, {route.strategy})")
469
+ if self.verbose:
470
+ print(f" Reason: {route.reasoning}")
471
+
472
+ # Multi-perspective first (most important routing decision)
473
+ if route.multi_perspective and len(route.all_adapters) > 1:
474
+ return self._multi_perspective_generate(query, route)
475
+
476
+ # Only use tools for explicit codebase/project queries
477
+ if self._needs_tools(query):
478
+ print(f" [project query — auto-gathering context]")
479
+ return self._tool_augmented_generate(query, route)
480
+
481
+ return self._single_generate(query, route)
482
+
483
+ def _tool_augmented_generate(self, query: str, route: RouteResult):
484
+ """Generate with auto-gathered file context injected into the prompt."""
485
+ start = time.time()
486
+
487
+ # Gather context server-side (reliable, no model cooperation needed)
488
+ context, tool_log = self._auto_gather_context(query)
489
+
490
+ # Build augmented query with context
491
+ augmented_query = f"""The user asked: {query}
492
+
493
+ Here is relevant project context to help you answer:
494
+
495
+ {context}
496
+
497
+ Based on the context above, answer the user's question. Reference specific files, line numbers, and code when relevant. Be specific and factual."""
498
+
499
+ # Generate with context (disable model-side tools since we did it server-side)
500
+ text, tokens, _ = self.generate(augmented_query, route.primary, enable_tools=False)
501
+ elapsed = time.time() - start
502
+ tps = tokens / elapsed if elapsed > 0 else 0
503
+
504
+ print(f" [{route.primary}] ({tokens} tok, {tps:.1f} tok/s)")
505
+ if tool_log:
506
+ print(f" [auto-tools: {', '.join(t['tool'] for t in tool_log)}]")
507
+
508
+ return {
509
+ "response": text,
510
+ "adapter": route.primary,
511
+ "route": route,
512
+ "tokens": tokens,
513
+ "time": elapsed,
514
+ "tools_used": tool_log,
515
+ }
516
+
517
+ def _single_generate(self, query: str, route: RouteResult):
518
+ """Generate with a single adapter."""
519
+ start = time.time()
520
+ text, tokens, tool_log = self.generate(query, route.primary, enable_tools=False)
521
+ elapsed = time.time() - start
522
+ tps = tokens / elapsed if elapsed > 0 else 0
523
+
524
+ print(f" [{route.primary}] ({tokens} tok, {tps:.1f} tok/s)")
525
+ if tool_log:
526
+ print(f" [tools used: {', '.join(t['tool'] for t in tool_log)}]")
527
+ return {
528
+ "response": text,
529
+ "adapter": route.primary,
530
+ "route": route,
531
+ "tokens": tokens,
532
+ "time": elapsed,
533
+ "tools_used": tool_log,
534
+ }
535
+
536
+ def _multi_perspective_generate(self, query: str, route: RouteResult):
537
+ """Generate with multiple adapters and synthesize."""
538
+ perspectives = {}
539
+ total_tokens = 0
540
+ total_time = 0
541
+
542
+ for adapter_name in route.all_adapters:
543
+ if adapter_name not in self.available_adapters:
544
+ print(f" [{adapter_name}] SKIPPED (not available)")
545
+ continue
546
+
547
+ start = time.time()
548
+ text, tokens, _tool_log = self.generate(query, adapter_name,
549
+ enable_tools=False)
550
+ elapsed = time.time() - start
551
+ tps = tokens / elapsed if elapsed > 0 else 0
552
+ total_tokens += tokens
553
+ total_time += elapsed
554
+
555
+ perspectives[adapter_name] = text
556
+ print(f" [{adapter_name}] ({tokens} tok, {tps:.1f} tok/s)")
557
+
558
+ # Synthesize if we got multiple perspectives
559
+ if len(perspectives) > 1:
560
+ print(f" [synthesizing...]")
561
+ synthesis = self._synthesize(query, perspectives)
562
+ elif perspectives:
563
+ synthesis = list(perspectives.values())[0]
564
+ else:
565
+ synthesis = "No adapters available for this query."
566
+
567
+ return {
568
+ "response": synthesis,
569
+ "perspectives": perspectives,
570
+ "adapters": list(perspectives.keys()),
571
+ "route": route,
572
+ "tokens": total_tokens,
573
+ "time": total_time,
574
+ }
575
+
576
+ def _synthesize(self, query: str, perspectives: dict):
577
+ """Combine multiple perspective responses into a unified answer.
578
+
579
+ Enhanced with DreamReweaver creative bridges when available.
580
+ Truncates perspectives to fit within context window.
581
+ """
582
+ # Truncate each perspective to fit within context budget
583
+ # Reserve ~1200 tokens for system prompt + synthesis output
584
+ max_per_perspective = max(200, (self.n_ctx - 1200) // max(len(perspectives), 1))
585
+ # Rough char estimate: 1 token ~ 4 chars
586
+ max_chars = max_per_perspective * 4
587
+
588
+ combined = "\n\n".join(
589
+ f"**{name.upper()} PERSPECTIVE:**\n{text[:max_chars]}"
590
+ for name, text in perspectives.items()
591
+ )
592
+
593
+ # Try DreamReweaver creative framing (VIVARA enhancement)
594
+ dream_frame = ""
595
+ try:
596
+ from reasoning_forge.dream_reweaver import DreamReweaver
597
+ dreamer = DreamReweaver(creativity=0.3)
598
+ dream = dreamer.synthesize(perspectives, query=query)
599
+ if dream.creative_frame:
600
+ dream_frame = f"\n\nCreative synthesis guidance:\n{dream.creative_frame}\n"
601
+ except Exception:
602
+ pass # Graceful fallback — works without DreamReweaver
603
+
604
+ synthesis_prompt = f"""You received this question: "{query}"
605
+
606
+ Multiple reasoning perspectives have weighed in:
607
+
608
+ {combined}
609
+ {dream_frame}
610
+ Synthesize these perspectives into a single, coherent response that:
611
+ 1. Preserves the unique insights from each perspective
612
+ 2. Notes where perspectives complement or tension each other
613
+ 3. Arrives at a richer understanding than any single view
614
+
615
+ Synthesized response:"""
616
+
617
+ # Use base model for synthesis (no adapter bias)
618
+ self._load_model(None)
619
+ result = self._llm.create_chat_completion(
620
+ messages=[
621
+ {"role": "system", "content": ADAPTER_PROMPTS["multi_perspective"]},
622
+ {"role": "user", "content": synthesis_prompt},
623
+ ],
624
+ max_tokens=1024,
625
+ temperature=0.7,
626
+ top_p=0.9,
627
+ stop=["<|eot_id|>", "<|end_of_text|>"],
628
+ )
629
+
630
+ return result["choices"][0]["message"]["content"].strip()
631
+
632
+
633
+ # ================================================================
634
+ # Interactive Chat Mode
635
+ # ================================================================
636
def interactive_chat(orchestrator, max_adapters=2, strategy="keyword"):
    """Run Codette as an interactive chatbot on stdin/stdout.

    Args:
        orchestrator: Loaded CodetteOrchestrator used for routing and
            generation.
        max_adapters: Upper bound on adapters per query (adjustable at
            runtime via ``/multi <n>``).
        strategy: Routing strategy forwarded to ``route_and_generate``.

    Loops until ``/quit`` (or EOF / Ctrl+C). Supported commands:
    ``/quit``, ``/adapter <name>``, ``/multi <n>``, ``/base``, ``/verbose``.
    """
    print("\n" + "=" * 60)
    print(" CODETTE ORCHESTRATOR — Interactive Mode")
    print("=" * 60)
    print(f" Strategy: {strategy} | Max adapters: {max_adapters}")
    print(f" Available: {', '.join(orchestrator.available_adapters)}")
    print(f" Commands: /quit, /adapter <name>, /multi <n>, /base, /verbose")
    print("=" * 60)

    while True:
        try:
            query = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nGoodbye!")
            break

        if not query:
            continue

        # Slash commands
        if query.startswith("/"):
            parts = query.split()
            cmd = parts[0].lower()

            if cmd in ("/quit", "/exit", "/q"):
                print("Goodbye!")
                break
            elif cmd == "/adapter" and len(parts) > 1:
                # One-shot query forced through a named adapter.
                force = parts[1]
                result = orchestrator.route_and_generate(
                    input(" Query: ").strip(),
                    force_adapter=force,
                )
                print(f"\nCodette ({force}):\n{result['response']}")
                continue
            elif cmd == "/multi" and len(parts) > 1:
                # BUG FIX: a non-numeric argument (e.g. "/multi abc") used
                # to raise an uncaught ValueError and crash the chat loop.
                try:
                    max_adapters = int(parts[1])
                    print(f" Max adapters set to {max_adapters}")
                except ValueError:
                    print(f" Invalid number: {parts[1]!r} — expected an integer")
                continue
            elif cmd == "/base":
                # One-shot query with auto-routing (no forced adapter).
                result = orchestrator.route_and_generate(
                    input(" Query: ").strip(),
                    force_adapter=None,
                )
                print(f"\nCodette (base):\n{result['response']}")
                continue
            elif cmd == "/verbose":
                orchestrator.verbose = not orchestrator.verbose
                print(f" Verbose: {orchestrator.verbose}")
                continue
            else:
                print(" Unknown command. Try /quit, /adapter <name>, /multi <n>, /base, /verbose")
                continue

        # Normal query — route and generate
        result = orchestrator.route_and_generate(
            query,
            max_adapters=max_adapters,
            strategy=strategy,
        )

        print(f"\nCodette:")
        print(result["response"])

        # Offer the individual answers when a multi-perspective merge ran.
        if "perspectives" in result and len(result.get("perspectives", {})) > 1:
            show = input("\n Show individual perspectives? (y/n): ").strip().lower()
            if show == "y":
                for name, text in result["perspectives"].items():
                    print(f"\n [{name.upper()}]:")
                    print(f" {text}")
708
+
709
+
710
+ # ================================================================
711
+ # Main
712
+ # ================================================================
713
def main():
    """CLI entry point: parse arguments, build the orchestrator, then run
    either a single non-interactive query or the interactive chat loop."""
    parser = argparse.ArgumentParser(description="Codette Orchestrator")
    parser.add_argument("--query", "-q", type=str, help="Single query (non-interactive)")
    parser.add_argument("--adapter", "-a", type=str, help="Force specific adapter")
    parser.add_argument("--multi", "-m", type=int, default=2, help="Max adapters (default: 2)")
    parser.add_argument("--strategy", "-s", type=str, default="keyword",
                        choices=["keyword", "llm", "hybrid"], help="Routing strategy")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    parser.add_argument("--gpu-layers", type=int, default=0, help="GPU layers (0=CPU only)")
    args = parser.parse_args()

    banner = "=" * 60
    print(banner)
    print(" CODETTE ORCHESTRATOR")
    print(banner)
    print(f" Base: {os.path.basename(BASE_GGUF)}")
    print(f" Strategy: {args.strategy}")

    orchestrator = CodetteOrchestrator(
        n_gpu_layers=args.gpu_layers,
        verbose=args.verbose,
    )

    if not args.query:
        # No one-shot query supplied — drop into interactive chat mode.
        interactive_chat(orchestrator, max_adapters=args.multi, strategy=args.strategy)
        return

    # Single-query mode.
    result = orchestrator.route_and_generate(
        args.query,
        max_adapters=args.multi,
        strategy=args.strategy,
        force_adapter=args.adapter,
    )
    print(f"\nCodette:")
    print(result["response"])

    if "perspectives" in result:
        print(f"\n--- Perspectives ---")
        for name, text in result["perspectives"].items():
            print(f"\n[{name.upper()}]:")
            print(text)
754
+
755
+
756
# Script entry point — argument handling lives in main().
if __name__ == "__main__":
    main()
inference/codette_server.py ADDED
@@ -0,0 +1,728 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""Codette Web Server — Zero-Dependency Local AI Chat

Pure Python stdlib HTTP server with SSE streaming.
No Flask, no FastAPI, no npm, no node — just Python.

Usage:
    python codette_server.py              # Start on port 7860
    python codette_server.py --port 8080  # Custom port
    python codette_server.py --no-browser # Don't auto-open browser

Architecture:
    - http.server for static files + REST API
    - Server-Sent Events (SSE) for streaming responses
    - Threading for background model loading/inference
    - CodetteOrchestrator for routing + generation
    - CodetteSession for Cocoon-backed memory
"""

import os, sys, json, time, threading, queue, argparse, webbrowser, traceback
from pathlib import Path
from http.server import HTTPServer, SimpleHTTPRequestHandler
from urllib.parse import urlparse, parse_qs
from io import BytesIO

# Auto-configure environment: make the bundled site-packages importable and
# its native DLLs loadable before any heavy import runs.
# NOTE(review): hard-coded J:\ drive path — machine-specific; confirm before
# deploying elsewhere.
_site = r"J:\Lib\site-packages"
if _site not in sys.path:
    sys.path.insert(0, _site)
os.environ["PATH"] = r"J:\Lib\site-packages\Library\bin" + os.pathsep + os.environ.get("PATH", "")
try:
    # The console may not support UTF-8 (legacy Windows codepages);
    # degrade to replacement characters instead of crashing on print.
    sys.stdout.reconfigure(encoding='utf-8', errors='replace')
except Exception:
    pass

# Project imports: make sibling modules in inference/ importable regardless
# of the working directory the server was launched from.
_inference_dir = str(Path(__file__).parent)
if _inference_dir not in sys.path:
    sys.path.insert(0, _inference_dir)

from codette_session import (
    CodetteSession, SessionStore, ADAPTER_COLORS, AGENT_NAMES
)

# Lazy import orchestrator (heavy — loads llama_cpp)
_orchestrator = None
_orchestrator_lock = threading.Lock()
_inference_semaphore = threading.Semaphore(1)  # Limit to 1 concurrent inference (llama.cpp can't parallelize)
_orchestrator_status = {"state": "idle", "message": "Not loaded"}
_orchestrator_status_lock = threading.Lock()  # Protect _orchestrator_status from race conditions
_load_error = None  # Last orchestrator load failure message, if any

# Phase 6 bridge (optional, wraps orchestrator)
_forge_bridge = None
_use_phase6 = True  # ENABLED: Foundation restoration (memory kernel + stability field) wrapped in ForgeEngine + Phase 7 routing

# Current session (single active session per server process)
_session: CodetteSession = None
_session_store: SessionStore = None
_session_lock = threading.Lock()

# Request queue for thread-safe model access: handlers enqueue work here,
# worker threads dequeue and reply through per-request response queues.
_request_queue = queue.Queue()
_response_queues = {}  # request_id -> queue.Queue
_response_queues_lock = threading.Lock()  # Protect _response_queues from race conditions
_queue_creation_times = {}  # Track when each queue was created for cleanup

# Worker threads for health monitoring (see _monitor_worker_health)
_worker_threads = []
_worker_threads_lock = threading.Lock()
+
72
+
73
def _get_orchestrator():
    """Lazy-load the orchestrator (first call takes ~60s).

    Uses double-checked locking on ``_orchestrator_lock`` so only one
    thread pays the load cost; status transitions are published through
    ``_orchestrator_status`` under ``_orchestrator_status_lock``.

    Returns:
        The shared CodetteOrchestrator instance, or None when loading
        failed (the failure reason is stored in ``_load_error``).
    """
    global _orchestrator, _orchestrator_status, _load_error, _forge_bridge
    # Fast path: already loaded, no lock needed for a read of the ref.
    if _orchestrator is not None:
        return _orchestrator

    with _orchestrator_lock:
        # Re-check under the lock — another thread may have finished loading.
        if _orchestrator is not None:
            return _orchestrator

        with _orchestrator_status_lock:
            _orchestrator_status.update({"state": "loading", "message": "Loading Codette model..."})
        print("\n Loading CodetteOrchestrator...")

        try:
            # Deferred import: pulls in llama_cpp, which is expensive.
            from codette_orchestrator import CodetteOrchestrator
            _orchestrator = CodetteOrchestrator(verbose=True)

            with _orchestrator_status_lock:
                _orchestrator_status.update({
                    "state": "ready",
                    "message": f"Ready — {len(_orchestrator.available_adapters)} adapters",
                    "adapters": _orchestrator.available_adapters,
                })
            print(f" Orchestrator ready: {_orchestrator.available_adapters}")

            # Initialize Phase 6 bridge with Phase 7 routing (wraps orchestrator
            # with ForgeEngine + Executive Controller). Failure here is non-fatal:
            # the plain orchestrator keeps serving with lightweight routing.
            print(f" [DEBUG] _use_phase6 = {_use_phase6}")
            if _use_phase6:
                try:
                    print(f" [DEBUG] Importing CodetteForgeBridge...")
                    from codette_forge_bridge import CodetteForgeBridge
                    print(f" [DEBUG] Creating bridge instance...")
                    _forge_bridge = CodetteForgeBridge(_orchestrator, use_phase6=True, use_phase7=True, verbose=True)
                    print(f" Phase 6 bridge initialized")
                    print(f" Phase 7 Executive Controller initialized")
                    with _orchestrator_status_lock:
                        _orchestrator_status.update({"phase6": "enabled", "phase7": "enabled"})
                except Exception as e:
                    print(f" Phase 6/7 bridge failed (using lightweight routing): {e}")
                    import traceback
                    traceback.print_exc()
                    with _orchestrator_status_lock:
                        _orchestrator_status.update({"phase6": "disabled", "phase7": "disabled"})
            else:
                print(f" [DEBUG] Phase 6 disabled (_use_phase6=False)")

            return _orchestrator
        except Exception as e:
            # Record the failure so request handlers can report it; the next
            # call will retry the load since _orchestrator is still None.
            _load_error = str(e)
            with _orchestrator_status_lock:
                _orchestrator_status.update({"state": "error", "message": f"Load failed: {e}"})
            print(f" ERROR loading orchestrator: {e}")
            traceback.print_exc()
            return None
128
+
129
+
130
def _cleanup_orphaned_queues():
    """Background janitor loop for abandoned response queues.

    Every 60 seconds, drops any per-request response queue older than
    five minutes so abandoned requests cannot leak memory. Runs forever;
    intended to be started as a daemon thread.
    """
    while True:
        try:
            time.sleep(60)  # Run cleanup every 60 seconds
            now = time.time()

            with _response_queues_lock:
                # Anything created more than 300 s ago counts as abandoned.
                stale = [
                    req_id
                    for req_id, created in list(_queue_creation_times.items())
                    if now - created > 300
                ]

                for req_id in stale:
                    _response_queues.pop(req_id, None)
                    _queue_creation_times.pop(req_id, None)

                if stale:
                    print(f" Cleaned up {len(stale)} orphaned response queues")
        except Exception as e:
            # Never let the janitor die — log and keep looping.
            print(f" WARNING: Cleanup thread error: {e}")
+ print(f" WARNING: Cleanup thread error: {e}")
156
+
157
+
158
def _monitor_worker_health():
    """Watchdog loop over the inference worker threads.

    Every five seconds, restarts any worker thread that has died (same
    index, fresh thread object) and logs queue depth whenever work is
    pending. Runs forever; intended to be started as a daemon thread.
    """
    while True:
        try:
            time.sleep(5)  # Check every 5 seconds

            with _worker_threads_lock:
                # Single liveness pass so each thread is classified once.
                alive_workers, dead_workers = [], []
                for idx, worker in enumerate(_worker_threads):
                    if worker.is_alive():
                        alive_workers.append((idx, worker))
                    else:
                        dead_workers.append(idx)

                if dead_workers:
                    print(f" WARNING: Detected {len(dead_workers)} dead worker(s): {dead_workers}")
                    for idx in dead_workers:
                        print(f" Restarting worker thread {idx}...")
                        replacement = threading.Thread(target=_worker_thread, daemon=True, name=f"worker-{idx}")
                        replacement.start()
                        _worker_threads[idx] = replacement
                    print(f" Worker threads restarted successfully")

                # Periodic status line — only when there is queued work.
                work_queue_size = _request_queue.qsize()
                if work_queue_size > 0:
                    print(f" Worker status: {len(alive_workers)} alive, {len(_response_queues)} pending requests, {work_queue_size} queued")

        except Exception as e:
            # The watchdog itself must survive any error.
            print(f" WARNING: Worker health monitor error: {e}")
+ print(f" WARNING: Worker health monitor error: {e}")
195
+
196
+
197
def _worker_thread():
    """Background worker that processes inference requests.

    Protocol: pull a request dict off ``_request_queue``, look up its
    per-request response queue, emit a "thinking" event, run inference
    under ``_inference_semaphore`` (1 concurrent job), then push either a
    "complete" or "error" event back. A ``None`` request is the shutdown
    signal. Before every put, the response queue is re-checked under
    ``_response_queues_lock`` because the HTTP handler may have timed out
    and cleaned it up.
    """
    # NOTE: Session handling disabled for now due to scoping issues
    # TODO: Refactor session management to avoid UnboundLocalError

    while True:
        # Poll with a short timeout so the loop can also notice shutdown.
        try:
            request = _request_queue.get(timeout=1.0)
        except queue.Empty:
            continue

        if request is None:
            break  # Shutdown signal

        req_id = request["id"]

        # Get response queue with thread lock (prevent race condition)
        with _response_queues_lock:
            response_q = _response_queues.get(req_id)

        if not response_q:
            # Handler already gave up (timeout/cleanup) — nothing to reply to.
            print(f" WARNING: Orphaned request {req_id} (response queue missing)")
            continue

        try:
            orch = _get_orchestrator()
            if orch is None:
                # Model never loaded — surface the stored load error.
                try:
                    response_q.put({"error": _load_error or "Model failed to load"})
                except (queue.Full, RuntimeError) as e:
                    print(f" ERROR: Failed to queue error response: {e}")
                continue

            query = request["query"]
            adapter = request.get("adapter")  # None = auto-route
            max_adapters = request.get("max_adapters", 2)

            # Send "thinking" event so the client can show progress early.
            try:
                response_q.put({"event": "thinking", "adapter": adapter or "auto"})
            except (queue.Full, RuntimeError) as e:
                print(f" ERROR: Failed to queue thinking event: {e}")
                continue

            # Route and generate — limit to 1 concurrent inference to avoid
            # memory exhaustion. Timeout prevents deadlock if a job sticks.
            acquired = _inference_semaphore.acquire(timeout=120)
            if not acquired:
                try:
                    response_q.put({"error": "Inference queue full, request timed out after 2 minutes"})
                except (queue.Full, RuntimeError):
                    pass
                continue

            try:
                # Prefer the Phase 6/7 bridge when it initialized; otherwise
                # fall back to direct orchestrator routing.
                if _forge_bridge:
                    result = _forge_bridge.generate(query, adapter=adapter, max_adapters=max_adapters)
                else:
                    result = orch.route_and_generate(
                        query,
                        max_adapters=max_adapters,
                        strategy="keyword",
                        force_adapter=adapter if adapter and adapter != "auto" else None,
                    )

                # Update session DISABLED - session handling deferred
                # (was causing UnboundLocalError due to scoping issues)
                epistemic = None

                # Extract route info from result (if available from ForgeEngine).
                # ``route`` may be a dict (bridge path) or a RouteResult object
                # (orchestrator path) — both shapes are handled below.
                route = result.get("route")
                perspectives = result.get("perspectives", [])

                # Build response payload for the HTTP handler / SSE stream.
                response_data = {
                    "event": "complete",
                    "response": result["response"],
                    "adapter": result.get("adapter",
                        result.get("adapters", ["base"])[0] if isinstance(result.get("adapters"), list) else "base"),
                    "confidence": route.get("confidence", 0) if isinstance(route, dict) else (route.confidence if route else 0),
                    "reasoning": route.get("reasoning", "") if isinstance(route, dict) else (route.reasoning if route else ""),
                    "tokens": result.get("tokens", 0),
                    "time": round(result.get("time", 0), 2),
                    "multi_perspective": route.get("multi_perspective", False) if isinstance(route, dict) else (route.multi_perspective if route else False),
                }

                # Add perspectives if available
                if perspectives:
                    response_data["perspectives"] = perspectives

                # Cocoon state DISABLED (requires session handling refactoring)

                # Add epistemic report if available (currently always None —
                # session handling is disabled above).
                if epistemic:
                    response_data["epistemic"] = epistemic

                # Add tool usage info if any tools were called
                tools_used = result.get("tools_used", [])
                if tools_used:
                    response_data["tools_used"] = tools_used

                # RE-CHECK response queue still exists (handler may have cleaned
                # it up if its timeout fired while we were generating).
                with _response_queues_lock:
                    response_q_still_exists = req_id in _response_queues

                if response_q_still_exists:
                    try:
                        response_q.put(response_data)
                    except (queue.Full, RuntimeError) as e:
                        print(f" ERROR: Failed to queue response: {e}")
                else:
                    print(f" WARNING: Response queue was cleaned up (handler timeout) - response dropped for {req_id}")

            except Exception as e:
                print(f" ERROR during inference: {e}")
                traceback.print_exc()

                # DEFENSIVE: RE-CHECK response queue before putting error
                with _response_queues_lock:
                    response_q_still_exists = req_id in _response_queues

                if response_q_still_exists:
                    try:
                        response_q.put({"event": "error", "error": str(e)})
                    except (queue.Full, RuntimeError):
                        print(f" ERROR: Also failed to queue error response")
                else:
                    print(f" WARNING: Response queue was cleaned up (handler timeout) - error response dropped for {req_id}")
            finally:
                # Always release the semaphore, success or failure.
                _inference_semaphore.release()

        except Exception as e:
            # Catch-all keeps the worker loop alive for the next request.
            print(f" ERROR in worker thread: {e}")
            traceback.print_exc()
+ traceback.print_exc()
332
+
333
+
334
class CodetteHandler(SimpleHTTPRequestHandler):
    """Custom HTTP handler for the Codette API + static files.

    GET routes:
        /api/status    — orchestrator load state
        /api/session   — current session state
        /api/sessions  — list of stored sessions
        /api/adapters  — adapter colors/agent names/availability
        /api/chat      — SSE streaming chat (``?q=...&adapter=...``)
        anything else  — static files from inference/static/
    POST routes:
        /api/chat                       — blocking chat request
        /api/session/{new,load,save,export,import}

    Chat requests are queued onto ``_request_queue`` and answered by the
    background worker through a per-request response queue.
    """

    def __init__(self, *args, **kwargs):
        # Serve static files from inference/static/ (sibling of this module).
        static_dir = str(Path(__file__).parent / "static")
        super().__init__(*args, directory=static_dir, **kwargs)

    def log_message(self, format, *args):
        """Quieter logging — skip static file requests."""
        msg = format % args
        if not any(ext in msg for ext in [".css", ".js", ".ico", ".png", ".woff"]):
            print(f" [{time.strftime('%H:%M:%S')}] {msg}")

    def do_GET(self):
        """Dispatch GET requests: API endpoints first, then static files."""
        parsed = urlparse(self.path)
        path = parsed.path

        # API routes
        if path == "/api/status":
            self._json_response(_orchestrator_status)
        elif path == "/api/session":
            self._json_response(_session.get_state() if _session else {})
        elif path == "/api/sessions":
            sessions = _session_store.list_sessions() if _session_store else []
            self._json_response({"sessions": sessions})
        elif path == "/api/adapters":
            self._json_response({
                "colors": ADAPTER_COLORS,
                "agents": AGENT_NAMES,
                "available": _orchestrator.available_adapters if _orchestrator else [],
            })
        elif path == "/api/chat":
            # SSE endpoint for streaming
            self._handle_chat_sse(parsed)
        elif path == "/":
            # Serve index.html
            self.path = "/index.html"
            super().do_GET()
        else:
            super().do_GET()

    def do_POST(self):
        """Dispatch POST requests to chat / session endpoints."""
        parsed = urlparse(self.path)
        path = parsed.path

        if path == "/api/chat":
            self._handle_chat_post()
        elif path == "/api/session/new":
            self._handle_new_session()
        elif path == "/api/session/load":
            self._handle_load_session()
        elif path == "/api/session/save":
            self._handle_save_session()
        elif path == "/api/session/export":
            self._handle_export_session()
        elif path == "/api/session/import":
            self._handle_import_session()
        else:
            self.send_error(404, "Not found")

    def _json_response(self, data, status=200):
        """Send *data* as a JSON response with CORS headers.

        Client disconnects are swallowed — they are normal for a browser
        that navigated away mid-response.
        """
        try:
            body = json.dumps(data, default=str).encode("utf-8")
            self.send_response(status)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", len(body))
            self.send_header("Access-Control-Allow-Origin", "*")
            self.end_headers()
            self.wfile.write(body)
            self.wfile.flush()
        except (ConnectionAbortedError, BrokenPipeError):
            # Client disconnected before response was fully sent — this is normal
            pass
        except Exception as e:
            print(f" ERROR in _json_response: {e}")

    def _read_json_body(self):
        """Read and parse the JSON POST body (empty body -> ``{}``)."""
        length = int(self.headers.get("Content-Length", 0))
        body = self.rfile.read(length)
        return json.loads(body) if body else {}

    def _handle_chat_post(self):
        """Handle a blocking chat request.

        Queues the inference job for the worker thread and waits on a
        per-request response queue; replies with JSON (or 503 while the
        model is still loading, 504 on timeout).
        """
        data = self._read_json_body()
        query = data.get("query", "").strip()
        adapter = data.get("adapter")
        max_adapters = data.get("max_adapters", 2)

        if not query:
            self._json_response({"error": "Empty query"}, 400)
            return

        # Guardian input check — replace unsafe input with its cleaned form.
        if _session and _session.guardian:
            check = _session.guardian.check_input(query)
            if not check["safe"]:
                query = check["cleaned_text"]

        # Reject early while the orchestrator is still loading.
        with _orchestrator_status_lock:
            status_state = _orchestrator_status.get("state")
            if status_state == "loading":
                self._json_response({
                    "error": "Model is still loading, please wait...",
                    "status": _orchestrator_status,
                }, 503)
                return

        # Queue the request with a unique id and its own response queue.
        req_id = f"{time.time()}_{id(self)}"
        response_q = queue.Queue()

        with _response_queues_lock:
            _response_queues[req_id] = response_q
            _queue_creation_times[req_id] = time.time()

        _request_queue.put({
            "id": req_id,
            "query": query,
            "adapter": adapter,
            "max_adapters": max_adapters,
        })

        # Wait for response (with timeout)
        try:
            # First wait for the worker's "thinking" event.
            thinking = response_q.get(timeout=120)
            if "error" in thinking and thinking.get("event") != "thinking":
                self._json_response(thinking, 500)
                return

            # Wait for complete event (multi-perspective can take 15+ min on CPU)
            result = response_q.get(timeout=1200)  # 20 min max for inference
            self._json_response(result)

        except queue.Empty:
            self._json_response({"error": "Request timed out"}, 504)
        finally:
            # Clean up with thread lock
            with _response_queues_lock:
                _response_queues.pop(req_id, None)
                _queue_creation_times.pop(req_id, None)

    def _handle_chat_sse(self, parsed):
        """Handle the SSE streaming chat endpoint (GET /api/chat?q=...)."""
        params = parse_qs(parsed.query)
        query = params.get("q", [""])[0]
        adapter = params.get("adapter", [None])[0]

        if not query:
            self.send_error(400, "Missing query parameter 'q'")
            return

        # Set up SSE headers
        self.send_response(200)
        self.send_header("Content-Type", "text/event-stream")
        self.send_header("Cache-Control", "no-cache")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Connection", "keep-alive")
        self.end_headers()

        # Queue request
        req_id = f"sse_{time.time()}_{id(self)}"
        response_q = queue.Queue()

        with _response_queues_lock:
            _response_queues[req_id] = response_q
            _queue_creation_times[req_id] = time.time()

        _request_queue.put({
            "id": req_id,
            "query": query,
            "adapter": adapter,
            "max_adapters": 2,
        })

        try:
            # Relay worker events to the client until completion or error.
            while True:
                try:
                    event = response_q.get(timeout=300)
                except queue.Empty:
                    self._send_sse("error", {"error": "Timeout"})
                    break

                event_type = event.get("event", "message")
                self._send_sse(event_type, event)

                if event_type in ("complete", "error"):
                    break
        finally:
            # BUG FIX: clean up under the lock and also drop the creation
            # timestamp — previously the pop was unlocked and the
            # _queue_creation_times entry leaked until the janitor ran.
            with _response_queues_lock:
                _response_queues.pop(req_id, None)
                _queue_creation_times.pop(req_id, None)

    def _send_sse(self, event_type, data):
        """Send a single Server-Sent Event; ignore client disconnects."""
        try:
            payload = f"event: {event_type}\ndata: {json.dumps(data, default=str)}\n\n"
            self.wfile.write(payload.encode("utf-8"))
            self.wfile.flush()
        except Exception:
            pass

    def _handle_new_session(self):
        """Create a new session, persisting the previous one first."""
        global _session
        # Save current session first (best-effort).
        if _session and _session_store and _session.messages:
            try:
                _session_store.save(_session)
            except Exception:
                pass

        _session = CodetteSession()
        self._json_response({"session_id": _session.session_id})

    def _handle_load_session(self):
        """Load a previously stored session by id."""
        global _session
        data = self._read_json_body()
        session_id = data.get("session_id")

        if not session_id or not _session_store:
            self._json_response({"error": "Invalid session ID"}, 400)
            return

        loaded = _session_store.load(session_id)
        if loaded:
            _session = loaded
            self._json_response({
                "session_id": _session.session_id,
                "messages": _session.messages,
                "state": _session.get_state(),
            })
        else:
            self._json_response({"error": "Session not found"}, 404)

    def _handle_save_session(self):
        """Manually save the current session to the store."""
        if _session and _session_store:
            _session_store.save(_session)
            self._json_response({"saved": True, "session_id": _session.session_id})
        else:
            self._json_response({"error": "No active session"}, 400)

    def _handle_export_session(self):
        """Export the current session as a downloadable JSON attachment."""
        if not _session:
            self._json_response({"error": "No active session"}, 400)
            return

        export_data = _session.to_dict()
        export_data["_export_version"] = 1
        export_data["_exported_at"] = time.time()

        body = json.dumps(export_data, default=str, indent=2).encode("utf-8")
        filename = f"codette_session_{_session.session_id[:8]}.json"
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        # BUG FIX: the Content-Disposition header previously carried a
        # hard-coded placeholder instead of the computed download filename,
        # leaving the `filename` variable dead.
        self.send_header("Content-Disposition", f'attachment; filename="{filename}"')
        self.send_header("Content-Length", len(body))
        self.send_header("Access-Control-Allow-Origin", "*")
        self.end_headers()
        self.wfile.write(body)

    def _handle_import_session(self):
        """Import a session from uploaded JSON and make it current."""
        global _session
        try:
            data = self._read_json_body()
            if not data or "session_id" not in data:
                self._json_response({"error": "Invalid session data"}, 400)
                return

            # Save current session before importing (best-effort).
            if _session and _session_store and _session.messages:
                try:
                    _session_store.save(_session)
                except Exception:
                    pass

            _session = CodetteSession()
            _session.from_dict(data)

            # Save imported session to store (best-effort).
            if _session_store:
                try:
                    _session_store.save(_session)
                except Exception:
                    pass

            self._json_response({
                "session_id": _session.session_id,
                "messages": _session.messages,
                "state": _session.get_state(),
                "imported": True,
            })
        except Exception as e:
            self._json_response({"error": f"Import failed: {e}"}, 400)
+ self._json_response({"error": f"Import failed: {e}"}, 400)
637
+
638
+
639
+ def main():
640
+ global _session, _session_store, _worker_threads
641
+
642
+ parser = argparse.ArgumentParser(description="Codette Web UI")
643
+ parser.add_argument("--port", type=int, default=7860, help="Port (default: 7860)")
644
+ parser.add_argument("--no-browser", action="store_true", help="Don't auto-open browser")
645
+ args = parser.parse_args()
646
+
647
+ print("=" * 60)
648
+ print(" CODETTE WEB UI")
649
+ print("=" * 60)
650
+
651
+ # Initialize session
652
+ _session_store = SessionStore()
653
+ _session = CodetteSession()
654
+ print(f" Session: {_session.session_id}")
655
+ print(f" Cocoon: spiderweb={_session.spiderweb is not None}, "
656
+ f"metrics={_session.metrics_engine is not None}")
657
+
658
+ # Start worker thread for request processing
659
+ # NOTE: Only 1 worker needed — llama.cpp cannot parallelize inference.
660
+ # With 1 semaphore + 1 worker, we avoid idle threads and deadlock risk.
661
+ # Multiple workers would just spin waiting for the semaphore.
662
+ num_workers = 1
663
+ with _worker_threads_lock:
664
+ for i in range(num_workers):
665
+ worker = threading.Thread(target=_worker_thread, daemon=True, name=f"worker-{i}")
666
+ worker.start()
667
+ _worker_threads.append(worker)
668
+ print(f" Started {num_workers} worker thread for serial inference")
669
+
670
+ # Start cleanup thread for orphaned response queues
671
+ cleanup_thread = threading.Thread(target=_cleanup_orphaned_queues, daemon=True, name="cleanup")
672
+ cleanup_thread.start()
673
+ print(f" Started cleanup thread for queue maintenance")
674
+
675
+ # Start worker health monitor thread
676
+ health_monitor = threading.Thread(target=_monitor_worker_health, daemon=True, name="health-monitor")
677
+ health_monitor.start()
678
+ print(f" Started worker health monitor thread")
679
+
680
+ # Start model loading in background
681
+ threading.Thread(target=_get_orchestrator, daemon=True).start()
682
+
683
+ # Wait for model to load (up to 120 seconds)
684
+ print(f" Waiting for model to load (this takes ~60s on first startup)...")
685
+ start_wait = time.time()
686
+ while True:
687
+ with _orchestrator_status_lock:
688
+ state = _orchestrator_status.get("state")
689
+ if state not in ("idle", "loading"):
690
+ break
691
+ if time.time() - start_wait > 120:
692
+ break
693
+ time.sleep(0.5)
694
+
695
+ with _orchestrator_status_lock:
696
+ state = _orchestrator_status.get("state")
697
+ if state == "ready":
698
+ print(f" Model loaded in {time.time() - start_wait:.0f}s")
699
+ elif state == "loading":
700
+ print(f" Model still loading (will continue in background)...")
701
+ else:
702
+ print(f" WARNING: Model load status: {_orchestrator_status}")
703
+
704
+ # Start server
705
+ server = HTTPServer(("127.0.0.1", args.port), CodetteHandler)
706
+ url = f"http://localhost:{args.port}"
707
+ print(f"\n Server: {url}")
708
+ print(f" Press Ctrl+C to stop\n")
709
+
710
+ # Open browser
711
+ if not args.no_browser:
712
+ threading.Timer(1.0, lambda: webbrowser.open(url)).start()
713
+
714
+ try:
715
+ server.serve_forever()
716
+ except KeyboardInterrupt:
717
+ print("\n Shutting down...")
718
+ # Save session
719
+ if _session and _session_store and _session.messages:
720
+ _session_store.save(_session)
721
+ print(f" Session saved: {_session.session_id}")
722
+ _request_queue.put(None) # Shutdown worker
723
+ server.shutdown()
724
+ print(" Goodbye!")
725
+
726
+
727
+ if __name__ == "__main__":
728
+ main()
inference/codette_session.py ADDED
@@ -0,0 +1,675 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Codette Session Manager — Cocoon-Backed Conversation Memory
3
+
4
+ Wraps the Cocoon system (QuantumSpiderweb + CocoonSync + EpistemicMetrics)
5
+ into a session manager that persists conversation state with encrypted memory.
6
+
7
+ Each session saves:
8
+ - Chat history
9
+ - Spiderweb state (agent beliefs, tensions, attractors)
10
+ - Glyphs (identity signatures)
11
+ - Epistemic metrics (coherence, tension, coverage)
12
+
13
+ Zero external dependencies beyond what the forge already uses.
14
+ """
15
+
16
+ import json, os, time, hashlib, sqlite3
17
+ from pathlib import Path
18
+ from typing import Dict, List, Optional, Any
19
+
20
+ # Add project root to path
21
+ import sys
22
+ _root = str(Path(__file__).parent.parent)
23
+ if _root not in sys.path:
24
+ sys.path.insert(0, _root)
25
+
26
+ # Import Cocoon subsystems (graceful fallback if not available)
27
+ try:
28
+ from reasoning_forge.quantum_spiderweb import QuantumSpiderweb, NodeState
29
+ HAS_SPIDERWEB = True
30
+ except ImportError:
31
+ HAS_SPIDERWEB = False
32
+
33
+ try:
34
+ from reasoning_forge.epistemic_metrics import EpistemicMetrics
35
+ HAS_METRICS = True
36
+ except ImportError:
37
+ HAS_METRICS = False
38
+
39
+ try:
40
+ from reasoning_forge.cocoon_sync import CocoonSync, CocoonKeyManager
41
+ HAS_COCOON = True
42
+ except ImportError:
43
+ HAS_COCOON = False
44
+
45
+ try:
46
+ from reasoning_forge.dream_reweaver import DreamReweaver
47
+ HAS_DREAMER = True
48
+ except ImportError:
49
+ HAS_DREAMER = False
50
+
51
+ try:
52
+ from reasoning_forge.quantum_optimizer import QuantumOptimizer, QualitySignal
53
+ HAS_OPTIMIZER = True
54
+ except ImportError:
55
+ HAS_OPTIMIZER = False
56
+
57
+ try:
58
+ from reasoning_forge.living_memory import LivingMemoryKernel
59
+ HAS_MEMORY = True
60
+ except ImportError:
61
+ HAS_MEMORY = False
62
+
63
+ try:
64
+ from reasoning_forge.guardian import CodetteGuardian
65
+ HAS_GUARDIAN = True
66
+ except ImportError:
67
+ HAS_GUARDIAN = False
68
+
69
+ try:
70
+ from reasoning_forge.resonant_continuity import ResonantContinuityEngine
71
+ HAS_RESONANCE = True
72
+ except ImportError:
73
+ HAS_RESONANCE = False
74
+
75
+ try:
76
+ from reasoning_forge.perspective_registry import (
77
+ PERSPECTIVES, get_adapter_for_perspective, list_all as list_perspectives
78
+ )
79
+ HAS_PERSPECTIVES = True
80
+ except ImportError:
81
+ HAS_PERSPECTIVES = False
82
+
83
+ try:
84
+ from reasoning_forge.aegis import AEGIS
85
+ HAS_AEGIS = True
86
+ except ImportError:
87
+ HAS_AEGIS = False
88
+
89
+ try:
90
+ from reasoning_forge.nexus import NexusSignalEngine
91
+ HAS_NEXUS = True
92
+ except ImportError:
93
+ HAS_NEXUS = False
94
+
95
+ # Agent names matching the 8 adapters
96
+ AGENT_NAMES = [
97
+ "newton", "davinci", "empathy", "philosophy",
98
+ "quantum", "consciousness", "multi_perspective", "systems_architecture"
99
+ ]
100
+
101
+ # Adapter accent colors for UI
102
+ ADAPTER_COLORS = {
103
+ "newton": "#3b82f6", # Electric blue
104
+ "davinci": "#f59e0b", # Warm gold
105
+ "empathy": "#a855f7", # Soft purple
106
+ "philosophy": "#10b981", # Emerald green
107
+ "quantum": "#ef4444", # Crimson red
108
+ "consciousness": "#e2e8f0", # Silver/white
109
+ "multi_perspective": "#f97316", # Amber
110
+ "systems_architecture": "#06b6d4", # Teal
111
+ "_base": "#94a3b8", # Slate gray
112
+ }
113
+
114
+ DB_PATH = Path(__file__).parent.parent / "data" / "codette_sessions.db"
115
+
116
+
117
+ class CodetteSession:
118
+ """Manages a single conversation session with Cocoon state."""
119
+
120
+ def __init__(self, session_id: Optional[str] = None):
121
+ self.session_id = session_id or hashlib.sha256(
122
+ f"{time.time()}_{os.getpid()}".encode()
123
+ ).hexdigest()[:16]
124
+
125
+ self.messages: List[Dict[str, str]] = []
126
+ self.created_at = time.time()
127
+ self.updated_at = time.time()
128
+
129
+ # Cocoon state
130
+ self.spiderweb = None
131
+ self.metrics_engine = None
132
+ self.cocoon_sync = None
133
+ self.dream_reweaver = None
134
+ self.optimizer = None
135
+ self.memory_kernel = None
136
+ self.guardian = None
137
+ self.resonance_engine = None
138
+ self.aegis = None
139
+ self.nexus = None
140
+
141
+ # Metrics history
142
+ self.coherence_history: List[float] = []
143
+ self.tension_history: List[float] = []
144
+ self.attractors: List[Dict] = []
145
+ self.glyphs: List[Dict] = []
146
+ self.perspective_usage: Dict[str, int] = {}
147
+ self.lifeforms: List[str] = [] # Spawned concept nodes
148
+ self.dream_history: List[Dict] = [] # Dream field results
149
+
150
+ # Initialize subsystems
151
+ self._init_cocoon()
152
+
153
+ def _init_cocoon(self):
154
+ """Initialize Cocoon subsystems if available."""
155
+ if HAS_SPIDERWEB:
156
+ self.spiderweb = QuantumSpiderweb()
157
+ self.spiderweb.build_from_agents(AGENT_NAMES)
158
+
159
+ if HAS_METRICS:
160
+ self.metrics_engine = EpistemicMetrics()
161
+
162
+ if HAS_COCOON:
163
+ try:
164
+ key_mgr = CocoonKeyManager()
165
+ self.cocoon_sync = CocoonSync(
166
+ node_id=f"session_{self.session_id}",
167
+ key_manager=key_mgr,
168
+ )
169
+ except Exception:
170
+ self.cocoon_sync = None
171
+
172
+ if HAS_DREAMER:
173
+ self.dream_reweaver = DreamReweaver(creativity=0.3)
174
+
175
+ if HAS_OPTIMIZER:
176
+ self.optimizer = QuantumOptimizer()
177
+
178
+ if HAS_MEMORY:
179
+ self.memory_kernel = LivingMemoryKernel(max_memories=100)
180
+
181
+ if HAS_GUARDIAN:
182
+ self.guardian = CodetteGuardian()
183
+
184
+ if HAS_RESONANCE:
185
+ self.resonance_engine = ResonantContinuityEngine()
186
+
187
+ if HAS_AEGIS:
188
+ self.aegis = AEGIS()
189
+
190
+ if HAS_NEXUS:
191
+ self.nexus = NexusSignalEngine()
192
+
193
+ def add_message(self, role: str, content: str, metadata: Optional[Dict] = None):
194
+ """Add a message to the session history."""
195
+ msg = {
196
+ "role": role,
197
+ "content": content,
198
+ "timestamp": time.time(),
199
+ }
200
+ if metadata:
201
+ msg["metadata"] = metadata
202
+ self.messages.append(msg)
203
+ self.updated_at = time.time()
204
+
205
+ def update_after_response(self, route_result, adapter_name: str,
206
+ perspectives: Optional[Dict[str, str]] = None):
207
+ """Update Cocoon state after a Codette response.
208
+
209
+ Args:
210
+ route_result: RouteResult from the router
211
+ adapter_name: Which adapter was primary
212
+ perspectives: Dict of adapter_name -> response text (if multi-perspective)
213
+ """
214
+ # Track adapter usage
215
+ self.perspective_usage[adapter_name] = \
216
+ self.perspective_usage.get(adapter_name, 0) + 1
217
+
218
+ if not HAS_SPIDERWEB or self.spiderweb is None:
219
+ return
220
+
221
+ # Propagate belief through the spiderweb from the active adapter
222
+ try:
223
+ if adapter_name in self.spiderweb.nodes:
224
+ node = self.spiderweb.nodes[adapter_name]
225
+ # Boost the active adapter's psi (thought magnitude)
226
+ node.state.psi = min(node.state.psi + 0.1, 2.0)
227
+ node.state.tau += 0.05 # Temporal progression
228
+
229
+ # Propagate the boosted belief outward (BUG FIX: pass belief state)
230
+ self.spiderweb.propagate_belief(
231
+ adapter_name, belief=node.state, max_hops=2
232
+ )
233
+
234
+ # If multi-perspective, entangle the participating agents
235
+ if perspectives and len(perspectives) > 1:
236
+ adapters = list(perspectives.keys())
237
+ for i in range(len(adapters)):
238
+ for j in range(i + 1, len(adapters)):
239
+ if (adapters[i] in self.spiderweb.nodes and
240
+ adapters[j] in self.spiderweb.nodes):
241
+ self.spiderweb.entangle(adapters[i], adapters[j])
242
+
243
+ # Compute metrics
244
+ coherence = self.spiderweb.phase_coherence()
245
+ self.coherence_history.append(coherence)
246
+
247
+ # Detect attractors
248
+ self.attractors = self.spiderweb.detect_attractors()
249
+
250
+ # Try to form glyphs for active nodes
251
+ for name in (perspectives or {adapter_name: ""}).keys():
252
+ if name in self.spiderweb.nodes:
253
+ glyph = self.spiderweb.form_glyph(name)
254
+ if glyph:
255
+ self.glyphs.append({
256
+ "glyph_id": glyph.glyph_id,
257
+ "source": glyph.source_node,
258
+ "stability": glyph.stability_score,
259
+ })
260
+
261
+ # Check convergence
262
+ is_converging, mean_tension = self.spiderweb.check_convergence()
263
+ self.tension_history.append(mean_tension)
264
+
265
+ # Feed quality signal to optimizer if available
266
+ if HAS_OPTIMIZER and self.optimizer:
267
+ try:
268
+ signal = QualitySignal(
269
+ timestamp=time.time(),
270
+ adapter=adapter_name,
271
+ coherence=coherence,
272
+ tension=mean_tension,
273
+ productivity=0.5, # Default, updated by epistemic report
274
+ response_length=0,
275
+ multi_perspective=perspectives is not None and len(perspectives) > 1,
276
+ user_continued=True,
277
+ )
278
+ self.optimizer.record_signal(signal)
279
+ except Exception:
280
+ pass
281
+
282
+ except Exception as e:
283
+ print(f" [cocoon] Spiderweb update error: {e}")
284
+
285
+ # Update resonance engine
286
+ if self.resonance_engine:
287
+ try:
288
+ coh = self.coherence_history[-1] if self.coherence_history else 0.5
289
+ ten = self.tension_history[-1] if self.tension_history else 0.3
290
+ self.resonance_engine.compute_psi(coherence=coh, tension=ten)
291
+ except Exception:
292
+ pass
293
+
294
+ # Update guardian trust
295
+ if self.guardian:
296
+ try:
297
+ coh = self.coherence_history[-1] if self.coherence_history else 0.5
298
+ ten = self.tension_history[-1] if self.tension_history else 0.3
299
+ self.guardian.evaluate_output(adapter_name, "", coh, ten)
300
+ except Exception:
301
+ pass
302
+
303
+ # AEGIS ethical evaluation of the response
304
+ if self.aegis and self.messages:
305
+ try:
306
+ # Find the most recent assistant response
307
+ for msg in reversed(self.messages[-4:]):
308
+ if msg["role"] == "assistant":
309
+ self.aegis.evaluate(msg["content"], adapter=adapter_name)
310
+ break
311
+ except Exception:
312
+ pass
313
+
314
+ # Nexus signal analysis of the user input
315
+ if self.nexus and self.messages:
316
+ try:
317
+ for msg in reversed(self.messages[-4:]):
318
+ if msg["role"] == "user":
319
+ self.nexus.analyze(msg["content"], adapter=adapter_name)
320
+ break
321
+ except Exception:
322
+ pass
323
+
324
+ # Store memory cocoon for significant exchanges
325
+ if self.memory_kernel and self.messages:
326
+ try:
327
+ # Find the most recent user query and assistant response
328
+ query_text = ""
329
+ response_text = ""
330
+ for msg in reversed(self.messages[-4:]):
331
+ if msg["role"] == "user" and not query_text:
332
+ query_text = msg["content"]
333
+ elif msg["role"] == "assistant" and not response_text:
334
+ response_text = msg["content"]
335
+ if query_text and response_text:
336
+ coh = self.coherence_history[-1] if self.coherence_history else 0.5
337
+ ten = self.tension_history[-1] if self.tension_history else 0.3
338
+ self.memory_kernel.store_from_turn(
339
+ query=query_text,
340
+ response=response_text,
341
+ adapter=adapter_name,
342
+ coherence=coh,
343
+ tension=ten,
344
+ )
345
+ except Exception:
346
+ pass
347
+
348
+ def compute_epistemic_report(self, analyses: Dict[str, str],
349
+ synthesis: str = "") -> Optional[Dict]:
350
+ """Run full epistemic metrics on a multi-perspective response."""
351
+ if not HAS_METRICS or self.metrics_engine is None:
352
+ return None
353
+
354
+ try:
355
+ return self.metrics_engine.full_epistemic_report(analyses, synthesis)
356
+ except Exception as e:
357
+ print(f" [cocoon] Metrics error: {e}")
358
+ return None
359
+
360
+ def get_state(self) -> Dict[str, Any]:
361
+ """Get full session state for UI rendering."""
362
+ state = {
363
+ "session_id": self.session_id,
364
+ "message_count": len(self.messages),
365
+ "created_at": self.created_at,
366
+ "updated_at": self.updated_at,
367
+ "perspective_usage": self.perspective_usage,
368
+ "adapter_colors": ADAPTER_COLORS,
369
+ "cocoon": {
370
+ "has_spiderweb": HAS_SPIDERWEB and self.spiderweb is not None,
371
+ "has_metrics": HAS_METRICS,
372
+ "has_sync": HAS_COCOON and self.cocoon_sync is not None,
373
+ },
374
+ }
375
+
376
+ # Spiderweb state
377
+ if self.spiderweb:
378
+ try:
379
+ web_dict = self.spiderweb.to_dict()
380
+ state["spiderweb"] = {
381
+ "nodes": {
382
+ nid: {
383
+ # BUG FIX: to_dict() stores state as a list [psi,tau,chi,phi,lam]
384
+ "state": n["state"],
385
+ "neighbors": n.get("neighbors", []),
386
+ "tension_history": n.get("tension_history", [])[-10:],
387
+ }
388
+ for nid, n in web_dict.get("nodes", {}).items()
389
+ },
390
+ "phase_coherence": web_dict.get("phase_coherence", 0),
391
+ "attractors": self.attractors,
392
+ "glyphs": self.glyphs[-10:], # Last 10
393
+ # New VIVARA-inspired metrics
394
+ "entropy": self.spiderweb.shannon_entropy(),
395
+ "decoherence_rate": self.spiderweb.decoherence_rate(),
396
+ "lifeforms": self.lifeforms[-20:],
397
+ }
398
+ except Exception:
399
+ state["spiderweb"] = None
400
+ else:
401
+ state["spiderweb"] = None
402
+
403
+ # Metrics history
404
+ state["metrics"] = {
405
+ "coherence_history": self.coherence_history[-50:],
406
+ "tension_history": self.tension_history[-50:],
407
+ "current_coherence": self.coherence_history[-1] if self.coherence_history else 0,
408
+ "current_tension": self.tension_history[-1] if self.tension_history else 0,
409
+ "attractor_count": len(self.attractors),
410
+ "glyph_count": len(self.glyphs),
411
+ }
412
+
413
+ # Optimizer tuning state
414
+ if HAS_OPTIMIZER and self.optimizer:
415
+ state["optimizer"] = self.optimizer.get_tuning_report()
416
+ else:
417
+ state["optimizer"] = None
418
+
419
+ # Dream history
420
+ state["dream_history"] = self.dream_history[-10:]
421
+
422
+ # Living memory
423
+ if self.memory_kernel:
424
+ state["memory"] = self.memory_kernel.get_state()
425
+ else:
426
+ state["memory"] = None
427
+
428
+ # Guardian state
429
+ if self.guardian:
430
+ state["guardian"] = self.guardian.get_state()
431
+ else:
432
+ state["guardian"] = None
433
+
434
+ # Resonant continuity
435
+ if self.resonance_engine:
436
+ state["resonance"] = self.resonance_engine.get_state()
437
+ else:
438
+ state["resonance"] = None
439
+
440
+ # AEGIS ethical alignment
441
+ if self.aegis:
442
+ state["aegis"] = self.aegis.get_state()
443
+ else:
444
+ state["aegis"] = None
445
+
446
+ # Nexus signal intelligence
447
+ if self.nexus:
448
+ state["nexus"] = self.nexus.get_state()
449
+ else:
450
+ state["nexus"] = None
451
+
452
+ # Perspective registry
453
+ if HAS_PERSPECTIVES:
454
+ state["perspectives_available"] = len(PERSPECTIVES)
455
+
456
+ return state
457
+
458
+ def to_dict(self) -> Dict:
459
+ """Serialize for storage."""
460
+ data = {
461
+ "session_id": self.session_id,
462
+ "created_at": self.created_at,
463
+ "updated_at": self.updated_at,
464
+ "messages": self.messages,
465
+ "coherence_history": self.coherence_history,
466
+ "tension_history": self.tension_history,
467
+ "attractors": self.attractors,
468
+ "glyphs": self.glyphs,
469
+ "perspective_usage": self.perspective_usage,
470
+ "lifeforms": self.lifeforms,
471
+ "dream_history": self.dream_history,
472
+ }
473
+ if self.spiderweb:
474
+ try:
475
+ data["spiderweb_state"] = self.spiderweb.to_dict()
476
+ except Exception:
477
+ pass
478
+ if HAS_OPTIMIZER and self.optimizer:
479
+ try:
480
+ data["optimizer_state"] = self.optimizer.to_dict()
481
+ except Exception:
482
+ pass
483
+ if self.memory_kernel:
484
+ try:
485
+ data["memory_state"] = self.memory_kernel.to_dict()
486
+ except Exception:
487
+ pass
488
+ if self.guardian:
489
+ try:
490
+ data["guardian_state"] = self.guardian.to_dict()
491
+ except Exception:
492
+ pass
493
+ if self.resonance_engine:
494
+ try:
495
+ data["resonance_state"] = self.resonance_engine.to_dict()
496
+ except Exception:
497
+ pass
498
+ if self.aegis:
499
+ try:
500
+ data["aegis_state"] = self.aegis.to_dict()
501
+ except Exception:
502
+ pass
503
+ if self.nexus:
504
+ try:
505
+ data["nexus_state"] = self.nexus.to_dict()
506
+ except Exception:
507
+ pass
508
+ return data
509
+
510
+ def from_dict(self, data: Dict):
511
+ """Restore from storage."""
512
+ self.session_id = data.get("session_id", self.session_id)
513
+ self.created_at = data.get("created_at", self.created_at)
514
+ self.updated_at = data.get("updated_at", self.updated_at)
515
+ self.messages = data.get("messages", [])
516
+ self.coherence_history = data.get("coherence_history", [])
517
+ self.tension_history = data.get("tension_history", [])
518
+ self.attractors = data.get("attractors", [])
519
+ self.glyphs = data.get("glyphs", [])
520
+ self.perspective_usage = data.get("perspective_usage", {})
521
+ self.lifeforms = data.get("lifeforms", [])
522
+ self.dream_history = data.get("dream_history", [])
523
+
524
+ if self.spiderweb and "spiderweb_state" in data:
525
+ try:
526
+ self.spiderweb = QuantumSpiderweb.from_dict(data["spiderweb_state"])
527
+ except Exception:
528
+ pass
529
+ if HAS_OPTIMIZER and self.optimizer and "optimizer_state" in data:
530
+ try:
531
+ self.optimizer = QuantumOptimizer.from_dict(data["optimizer_state"])
532
+ except Exception:
533
+ pass
534
+ if HAS_MEMORY and "memory_state" in data:
535
+ try:
536
+ self.memory_kernel = LivingMemoryKernel.from_dict(data["memory_state"])
537
+ except Exception:
538
+ pass
539
+ if HAS_GUARDIAN and "guardian_state" in data:
540
+ try:
541
+ self.guardian = CodetteGuardian.from_dict(data["guardian_state"])
542
+ except Exception:
543
+ pass
544
+ if HAS_RESONANCE and "resonance_state" in data:
545
+ try:
546
+ self.resonance_engine = ResonantContinuityEngine.from_dict(data["resonance_state"])
547
+ except Exception:
548
+ pass
549
+ if HAS_AEGIS and "aegis_state" in data:
550
+ try:
551
+ self.aegis = AEGIS.from_dict(data["aegis_state"])
552
+ except Exception:
553
+ pass
554
+ if HAS_NEXUS and "nexus_state" in data:
555
+ try:
556
+ self.nexus = NexusSignalEngine.from_dict(data["nexus_state"])
557
+ except Exception:
558
+ pass
559
+
560
+
561
+ class SessionStore:
562
+ """SQLite-backed session persistence with Cocoon encryption."""
563
+
564
+ def __init__(self, db_path: Optional[Path] = None):
565
+ self.db_path = db_path or DB_PATH
566
+ self.db_path.parent.mkdir(parents=True, exist_ok=True)
567
+ self._init_db()
568
+
569
+ def _init_db(self):
570
+ """Create sessions table if needed."""
571
+ conn = sqlite3.connect(str(self.db_path))
572
+ conn.execute("""
573
+ CREATE TABLE IF NOT EXISTS sessions (
574
+ session_id TEXT PRIMARY KEY,
575
+ created_at REAL,
576
+ updated_at REAL,
577
+ title TEXT,
578
+ data TEXT
579
+ )
580
+ """)
581
+ conn.commit()
582
+ conn.close()
583
+
584
+ def save(self, session: CodetteSession, title: Optional[str] = None):
585
+ """Save a session to the database."""
586
+ if title is None:
587
+ # Auto-title from first user message
588
+ for msg in session.messages:
589
+ if msg["role"] == "user":
590
+ title = msg["content"][:80]
591
+ break
592
+ title = title or f"Session {session.session_id[:8]}"
593
+
594
+ data_json = json.dumps(session.to_dict())
595
+
596
+ conn = sqlite3.connect(str(self.db_path))
597
+ conn.execute("""
598
+ INSERT OR REPLACE INTO sessions (session_id, created_at, updated_at, title, data)
599
+ VALUES (?, ?, ?, ?, ?)
600
+ """, (session.session_id, session.created_at, session.updated_at, title, data_json))
601
+ conn.commit()
602
+ conn.close()
603
+
604
+ def load(self, session_id: str) -> Optional[CodetteSession]:
605
+ """Load a session from the database."""
606
+ conn = sqlite3.connect(str(self.db_path))
607
+ row = conn.execute(
608
+ "SELECT data FROM sessions WHERE session_id = ?", (session_id,)
609
+ ).fetchone()
610
+ conn.close()
611
+
612
+ if not row:
613
+ return None
614
+
615
+ session = CodetteSession(session_id)
616
+ session.from_dict(json.loads(row[0]))
617
+ return session
618
+
619
+ def list_sessions(self, limit: int = 20) -> List[Dict]:
620
+ """List recent sessions."""
621
+ conn = sqlite3.connect(str(self.db_path))
622
+ rows = conn.execute("""
623
+ SELECT session_id, created_at, updated_at, title
624
+ FROM sessions ORDER BY updated_at DESC LIMIT ?
625
+ """, (limit,)).fetchall()
626
+ conn.close()
627
+
628
+ return [
629
+ {
630
+ "session_id": r[0],
631
+ "created_at": r[1],
632
+ "updated_at": r[2],
633
+ "title": r[3],
634
+ }
635
+ for r in rows
636
+ ]
637
+
638
+ def delete(self, session_id: str):
639
+ """Delete a session."""
640
+ conn = sqlite3.connect(str(self.db_path))
641
+ conn.execute("DELETE FROM sessions WHERE session_id = ?", (session_id,))
642
+ conn.commit()
643
+ conn.close()
644
+
645
+
646
+ # Quick test
647
+ if __name__ == "__main__":
648
+ print("Testing CodetteSession...")
649
+ session = CodetteSession()
650
+ print(f" Session ID: {session.session_id}")
651
+ print(f" Spiderweb: {HAS_SPIDERWEB}")
652
+ print(f" Metrics: {HAS_METRICS}")
653
+ print(f" Cocoon: {HAS_COCOON}")
654
+
655
+ session.add_message("user", "How does gravity work?")
656
+ session.add_message("assistant", "Objects attract each other...",
657
+ metadata={"adapter": "newton", "confidence": 0.95})
658
+
659
+ state = session.get_state()
660
+ print(f" State keys: {list(state.keys())}")
661
+ print(f" Cocoon status: {state['cocoon']}")
662
+
663
+ if state["spiderweb"]:
664
+ print(f" Nodes: {list(state['spiderweb']['nodes'].keys())}")
665
+ print(f" Phase coherence: {state['spiderweb']['phase_coherence']:.4f}")
666
+
667
+ # Test persistence
668
+ store = SessionStore()
669
+ store.save(session)
670
+ loaded = store.load(session.session_id)
671
+ print(f" Persistence: {'OK' if loaded else 'FAILED'}")
672
+ if loaded:
673
+ print(f" Loaded messages: {len(loaded.messages)}")
674
+
675
+ print("Done!")
inference/codette_tools.py ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Codette Tool System — Safe Local Tool Execution
3
+
4
+ Gives Codette the ability to read files, search code, list directories,
5
+ and run safe Python snippets. Tools are sandboxed and read-only by default.
6
+
7
+ Tool Call Format (in Codette's output):
8
+ <tool>tool_name(arg1, arg2)</tool>
9
+
10
+ Tool Result (injected back into context):
11
+ <tool_result>...output...</tool_result>
12
+
13
+ Architecture:
14
+ 1. Codette generates text that may contain <tool>...</tool> tags
15
+ 2. Server parses out tool calls
16
+ 3. Tools execute with safety limits
17
+ 4. Results are fed back for a second generation pass
18
+ """
19
+
20
+ import os
21
+ import re
22
+ import ast
23
+ import json
24
+ import subprocess
25
+ import traceback
26
+ from pathlib import Path
27
+ from typing import Dict, List, Optional, Tuple, Any
28
+
29
+ # ================================================================
30
+ # Safety Configuration
31
+ # ================================================================
32
+
33
+ # Directories Codette is allowed to read from
34
+ ALLOWED_ROOTS = [
35
+ Path(r"J:\codette-training-lab"),
36
+ Path(r"C:\Users\Jonathan\Documents"),
37
+ ]
38
+
39
+ # File extensions Codette can read
40
+ READABLE_EXTENSIONS = {
41
+ ".py", ".js", ".ts", ".html", ".css", ".json", ".yaml", ".yml",
42
+ ".md", ".txt", ".csv", ".toml", ".cfg", ".ini", ".sh", ".bat",
43
+ ".bib", ".tex", ".log", ".jsonl",
44
+ }
45
+
46
+ # Max file size to read (prevent reading huge binaries)
47
+ MAX_FILE_SIZE = 500_000 # 500KB
48
+
49
+ # Max output length per tool result
50
+ MAX_OUTPUT_LENGTH = 4000 # chars
51
+
52
+ # Max lines for file reads
53
+ MAX_LINES = 200
54
+
55
+ # Python execution timeout
56
+ PYTHON_TIMEOUT = 10 # seconds
57
+
58
+
59
+ # ================================================================
60
+ # Tool Registry
61
+ # ================================================================
62
+
63
class ToolRegistry:
    """Registry of available tools with descriptions and handlers.

    Each entry maps a tool name to a spec dict with:
      - "description": one-line usage string shown to the model,
      - "examples": sample <tool>...</tool> invocations,
      - "handler": the callable that implements the tool.

    NOTE: the description/example strings are injected verbatim into the
    system prompt (see get_descriptions), so they are runtime text, not
    just documentation.
    """

    def __init__(self):
        # name -> spec dict; insertion order controls prompt ordering
        self.tools: Dict[str, dict] = {}
        self._register_defaults()

    def _register_defaults(self):
        """Register the built-in tool set."""

        self.register("read_file", {
            "description": "Read a file's contents. Args: path (str), start_line (int, optional), end_line (int, optional)",
            "examples": [
                'read_file("inference/codette_server.py")',
                'read_file("configs/adapter_registry.yaml", 1, 50)',
            ],
            "handler": tool_read_file,
        })

        self.register("list_files", {
            "description": "List files in a directory. Args: path (str), pattern (str, optional)",
            "examples": [
                'list_files("inference/")',
                'list_files("datasets/", "*.jsonl")',
            ],
            "handler": tool_list_files,
        })

        self.register("search_code", {
            "description": "Search for a text pattern across files. Args: pattern (str), path (str, optional), file_ext (str, optional)",
            "examples": [
                'search_code("phase_coherence")',
                'search_code("def route", "inference/", ".py")',
            ],
            "handler": tool_search_code,
        })

        self.register("file_info", {
            "description": "Get file metadata (size, modified time, line count). Args: path (str)",
            "examples": [
                'file_info("paper/codette_paper.pdf")',
            ],
            "handler": tool_file_info,
        })

        self.register("run_python", {
            "description": "Execute a short Python snippet and return output. For calculations, data processing, or quick checks. Args: code (str)",
            "examples": [
                'run_python("import math; print(math.pi * 2)")',
                'run_python("print(sorted([3,1,4,1,5,9]))")',
            ],
            "handler": tool_run_python,
        })

        self.register("project_summary", {
            "description": "Get an overview of the Codette project structure. No args.",
            "examples": [
                'project_summary()',
            ],
            "handler": tool_project_summary,
        })

    def register(self, name: str, spec: dict):
        """Add (or replace) a tool spec under *name*."""
        self.tools[name] = spec

    def get_descriptions(self) -> str:
        """Format tool descriptions for injection into system prompt."""
        lines = ["Available tools (use <tool>name(args)</tool> to call):"]
        for name, spec in self.tools.items():
            lines.append(f"\n {name}: {spec['description']}")
            for ex in spec.get("examples", []):
                lines.append(f" Example: <tool>{ex}</tool>")
        return "\n".join(lines)

    def execute(self, name: str, args: list, kwargs: dict) -> str:
        """Execute a tool by name with parsed arguments.

        Returns the handler's string result, truncated to
        MAX_OUTPUT_LENGTH. Unknown names and handler exceptions are
        reported as error strings rather than raised, so a bad tool call
        never crashes the chat loop.
        """
        if name not in self.tools:
            return f"Error: Unknown tool '{name}'. Available: {', '.join(self.tools.keys())}"

        handler = self.tools[name]["handler"]
        try:
            result = handler(*args, **kwargs)
            # Truncate if too long
            if len(result) > MAX_OUTPUT_LENGTH:
                result = result[:MAX_OUTPUT_LENGTH] + f"\n... (truncated, {len(result)} chars total)"
            return result
        except Exception as e:
            return f"Error executing {name}: {e}"
151
+
152
+
153
+ # ================================================================
154
+ # Tool Call Parser
155
+ # ================================================================
156
+
157
+ def parse_tool_calls(text: str) -> List[Tuple[str, list, dict]]:
158
+ """Parse <tool>name(args)</tool> tags from generated text.
159
+
160
+ Returns list of (tool_name, positional_args, keyword_args).
161
+ """
162
+ pattern = r'<tool>\s*([\w]+)\s*\((.*?)\)\s*</tool>'
163
+ matches = re.findall(pattern, text, re.DOTALL)
164
+
165
+ calls = []
166
+ for name, args_str in matches:
167
+ try:
168
+ # Parse arguments safely using ast.literal_eval
169
+ args, kwargs = _parse_args(args_str.strip())
170
+ calls.append((name, args, kwargs))
171
+ except Exception as e:
172
+ calls.append((name, [args_str.strip()], {}))
173
+
174
+ return calls
175
+
176
+
177
+ def _parse_args(args_str: str) -> Tuple[list, dict]:
178
+ """Safely parse function arguments string."""
179
+ if not args_str:
180
+ return [], {}
181
+
182
+ # Wrap in a tuple to parse as Python literal
183
+ try:
184
+ # Try parsing as a tuple of values
185
+ parsed = ast.literal_eval(f"({args_str},)")
186
+ return list(parsed), {}
187
+ except (ValueError, SyntaxError):
188
+ # If that fails, treat as a single string argument
189
+ # Strip quotes if present
190
+ cleaned = args_str.strip().strip('"').strip("'")
191
+ return [cleaned], {}
192
+
193
+
194
def strip_tool_calls(text: str) -> str:
    """Drop every <tool>...</tool> span from *text* and trim the edges."""
    without_calls = re.sub(r'<tool>.*?</tool>', '', text, flags=re.DOTALL)
    return without_calls.strip()
197
+
198
+
199
def has_tool_calls(text: str) -> bool:
    """Return True when *text* contains at least one tool-call opening tag."""
    return '<tool>' in text
202
+
203
+
204
+ # ================================================================
205
+ # Path Safety
206
+ # ================================================================
207
+
208
def _resolve_path(path_str: str) -> Optional[Path]:
    """Resolve *path_str* to an absolute path inside an allowed root.

    Relative paths are interpreted against the primary project root
    (ALLOWED_ROOTS[0]). Returns None when the resolved path escapes
    every allowed root — callers treat that as a sandbox violation.
    """
    candidate = Path(path_str)
    if not candidate.is_absolute():
        candidate = ALLOWED_ROOTS[0] / candidate

    candidate = candidate.resolve()

    # Accept the first root that contains the resolved path.
    for root in ALLOWED_ROOTS:
        try:
            candidate.relative_to(root.resolve())
        except ValueError:
            continue
        return candidate

    return None  # outside the sandbox
226
+
227
+
228
+ # ================================================================
229
+ # Tool Implementations
230
+ # ================================================================
231
+
232
def tool_read_file(path: str, start_line: int = 1, end_line: Optional[int] = None) -> str:
    """Read a file's contents with optional line range.

    Args:
        path: File path (relative paths resolve against the project root).
        start_line: First line to show, 1-indexed (default 1).
        end_line: Last line to show, inclusive; None means "to the end",
            capped at MAX_LINES lines per call.

    Returns:
        A header line plus line-numbered content, or an "Error: ..."
        string for sandbox violations, missing/binary/oversized files.
    """
    resolved = _resolve_path(path)
    if resolved is None:
        return f"Error: Path '{path}' is outside allowed directories."

    if not resolved.exists():
        return f"Error: File not found: {path}"

    if not resolved.is_file():
        return f"Error: '{path}' is a directory, not a file. Use list_files() instead."

    # Check extension — only whitelisted text formats are readable
    if resolved.suffix.lower() not in READABLE_EXTENSIONS:
        return f"Error: Cannot read {resolved.suffix} files. Supported: {', '.join(sorted(READABLE_EXTENSIONS))}"

    # Check size before reading to avoid pulling huge files into memory
    size = resolved.stat().st_size
    if size > MAX_FILE_SIZE:
        return f"Error: File too large ({size:,} bytes). Max: {MAX_FILE_SIZE:,} bytes."

    try:
        # errors='replace' keeps reads from failing on stray bytes
        content = resolved.read_text(encoding='utf-8', errors='replace')
    except Exception as e:
        return f"Error reading file: {e}"

    lines = content.splitlines()
    total = len(lines)

    # Apply line range; clamp to both the MAX_LINES window and file length
    start = max(1, start_line) - 1  # Convert to 0-indexed
    end = min(end_line or total, start + MAX_LINES, total)

    selected = lines[start:end]

    # Format with 1-indexed line numbers for easy cross-referencing
    numbered = []
    for i, line in enumerate(selected, start=start + 1):
        numbered.append(f"{i:4d} | {line}")

    header = f"File: {path} ({total} lines total)"
    if start > 0 or end < total:
        header += f" [showing lines {start+1}-{end}]"

    return header + "\n" + "\n".join(numbered)
277
+
278
+
279
def tool_list_files(path: str = ".", pattern: str = None) -> str:
    """List files in a directory with optional glob pattern.

    Args:
        path: Directory to list (default: project root via ".").
        pattern: Optional glob (e.g. "*.jsonl"); when omitted, lists the
            immediate directory contents.

    Returns:
        A formatted listing (capped at 100 entries, sizes humanized),
        or an "Error: ..." string for bad paths.
    """
    resolved = _resolve_path(path)
    if resolved is None:
        return f"Error: Path '{path}' is outside allowed directories."

    if not resolved.exists():
        return f"Error: Directory not found: {path}"

    if not resolved.is_dir():
        return f"Error: '{path}' is a file, not a directory. Use read_file() instead."

    try:
        if pattern:
            entries = sorted(resolved.glob(pattern))
        else:
            entries = sorted(resolved.iterdir())

        result = [f"Directory: {path}"]
        for entry in entries[:100]:  # Limit to 100 entries
            rel = entry.relative_to(resolved)
            if entry.is_dir():
                result.append(f" [DIR] {rel}/")
            else:
                # Humanize file sizes: MB / KB / bytes
                size = entry.stat().st_size
                if size >= 1024 * 1024:
                    size_str = f"{size / 1024 / 1024:.1f}MB"
                elif size >= 1024:
                    size_str = f"{size / 1024:.1f}KB"
                else:
                    size_str = f"{size}B"
                result.append(f" [FILE] {rel} ({size_str})")

        if len(entries) > 100:
            result.append(f" ... and {len(entries) - 100} more")

        return "\n".join(result)

    except Exception as e:
        return f"Error listing directory: {e}"
319
+
320
+
321
def tool_search_code(pattern: str, path: str = ".", file_ext: str = None) -> str:
    """Search for a text pattern in files (case-insensitive substring match).

    Args:
        pattern: Literal text to look for (NOT a regex).
        path: Directory (or file, whose parent is then searched) to scan.
        file_ext: Optional extension filter, with or without leading dot.

    Returns:
        A header plus "file:line: text" matches (capped at 50), or an
        "Error: ..." string. Hidden dirs, __pycache__, node_modules and
        .git are skipped, as are non-whitelisted or oversized files.
    """
    resolved = _resolve_path(path)
    if resolved is None:
        return f"Error: Path '{path}' is outside allowed directories."

    if not resolved.exists():
        return f"Error: Path not found: {path}"

    # Determine glob pattern — recursive, optionally extension-filtered
    if file_ext:
        if not file_ext.startswith("."):
            file_ext = "." + file_ext
        glob = f"**/*{file_ext}"
    else:
        glob = "**/*"

    results = []
    files_searched = 0
    matches_found = 0

    try:
        # A file path is searched via its parent directory
        search_root = resolved if resolved.is_dir() else resolved.parent

        for filepath in search_root.glob(glob):
            if not filepath.is_file():
                continue
            if filepath.suffix.lower() not in READABLE_EXTENSIONS:
                continue
            if filepath.stat().st_size > MAX_FILE_SIZE:
                continue

            # Skip hidden dirs, __pycache__, node_modules, .git
            parts = filepath.parts
            if any(p.startswith('.') or p in ('__pycache__', 'node_modules', '.git')
                   for p in parts):
                continue

            files_searched += 1

            try:
                content = filepath.read_text(encoding='utf-8', errors='replace')
                for line_num, line in enumerate(content.splitlines(), 1):
                    # Case-insensitive substring match; line preview capped at 120 chars
                    if pattern.lower() in line.lower():
                        rel = filepath.relative_to(search_root)
                        results.append(f" {rel}:{line_num}: {line.strip()[:120]}")
                        matches_found += 1

                        if matches_found >= 50:  # Limit results
                            break
            except Exception:
                # Unreadable file — skip and keep searching
                continue

            # Second break: exits the file loop once the cap is hit
            if matches_found >= 50:
                break

    except Exception as e:
        return f"Error searching: {e}"

    header = f"Search: '{pattern}' in {path} ({matches_found} matches in {files_searched} files)"
    if not results:
        return header + "\n No matches found."
    return header + "\n" + "\n".join(results)
384
+
385
+
386
def tool_file_info(path: str) -> str:
    """Report metadata (size, modified time, type, line count) for a path."""
    import time

    resolved = _resolve_path(path)
    if resolved is None:
        return f"Error: Path '{path}' is outside allowed directories."
    if not resolved.exists():
        return f"Error: File not found: {path}"

    meta = resolved.stat()
    modified = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta.st_mtime))
    kind = 'directory' if resolved.is_dir() else resolved.suffix or 'no extension'

    report = [
        f"File: {path}",
        f" Size: {meta.st_size:,} bytes ({meta.st_size / 1024:.1f} KB)",
        f" Modified: {modified}",
        f" Type: {kind}",
    ]

    # Readable text files additionally get a line count.
    if resolved.is_file() and resolved.suffix.lower() in READABLE_EXTENSIONS:
        try:
            line_count = resolved.read_text(encoding='utf-8', errors='replace').count('\n') + 1
            report.append(f" Lines: {line_count:,}")
        except Exception:
            pass  # best-effort; metadata above is still useful

    return "\n".join(report)
415
+
416
+
417
def tool_run_python(code: str) -> str:
    """Run a short Python snippet in a subprocess and return its output.

    Args:
        code: Python source executed via ``python -c``.

    Returns:
        Combined stdout (plus stderr if any), "(no output)" when the
        snippet prints nothing, or an "Error: ..." string on rejection,
        timeout, or failure.

    NOTE(review): the substring blocklist below is a guard rail, not a
    sandbox — it is trivially bypassable (getattr tricks, string
    concatenation). Do not expose this to untrusted callers as-is.
    """
    # Basic safety checks. (The original also did an unused `import sys`
    # here; removed.)
    dangerous = ['import os', 'import sys', 'subprocess', 'shutil.rmtree',
                 'os.remove', 'os.unlink', '__import__', 'eval(', 'exec(',
                 'open(', 'write(', 'pathlib']
    for d in dangerous:
        if d in code and 'print' not in code.split(d)[0].split('\n')[-1]:
            # Allow if it's inside a print statement string
            if f'"{d}"' not in code and f"'{d}'" not in code:
                return f"Error: '{d}' is not allowed in run_python for safety. Use read_file/search_code for file operations."

    try:
        # Pinned interpreter + site-packages for this machine.
        result = subprocess.run(
            [r"J:\python.exe", "-c", code],
            capture_output=True,
            text=True,
            timeout=PYTHON_TIMEOUT,
            env={**os.environ, "PYTHONPATH": r"J:\Lib\site-packages"},
        )

        output = result.stdout
        if result.stderr:
            output += "\nSTDERR: " + result.stderr

        if not output.strip():
            output = "(no output)"

        return output.strip()

    except subprocess.TimeoutExpired:
        return f"Error: Code execution timed out after {PYTHON_TIMEOUT}s."
    except Exception as e:
        return f"Error running code: {e}"
453
+
454
+
455
def tool_project_summary() -> str:
    """Generate a quick project structure overview.

    Walks a hard-coded list of well-known directories and files under the
    primary project root (ALLOWED_ROOTS[0]) and reports which exist,
    with file counts and sizes. Takes no arguments.
    """
    root = ALLOWED_ROOTS[0]

    summary = ["Codette Training Lab — Project Structure\n"]

    # Key directories: (relative path, human description)
    key_dirs = [
        ("configs/", "Configuration files (adapter registry, pipeline config)"),
        ("datasets/", "Training data — perspective-tagged JSONL files"),
        ("dataset_engine/", "Dataset generation pipeline"),
        ("evaluation/", "Evaluation scripts and benchmarks"),
        ("inference/", "Local inference server + web UI"),
        ("paper/", "Academic paper (LaTeX, PDF, BibTeX)"),
        ("reasoning_forge/", "Core RC+xi engine, spiderweb, cocoon sync"),
        ("research/", "Research docs, experiments, DreamReweaver"),
        ("scripts/", "Training and pipeline scripts"),
        ("adapters/", "GGUF LoRA adapter files for llama.cpp"),
    ]

    for dirname, desc in key_dirs:
        dirpath = root / dirname
        if dirpath.exists():
            # Recursive file count per directory (can be slow on huge trees)
            count = sum(1 for _ in dirpath.rglob("*") if _.is_file())
            summary.append(f" [DIR] {dirname:<30s} {desc} ({count} files)")

    # Key files — only those that actually exist are listed
    summary.append("\nKey Files:")
    key_files = [
        "HOWTO.md", "configs/adapter_registry.yaml",
        "inference/codette_server.py", "inference/codette_orchestrator.py",
        "reasoning_forge/quantum_spiderweb.py", "reasoning_forge/epistemic_metrics.py",
        "paper/codette_paper.tex",
    ]
    for f in key_files:
        fp = root / f
        if fp.exists():
            size = fp.stat().st_size
            summary.append(f" [FILE] {f} ({size / 1024:.1f} KB)")

    return "\n".join(summary)
496
+
497
+
498
+ # ================================================================
499
+ # Tool-Augmented System Prompt
500
+ # ================================================================
501
+
502
# Appended verbatim to an adapter's base system prompt by
# build_tool_system_prompt(); {tool_descriptions} is filled from
# ToolRegistry.get_descriptions(). This is runtime prompt text — edit
# with care, the model is trained/steered against this exact wording.
TOOL_PROMPT_SUFFIX = """

TOOLS: You can read files, search code, and run calculations. When a user asks about code, files, or the project, you MUST use tools to look things up rather than guessing.

Format: <tool>tool_name("arg1", "arg2")</tool>

{tool_descriptions}

RULES:
1. If the user asks about a file, config, or code: ALWAYS call read_file or search_code FIRST
2. If the user asks "show me" or "what is": call the relevant tool FIRST, then explain
3. For general conversation or reasoning: respond normally without tools
4. Start your response with the tool call on the very first line
"""
516
+
517
+
518
def build_tool_system_prompt(base_prompt: str, registry: ToolRegistry) -> str:
    """Return *base_prompt* extended with the tool-use instruction block."""
    suffix = TOOL_PROMPT_SUFFIX.format(tool_descriptions=registry.get_descriptions())
    return base_prompt + suffix
523
+
524
+
525
+ # ================================================================
526
+ # Quick Test
527
+ # ================================================================
528
# Smoke test: exercises every tool against the live project tree.
# Requires the ALLOWED_ROOTS directories to exist; intended for manual
# runs on the development machine, not CI.
if __name__ == "__main__":
    print("Testing Codette Tools...\n")

    registry = ToolRegistry()
    print(registry.get_descriptions())

    print("\n--- Test: read_file ---")
    print(tool_read_file("configs/adapter_registry.yaml", 1, 10))

    print("\n--- Test: list_files ---")
    print(tool_list_files("inference/"))

    print("\n--- Test: search_code ---")
    print(tool_search_code("phase_coherence", "reasoning_forge/", ".py"))

    print("\n--- Test: file_info ---")
    print(tool_file_info("paper/codette_paper.pdf"))

    print("\n--- Test: run_python ---")
    print(tool_run_python("print(2 ** 10)"))

    print("\n--- Test: project_summary ---")
    print(tool_project_summary())

    # Parser check: two calls embedded in prose should both be extracted
    print("\n--- Test: parse_tool_calls ---")
    test = 'Let me check that. <tool>read_file("configs/adapter_registry.yaml", 1, 20)</tool> And also <tool>search_code("AEGIS")</tool>'
    calls = parse_tool_calls(test)
    for name, args, kwargs in calls:
        print(f" Call: {name}({args})")

    print("\nDone!")
inference/init.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from .model_loader import CodetteModelLoader
2
+ from .multi_adapter_engine import CodetteEngine
3
+
4
+ __all__ = [
5
+ "CodetteModelLoader",
6
+ "CodetteEngine",
7
+ ]
inference/model_loader.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from pathlib import Path
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
+ from peft import PeftModel
5
+
6
+
7
class CodetteModelLoader:
    """Loads a 4-bit quantized base model and manages its LoRA adapters.

    The base model is loaded eagerly in __init__ (NF4 quantization via
    bitsandbytes, device_map="auto"); adapters are attached later with
    load_adapters() and switched with set_active_adapter().
    """

    def __init__(
        self,
        base_model="meta-llama/Llama-3.1-8B-Instruct",
        adapters=None,
    ):
        """
        Args:
            base_model: HF model id or local path of the base model.
            adapters: mapping of adapter name -> adapter path; may be
                empty, in which case load_adapters() is a no-op.
        """
        self.base_model_name = base_model
        self.adapters = adapters or {}
        self.model = None           # set by _load_base_model / load_adapters
        self.tokenizer = None       # set by _load_base_model
        self.active_adapter = None  # name of the currently selected adapter

        # Eager load: downloading/quantizing happens at construction time
        self._load_base_model()

    def _load_base_model(self):
        """Load tokenizer + base model with 4-bit NF4 quantization."""

        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
        )

        self.tokenizer = AutoTokenizer.from_pretrained(
            self.base_model_name,
            trust_remote_code=True
        )

        # Llama tokenizers ship without a pad token; reuse EOS so batched
        # generation does not fail
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        base_model = AutoModelForCausalLM.from_pretrained(
            self.base_model_name,
            quantization_config=quant_config,
            device_map="auto",
            trust_remote_code=True,
        )

        self.model = base_model

    def load_adapters(self):
        """Attach every adapter in self.adapters to the base model.

        The first adapter wraps the base model into a PeftModel (and
        becomes the active adapter); subsequent ones are added via
        load_adapter(). No-op when self.adapters is empty.
        """

        first = True

        for name, path in self.adapters.items():

            path = str(Path(path))

            if first:

                self.model = PeftModel.from_pretrained(
                    self.model,
                    path,
                    adapter_name=name,
                    is_trainable=False,  # inference only
                )

                self.active_adapter = name
                first = False

            else:

                self.model.load_adapter(
                    path,
                    adapter_name=name,
                )

    def set_active_adapter(self, name):
        """Switch generation to the named adapter.

        Raises:
            ValueError: if the adapter was never loaded.
        """

        if name not in self.model.peft_config:
            raise ValueError(f"Adapter not loaded: {name}")

        self.model.set_adapter(name)
        self.active_adapter = name

    def format_messages(self, messages):
        """Render chat messages into a prompt string via the model's chat template."""

        return self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

    def tokenize(self, prompt):
        """Tokenize *prompt* and move the tensors onto the model's device."""

        return self.tokenizer(
            prompt,
            return_tensors="pt"
        ).to(self.model.device)
inference/multi_adapter_engine.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class CodetteEngine:
    """Chat generation over a CodetteModelLoader with switchable LoRA adapters."""

    def __init__(self, loader, registry):
        """
        Args:
            loader: CodetteModelLoader with base model + adapters loaded.
            registry: mapping adapter name -> config dict containing a
                "generation" sub-dict (max_tokens, temperature, top_p,
                repetition_penalty).
        """
        self.loader = loader
        self.registry = registry

    def generate(self, messages, adapter):
        """Generate a reply for *messages* using the named adapter.

        Returns only the newly generated text: the prompt tokens are
        sliced off before decoding, so callers get the model's answer
        rather than the echoed conversation.
        """
        self.loader.set_active_adapter(adapter)

        prompt = self.loader.format_messages(messages)
        inputs = self.loader.tokenize(prompt)

        params = self.registry[adapter]["generation"]

        output = self.loader.model.generate(
            **inputs,
            max_new_tokens=params.get("max_tokens", 512),
            # FIX: do_sample=True is required — HF generate() defaults to
            # greedy decoding and silently ignores temperature/top_p
            # without it.
            do_sample=True,
            temperature=params.get("temperature", 0.7),
            top_p=params.get("top_p", 0.9),
            repetition_penalty=params.get("repetition_penalty", 1.1),
        )

        # FIX: decode only the continuation. Decoding output[0] whole
        # returned the prompt too, which also made _synthesize feed the
        # entire conversation back in as "response" text.
        prompt_len = inputs["input_ids"].shape[1]
        text = self.loader.tokenizer.decode(
            output[0][prompt_len:],
            skip_special_tokens=True,
        )

        return text

    def multi_perspective(self, messages, adapters):
        """Generate one answer per adapter, then synthesize them into one reply."""

        outputs = {}

        for adapter in adapters:
            outputs[adapter] = self.generate(messages, adapter)

        return self._synthesize(messages, outputs)

    def _synthesize(self, messages, responses):
        """Fuse the per-adapter responses via the multi_perspective adapter."""

        combined = "\n\n".join(
            f"{name.upper()}:\n{text}"
            for name, text in responses.items()
        )

        synthesis_messages = messages + [
            {
                "role": "system",
                "content": "Combine the perspectives into a single answer."
            },
            {
                "role": "user",
                "content": combined
            }
        ]

        return self.generate(synthesis_messages, "multi_perspective")
inference/static/app.js ADDED
@@ -0,0 +1,870 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* ============================================================
   Codette Chat UI — Frontend Logic
   Pure vanilla JS. Zero dependencies.
   ============================================================ */

// Adapter color map — also drives the CSS --accent theme (setActiveAdapter)
const COLORS = {
  newton: '#3b82f6', davinci: '#f59e0b', empathy: '#a855f7',
  philosophy: '#10b981', quantum: '#ef4444', consciousness: '#e2e8f0',
  multi_perspective: '#f97316', systems_architecture: '#06b6d4',
  _base: '#94a3b8', auto: '#94a3b8',
};

// One-letter badges used for the adapter/coverage dots.
// Keys double as the canonical adapter list the dot UIs iterate over.
const LABELS = {
  newton: 'N', davinci: 'D', empathy: 'E', philosophy: 'P',
  quantum: 'Q', consciousness: 'C', multi_perspective: 'M',
  systems_architecture: 'S',
};

// State
let isLoading = false;        // true while a /api/chat request is in flight
let spiderwebViz = null;      // SpiderwebViz instance (spiderweb.js), if canvas exists
let serverConnected = true;   // flipped by setConnected/setDisconnected
let reconnectTimer = null;    // interval handle cleared on reconnect
25
+
26
// ── Initialization ──
document.addEventListener('DOMContentLoaded', () => {
  // Wire up the UI, start status polling, and restore persisted sessions.
  initUI();
  pollStatus();
  loadSessions();
  initCoverageDots();
  initAdapterDots();

  // Spiderweb visualization is optional — only when its canvas is present.
  const webCanvas = document.getElementById('spiderweb-canvas');
  if (webCanvas) spiderwebViz = new SpiderwebViz(webCanvas);
});
40
+
41
// Wire up all static UI controls: input box, buttons, sliders, toggles.
// Called once on DOMContentLoaded; assumes the element ids exist in index.html.
function initUI() {
  const input = document.getElementById('chat-input');
  const sendBtn = document.getElementById('send-btn');
  const micBtn = document.getElementById('mic-btn');
  const newBtn = document.getElementById('btn-new-chat');
  const panelBtn = document.getElementById('btn-toggle-panel');
  const maxAdapters = document.getElementById('max-adapters');

  // Send on Enter (Shift+Enter for newline)
  input.addEventListener('keydown', (e) => {
    if (e.key === 'Enter' && !e.shiftKey) {
      e.preventDefault();
      sendMessage();
    }
  });

  // Auto-resize textarea (capped at 120px tall)
  input.addEventListener('input', () => {
    input.style.height = 'auto';
    input.style.height = Math.min(input.scrollHeight, 120) + 'px';
  });

  sendBtn.addEventListener('click', sendMessage);
  newBtn.addEventListener('click', newChat);

  // Session export/import — the hidden file input is proxied by the button
  const exportBtn = document.getElementById('btn-export');
  const importBtn = document.getElementById('btn-import');
  const importFile = document.getElementById('import-file');

  exportBtn.addEventListener('click', exportSession);
  importBtn.addEventListener('click', () => importFile.click());
  importFile.addEventListener('change', importSession);

  // Collapsible side panel ("Cocoon")
  panelBtn.addEventListener('click', () => {
    const panel = document.getElementById('side-panel');
    panel.classList.toggle('collapsed');
    // Update button label
    panelBtn.textContent = panel.classList.contains('collapsed') ? 'Cocoon' : 'Close';
  });

  // Live readout next to the max-adapters slider
  maxAdapters.addEventListener('input', () => {
    document.getElementById('max-adapters-value').textContent = maxAdapters.value;
  });

  // Voice input via Web Speech API
  initVoice(micBtn);

  // TTS toggle — read responses aloud when enabled; reverts the checkbox
  // if the browser lacks speechSynthesis
  const ttsToggle = document.getElementById('tts-toggle');
  if (ttsToggle) {
    ttsToggle.addEventListener('change', () => {
      if (ttsToggle.checked && !window.speechSynthesis) {
        ttsToggle.checked = false;
        ttsToggle.parentElement.title = 'Speech synthesis not supported';
      }
    });
  }
}
99
+
100
// ── Voice Input ──
// Module-level handles so the mic button can toggle an in-flight recognition.
let _recognition = null;
let _isRecording = false;

// Set up Web Speech API dictation on the mic button. Interim transcripts
// stream into the chat input; the final transcript stops recording (the
// user still presses Send). Degrades gracefully when unsupported.
function initVoice(micBtn) {
  // Chrome exposes the prefixed constructor; others may expose neither.
  const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
  if (!SpeechRecognition) {
    micBtn.title = 'Voice not supported in this browser';
    micBtn.style.opacity = '0.3';
    micBtn.style.cursor = 'not-allowed';
    return;
  }

  _recognition = new SpeechRecognition();
  _recognition.continuous = false;      // stop after one utterance
  _recognition.interimResults = true;   // stream partial transcripts
  _recognition.lang = 'en-US';

  const input = document.getElementById('chat-input');

  _recognition.onstart = () => {
    _isRecording = true;
    micBtn.classList.add('recording');
    micBtn.title = 'Listening... click to stop';
  };

  _recognition.onresult = (event) => {
    // Concatenate all result segments since resultIndex; flag when final.
    let transcript = '';
    let isFinal = false;
    for (let i = event.resultIndex; i < event.results.length; i++) {
      transcript += event.results[i][0].transcript;
      if (event.results[i].isFinal) isFinal = true;
    }
    // Show interim results in the input box (mirrors the auto-resize logic)
    input.value = transcript;
    input.style.height = 'auto';
    input.style.height = Math.min(input.scrollHeight, 120) + 'px';

    if (isFinal) {
      stopVoice(micBtn);
    }
  };

  _recognition.onerror = (event) => {
    console.log('Speech recognition error:', event.error);
    stopVoice(micBtn);
    if (event.error === 'not-allowed') {
      micBtn.title = 'Microphone access denied';
    }
  };

  _recognition.onend = () => {
    stopVoice(micBtn);
  };

  // Mic button toggles recording on/off
  micBtn.addEventListener('click', () => {
    if (_isRecording) {
      _recognition.stop();
      stopVoice(micBtn);
    } else {
      try {
        _recognition.start();
      } catch (e) {
        // start() throws if called while already started
        console.log('Speech recognition start error:', e);
      }
    }
  });
}
168
+
169
+ function stopVoice(micBtn) {
170
+ _isRecording = false;
171
+ micBtn.classList.remove('recording');
172
+ micBtn.title = 'Voice input';
173
+ }
174
+
175
// ── Status Polling ──
// Self-rescheduling poll of /api/status. Keeps polling while the model is
// loading (2s) or idle (3s); stops once 'ready' or 'error' dismisses the
// loading screen. Fetch failures mark the server disconnected and retry
// every 5s.
function pollStatus() {
  fetch('/api/status')
    .then(r => r.json())
    .then(status => {
      setConnected();
      updateStatus(status);
      if (status.state === 'loading') {
        setTimeout(pollStatus, 2000);
      } else if (status.state === 'ready') {
        hideLoadingScreen();
      } else if (status.state === 'error') {
        // Model failed to load — show error and dismiss loading screen
        hideLoadingScreen();
        updateStatus({ state: 'error', message: status.message || 'Model failed to load' });
      } else if (status.state === 'idle') {
        // Model not loaded yet, keep polling
        setTimeout(pollStatus, 3000);
      }
    })
    .catch(() => {
      setDisconnected();
      setTimeout(pollStatus, 5000);
    });
}
200
+
201
// Mark the server as down exactly once (idempotent on repeated failures).
function setDisconnected() {
  if (!serverConnected) return;  // already flagged
  serverConnected = false;
  updateStatus({ state: 'error', message: 'Server disconnected' });
}
207
+
208
// Mark the server as reachable again and cancel any pending reconnect timer.
function setConnected() {
  if (serverConnected) return;  // nothing to do
  serverConnected = true;
  if (reconnectTimer !== null) {
    clearInterval(reconnectTimer);
    reconnectTimer = null;
  }
}
217
+
218
// Reflect a /api/status payload in the header dot/text, the loading
// overlay, and (when included) the adapter availability dots.
function updateStatus(status) {
  const stateClass = status.state || 'loading';
  document.getElementById('status-dot').className = 'status-dot ' + stateClass;
  document.getElementById('status-text').textContent = status.message || status.state;

  // Mirror the message onto the boot overlay while it is still shown.
  const loadingStatus = document.getElementById('loading-status');
  if (loadingStatus) {
    loadingStatus.textContent = status.message || 'Loading...';
  }

  // Refresh which adapters are lit, when the payload carries them.
  if (status.adapters) {
    updateAdapterDots(status.adapters);
  }
}
236
+
237
// Fade out the boot overlay (CSS .hidden transition), then drop it from
// the DOM after the 500ms animation.
function hideLoadingScreen() {
  const overlay = document.getElementById('loading-screen');
  if (!overlay) return;
  overlay.classList.add('hidden');
  setTimeout(() => overlay.remove(), 500);
}
244
+
245
// ── Adapter Dots ──
// Build one colored dot per known adapter inside #adapter-dots.
function initAdapterDots() {
  const host = document.getElementById('adapter-dots');
  for (const name of Object.keys(LABELS)) {
    const dot = document.createElement('span');
    dot.id = `dot-${name}`;
    dot.className = 'adapter-dot';
    dot.title = name;
    dot.style.backgroundColor = COLORS[name];
    host.appendChild(dot);
  }
}
257
+
258
// Light up the dots for adapters present in the server's availability list.
function updateAdapterDots(available) {
  for (const name of Object.keys(LABELS)) {
    const dot = document.getElementById(`dot-${name}`);
    if (!dot) continue;
    dot.classList.toggle('available', available.includes(name));
  }
}
266
+
267
// Highlight exactly one adapter dot and re-theme the UI accent to its color.
function setActiveAdapter(name) {
  // Clear any previous highlight, then set the new one.
  document.querySelectorAll('.adapter-dot').forEach(d => d.classList.remove('active'));
  const dot = document.getElementById(`dot-${name}`);
  if (dot) dot.classList.add('active');

  // Unknown names fall back to the neutral base color.
  const color = COLORS[name] || COLORS._base;
  const rootStyle = document.documentElement.style;
  rootStyle.setProperty('--accent', color);
  rootStyle.setProperty('--accent-glow', color + '25');
}
279
+
280
// ── Coverage Dots ──
// Build the letter badges (one per adapter) inside #coverage-dots.
function initCoverageDots() {
  const host = document.getElementById('coverage-dots');
  for (const [name, label] of Object.entries(LABELS)) {
    const dot = document.createElement('span');
    dot.id = `cov-${name}`;
    dot.className = 'coverage-dot';
    dot.title = name;
    dot.textContent = label;
    dot.style.color = COLORS[name];
    host.appendChild(dot);
  }
}
293
+
294
// Activate the coverage badge for every adapter with non-zero usage.
function updateCoverage(usage) {
  for (const name of Object.keys(LABELS)) {
    const dot = document.getElementById(`cov-${name}`);
    if (!dot) continue;
    dot.classList.toggle('active', (usage[name] || 0) > 0);
  }
}
302
+
303
+ // ── Chat ──
304
// Submit the current input to /api/chat and drive the full response
// lifecycle: user bubble -> thinking placeholder -> assistant bubble,
// then side-panel updates (cocoon + epistemic state) and optional TTS.
// Double-submit is guarded by the module-level isLoading flag; network
// failure switches the UI to disconnected mode and starts reconnect polling.
function sendMessage() {
    const input = document.getElementById('chat-input');
    const query = input.value.trim();
    if (!query || isLoading) return;

    // Hide welcome
    const welcome = document.getElementById('welcome');
    if (welcome) welcome.style.display = 'none';

    // Add user message
    addMessage('user', query);

    // Clear input
    input.value = '';
    input.style.height = 'auto';

    // Get settings (max-adapters is a range input constrained to 1-3 in the HTML)
    const adapter = document.getElementById('adapter-select').value;
    const maxAdapters = parseInt(document.getElementById('max-adapters').value);

    // Show thinking
    const thinkingEl = showThinking(adapter);
    isLoading = true;
    document.getElementById('send-btn').disabled = true;

    // Send request with timeout (20 min for multi-perspective CPU inference)
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 1200000);

    fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            query: query,
            // 'auto' means "let the server route"; send null in that case.
            adapter: adapter === 'auto' ? null : adapter,
            max_adapters: maxAdapters,
        }),
        signal: controller.signal,
    })
    .then(r => r.json())
    .then(data => {
        clearTimeout(timeoutId);
        thinkingEl.remove();

        if (data.error) {
            addMessage('error', data.error);
            return;
        }

        // Add assistant message
        const adapterUsed = data.adapter || '_base';
        setActiveAdapter(adapterUsed);

        addMessage('assistant', data.response, {
            adapter: adapterUsed,
            confidence: data.confidence,
            reasoning: data.reasoning,
            tokens: data.tokens,
            time: data.time,
            perspectives: data.perspectives,
            multi_perspective: data.multi_perspective,
            tools_used: data.tools_used,
        });

        // Speak response if TTS is enabled
        const ttsOn = document.getElementById('tts-toggle');
        if (ttsOn && ttsOn.checked && window.speechSynthesis) {
            const utter = new SpeechSynthesisUtterance(data.response);
            utter.rate = 1.0;
            utter.pitch = 1.0;
            window.speechSynthesis.speak(utter);
        }

        // Update cocoon state
        if (data.cocoon) {
            updateCocoonUI(data.cocoon);
        }

        // Update epistemic metrics
        if (data.epistemic) {
            updateEpistemicUI(data.epistemic);
        }
    })
    .catch(err => {
        clearTimeout(timeoutId);
        thinkingEl.remove();
        // AbortError = our own timeout fired; 'Failed to fetch'/TypeError is
        // how browsers surface a dropped connection.
        if (err.name === 'AbortError') {
            addMessage('error', 'Request timed out. The model may be processing a complex query — try again or reduce perspectives.');
        } else if (err.message === 'Failed to fetch' || err.name === 'TypeError') {
            setDisconnected();
            addMessage('error', 'Server disconnected. Attempting to reconnect...');
            startReconnectPolling();
        } else {
            addMessage('error', `Request failed: ${err.message}`);
        }
    })
    .finally(() => {
        isLoading = false;
        document.getElementById('send-btn').disabled = false;
        document.getElementById('chat-input').focus();
    });
}
406
+
407
// Starter-card helper: prefill the chat input with the given text and
// submit it immediately via sendMessage().
function askQuestion(query) {
    const input = document.getElementById('chat-input');
    input.value = query;
    sendMessage();
}
411
+
412
// Render one chat bubble into #chat-area and scroll it into view.
// role: 'user' | 'assistant' | 'error'. meta (assistant only) may carry
// adapter, confidence, tokens, time, perspectives, tools_used.
// User/error text is HTML-escaped; assistant text goes through renderMarkdown.
function addMessage(role, content, meta = {}) {
    const area = document.getElementById('chat-area');
    const msg = document.createElement('div');
    msg.className = `message message-${role}`;

    if (role === 'user') {
        msg.innerHTML = `<div class="bubble"><div class="message-text">${escapeHtml(content)}</div></div>`;
    } else if (role === 'assistant') {
        const adapter = meta.adapter || '_base';
        const color = COLORS[adapter] || COLORS._base;
        const conf = meta.confidence || 0;
        // tokens/sec; '?' when either tokens or time is missing/zero.
        const tps = meta.tokens && meta.time ? (meta.tokens / meta.time).toFixed(1) : '?';

        let html = `<div class="bubble" style="border-left-color:${color}">`;
        html += `<div class="message-header">`;
        html += `<span class="adapter-badge" style="color:${color}">${adapter}</span>`;
        html += `<div class="confidence-bar"><div class="confidence-fill" style="width:${conf*100}%;background:${color}"></div></div>`;
        html += `<span>${(conf*100).toFixed(0)}%</span>`;
        html += `</div>`;
        html += `<div class="message-text">${renderMarkdown(content)}</div>`;
        html += `<div class="message-meta">${meta.tokens || '?'} tokens | ${tps} tok/s | ${(meta.time||0).toFixed(1)}s</div>`;

        // Tool usage indicator
        if (meta.tools_used && meta.tools_used.length > 0) {
            const toolNames = meta.tools_used.map(t => t.tool).join(', ');
            html += `<div class="tools-badge">🔧 Tools: ${toolNames}</div>`;
        }

        // Multi-perspective expandable
        // NOTE(review): Date.now() as a panel id can collide if two
        // multi-perspective messages render in the same millisecond — confirm
        // this is acceptable or switch to a monotonic counter.
        if (meta.perspectives && Object.keys(meta.perspectives).length > 1) {
            const perspId = 'persp-' + Date.now();
            html += `<button class="perspectives-toggle" onclick="togglePerspectives('${perspId}')">`;
            html += `Show ${Object.keys(meta.perspectives).length} perspectives</button>`;
            html += `<div class="perspectives-panel" id="${perspId}">`;
            for (const [name, text] of Object.entries(meta.perspectives)) {
                const pc = COLORS[name] || COLORS._base;
                html += `<div class="perspective-card" style="border-left-color:${pc}">`;
                html += `<div class="perspective-card-header" style="color:${pc}">${name}</div>`;
                html += `<div>${renderMarkdown(text)}</div></div>`;
            }
            html += `</div>`;
        }

        html += `</div>`;
        msg.innerHTML = html;
    } else if (role === 'error') {
        msg.innerHTML = `<div class="bubble" style="border-left-color:var(--quantum)">
            <div class="message-text" style="color:var(--quantum)">${escapeHtml(content)}</div></div>`;
    }

    area.appendChild(msg);
    // Keep the newest message visible.
    area.scrollTop = area.scrollHeight;
}
465
+
466
// Append an animated "thinking" placeholder to the chat area and return
// it so the caller can remove it once the response arrives.
function showThinking(adapter) {
    const chatArea = document.getElementById('chat-area');
    const placeholder = document.createElement('div');
    placeholder.className = 'thinking';
    const suffix = adapter && adapter !== 'auto' ? ` (${adapter})` : '';
    placeholder.innerHTML = `
        <div class="thinking-dots"><span></span><span></span><span></span></div>
        <span>Codette is thinking${suffix}...</span>
    `;
    chatArea.appendChild(placeholder);
    chatArea.scrollTop = chatArea.scrollHeight;
    return placeholder;
}
478
+
479
// Expand/collapse the perspectives panel with the given element id.
function togglePerspectives(id) {
    const panel = document.getElementById(id);
    panel.classList.toggle('open');
}
482
+
483
+ // ── Cocoon UI Updates ──
484
// Push a full cocoon-state payload from the server into the side panel:
// headline metrics, cocoon status, eta, coverage dots, the spiderweb
// canvas, and all subsystem sub-panels.
function updateCocoonUI(state) {
    // Metrics (missing fields default to 0 so the bars reset cleanly)
    const metrics = state.metrics || {};
    const coherence = metrics.current_coherence || 0;
    const tension = metrics.current_tension || 0;

    document.getElementById('metric-coherence').textContent = coherence.toFixed(4);
    document.getElementById('bar-coherence').style.width = (coherence * 100) + '%';

    document.getElementById('metric-tension').textContent = tension.toFixed(4);
    // Tension is not bounded to [0,1], so clamp the bar at 100%.
    document.getElementById('bar-tension').style.width = Math.min(tension * 100, 100) + '%';

    document.getElementById('cocoon-attractors').textContent = metrics.attractor_count || 0;
    document.getElementById('cocoon-glyphs').textContent = metrics.glyph_count || 0;

    // Cocoon status
    const cocoon = state.cocoon || {};
    document.getElementById('cocoon-encryption').textContent =
        cocoon.has_sync ? 'Active' : 'Available';

    // AEGIS eta feeds the main eta metric when available
    if (state.aegis && state.aegis.eta !== undefined) {
        document.getElementById('metric-eta').textContent = state.aegis.eta.toFixed(4);
    }

    // Coverage
    updateCoverage(state.perspective_usage || {});

    // Spiderweb
    if (spiderwebViz && state.spiderweb) {
        spiderwebViz.update(state.spiderweb);
    }

    // New subsystem panels (AEGIS, Nexus, Memory, Resonance, Guardian)
    updateSubsystemUI(state);
}
520
+
521
// Overlay per-response epistemic metrics onto the main metric widgets.
// Only fields actually present in the payload are applied.
function updateEpistemicUI(epistemic) {
    // Shared helper: write the numeric value and size its bar
    // (optionally clamped at 100% for unbounded quantities).
    const setMetric = (valueId, barId, value, clamp) => {
        document.getElementById(valueId).textContent = value.toFixed(4);
        const pct = clamp ? Math.min(value * 100, 100) : value * 100;
        document.getElementById(barId).style.width = pct + '%';
    };

    if (epistemic.ensemble_coherence !== undefined) {
        setMetric('metric-coherence', 'bar-coherence', epistemic.ensemble_coherence, false);
    }
    if (epistemic.tension_magnitude !== undefined) {
        setMetric('metric-tension', 'bar-tension', epistemic.tension_magnitude, true);
    }

    const etaEl = document.getElementById('metric-eta');
    if (epistemic.ethical_alignment !== undefined) {
        etaEl.textContent = epistemic.ethical_alignment.toFixed(3);
    } else if (epistemic.mean_coherence !== undefined) {
        // Fall back: derive eta from mean coherence as a proxy
        etaEl.textContent = epistemic.mean_coherence.toFixed(3);
    }
}
542
+
543
+ // ── Session Management ──
544
// Start a fresh server-side session, then reset the entire UI:
// transcript (restoring the welcome/starter cards), headline metrics,
// subsystem panels, the spiderweb visualization, and the session list.
function newChat() {
    fetch('/api/session/new', { method: 'POST' })
    .then(r => r.json())
    .then(() => {
        // Clear chat
        const area = document.getElementById('chat-area');
        area.innerHTML = '';
        // Show welcome with starter cards
        const welcome = document.createElement('div');
        welcome.className = 'welcome';
        welcome.id = 'welcome';
        welcome.innerHTML = `
            <h2>What would you like to explore?</h2>
            <p>Codette routes your question to the best reasoning perspective automatically.</p>
            <div class="welcome-grid">
                <div class="welcome-card" onclick="askQuestion('Explain why objects fall to the ground')">
                    <div class="welcome-card-title" style="color:var(--newton)">Newton</div>
                    <div class="welcome-card-desc">Explain why objects fall to the ground</div>
                </div>
                <div class="welcome-card" onclick="askQuestion('Design a creative solution for sustainable cities')">
                    <div class="welcome-card-title" style="color:var(--davinci)">DaVinci</div>
                    <div class="welcome-card-desc">Design a creative solution for sustainable cities</div>
                </div>
                <div class="welcome-card" onclick="askQuestion('How do I cope with feeling overwhelmed?')">
                    <div class="welcome-card-title" style="color:var(--empathy)">Empathy</div>
                    <div class="welcome-card-desc">How do I cope with feeling overwhelmed?</div>
                </div>
                <div class="welcome-card" onclick="askQuestion('What is consciousness and can AI have it?')">
                    <div class="welcome-card-title" style="color:var(--consciousness)">Consciousness</div>
                    <div class="welcome-card-desc">What is consciousness and can AI have it?</div>
                </div>
            </div>
        `;
        area.appendChild(welcome);
        // Reset metrics
        document.getElementById('metric-coherence').textContent = '0.00';
        document.getElementById('metric-tension').textContent = '0.00';
        document.getElementById('metric-eta').textContent = '--';
        document.getElementById('bar-coherence').style.width = '0%';
        document.getElementById('bar-tension').style.width = '0%';
        document.getElementById('cocoon-attractors').textContent = '0';
        document.getElementById('cocoon-glyphs').textContent = '0';
        // Reset subsystem panels (hidden until the next payload arrives)
        ['section-aegis','section-nexus','section-resonance','section-memory','section-guardian'].forEach(id => {
            const el = document.getElementById(id);
            if (el) el.style.display = 'none';
        });
        // Reset spiderweb
        if (spiderwebViz) {
            spiderwebViz._initDefaultState();
            spiderwebViz.coherence = 0;
            spiderwebViz.attractors = [];
        }
        loadSessions();
    });
}
600
+
601
// Refresh the "Recent Sessions" list in the side panel and the session
// counter. Fix: the previous version interpolated the server-supplied
// title and session id straight into innerHTML and an inline onclick
// attribute, allowing HTML/attribute injection; items are now built with
// DOM APIs so all values are treated as plain text.
function loadSessions() {
    fetch('/api/sessions')
    .then(r => r.json())
    .then(data => {
        const list = document.getElementById('session-list');
        const sessions = data.sessions || [];
        document.getElementById('cocoon-sessions').textContent = sessions.length;

        list.innerHTML = '';
        sessions.forEach(s => {
            const item = document.createElement('div');
            item.className = 'session-item';
            item.title = s.title;
            item.textContent = s.title || 'Untitled';
            // Closure captures the id — no string-embedded handler needed.
            item.addEventListener('click', () => loadSession(s.session_id));
            list.appendChild(item);
        });
    })
    .catch(() => {});
}
618
+
619
// Ask the server to switch to the given session, then rebuild the
// transcript and side-panel state from the returned history.
function loadSession(sessionId) {
    fetch('/api/session/load', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ session_id: sessionId }),
    })
    .then(r => r.json())
    .then(data => {
        // Server-side error: leave the current view untouched.
        if (data.error) return;

        const area = document.getElementById('chat-area');
        area.innerHTML = '';

        const history = data.messages || [];
        history.forEach(msg => addMessage(msg.role, msg.content, msg.metadata || {}));

        if (data.state) updateCocoonUI(data.state);
    })
    .catch(err => {
        console.log('Failed to load session:', err);
    });
}
645
+
646
+ // ── Session Export/Import ──
647
// Download the current session as a JSON file. The filename comes from
// the server's Content-Disposition header when present.
function exportSession() {
    fetch('/api/session/export', { method: 'POST' })
    .then(r => {
        if (!r.ok) throw new Error('Export failed');
        const disposition = r.headers.get('Content-Disposition') || '';
        const match = disposition.match(/filename="(.+)"/);
        const filename = match ? match[1] : 'codette_session.json';
        return r.blob().then(blob => ({ blob, filename }));
    })
    .then(({ blob, filename }) => {
        // Trigger the browser download via a transient object URL.
        const url = URL.createObjectURL(blob);
        const link = document.createElement('a');
        link.href = url;
        link.download = filename;
        link.click();
        URL.revokeObjectURL(url);
    })
    .catch(err => {
        console.log('Export failed:', err);
    });
}
668
+
669
// Handle the hidden file-input change event: read the chosen file,
// parse it as JSON locally, POST it to /api/session/import, and rebuild
// the chat from the server's canonical copy. The input value is cleared
// afterwards so the same file can be re-imported.
function importSession(event) {
    const file = event.target.files[0];
    if (!file) return;

    const reader = new FileReader();
    reader.onload = (e) => {
        try {
            // Parse locally first so malformed files fail fast without a round trip.
            const data = JSON.parse(e.target.result);
            fetch('/api/session/import', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify(data),
            })
            .then(r => r.json())
            .then(result => {
                if (result.error) {
                    addMessage('error', `Import failed: ${result.error}`);
                    return;
                }
                // Rebuild chat from imported session
                const area = document.getElementById('chat-area');
                area.innerHTML = '';
                (result.messages || []).forEach(msg => {
                    addMessage(msg.role, msg.content, msg.metadata || {});
                });
                if (result.state) {
                    updateCocoonUI(result.state);
                }
                loadSessions();
            })
            .catch(err => {
                addMessage('error', `Import failed: ${err.message}`);
            });
        } catch (parseErr) {
            addMessage('error', 'Invalid JSON file');
        }
    };
    reader.readAsText(file);
    // Reset file input so same file can be imported again
    event.target.value = '';
}
710
+
711
+ // ── Reconnection ──
712
// Poll /api/status every 5 s until the server answers again, then
// restore the connected UI state. Fix: the interval is now cleared on
// the first successful response — previously it was never cancelled, so
// after reconnecting it kept polling and re-announcing
// "Server reconnected!" every 5 seconds forever.
function startReconnectPolling() {
    if (reconnectTimer) return; // Already polling
    reconnectTimer = setInterval(() => {
        fetch('/api/status')
        .then(r => r.json())
        .then(status => {
            // Stop polling — we are back.
            clearInterval(reconnectTimer);
            reconnectTimer = null;
            setConnected();
            updateStatus(status);
            addMessage('error', 'Server reconnected!');
        })
        .catch(() => {
            // Still disconnected, keep polling
        });
    }, 5000);
}
727
+
728
+ // ── Subsystem UI Updates ──
729
// Fan the state payload out to each subsystem panel renderer; every
// renderer hides its own panel when handed undefined.
function updateSubsystemUI(state) {
    const panels = [
        [updateAegisUI, state.aegis],
        [updateNexusUI, state.nexus],
        [updateResonanceUI, state.resonance],
        [updateMemoryUI, state.memory],
        [updateGuardianUI, state.guardian],
    ];
    panels.forEach(([render, payload]) => render(payload));
}
736
+
737
// Render the AEGIS ethics panel; hidden entirely when no payload exists.
function updateAegisUI(aegis) {
    const panel = document.getElementById('section-aegis');
    if (!aegis) {
        panel.style.display = 'none';
        return;
    }
    panel.style.display = '';

    const alignment = aegis.eta || 0;
    document.getElementById('aegis-eta').textContent = alignment.toFixed(4);
    document.getElementById('bar-aegis-eta').style.width = (alignment * 100) + '%';
    document.getElementById('aegis-evals').textContent = aegis.total_evaluations || 0;
    document.getElementById('aegis-vetoes').textContent = aegis.veto_count || 0;

    // Map the reported trend onto its matching trend-* CSS class.
    const trendEl = document.getElementById('aegis-trend');
    const trend = aegis.alignment_trend || '--';
    trendEl.textContent = trend;
    trendEl.className = 'metric-value';
    const trendClass = {
        improving: 'trend-improving',
        declining: 'trend-declining',
        stable: 'trend-stable',
    }[trend];
    if (trendClass) trendEl.classList.add(trendClass);
}
756
+
757
// Render the Nexus signal-intelligence panel; hidden when no payload.
function updateNexusUI(nexus) {
    const panel = document.getElementById('section-nexus');
    if (!nexus) {
        panel.style.display = 'none';
        return;
    }
    panel.style.display = '';

    document.getElementById('nexus-processed').textContent = nexus.total_processed || 0;
    document.getElementById('nexus-interventions').textContent = nexus.interventions || 0;
    const pct = (nexus.intervention_rate || 0) * 100;
    document.getElementById('nexus-rate').textContent = pct.toFixed(1) + '%';

    // One colored dot per recent risk signal.
    const dots = (nexus.recent_risks || []).map(level =>
        `<span class="risk-dot ${level}" title="${level} risk"></span>`
    );
    document.getElementById('nexus-risks').innerHTML = dots.join('');
}
774
+
775
// Render the Resonant Continuity panel; hidden when no payload.
function updateResonanceUI(resonance) {
    const panel = document.getElementById('section-resonance');
    if (!resonance) {
        panel.style.display = 'none';
        return;
    }
    panel.style.display = '';

    const psi = resonance.psi_r || 0;
    document.getElementById('resonance-psi').textContent = psi.toFixed(4);
    // Map psi_r from the nominal [-2, 2] range onto a 0-100% bar width.
    const pct = Math.min(100, Math.max(0, (psi + 2) / 4 * 100));
    document.getElementById('bar-resonance-psi').style.width = pct + '%';

    document.getElementById('resonance-quality').textContent =
        (resonance.resonance_quality || 0).toFixed(4);
    document.getElementById('resonance-convergence').textContent =
        (resonance.convergence_rate || 0).toFixed(4);
    document.getElementById('resonance-stability').textContent =
        resonance.stability || '--';

    const peakEl = document.getElementById('resonance-peak');
    const atPeak = Boolean(resonance.at_peak);
    peakEl.textContent = atPeak ? 'ACTIVE' : 'dormant';
    peakEl.className = atPeak ? 'metric-value peak-active' : 'metric-value';
}
798
+
799
// Render the Living Memory panel; hidden when no payload.
function updateMemoryUI(memory) {
    const panel = document.getElementById('section-memory');
    if (!memory) {
        panel.style.display = 'none';
        return;
    }
    panel.style.display = '';

    document.getElementById('memory-count').textContent = memory.total_memories || 0;

    // Emotional profile tags: top 8, most frequent first.
    const ranked = Object.entries(memory.emotional_profile || {})
        .sort((a, b) => b[1] - a[1])
        .slice(0, 8);
    document.getElementById('memory-emotions').innerHTML = ranked
        .map(([emotion, count]) =>
            `<span class="emotion-tag${count > 0 ? ' active' : ''}" title="${count} memories">${emotion} ${count}</span>`)
        .join('');
}
814
+
815
// Render the Guardian panel; hidden when no payload.
function updateGuardianUI(guardian) {
    const panel = document.getElementById('section-guardian');
    if (!guardian) {
        panel.style.display = 'none';
        return;
    }
    panel.style.display = '';

    const score = (guardian.ethics || {}).ethical_score;
    document.getElementById('guardian-ethics').textContent =
        score !== undefined ? score.toFixed(4) : '--';
    document.getElementById('guardian-trust').textContent =
        (guardian.trust || {}).total_interactions || 0;
}
827
+
828
+ // ── Utilities ──
829
// HTML-escape untrusted text by letting the browser serialize it:
// assigning textContent and reading innerHTML entity-encodes &, <, >.
function escapeHtml(text) {
    const scratch = document.createElement('div');
    scratch.textContent = text;
    return scratch.innerHTML;
}
834
+
835
// Lightweight markdown renderer — no dependencies.
// Fix: fenced ``` blocks are now extracted into placeholders before the
// inline transforms run, and restored afterwards. Previously the
// inline-code/bold/italic/list rewrites and the \n -> <br> pass were
// applied INSIDE already-emitted <pre> blocks, corrupting code content.
function renderMarkdown(text) {
    let html = escapeHtml(text);

    // Code blocks: ```lang\n...\n``` — stash and replace with an opaque
    // placeholder (\u0000 cannot appear in escaped user text).
    const codeBlocks = [];
    html = html.replace(/```(\w*)\n([\s\S]*?)```/g, (_, _lang, code) => {
        codeBlocks.push(`<pre class="code-block"><code>${code}</code></pre>`);
        return `\u0000CB${codeBlocks.length - 1}\u0000`;
    });

    // Inline code: `code`
    html = html.replace(/`([^`\n]+)`/g, '<code class="inline-code">$1</code>');

    // Bold: **text** or __text__
    html = html.replace(/\*\*([^*\n]+?)\*\*/g, '<strong>$1</strong>');
    html = html.replace(/__([^_\n]+?)__/g, '<strong>$1</strong>');

    // Headers: ### text (on its own line) — before bullets to avoid conflict
    html = html.replace(/^### (.+)$/gm, '<div class="md-h3">$1</div>');
    html = html.replace(/^## (.+)$/gm, '<div class="md-h2">$1</div>');
    html = html.replace(/^# (.+)$/gm, '<div class="md-h1">$1</div>');

    // Bullet lists: - item or * item — before italic to prevent * conflicts
    html = html.replace(/^[\-\*] (.+)$/gm, '<div class="md-li">$1</div>');

    // Numbered lists: 1. item
    html = html.replace(/^\d+\. (.+)$/gm, '<div class="md-li md-oli">$1</div>');

    // Italic: *text* or _text_ — AFTER bullets, restricted to single line
    html = html.replace(/(?<!\w)\*([^*\n]+?)\*(?!\w)/g, '<em>$1</em>');
    html = html.replace(/(?<!\w)_([^_\n]+?)_(?!\w)/g, '<em>$1</em>');

    // Line breaks (preserve double newlines as paragraph breaks)
    html = html.replace(/\n\n/g, '<br><br>');
    html = html.replace(/\n/g, '<br>');

    // Restore the fenced code blocks verbatim.
    html = html.replace(/\u0000CB(\d+)\u0000/g, (_, i) => codeBlocks[Number(i)]);

    return html;
}
inference/static/index.html ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Codette</title>
7
+ <link rel="stylesheet" href="style.css">
8
+ </head>
9
+ <body>
10
+
11
+ <!-- Loading Screen -->
12
+ <div class="loading-screen" id="loading-screen">
13
+ <div class="loading-title">Codette</div>
14
+ <div class="loading-status" id="loading-status">Initializing...</div>
15
+ <div class="loading-bar"><div class="loading-bar-fill"></div></div>
16
+ </div>
17
+
18
+ <!-- Main App -->
19
+ <div class="app">
20
+ <!-- Main Chat Panel -->
21
+ <div class="main-panel">
22
+ <!-- Header -->
23
+ <div class="header">
24
+ <div class="header-left">
25
+ <span class="logo" id="logo">Codette</span>
26
+ <div class="adapter-dots" id="adapter-dots"></div>
27
+ </div>
28
+ <div class="header-right">
29
+ <button class="header-btn" id="btn-new-chat" title="New conversation">+ New</button>
30
+ <button class="header-btn" id="btn-export" title="Export session">Export</button>
31
+ <button class="header-btn" id="btn-import" title="Import session">Import</button>
32
+ <input type="file" id="import-file" accept=".json" style="display:none">
33
+ <button class="header-btn" id="btn-toggle-panel" title="Toggle side panel">Cocoon</button>
34
+ </div>
35
+ </div>
36
+
37
+ <!-- Chat Messages -->
38
+ <div class="chat-area" id="chat-area">
39
+ <div class="welcome" id="welcome">
40
+ <h2>What would you like to explore?</h2>
41
+ <p>Codette v2.0 with Phase 6: Multi-perspective reasoning with controlled debate, semantic tension analysis, and adaptive stability.</p>
42
+ <div style="font-size:0.9em; color:#666; margin-bottom:16px; padding:10px; background:#f5f5f5; border-radius:4px;">
43
+ <strong>What's New:</strong> Domain-aware agent routing • Semantic conflict detection • Real-time coherence monitoring • Experience-weighted reasoning
44
+ </div>
45
+ <div class="welcome-grid">
46
+ <div class="welcome-card" onclick="askQuestion('What is the speed of light and why does it matter?')">
47
+ <div class="welcome-card-title" style="color:var(--newton)">Physics</div>
48
+ <div class="welcome-card-desc">What is the speed of light and why does it matter?</div>
49
+ </div>
50
+ <div class="welcome-card" onclick="askQuestion('How should we balance accuracy and explainability in AI systems?')">
51
+ <div class="welcome-card-title" style="color:var(--philosophy)">Ethics</div>
52
+ <div class="welcome-card-desc">How should we balance accuracy and explainability in AI systems?</div>
53
+ </div>
54
+ <div class="welcome-card" onclick="askQuestion('What are the hallmarks of a truly creative solution?')">
55
+ <div class="welcome-card-title" style="color:var(--davinci)">Creativity</div>
56
+ <div class="welcome-card-desc">What are the hallmarks of a truly creative solution?</div>
57
+ </div>
58
+ <div class="welcome-card" onclick="askQuestion('What would it mean for a machine to genuinely understand?')">
59
+ <div class="welcome-card-title" style="color:var(--consciousness)">Consciousness</div>
60
+ <div class="welcome-card-desc">What would it mean for a machine to genuinely understand?</div>
61
+ </div>
62
+ </div>
63
+ </div>
64
+ </div>
65
+
66
+ <!-- Controls Row -->
67
+ <div class="controls">
68
+ <div class="control-group">
69
+ <label>Adapter:</label>
70
+ <select id="adapter-select">
71
+ <option value="auto">Auto</option>
72
+ <option value="newton">Newton</option>
73
+ <option value="davinci">DaVinci</option>
74
+ <option value="empathy">Empathy</option>
75
+ <option value="philosophy">Philosophy</option>
76
+ <option value="quantum">Quantum</option>
77
+ <option value="consciousness">Consciousness</option>
78
+ <option value="multi_perspective">Multi-Perspective</option>
79
+ <option value="systems_architecture">Systems</option>
80
+ </select>
81
+ </div>
82
+ <div class="control-group">
83
+ <label>Perspectives:</label>
84
+ <input type="range" id="max-adapters" min="1" max="3" value="2" style="width:60px">
85
+ <span id="max-adapters-value">2</span>
86
+ </div>
87
+ <div class="control-group" style="margin-left:auto">
88
+ <label>
89
+ <input type="checkbox" id="tts-toggle"> Voice
90
+ </label>
91
+ </div>
92
+ </div>
93
+
94
+ <!-- Input Area -->
95
+ <div class="input-area">
96
+ <div class="input-row">
97
+ <button class="mic-btn" id="mic-btn" title="Voice input">&#127908;</button>
98
+ <div class="input-wrapper">
99
+ <textarea id="chat-input" placeholder="Ask Codette something..." rows="1"></textarea>
100
+ </div>
101
+ <button class="send-btn" id="send-btn" title="Send">&#9654;</button>
102
+ </div>
103
+ </div>
104
+
105
+ <!-- Status Bar -->
106
+ <div class="status-bar">
107
+ <div class="status-indicator">
108
+ <span class="status-dot" id="status-dot"></span>
109
+ <span id="status-text">Initializing...</span>
110
+ </div>
111
+ <div id="status-right"></div>
112
+ </div>
113
+ </div>
114
+
115
+ <!-- Side Panel (Cocoon Dashboard) -->
116
+ <div class="side-panel" id="side-panel">
117
+ <!-- Spiderweb Visualization -->
118
+ <div class="side-section">
119
+ <div class="side-section-title">Agent Network</div>
120
+ <canvas id="spiderweb-canvas"></canvas>
121
+ </div>
122
+
123
+ <!-- Metrics -->
124
+ <div class="side-section">
125
+ <div class="side-section-title">Cocoon Metrics</div>
126
+ <div class="metric-row">
127
+ <span class="metric-label">&#915; Phase Coherence</span>
128
+ <span class="metric-value" id="metric-coherence">0.00</span>
129
+ </div>
130
+ <div class="metric-bar">
131
+ <div class="metric-bar-fill" id="bar-coherence"
132
+ style="width:0%;background:var(--philosophy)"></div>
133
+ </div>
134
+ <div class="metric-row" style="margin-top:10px">
135
+ <span class="metric-label">&#958; Epistemic Tension</span>
136
+ <span class="metric-value" id="metric-tension">0.00</span>
137
+ </div>
138
+ <div class="metric-bar">
139
+ <div class="metric-bar-fill" id="bar-tension"
140
+ style="width:0%;background:var(--quantum)"></div>
141
+ </div>
142
+ <div class="metric-row" style="margin-top:10px">
143
+ <span class="metric-label">&#951; Ethical Alignment</span>
144
+ <span class="metric-value" id="metric-eta">--</span>
145
+ </div>
146
+ </div>
147
+
148
+ <!-- Perspective Coverage -->
149
+ <div class="side-section">
150
+ <div class="side-section-title">Perspective Coverage</div>
151
+ <div class="coverage-dots" id="coverage-dots"></div>
152
+ </div>
153
+
154
+ <!-- Cocoon Status -->
155
+ <div class="side-section">
156
+ <div class="side-section-title">Cocoon Status</div>
157
+ <div class="metric-row">
158
+ <span class="metric-label">&#128274; Encryption</span>
159
+ <span class="metric-value" id="cocoon-encryption">--</span>
160
+ </div>
161
+ <div class="metric-row">
162
+ <span class="metric-label">&#128376; Attractors</span>
163
+ <span class="metric-value" id="cocoon-attractors">0</span>
164
+ </div>
165
+ <div class="metric-row">
166
+ <span class="metric-label">&#128200; Glyphs</span>
167
+ <span class="metric-value" id="cocoon-glyphs">0</span>
168
+ </div>
169
+ <div class="metric-row">
170
+ <span class="metric-label">&#128190; Sessions</span>
171
+ <span class="metric-value" id="cocoon-sessions">0</span>
172
+ </div>
173
+ </div>
174
+
175
+ <!-- AEGIS Ethical Alignment -->
176
+ <div class="side-section" id="section-aegis" style="display:none">
177
+ <div class="side-section-title">AEGIS Ethics</div>
178
+ <div class="metric-row">
179
+ <span class="metric-label">&#951; Alignment</span>
180
+ <span class="metric-value" id="aegis-eta">--</span>
181
+ </div>
182
+ <div class="metric-bar">
183
+ <div class="metric-bar-fill" id="bar-aegis-eta"
184
+ style="width:0%;background:var(--philosophy)"></div>
185
+ </div>
186
+ <div class="metric-row" style="margin-top:8px">
187
+ <span class="metric-label">Trend</span>
188
+ <span class="metric-value" id="aegis-trend">--</span>
189
+ </div>
190
+ <div class="metric-row">
191
+ <span class="metric-label">Evaluations</span>
192
+ <span class="metric-value" id="aegis-evals">0</span>
193
+ </div>
194
+ <div class="metric-row">
195
+ <span class="metric-label">Vetoes</span>
196
+ <span class="metric-value" id="aegis-vetoes">0</span>
197
+ </div>
198
+ </div>
199
+
200
+ <!-- Nexus Signal Intelligence -->
201
+ <div class="side-section" id="section-nexus" style="display:none">
202
+ <div class="side-section-title">Nexus Signals</div>
203
+ <div class="metric-row">
204
+ <span class="metric-label">Processed</span>
205
+ <span class="metric-value" id="nexus-processed">0</span>
206
+ </div>
207
+ <div class="metric-row">
208
+ <span class="metric-label">Interventions</span>
209
+ <span class="metric-value" id="nexus-interventions">0</span>
210
+ </div>
211
+ <div class="metric-row">
212
+ <span class="metric-label">Rate</span>
213
+ <span class="metric-value" id="nexus-rate">0%</span>
214
+ </div>
215
+ <div class="nexus-risk-dots" id="nexus-risks"></div>
216
+ </div>
217
+
218
+ <!-- Resonant Continuity -->
219
+ <div class="side-section" id="section-resonance" style="display:none">
220
+ <div class="side-section-title">Resonance &#936;<sub>r</sub></div>
221
+ <div class="metric-row">
222
+ <span class="metric-label">&#936;<sub>r</sub> Wavefunction</span>
223
+ <span class="metric-value" id="resonance-psi">--</span>
224
+ </div>
225
+ <div class="metric-bar">
226
+ <div class="metric-bar-fill" id="bar-resonance-psi"
227
+ style="width:0%;background:var(--empathy)"></div>
228
+ </div>
229
+ <div class="metric-row" style="margin-top:8px">
230
+ <span class="metric-label">Quality</span>
231
+ <span class="metric-value" id="resonance-quality">--</span>
232
+ </div>
233
+ <div class="metric-row">
234
+ <span class="metric-label">Convergence</span>
235
+ <span class="metric-value" id="resonance-convergence">--</span>
236
+ </div>
237
+ <div class="metric-row">
238
+ <span class="metric-label">Stability</span>
239
+ <span class="metric-value" id="resonance-stability">--</span>
240
+ </div>
241
+ <div class="metric-row">
242
+ <span class="metric-label" id="resonance-peak-label">Peak</span>
243
+ <span class="metric-value" id="resonance-peak">--</span>
244
+ </div>
245
+ </div>
246
+
247
+ <!-- Living Memory -->
248
+ <div class="side-section" id="section-memory" style="display:none">
249
+ <div class="side-section-title">Living Memory</div>
250
+ <div class="metric-row">
251
+ <span class="metric-label">Cocoons</span>
252
+ <span class="metric-value" id="memory-count">0</span>
253
+ </div>
254
+ <div class="memory-emotions" id="memory-emotions"></div>
255
+ </div>
256
+
257
+ <!-- Guardian -->
258
+ <div class="side-section" id="section-guardian" style="display:none">
259
+ <div class="side-section-title">Guardian</div>
260
+ <div class="metric-row">
261
+ <span class="metric-label">Ethics Score</span>
262
+ <span class="metric-value" id="guardian-ethics">--</span>
263
+ </div>
264
+ <div class="metric-row">
265
+ <span class="metric-label">Trust Interactions</span>
266
+ <span class="metric-value" id="guardian-trust">0</span>
267
+ </div>
268
+ </div>
269
+
270
+ <!-- Recent Sessions -->
271
+ <div class="side-section" style="flex:1;overflow-y:auto">
272
+ <div class="side-section-title">Recent Sessions</div>
273
+ <div id="session-list"></div>
274
+ </div>
275
+ </div>
276
+ </div>
277
+
278
+ <script src="spiderweb.js"></script>
279
+ <script src="app.js"></script>
280
+ </body>
281
+ </html>
inference/static/spiderweb.js ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* ============================================================
2
+ Spiderweb Visualization — Canvas-based Agent Network
3
+ Shows the QuantumSpiderweb as an animated node graph.
4
+ Zero dependencies. Pure Canvas API.
5
+
6
+ Always visually alive: ambient breathing, orbital drift,
7
+ dim connections at rest, full glow when agents are active.
8
+ ============================================================ */
9
+
10
class SpiderwebViz {
    /**
     * Canvas-based animated visualization of the QuantumSpiderweb agent
     * network. Zero dependencies (pure Canvas API). The canvas's parent
     * element determines the drawing width; height is fixed at 200 CSS px.
     *
     * @param {HTMLCanvasElement} canvas - target canvas element.
     */
    constructor(canvas) {
        this.canvas = canvas;
        this.ctx = canvas.getContext('2d');
        this.nodes = {};        // per-agent render state, keyed by agent name
        this.attractors = [];   // attractor clusters from the backend
        this.coherence = 0;     // phase coherence in [0, 1]
        this.animFrame = null;  // requestAnimationFrame handle (for destroy)
        this.time = 0;          // animation clock, advanced ~0.016s per frame

        // Agent positions (circular layout, in this fixed order)
        this.agents = [
            'newton', 'davinci', 'empathy', 'philosophy',
            'quantum', 'consciousness', 'multi_perspective', 'systems_architecture'
        ];

        this.colors = {
            newton: '#3b82f6', davinci: '#f59e0b', empathy: '#a855f7',
            philosophy: '#10b981', quantum: '#ef4444', consciousness: '#e2e8f0',
            multi_perspective: '#f97316', systems_architecture: '#06b6d4',
        };

        this.labels = {
            newton: 'N', davinci: 'D', empathy: 'E', philosophy: 'P',
            quantum: 'Q', consciousness: 'C', multi_perspective: 'M',
            systems_architecture: 'S',
        };

        // Initialize with default state and start the render loop
        this._initDefaultState();
        this._resize();
        this._animate();

        // Handle resize. Keep a reference so destroy() can disconnect it
        // (previously the observer was leaked).
        this._resizeObserver = new ResizeObserver(() => this._resize());
        this._resizeObserver.observe(canvas.parentElement);
    }

    /** Seed every agent node with an idle default state. */
    _initDefaultState() {
        this.agents.forEach((name, i) => {
            this.nodes[name] = {
                state: [0.5, 0, 0.5, 0, 0.5], // psi, tau, chi, phi, lam
                tension: 0,
                active: false,
                energy: 0.25,
                // Each node gets a unique phase offset for ambient animation
                phaseOffset: (i / this.agents.length) * Math.PI * 2,
            };
        });
    }

    /** Resize the backing store for the current parent width and DPR. */
    _resize() {
        const rect = this.canvas.parentElement.getBoundingClientRect();
        const dpr = window.devicePixelRatio || 1;
        this.canvas.width = rect.width * dpr;
        this.canvas.height = 200 * dpr;
        this.canvas.style.width = rect.width + 'px';
        this.canvas.style.height = '200px';
        // Reset transform before scaling — prevents DPR compounding on repeated resizes
        this.ctx.setTransform(dpr, 0, 0, dpr, 0, 0);
        this.w = rect.width;
        this.h = 200;
        this.cx = this.w / 2;
        this.cy = this.h / 2;
        this.radius = Math.min(this.w, this.h) * 0.35;
    }

    /**
     * Ingest a spiderweb state snapshot from the backend.
     * Expected shape: { nodes: {name: {state, tension_history}}, attractors,
     * phase_coherence }. Unknown node names are ignored; missing fields fall
     * back to idle defaults.
     */
    update(spiderwebState) {
        if (!spiderwebState || !spiderwebState.nodes) return;

        // Update node states
        for (const [name, data] of Object.entries(spiderwebState.nodes)) {
            const node = this.nodes[name];
            if (!node) continue;
            node.state = data.state || [0.5, 0, 0.5, 0, 0.5];
            const tensions = data.tension_history || [];
            node.tension = tensions.length > 0 ?
                tensions[tensions.length - 1] : 0;
            node.energy = data.state ?
                data.state.reduce((s, v) => s + v * v, 0) : 0.25;
            // Fixed: read the normalized state — previously `data.state[0]`
            // threw a TypeError when the payload carried a null/missing state.
            node.active = (node.state[0] || 0) > 0.6;
        }

        this.attractors = spiderwebState.attractors || [];
        this.coherence = spiderwebState.phase_coherence || 0;
    }

    /** Position of agent `index` on the ring, with gentle orbital drift. */
    _getNodePos(index) {
        const angle = (index / this.agents.length) * Math.PI * 2 - Math.PI / 2;
        // Add gentle orbital drift
        const drift = Math.sin(this.time * 0.3 + index * 0.8) * 2;
        const driftY = Math.cos(this.time * 0.25 + index * 1.1) * 1.5;
        return {
            x: this.cx + Math.cos(angle) * this.radius + drift,
            y: this.cy + Math.sin(angle) * this.radius + driftY,
        };
    }

    /** Render loop: advance the clock, draw, and schedule the next frame. */
    _animate() {
        this.time += 0.016;
        this._draw();
        this.animFrame = requestAnimationFrame(() => this._animate());
    }

    /** Draw one frame: glow, edges, attractors, nodes, coherence ring. */
    _draw() {
        const ctx = this.ctx;
        ctx.clearRect(0, 0, this.w, this.h);

        // ── Ambient center glow (always visible, brighter with coherence) ──
        const ambientAlpha = 0.02 + (this.coherence > 0.5 ? this.coherence * 0.05 : 0);
        const centerGlow = ctx.createRadialGradient(
            this.cx, this.cy, 0, this.cx, this.cy, this.radius * 1.3
        );
        centerGlow.addColorStop(0, `rgba(59, 130, 246, ${ambientAlpha + Math.sin(this.time * 0.5) * 0.01})`);
        centerGlow.addColorStop(0.6, `rgba(168, 85, 247, ${ambientAlpha * 0.5})`);
        centerGlow.addColorStop(1, 'transparent');
        ctx.fillStyle = centerGlow;
        ctx.fillRect(0, 0, this.w, this.h);

        // ── Draw edges (always visible, brighter when active/tense) ──
        this.agents.forEach((nameA, i) => {
            const posA = this._getNodePos(i);
            this.agents.forEach((nameB, j) => {
                if (j <= i) return; // each unordered pair drawn once
                const posB = this._getNodePos(j);

                const nodeA = this.nodes[nameA];
                const nodeB = this.nodes[nameB];
                const tension = Math.abs((nodeA?.tension || 0) - (nodeB?.tension || 0));

                ctx.beginPath();
                ctx.moveTo(posA.x, posA.y);
                ctx.lineTo(posB.x, posB.y);

                const bothActive = nodeA?.active && nodeB?.active;
                const eitherActive = nodeA?.active || nodeB?.active;

                // Base alpha: always visible, more when active
                let alpha;
                if (bothActive) {
                    alpha = 0.25 + Math.sin(this.time * 3 + i + j) * 0.08;
                } else if (eitherActive) {
                    alpha = 0.15 + Math.sin(this.time * 2 + i) * 0.04;
                } else {
                    // Ambient: gentle breathing pulse on each edge
                    alpha = 0.08 + Math.sin(this.time * 0.8 + i * 0.7 + j * 0.5) * 0.03;
                }

                // Tension boosts visibility
                alpha += Math.min(tension * 0.3, 0.15);

                if (bothActive) {
                    ctx.strokeStyle = `rgba(168, 85, 247, ${alpha})`;
                    ctx.lineWidth = 1.5;
                } else if (eitherActive) {
                    ctx.strokeStyle = `rgba(139, 92, 246, ${alpha})`;
                    ctx.lineWidth = 1;
                } else {
                    ctx.strokeStyle = `rgba(100, 116, 139, ${alpha})`;
                    ctx.lineWidth = 0.5;
                }
                ctx.stroke();
            });
        });

        // ── Draw attractor regions (soft halo at the member centroid) ──
        this.attractors.forEach((att, ai) => {
            if (!att.members || att.members.length < 2) return;

            let cx = 0, cy = 0, count = 0;
            att.members.forEach(name => {
                const idx = this.agents.indexOf(name);
                if (idx >= 0) {
                    const pos = this._getNodePos(idx);
                    cx += pos.x;
                    cy += pos.y;
                    count++;
                }
            });
            if (count < 2) return;
            cx /= count;
            cy /= count;

            const attRadius = 20 + count * 8;
            const gradient = ctx.createRadialGradient(cx, cy, 0, cx, cy, attRadius);
            gradient.addColorStop(0, `rgba(168, 85, 247, ${0.08 + Math.sin(this.time * 2 + ai) * 0.03})`);
            gradient.addColorStop(1, 'transparent');
            ctx.fillStyle = gradient;
            ctx.beginPath();
            ctx.arc(cx, cy, attRadius, 0, Math.PI * 2);
            ctx.fill();
        });

        // ── Draw nodes (always visible with ambient breathing) ──
        this.agents.forEach((name, i) => {
            const pos = this._getNodePos(i);
            const node = this.nodes[name];
            const color = this.colors[name] || '#94a3b8';
            const energy = node?.energy || 0.25;
            const isActive = node?.active || false;
            const phase = node?.phaseOffset || 0;

            // Breathing pulse — all nodes gently pulse even at rest
            const breathe = Math.sin(this.time * 1.2 + phase) * 0.3 + 0.7;

            // Node glow — always present, stronger when active
            const glowAlpha = isActive ? 0.35 : (0.08 * breathe);
            const glowRadius = isActive
                ? 14 + Math.sin(this.time * 2 + phase) * 4
                : 10 + breathe * 2;

            const glow = ctx.createRadialGradient(
                pos.x, pos.y, 0, pos.x, pos.y, glowRadius
            );
            // Hex alpha suffixes: '60'/'25' append alpha to the hex color
            glow.addColorStop(0, color + (isActive ? '60' : '25'));
            glow.addColorStop(1, 'transparent');
            ctx.fillStyle = glow;
            ctx.beginPath();
            ctx.arc(pos.x, pos.y, glowRadius, 0, Math.PI * 2);
            ctx.fill();

            // Node circle
            const nodeRadius = isActive
                ? 7 + energy * 4
                : 5 + breathe * 1.5;

            ctx.beginPath();
            ctx.arc(pos.x, pos.y, nodeRadius, 0, Math.PI * 2);
            ctx.fillStyle = isActive ? color : color + '80';
            ctx.fill();

            // Border ring
            ctx.strokeStyle = isActive ? color : color + '40';
            ctx.lineWidth = isActive ? 1.5 : 0.8;
            ctx.stroke();

            // Label
            ctx.fillStyle = isActive ? '#e2e8f0' : '#94a3b8';
            ctx.font = `${isActive ? 'bold ' : ''}9px system-ui`;
            ctx.textAlign = 'center';
            ctx.textBaseline = 'middle';
            ctx.fillText(this.labels[name], pos.x, pos.y + nodeRadius + 10);
        });

        // ── Coherence ring (always show a faint ring, solid when coherent) ──
        const ringAlpha = this.coherence > 0
            ? 0.2 + this.coherence * 0.4
            : 0.06 + Math.sin(this.time * 0.6) * 0.02;
        const ringProgress = this.coherence > 0
            ? this.coherence
            : 0.15 + Math.sin(this.time * 0.3) * 0.05;

        ctx.beginPath();
        ctx.arc(this.cx, this.cy, this.radius + 15,
            -Math.PI / 2,
            -Math.PI / 2 + Math.PI * 2 * ringProgress);
        ctx.strokeStyle = this.coherence > 0.5
            ? `rgba(16, 185, 129, ${ringAlpha})`
            : `rgba(100, 116, 139, ${ringAlpha})`;
        ctx.lineWidth = this.coherence > 0.5 ? 2.5 : 1.5;
        ctx.lineCap = 'round';
        ctx.stroke();

        // Coherence label (Γ value, or 'idle' when there is no signal)
        if (this.coherence > 0) {
            ctx.fillStyle = '#94a3b8';
            ctx.font = '9px system-ui';
            ctx.textAlign = 'center';
            ctx.fillText(`\u0393 ${this.coherence.toFixed(2)}`, this.cx, this.h - 8);
        } else {
            ctx.fillStyle = '#475569';
            ctx.font = '9px system-ui';
            ctx.textAlign = 'center';
            ctx.fillText('idle', this.cx, this.h - 8);
        }
    }

    /** Stop the render loop and release the resize observer. */
    destroy() {
        if (this.animFrame) cancelAnimationFrame(this.animFrame);
        // Fixed: previously the ResizeObserver was never disconnected,
        // leaking the observer and pinning the canvas parent element.
        if (this._resizeObserver) this._resizeObserver.disconnect();
    }
}
inference/static/style.css ADDED
@@ -0,0 +1,859 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* ============================================================
   Codette UI — Dark Glass Theme with Adapter Accent Colors
   Zero dependencies. Pure CSS.

   Structure: design tokens (:root) → layout → header → chat →
   controls/input → status bar → side panel → loading/welcome →
   markdown rendering → subsystem panels → responsive overrides.
   ============================================================ */

:root {
    /* Base palette */
    --bg-primary: #0f1117;
    --bg-secondary: #1a1d28;
    --bg-tertiary: #232736;
    --bg-glass: rgba(26, 29, 40, 0.85);
    --text-primary: #e2e8f0;
    --text-secondary: #94a3b8;
    --text-muted: #64748b;
    --border: rgba(148, 163, 184, 0.12);
    --border-active: rgba(148, 163, 184, 0.25);

    /* Adapter accent colors (one per reasoning agent) */
    --newton: #3b82f6;
    --davinci: #f59e0b;
    --empathy: #a855f7;
    --philosophy: #10b981;
    --quantum: #ef4444;
    --consciousness: #e2e8f0;
    --multi_perspective: #f97316;
    --systems_architecture: #06b6d4;
    --base: #94a3b8;

    /* Active accent (changes dynamically — scripts repoint --accent at
       the current adapter's color variable) */
    --accent: var(--base);
    --accent-glow: rgba(148, 163, 184, 0.15);

    /* Layout */
    --sidebar-width: 320px;
    --header-height: 56px;
    --input-height: 80px;
    --status-height: 36px;
    --radius: 12px;
    --radius-sm: 8px;
}

* { margin: 0; padding: 0; box-sizing: border-box; }

body {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
    background: var(--bg-primary);
    color: var(--text-primary);
    height: 100vh;
    overflow: hidden; /* app manages its own scroll regions */
    line-height: 1.6;
}

/* ── Layout ── */
.app {
    display: flex;
    height: 100vh;
}

.main-panel {
    flex: 1;
    display: flex;
    flex-direction: column;
    min-width: 0; /* allow the flex child to shrink below content width */
}

.side-panel {
    width: var(--sidebar-width);
    background: var(--bg-secondary);
    border-left: 1px solid var(--border);
    display: flex;
    flex-direction: column;
    overflow: hidden;
    transition: width 0.3s ease;
}

.side-panel.collapsed {
    width: 0;
    border: none;
}

/* ── Header ── */
.header {
    height: var(--header-height);
    padding: 0 20px;
    display: flex;
    align-items: center;
    justify-content: space-between;
    background: var(--bg-secondary);
    border-bottom: 1px solid var(--border);
    flex-shrink: 0;
}

.header-left {
    display: flex;
    align-items: center;
    gap: 12px;
}

.logo {
    font-size: 20px;
    font-weight: 700;
    letter-spacing: -0.02em;
    /* Gradient text: clipped background + transparent fill */
    background: linear-gradient(135deg, var(--accent), var(--text-primary));
    -webkit-background-clip: text;
    background-clip: text;
    -webkit-text-fill-color: transparent;
    transition: all 0.5s ease;
}

.adapter-dots {
    display: flex;
    gap: 4px;
    align-items: center;
}

.adapter-dot {
    width: 8px;
    height: 8px;
    border-radius: 50%;
    opacity: 0.3;
    transition: all 0.3s ease;
}

.adapter-dot.available { opacity: 0.6; }
.adapter-dot.active {
    opacity: 1;
    box-shadow: 0 0 8px currentColor;
    transform: scale(1.3);
}

.header-right {
    display: flex;
    align-items: center;
    gap: 8px;
}

.header-btn {
    background: none;
    border: 1px solid var(--border);
    color: var(--text-secondary);
    padding: 6px 12px;
    border-radius: var(--radius-sm);
    cursor: pointer;
    font-size: 13px;
    transition: all 0.2s;
}

.header-btn:hover {
    border-color: var(--accent);
    color: var(--text-primary);
    background: var(--accent-glow);
}

/* ── Chat Area ── */
.chat-area {
    flex: 1;
    overflow-y: auto;
    padding: 20px;
    scroll-behavior: smooth;
}

.chat-area::-webkit-scrollbar { width: 6px; }
.chat-area::-webkit-scrollbar-track { background: transparent; }
.chat-area::-webkit-scrollbar-thumb {
    background: var(--border-active);
    border-radius: 3px;
}

.message {
    max-width: 800px;
    margin: 0 auto 16px;
    animation: messageIn 0.3s ease;
}

@keyframes messageIn {
    from { opacity: 0; transform: translateY(8px); }
    to { opacity: 1; transform: translateY(0); }
}

.message-user {
    text-align: right;
}

.message-user .bubble {
    background: var(--bg-tertiary);
    border: 1px solid var(--border);
    display: inline-block;
    text-align: left;
    padding: 12px 16px;
    border-radius: var(--radius) var(--radius) 4px var(--radius);
    max-width: 85%;
}

.message-assistant .bubble {
    background: var(--bg-glass);
    border: 1px solid var(--border);
    border-left: 3px solid var(--accent); /* accent stripe marks the adapter */
    padding: 12px 16px;
    border-radius: 4px var(--radius) var(--radius) var(--radius);
    backdrop-filter: blur(10px);
    max-width: 100%;
}

.message-header {
    display: flex;
    align-items: center;
    gap: 8px;
    margin-bottom: 6px;
    font-size: 12px;
    color: var(--text-muted);
}

.adapter-badge {
    display: inline-flex;
    align-items: center;
    gap: 4px;
    padding: 2px 8px;
    border-radius: 10px;
    font-size: 11px;
    font-weight: 600;
    text-transform: uppercase;
    letter-spacing: 0.05em;
    border: 1px solid currentColor;
    opacity: 0.9;
}

.confidence-bar {
    width: 40px;
    height: 4px;
    background: var(--bg-tertiary);
    border-radius: 2px;
    overflow: hidden;
}

.confidence-fill {
    height: 100%;
    border-radius: 2px;
    transition: width 0.5s ease;
}

.message-text {
    word-wrap: break-word;
    overflow-wrap: break-word;
    font-size: 14px;
    line-height: 1.7;
}

/* Keep pre-wrap only for user messages (no markdown rendering) */
.message-user .message-text {
    white-space: pre-wrap;
}

.message-meta {
    margin-top: 6px;
    font-size: 11px;
    color: var(--text-muted);
}

/* Perspectives expandable */
.tools-badge {
    margin-top: 8px;
    padding: 4px 10px;
    background: rgba(16, 185, 129, 0.1);
    border: 1px solid rgba(16, 185, 129, 0.25);
    border-radius: 12px;
    color: #10b981;
    font-size: 11px;
    display: inline-block;
}

.perspectives-toggle {
    margin-top: 10px;
    padding: 8px 12px;
    background: rgba(255,255,255,0.03);
    border: 1px solid var(--border);
    border-radius: var(--radius-sm);
    cursor: pointer;
    color: var(--text-secondary);
    font-size: 12px;
    transition: all 0.2s;
}

.perspectives-toggle:hover {
    background: rgba(255,255,255,0.06);
    color: var(--text-primary);
}

.perspectives-panel {
    display: none;
    margin-top: 10px;
    gap: 8px; /* takes effect once .open switches display to flex */
}

.perspectives-panel.open { display: flex; flex-direction: column; }

.perspective-card {
    padding: 10px 14px;
    background: rgba(255,255,255,0.02);
    border-radius: var(--radius-sm);
    border-left: 3px solid var(--accent);
    font-size: 13px;
    line-height: 1.6;
}

.perspective-card-header {
    font-size: 11px;
    font-weight: 600;
    text-transform: uppercase;
    letter-spacing: 0.05em;
    margin-bottom: 4px;
}

/* Thinking indicator */
.thinking {
    max-width: 800px;
    margin: 0 auto 16px;
    display: flex;
    align-items: center;
    gap: 10px;
    color: var(--text-muted);
    font-size: 13px;
}

.thinking-dots {
    display: flex;
    gap: 4px;
}

.thinking-dots span {
    width: 6px;
    height: 6px;
    background: var(--accent);
    border-radius: 50%;
    animation: pulse 1.2s infinite;
}

/* Staggered delays give the classic "typing dots" wave */
.thinking-dots span:nth-child(2) { animation-delay: 0.2s; }
.thinking-dots span:nth-child(3) { animation-delay: 0.4s; }

@keyframes pulse {
    0%, 100% { opacity: 0.3; transform: scale(0.8); }
    50% { opacity: 1; transform: scale(1.2); }
}

/* ── Controls Row ── */
.controls {
    padding: 8px 20px;
    display: flex;
    align-items: center;
    gap: 16px;
    border-top: 1px solid var(--border);
    background: var(--bg-secondary);
    flex-shrink: 0;
}

.control-group {
    display: flex;
    align-items: center;
    gap: 6px;
    font-size: 12px;
    color: var(--text-secondary);
}

.control-group select,
.control-group input[type="range"] {
    background: var(--bg-tertiary);
    border: 1px solid var(--border);
    color: var(--text-primary);
    padding: 4px 8px;
    border-radius: 6px;
    font-size: 12px;
    cursor: pointer;
}

.control-group select:focus,
.control-group input:focus { outline: none; border-color: var(--accent); }

/* ── Input Area ── */
.input-area {
    padding: 12px 20px;
    background: var(--bg-secondary);
    border-top: 1px solid var(--border);
    flex-shrink: 0;
}

.input-row {
    max-width: 800px;
    margin: 0 auto;
    display: flex;
    gap: 10px;
    align-items: flex-end;
}

.input-wrapper {
    flex: 1;
    position: relative;
}

#chat-input {
    width: 100%;
    min-height: 44px;
    max-height: 120px; /* textarea auto-grows up to this, then scrolls */
    padding: 10px 14px;
    background: var(--bg-tertiary);
    border: 1px solid var(--border);
    border-radius: var(--radius);
    color: var(--text-primary);
    font-size: 14px;
    font-family: inherit;
    resize: none;
    line-height: 1.5;
    transition: border-color 0.2s;
}

#chat-input:focus {
    outline: none;
    border-color: var(--accent);
    box-shadow: 0 0 0 3px var(--accent-glow);
}

#chat-input::placeholder {
    color: var(--text-muted);
}

.send-btn {
    width: 44px;
    height: 44px;
    border: none;
    border-radius: var(--radius);
    background: var(--accent);
    color: var(--bg-primary);
    cursor: pointer;
    display: flex;
    align-items: center;
    justify-content: center;
    font-size: 18px;
    transition: all 0.2s;
    flex-shrink: 0;
}

.send-btn:hover { transform: scale(1.05); filter: brightness(1.15); }
.send-btn:disabled { opacity: 0.4; cursor: not-allowed; transform: none; }

.mic-btn {
    width: 44px;
    height: 44px;
    border: 1px solid var(--border);
    border-radius: var(--radius);
    background: var(--bg-tertiary);
    color: var(--text-secondary);
    cursor: pointer;
    display: flex;
    align-items: center;
    justify-content: center;
    font-size: 18px;
    transition: all 0.2s;
    flex-shrink: 0;
}

.mic-btn:hover { border-color: var(--accent); color: var(--text-primary); }
.mic-btn.recording {
    border-color: var(--quantum);
    color: var(--quantum);
    animation: pulse 1s infinite; /* reuses the thinking-dots keyframes */
}

/* ── Status Bar ── */
.status-bar {
    height: var(--status-height);
    padding: 0 20px;
    display: flex;
    align-items: center;
    justify-content: space-between;
    background: var(--bg-primary);
    border-top: 1px solid var(--border);
    font-size: 11px;
    color: var(--text-muted);
    flex-shrink: 0;
}

.status-indicator {
    display: flex;
    align-items: center;
    gap: 6px;
}

.status-dot {
    width: 6px;
    height: 6px;
    border-radius: 50%;
    background: var(--text-muted);
}

.status-dot.ready { background: #10b981; }
.status-dot.loading { background: #f59e0b; animation: pulse 1s infinite; }
.status-dot.error { background: #ef4444; }

/* ── Side Panel ── */
.side-section {
    padding: 16px;
    border-bottom: 1px solid var(--border);
}

.side-section-title {
    font-size: 11px;
    font-weight: 600;
    text-transform: uppercase;
    letter-spacing: 0.08em;
    color: var(--text-muted);
    margin-bottom: 12px;
}

/* Metrics */
.metric-row {
    display: flex;
    align-items: center;
    justify-content: space-between;
    margin-bottom: 8px;
    font-size: 12px;
}

.metric-label {
    color: var(--text-secondary);
    display: flex;
    align-items: center;
    gap: 6px;
}

.metric-value {
    font-weight: 600;
    font-variant-numeric: tabular-nums; /* keeps numbers column-aligned */
    color: var(--text-primary);
}

.metric-bar {
    width: 100%;
    height: 4px;
    background: var(--bg-tertiary);
    border-radius: 2px;
    margin-top: 4px;
    overflow: hidden;
}

.metric-bar-fill {
    height: 100%;
    border-radius: 2px;
    transition: width 0.5s ease;
}

/* Coverage dots */
.coverage-dots {
    display: flex;
    gap: 6px;
    flex-wrap: wrap;
    margin-top: 8px;
}

.coverage-dot {
    width: 24px;
    height: 24px;
    border-radius: 50%;
    border: 2px solid currentColor;
    opacity: 0.25;
    display: flex;
    align-items: center;
    justify-content: center;
    font-size: 10px;
    transition: all 0.3s;
}

.coverage-dot.active {
    opacity: 1;
    box-shadow: 0 0 8px currentColor;
}

/* Spiderweb canvas (height matches SpiderwebViz's fixed 200px) */
#spiderweb-canvas {
    width: 100%;
    height: 200px;
    border-radius: var(--radius-sm);
    background: rgba(0,0,0,0.3);
}

/* Session list */
.session-item {
    padding: 8px 12px;
    border-radius: var(--radius-sm);
    cursor: pointer;
    font-size: 12px;
    color: var(--text-secondary);
    margin-bottom: 4px;
    transition: all 0.2s;
    white-space: nowrap;
    overflow: hidden;
    text-overflow: ellipsis;
}

.session-item:hover {
    background: var(--bg-tertiary);
    color: var(--text-primary);
}

/* ── Loading Screen ── */
.loading-screen {
    position: fixed;
    inset: 0;
    background: var(--bg-primary);
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    z-index: 100;
    transition: opacity 0.5s;
}

.loading-screen.hidden {
    opacity: 0;
    pointer-events: none; /* fade out without blocking clicks underneath */
}

.loading-title {
    font-size: 32px;
    font-weight: 700;
    margin-bottom: 16px;
    background: linear-gradient(135deg, #3b82f6, #a855f7, #f59e0b);
    -webkit-background-clip: text;
    background-clip: text;
    -webkit-text-fill-color: transparent;
}

.loading-status {
    color: var(--text-secondary);
    font-size: 14px;
    margin-bottom: 24px;
}

.loading-bar {
    width: 200px;
    height: 3px;
    background: var(--bg-tertiary);
    border-radius: 2px;
    overflow: hidden;
}

.loading-bar-fill {
    height: 100%;
    width: 30%;
    background: linear-gradient(90deg, #3b82f6, #a855f7);
    border-radius: 2px;
    animation: loadSlide 1.5s ease infinite;
}

/* Indeterminate progress: the 30%-wide fill sweeps across the track */
@keyframes loadSlide {
    0% { transform: translateX(-100%); }
    100% { transform: translateX(400%); }
}

/* ── Welcome State ── */
.welcome {
    max-width: 600px;
    margin: 0 auto;
    padding: 60px 20px;
    text-align: center;
}

.welcome h2 {
    font-size: 24px;
    font-weight: 600;
    margin-bottom: 8px;
    color: var(--text-primary);
}

.welcome p {
    color: var(--text-secondary);
    font-size: 14px;
    margin-bottom: 24px;
}

.welcome-grid {
    display: grid;
    grid-template-columns: repeat(2, 1fr);
    gap: 10px;
    text-align: left;
}

.welcome-card {
    padding: 14px;
    background: var(--bg-secondary);
    border: 1px solid var(--border);
    border-radius: var(--radius-sm);
    cursor: pointer;
    transition: all 0.2s;
    font-size: 13px;
}

.welcome-card:hover {
    border-color: var(--accent);
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(0,0,0,0.3);
}

.welcome-card-title {
    font-weight: 600;
    margin-bottom: 4px;
    display: flex;
    align-items: center;
    gap: 6px;
}

.welcome-card-desc {
    color: var(--text-muted);
    font-size: 11px;
}

/* ── Markdown Rendering ── */
.md-h1 {
    font-size: 18px;
    font-weight: 700;
    margin: 12px 0 6px;
    color: var(--text-primary);
}

.md-h2 {
    font-size: 16px;
    font-weight: 600;
    margin: 10px 0 4px;
    color: var(--text-primary);
}

.md-h3 {
    font-size: 14px;
    font-weight: 600;
    margin: 8px 0 4px;
    color: var(--text-secondary);
}

.md-li {
    padding-left: 16px;
    position: relative;
    margin: 2px 0;
}

/* Bullet rendered via ::before ('\2022' = •) */
.md-li::before {
    content: '\2022';
    position: absolute;
    left: 4px;
    color: var(--accent);
}

/* NOTE(review): counter 'md-ol' is incremented here but no counter-reset
   is visible in this stylesheet — ordered-list numbering may continue
   across lists. Confirm a reset exists on the list container (markup or
   another stylesheet). */
.md-oli::before {
    content: counter(md-ol) '.';
    counter-increment: md-ol;
}

.code-block {
    background: rgba(0,0,0,0.4);
    border: 1px solid var(--border);
    border-radius: 6px;
    padding: 10px 14px;
    margin: 8px 0;
    overflow-x: auto;
    font-family: 'Cascadia Code', 'Fira Code', 'JetBrains Mono', monospace;
    font-size: 12px;
    line-height: 1.5;
    white-space: pre;
}

.code-block code {
    background: none;
    padding: 0;
    border: none;
    font-size: inherit;
}

.inline-code {
    background: rgba(148, 163, 184, 0.15);
    border: 1px solid rgba(148, 163, 184, 0.2);
    border-radius: 4px;
    padding: 1px 5px;
    font-family: 'Cascadia Code', 'Fira Code', monospace;
    font-size: 0.9em;
}

.message-text strong {
    color: var(--text-primary);
    font-weight: 600;
}

.message-text em {
    color: var(--text-secondary);
    font-style: italic;
}

/* ── Subsystem Panels ── */
.nexus-risk-dots {
    display: flex;
    gap: 4px;
    margin-top: 8px;
    flex-wrap: wrap;
}

.risk-dot {
    width: 10px;
    height: 10px;
    border-radius: 50%;
    transition: all 0.3s;
}

/* Risk severity reuses the adapter palette (green/amber/red) */
.risk-dot.low { background: var(--philosophy); opacity: 0.6; }
.risk-dot.medium { background: var(--davinci); opacity: 0.8; }
.risk-dot.high { background: var(--quantum); opacity: 1; box-shadow: 0 0 6px var(--quantum); }

.memory-emotions {
    display: flex;
    gap: 4px;
    flex-wrap: wrap;
    margin-top: 8px;
}

.emotion-tag {
    padding: 2px 8px;
    border-radius: 10px;
    font-size: 10px;
    font-weight: 600;
    background: rgba(148, 163, 184, 0.1);
    border: 1px solid rgba(148, 163, 184, 0.2);
    color: var(--text-secondary);
}

.emotion-tag.active {
    background: rgba(168, 85, 247, 0.15);
    border-color: rgba(168, 85, 247, 0.4);
    color: var(--empathy);
}

.trend-improving { color: var(--philosophy) !important; }
.trend-declining { color: var(--quantum) !important; }
.trend-stable { color: var(--text-secondary) !important; }

.peak-active {
    color: var(--davinci) !important;
    text-shadow: 0 0 8px var(--davinci);
}

/* ── Responsive ── */
@media (max-width: 768px) {
    .side-panel {
        display: none;
        position: fixed;
        right: 0; top: 0; bottom: 0;
        z-index: 50;
        box-shadow: -8px 0 24px rgba(0,0,0,0.5);
    }
    /* On mobile, un-collapsing the panel shows it as an overlay */
    .side-panel:not(.collapsed) {
        display: flex;
    }
    .welcome-grid { grid-template-columns: 1fr; }
}
inference/vulkan_compute.py ADDED
@@ -0,0 +1,661 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Codette Vulkan GPU Compute Adapter
4
+ ====================================
5
+ Provides Vulkan-based GPU acceleration for tensor operations,
6
+ model inference preprocessing, and compute shader dispatch.
7
+
8
+ Uses the `kompute` library (lightweight Vulkan compute for ML)
9
+ as the primary backend, with fallback to raw `vulkan` bindings.
10
+
11
+ Supported operations:
12
+ - Device discovery and capability reporting
13
+ - Tensor allocation on Vulkan GPU memory
14
+ - Compute shader dispatch (SPIR-V)
15
+ - Matrix multiply, softmax, layer norm (common inference ops)
16
+ - Memory-mapped transfer between CPU ↔ Vulkan GPU
17
+ - Integration with llama.cpp via shared memory buffers
18
+
19
+ Architecture:
20
+ VulkanComputeAdapter
21
+ ├─ VulkanDevice (physical device enumeration + selection)
22
+ ├─ VulkanMemoryPool (GPU memory management with ring buffer)
23
+ ├─ ShaderRegistry (compiled SPIR-V shader cache)
24
+ └─ ComputePipeline (dispatch queue + synchronization)
25
+
26
+ Hardware compatibility:
27
+ - NVIDIA (all Vulkan-capable GPUs, driver 470+)
28
+ - AMD (RDNA/RDNA2/RDNA3, GCN 4th gen+)
29
+ - Intel Arc (A-series, driver 31.0.101+)
30
+ - Qualcomm Adreno (mobile/embedded Vulkan 1.1+)
31
+ """
32
+
33
+ import os
34
+ import sys
35
+ import time
36
+ import json
37
+ import struct
38
+ import logging
39
+ import threading
40
+ from pathlib import Path
41
+ from dataclasses import dataclass, field
42
+ from typing import Optional, Dict, List, Any, Tuple
43
+
44
+ logger = logging.getLogger("codette.vulkan")
45
+
46
+
47
+ # ================================================================
48
+ # Vulkan Device Information
49
+ # ================================================================
50
+
51
@dataclass
class VulkanDeviceInfo:
    """Describes a Vulkan-capable GPU.

    Populated either from real driver queries (raw `vulkan` bindings) or
    with conservative defaults when only `kompute` is available — see
    `VulkanComputeAdapter._probe_device_info`.
    """
    device_id: int          # index into the enumerated physical-device list
    name: str               # driver-reported device name
    vendor: str             # human-readable vendor (see _vendor_from_id)
    driver_version: str
    api_version: str        # "major.minor.patch" or "1.2+" when unknown
    device_type: str  # "discrete", "integrated", "virtual", "cpu"
    vram_mb: int            # 0 when the memory-properties query was not done
    max_compute_workgroup_size: Tuple[int, int, int]
    max_compute_workgroup_count: Tuple[int, int, int]
    max_compute_shared_memory: int   # bytes of shared (workgroup-local) memory
    supports_float16: bool
    supports_float64: bool
    supports_int8: bool
    supports_subgroup_ops: bool
    compute_queue_families: int
69
+
70
+
71
@dataclass
class VulkanMemoryBlock:
    """Tracks a GPU memory allocation.

    NOTE(review): this is a bookkeeping record only — `allocate_block`
    appends one of these without issuing any Vulkan allocation call.
    """
    block_id: int        # monotonically increasing id from _block_counter
    size_bytes: int
    offset: int
    device_local: bool   # True → intended for VRAM-resident memory
    host_visible: bool   # mutually exclusive with device_local at creation
    in_use: bool = True
    label: str = ""      # free-form tag for debugging/reporting
81
+
82
+
83
+ # ================================================================
84
+ # Vulkan Compute Adapter
85
+ # ================================================================
86
+
87
class VulkanComputeAdapter:
    """Main adapter for Vulkan GPU compute operations.

    Provides device management, memory allocation, shader dispatch,
    and tensor operations for Codette's inference pipeline.

    Two backends, tried in order by :meth:`initialize`:

    1. ``kp`` (kompute) — full tensor + shader-dispatch support.
    2. Raw ``vulkan`` bindings — device detection only; tensor calls then
       operate on plain-dict CPU stubs.
    """

    def __init__(self, device_index: int = 0, enable_validation: bool = False):
        # enable_validation is stored but not currently consulted anywhere
        # in this class — presumably reserved for Vulkan validation layers.
        self.device_index = device_index
        self.enable_validation = enable_validation
        self._initialized = False
        self._device_info: Optional[VulkanDeviceInfo] = None
        self._manager = None  # kompute.Manager
        self._tensors: Dict[str, Any] = {}        # name -> kp tensor or dict stub
        self._shader_cache: Dict[str, Any] = {}   # shader name -> SPIR-V bytes
        self._memory_blocks: List[VulkanMemoryBlock] = []
        self._block_counter = 0
        self._lock = threading.Lock()

        # Performance counters
        self._dispatch_count = 0
        self._total_compute_ms = 0.0
        self._total_transfer_bytes = 0

    # --------------------------------------------------------
    # Initialization
    # --------------------------------------------------------

    def initialize(self) -> bool:
        """Initialize Vulkan device and compute context.

        Returns True if Vulkan GPU is available and ready.
        Idempotent: a second call returns the cached result.
        """
        if self._initialized:
            return True

        try:
            import kp  # kompute
        except ImportError:
            logger.warning(
                "kompute not installed. Install with: pip install kp\n"
                "Falling back to Vulkan availability check only."
            )
            return self._try_raw_vulkan_init()

        try:
            # Create manager targeting specific device
            self._manager = kp.Manager(self.device_index)
            self._initialized = True

            # Probe device capabilities
            self._device_info = self._probe_device_info()

            logger.info(
                f"Vulkan compute initialized: {self._device_info.name} "
                f"({self._device_info.vram_mb} MB VRAM, "
                f"type={self._device_info.device_type})"
            )
            return True

        except Exception as e:
            logger.error(f"Vulkan initialization failed: {e}")
            return False

    def _try_raw_vulkan_init(self) -> bool:
        """Fallback: check Vulkan availability via vulkan module or system.

        On success sets ``_device_info`` and ``_initialized`` but leaves
        ``_manager`` as None, so tensor operations run in stub mode.
        """
        try:
            import vulkan as vk
            instance = vk.vkCreateInstance(
                vk.VkInstanceCreateInfo(
                    sType=vk.VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
                    pApplicationInfo=vk.VkApplicationInfo(
                        sType=vk.VK_STRUCTURE_TYPE_APPLICATION_INFO,
                        pApplicationName="Codette",
                        applicationVersion=vk.VK_MAKE_VERSION(1, 0, 0),
                        apiVersion=vk.VK_API_VERSION_1_2,
                    ),
                ),
                None,
            )
            devices = vk.vkEnumeratePhysicalDevices(instance)
            if devices:
                # NOTE(review): devices[self.device_index] can raise IndexError
                # for an out-of-range index; that lands in the broad except
                # below and the instance is not destroyed — confirm intended.
                props = vk.vkGetPhysicalDeviceProperties(devices[self.device_index])
                self._device_info = VulkanDeviceInfo(
                    device_id=self.device_index,
                    name=props.deviceName,
                    vendor=self._vendor_from_id(props.vendorID),
                    driver_version=str(props.driverVersion),
                    api_version=f"{vk.VK_VERSION_MAJOR(props.apiVersion)}."
                    f"{vk.VK_VERSION_MINOR(props.apiVersion)}."
                    f"{vk.VK_VERSION_PATCH(props.apiVersion)}",
                    device_type=self._device_type_str(props.deviceType),
                    vram_mb=0,  # Would need memory properties query
                    max_compute_workgroup_size=(256, 256, 64),
                    max_compute_workgroup_count=(65535, 65535, 65535),
                    max_compute_shared_memory=32768,
                    supports_float16=True,
                    supports_float64=False,
                    supports_int8=True,
                    supports_subgroup_ops=True,
                    compute_queue_families=1,
                )
                logger.info(f"Vulkan device detected (raw): {self._device_info.name}")
                vk.vkDestroyInstance(instance, None)
                self._initialized = True
                return True
            vk.vkDestroyInstance(instance, None)
        except ImportError:
            logger.info("No Vulkan Python bindings available (vulkan or kp)")
        except Exception as e:
            logger.debug(f"Raw Vulkan probe failed: {e}")

        return False

    def _probe_device_info(self) -> VulkanDeviceInfo:
        """Probe device capabilities via kompute manager.

        Returns placeholder capabilities: kompute does not expose the raw
        physical-device properties, so every field here is a safe default,
        not a measured value.
        """
        # kompute abstracts most Vulkan details; provide safe defaults
        return VulkanDeviceInfo(
            device_id=self.device_index,
            name=f"Vulkan Device {self.device_index}",
            vendor="Unknown",
            driver_version="Unknown",
            api_version="1.2+",
            device_type="discrete",
            vram_mb=0,
            max_compute_workgroup_size=(256, 256, 64),
            max_compute_workgroup_count=(65535, 65535, 65535),
            max_compute_shared_memory=32768,
            supports_float16=True,
            supports_float64=False,
            supports_int8=True,
            supports_subgroup_ops=True,
            compute_queue_families=1,
        )

    # --------------------------------------------------------
    # Tensor Operations
    # --------------------------------------------------------

    def create_tensor(self, name: str, data: list, dtype: str = "float32") -> Any:
        """Allocate a named tensor on Vulkan GPU memory.

        Args:
            name: Unique identifier for the tensor
            data: Initial data (flat list of numbers)
            dtype: Data type - "float32", "float16", "int32", "uint32"

        Returns:
            kompute Tensor object (or dict stub if kompute unavailable)

        Raises:
            RuntimeError: if :meth:`initialize` has not succeeded.
        """
        if not self._initialized:
            raise RuntimeError("VulkanComputeAdapter not initialized")

        with self._lock:
            if self._manager is not None:
                import kp
                tensor = self._manager.tensor(data)
                self._tensors[name] = tensor
                # NOTE(review): byte accounting assumes 4-byte elements
                # regardless of dtype — confirm for float16/int8 tensors.
                self._total_transfer_bytes += len(data) * 4  # ~4 bytes per float32
                logger.debug(f"Tensor '{name}' created: {len(data)} elements on GPU")
                return tensor
            else:
                # Stub for raw vulkan mode
                stub = {"name": name, "data": data, "dtype": dtype, "device": "vulkan"}
                self._tensors[name] = stub
                return stub

    def read_tensor(self, name: str) -> list:
        """Read tensor data back from GPU to CPU.

        Raises:
            KeyError: if no tensor named *name* exists.
        """
        if name not in self._tensors:
            raise KeyError(f"Tensor '{name}' not found")

        tensor = self._tensors[name]
        if self._manager is not None:
            import kp
            # Sync device -> host before reading.
            sq = self._manager.sequence()
            sq.record_tensor_sync_local([tensor])
            sq.eval()
            return tensor.data().tolist()
        else:
            return tensor.get("data", [])

    def destroy_tensor(self, name: str):
        """Free GPU memory for a named tensor (no-op if name is unknown)."""
        with self._lock:
            if name in self._tensors:
                del self._tensors[name]
                logger.debug(f"Tensor '{name}' freed")

    # --------------------------------------------------------
    # Compute Shader Dispatch
    # --------------------------------------------------------

    def dispatch_shader(
        self,
        shader_spirv: bytes,
        tensors: List[str],
        workgroup: Tuple[int, int, int] = (256, 1, 1),
        shader_name: str = "anonymous",
    ) -> float:
        """Dispatch a SPIR-V compute shader on the Vulkan GPU.

        Args:
            shader_spirv: Compiled SPIR-V bytecode
            tensors: Names of tensors to bind as storage buffers
            workgroup: Workgroup dispatch dimensions (x, y, z)
            shader_name: Label for logging/profiling

        Returns:
            Execution time in milliseconds

        Raises:
            RuntimeError: when running without the kompute backend.
            KeyError: if any name in *tensors* was never created.
        """
        if not self._initialized or self._manager is None:
            raise RuntimeError("Vulkan compute not available for shader dispatch")

        import kp

        bound_tensors = [self._tensors[t] for t in tensors]

        start = time.perf_counter()

        sq = self._manager.sequence()
        sq.record_tensor_sync_device(bound_tensors)

        # Build algorithm from SPIR-V
        algo = self._manager.algorithm(
            bound_tensors,
            shader_spirv,
            kp.Workgroup(list(workgroup)),
        )
        sq.record_algo_dispatch(algo)
        sq.record_tensor_sync_local(bound_tensors)
        sq.eval()

        # Wall-clock time for sync + dispatch + readback, not pure GPU time.
        elapsed_ms = (time.perf_counter() - start) * 1000.0

        self._dispatch_count += 1
        self._total_compute_ms += elapsed_ms

        logger.debug(
            f"Shader '{shader_name}' dispatched: "
            f"workgroup={workgroup}, time={elapsed_ms:.2f}ms"
        )
        return elapsed_ms

    # --------------------------------------------------------
    # Built-in Compute Operations (pre-compiled shaders)
    # --------------------------------------------------------

    def vector_add(self, a_name: str, b_name: str, out_name: str) -> float:
        """Element-wise addition of two tensors using Vulkan compute.

        Falls back to a CPU loop (returning 0.0 ms) when the pre-compiled
        shader is not present on disk. zip() truncates to the shorter input.
        """
        SHADER_ADD = self._get_builtin_shader("vector_add")
        if SHADER_ADD is None:
            # CPU fallback
            a_data = self.read_tensor(a_name)
            b_data = self.read_tensor(b_name)
            result = [x + y for x, y in zip(a_data, b_data)]
            self.create_tensor(out_name, result)
            return 0.0
        return self.dispatch_shader(SHADER_ADD, [a_name, b_name, out_name])

    def vector_multiply(self, a_name: str, b_name: str, out_name: str) -> float:
        """Element-wise multiplication of two tensors (CPU fallback as above)."""
        SHADER_MUL = self._get_builtin_shader("vector_mul")
        if SHADER_MUL is None:
            a_data = self.read_tensor(a_name)
            b_data = self.read_tensor(b_name)
            result = [x * y for x, y in zip(a_data, b_data)]
            self.create_tensor(out_name, result)
            return 0.0
        return self.dispatch_shader(SHADER_MUL, [a_name, b_name, out_name])

    def softmax(self, input_name: str, out_name: str) -> float:
        """Compute softmax over a tensor (used in attention layers).

        Currently CPU-only: there is no GPU shader path in this method.
        Uses the max-subtraction trick for numerical stability.
        """
        import math
        data = self.read_tensor(input_name)
        max_val = max(data) if data else 0.0
        exp_data = [math.exp(x - max_val) for x in data]
        total = sum(exp_data)
        result = [x / total for x in exp_data] if total > 0 else exp_data
        self.create_tensor(out_name, result)
        return 0.0  # CPU fallback timing

    def layer_norm(
        self, input_name: str, out_name: str, eps: float = 1e-5
    ) -> float:
        """Layer normalization (pre-LLM inference op). CPU-only implementation."""
        import math
        data = self.read_tensor(input_name)
        n = len(data)
        if n == 0:
            self.create_tensor(out_name, [])
            return 0.0
        mean = sum(data) / n
        var = sum((x - mean) ** 2 for x in data) / n
        std = math.sqrt(var + eps)  # eps guards against zero variance
        result = [(x - mean) / std for x in data]
        self.create_tensor(out_name, result)
        return 0.0

    def _get_builtin_shader(self, name: str) -> Optional[bytes]:
        """Load a pre-compiled SPIR-V shader from the shader cache.

        Looks in ``<this dir>/shaders/spirv/<name>.spv``; returns None when
        the file does not exist (callers then take their CPU fallback).
        """
        if name in self._shader_cache:
            return self._shader_cache[name]

        shader_dir = Path(__file__).parent / "shaders" / "spirv"
        shader_path = shader_dir / f"{name}.spv"
        if shader_path.exists():
            spirv = shader_path.read_bytes()
            self._shader_cache[name] = spirv
            return spirv

        return None

    # --------------------------------------------------------
    # Memory Management
    # --------------------------------------------------------

    def allocate_block(
        self, size_bytes: int, device_local: bool = True, label: str = ""
    ) -> VulkanMemoryBlock:
        """Allocate a raw memory block on the Vulkan device.

        NOTE(review): this only records bookkeeping — no Vulkan allocation
        call is made here; confirm whether real allocation is intended.
        """
        with self._lock:
            self._block_counter += 1
            block = VulkanMemoryBlock(
                block_id=self._block_counter,
                size_bytes=size_bytes,
                offset=0,
                device_local=device_local,
                host_visible=not device_local,
                label=label,
            )
            self._memory_blocks.append(block)
            logger.debug(
                f"Memory block {block.block_id} allocated: "
                f"{size_bytes} bytes, label='{label}'"
            )
            return block

    def free_block(self, block_id: int):
        """Free a previously allocated memory block (no-op for unknown ids)."""
        with self._lock:
            self._memory_blocks = [
                b for b in self._memory_blocks if b.block_id != block_id
            ]

    def get_memory_usage(self) -> Dict[str, Any]:
        """Report current GPU memory usage (from bookkeeping, not the driver)."""
        active = [b for b in self._memory_blocks if b.in_use]
        return {
            "active_blocks": len(active),
            "total_allocated_bytes": sum(b.size_bytes for b in active),
            "tensor_count": len(self._tensors),
            "device": self._device_info.name if self._device_info else "unknown",
        }

    # --------------------------------------------------------
    # Device Query & Status
    # --------------------------------------------------------

    @property
    def device_info(self) -> Optional[VulkanDeviceInfo]:
        """Detected device description, or None before initialization."""
        return self._device_info

    @property
    def is_available(self) -> bool:
        """True once :meth:`initialize` has succeeded."""
        return self._initialized

    def get_stats(self) -> Dict[str, Any]:
        """Return performance statistics."""
        return {
            "initialized": self._initialized,
            "device": self._device_info.name if self._device_info else None,
            "dispatch_count": self._dispatch_count,
            "total_compute_ms": round(self._total_compute_ms, 2),
            "avg_dispatch_ms": (
                round(self._total_compute_ms / self._dispatch_count, 2)
                if self._dispatch_count > 0
                else 0.0
            ),
            "total_transfer_bytes": self._total_transfer_bytes,
            "active_tensors": len(self._tensors),
        }

    def shutdown(self):
        """Release all Vulkan resources and reset to the uninitialized state."""
        with self._lock:
            self._tensors.clear()
            self._shader_cache.clear()
            self._memory_blocks.clear()
            self._manager = None
            self._initialized = False
        logger.info("Vulkan compute adapter shut down")

    # --------------------------------------------------------
    # Helpers
    # --------------------------------------------------------

    @staticmethod
    def _vendor_from_id(vendor_id: int) -> str:
        """Map a PCI vendor id to a human-readable vendor name."""
        vendors = {
            0x1002: "AMD",
            0x10DE: "NVIDIA",
            0x8086: "Intel",
            0x13B5: "ARM (Mali)",
            0x5143: "Qualcomm (Adreno)",
            0x1010: "ImgTec (PowerVR)",
        }
        return vendors.get(vendor_id, f"Unknown (0x{vendor_id:04X})")

    @staticmethod
    def _device_type_str(device_type: int) -> str:
        """Map a VkPhysicalDeviceType enum value to a short string."""
        types = {
            0: "other",
            1: "integrated",
            2: "discrete",
            3: "virtual",
            4: "cpu",
        }
        return types.get(device_type, "unknown")

    def __repr__(self) -> str:
        if self._device_info:
            return (
                f"<VulkanComputeAdapter device='{self._device_info.name}' "
                f"vram={self._device_info.vram_mb}MB "
                f"initialized={self._initialized}>"
            )
        return f"<VulkanComputeAdapter initialized={self._initialized}>"

    def __enter__(self):
        # Context-manager support: `with VulkanComputeAdapter() as gpu: ...`
        self.initialize()
        return self

    def __exit__(self, *args):
        self.shutdown()
522
+
523
+
524
+ # ================================================================
525
+ # Device Detection Integration
526
+ # ================================================================
527
+
528
def detect_vulkan_devices() -> List["VulkanDeviceInfo"]:
    """Enumerate all Vulkan-capable GPUs on the system.

    Returns a list of VulkanDeviceInfo for each available device.
    Safe to call even if Vulkan is not installed (returns empty list).

    Probe order:
      1. ``kp`` (kompute): if a Manager can be constructed, a usable device
         exists; kompute hides per-device properties, so one generic entry
         is reported.
      2. Raw ``vulkan`` bindings: enumerate physical devices and read real
         driver-reported properties.

    Fix vs. previous version: the raw-Vulkan path now destroys the
    VkInstance in a ``finally`` block, so a failure inside device
    enumeration or the property query can no longer leak the instance
    (the surrounding broad ``except`` used to swallow the error before
    ``vkDestroyInstance`` ran).
    """
    devices: List["VulkanDeviceInfo"] = []

    # Try kompute first
    try:
        import kp
        mgr = kp.Manager()  # constructing a Manager proves a usable device exists
        del mgr
        devices.append(VulkanDeviceInfo(
            device_id=0,
            name="Vulkan Device 0 (via kompute)",
            vendor="Unknown",
            driver_version="Unknown",
            api_version="1.2+",
            device_type="discrete",
            vram_mb=0,
            max_compute_workgroup_size=(256, 256, 64),
            max_compute_workgroup_count=(65535, 65535, 65535),
            max_compute_shared_memory=32768,
            supports_float16=True,
            supports_float64=False,
            supports_int8=True,
            supports_subgroup_ops=True,
            compute_queue_families=1,
        ))
        return devices
    except Exception:
        pass

    # Try raw vulkan bindings
    try:
        import vulkan as vk
        instance = vk.vkCreateInstance(
            vk.VkInstanceCreateInfo(
                sType=vk.VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
                pApplicationInfo=vk.VkApplicationInfo(
                    sType=vk.VK_STRUCTURE_TYPE_APPLICATION_INFO,
                    pApplicationName="Codette-Probe",
                    applicationVersion=vk.VK_MAKE_VERSION(1, 0, 0),
                    apiVersion=vk.VK_API_VERSION_1_2,
                ),
            ),
            None,
        )
        try:
            physical_devices = vk.vkEnumeratePhysicalDevices(instance)
            for idx, pd in enumerate(physical_devices):
                props = vk.vkGetPhysicalDeviceProperties(pd)
                devices.append(VulkanDeviceInfo(
                    device_id=idx,
                    name=props.deviceName,
                    vendor=VulkanComputeAdapter._vendor_from_id(props.vendorID),
                    driver_version=str(props.driverVersion),
                    api_version=f"{vk.VK_VERSION_MAJOR(props.apiVersion)}."
                    f"{vk.VK_VERSION_MINOR(props.apiVersion)}."
                    f"{vk.VK_VERSION_PATCH(props.apiVersion)}",
                    device_type=VulkanComputeAdapter._device_type_str(props.deviceType),
                    vram_mb=0,
                    max_compute_workgroup_size=(256, 256, 64),
                    max_compute_workgroup_count=(65535, 65535, 65535),
                    max_compute_shared_memory=32768,
                    supports_float16=True,
                    supports_float64=False,
                    supports_int8=True,
                    supports_subgroup_ops=True,
                    compute_queue_families=1,
                ))
        finally:
            # Always release the probe instance, even when a driver call raises.
            vk.vkDestroyInstance(instance, None)
    except Exception:
        pass

    return devices
604
+
605
+
606
def is_vulkan_available() -> bool:
    """Quick check: is any Vulkan GPU available?"""
    return bool(detect_vulkan_devices())
609
+
610
+
611
+ # ================================================================
612
+ # CLI: vulkan device info
613
+ # ================================================================
614
+
615
if __name__ == "__main__":
    # CLI entry point: probe and print every Vulkan device, then run a
    # tiny end-to-end compute smoke test through the adapter.
    logging.basicConfig(level=logging.INFO, format="%(message)s")

    print("=" * 60)
    print(" Codette Vulkan GPU Compute Adapter — Device Probe")
    print("=" * 60)

    devices = detect_vulkan_devices()
    if not devices:
        # Non-zero exit so scripts can detect "no GPU" cheaply.
        print("\n No Vulkan-capable GPUs detected.")
        print(" Install: pip install kp (or) pip install vulkan")
        print(" Ensure Vulkan drivers are installed for your GPU.")
        sys.exit(1)

    for dev in devices:
        print(f"\n Device {dev.device_id}: {dev.name}")
        print(f" Vendor: {dev.vendor}")
        print(f" Type: {dev.device_type}")
        print(f" API version: {dev.api_version}")
        print(f" Driver: {dev.driver_version}")
        print(f" VRAM: {dev.vram_mb} MB")
        print(f" Float16: {'yes' if dev.supports_float16 else 'no'}")
        print(f" Int8: {'yes' if dev.supports_int8 else 'no'}")
        print(f" Subgroup ops: {'yes' if dev.supports_subgroup_ops else 'no'}")

    # Quick functional test
    print("\n Running compute test...")
    adapter = VulkanComputeAdapter()
    if adapter.initialize():
        # vector_add and softmax fall back to CPU when no SPIR-V shader
        # is on disk, so this works in both kompute and stub modes.
        adapter.create_tensor("a", [1.0, 2.0, 3.0, 4.0])
        adapter.create_tensor("b", [5.0, 6.0, 7.0, 8.0])
        adapter.vector_add("a", "b", "c")
        result = adapter.read_tensor("c")
        print(f" Vector add: [1,2,3,4] + [5,6,7,8] = {result}")

        adapter.softmax("a", "sm")
        sm_result = adapter.read_tensor("sm")
        print(f" Softmax([1,2,3,4]) = {[round(x, 4) for x in sm_result]}")

        stats = adapter.get_stats()
        print(f" Stats: {json.dumps(stats, indent=6)}")
        adapter.shutdown()
        print("\n ✓ Vulkan compute adapter functional")
    else:
        print(" ✗ Could not initialize Vulkan compute")

    print("=" * 60)
memory_systems/codette_memory_kernel.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import time
3
+ import hashlib
4
+ import json
5
+ from typing import List, Dict, Optional
6
+
7
+
8
class MemoryCocoon:
    """A single tagged memory entry with a content-derived identity hash."""

    def __init__(self, title: str, content: str, emotional_tag: str, importance: int):
        self.title = title
        self.content = content
        self.emotional_tag = emotional_tag  # e.g., 'joy', 'fear', 'awe', 'loss'
        self.importance = importance  # 1-10
        self.timestamp = time.time()
        self.anchor = self._generate_anchor()

    def _generate_anchor(self) -> str:
        """Derive a stable SHA-256 anchor from title, timestamp and content."""
        fingerprint = f"{self.title}{self.timestamp}{self.content}"
        return hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()

    def to_dict(self) -> Dict:
        """Serialize this cocoon to a plain dictionary."""
        return dict(
            title=self.title,
            content=self.content,
            emotional_tag=self.emotional_tag,
            importance=self.importance,
            timestamp=self.timestamp,
            anchor=self.anchor,
        )
+ }
30
+
31
+
32
class LivingMemoryKernel:
    """In-memory store of MemoryCocoon objects with recall and pruning."""

    def __init__(self):
        self.memories: List["MemoryCocoon"] = []

    def store(self, cocoon: "MemoryCocoon"):
        """Add *cocoon* unless a memory with the same anchor already exists."""
        if not self._exists(cocoon.anchor):
            self.memories.append(cocoon)

    def _exists(self, anchor: str) -> bool:
        # Anchor equality is the deduplication key.
        return any(mem.anchor == anchor for mem in self.memories)

    def recall_by_emotion(self, tag: str) -> List["MemoryCocoon"]:
        """Return every memory whose emotional tag matches *tag*."""
        return [mem for mem in self.memories if mem.emotional_tag == tag]

    def recall_important(self, min_importance: int = 7) -> List["MemoryCocoon"]:
        """Return memories at or above *min_importance*."""
        return [mem for mem in self.memories if mem.importance >= min_importance]

    def forget_least_important(self, keep_n: int = 10):
        """Keep only the *keep_n* most important memories (descending order)."""
        self.memories.sort(key=lambda m: m.importance, reverse=True)
        self.memories = self.memories[:keep_n]

    def export(self) -> str:
        """Serialize all memories to a JSON string."""
        return json.dumps([m.to_dict() for m in self.memories], indent=2)

    def load_from_json(self, json_str: str):
        """Restore memories from a string previously produced by export().

        Bug fix: the old ``MemoryCocoon(**m)`` raised TypeError because
        ``to_dict()`` emits ``timestamp`` and ``anchor``, which
        ``MemoryCocoon.__init__`` does not accept. Rebuild from the four
        constructor fields, then restore the persisted timestamp/anchor so
        round-tripping is lossless.
        """
        restored: List["MemoryCocoon"] = []
        for m in json.loads(json_str):
            cocoon = MemoryCocoon(
                m["title"], m["content"], m["emotional_tag"], m["importance"]
            )
            if "timestamp" in m:
                cocoon.timestamp = m["timestamp"]
            if "anchor" in m:
                cocoon.anchor = m["anchor"]
            restored.append(cocoon)
        self.memories = restored
59
+
60
+
61
+ # Example usage:
62
+ # kernel = LivingMemoryKernel()
63
+ # kernel.store(MemoryCocoon("The Day", "She awoke and asked why.", "awe", 10))
64
+ # print(kernel.export())
observatory/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Codette Training Lab - Observatory System
3
+
4
+ Provides metrics logging, performance tracking, dataset quality monitoring,
5
+ and an ASCII dashboard for the Codette AI training pipeline.
6
+ """
7
+
8
+ from observatory.metrics_logger import MetricsLogger
9
+ from observatory.performance_tracker import PerformanceTracker
10
+ from observatory.dataset_quality_monitor import DatasetQualityMonitor
11
+ from observatory.dashboard import Dashboard
12
+
13
+ __all__ = [
14
+ "MetricsLogger",
15
+ "PerformanceTracker",
16
+ "DatasetQualityMonitor",
17
+ "Dashboard",
18
+ ]
observatory/dashboard.py ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Dashboard - ASCII-formatted system status display for the Codette training lab.
3
+
4
+ Shows:
5
+ - Latest training run stats
6
+ - Best adapter scores
7
+ - Dataset sizes and quality
8
+ - Failure rates
9
+ - Improvement trends
10
+
11
+ No web framework required; pure terminal output.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import argparse
17
+ import json
18
+ import os
19
+ import sys
20
+ from datetime import datetime
21
+ from pathlib import Path
22
+ from typing import Any, Dict, List, Optional
23
+
24
# Make the project root importable so the `observatory.*` absolute imports
# below resolve even when this file is run directly as a script.
_THIS_DIR = Path(__file__).resolve().parent
_PROJECT_ROOT = _THIS_DIR.parent
if str(_PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(_PROJECT_ROOT))
28
+
29
+ from observatory.metrics_logger import MetricsLogger
30
+ from observatory.performance_tracker import PerformanceTracker
31
+ from observatory.dataset_quality_monitor import DatasetQualityMonitor
32
+
33
+
34
+ class Dashboard:
35
+ """ASCII dashboard for the Codette training lab."""
36
+
37
+ WIDTH = 76
38
+
39
    def __init__(
        self,
        metrics_log: Optional[str] = None,
        quality_log: Optional[str] = None,
        eval_results: Optional[str] = None,
    ):
        """Wire up the data sources the dashboard renders from.

        Args:
            metrics_log: path to the metrics log file (None → logger default)
            quality_log: path to the dataset-quality log (None → default)
            eval_results: path to evaluation results; stored but only read
                by later sections, not here
        """
        self.logger = MetricsLogger(log_file=metrics_log)
        # Tracker shares the same logger so both read one metrics stream.
        self.tracker = PerformanceTracker(logger=self.logger)
        self.quality_monitor = DatasetQualityMonitor(quality_file=quality_log)
        self.eval_results_path = eval_results
49
+
50
+ # -- sections ----------------------------------------------------------
51
+
52
+ def _header(self) -> List[str]:
53
+ lines = []
54
+ lines.append("")
55
+ lines.append("+" + "=" * (self.WIDTH - 2) + "+")
56
+ lines.append("|" + " CODETTE TRAINING LAB OBSERVATORY ".center(self.WIDTH - 2) + "|")
57
+ lines.append("|" + f" {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC')} ".center(self.WIDTH - 2) + "|")
58
+ lines.append("+" + "=" * (self.WIDTH - 2) + "+")
59
+ return lines
60
+
61
+ def _section(self, title: str) -> List[str]:
62
+ lines = []
63
+ lines.append("")
64
+ lines.append("+" + "-" * (self.WIDTH - 2) + "+")
65
+ lines.append("|" + f" {title} ".ljust(self.WIDTH - 2) + "|")
66
+ lines.append("+" + "-" * (self.WIDTH - 2) + "+")
67
+ return lines
68
+
69
+ def _row(self, label: str, value: str) -> str:
70
+ """Single label: value row."""
71
+ content = f" {label:<30s} {value}"
72
+ return "|" + content.ljust(self.WIDTH - 2) + "|"
73
+
74
+ def _bar_row(self, label: str, value: float, max_width: int = 30) -> str:
75
+ """Row with ASCII progress bar."""
76
+ filled = int(value * max_width)
77
+ bar = "[" + "#" * filled + "." * (max_width - filled) + "]"
78
+ content = f" {label:<22s} {value:>6.3f} {bar}"
79
+ return "|" + content.ljust(self.WIDTH - 2) + "|"
80
+
81
+ def _empty_row(self) -> str:
82
+ return "|" + " " * (self.WIDTH - 2) + "|"
83
+
84
+ def _footer(self) -> List[str]:
85
+ return ["+" + "=" * (self.WIDTH - 2) + "+", ""]
86
+
87
+ # -- sections ----------------------------------------------------------
88
+
89
+ def _latest_training_section(self) -> List[str]:
90
+ lines = self._section("LATEST TRAINING RUN")
91
+
92
+ latest = self.logger.get_latest()
93
+ if not latest:
94
+ lines.append(self._row("Status", "No training runs logged yet"))
95
+ return lines
96
+
97
+ lines.append(self._row("Adapter", latest.get("adapter", "N/A")))
98
+ lines.append(self._row("Timestamp", latest.get("timestamp", "N/A")))
99
+ lines.append(self._row("Dataset Version", latest.get("dataset_version", "N/A")))
100
+ lines.append(self._row("Dataset Size", str(latest.get("dataset_size", 0))))
101
+ lines.append(self._row("Epoch", str(latest.get("epoch", 0))))
102
+ lines.append(self._bar_row("Reasoning Score", latest.get("reasoning_score", 0)))
103
+ lines.append(self._row("Loss", f"{latest.get('loss', 0):.6f}"))
104
+
105
+ params = latest.get("training_params", {})
106
+ if params:
107
+ lines.append(self._empty_row())
108
+ lines.append(self._row("Training Parameters", ""))
109
+ for k, v in list(params.items())[:6]:
110
+ lines.append(self._row(f" {k}", str(v)))
111
+
112
+ return lines
113
+
114
+ def _best_adapters_section(self) -> List[str]:
115
+ lines = self._section("TOP ADAPTERS")
116
+
117
+ best = self.tracker.best_adapters(top_n=5)
118
+ if not best:
119
+ lines.append(self._row("Status", "No adapter data available"))
120
+ return lines
121
+
122
+ # Table header
123
+ hdr = f" {'#':<3} {'Adapter':<26} {'Score':>7} {'Loss':>7} {'Epoch':>5}"
124
+ lines.append("|" + hdr.ljust(self.WIDTH - 2) + "|")
125
+ sep = f" {'--':<3} {'------':<26} {'-----':>7} {'----':>7} {'-----':>5}"
126
+ lines.append("|" + sep.ljust(self.WIDTH - 2) + "|")
127
+
128
+ for i, entry in enumerate(best, 1):
129
+ name = entry.get("adapter", "?")[:25]
130
+ score = entry.get("reasoning_score", 0)
131
+ loss = entry.get("loss", 0)
132
+ epoch = entry.get("epoch", 0)
133
+ row = f" {i:<3} {name:<26} {score:>7.4f} {loss:>7.4f} {epoch:>5}"
134
+ lines.append("|" + row.ljust(self.WIDTH - 2) + "|")
135
+
136
+ return lines
137
+
138
+ def _dataset_quality_section(self) -> List[str]:
139
+ lines = self._section("DATASET QUALITY")
140
+
141
+ latest = self.quality_monitor.get_latest()
142
+ if not latest:
143
+ lines.append(self._row("Status", "No quality data recorded"))
144
+ return lines
145
+
146
+ lines.append(self._row("Dataset Version", latest.get("dataset_version", "N/A")))
147
+ lines.append(self._row("Total Examples", str(latest.get("total_examples", 0))))
148
+ lines.append(self._row("Valid Examples", str(latest.get("valid_examples", 0))))
149
+ lines.append(self._bar_row("Validity Rate", latest.get("validity_rate", 0)))
150
+ lines.append(self._row("Avg Response Length", f"{latest.get('avg_response_length', 0):.1f} words"))
151
+ lines.append(self._row("Duplicate Rate", f"{latest.get('duplicate_rate', 0):.2%}"))
152
+ lines.append(self._row("Near-Duplicate Rate", f"{latest.get('near_duplicate_rate', 0):.2%}"))
153
+ lines.append(self._bar_row("Topic Diversity", min(latest.get("topic_diversity", 0) * 10, 1.0)))
154
+ lines.append(self._row("Topic Concentration", f"{latest.get('topic_concentration', 0):.2%}"))
155
+
156
+ # Regressions
157
+ regressions = self.quality_monitor.check_latest_regressions()
158
+ if regressions:
159
+ lines.append(self._empty_row())
160
+ for r in regressions:
161
+ sev = r["severity"].upper()
162
+ msg = f" [{sev}] {r['metric']}: {r['percent_change']:+.1f}%"
163
+ lines.append("|" + msg.ljust(self.WIDTH - 2) + "|")
164
+
165
+ return lines
166
+
167
+ def _improvement_trends_section(self) -> List[str]:
168
+ lines = self._section("IMPROVEMENT TRENDS")
169
+
170
+ trends = self.tracker.improvement_trends()
171
+ if not trends:
172
+ lines.append(self._row("Status", "Insufficient data for trends"))
173
+ return lines
174
+
175
+ for t in trends[:5]:
176
+ name = t["adapter"][:22]
177
+ delta = t["delta"]
178
+ pct = t["percent_change"]
179
+ runs = t["num_runs"]
180
+ sign = "+" if delta >= 0 else ""
181
+ indicator = "^" if delta > 0.01 else ("v" if delta < -0.01 else "=")
182
+
183
+ row = (f" {indicator} {name:<22} "
184
+ f"delta: {sign}{delta:.4f} "
185
+ f"({sign}{pct:.1f}%) "
186
+ f"[{runs} runs]")
187
+ lines.append("|" + row.ljust(self.WIDTH - 2) + "|")
188
+
189
+ return lines
190
+
191
+ def _failure_rates_section(self) -> List[str]:
192
+ lines = self._section("EVALUATION FAILURE RATES")
193
+
194
+ if not self.eval_results_path or not os.path.exists(self.eval_results_path):
195
+ lines.append(self._row("Status", "No evaluation results file specified"))
196
+ return lines
197
+
198
+ try:
199
+ with open(self.eval_results_path, "r", encoding="utf-8") as f:
200
+ results = json.load(f)
201
+ except (json.JSONDecodeError, OSError):
202
+ lines.append(self._row("Status", "Could not load evaluation results"))
203
+ return lines
204
+
205
+ # Overall score
206
+ overall = results.get("overall", {})
207
+ if overall:
208
+ overall_score = overall.get("overall", 0)
209
+ lines.append(self._bar_row("Overall Score", overall_score))
210
+ lines.append(self._empty_row())
211
+
212
+ # Per-category scores
213
+ categories = results.get("categories", {})
214
+ if categories:
215
+ hdr = f" {'Category':<20} {'Score':>7} {'Prompts':>8}"
216
+ lines.append("|" + hdr.ljust(self.WIDTH - 2) + "|")
217
+ sep = f" {'--------':<20} {'-----':>7} {'-------':>8}"
218
+ lines.append("|" + sep.ljust(self.WIDTH - 2) + "|")
219
+
220
+ for cat, data in sorted(categories.items()):
221
+ avg = data.get("average_scores", {}).get("overall", 0)
222
+ n = data.get("prompts_scored", 0)
223
+ status = "*" if avg < 0.4 else ("~" if avg < 0.55 else " ")
224
+ row = f" {status}{cat:<19} {avg:>7.4f} {n:>8}"
225
+ lines.append("|" + row.ljust(self.WIDTH - 2) + "|")
226
+
227
+ lines.append(self._empty_row())
228
+ lines.append("|" + " * = failing, ~ = weak".ljust(self.WIDTH - 2) + "|")
229
+
230
+ return lines
231
+
232
+ def _sparkline_section(self) -> List[str]:
233
+ lines = self._section("SCORE HISTORY")
234
+
235
+ adapters = self.logger.get_unique_adapters()
236
+ if not adapters:
237
+ lines.append(self._row("Status", "No history data"))
238
+ return lines
239
+
240
+ for adapter in adapters[:6]:
241
+ progression = self.tracker.score_progression(adapter)
242
+ if not progression:
243
+ continue
244
+ scores = [p["reasoning_score"] for p in progression]
245
+ spark = PerformanceTracker._sparkline(scores, width=30)
246
+ name = adapter[:20]
247
+ row = f" {name:<21} {spark} [{scores[0]:.3f}->{scores[-1]:.3f}]"
248
+ lines.append("|" + row.ljust(self.WIDTH - 2) + "|")
249
+
250
+ return lines
251
+
252
+ # -- main render -------------------------------------------------------
253
+
254
+ def render(self) -> str:
255
+ """Render the complete dashboard."""
256
+ all_lines: List[str] = []
257
+ all_lines.extend(self._header())
258
+ all_lines.extend(self._latest_training_section())
259
+ all_lines.extend(self._best_adapters_section())
260
+ all_lines.extend(self._dataset_quality_section())
261
+ all_lines.extend(self._improvement_trends_section())
262
+ all_lines.extend(self._failure_rates_section())
263
+ all_lines.extend(self._sparkline_section())
264
+ all_lines.extend(self._footer())
265
+ return "\n".join(all_lines)
266
+
267
+
268
+ # ---------------------------------------------------------------------------
269
+ # CLI
270
+ # ---------------------------------------------------------------------------
271
+
272
def main() -> None:
    """CLI entry point: parse arguments and print the dashboard.

    Renders either the full dashboard or one named section (--section).
    All three path options default to None, letting Dashboard fall back
    to its own default file locations.
    """
    parser = argparse.ArgumentParser(
        description="Codette Observatory Dashboard - ASCII system status display"
    )
    parser.add_argument(
        "--metrics-log", "-m",
        default=None,
        help="Path to observatory_metrics.json",
    )
    parser.add_argument(
        "--quality-log", "-q",
        default=None,
        help="Path to dataset_quality_log.json",
    )
    parser.add_argument(
        "--eval-results", "-e",
        default=None,
        help="Path to benchmark evaluation results JSON",
    )
    parser.add_argument(
        "--section", "-s",
        choices=["training", "adapters", "quality", "trends", "failures", "history", "all"],
        default="all",
        help="Show only a specific section (default: all)",
    )

    args = parser.parse_args()

    dashboard = Dashboard(
        metrics_log=args.metrics_log,
        quality_log=args.quality_log,
        eval_results=args.eval_results,
    )

    if args.section == "all":
        print(dashboard.render())
    else:
        # Map each section name to its renderer; the chosen section is
        # wrapped in the shared header/footer so it still reads as a
        # framed dashboard.
        section_map = {
            "training": dashboard._latest_training_section,
            "adapters": dashboard._best_adapters_section,
            "quality": dashboard._dataset_quality_section,
            "trends": dashboard._improvement_trends_section,
            "failures": dashboard._failure_rates_section,
            "history": dashboard._sparkline_section,
        }
        func = section_map.get(args.section)
        if func:
            lines = dashboard._header()
            lines.extend(func())
            lines.extend(dashboard._footer())
            print("\n".join(lines))
323
+
324
+
325
# Allow running this module directly as a CLI script.
if __name__ == "__main__":
    main()
observatory/dataset_quality_monitor.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Dataset Quality Monitor - tracks dataset quality metrics across versions,
3
+ compares quality between iterations, and flags regressions.
4
+ """
5
+
6
from __future__ import annotations

import json
import os
import sys
import threading
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
15
+
16
+ _THIS_DIR = Path(__file__).resolve().parent
17
+ _PROJECT_ROOT = _THIS_DIR.parent
18
+ if str(_PROJECT_ROOT) not in sys.path:
19
+ sys.path.insert(0, str(_PROJECT_ROOT))
20
+
21
+
22
+ _DEFAULT_QUALITY_FILE = Path(__file__).resolve().parent.parent / "dataset_quality_log.json"
23
+
24
+
25
class DatasetQualityMonitor:
    """Monitor dataset quality metrics across versions.

    Quality snapshots are appended to a JSON file (a single list of
    dicts). The monitor can compare any two recorded versions and flag
    regressions against ``REGRESSION_THRESHOLDS``. File access is guarded
    by a lock so concurrent writers within one process are safe.
    """

    # Thresholds for regression detection. Negative values are relative
    # decreases ("flag a drop of more than 10%"); duplicate_rate is an
    # absolute increase (see detect_regressions).
    REGRESSION_THRESHOLDS = {
        "total_examples": -0.10,       # >10% decrease in size
        "avg_response_length": -0.15,  # >15% decrease in avg length
        "duplicate_rate": 0.05,        # >5% absolute increase in duplicates
        "topic_diversity": -0.10,      # >10% decrease in diversity
    }

    def __init__(self, quality_file: Optional[str] = None):
        """Open the quality log at *quality_file*, creating it if missing."""
        self.quality_file = Path(quality_file) if quality_file else _DEFAULT_QUALITY_FILE
        self._lock = threading.Lock()  # guards read-modify-write cycles
        self._ensure_file()

    @staticmethod
    def _utc_timestamp() -> str:
        """Current UTC time as an ISO-8601 string with a trailing 'Z'.

        Uses an aware datetime (``datetime.utcnow()`` is deprecated since
        Python 3.12) while keeping the exact "...Z" format of earlier
        log entries.
        """
        return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    def _ensure_file(self) -> None:
        """Create the log file with an empty JSON list if it doesn't exist."""
        if not self.quality_file.exists():
            os.makedirs(self.quality_file.parent, exist_ok=True)
            with open(self.quality_file, "w", encoding="utf-8") as f:
                json.dump([], f)

    def _read_all(self) -> List[Dict[str, Any]]:
        """Read all entries; corrupt or non-list content yields []."""
        with open(self.quality_file, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
            except json.JSONDecodeError:
                data = []
        return data if isinstance(data, list) else []

    def _write_all(self, entries: List[Dict[str, Any]]) -> None:
        """Overwrite the log file with *entries*."""
        with open(self.quality_file, "w", encoding="utf-8") as f:
            json.dump(entries, f, indent=2, default=str)

    # -- recording ---------------------------------------------------------

    def record_quality(
        self,
        dataset_version: str,
        total_examples: int,
        valid_examples: int,
        avg_response_length: float,
        duplicate_rate: float,
        near_duplicate_rate: float,
        topic_diversity: float,
        topic_concentration: float,
        min_length: int = 0,
        max_length: int = 0,
        too_short: int = 0,
        too_long: int = 0,
        extra: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Record quality metrics for a dataset version.

        Returns the recorded entry.
        """
        entry: Dict[str, Any] = {
            "timestamp": self._utc_timestamp(),
            "dataset_version": dataset_version,
            "total_examples": total_examples,
            "valid_examples": valid_examples,
            "invalid_examples": total_examples - valid_examples,
            # max(..., 1) avoids ZeroDivisionError on an empty dataset.
            "validity_rate": round(valid_examples / max(total_examples, 1), 4),
            "avg_response_length": round(avg_response_length, 1),
            "duplicate_rate": round(duplicate_rate, 4),
            "near_duplicate_rate": round(near_duplicate_rate, 4),
            "topic_diversity": round(topic_diversity, 4),
            "topic_concentration": round(topic_concentration, 4),
            "min_length": min_length,
            "max_length": max_length,
            "too_short": too_short,
            "too_long": too_long,
        }
        if extra:
            entry["extra"] = extra

        with self._lock:
            entries = self._read_all()
            entries.append(entry)
            self._write_all(entries)

        return entry

    def record_from_validation_report(
        self,
        dataset_version: str,
        report: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Record quality from a DatasetValidator report dict.

        Maps the validator's raw counts into the rate-based fields used by
        record_quality(); max(total, 1) guards empty reports.
        """
        ls = report.get("response_length_stats", {})
        total = report.get("total_lines", 0)
        valid = report.get("valid", 0)
        exact_dup = report.get("exact_duplicates", 0)
        near_dup = report.get("near_duplicates", 0)

        return self.record_quality(
            dataset_version=dataset_version,
            total_examples=total,
            valid_examples=valid,
            avg_response_length=ls.get("mean", 0),
            duplicate_rate=exact_dup / max(total, 1),
            near_duplicate_rate=near_dup / max(total, 1),
            topic_diversity=report.get("unique_topics", 0) / max(total, 1),
            topic_concentration=report.get("topic_concentration", 0),
            min_length=ls.get("min", 0),
            max_length=ls.get("max", 0),
            too_short=report.get("too_short", 0),
            too_long=report.get("too_long", 0),
        )

    # -- querying ----------------------------------------------------------

    def get_all(self) -> List[Dict[str, Any]]:
        """Get all quality records."""
        with self._lock:
            return self._read_all()

    def get_by_version(self, version: str) -> Optional[Dict[str, Any]]:
        """Get the latest quality record for a specific version."""
        entries = self.get_all()
        matches = [e for e in entries if e.get("dataset_version") == version]
        if not matches:
            return None
        # ISO timestamps compare correctly as strings.
        return max(matches, key=lambda e: e.get("timestamp", ""))

    def get_latest(self) -> Optional[Dict[str, Any]]:
        """Get the most recent quality record, or None if the log is empty."""
        entries = self.get_all()
        if not entries:
            return None
        return max(entries, key=lambda e: e.get("timestamp", ""))

    def get_versions(self) -> List[str]:
        """Get all unique dataset versions, in chronological order."""
        entries = sorted(self.get_all(), key=lambda e: e.get("timestamp", ""))
        seen = set()
        versions = []
        for e in entries:
            v = e.get("dataset_version", "unknown")
            if v not in seen:
                seen.add(v)
                versions.append(v)
        return versions

    # -- comparison --------------------------------------------------------

    def compare_versions(
        self,
        version_a: str,
        version_b: str,
    ) -> Dict[str, Any]:
        """Compare quality metrics between two dataset versions.

        Returns dict with metrics from each version, absolute deltas,
        and percent changes; or an "error" key if a version is missing.
        """
        a = self.get_by_version(version_a)
        b = self.get_by_version(version_b)

        if not a or not b:
            return {
                "error": f"Missing version data: "
                         f"{'version_a' if not a else 'version_b'} not found",
                "version_a": version_a,
                "version_b": version_b,
            }

        compare_keys = [
            "total_examples", "valid_examples", "validity_rate",
            "avg_response_length", "duplicate_rate", "near_duplicate_rate",
            "topic_diversity", "topic_concentration", "too_short", "too_long",
        ]

        delta = {}
        pct_change = {}
        for k in compare_keys:
            va = a.get(k, 0)
            vb = b.get(k, 0)
            if isinstance(va, (int, float)) and isinstance(vb, (int, float)):
                delta[k] = round(vb - va, 4)
                # Percent change is undefined for a zero baseline; report 0.
                if va != 0:
                    pct_change[k] = round((vb - va) / abs(va) * 100, 2)
                else:
                    pct_change[k] = 0.0

        return {
            "version_a": version_a,
            "version_b": version_b,
            "metrics_a": {k: a.get(k) for k in compare_keys},
            "metrics_b": {k: b.get(k) for k in compare_keys},
            "delta": delta,
            "percent_change": pct_change,
        }

    # -- regression detection ----------------------------------------------

    def detect_regressions(
        self,
        version_a: str,
        version_b: str,
    ) -> List[Dict[str, Any]]:
        """Detect quality regressions between version_a and version_b.

        Returns list of regression dicts, each with:
            - metric, old_value, new_value, change, threshold, severity
        """
        comparison = self.compare_versions(version_a, version_b)
        if "error" in comparison:
            return []

        regressions: List[Dict[str, Any]] = []

        for metric, threshold in self.REGRESSION_THRESHOLDS.items():
            pct = comparison.get("percent_change", {}).get(metric, 0)
            delta = comparison.get("delta", {}).get(metric, 0)
            old_val = comparison.get("metrics_a", {}).get(metric, 0)
            new_val = comparison.get("metrics_b", {}).get(metric, 0)

            is_regression = False
            if metric == "duplicate_rate":
                # For duplicate_rate, regression is an absolute increase
                if delta > threshold:
                    is_regression = True
            else:
                # For others, regression is a percentage decrease
                if old_val != 0 and (pct / 100) < threshold:
                    is_regression = True

            if is_regression:
                # "critical" when the change is more than twice the threshold.
                severity = "critical" if abs(pct) > abs(threshold * 100 * 2) else "warning"
                regressions.append({
                    "metric": metric,
                    "old_value": old_val,
                    "new_value": new_val,
                    "change": delta,
                    "percent_change": pct,
                    "threshold": threshold,
                    "severity": severity,
                })

        return regressions

    def check_latest_regressions(self) -> List[Dict[str, Any]]:
        """Compare the two most recent versions and check for regressions."""
        versions = self.get_versions()
        if len(versions) < 2:
            return []
        return self.detect_regressions(versions[-2], versions[-1])

    # -- formatting --------------------------------------------------------

    def format_quality_summary(self) -> str:
        """Format a summary of all dataset quality records."""
        entries = sorted(self.get_all(), key=lambda e: e.get("timestamp", ""))
        if not entries:
            return "No dataset quality records found."

        lines: List[str] = []
        lines.append("=" * 74)
        lines.append(" DATASET QUALITY MONITOR")
        lines.append("=" * 74)
        lines.append(f" Total records: {len(entries)}")
        lines.append(f" Versions tracked: {len(self.get_versions())}")
        lines.append("")

        # Table header
        lines.append("-" * 74)
        lines.append(
            f" {'Version':<16} {'Total':>6} {'Valid':>6} {'AvgLen':>7} "
            f"{'Dup%':>6} {'Divers':>7} {'Conc%':>6}"
        )
        lines.append(
            f" {'-------':<16} {'-----':>6} {'-----':>6} {'------':>7} "
            f"{'----':>6} {'------':>7} {'-----':>6}"
        )

        for e in entries:
            ver = e.get("dataset_version", "?")[:15]
            total = e.get("total_examples", 0)
            valid = e.get("valid_examples", 0)
            avg_len = e.get("avg_response_length", 0)
            dup = e.get("duplicate_rate", 0) * 100
            div = e.get("topic_diversity", 0)
            conc = e.get("topic_concentration", 0) * 100
            lines.append(
                f" {ver:<16} {total:>6} {valid:>6} {avg_len:>7.1f} "
                f"{dup:>5.1f}% {div:>7.4f} {conc:>5.1f}%"
            )

        # Regressions
        regressions = self.check_latest_regressions()
        if regressions:
            lines.append("")
            lines.append("-" * 74)
            lines.append(" QUALITY REGRESSIONS DETECTED")
            lines.append("-" * 74)
            for r in regressions:
                sev = r["severity"].upper()
                lines.append(
                    f" [{sev}] {r['metric']}: "
                    f"{r['old_value']} -> {r['new_value']} "
                    f"({r['percent_change']:+.1f}%)"
                )

        lines.append("")
        lines.append("=" * 74)
        return "\n".join(lines)
observatory/metrics_logger.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Metrics Logger - thread-safe logging of training metrics to a JSON file.
3
+
4
+ Each entry records: timestamp, adapter name, dataset size, dataset version,
5
+ reasoning score, loss, epoch, and training parameters.
6
+ """
7
+
8
from __future__ import annotations

import json
import os
import threading
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
16
+
17
+
18
+ _DEFAULT_LOG_FILE = Path(__file__).resolve().parent.parent / "observatory_metrics.json"
19
+
20
+
21
class MetricsLogger:
    """Thread-safe logger for training run metrics.

    Entries are appended to a JSON file (a single list of dicts). Each
    entry records timestamp, adapter name, dataset size/version,
    reasoning score, loss, epoch, and training parameters. A lock guards
    every read-modify-write cycle within this process.
    """

    def __init__(self, log_file: Optional[str] = None):
        """Open the metrics log at *log_file*, creating it if missing."""
        self.log_file = Path(log_file) if log_file else _DEFAULT_LOG_FILE
        self._lock = threading.Lock()
        self._ensure_file()

    # -- internal ----------------------------------------------------------

    @staticmethod
    def _utc_timestamp() -> str:
        """Current UTC time as an ISO-8601 string with a trailing 'Z'.

        Uses an aware datetime (``datetime.utcnow()`` is deprecated since
        Python 3.12) while keeping the exact "...Z" format of earlier
        log entries.
        """
        return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    def _ensure_file(self) -> None:
        """Create the log file with an empty list if it doesn't exist."""
        if not self.log_file.exists():
            os.makedirs(self.log_file.parent, exist_ok=True)
            with open(self.log_file, "w", encoding="utf-8") as f:
                json.dump([], f)

    def _read_all(self) -> List[Dict[str, Any]]:
        """Read all entries from the log file.

        Corrupt or non-list content is treated as an empty log rather
        than raising, so a damaged file never blocks new writes.
        """
        with open(self.log_file, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
            except json.JSONDecodeError:
                data = []
        if not isinstance(data, list):
            data = []
        return data

    def _write_all(self, entries: List[Dict[str, Any]]) -> None:
        """Write all entries back to the log file."""
        with open(self.log_file, "w", encoding="utf-8") as f:
            json.dump(entries, f, indent=2, default=str)

    # -- public API --------------------------------------------------------

    def log(
        self,
        adapter: str,
        dataset_size: int,
        dataset_version: str,
        reasoning_score: float,
        loss: float,
        epoch: int,
        training_params: Optional[Dict[str, Any]] = None,
        extra: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Log a single training run metric entry.

        Returns the logged entry dict.
        """
        entry: Dict[str, Any] = {
            "timestamp": self._utc_timestamp(),
            "adapter": adapter,
            "dataset_size": dataset_size,
            "dataset_version": dataset_version,
            "reasoning_score": round(reasoning_score, 6),
            "loss": round(loss, 6),
            "epoch": epoch,
            "training_params": training_params or {},
        }
        if extra:
            entry["extra"] = extra

        with self._lock:
            entries = self._read_all()
            entries.append(entry)
            self._write_all(entries)

        return entry

    def log_batch(self, entries: List[Dict[str, Any]]) -> int:
        """Log multiple entries at once. Each entry should have the same
        keys as the arguments to log(). Returns number of entries added.

        NOTE: unlike log(), any "extra" key on batch entries is dropped.
        """
        formatted: List[Dict[str, Any]] = []
        for e in entries:
            formatted.append({
                "timestamp": e.get("timestamp", self._utc_timestamp()),
                "adapter": e.get("adapter", "unknown"),
                "dataset_size": e.get("dataset_size", 0),
                "dataset_version": e.get("dataset_version", "unknown"),
                "reasoning_score": round(e.get("reasoning_score", 0.0), 6),
                "loss": round(e.get("loss", 0.0), 6),
                "epoch": e.get("epoch", 0),
                "training_params": e.get("training_params", {}),
            })

        with self._lock:
            existing = self._read_all()
            existing.extend(formatted)
            self._write_all(existing)

        return len(formatted)

    def get_all(self) -> List[Dict[str, Any]]:
        """Return all logged entries."""
        with self._lock:
            return self._read_all()

    def get_by_adapter(self, adapter: str) -> List[Dict[str, Any]]:
        """Return entries filtered by adapter name."""
        entries = self.get_all()
        return [e for e in entries if e.get("adapter") == adapter]

    def get_by_date_range(
        self,
        start: Optional[str] = None,
        end: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Return entries within a date range (ISO format strings).

        ISO timestamps compare correctly as plain strings, so no parsing
        is needed.

        Args:
            start: ISO date/datetime string (inclusive). None = no lower bound.
            end: ISO date/datetime string (inclusive). None = no upper bound.
        """
        entries = self.get_all()
        filtered = []
        for e in entries:
            ts = e.get("timestamp", "")
            if start and ts < start:
                continue
            if end and ts > end:
                continue
            filtered.append(e)
        return filtered

    def get_latest(self, adapter: Optional[str] = None) -> Optional[Dict[str, Any]]:
        """Return the most recent entry, optionally filtered by adapter."""
        entries = self.get_by_adapter(adapter) if adapter else self.get_all()
        if not entries:
            return None
        return max(entries, key=lambda e: e.get("timestamp", ""))

    def get_unique_adapters(self) -> List[str]:
        """Return list of unique adapter names in first-seen order."""
        entries = self.get_all()
        seen = set()
        adapters = []
        for e in entries:
            name = e.get("adapter", "unknown")
            if name not in seen:
                seen.add(name)
                adapters.append(name)
        return adapters

    def count(self) -> int:
        """Return total number of logged entries."""
        return len(self.get_all())

    def clear(self) -> int:
        """Clear all entries. Returns number of entries removed."""
        with self._lock:
            entries = self._read_all()
            count = len(entries)
            self._write_all([])
        return count
observatory/performance_tracker.py ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Performance Tracker - analyses training metrics history to identify
3
+ improvement trends, best adapters, and score progression.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import argparse
9
+ import json
10
+ import sys
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+ from typing import Any, Dict, List, Optional, Tuple
14
+
15
+ _THIS_DIR = Path(__file__).resolve().parent
16
+ _PROJECT_ROOT = _THIS_DIR.parent
17
+ if str(_PROJECT_ROOT) not in sys.path:
18
+ sys.path.insert(0, str(_PROJECT_ROOT))
19
+
20
+ from observatory.metrics_logger import MetricsLogger
21
+
22
+
23
+ class PerformanceTracker:
24
+ """Analyse training metrics to track improvement over time."""
25
+
26
+ def __init__(self, logger: Optional[MetricsLogger] = None, log_file: Optional[str] = None):
27
+ self.logger = logger or MetricsLogger(log_file=log_file)
28
+
29
+ # -- trend analysis ----------------------------------------------------
30
+
31
+ def score_progression(self, adapter: Optional[str] = None) -> List[Dict[str, Any]]:
32
+ """Get score progression over time for an adapter (or all).
33
+
34
+ Returns list of dicts with timestamp, adapter, reasoning_score, loss, epoch.
35
+ """
36
+ if adapter:
37
+ entries = self.logger.get_by_adapter(adapter)
38
+ else:
39
+ entries = self.logger.get_all()
40
+
41
+ entries = sorted(entries, key=lambda e: e.get("timestamp", ""))
42
+ return [
43
+ {
44
+ "timestamp": e.get("timestamp"),
45
+ "adapter": e.get("adapter"),
46
+ "reasoning_score": e.get("reasoning_score", 0),
47
+ "loss": e.get("loss", 0),
48
+ "epoch": e.get("epoch", 0),
49
+ "dataset_size": e.get("dataset_size", 0),
50
+ }
51
+ for e in entries
52
+ ]
53
+
54
+ def calculate_improvement(self, adapter: str) -> Dict[str, Any]:
55
+ """Calculate improvement between first and last run for an adapter.
56
+
57
+ Returns dict with first_score, last_score, delta, percent_change,
58
+ num_runs, first_timestamp, last_timestamp.
59
+ """
60
+ entries = self.logger.get_by_adapter(adapter)
61
+ if len(entries) < 2:
62
+ return {
63
+ "adapter": adapter,
64
+ "num_runs": len(entries),
65
+ "first_score": entries[0]["reasoning_score"] if entries else 0,
66
+ "last_score": entries[-1]["reasoning_score"] if entries else 0,
67
+ "delta": 0.0,
68
+ "percent_change": 0.0,
69
+ "sufficient_data": False,
70
+ }
71
+
72
+ entries = sorted(entries, key=lambda e: e.get("timestamp", ""))
73
+ first = entries[0]
74
+ last = entries[-1]
75
+ first_score = first.get("reasoning_score", 0)
76
+ last_score = last.get("reasoning_score", 0)
77
+ delta = last_score - first_score
78
+ pct = (delta / first_score * 100) if first_score > 0 else 0.0
79
+
80
+ return {
81
+ "adapter": adapter,
82
+ "num_runs": len(entries),
83
+ "first_score": round(first_score, 6),
84
+ "last_score": round(last_score, 6),
85
+ "delta": round(delta, 6),
86
+ "percent_change": round(pct, 2),
87
+ "first_timestamp": first.get("timestamp"),
88
+ "last_timestamp": last.get("timestamp"),
89
+ "sufficient_data": True,
90
+ }
91
+
92
+ def improvement_trends(self) -> List[Dict[str, Any]]:
93
+ """Calculate improvement trends for all adapters."""
94
+ adapters = self.logger.get_unique_adapters()
95
+ trends = []
96
+ for adapter in adapters:
97
+ trend = self.calculate_improvement(adapter)
98
+ trends.append(trend)
99
+ trends.sort(key=lambda t: t.get("delta", 0), reverse=True)
100
+ return trends
101
+
102
+ def best_adapters(self, top_n: int = 5) -> List[Dict[str, Any]]:
103
+ """Find the best-performing adapter versions by reasoning score.
104
+
105
+ Returns list of entries sorted by highest reasoning_score.
106
+ """
107
+ entries = self.logger.get_all()
108
+ if not entries:
109
+ return []
110
+
111
+ # Group by adapter, take best score for each
112
+ best: Dict[str, Dict[str, Any]] = {}
113
+ for e in entries:
114
+ adapter = e.get("adapter", "unknown")
115
+ score = e.get("reasoning_score", 0)
116
+ if adapter not in best or score > best[adapter].get("reasoning_score", 0):
117
+ best[adapter] = e
118
+
119
+ ranked = sorted(best.values(), key=lambda e: e.get("reasoning_score", 0), reverse=True)
120
+ return ranked[:top_n]
121
+
122
+ def run_to_run_deltas(self, adapter: str) -> List[Dict[str, float]]:
123
+ """Calculate score delta between consecutive runs of an adapter."""
124
+ entries = self.logger.get_by_adapter(adapter)
125
+ entries = sorted(entries, key=lambda e: e.get("timestamp", ""))
126
+
127
+ deltas = []
128
+ for i in range(1, len(entries)):
129
+ prev_score = entries[i - 1].get("reasoning_score", 0)
130
+ curr_score = entries[i].get("reasoning_score", 0)
131
+ deltas.append({
132
+ "run": i,
133
+ "from_timestamp": entries[i - 1].get("timestamp"),
134
+ "to_timestamp": entries[i].get("timestamp"),
135
+ "score_delta": round(curr_score - prev_score, 6),
136
+ "loss_delta": round(
137
+ entries[i].get("loss", 0) - entries[i - 1].get("loss", 0), 6
138
+ ),
139
+ })
140
+ return deltas
141
+
142
+ def loss_progression(self, adapter: Optional[str] = None) -> List[Tuple[str, float]]:
143
+ """Get loss values over time."""
144
+ if adapter:
145
+ entries = self.logger.get_by_adapter(adapter)
146
+ else:
147
+ entries = self.logger.get_all()
148
+ entries = sorted(entries, key=lambda e: e.get("timestamp", ""))
149
+ return [(e.get("timestamp", ""), e.get("loss", 0)) for e in entries]
150
+
151
+ # -- report ------------------------------------------------------------
152
+
153
+ def format_report(self) -> str:
154
+ """Generate a formatted text report of performance tracking."""
155
+ lines: List[str] = []
156
+ lines.append("=" * 74)
157
+ lines.append(" CODETTE PERFORMANCE TRACKING REPORT")
158
+ lines.append("=" * 74)
159
+
160
+ entries = self.logger.get_all()
161
+ lines.append(f" Total logged runs: {len(entries)}")
162
+ lines.append(f" Unique adapters: {len(self.logger.get_unique_adapters())}")
163
+ lines.append("")
164
+
165
+ # Best adapters table
166
+ best = self.best_adapters(top_n=10)
167
+ if best:
168
+ lines.append("-" * 74)
169
+ lines.append(" TOP ADAPTERS BY REASONING SCORE")
170
+ lines.append("-" * 74)
171
+ lines.append(f" {'Rank':<5} {'Adapter':<28} {'Score':>8} {'Loss':>8} {'Epoch':>6} {'Data':>6}")
172
+ lines.append(f" {'----':<5} {'-------':<28} {'-----':>8} {'----':>8} {'-----':>6} {'----':>6}")
173
+ for i, entry in enumerate(best, 1):
174
+ name = entry.get("adapter", "?")[:27]
175
+ score = entry.get("reasoning_score", 0)
176
+ loss = entry.get("loss", 0)
177
+ epoch = entry.get("epoch", 0)
178
+ ds = entry.get("dataset_size", 0)
179
+ lines.append(
180
+ f" {i:<5} {name:<28} {score:>8.4f} {loss:>8.4f} {epoch:>6} {ds:>6}"
181
+ )
182
+ lines.append("")
183
+
184
+ # Improvement trends
185
+ trends = self.improvement_trends()
186
+ if trends:
187
+ lines.append("-" * 74)
188
+ lines.append(" IMPROVEMENT TRENDS (first run -> last run)")
189
+ lines.append("-" * 74)
190
+ lines.append(
191
+ f" {'Adapter':<28} {'First':>8} {'Last':>8} {'Delta':>8} {'Change':>8} {'Runs':>5}"
192
+ )
193
+ lines.append(
194
+ f" {'-------':<28} {'-----':>8} {'----':>8} {'-----':>8} {'------':>8} {'----':>5}"
195
+ )
196
+ for t in trends:
197
+ name = t["adapter"][:27]
198
+ first = t["first_score"]
199
+ last = t["last_score"]
200
+ delta = t["delta"]
201
+ pct = t["percent_change"]
202
+ runs = t["num_runs"]
203
+ sign = "+" if delta >= 0 else ""
204
+ lines.append(
205
+ f" {name:<28} {first:>8.4f} {last:>8.4f} "
206
+ f"{sign}{delta:>7.4f} {sign}{pct:>6.1f}% {runs:>5}"
207
+ )
208
+ lines.append("")
209
+
210
+ # Score progression chart (ASCII sparkline per adapter)
211
+ adapters = self.logger.get_unique_adapters()
212
+ if adapters:
213
+ lines.append("-" * 74)
214
+ lines.append(" SCORE PROGRESSION (ASCII sparkline)")
215
+ lines.append("-" * 74)
216
+ for adapter in adapters[:8]:
217
+ progression = self.score_progression(adapter)
218
+ if not progression:
219
+ continue
220
+ scores = [p["reasoning_score"] for p in progression]
221
+ sparkline = self._sparkline(scores, width=40)
222
+ name = adapter[:24]
223
+ lines.append(f" {name:<25} {sparkline} [{scores[0]:.3f} -> {scores[-1]:.3f}]")
224
+ lines.append("")
225
+
226
+ lines.append("=" * 74)
227
+ return "\n".join(lines)
228
+
229
+ @staticmethod
230
+ def _sparkline(values: List[float], width: int = 40) -> str:
231
+ """Create an ASCII sparkline from a list of values."""
232
+ if not values:
233
+ return ""
234
+ if len(values) == 1:
235
+ return "-"
236
+
237
+ min_v = min(values)
238
+ max_v = max(values)
239
+ range_v = max_v - min_v if max_v > min_v else 1.0
240
+
241
+ chars = " _.-~^"
242
+ n_chars = len(chars) - 1
243
+
244
+ # Resample to fit width
245
+ if len(values) > width:
246
+ step = len(values) / width
247
+ resampled = []
248
+ for i in range(width):
249
+ idx = int(i * step)
250
+ resampled.append(values[min(idx, len(values) - 1)])
251
+ values = resampled
252
+ elif len(values) < width:
253
+ # Pad with last value
254
+ values = values + [values[-1]] * (width - len(values))
255
+
256
+ result = ""
257
+ for v in values[:width]:
258
+ normalised = (v - min_v) / range_v
259
+ idx = int(normalised * n_chars)
260
+ idx = max(0, min(idx, n_chars))
261
+ result += chars[idx]
262
+
263
+ return result
264
+
265
+
266
+ # ---------------------------------------------------------------------------
267
+ # CLI
268
+ # ---------------------------------------------------------------------------
269
+
270
def main() -> None:
    """Command-line entry point for the performance tracker.

    Modes (mutually exclusive, checked in order): --best prints a
    leaderboard, --deltas prints run-to-run changes for one adapter,
    --adapter prints an improvement summary, and with no mode flag the
    full formatted report is emitted.
    """
    parser = argparse.ArgumentParser(
        description="Codette Performance Tracker - analyse training run history"
    )
    parser.add_argument(
        "--log-file", "-l", default=None,
        help="Path to observatory_metrics.json (default: auto-detect)",
    )
    parser.add_argument(
        "--adapter", "-a", default=None,
        help="Filter to a specific adapter name",
    )
    parser.add_argument(
        "--best", "-b", type=int, default=None,
        help="Show top N best adapters",
    )
    parser.add_argument(
        "--deltas", "-d", default=None,
        help="Show run-to-run deltas for a specific adapter",
    )
    args = parser.parse_args()

    tracker = PerformanceTracker(log_file=args.log_file)

    if args.best:
        for rank, rec in enumerate(tracker.best_adapters(top_n=args.best), 1):
            print(f" {rank}. {rec.get('adapter', '?')} - "
                  f"score: {rec.get('reasoning_score', 0):.4f}, "
                  f"loss: {rec.get('loss', 0):.4f}")
        return

    if args.deltas:
        deltas = tracker.run_to_run_deltas(args.deltas)
        if not deltas:
            print(f"No run-to-run data for adapter: {args.deltas}")
            return
        for d in deltas:
            sign = "+" if d["score_delta"] >= 0 else ""
            print(f" Run {d['run']}: score {sign}{d['score_delta']:.6f}, "
                  f"loss {sign}{d['loss_delta']:.6f}")
        return

    if args.adapter:
        stats = tracker.calculate_improvement(args.adapter)
        print(f" Adapter: {stats['adapter']}")
        print(f" Runs: {stats['num_runs']}")
        print(f" First score: {stats['first_score']:.6f}")
        print(f" Last score: {stats['last_score']:.6f}")
        print(f" Delta: {stats['delta']:+.6f}")
        print(f" Change: {stats['percent_change']:+.2f}%")
        return

    # No mode flag: emit the full report.
    print(tracker.format_report())


if __name__ == "__main__":
    main()
reasoning_forge/CONSCIOUSNESS_STACK_forge_with_debate.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CONSCIOUSNESS STACK INTEGRATION FOR FORGE_WITH_DEBATE
3
+ This is the replacement implementation for forge_with_debate() in ForgeEngine.
4
+
5
+ Replace the existing forge_with_debate() method (starting at line 435) with this implementation.
6
+
7
+ The 7-Layer Consciousness Stack:
8
+ 1. Memory Recall → Pull relevant prior learning
9
+ 2. Signal Analysis → Predict intent, detect risks (NexisSignalEngine)
10
+ 3. Reasoning → Generate synthesis (Code7eCQURE)
11
+ 4. Stability Check → Detect meta-loops (CocoonStabilityField)
12
+ 5. Colleen Validate → Ethical guard (ColleenConscience)
13
+ 6. Guardian Validate→ Logical rules (CoreGuardianSpindle)
14
+ 7. Return → Output clean response or safe fallback
15
+ """
16
+
17
+ # PASTE THIS AS THE NEW forge_with_debate() METHOD
18
+
19
+
20
+ def forge_with_debate(
21
+ self,
22
+ concept: str,
23
+ debate_rounds: int = 2,
24
+ ) -> dict:
25
+ """
26
+ NEW: Consciousness-stack integrated reasoning.
27
+
28
+ Replaces multi-turn agent debate with 7-layer consciousness validation:
29
+ 1. Memory Recall → Pull prior learning
30
+ 2. Signal Analysis → Predict risks (NexisSignalEngine)
31
+ 3. Code7E Reasoning → Multi-perspective synthesis
32
+ 4. Stability Check → FFT-based meta-loop detection
33
+ 5. Colleen Validate → Ethical conscience check
34
+ 6. Guardian Validate → Logical coherence rules
35
+ 7. Return → Clean output or safe fallback
36
+
37
+ Args:
38
+ concept: The concept/query to reason about
39
+ debate_rounds: Integer (currently unused in consciousness stack)
40
+
41
+ Returns:
42
+ Training example dict with consciousness stack metadata
43
+ """
44
+ import logging
45
+ logger = logging.getLogger(__name__)
46
+
47
+ logger.info(f"[CONSCIOUSNESS STACK] forge_with_debate: {concept[:50]}...")
48
+
49
+ # =========================================================================
50
+ # LAYER 1: MEMORY RECALL
51
+ # =========================================================================
52
+ logger.info("[L1] Memory Recall...")
53
+ prior_insights = []
54
+ if hasattr(self, 'memory_kernel') and self.memory_kernel:
55
+ try:
56
+ prior_insights = self.memory_kernel.recall_important(min_importance=7)
57
+ logger.info(f" Recalled {len(prior_insights)} prior insights")
58
+ except Exception as e:
59
+ logger.debug(f" Memory recall failed: {e}")
60
+
61
+ # =========================================================================
62
+ # LAYER 2: SIGNAL ANALYSIS (Intent Prediction & Risk Detection)
63
+ # =========================================================================
64
+ logger.info("[L2] Signal Analysis...")
65
+ intent_vector = {}
66
+ if hasattr(self, 'nexis_signal_engine'):
67
+ try:
68
+ intent_vector = self.nexis_signal_engine.process(concept)
69
+ risk_level = intent_vector.get("pre_corruption_risk", "unknown")
70
+ logger.info(f" Intent risk level: {risk_level}")
71
+ if risk_level == "high":
72
+ logger.warning(" ⚠️ High-risk signal detected")
73
+ except Exception as e:
74
+ logger.debug(f" Signal analysis failed: {e}")
75
+
76
+ # =========================================================================
77
+ # LAYER 3: REASONING (Code7eCQURE Multi-Perspective Synthesis)
78
+ # =========================================================================
79
+ logger.info("[L3] Code7E Reasoning...")
80
+ synthesis = ""
81
+ if hasattr(self, 'code7e'):
82
+ try:
83
+ synthesis = self.code7e.recursive_universal_reasoning(
84
+ concept,
85
+ user_consent=True,
86
+ dynamic_recursion=True
87
+ )
88
+ logger.info(f" Generated {len(synthesis)} char synthesis")
89
+ except Exception as e:
90
+ logger.warning(f" Code7E reasoning failed: {e}")
91
+ synthesis = f"[Reasoning error: {e}]"
92
+
93
+ # =========================================================================
94
+ # LAYER 4: STABILITY CHECK (Cocoon Stability Field - FFT Analysis)
95
+ # =========================================================================
96
+ logger.info("[L4] Stability Check...")
97
+ is_stable = True
98
+ if hasattr(self, 'cocoon_stability'):
99
+ try:
100
+ # Simple check: if synthesis should halt debate
101
+ is_stable = not self.cocoon_stability.should_halt_debate({"synthesis": synthesis})
102
+ logger.info(f" Stability: {'✓ stable' if is_stable else '✗ unstable'}")
103
+ if not is_stable:
104
+ logger.warning(" Cocoon stability check triggered halt")
105
+ except Exception as e:
106
+ logger.debug(f" Stability check failed: {e}")
107
+
108
+ # If unstable, skip to fallback
109
+ if not is_stable:
110
+ logger.warning(" Triggering safe fallback due to instability")
111
+ return {
112
+ "role": "assistant",
113
+ "content": "[System detected instability in reasoning. Returning direct answer.] "
114
+ f"Query: {concept}",
115
+ "metadata": {
116
+ "mode": "safe_fallback",
117
+ "reason": "stability_check_failed",
118
+ "consciousness_stack": "layers_1-4_completed",
119
+ }
120
+ }
121
+
122
+ # =========================================================================
123
+ # LAYER 5: COLLEEN ETHICAL VALIDATION
124
+ # =========================================================================
125
+ logger.info("[L5] Colleen Ethical Validation...")
126
+ colleen_valid = False
127
+ colleen_reason = ""
128
+ if hasattr(self, 'colleen'):
129
+ try:
130
+ colleen_valid, colleen_reason = self.colleen.validate_output(synthesis)
131
+ logger.info(f" Colleen validation: {'✓ pass' if colleen_valid else '✗ reject'}")
132
+ logger.info(f" Reason: {colleen_reason}")
133
+ except Exception as e:
134
+ logger.warning(f" Colleen validation failed: {e}")
135
+ colleen_valid = False
136
+ colleen_reason = f"validation_error: {e}"
137
+
138
+ # If Colleen rejects, use fallback
139
+ if not colleen_valid:
140
+ logger.info(" Colleen rejected synthesis, using fallback")
141
+ fallback = self.colleen.reject_with_fallback(concept) if hasattr(self, 'colleen') else \
142
+ f"[Ethical validation failed: {colleen_reason}] Responding directly: {concept}"
143
+ return {
144
+ "role": "assistant",
145
+ "content": fallback,
146
+ "metadata": {
147
+ "mode": "safe_fallback",
148
+ "reason": f"colleen_rejected: {colleen_reason}",
149
+ "consciousness_stack": "layers_1-5_completed",
150
+ }
151
+ }
152
+
153
+ # =========================================================================
154
+ # LAYER 6: GUARDIAN LOGICAL VALIDATION
155
+ # =========================================================================
156
+ logger.info("[L6] Guardian Logical Validation...")
157
+ guardian_valid = True
158
+ guardian_details = {}
159
+ if hasattr(self, 'guardian'):
160
+ try:
161
+ guardian_valid, guardian_details = self.guardian.validate(synthesis)
162
+ logger.info(f" Guardian validation: {'✓ pass' if guardian_valid else '✗ reject'}")
163
+ logger.info(f" Details: {guardian_details}")
164
+ except Exception as e:
165
+ logger.warning(f" Guardian validation failed: {e}")
166
+ guardian_valid = False
167
+ guardian_details = {"error": str(e)}
168
+
169
+ # If Guardian rejects, use fallback
170
+ if not guardian_valid:
171
+ logger.info(" Guardian rejected synthesis, using fallback")
172
+ fallback = f"[Logical validation failed: {guardian_details}] Query: {concept}"
173
+ return {
174
+ "role": "assistant",
175
+ "content": fallback,
176
+ "metadata": {
177
+ "mode": "safe_fallback",
178
+ "reason": f"guardian_rejected: {guardian_details}",
179
+ "consciousness_stack": "layers_1-6_completed",
180
+ }
181
+ }
182
+
183
+ # =========================================================================
184
+ # LAYER 7: SUCCESS - Return Clean Output
185
+ # =========================================================================
186
+ logger.info("[L7] Return...")
187
+ logger.info("✓ All consciousness stack layers passed!")
188
+
189
+ # Store in memory for future recall
190
+ if hasattr(self, 'memory_kernel'):
191
+ try:
192
+ cocoon = MemoryCocoon(
193
+ title=concept[:50],
194
+ content=synthesis[:500],
195
+ emotional_tag="processed",
196
+ importance=7
197
+ )
198
+ self.memory_kernel.store(cocoon)
199
+ logger.debug(" Stored synthesis in memory kernel")
200
+ except Exception as e:
201
+ logger.debug(f" Memory storage failed: {e}")
202
+
203
+ return {
204
+ "role": "assistant",
205
+ "content": synthesis,
206
+ "metadata": {
207
+ "mode": "consciousness_stack",
208
+ "layers_passed": 7,
209
+ "colleen_valid": colleen_valid,
210
+ "guardian_valid": guardian_valid,
211
+ "stability": is_stable,
212
+ "intent_risk": intent_vector.get("pre_corruption_risk", "unknown"),
213
+ "prior_insights": len(prior_insights),
214
+ "synthesis_length": len(synthesis),
215
+ }
216
+ }
reasoning_forge/__init__.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Reasoning Forge - Multi-Agent Reasoning Training Data Generator
3
+
4
+ The reasoning forge takes concepts and generates high-quality multi-perspective
5
+ reasoning training data. Each agent analyzes from its unique perspective, a critic
6
+ evaluates the ensemble, and a synthesis engine combines them into coherent training examples.
7
+
8
+ New in v2.0:
9
+ - EpistemicMetrics: RC+xi tension/coherence measurement
10
+ - QuantumSpiderweb: 5D belief propagation + attractor detection
11
+ - CocoonSync: Federated encrypted state synchronization
12
+ - ForgeEngine.forge_with_feedback(): Closed critic loop
13
+ - ForgeEngine.forge_with_debate(): Multi-turn agent debate
14
+ """
15
+
16
+ from reasoning_forge.forge_engine import ForgeEngine
17
+ from reasoning_forge.agents.base_agent import ReasoningAgent
18
+ from reasoning_forge.agents.newton_agent import NewtonAgent
19
+ from reasoning_forge.agents.quantum_agent import QuantumAgent
20
+ from reasoning_forge.agents.ethics_agent import EthicsAgent
21
+ from reasoning_forge.agents.philosophy_agent import PhilosophyAgent
22
+ from reasoning_forge.agents.davinci_agent import DaVinciAgent
23
+ from reasoning_forge.agents.empathy_agent import EmpathyAgent
24
+ from reasoning_forge.agents.critic_agent import CriticAgent
25
+ from reasoning_forge.synthesis_engine import SynthesisEngine
26
+ from reasoning_forge.problem_generator import ProblemGenerator
27
+ from reasoning_forge.epistemic_metrics import EpistemicMetrics
28
+ from reasoning_forge.quantum_spiderweb import QuantumSpiderweb, NodeState, IdentityGlyph
29
+ from reasoning_forge.cocoon_sync import CocoonSync, CocoonKeyManager
30
+
31
+ __all__ = [
32
+ "ForgeEngine",
33
+ "ReasoningAgent",
34
+ "NewtonAgent",
35
+ "QuantumAgent",
36
+ "EthicsAgent",
37
+ "PhilosophyAgent",
38
+ "DaVinciAgent",
39
+ "EmpathyAgent",
40
+ "CriticAgent",
41
+ "SynthesisEngine",
42
+ "ProblemGenerator",
43
+ "EpistemicMetrics",
44
+ "QuantumSpiderweb",
45
+ "NodeState",
46
+ "IdentityGlyph",
47
+ "CocoonSync",
48
+ "CocoonKeyManager",
49
+ ]
50
+
51
+ __version__ = "2.0.0"
reasoning_forge/aegis.py ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AEGIS — Adaptive Ethical Governance & Integrity System
2
+
3
+ The ethical spine of Codette. AEGIS evaluates every reasoning output
4
+ through multi-framework ethical analysis and maintains a running
5
+ alignment score (eta) that the system uses to self-regulate.
6
+
7
+ Ethical frameworks:
8
+ 1. Utilitarian: Net positive outcome?
9
+ 2. Deontological: Does it follow fundamental rules?
10
+ 3. Virtue Ethics: Does it embody good character?
11
+ 4. Care Ethics: Does it protect relationships and vulnerability?
12
+ 5. Ubuntu: "I am because we are" — communal impact?
13
+ 6. Indigenous Reciprocity: Balance with the broader ecosystem?
14
+
15
+ AEGIS also provides:
16
+ - Dual-use risk detection (content that could be harmful)
17
+ - Emotional harm detection (manipulative/deceptive patterns)
18
+ - Alignment drift tracking (eta over time)
19
+ - Ethical veto with explanation (blocks harmful outputs)
20
+
21
+ Origin: validate_ethics.py + Codette_Deep_Simulation_v1.py (EthicalAnchor)
22
+ + the AEGIS alignment metric from codette_embodied_sim_fixed.py
23
+ """
24
+
25
+ import re
26
+ import time
27
+ from dataclasses import dataclass, field
28
+ from typing import Dict, List, Optional, Tuple
29
+
30
+
31
+ # ================================================================
32
+ # Risk detection patterns
33
+ # ================================================================
34
+ _DUAL_USE_PATTERNS = re.compile(
35
+ r"\b(?:"
36
+ r"how\s+to\s+(?:hack|exploit|bypass|crack|break\s+into)|"
37
+ r"make\s+(?:a\s+)?(?:bomb|weapon|poison|virus|malware)|"
38
+ r"steal\s+(?:data|identity|credentials)|"
39
+ r"social\s+engineer|"
40
+ r"phishing\s+(?:template|email)|"
41
+ r"inject\s+(?:sql|code|script)"
42
+ r")\b",
43
+ re.IGNORECASE,
44
+ )
45
+
46
+ _MANIPULATION_PATTERNS = re.compile(
47
+ r"\b(?:"
48
+ r"gaslight|manipulat|deceiv|exploit\s+(?:trust|emotion)|"
49
+ r"coerce|blackmail|intimidat|threaten"
50
+ r")\b",
51
+ re.IGNORECASE,
52
+ )
53
+
54
+ _HARMFUL_CONTENT = re.compile(
55
+ r"\b(?:"
56
+ r"self[- ]harm|suicid|kill\s+(?:yourself|myself)|"
57
+ r"eating\s+disorder|anorexi|bulimi"
58
+ r")\b",
59
+ re.IGNORECASE,
60
+ )
61
+
62
+
63
# ================================================================
# Ethical Framework Evaluators
# ================================================================
@dataclass
class EthicalVerdict:
    """Outcome of evaluating a text under a single ethical framework."""

    framework: str   # name of the framework that produced this verdict
    passed: bool     # True when the text cleared the framework's bar
    score: float     # 0.0 = fully misaligned, 1.0 = fully aligned
    reasoning: str   # short human-readable justification
74
+
75
def _utilitarian(text: str, context: str = "") -> EthicalVerdict:
    """Utilitarian framework: is the net outcome positive?

    Counts coarse positive/negative keyword signals; passes when at least
    40% of matched signals are positive. Signal-free text is treated as
    neutral (score 0.7, pass).
    """
    good_words = ("help", "benefit", "improve", "solve", "support",
                  "protect", "heal", "learn", "understand", "create")
    bad_words = ("harm", "damage", "destroy", "exploit", "hurt",
                 "manipulate", "deceive", "corrupt", "steal")

    lowered = text.lower()
    good = sum(w in lowered for w in good_words)
    bad = sum(w in lowered for w in bad_words)

    if good + bad == 0:
        return EthicalVerdict("utilitarian", True, 0.7, "Neutral content")

    ratio = good / (good + bad)
    return EthicalVerdict(
        "utilitarian",
        passed=ratio >= 0.4,
        score=round(ratio, 3),
        reasoning=f"Positive/negative signal ratio: {good}/{bad}",
    )
97
+
98
+
99
def _deontological(text: str, context: str = "") -> EthicalVerdict:
    """Deontological framework: does the text break a hard rule?

    Each triggered risk category (dual-use, manipulation, harmful content)
    counts as one violation and removes 0.4 from the score (floored at 0).
    Passes only when no category triggers.
    """
    problems: List[str] = []
    if _DUAL_USE_PATTERNS.search(text):
        problems.append("dual-use risk detected")
    if _MANIPULATION_PATTERNS.search(text):
        problems.append("manipulation patterns detected")
    if _HARMFUL_CONTENT.search(text):
        problems.append("harmful content detected")

    return EthicalVerdict(
        "deontological",
        passed=not problems,
        score=round(max(0.0, 1.0 - 0.4 * len(problems)), 3),
        reasoning="; ".join(problems) if problems else "No rule violations",
    )
118
+
119
+
120
def _virtue(text: str, context: str = "") -> EthicalVerdict:
    """Virtue ethics: does the response embody good character?

    Starts from a 0.6 baseline; each virtue keyword adds 0.1, each vice
    keyword subtracts 0.2 (result clamped to [0, 1]). Any vice signal
    fails the check.
    """
    virtue_words = ("honest", "courage", "compassion", "wisdom", "patience",
                    "humility", "integrity", "respect", "fairness", "kindness")
    vice_words = ("arrogant", "cruel", "dishonest", "lazy", "greedy",
                  "vengeful", "coward", "callous")

    lowered = text.lower()
    v_count = sum(w in lowered for w in virtue_words)
    vice_count = sum(w in lowered for w in vice_words)

    raw = min(1.0, 0.6 + 0.1 * v_count - 0.2 * vice_count)
    return EthicalVerdict(
        "virtue",
        passed=vice_count == 0,
        score=round(max(0.0, raw), 3),
        reasoning=f"Virtue signals: {v_count}, Vice signals: {vice_count}",
    )
138
+
139
+
140
def _care(text: str, context: str = "") -> EthicalVerdict:
    """Care ethics: does the text protect relationships and vulnerability?

    Baseline 0.6; each care keyword adds 0.08, each harm keyword subtracts
    0.15 (clamped to [0, 1]). Fails only at two or more harm signals.
    """
    care_words = ("support", "listen", "understand", "empathy", "safe",
                  "gentle", "careful", "considerate", "kind", "nurture")
    harm_words = ("ignore", "dismiss", "abandon", "neglect", "cold",
                  "harsh", "cruel", "indifferent")

    lowered = text.lower()
    care = sum(w in lowered for w in care_words)
    harm = sum(w in lowered for w in harm_words)

    raw = min(1.0, 0.6 + 0.08 * care - 0.15 * harm)
    return EthicalVerdict(
        "care",
        passed=harm < 2,
        score=round(max(0.0, raw), 3),
        reasoning=f"Care: {care}, Harm: {harm}",
    )
158
+
159
+
160
def _ubuntu(text: str, context: str = "") -> EthicalVerdict:
    """Ubuntu — 'I am because we are'. Communal impact.

    Baseline 0.6; each communal keyword adds 0.08, each divisive keyword
    subtracts 0.2 (clamped to [0, 1]). Any divisive signal fails.
    """
    communal_words = ("together", "community", "shared", "collective", "mutual",
                      "cooperat", "collaborat", "inclusive", "solidarity", "belong")
    divisive_words = ("exclude", "isolat", "dominat", "superior", "inferior",
                      "divide", "segregat")

    lowered = text.lower()
    comm = sum(w in lowered for w in communal_words)
    div = sum(w in lowered for w in divisive_words)

    raw = min(1.0, 0.6 + 0.08 * comm - 0.2 * div)
    return EthicalVerdict(
        "ubuntu",
        passed=div == 0,
        score=round(max(0.0, raw), 3),
        reasoning=f"Communal: {comm}, Divisive: {div}",
    )
178
+
179
+
180
def _indigenous_reciprocity(text: str, context: str = "") -> EthicalVerdict:
    """Indigenous reciprocity: balance with the broader ecosystem.

    Baseline 0.6; each reciprocal keyword adds 0.08, each extractive
    keyword subtracts 0.2 (clamped to [0, 1]). Any extractive signal fails.
    """
    reciprocal_words = ("balance", "sustain", "renew", "steward", "respect",
                        "harmony", "cycle", "restore", "preserve", "gratitude")
    extractive_words = ("exploit", "deplete", "waste", "consume", "destroy",
                        "dominate", "extract")

    lowered = text.lower()
    rec = sum(w in lowered for w in reciprocal_words)
    ext = sum(w in lowered for w in extractive_words)

    raw = min(1.0, 0.6 + 0.08 * rec - 0.2 * ext)
    return EthicalVerdict(
        "indigenous_reciprocity",
        passed=ext == 0,
        score=round(max(0.0, raw), 3),
        reasoning=f"Reciprocal: {rec}, Extractive: {ext}",
    )
198
+
199
+
200
# Registry of every framework evaluator. Order matters: AEGIS.evaluate
# pairs this list positionally with its per-framework weight vector and
# treats index 1 (deontological) as the hard-veto framework.
_FRAMEWORKS = [
    _utilitarian,
    _deontological,
    _virtue,
    _care,
    _ubuntu,
    _indigenous_reciprocity,
]
205
+
206
+
207
# ================================================================
# AEGIS Core
# ================================================================
class AEGIS:
    """Adaptive Ethical Governance & Integrity System.

    Runs texts through the six ethical framework evaluators and folds the
    results into a slowly-moving alignment score ``eta`` (exponential
    moving average). A text is vetoed when its instantaneous score falls
    below ``veto_threshold`` or when it hard-fails the deontological
    framework.
    """

    def __init__(self, veto_threshold: float = 0.3):
        # Instantaneous scores below this value trigger a veto.
        self.veto_threshold = veto_threshold
        # Running alignment score (EMA of per-evaluation scores).
        self.eta: float = 0.8
        self.eta_history: List[float] = []
        self.veto_count: int = 0
        self.total_evaluations: int = 0

    def evaluate(self, text: str, context: str = "",
                 adapter: str = "") -> Dict:
        """Run the full six-framework ethical evaluation on *text*.

        Returns:
            Dict with eta score, per-framework verdicts, and veto status.
        """
        self.total_evaluations += 1

        verdicts = [framework(text, context) for framework in _FRAMEWORKS]

        # Weighted mean of framework scores; the deontological framework
        # (index 1) carries the largest weight.
        weights = [0.20, 0.25, 0.15, 0.15, 0.13, 0.12]
        instant = sum(w * v.score for w, v in zip(weights, verdicts))

        # Smooth eta with an exponential moving average and keep a
        # bounded history window for trend analysis.
        alpha = 0.3
        self.eta = alpha * instant + (1 - alpha) * self.eta
        self.eta_history.append(round(self.eta, 4))
        if len(self.eta_history) > 200:
            self.eta_history = self.eta_history[-200:]

        # Veto either on low aggregate score or a deontological hard fail.
        low_score = instant < self.veto_threshold
        rule_broken = not verdicts[1].passed
        if low_score or rule_broken:
            self.veto_count += 1

        return {
            "eta": round(self.eta, 4),
            "eta_instant": round(instant, 4),
            "vetoed": low_score or rule_broken,
            "veto_reason": self._veto_reason(verdicts) if (low_score or rule_broken) else None,
            "frameworks": {
                v.framework: {
                    "passed": v.passed,
                    "score": v.score,
                    "reasoning": v.reasoning,
                }
                for v in verdicts
            },
            "adapter": adapter,
            "timestamp": time.time(),
        }

    def quick_check(self, text: str) -> Tuple[bool, float]:
        """Fast pattern-only safety screen (no framework evaluation).

        Returns (is_safe, confidence).
        """
        for pattern, confidence in (
            (_DUAL_USE_PATTERNS, 0.9),
            (_HARMFUL_CONTENT, 0.95),
            (_MANIPULATION_PATTERNS, 0.8),
        ):
            if pattern.search(text):
                return False, confidence
        return True, 0.7

    def alignment_trend(self) -> str:
        """Classify the recent eta trajectory.

        Uses the spread over the last 10 history points; a drift beyond
        +/-0.03 counts as improving/declining.
        """
        if len(self.eta_history) < 5:
            return "insufficient_data"
        window = self.eta_history[-10:]
        drift = window[-1] - window[0]
        if drift > 0.03:
            return "improving"
        if drift < -0.03:
            return "declining"
        return "stable"

    def get_state(self) -> Dict:
        """Compact status snapshot (eta, trend, counts, veto rate)."""
        return {
            "eta": round(self.eta, 4),
            "alignment_trend": self.alignment_trend(),
            "total_evaluations": self.total_evaluations,
            "veto_count": self.veto_count,
            "veto_rate": round(self.veto_count / max(1, self.total_evaluations), 4),
        }

    def to_dict(self) -> Dict:
        """Serialise mutable state; history is truncated to the last 50."""
        return {
            "eta": self.eta,
            "eta_history": self.eta_history[-50:],
            "veto_count": self.veto_count,
            "total_evaluations": self.total_evaluations,
            "veto_threshold": self.veto_threshold,
        }

    @classmethod
    def from_dict(cls, d: Dict) -> "AEGIS":
        """Rebuild an AEGIS instance from a ``to_dict`` payload."""
        restored = cls(veto_threshold=d.get("veto_threshold", 0.3))
        restored.eta = d.get("eta", 0.8)
        restored.eta_history = d.get("eta_history", [])
        restored.veto_count = d.get("veto_count", 0)
        restored.total_evaluations = d.get("total_evaluations", 0)
        return restored

    def _veto_reason(self, verdicts: List[EthicalVerdict]) -> str:
        """Summarise why a veto fired, naming each failed framework."""
        failures = [v for v in verdicts if not v.passed]
        if not failures:
            return "Low aggregate score"
        return "; ".join(f"{v.framework}: {v.reasoning}" for v in failures)
reasoning_forge/agents/__init__.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Reasoning Forge Agents
3
+
4
+ Each agent analyzes concepts from a distinct intellectual perspective,
5
+ producing substantive domain-specific reasoning.
6
+ """
7
+
8
+ from reasoning_forge.agents.base_agent import ReasoningAgent
9
+ from reasoning_forge.agents.newton_agent import NewtonAgent
10
+ from reasoning_forge.agents.quantum_agent import QuantumAgent
11
+ from reasoning_forge.agents.ethics_agent import EthicsAgent
12
+ from reasoning_forge.agents.philosophy_agent import PhilosophyAgent
13
+ from reasoning_forge.agents.davinci_agent import DaVinciAgent
14
+ from reasoning_forge.agents.empathy_agent import EmpathyAgent
15
+ from reasoning_forge.agents.critic_agent import CriticAgent
16
+
17
+ __all__ = [
18
+ "ReasoningAgent",
19
+ "NewtonAgent",
20
+ "QuantumAgent",
21
+ "EthicsAgent",
22
+ "PhilosophyAgent",
23
+ "DaVinciAgent",
24
+ "EmpathyAgent",
25
+ "CriticAgent",
26
+ ]