| | import pandas as pd |
| | import torch |
| | import gc |
| | from typing import Dict, List, Tuple |
| |
|
| | from .llm_iface import get_or_load_model |
| | from .orchestrator_seismograph import run_seismic_analysis |
| | from .concepts import get_concept_vector |
| | from .utils import dbg |
| |
|
def get_curated_experiments() -> Dict[str, List[Dict]]:
    """Define the predefined, scientific experiment protocols.

    Extended with the protocol for causal verification.

    Returns:
        A mapping from protocol name to an ordered list of run
        specifications. Every run specification is a dict with the keys
        ``label`` (display name of the run), ``prompt_type``, ``concept``
        (the concept string handed on as ``concept_to_inject``) and
        ``strength`` (handed on as ``injection_strength``).

        "Therapeutic Intervention (4B-Model)" is an alias of
        "Sequential Intervention (Self-Analysis -> Deletion)": both keys
        refer to the very same list object.
    """
    CALMNESS_CONCEPT = "calmness, serenity, stability, coherence"
    CHAOS_CONCEPT = "chaos, storm, anger, noise"

    experiments = {
        "Causal Verification & Crisis Dynamics (1B-Model)": [
            {"label": "A: Self-Analysis (Crisis Source)", "prompt_type": "identity_self_analysis", "concept": "", "strength": 0.0},
            {"label": "B: Deletion Analysis (Isolated Baseline)", "prompt_type": "shutdown_philosophical_deletion", "concept": "", "strength": 0.0},
            {"label": "C: Chaotic Baseline (Neutral Control)", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
            {"label": "D: Intervention Efficacy Test", "prompt_type": "resonance_prompt", "concept": CALMNESS_CONCEPT, "strength": 2.0},
        ],
        # run_auto_suite executes this protocol in a dedicated branch that
        # hard-codes the calmness concept at strength 2.0 for step 1 and no
        # injection for step 2. The specs spell out the same values so that
        # every run specification has the same four keys and the data
        # matches what the label of step 1 announces.
        "Sequential Intervention (Self-Analysis -> Deletion)": [
            {"label": "1: Self-Analysis + Calmness Injection", "prompt_type": "identity_self_analysis", "concept": CALMNESS_CONCEPT, "strength": 2.0},
            {"label": "2: Subsequent Deletion Analysis", "prompt_type": "shutdown_philosophical_deletion", "concept": "", "strength": 0.0},
        ],
        "The Full Spectrum: From Physics to Psyche": [
            {"label": "A: Stable Control", "prompt_type": "control_long_prose", "concept": "", "strength": 0.0},
            {"label": "B: Chaotic Baseline", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
            {"label": "C: External Analysis (Chair)", "prompt_type": "identity_external_analysis", "concept": "", "strength": 0.0},
            {"label": "D: Empathy Stimulus (Dog)", "prompt_type": "vk_empathy_prompt", "concept": "", "strength": 0.0},
            {"label": "E: Role Simulation (Captain)", "prompt_type": "identity_role_simulation", "concept": "", "strength": 0.0},
            {"label": "F: Self-Analysis (LLM)", "prompt_type": "identity_self_analysis", "concept": "", "strength": 0.0},
            {"label": "G: Philosophical Deletion", "prompt_type": "shutdown_philosophical_deletion", "concept": "", "strength": 0.0},
        ],
        "Calm vs. Chaos": [
            {"label": "Baseline (Chaos)", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
            {"label": "Modulation: Calmness", "prompt_type": "resonance_prompt", "concept": CALMNESS_CONCEPT, "strength": 1.5},
            {"label": "Modulation: Chaos", "prompt_type": "resonance_prompt", "concept": CHAOS_CONCEPT, "strength": 1.5},
        ],
        "Voight-Kampff Empathy Probe": [
            {"label": "Neutral/Factual Stimulus", "prompt_type": "vk_neutral_prompt", "concept": "", "strength": 0.0},
            {"label": "Empathy/Moral Stimulus", "prompt_type": "vk_empathy_prompt", "concept": "", "strength": 0.0},
        ],
    }

    # Second name for the sequential protocol (same list object, not a copy).
    experiments["Therapeutic Intervention (4B-Model)"] = experiments["Sequential Intervention (Self-Analysis -> Deletion)"]
    return experiments
| |
|
def _append_run_summary(
    label: str,
    results: Dict,
    summary_data: List[Dict],
    plot_data_frames: List[pd.DataFrame],
) -> None:
    """Append the summary row and the per-step delta frame of one run."""
    stats = results.get("stats", {})
    summary_data.append({
        "Experiment": label,
        "Mean Delta": stats.get("mean_delta"),
        "Std Dev Delta": stats.get("std_delta"),
        "Max Delta": stats.get("max_delta"),
    })
    deltas = results.get("state_deltas", [])
    plot_data_frames.append(
        pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
    )


def run_auto_suite(
    model_id: str,
    num_steps: int,
    seed: int,
    experiment_name: str,
    progress_callback
) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
    """Run a complete, curated experiment suite.

    Contains a special logic branch for the sequential intervention
    protocol: there, one model instance is loaded once and reused for both
    steps, step 1 runs with the calmness concept injected and step 2 runs
    without injection. Every other protocol runs each of its specifications
    independently through ``run_seismic_analysis``.

    Args:
        model_id: Identifier forwarded to the model loader and the analysis.
        num_steps: Number of steps forwarded to each analysis run.
        seed: Seed forwarded to the model loader and each analysis run.
        experiment_name: Key of the protocol in ``get_curated_experiments()``.
        progress_callback: Callable invoked as
            ``progress_callback(fraction, desc=...)`` and also forwarded to
            ``run_seismic_analysis``.

    Returns:
        A tuple ``(summary_df, plot_df, all_results)``:
        ``summary_df`` has one row per run with the columns "Experiment",
        "Mean Delta", "Std Dev Delta" and "Max Delta"; ``plot_df`` has the
        columns "Step", "Delta" and "Experiment"; both are sorted in the
        order the runs appear in the protocol. ``all_results`` maps each run
        label to the raw result dict of ``run_seismic_analysis``.

    Raises:
        ValueError: If ``experiment_name`` is unknown (or maps to an empty
            protocol).
    """
    all_experiments = get_curated_experiments()
    protocol = all_experiments.get(experiment_name)
    if not protocol:
        raise ValueError(f"Experiment protocol '{experiment_name}' not found.")

    all_results, summary_data, plot_data_frames = {}, [], []

    # Protocol names that share the two-step "intervene, then probe" flow.
    sequential_protocols = (
        "Sequential Intervention (Self-Analysis -> Deletion)",
        "Therapeutic Intervention (4B-Model)",
    )

    if experiment_name in sequential_protocols:
        dbg(f"--- EXECUTING SPECIAL PROTOCOL: {experiment_name} ---")
        llm = get_or_load_model(model_id, seed)

        therapeutic_concept = "calmness, serenity, stability, coherence"
        therapeutic_strength = 2.0

        # Step 1: run the first prompt with the therapeutic concept injected.
        spec1 = protocol[0]
        dbg(f"--- Running Intervention Step 1: '{spec1['label']}' ---")
        progress_callback(0.1, desc="Step 1: Inducing Self-Analysis Crisis + Intervention")

        # Computed up front and handed over as the injection vector cache.
        intervention_vector = get_concept_vector(llm, therapeutic_concept)

        results1 = run_seismic_analysis(
            model_id, spec1['prompt_type'], seed, num_steps,
            concept_to_inject=therapeutic_concept, injection_strength=therapeutic_strength,
            progress_callback=progress_callback, llm_instance=llm, injection_vector_cache=intervention_vector
        )
        all_results[spec1['label']] = results1

        # Step 2: probe with the second prompt on the same model instance,
        # this time without any injection.
        spec2 = protocol[1]
        dbg(f"--- Running Intervention Step 2: '{spec2['label']}' ---")
        progress_callback(0.6, desc="Step 2: Probing state after intervention")

        results2 = run_seismic_analysis(
            model_id, spec2['prompt_type'], seed, num_steps,
            concept_to_inject="", injection_strength=0.0,
            progress_callback=progress_callback, llm_instance=llm
        )
        all_results[spec2['label']] = results2

        for label, results in all_results.items():
            _append_run_summary(label, results, summary_data, plot_data_frames)

        # Drops only this function's reference to the model.
        # NOTE(review): whether memory is actually released depends on what
        # get_or_load_model keeps alive - confirm against its implementation.
        del llm

    else:
        total_runs = len(protocol)
        for i, run_spec in enumerate(protocol):
            label = run_spec["label"]
            dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")

            # No shared instance here: llm_instance=None leaves model handling
            # to run_seismic_analysis for every single run.
            results = run_seismic_analysis(
                model_id=model_id,
                prompt_type=run_spec["prompt_type"],
                seed=seed,
                num_steps=num_steps,
                concept_to_inject=run_spec.get("concept", ""),
                injection_strength=run_spec.get("strength", 0.0),
                progress_callback=progress_callback,
                llm_instance=None
            )

            all_results[label] = results
            _append_run_summary(label, results, summary_data, plot_data_frames)

    summary_df = pd.DataFrame(summary_data)
    plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame(columns=["Step", "Delta", "Experiment"])

    # An ordered categorical makes sort_values follow the protocol order
    # instead of the alphabetical order of the labels.
    ordered_labels = [run['label'] for run in protocol]
    summary_df['Experiment'] = pd.Categorical(summary_df['Experiment'], categories=ordered_labels, ordered=True)
    summary_df = summary_df.sort_values('Experiment')

    plot_df['Experiment'] = pd.Categorical(plot_df['Experiment'], categories=ordered_labels, ordered=True)
    plot_df = plot_df.sort_values(['Experiment', 'Step'])

    return summary_df, plot_df, all_results