# QGAN_Project / vGe0.1 / verify_ensemble.py
# Uploaded by 1bnjmn3 ("Add files using upload-large-folder tool"),
# commit 0f755ec (verified).
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
# --- CONFIGURATION ---
# Directory that receives the ensemble sweep plot.
OUTPUT_DIR = "ensemble_results"
# exist_ok=True creates the directory in one race-free step instead of the
# check-then-create pattern; it also fails fast if the path exists but is
# not a directory.
os.makedirs(OUTPUT_DIR, exist_ok=True)
def main():
    """Compare saved QSVC predictions with a Random Forest and sweep their blend.

    Workflow:
      1. Load QSVC predictions and targets from ``qsvc_results/qsvc_results.npz``.
      2. Load the train/test arrays from ``qgan_data_optimized.npz``.
      3. Fit a Random Forest on the full training set.
      4. Score the forest on the first ``len(q_preds)`` test rows.
      5. Report the correlation between the two models' predictions.
      6. Sweep the blend weight from 0.0 (pure quantum) to 1.0 (pure
         classical), print the AUC of each blend, report the best one and
         save a plot of the sweep into ``OUTPUT_DIR``.

    All AUC scores are computed against the QSVC targets, so the printed
    "Using QSVC targets as truth" fallback is what actually happens when the
    two test sets do not line up.

    Returns:
        None. Results are printed; the function returns early (after printing
        an error) when a required input file is missing or the test set is
        shorter than the QSVC prediction vector.
    """
    print("πŸš€ Starting Hybrid Ensemble Verification...")

    # 1. LOAD QSVC RESULTS (The Quantum Signal)
    q_result_path = "qsvc_results/qsvc_results.npz"
    if not os.path.exists(q_result_path):
        print("❌ Error: QSVC results not found. Run 'run_qsvc_m1_final.py' first.")
        return

    # The context manager closes the underlying .npz archive once the arrays
    # have been read into memory.
    with np.load(q_result_path) as q_data:
        q_preds = q_data['preds']      # QSVC test predictions
        y_test_q = q_data['targets']   # Targets the QSVC was scored against

    # Computed once and reused in the final report.
    q_auc = roc_auc_score(y_test_q, q_preds)
    print(f" βœ… Loaded QSVC Predictions (AUC: {q_auc:.4f})")

    # 2. LOAD ORIGINAL DATA (To Train Classical Baseline)
    possible_paths = ['vG.0.1/qgan_data_optimized.npz', 'qgan_data_optimized.npz']
    data_path = next((p for p in possible_paths if os.path.exists(p)), None)
    if not data_path:
        print("❌ Error: qgan_data_optimized.npz not found.")
        return

    with np.load(data_path) as data:
        X_train_full = data['X_train']
        y_train_full = data['y_train']
        X_test_full = data['X_test']
        y_test_full = data['y_test']

    # The classical model is evaluated on the same leading slice of the test
    # set that produced the QSVC predictions, so the test set must contain at
    # least that many rows.
    n_eval = len(q_preds)
    if len(X_test_full) < n_eval or len(y_test_full) < n_eval:
        print("❌ Error: Test set has fewer samples than the QSVC predictions.")
        return

    # 3. TRAIN CLASSICAL MODEL (Random Forest)
    # We train on the FULL dataset to give Classical the best possible chance.
    print(" 🌲 Training Random Forest (Classical Baseline)...")
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(X_train_full, y_train_full)

    # Get predictions on the SAME subset used by QSVC.
    # Note: run_qsvc_m1_final.py used X_test[:TEST_SIZE].
    X_test_sub = X_test_full[:n_eval]
    y_test_sub = y_test_full[:n_eval]

    # Sanity check: the targets stored with the QSVC results should match the
    # leading slice of the original test targets.
    if not np.allclose(y_test_q, y_test_sub):
        print("⚠️ Warning: Target mismatch! The test sets might not align.")
        print(" Using QSVC targets as truth.")

    # Every score below uses the QSVC targets. When the sanity check passes
    # this is equivalent to using y_test_sub; when it fails it matches the
    # warning printed above.
    y_true = y_test_q

    c_preds = rf.predict_proba(X_test_sub)[:, 1]
    c_auc = roc_auc_score(y_true, c_preds)
    print(f" βœ… Random Forest Predictions (AUC: {c_auc:.4f})")

    # 4. CORRELATION CHECK
    # If correlation is 1.0, they are identical (Ensemble useless).
    # If correlation is low, they are "Orthogonal" (Ensemble powerful).
    correlation = np.corrcoef(c_preds, q_preds)[0, 1]
    print(f" πŸ“‰ Prediction Correlation: {correlation:.4f}")
    if correlation < 0.8:
        print(" ✨ GREAT! Models are thinking differently.")
    else:
        print(" ⚠️ Models are very similar.")

    # 5. THE ENSEMBLE SWEEP
    # Try different weights to find the best mix.
    print("\n 🀝 Calculating Ensemble Scores...")

    # 0.0 (Pure Quantum) .. 1.0 (Pure Classical) in 11 steps. linspace fixes
    # the number of points, unlike arange with a float step, and the same
    # grid is reused for the plot.
    weights = np.linspace(0.0, 1.0, 11)
    results = []
    for w in weights:
        # Weighted average of the two models' scores.
        ensemble_preds = (w * c_preds) + ((1 - w) * q_preds)
        score = roc_auc_score(y_true, ensemble_preds)
        results.append(score)
        print(f" Weight {w:.1f} Classical / {1-w:.1f} Quantum -> AUC: {score:.5f}")

    # argmax returns the first maximum, i.e. the lowest classical weight that
    # reaches the best AUC.
    best_idx = int(np.argmax(results))
    best_auc = results[best_idx]
    best_w = weights[best_idx]

    # 6. REPORT
    print("\n" + "="*40)
    print("πŸ† HYBRID ENSEMBLE RESULTS")
    print("="*40)
    print(f"🌲 Classical Best: {c_auc:.4f}")
    print(f"βš›οΈ Quantum Best: {q_auc:.4f}")
    print("-" * 30)
    print(f"🀝 Hybrid Best: {best_auc:.4f} (w={best_w:.1f})")
    print("="*40)
    if best_auc > c_auc:
        print("πŸš€ SUCCESS: Quantum Intelligence improved the result!")
        print(f" Gain: +{best_auc - c_auc:.4f}")
    else:
        print("πŸ“‰ Result: No gain. Classical model dominated.")

    # Plot
    fig = plt.figure(figsize=(8, 6))
    try:
        plt.plot(weights, results, marker='o')
        plt.axhline(y=c_auc, color='r', linestyle='--', label='Classical Baseline')
        plt.title("Ensemble Performance: Mixing Classical & Quantum")
        plt.xlabel("Weight (Classical)")
        plt.ylabel("AUC Score")
        plt.legend()
        plt.grid(True)
        plt.savefig(f"{OUTPUT_DIR}/ensemble_sweep.png")
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)
    print(f" πŸ“Έ Saved sweep plot to {OUTPUT_DIR}/ensemble_sweep.png")
# Run the verification only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()