"""Hybrid ensemble verification: blend Random Forest and QSVC predictions.

The script loads previously saved QSVC test predictions, trains a Random
Forest on the original training data, scores both on the same leading
slice of the test set, and sweeps a convex blending weight between the two
prediction vectors, reporting and plotting the ROC AUC for each weight.
"""
import os

import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

# --- CONFIGURATION ---
OUTPUT_DIR = "ensemble_results"
# exist_ok avoids the check-then-create race of "if not exists: makedirs".
os.makedirs(OUTPUT_DIR, exist_ok=True)


def _load_npz_arrays(path, keys):
    """Return the arrays stored under *keys* in the ``.npz`` file at *path*.

    The archive is opened in a ``with`` block so its file handle is closed
    once the requested arrays have been read. A missing key raises
    ``KeyError``.
    """
    with np.load(path) as archive:
        return tuple(archive[key] for key in keys)


def _targets_align(y_a, y_b):
    """Return True when both target arrays have equal shape and values."""
    # Comparing shapes first keeps np.allclose from raising on arrays that
    # cannot be broadcast against each other.
    return np.shape(y_a) == np.shape(y_b) and bool(np.allclose(y_a, y_b))


def _sweep_ensemble(y_true, c_preds, q_preds, weights):
    """Score ``w * c_preds + (1 - w) * q_preds`` for every w in *weights*.

    Prints one line per weight and returns the list of AUC scores in the
    same order as *weights*.
    """
    scores = []
    for w in weights:
        # Weighted average of the two prediction vectors.
        ensemble_preds = (w * c_preds) + ((1 - w) * q_preds)
        score = roc_auc_score(y_true, ensemble_preds)
        scores.append(score)
        print(f" Weight {w:.1f} Classical / {1-w:.1f} Quantum -> AUC: {score:.5f}")
    return scores


def _save_sweep_plot(weights, scores, c_auc, out_path):
    """Plot AUC against the classical weight and write it to *out_path*."""
    fig = plt.figure(figsize=(8, 6))
    plt.plot(weights, scores, marker='o')
    plt.axhline(y=c_auc, color='r', linestyle='--', label='Classical Baseline')
    plt.title("Ensemble Performance: Mixing Classical & Quantum")
    plt.xlabel("Weight (Classical)")
    plt.ylabel("AUC Score")
    plt.legend()
    plt.grid(True)
    plt.savefig(out_path)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


def main():
    """Run the ensemble check end to end.

    Reads ``qsvc_results/qsvc_results.npz`` (keys ``preds``, ``targets``)
    and ``qgan_data_optimized.npz`` (keys ``X_train``, ``y_train``,
    ``X_test``, ``y_test``), prints a report to stdout and writes
    ``ensemble_sweep.png`` into ``OUTPUT_DIR``. Returns ``None``; when a
    required input file is missing, or the test set is shorter than the
    QSVC prediction vector, an error is printed and the function returns
    early.
    """
    print("🚀 Starting Hybrid Ensemble Verification...")

    # 1. LOAD QSVC RESULTS (The Quantum Signal)
    q_result_path = "qsvc_results/qsvc_results.npz"
    if not os.path.exists(q_result_path):
        print("❌ Error: QSVC results not found. Run 'run_qsvc_m1_final.py' first.")
        return

    # Test-set predictions and the targets they were scored against.
    q_preds, y_test_q = _load_npz_arrays(q_result_path, ("preds", "targets"))
    q_auc = roc_auc_score(y_test_q, q_preds)
    print(f" ✅ Loaded QSVC Predictions (AUC: {q_auc:.4f})")

    # 2. LOAD ORIGINAL DATA (To Train Classical Baseline)
    possible_paths = ['vG.0.1/qgan_data_optimized.npz', 'qgan_data_optimized.npz']
    data_path = next((p for p in possible_paths if os.path.exists(p)), None)
    if not data_path:
        print("❌ Error: qgan_data_optimized.npz not found.")
        return

    X_train_full, y_train_full, X_test_full, y_test_full = _load_npz_arrays(
        data_path, ("X_train", "y_train", "X_test", "y_test")
    )

    # 3. TRAIN CLASSICAL MODEL (Random Forest)
    # Trained on the FULL training set to give Classical the best possible chance.
    print(" 🌲 Training Random Forest (Classical Baseline)...")
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(X_train_full, y_train_full)

    # Evaluate on the same leading slice of the test set that QSVC used.
    # NOTE(review): assumes run_qsvc_m1_final.py scored X_test[:TEST_SIZE];
    # confirm against that script.
    n_eval = len(q_preds)
    X_test_sub = X_test_full[:n_eval]
    y_test_sub = y_test_full[:n_eval]
    if len(X_test_sub) != n_eval:
        # Without this guard the two prediction vectors would differ in
        # length and the blend below would fail with a broadcasting error.
        print(
            f"❌ Error: Test set has only {len(X_test_sub)} samples but "
            f"QSVC produced {n_eval} predictions."
        )
        return

    # Sanity check: make sure both models are scored against the same truth.
    if _targets_align(y_test_q, y_test_sub):
        y_true = y_test_sub
    else:
        print("⚠️ Warning: Target mismatch! The test sets might not align.")
        print(" Using QSVC targets as truth.")
        # Actually do what the message says: score against QSVC's targets.
        y_true = y_test_q

    c_preds = rf.predict_proba(X_test_sub)[:, 1]
    c_auc = roc_auc_score(y_true, c_preds)
    print(f" ✅ Random Forest Predictions (AUC: {c_auc:.4f})")

    # 4. CORRELATION CHECK
    # Correlation near 1.0 means the models rank samples almost identically
    # (little to gain from blending); low correlation means they disagree.
    correlation = np.corrcoef(c_preds, q_preds)[0, 1]
    print(f" 📉 Prediction Correlation: {correlation:.4f}")
    if correlation < 0.8:
        print(" ✨ GREAT! Models are thinking differently.")
    else:
        print(" ⚠️ Models are very similar.")

    # 5. THE ENSEMBLE SWEEP
    print("\n 🤝 Calculating Ensemble Scores...")
    # 0.0 (pure quantum) to 1.0 (pure classical) in steps of 0.1. linspace
    # fixes the number of points, unlike arange with a float step.
    weights = np.linspace(0.0, 1.0, 11)
    results = _sweep_ensemble(y_true, c_preds, q_preds, weights)

    # argmax returns the first maximum, matching a strict ">" running best.
    best_idx = int(np.argmax(results))
    best_auc = results[best_idx]
    best_w = weights[best_idx]

    # 6. REPORT
    # NOTE: the best weight is picked on the same test slice it is scored
    # on, so the reported hybrid AUC is an optimistic estimate.
    print("\n" + "="*40)
    print("🏆 HYBRID ENSEMBLE RESULTS")
    print("="*40)
    print(f"🌲 Classical Best: {c_auc:.4f}")
    print(f"⚛️ Quantum Best: {q_auc:.4f}")
    print("-" * 30)
    print(f"🤝 Hybrid Best: {best_auc:.4f} (w={best_w:.1f})")
    print("="*40)

    if best_auc > c_auc:
        print("🚀 SUCCESS: Quantum Intelligence improved the result!")
        print(f" Gain: +{best_auc - c_auc:.4f}")
    else:
        print("📉 Result: No gain. Classical model dominated.")

    # Plot
    _save_sweep_plot(weights, results, c_auc, f"{OUTPUT_DIR}/ensemble_sweep.png")
    print(f" 📸 Saved sweep plot to {OUTPUT_DIR}/ensemble_sweep.png")


if __name__ == "__main__":
    main()