"""Machine learning example demonstrating scikit-learn capabilities.

The script runs top to bottom and:

1. Trains three classifiers on a synthetic 3-class dataset, prints their
   accuracy, and prints a classification report and confusion matrix for
   the most accurate one.
2. Trains three regressors on a synthetic regression dataset and prints
   their MSE and R2 score.
3. Prints the feature importances of the Random Forest classifier.
4. Shows a 2x2 figure comparing the models and summarising the best ones.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import make_classification, make_regression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, SVR

print("=" * 60)
print("MACHINE LEARNING EXAMPLE")
print("=" * 60)

# ---------------------------------------------------------------------------
# Classification Example
# ---------------------------------------------------------------------------
print("\n🎯 CLASSIFICATION TASK")
print("-" * 40)

# Generate classification data
X_class, y_class = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=2,
    n_classes=3,
    random_state=42,
)

# Split data; stratify on the labels so both splits are drawn per class.
X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42, stratify=y_class
)

# Train models
# NOTE(review): SVC is trained on unscaled features here, while SVR in the
# regression section is scaled. Left as-is to keep the reported accuracies
# unchanged -- confirm whether that is intended.
models_cls = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
    'SVM': SVC(random_state=42),
}

results_cls = {}      # model name -> test accuracy
predictions_cls = {}  # model name -> predictions on X_test_cls
for name, model in models_cls.items():
    model.fit(X_train_cls, y_train_cls)
    y_pred = model.predict(X_test_cls)
    accuracy = accuracy_score(y_test_cls, y_pred)
    results_cls[name] = accuracy
    predictions_cls[name] = y_pred
    print(f"{name}: Accuracy = {accuracy:.4f}")

# Best model
best_cls = max(results_cls, key=results_cls.get)
print(f"\n🏆 Best Classification Model: {best_cls} ({results_cls[best_cls]:.4f})")

# Classification report for best model. Reuse the predictions computed in the
# training loop instead of running the model on the test split a second time.
y_pred_best = predictions_cls[best_cls]
print("\n📊 Classification Report:")
print(classification_report(y_test_cls, y_pred_best))

# Confusion matrix
cm = confusion_matrix(y_test_cls, y_pred_best)
print("\n🔢 Confusion Matrix:")
print(cm)

# ---------------------------------------------------------------------------
# Regression Example
# ---------------------------------------------------------------------------
print("\n\n📈 REGRESSION TASK")
print("-" * 40)

# Generate regression data
X_reg, y_reg = make_regression(
    n_samples=1000,
    n_features=10,
    noise=0.1,
    random_state=42,
)

# Split data
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

# Train models
models_reg = {
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Linear Regression': LinearRegression(),
    # Scale features for SVR. The pipeline fits the scaler on the training
    # split during fit() and applies the same transform in predict(), so the
    # training loop needs no per-model special case.
    'SVR': make_pipeline(StandardScaler(), SVR()),
}

results_reg = {}      # model name -> {'MSE': ..., 'R2': ...}
predictions_reg = {}  # model name -> predictions on X_test_reg
for name, model in models_reg.items():
    model.fit(X_train_reg, y_train_reg)
    y_pred = model.predict(X_test_reg)

    mse = mean_squared_error(y_test_reg, y_pred)
    r2 = r2_score(y_test_reg, y_pred)
    results_reg[name] = {'MSE': mse, 'R2': r2}
    predictions_reg[name] = y_pred

    print(f"{name}:")
    print(f" - MSE: {mse:.4f}")
    print(f" - R2 Score: {r2:.4f}")

# Best model (by R2 score)
best_reg = max(results_reg, key=lambda k: results_reg[k]['R2'])
print(f"\n🏆 Best Regression Model: {best_reg} (R2 = {results_reg[best_reg]['R2']:.4f})")

# ---------------------------------------------------------------------------
# Feature importance (Random Forest)
# ---------------------------------------------------------------------------
# Uses the Random Forest *classifier* fitted above, not the regressor.
print("\n🎯 Feature Importance (Random Forest):")
rf_class = models_cls['Random Forest']
feature_names = [f'Feature_{i}' for i in range(X_class.shape[1])]
importance = rf_class.feature_importances_
for feature_name, imp in zip(feature_names, importance):
    print(f" {feature_name}: {imp:.4f}")

# ---------------------------------------------------------------------------
# Visualizations
# ---------------------------------------------------------------------------
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Plot 1: Model comparison (Classification)
models_names = list(results_cls.keys())
accuracies = list(results_cls.values())
axes[0, 0].bar(models_names, accuracies, color=['skyblue', 'lightgreen', 'salmon'])
axes[0, 0].set_title('Classification Models Comparison')
axes[0, 0].set_ylabel('Accuracy')
axes[0, 0].tick_params(axis='x', rotation=45)

# Plot 2: Confusion Matrix (of the best classifier)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[0, 1])
axes[0, 1].set_title('Confusion Matrix')
axes[0, 1].set_xlabel('Predicted')
axes[0, 1].set_ylabel('Actual')

# Plot 3: Model comparison (Regression)
reg_models = list(results_reg.keys())
r2_scores = [results_reg[m]['R2'] for m in reg_models]
axes[1, 0].bar(reg_models, r2_scores, color=['orange', 'lightcoral', 'gold'])
axes[1, 0].set_title('Regression Models Comparison (R²)')
axes[1, 0].set_ylabel('R² Score')
axes[1, 0].tick_params(axis='x', rotation=45)

# Plot 4: Predictions vs Actual (best model)
y_pred_best_reg = predictions_reg[best_reg]
axes[1, 1].scatter(y_test_reg, y_pred_best_reg, alpha=0.5)
# Dashed y = x reference line spanning the range of the true targets.
y_lo, y_hi = y_test_reg.min(), y_test_reg.max()
axes[1, 1].plot([y_lo, y_hi], [y_lo, y_hi], 'r--', lw=2)
axes[1, 1].set_xlabel('Actual')
axes[1, 1].set_ylabel('Predicted')
axes[1, 1].set_title(f'{best_reg} - Predictions vs Actual')

plt.tight_layout()
plt.show()

# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
print("\n" + "=" * 60)
print("SUMMARY")
print("=" * 60)
print(f"✅ Classification completed: {best_cls} achieved {results_cls[best_cls]:.2%} accuracy")
print(f"✅ Regression completed: {best_reg} achieved R² = {results_reg[best_reg]['R2']:.4f}")
print("✅ Feature importance analysis complete")
print("✅ Visualizations generated")