|
|
""" |
|
|
Machine Learning Example |
|
|
Demonstrates scikit-learn capabilities |
|
|
""" |
|
|
|
|
|
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import make_classification, make_regression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, SVR
|
|
|
|
|
# Opening banner for the demo run.
banner = "=" * 60
print(banner)
print("MACHINE LEARNING EXAMPLE")
print(banner)
|
|
|
|
|
|
|
|
print("\nπ― CLASSIFICATION TASK")
print("-" * 40)

# Synthetic 3-class problem: 10 features, of which 5 carry signal and 2 are
# redundant linear combinations of the informative ones.
_cls_spec = {
    "n_samples": 1000,
    "n_features": 10,
    "n_informative": 5,
    "n_redundant": 2,
    "n_classes": 3,
    "random_state": 42,
}
X_class, y_class = make_classification(**_cls_spec)

# Stratified 80/20 hold-out split keeps the class proportions in both folds.
_cls_split = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42, stratify=y_class
)
X_train_cls, X_test_cls, y_train_cls, y_test_cls = _cls_split
|
|
|
|
|
|
|
|
# Three classifier families, each seeded for reproducibility.
models_cls = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
    'SVM': SVC(random_state=42),
}

# Fit every model and record its held-out accuracy, keyed by model name.
results_cls = {}
for model_name, clf in models_cls.items():
    clf.fit(X_train_cls, y_train_cls)
    predicted = clf.predict(X_test_cls)
    score = accuracy_score(y_test_cls, predicted)
    results_cls[model_name] = score
    print(f"{model_name}: Accuracy = {score:.4f}")
|
|
|
|
|
|
|
|
# Rank the classifiers by accuracy; ties resolve to the first one seen.
best_cls, _best_acc = max(results_cls.items(), key=lambda kv: kv[1])
print(f"\nπ Best Classification Model: {best_cls} ({results_cls[best_cls]:.4f})")

# Per-class precision/recall/F1 for the winning model.
best_model_cls = models_cls[best_cls]
y_pred_best = best_model_cls.predict(X_test_cls)
print("\nπ Classification Report:")
print(classification_report(y_test_cls, y_pred_best))

# Confusion matrix: rows are actual classes, columns are predictions.
cm = confusion_matrix(y_test_cls, y_pred_best)
print("\nπ’ Confusion Matrix:")
print(cm)
|
|
|
|
|
|
|
|
print("\n\nπ REGRESSION TASK")
print("-" * 40)

# Synthetic linear target over 10 features with light Gaussian noise.
_reg_spec = {
    "n_samples": 1000,
    "n_features": 10,
    "noise": 0.1,
    "random_state": 42,
}
X_reg, y_reg = make_regression(**_reg_spec)

# Plain 80/20 split — stratification does not apply to continuous targets.
_reg_split = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = _reg_split
|
|
|
|
|
|
|
|
# Three regressor families; the forest is seeded for reproducibility.
models_reg = {
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Linear Regression': LinearRegression(),
    'SVR': SVR(),
}

# Fit each regressor, keep its test-fold predictions and MSE/RΒ² scores.
results_reg = {}
predictions_reg = {}
for reg_name, reg in models_reg.items():
    # SVR is sensitive to feature scale; standardize its inputs only.
    # The scaler is fit on the training fold alone to avoid leakage.
    if reg_name == 'SVR':
        scaler = StandardScaler()
        fit_X = scaler.fit_transform(X_train_reg)
        eval_X = scaler.transform(X_test_reg)
    else:
        fit_X, eval_X = X_train_reg, X_test_reg

    reg.fit(fit_X, y_train_reg)
    y_pred = reg.predict(eval_X)

    mse = mean_squared_error(y_test_reg, y_pred)
    r2 = r2_score(y_test_reg, y_pred)
    results_reg[reg_name] = {'MSE': mse, 'R2': r2}
    predictions_reg[reg_name] = y_pred
    print(f"{reg_name}:")
    print(f"  - MSE: {mse:.4f}")
    print(f"  - R2 Score: {r2:.4f}")
|
|
|
|
|
|
|
|
# Pick the regressor with the highest test-fold RΒ²; ties go to the first seen.
best_reg, _best_metrics = max(results_reg.items(), key=lambda kv: kv[1]['R2'])
print(f"\nπ Best Regression Model: {best_reg} (R2 = {results_reg[best_reg]['R2']:.4f})")
|
|
|
|
|
|
|
|
# Impurity-based importances from the classification random forest,
# labelled with synthetic column names Feature_0 ... Feature_9.
print("\nπ― Feature Importance (Random Forest):")
rf_class = models_cls['Random Forest']
importance = rf_class.feature_importances_
for i, imp in enumerate(importance):
    print(f"  Feature_{i}: {imp:.4f}")
|
|
|
|
|
|
|
|
# 2x2 results dashboard: model comparisons, confusion matrix, and the best
# regressor's predicted-vs-actual scatter.
# NOTE: the seaborn import previously lived here mid-script; it now sits in
# the top-of-file import block per PEP 8.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Top-left: classification accuracy per model.
models_names = list(results_cls.keys())
accuracies = list(results_cls.values())
axes[0, 0].bar(models_names, accuracies, color=['skyblue', 'lightgreen', 'salmon'])
axes[0, 0].set_title('Classification Models Comparison')
axes[0, 0].set_ylabel('Accuracy')
axes[0, 0].tick_params(axis='x', rotation=45)

# Top-right: confusion matrix of the best classifier (integer cell counts).
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[0, 1])
axes[0, 1].set_title('Confusion Matrix')
axes[0, 1].set_xlabel('Predicted')
axes[0, 1].set_ylabel('Actual')

# Bottom-left: RΒ² score per regression model.
reg_models = list(results_reg.keys())
r2_scores = [results_reg[m]['R2'] for m in reg_models]
axes[1, 0].bar(reg_models, r2_scores, color=['orange', 'lightcoral', 'gold'])
axes[1, 0].set_title('Regression Models Comparison (RΒ²)')
axes[1, 0].set_ylabel('RΒ² Score')
axes[1, 0].tick_params(axis='x', rotation=45)

# Bottom-right: best regressor's predictions vs actual values, with the
# identity line (y = x) as the perfect-prediction reference.
y_pred_best_reg = predictions_reg[best_reg]
axes[1, 1].scatter(y_test_reg, y_pred_best_reg, alpha=0.5)
axes[1, 1].plot([y_test_reg.min(), y_test_reg.max()],
                [y_test_reg.min(), y_test_reg.max()], 'r--', lw=2)
axes[1, 1].set_xlabel('Actual')
axes[1, 1].set_ylabel('Predicted')
axes[1, 1].set_title(f'{best_reg} - Predictions vs Actual')

plt.tight_layout()
plt.show()
|
|
|
|
|
|
|
|
# Final summary of both tasks.
print("\n" + "=" * 60)
print("SUMMARY")
print("=" * 60)
# The four prints below were previously split across two physical lines each
# (the mangled check-mark glyph swallowed the rest of the statement), which
# left unterminated string literals — a SyntaxError. Rejoined as single-line
# statements; the placeholder-free strings no longer need the f prefix.
print(f"β Classification completed: {best_cls} achieved {results_cls[best_cls]:.2%} accuracy")
print(f"β Regression completed: {best_reg} achieved RΒ² = {results_reg[best_reg]['R2']:.4f}")
print("β Feature importance analysis complete")
print("β Visualizations generated")
|
|
|