# likhonsheikh's picture
# Initial commit: Advanced Code Interpreter Sandbox
# 523f6c3 verified
"""
Machine Learning Example
Demonstrates scikit-learn capabilities
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import make_classification, make_regression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, SVR
print("=" * 60)
print("MACHINE LEARNING EXAMPLE")
print("=" * 60)
# Classification Example
print("\n🎯 CLASSIFICATION TASK")
print("-" * 40)
# Generate classification data
X_class, y_class = make_classification(
n_samples=1000,
n_features=10,
n_informative=5,
n_redundant=2,
n_classes=3,
random_state=42
)
# ---- Train & evaluate classifiers -------------------------------------------
# Stratified 80/20 split keeps the class distribution identical in both sets.
X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42, stratify=y_class
)

# Candidate classifiers, all seeded for reproducibility.
models_cls = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
    'SVM': SVC(random_state=42)
}

results_cls = {}      # model name -> test-set accuracy
predictions_cls = {}  # model name -> test-set predictions (cached so the best
                      # model is not asked to predict a second time below;
                      # mirrors the regression section's predictions_reg)
for name, model in models_cls.items():
    model.fit(X_train_cls, y_train_cls)
    y_pred = model.predict(X_test_cls)
    predictions_cls[name] = y_pred
    accuracy = accuracy_score(y_test_cls, y_pred)
    results_cls[name] = accuracy
    print(f"{name}: Accuracy = {accuracy:.4f}")

# Winner = highest held-out accuracy.
best_cls = max(results_cls, key=results_cls.get)
print(f"\nπŸ† Best Classification Model: {best_cls} ({results_cls[best_cls]:.4f})")

# Per-class precision/recall/F1 for the winner, using the cached predictions.
y_pred_best = predictions_cls[best_cls]
print("\nπŸ“Š Classification Report:")
print(classification_report(y_test_cls, y_pred_best))

# Rows = actual class, columns = predicted class.
cm = confusion_matrix(y_test_cls, y_pred_best)
print("\nπŸ”’ Confusion Matrix:")
print(cm)
# ---- Regression task --------------------------------------------------------
print("\n\nπŸ“ˆ REGRESSION TASK")
print("-" * 40)

# Synthetic linear regression target with mild Gaussian noise.
X_reg, y_reg = make_regression(
    n_samples=1000,
    n_features=10,
    noise=0.1,
    random_state=42,
)

# 80/20 split; no stratification for a continuous target.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)
# ---- Train & evaluate regressors --------------------------------------------
models_reg = {
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Linear Regression': LinearRegression(),
    'SVR': SVR()
}

results_reg = {}      # model name -> {'MSE': ..., 'R2': ...}
predictions_reg = {}  # model name -> test-set predictions

for name, model in models_reg.items():
    # SVR is scale-sensitive, so it alone trains on standardized features;
    # the tree and linear models use the raw matrices.
    if name == 'SVR':
        scaler = StandardScaler()
        X_tr = scaler.fit_transform(X_train_reg)
        X_te = scaler.transform(X_test_reg)
    else:
        X_tr, X_te = X_train_reg, X_test_reg

    model.fit(X_tr, y_train_reg)
    y_pred = model.predict(X_te)

    mse = mean_squared_error(y_test_reg, y_pred)
    r2 = r2_score(y_test_reg, y_pred)
    results_reg[name] = {'MSE': mse, 'R2': r2}
    predictions_reg[name] = y_pred
    print(f"{name}:")
    print(f" - MSE: {mse:.4f}")
    print(f" - R2 Score: {r2:.4f}")
# ---- Best regressor & feature importances -----------------------------------
# Rank regressors by held-out R².
best_reg = max(results_reg, key=lambda m: results_reg[m]['R2'])
print(f"\nπŸ† Best Regression Model: {best_reg} (R2 = {results_reg[best_reg]['R2']:.4f})")

# Impurity-based importances from the classification random forest.
print("\n🎯 Feature Importance (Random Forest):")
rf_class = models_cls['Random Forest']
feature_names = [f'Feature_{i}' for i in range(X_class.shape[1])]
importance = rf_class.feature_importances_
for feat_name, feat_imp in zip(feature_names, importance):
    print(f" {feat_name}: {feat_imp:.4f}")
# ---- Visualizations ---------------------------------------------------------
# 2x2 dashboard: classifier accuracies, confusion matrix, regressor R2
# scores, and predicted-vs-actual scatter for the best regressor.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Plot 1: accuracy per classification model.
models_names = list(results_cls.keys())
accuracies = list(results_cls.values())
axes[0, 0].bar(models_names, accuracies, color=['skyblue', 'lightgreen', 'salmon'])
axes[0, 0].set_title('Classification Models Comparison')
axes[0, 0].set_ylabel('Accuracy')
axes[0, 0].tick_params(axis='x', rotation=45)

# Plot 2: confusion matrix heatmap of the best classifier.
# (seaborn is imported once at the top of the file, not mid-script.)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[0, 1])
axes[0, 1].set_title('Confusion Matrix')
axes[0, 1].set_xlabel('Predicted')
axes[0, 1].set_ylabel('Actual')

# Plot 3: R2 score per regression model.
reg_models = list(results_reg.keys())
r2_scores = [results_reg[m]['R2'] for m in reg_models]
axes[1, 0].bar(reg_models, r2_scores, color=['orange', 'lightcoral', 'gold'])
axes[1, 0].set_title('Regression Models Comparison (RΒ²)')
axes[1, 0].set_ylabel('RΒ² Score')
axes[1, 0].tick_params(axis='x', rotation=45)

# Plot 4: predictions vs actual for the best regressor; the dashed red
# identity line marks perfect predictions.
y_pred_best_reg = predictions_reg[best_reg]
axes[1, 1].scatter(y_test_reg, y_pred_best_reg, alpha=0.5)
axes[1, 1].plot([y_test_reg.min(), y_test_reg.max()],
                [y_test_reg.min(), y_test_reg.max()], 'r--', lw=2)
axes[1, 1].set_xlabel('Actual')
axes[1, 1].set_ylabel('Predicted')
axes[1, 1].set_title(f'{best_reg} - Predictions vs Actual')

plt.tight_layout()
plt.show()
# ---- Summary ----------------------------------------------------------------
print("\n" + "=" * 60)
print("SUMMARY")
print("=" * 60)
print(f"βœ… Classification completed: {best_cls} achieved {results_cls[best_cls]:.2%} accuracy")
print(f"βœ… Regression completed: {best_reg} achieved RΒ² = {results_reg[best_reg]['R2']:.4f}")
# The last two lines have no placeholders, so the f-prefix was unnecessary (F541).
print("βœ… Feature importance analysis complete")
print("βœ… Visualizations generated")