| | |
| | import numpy as np |
| |
|
| | import matplotlib.pyplot as plt |
| | |
| | |
| |
|
| | from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier |
| | |
| |
|
| | |
| |
|
def rf_importance(X_train, y_train, max_depth=10, class_weight=None, top_n=15,
                  n_estimators=50, random_state=0):
    """Fit a random forest, print the feature ranking and plot the top features.

    Parameters
    ----------
    X_train : array-like with a ``shape`` attribute
        Training features. If it exposes ``columns`` (e.g. a pandas
        DataFrame) those are used as feature names in the printed ranking;
        otherwise positional names ``x0, x1, ...`` are used.
    y_train : array-like
        Training targets, passed unchanged to ``model.fit``.
    max_depth, class_weight, n_estimators, random_state
        Forwarded to ``RandomForestClassifier``.
    top_n : int
        Maximum number of features to draw in the bar chart. Clamped to the
        range ``[0, n_features]`` so a too-large or negative value cannot
        cause a bar/tick length mismatch.

    Returns
    -------
    RandomForestClassifier
        The fitted model.

    Side effects
    ------------
    Prints the full ranking to stdout and shows a matplotlib figure
    via ``plt.show()``.
    """
    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=random_state,
        class_weight=class_weight,
        n_jobs=-1,
    )
    model.fit(X_train, y_train)

    importances = model.feature_importances_
    # Feature positions ordered from most to least important.
    indices = np.argsort(importances)[::-1]
    # Spread of each feature's importance across the individual trees;
    # used as the error bar in the plot.
    std = np.std([tree.feature_importances_ for tree in model.estimators_],
                 axis=0)

    # Plain ndarrays have no ``columns``; fall back to positional names
    # instead of failing after the model has already been trained.
    feat_labels = getattr(X_train, "columns", None)
    if feat_labels is None:
        feat_labels = ["x%d" % i for i in range(len(importances))]

    print("Feature ranking:")
    for rank, idx in enumerate(indices, start=1):
        print("%d. feature no:%d feature name:%s (%f)"
              % (rank, idx, feat_labels[idx], importances[idx]))

    # Never try to plot more bars than there are features (or a negative
    # count): bar heights and x positions must have the same length.
    n_shown = max(0, min(top_n, len(indices)))
    top_indices = indices[:n_shown]
    positions = range(n_shown)

    plt.figure()
    plt.title("Feature importances top %d" % n_shown)
    plt.bar(positions, importances[top_indices],
            color="r", yerr=std[top_indices], align="center")
    # Ticks are labelled with the feature's column position, matching the
    # "feature no" printed in the ranking above.
    plt.xticks(positions, top_indices)
    plt.xlim([-1, n_shown])
    plt.show()

    return model
| |
|
| |
|
def gbt_importance(X_train, y_train, max_depth=10, top_n=15, n_estimators=50,
                   random_state=0):
    """Fit a gradient-boosted classifier, print the ranking and plot top features.

    Parameters
    ----------
    X_train : array-like with a ``shape`` attribute
        Training features. If it exposes ``columns`` (e.g. a pandas
        DataFrame) those are used as feature names in the printed ranking;
        otherwise positional names ``x0, x1, ...`` are used.
    y_train : array-like
        Training targets, passed unchanged to ``model.fit``.
    max_depth, n_estimators, random_state
        Forwarded to ``GradientBoostingClassifier``.
    top_n : int
        Maximum number of features to draw in the bar chart. Clamped to the
        range ``[0, n_features]`` so a too-large or negative value cannot
        cause a bar/tick length mismatch.

    Returns
    -------
    GradientBoostingClassifier
        The fitted model.

    Side effects
    ------------
    Prints the full ranking to stdout and shows a matplotlib figure
    via ``plt.show()``.
    """
    model = GradientBoostingClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=random_state,
    )
    model.fit(X_train, y_train)

    importances = model.feature_importances_
    # Feature positions ordered from most to least important.
    indices = np.argsort(importances)[::-1]

    # ``estimators_`` is a 2-D array of trees (boosting stage x trees per
    # stage). Flatten it so every tree contributes to the spread; indexing
    # only column 0 would ignore all but the first class's trees in
    # multiclass problems. With one tree per stage the result is unchanged.
    all_trees = np.asarray(model.estimators_).ravel()
    std = np.std([tree.feature_importances_ for tree in all_trees], axis=0)

    # Plain ndarrays have no ``columns``; fall back to positional names
    # instead of failing after the model has already been trained.
    feat_labels = getattr(X_train, "columns", None)
    if feat_labels is None:
        feat_labels = ["x%d" % i for i in range(len(importances))]

    print("Feature ranking:")
    for rank, idx in enumerate(indices, start=1):
        print("%d. feature no:%d feature name:%s (%f)"
              % (rank, idx, feat_labels[idx], importances[idx]))

    # Never try to plot more bars than there are features (or a negative
    # count): bar heights and x positions must have the same length.
    n_shown = max(0, min(top_n, len(indices)))
    top_indices = indices[:n_shown]
    positions = range(n_shown)

    plt.figure()
    plt.title("Feature importances top %d" % n_shown)
    plt.bar(positions, importances[top_indices],
            color="r", yerr=std[top_indices], align="center")
    # Ticks are labelled with the feature's column position, matching the
    # "feature no" printed in the ranking above.
    plt.xticks(positions, top_indices)
    plt.xlim([-1, n_shown])
    plt.show()

    return model