Zefirkash committed on
Commit
81084b5
·
verified ·
1 Parent(s): 96d68f1

Create Klas.py

Browse files
Files changed (1) hide show
  1. Klas.py +82 -0
Klas.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 1. Установка и импорт библиотек
2
+ import numpy as np
3
+ import pandas as pd
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import joblib
7
+
8
+ from sklearn.model_selection import train_test_split, GridSearchCV
9
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
10
+ from sklearn.linear_model import LogisticRegression
11
+ from sklearn.tree import DecisionTreeClassifier
12
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
13
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
14
+
15
# 2. Load the data and take a first look at it
df = pd.read_csv("dataset.csv")

print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must be called on its own; wrapping it in print() emits a stray "None".
df.info()
print(df.isnull().sum())
print(df.describe())
18
+
19
# 3. Data preprocessing
# Fill gaps in numeric columns with the column mean. numeric_only=True is
# required: on pandas >= 2.0 DataFrame.mean() raises TypeError as soon as the
# frame contains a non-numeric (e.g. string) column.
# NOTE(review): the imputation statistics are computed on the full dataset,
# i.e. before the train/test split below, so test rows influence them.
df.fillna(df.mean(numeric_only=True), inplace=True)
# Anything still missing is filled with the most frequent value of its column
# (first row of DataFrame.mode()).
df.fillna(df.mode().iloc[0], inplace=True)

# Encode the class labels as integers; `le` keeps the mapping so predictions
# can be decoded later with le.inverse_transform.
le = LabelEncoder()
df['target'] = le.fit_transform(df['target'])
# One-hot encode the remaining non-numeric columns, dropping the first level
# of each encoded feature.
df = pd.get_dummies(df, drop_first=True)

X = df.drop(columns=['target'])
y = df['target']

# Stratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training part only, then apply it to both parts.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
35
+
36
# 4. Model training
# Every estimator that has a stochastic component gets the same fixed seed so
# that repeated runs of the script produce the same predictions.
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(
        n_estimators=100, learning_rate=0.1, random_state=42
    ),
}

# Fit each model on the scaled training data and keep its test-set
# predictions, keyed by the model's display name.
predictions = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    predictions[name] = model.predict(X_test)
48
+
49
# 5. Model quality evaluation
def evaluate_model(name, y_true, y_pred):
    """Print accuracy, classification report and confusion matrix for a model.

    Args:
        name: Label printed in the header line of the report.
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model.
    """
    print(f"=== {name} ===")
    # Each entry pairs the printed caption with the metric that produces the
    # value shown after it; metrics are computed in the order they are printed.
    sections = (
        ("Accuracy:", accuracy_score),
        ("Classification Report:\n", classification_report),
        ("Confusion Matrix:\n", confusion_matrix),
    )
    for caption, metric in sections:
        print(caption, metric(y_true, y_pred))
    print("\n")
56
+
57
# Report the metrics of every trained model on the held-out test set.
for name in predictions:
    y_pred = predictions[name]
    evaluate_model(name, y_test, y_pred)
59
+
60
# 6. Hyperparameter tuning for the random forest
# Candidate values tried for each hyperparameter (3 x 3 x 3 combinations).
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
)

# Exhaustive search with 5-fold cross-validation, scored by accuracy and run
# with n_jobs=-1.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

print("Лучшие параметры:", grid_search.best_params_)

# Evaluate the best estimator found by the search on the test set.
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(X_test)
evaluate_model("Best Random Forest", y_test, y_pred_best)
74
+
75
# 7. Confusion matrix visualization
# Draw the tuned model's confusion matrix for the test set on a 5x5 figure.
fig, ax = plt.subplots(figsize=(5, 5))
ConfusionMatrixDisplay.from_estimator(
    best_model,
    X_test,
    y_test,
    cmap='Blues',
    ax=ax,
)
plt.show()
79
+
80
# 8. Saving and loading the model
joblib.dump(best_model, "best_model.pkl")
# The model was trained on StandardScaler output and integer-encoded labels,
# so the fitted scaler and label encoder are persisted next to it; without
# them the reloaded model cannot be applied to new raw data or its
# predictions mapped back to the original class names.
joblib.dump(scaler, "scaler.pkl")
joblib.dump(le, "label_encoder.pkl")

# NOTE: joblib.load unpickles arbitrary objects; only load files that this
# script (or another trusted source) produced.
loaded_model = joblib.load("best_model.pkl")