| | import pandas as pd |
| | import numpy as np |
| | import joblib |
| | from sklearn.pipeline import make_pipeline |
| | from sklearn.preprocessing import StandardScaler |
| | from xgboost import XGBClassifier |
| | from sklearn.metrics import classification_report |
| |
|
| | |
# Load the cleaned CSV with 'Date' parsed as datetimes, then put the rows in
# chronological order.
df = pd.read_csv('../city_day_clean_dated.csv', parse_dates=['Date'])
df = df.sort_values('Date')
| |
|
| | |
# Expand the data onto an hourly grid covering every day present in 'Date'.
#
# The range must run to 23:00 of the last day.  Ending it at df['Date'].max()
# itself (midnight for date-only values) would leave the final calendar day
# with a single hourly row instead of a full 24.
last_hour = df['Date'].max().normalize() + pd.Timedelta(hours=23)
full = pd.DataFrame(
    pd.date_range(df['Date'].min(), last_hour, freq='h'),
    columns=['DateTime'],
)

# Left-join on calendar date so each hourly timestamp picks up the source
# row(s) recorded for its day.  Hours whose day has no source row come back
# as NaN and are forward-filled from the preceding rows.
df = full.merge(
    df,
    left_on=full['DateTime'].dt.date,
    right_on=df['Date'].dt.date,
    how='left',
).ffill()

# From here on 'Date' carries the hourly timestamp rather than the day.
df['Date'] = df['DateTime']
| |
|
| | |
# Columns whose readings define the danger label.
gas_cols = ['alcohol', 'NH3', 'CO', 'CO2', 'Toluene', 'acetone', 'lpg', 'smoke']

# A reading is flagged (1) when it exceeds that column's 95th percentile.
# NaNs are dropped before the percentile is computed; a NaN reading compares
# False against the cut-off and is therefore flagged 0.
# NOTE(review): the percentiles are taken over every row of df, including the
# rows that later form the test split -- confirm this is intended.
danger_thresholds = {}
for col in gas_cols:
    cutoff = np.percentile(df[col].dropna(), 95)
    danger_thresholds[col] = cutoff
    df[f'{col}_danger'] = (df[col] > cutoff).astype(int)

# Overall label: 1 if any per-gas flag is set for the row, otherwise 0.
flag_cols = [f'{col}_danger' for col in gas_cols]
df['Danger'] = df[flag_cols].max(axis=1)
| |
|
| | |
# Calendar features derived from the hourly 'DateTime' column.
stamp = df['DateTime'].dt
df['Hour'] = stamp.hour
df['Weekday'] = stamp.weekday
df['Month'] = stamp.month
# 1 for hours 12 through 15 (both ends inclusive), otherwise 0.
df['Afternoon'] = df['Hour'].between(12, 15).astype(int)
| |
|
| | |
# Model inputs and the label column.
features = ['Hour', 'Weekday', 'Month', 'Afternoon']
target = 'Danger'

# Chronological split: rows before the split date train the model, rows on or
# after it are held out for testing.
split_date = '2020-01-01'
in_train = df['DateTime'] < split_date
in_test = df['DateTime'] >= split_date
train = df[in_train]
test = df[in_test]

X_train = train[features]
y_train = train[target]
X_test = test[features]
y_test = test[target]
| |
|
| | |
# Hyper-parameters for the gradient-boosted classifier.
xgb_params = {
    'n_estimators': 200,
    'max_depth': 5,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'scale_pos_weight': 10,
    'random_state': 42,
    'eval_metric': 'logloss',
}

# Standardise the features, then fit the classifier on the training split.
model = make_pipeline(StandardScaler(), XGBClassifier(**xgb_params))
model.fit(X_train, y_train)
| |
|
# Print a classification report for the training split, then the test split.
for heading, X, y in (
    ('Training report:', X_train, y_train),
    ('\nTest report:', X_test, y_test),
):
    print(heading)
    print(classification_report(y, model.predict(X)))
| |
|
| | |
# Persist the fitted pipeline together with the feature names it was trained
# on, bundled in a single dict.
artifact = {'model': model, 'features': features}
joblib.dump(artifact, 'gas_danger_model.pkl')
print('✔️ Model saved to gas_danger_model.pkl')
| |
|