3basetorg
/

piometric

Model card Files Files and versions

piometric / train.py

owner120's picture

Upload folder using huggingface_hub

0d4f97b verified 9 months ago

2.3 kB

	import pandas as pd
	import numpy as np
	import joblib
	from sklearn.pipeline import make_pipeline
	from sklearn.preprocessing import StandardScaler
	from xgboost import XGBClassifier
	from sklearn.metrics import classification_report

	# ---------------- Load & preprocess ----------------
	# Read the readings and put them in chronological order.
	df = pd.read_csv('../city_day_clean_dated.csv', parse_dates=['Date'])
	df = df.sort_values('Date')

	# Hourly timeline from the first to the last recorded date
	# ('h' is used in place of the older 'H' frequency alias).
	full = pd.DataFrame({
	    'DateTime': pd.date_range(start=df['Date'].min(), end=df['Date'].max(), freq='h'),
	})

	# Pair each hourly timestamp with the rows recorded on the same calendar
	# day; cells left empty by the left join are forward-filled from the
	# preceding row.
	df = pd.merge(
	    full,
	    df,
	    how='left',
	    left_on=full['DateTime'].dt.date,
	    right_on=df['Date'].dt.date,
	).ffill()

	# From here on 'Date' carries the hourly timestamp.
	df['Date'] = df['DateTime']

	# Gas-concentration columns used to derive the danger label
	# (listed explicitly so non-gas columns such as 'DateTime' are excluded).
	gas_cols = ['alcohol', 'NH3', 'CO', 'CO2', 'Toluene', 'acetone', 'lpg', 'smoke']

	# Per-gas danger threshold: the 95th percentile of that gas's non-missing values.
	# NOTE(review): the percentile is taken over the whole frame, so rows that
	# later fall into the test split also influence the label definition —
	# confirm this is intended.
	danger_thresholds = {col: np.percentile(df[col].dropna(), 95) for col in gas_cols}

	# One 0/1 flag column per gas: 1 where the reading exceeds its threshold.
	for col in gas_cols:
	    df[f'{col}_danger'] = (df[col] > danger_thresholds[col]).astype(int)

	# A row is labelled dangerous when at least one gas flag is set.
	df['Danger'] = df[[f'{col}_danger' for col in gas_cols]].max(axis=1)

	# Time features
	# Calendar fields read off the hourly timestamp, plus a 0/1 marker for
	# hours 12 through 15 (both ends inclusive).
	df = df.assign(
	    Hour=lambda frame: frame['DateTime'].dt.hour,
	    Weekday=lambda frame: frame['DateTime'].dt.weekday,
	    Month=lambda frame: frame['DateTime'].dt.month,
	    Afternoon=lambda frame: frame['Hour'].between(12, 15).astype(int),
	)

	# Simple demo features
	target = 'Danger'
	features = ['Hour', 'Weekday', 'Month', 'Afternoon']

	# Chronological hold-out: rows stamped before 2020-01-01 form the training
	# set, rows from that date onwards form the test set.
	before_cutoff = df['DateTime'] < '2020-01-01'
	from_cutoff = df['DateTime'] >= '2020-01-01'
	train = df.loc[before_cutoff]
	test = df.loc[from_cutoff]

	# Feature matrix and label vector for each split.
	X_train = train[features]
	y_train = train[target]
	X_test = test[features]
	y_test = test[target]

	# ---------------- Build model ----------------
	# Settings passed to the XGBoost classifier.
	xgb_params = {
	    'n_estimators': 200,
	    'max_depth': 5,
	    'learning_rate': 0.1,
	    'subsample': 0.8,
	    'colsample_bytree': 0.8,
	    'scale_pos_weight': 10,
	    'random_state': 42,
	    'eval_metric': 'logloss',
	}

	# Two-step pipeline: standardise the inputs, then classify.
	model = make_pipeline(StandardScaler(), XGBClassifier(**xgb_params))

	# Fit on the training split only.
	model.fit(X_train, y_train)

	# Print a classification report for the training split, then the test split.
	print('Training report:')
	train_predictions = model.predict(X_train)
	print(classification_report(y_train, train_predictions))

	print('\nTest report:')
	test_predictions = model.predict(X_test)
	print(classification_report(y_test, test_predictions))

	# ---------------- Save bundle ----------------
	# Persist the fitted pipeline together with the feature names it was
	# trained on, so both can be loaded back from a single file.
	bundle = {'model': model, 'features': features}
	joblib.dump(bundle, 'gas_danger_model.pkl')
	print('✔️ Model saved to gas_danger_model.pkl')