Spaces:
Sleeping
Sleeping
Update train_model.py
Browse files
- train_model.py +5 -8
train_model.py
CHANGED
|
@@ -4,8 +4,11 @@ from sklearn.ensemble import RandomForestClassifier
|
|
| 4 |
from sklearn.preprocessing import LabelEncoder
|
| 5 |
import joblib
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
def train_and_save_model():
|
| 11 |
if os.path.exists(MODEL_PATH) and os.path.exists(LABEL_ENCODER_PATH):
|
|
@@ -13,19 +16,15 @@ def train_and_save_model():
|
|
| 13 |
return
|
| 14 |
|
| 15 |
print("Training model...")
|
| 16 |
-
|
| 17 |
-
# Load CSV with utf-8-sig encoding to handle BOM and special chars
|
| 18 |
data = pd.read_csv("heating_mantle_sample_30.csv", encoding='utf-8-sig')
|
| 19 |
|
| 20 |
print("Original columns:", data.columns.tolist())
|
| 21 |
|
| 22 |
-
# Clean column names: strip spaces and remove strange chars
|
| 23 |
data.columns = data.columns.str.strip()
|
| 24 |
data.columns = data.columns.str.replace('Â', '', regex=False)
|
| 25 |
|
| 26 |
print("Cleaned columns:", data.columns.tolist())
|
| 27 |
|
| 28 |
-
# Rename columns for ease of use
|
| 29 |
data.rename(columns={
|
| 30 |
'Max_Temperature (°C)': 'Max_Temperature',
|
| 31 |
'Duration_Minutes': 'Duration',
|
|
@@ -34,7 +33,6 @@ def train_and_save_model():
|
|
| 34 |
|
| 35 |
print("Columns after rename:", data.columns.tolist())
|
| 36 |
|
| 37 |
-
# Create Risk_Level from Risk_Score
|
| 38 |
def risk_label_from_score(score):
|
| 39 |
if score < 30:
|
| 40 |
return 'Low'
|
|
@@ -48,7 +46,6 @@ def train_and_save_model():
|
|
| 48 |
le = LabelEncoder()
|
| 49 |
data['risk_encoded'] = le.fit_transform(data['Risk_Level'])
|
| 50 |
|
| 51 |
-
# Check final columns for features
|
| 52 |
print("Using features:", ['Max_Temperature', 'Duration'])
|
| 53 |
|
| 54 |
X = data[['Max_Temperature', 'Duration']]
|
|
|
|
| 4 |
from sklearn.preprocessing import LabelEncoder
|
| 5 |
import joblib
|
| 6 |
|
| 7 |
+
MODEL_DIR = "model_files"
|
| 8 |
+
os.makedirs(MODEL_DIR, exist_ok=True) # Ensure folder exists
|
| 9 |
+
|
| 10 |
+
MODEL_PATH = os.path.join(MODEL_DIR, "heating_risk_model.pkl")
|
| 11 |
+
LABEL_ENCODER_PATH = os.path.join(MODEL_DIR, "label_encoder.pkl")
|
| 12 |
|
| 13 |
def train_and_save_model():
|
| 14 |
if os.path.exists(MODEL_PATH) and os.path.exists(LABEL_ENCODER_PATH):
|
|
|
|
| 16 |
return
|
| 17 |
|
| 18 |
print("Training model...")
|
|
|
|
|
|
|
| 19 |
data = pd.read_csv("heating_mantle_sample_30.csv", encoding='utf-8-sig')
|
| 20 |
|
| 21 |
print("Original columns:", data.columns.tolist())
|
| 22 |
|
|
|
|
| 23 |
data.columns = data.columns.str.strip()
|
| 24 |
data.columns = data.columns.str.replace('Â', '', regex=False)
|
| 25 |
|
| 26 |
print("Cleaned columns:", data.columns.tolist())
|
| 27 |
|
|
|
|
| 28 |
data.rename(columns={
|
| 29 |
'Max_Temperature (°C)': 'Max_Temperature',
|
| 30 |
'Duration_Minutes': 'Duration',
|
|
|
|
| 33 |
|
| 34 |
print("Columns after rename:", data.columns.tolist())
|
| 35 |
|
|
|
|
| 36 |
def risk_label_from_score(score):
|
| 37 |
if score < 30:
|
| 38 |
return 'Low'
|
|
|
|
| 46 |
le = LabelEncoder()
|
| 47 |
data['risk_encoded'] = le.fit_transform(data['Risk_Level'])
|
| 48 |
|
|
|
|
| 49 |
print("Using features:", ['Max_Temperature', 'Duration'])
|
| 50 |
|
| 51 |
X = data[['Max_Temperature', 'Duration']]
|