Sirivennela committed on
Commit
5b0de28
·
verified ·
1 Parent(s): 6f65092

Update train_model.py

Browse files
Files changed (1) hide show
  1. train_model.py +5 -8
train_model.py CHANGED
@@ -4,8 +4,11 @@ from sklearn.ensemble import RandomForestClassifier
4
  from sklearn.preprocessing import LabelEncoder
5
  import joblib
6
 
7
- MODEL_PATH = "heating_risk_model.pkl"
8
- LABEL_ENCODER_PATH = "label_encoder.pkl"
 
 
 
9
 
10
  def train_and_save_model():
11
  if os.path.exists(MODEL_PATH) and os.path.exists(LABEL_ENCODER_PATH):
@@ -13,19 +16,15 @@ def train_and_save_model():
13
  return
14
 
15
  print("Training model...")
16
-
17
- # Load CSV with utf-8-sig encoding to handle BOM and special chars
18
  data = pd.read_csv("heating_mantle_sample_30.csv", encoding='utf-8-sig')
19
 
20
  print("Original columns:", data.columns.tolist())
21
 
22
- # Clean column names: strip spaces and remove strange chars
23
  data.columns = data.columns.str.strip()
24
  data.columns = data.columns.str.replace('Â', '', regex=False)
25
 
26
  print("Cleaned columns:", data.columns.tolist())
27
 
28
- # Rename columns for ease of use
29
  data.rename(columns={
30
  'Max_Temperature (°C)': 'Max_Temperature',
31
  'Duration_Minutes': 'Duration',
@@ -34,7 +33,6 @@ def train_and_save_model():
34
 
35
  print("Columns after rename:", data.columns.tolist())
36
 
37
- # Create Risk_Level from Risk_Score
38
  def risk_label_from_score(score):
39
  if score < 30:
40
  return 'Low'
@@ -48,7 +46,6 @@ def train_and_save_model():
48
  le = LabelEncoder()
49
  data['risk_encoded'] = le.fit_transform(data['Risk_Level'])
50
 
51
- # Check final columns for features
52
  print("Using features:", ['Max_Temperature', 'Duration'])
53
 
54
  X = data[['Max_Temperature', 'Duration']]
 
4
  from sklearn.preprocessing import LabelEncoder
5
  import joblib
6
 
7
+ MODEL_DIR = "model_files"
8
+ os.makedirs(MODEL_DIR, exist_ok=True) # Ensure folder exists
9
+
10
+ MODEL_PATH = os.path.join(MODEL_DIR, "heating_risk_model.pkl")
11
+ LABEL_ENCODER_PATH = os.path.join(MODEL_DIR, "label_encoder.pkl")
12
 
13
  def train_and_save_model():
14
  if os.path.exists(MODEL_PATH) and os.path.exists(LABEL_ENCODER_PATH):
 
16
  return
17
 
18
  print("Training model...")
 
 
19
  data = pd.read_csv("heating_mantle_sample_30.csv", encoding='utf-8-sig')
20
 
21
  print("Original columns:", data.columns.tolist())
22
 
 
23
  data.columns = data.columns.str.strip()
24
  data.columns = data.columns.str.replace('Â', '', regex=False)
25
 
26
  print("Cleaned columns:", data.columns.tolist())
27
 
 
28
  data.rename(columns={
29
  'Max_Temperature (°C)': 'Max_Temperature',
30
  'Duration_Minutes': 'Duration',
 
33
 
34
  print("Columns after rename:", data.columns.tolist())
35
 
 
36
  def risk_label_from_score(score):
37
  if score < 30:
38
  return 'Low'
 
46
  le = LabelEncoder()
47
  data['risk_encoded'] = le.fit_transform(data['Risk_Level'])
48
 
 
49
  print("Using features:", ['Max_Temperature', 'Duration'])
50
 
51
  X = data[['Max_Temperature', 'Duration']]