File size: 3,556 Bytes
513c1a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import joblib
import os

class TrafficRiskModel:
    """Random-forest classifier mapping weather readings to a traffic risk level.

    The model is trained on synthetic data produced by
    :meth:`generate_synthetic_data` and persisted with ``joblib`` so later
    processes can reuse it through :meth:`load`.

    Attributes:
        model_path: File the fitted classifier is written to / read from.
        encoder_path: File the fitted label encoder is written to / read from.
        model: The fitted classifier, or ``None`` until trained or loaded.
        encoder: Label encoder fitted on ``conditions``.
        conditions: Weather condition categories the encoder knows about.
    """

    def __init__(self, model_path="traffic_model.pkl", encoder_path="encoder.pkl"):
        """Store the persistence paths and fit the condition encoder.

        Args:
            model_path: Where the classifier is saved and loaded.
            encoder_path: Where the label encoder is saved and loaded.
        """
        self.model_path = model_path
        self.encoder_path = encoder_path
        self.model = None
        self.encoder = LabelEncoder()
        self.conditions = ["Clear", "Cloudy", "Rainy", "Storm", "Other"]

        # Fit up front so every category is encodable before any training.
        self.encoder.fit(self.conditions)

    @staticmethod
    def _risk_label(temp, rain, condition):
        """Return "Low", "Medium" or "High" for a single weather observation.

        One point is scored for an extreme temperature (above 35 or below 10),
        one each for rain above 10 and above 25, one for a "Rainy" or "Storm"
        condition, and an extra one for "Storm". A total of 0-1 is "Low",
        2-3 is "Medium", anything higher is "High".
        """
        score = 0
        if temp > 35 or temp < 10:
            score += 1
        if rain > 10:
            score += 1
        if rain > 25:
            score += 1
        if condition in ("Rainy", "Storm"):
            score += 1
        if condition == "Storm":
            score += 1

        if score <= 1:
            return "Low"
        if score <= 3:
            return "Medium"
        return "High"

    def generate_synthetic_data(self, n_samples=1000, seed=42):
        """Generate a synthetic, labelled weather data set.

        Args:
            n_samples: Number of rows to generate (0 yields an empty frame).
            seed: Seed for the private random generator. The default of 42
                reproduces the data set this method has always produced.
                Pass ``None`` for non-reproducible data.

        Returns:
            A DataFrame with columns ``temp`` (uniform in [5, 45)), ``rain``
            (uniform in [0, 50)), ``condition`` (drawn from
            ``self.conditions``) and ``risk_level`` ("Low"/"Medium"/"High").
        """
        # A private RandomState keeps the global NumPy RNG untouched; calling
        # np.random.seed() here would silently reseed every other user of
        # np.random in the process. RandomState(42) yields the same stream
        # the legacy seeded global generator does, so the data is unchanged.
        rng = np.random.RandomState(seed)

        temps = rng.uniform(5, 45, n_samples)
        rains = rng.uniform(0, 50, n_samples)
        conditions = rng.choice(self.conditions, n_samples)

        # Label each observation; a plain comprehension avoids the per-row
        # Series construction of DataFrame.apply(axis=1) and copes with
        # an empty sample.
        risk_levels = [
            self._risk_label(temp, rain, condition)
            for temp, rain, condition in zip(temps, rains, conditions)
        ]

        return pd.DataFrame({
            'temp': temps,
            'rain': rains,
            'condition': conditions,
            'risk_level': risk_levels,
        })

    def train(self, n_samples=1000):
        """Train the classifier on synthetic data and persist it.

        Fits a 100-tree random forest on the ``temp``, ``rain`` and encoded
        ``condition`` columns, then writes the model and the encoder to
        ``self.model_path`` and ``self.encoder_path``.

        Args:
            n_samples: Number of synthetic rows to train on.
        """
        data = self.generate_synthetic_data(n_samples)

        X = data[['temp', 'rain', 'condition']].copy()
        X['condition'] = self.encoder.transform(X['condition'])
        y = data['risk_level']

        self.model = RandomForestClassifier(n_estimators=100, random_state=42)
        self.model.fit(X, y)

        # Save model and encoder so load() can restore them later.
        joblib.dump(self.model, self.model_path)
        joblib.dump(self.encoder, self.encoder_path)
        print(f"Model trained and saved to {self.model_path}")

    def load(self):
        """Load the persisted model and encoder if both files exist.

        Returns:
            ``True`` when both artifacts were loaded, ``False`` when either
            file is missing (nothing is modified in that case).
        """
        if not (os.path.exists(self.model_path) and os.path.exists(self.encoder_path)):
            return False

        # SECURITY: joblib.load unpickles its input and can execute arbitrary
        # code. Only point model_path / encoder_path at trusted files.
        self.model = joblib.load(self.model_path)
        self.encoder = joblib.load(self.encoder_path)
        return True

    def predict(self, temp, rain, condition):
        """Predict the traffic risk level for one weather observation.

        If no model is in memory, it is loaded from disk, or trained (and
        saved) when no persisted model is available.

        Args:
            temp: Temperature reading.
            rain: Rainfall reading.
            condition: Weather condition; values unknown to the encoder are
                treated as "Other".

        Returns:
            A ``(prediction, confidence)`` tuple: the predicted risk label
            and the highest class probability reported by the model.
        """
        if self.model is None and not self.load():
            self.train()

        # Ensure condition is known to encoder
        if condition not in self.encoder.classes_:
            condition = "Other"

        encoded_condition = self.encoder.transform([condition])[0]
        # Use the training column names so the features line up with fit().
        input_data = pd.DataFrame(
            [[temp, rain, encoded_condition]],
            columns=['temp', 'rain', 'condition'],
        )

        prediction = self.model.predict(input_data)[0]
        probabilities = self.model.predict_proba(input_data)[0]
        confidence = max(probabilities)

        return prediction, confidence

if __name__ == "__main__":
    # Smoke run: train (and persist) a fresh model, then print one
    # sample prediction as a (label, confidence) pair.
    risk_model = TrafficRiskModel()
    risk_model.train()
    sample_result = risk_model.predict(28, 12, "Rainy")
    print("Test Prediction:", sample_result)