File size: 9,816 Bytes
a8ba5ce
 
 
 
1f20aac
 
 
 
a8ba5ce
 
 
 
1f20aac
 
a8ba5ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9ae8ce
a8ba5ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
"""
Enhanced Metro Synthetic Data Generator
Generates realistic metro train scheduling data with time-based constraints
"""
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import random
import uuid
from datetime import datetime, timedelta, time
from typing import List, Dict, Tuple

from DataService.metro_models import (
    TrainHealthStatus, Station, Route, FitnessCertificates,
    FitnessCertificate, CertificateStatus, JobCards, Branding
)


class MetroDataGenerator:
    """Generate synthetic data for metro train scheduling.

    Random choices are drawn from the module-level ``random`` generator, so
    seeding it before calling the ``generate_*`` methods makes the draws
    repeatable. Certificate expiry dates are computed relative to
    ``datetime.now()`` and therefore still move with the clock.
    """

    # Ordered station names; index 0 is the route origin.
    # NOTE(review): both "Petta" and "Pettah" appear below — confirm the
    # terminal station name against the official station list.
    STATIONS_ALUVA_PETTAH = [
        "Aluva", "Pulinchodu", "Companypadi", "Ambattukavu", "Muttom",
        "Kalamassery", "Cochin University", "Pathadipalam", "Edapally",
        "Changampuzha Park", "Palarivattom", "J.L.N Stadium", "Kaloor",
        "Town Hall", "M.G. Road", "Maharaja's College", "Ernakulam South",
        "Kadavanthra", "Elamkulam", "Vyttila", "Thaikoodam", "Petta",
        "Vadakkekotta", "SN Junction", "Pettah"
    ]

    # Bay identifiers: BAY-01..BAY-15, IBL-01..IBL-05, WASH-BAY-01..WASH-BAY-03.
    DEPOT_BAYS = [f"BAY-{i:02d}" for i in range(1, 16)]
    IBL_BAYS = [f"IBL-{i:02d}" for i in range(1, 6)]
    WASH_BAYS = [f"WASH-BAY-{i:02d}" for i in range(1, 4)]

    # "NONE" marks a train without a branding contract (see generate_branding).
    ADVERTISERS = [
        "COCACOLA-2024", "FLIPKART-FESTIVE", "AMAZON-PRIME",
        "RELIANCE-JIO", "TATA-MOTORS", "SAMSUNG-GALAXY",
        "NONE"
    ]

    # Reasons reported for trains that are unavailable for the whole day.
    UNAVAILABLE_REASONS = [
        "SCHEDULED_MAINTENANCE", "BRAKE_SYSTEM_REPAIR",
        "HVAC_REPLACEMENT", "BOGIE_OVERHAUL", "ELECTRICAL_FAULT",
        "ACCIDENT_DAMAGE", "PANTOGRAPH_REPAIR", "DOOR_SYSTEM_FAULT"
    ]

    def __init__(self, num_trains: int = 25, num_stations: int = 25):
        """Set up the generator.

        Args:
            num_trains: Number of trainsets; IDs run ``TS-001`` .. ``TS-NNN``.
            num_stations: Number of stations to place on the route, capped at
                the length of ``STATIONS_ALUVA_PETTAH``.
        """
        self.num_trains = num_trains
        self.num_stations = min(num_stations, len(self.STATIONS_ALUVA_PETTAH))
        self.trainset_ids = [f"TS-{n:03d}" for n in range(1, num_trains + 1)]

    @staticmethod
    def _station_offsets(station_count: int, total_distance: float) -> List[float]:
        """Return evenly spaced, unrounded distances from the origin, one per station."""
        if station_count <= 0:
            return []
        if station_count == 1:
            # A single station sits at the origin; there is no gap to divide
            # the distance by (the naive formula would divide by zero).
            return [0.0]
        spacing = total_distance / (station_count - 1)
        return [spacing * position for position in range(station_count)]

    def generate_route(self, route_name: str = "Aluva-Pettah Line") -> Route:
        """Generate the metro route with evenly spaced stations.

        Args:
            route_name: Display name stored on the returned route.

        Returns:
            A ``Route`` with ``self.num_stations`` stations, randomised dwell
            times, average speed and turnaround time.
        """
        total_distance = 25.612  # Actual KMRL distance

        stations = []
        offsets = self._station_offsets(self.num_stations, total_distance)
        for index, offset in enumerate(offsets):
            stations.append(
                Station(
                    station_id=f"STN-{index + 1:03d}",
                    name=self.STATIONS_ALUVA_PETTAH[index],
                    sequence=index + 1,
                    distance_from_origin_km=round(offset, 2),
                    avg_dwell_time_seconds=random.randint(20, 45)
                )
            )

        return Route(
            route_id="KMRL-LINE-01",
            name=route_name,
            stations=stations,
            total_distance_km=total_distance,
            avg_speed_kmh=random.randint(32, 38),  # Varies around 35 km/h average operating speed
            turnaround_time_minutes=random.randint(8, 12)
        )

    def generate_train_health_statuses(self) -> List[TrainHealthStatus]:
        """Generate a health status for every trainset.

        Each train falls into one of three categories:
        roughly 65% fully healthy (``available_hours`` is ``None``),
        20% partially available (one ``(start, end)`` time window), and
        15% unavailable (``available_hours`` is an empty list).

        Returns:
            One ``TrainHealthStatus`` per entry in ``self.trainset_ids``.
        """
        statuses = []

        for ts_id in self.trainset_ids:
            health_roll = random.random()

            if health_roll < 0.65:  # 65% fully healthy
                is_healthy = True
                available_hours = None
                reason = None
            elif health_roll < 0.85:  # 20% partially healthy
                is_healthy = False
                # Window starts between 05:00 and 12:00 and lasts at least
                # four hours; end_hour never exceeds 23, so time() stays valid.
                start_hour = random.randint(5, 12)
                end_hour = random.randint(start_hour + 4, 23)
                available_hours = [(time(start_hour, 0), time(end_hour, 0))]
                cause = random.choice(['Minor repairs', 'Partial maintenance', 'Certificate renewal pending'])
                reason = f"Limited availability: {cause}"
            else:  # 15% unavailable
                is_healthy = False
                available_hours = []
                reason = random.choice(self.UNAVAILABLE_REASONS)

            statuses.append(
                TrainHealthStatus(
                    trainset_id=ts_id,
                    is_fully_healthy=is_healthy,
                    available_hours=available_hours,
                    unavailable_reason=reason,
                    cumulative_mileage=random.randint(50000, 200000),
                    days_since_maintenance=random.randint(1, 45),
                    component_health={
                        "brakes": random.uniform(0.7, 1.0),
                        "hvac": random.uniform(0.65, 1.0),
                        "doors": random.uniform(0.7, 1.0),
                        "bogies": random.uniform(0.75, 1.0),
                        "pantograph": random.uniform(0.7, 1.0),
                        "battery": random.uniform(0.65, 1.0),
                        "motor": random.uniform(0.75, 1.0)
                    }
                )
            )

        return statuses

    def generate_fitness_certificates(self, train_id: str) -> FitnessCertificates:
        """Generate rolling-stock, signalling and telecom certificates.

        Args:
            train_id: Trainset identifier. Not used by the generation logic;
                kept so the call signature stays per-train.

        Returns:
            A ``FitnessCertificates`` whose three certificates are drawn
            independently: about 75% valid (10-60 days left), 15% expiring
            soon (1-9 days left) and 10% expired (1-5 days ago).
        """
        now = datetime.now()

        def random_cert_status() -> Tuple[str, CertificateStatus]:
            """Return an ISO-format expiry timestamp and its matching status."""
            roll = random.random()
            if roll < 0.75:  # 75% valid
                days_valid = random.randint(10, 60)
                return (now + timedelta(days=days_valid)).isoformat(), CertificateStatus.VALID
            if roll < 0.90:  # 15% expiring soon
                days_valid = random.randint(1, 9)
                return (now + timedelta(days=days_valid)).isoformat(), CertificateStatus.EXPIRING_SOON
            # 10% expired
            days_expired = random.randint(1, 5)
            return (now - timedelta(days=days_expired)).isoformat(), CertificateStatus.EXPIRED

        rs_date, rs_status = random_cert_status()
        sig_date, sig_status = random_cert_status()
        tel_date, tel_status = random_cert_status()

        return FitnessCertificates(
            rolling_stock=FitnessCertificate(valid_until=rs_date, status=rs_status),
            signalling=FitnessCertificate(valid_until=sig_date, status=sig_status),
            telecom=FitnessCertificate(valid_until=tel_date, status=tel_status)
        )

    @staticmethod
    def _blocking_job_ids(num_open: int) -> List[str]:
        """Return IDs for blocking job cards, never more than ``num_open`` of them."""
        if num_open <= 0:
            return []

        components = ["BRAKE", "HVAC", "DOOR", "BOGIE", "PANTOGRAPH", "ELECTRICAL"]
        num_blocking = random.choices([0, 1, 2, 3], weights=[70, 20, 8, 2])[0]
        # Blocking cards are a subset of the open cards, so a train cannot
        # have more blocking cards than open ones.
        num_blocking = min(num_blocking, num_open, len(components))
        if num_blocking <= 0:
            return []

        selected = random.sample(components, num_blocking)
        return [f"JC-{random.randint(40000, 49999)}-{comp}" for comp in selected]

    def generate_job_cards(self, train_id: str) -> JobCards:
        """Generate job cards for a train.

        Args:
            train_id: Trainset identifier. Not used by the generation logic;
                kept so the call signature stays per-train.

        Returns:
            A ``JobCards`` with 0-5 open cards (weighted towards 0) and a
            list of blocking card IDs whose length never exceeds ``open``.
        """
        num_open = random.choices([0, 1, 2, 3, 4, 5], weights=[50, 25, 15, 7, 2, 1])[0]
        blocking = self._blocking_job_ids(num_open)
        return JobCards(open=num_open, blocking=blocking)

    def generate_branding(self) -> Branding:
        """Generate branding information for one train.

        Returns:
            A ``Branding`` for a randomly chosen advertiser. When the choice
            is ``"NONE"`` the contract hours are 0 and the priority is
            ``"NONE"``; otherwise hours are 50-500 and a priority is drawn
            from LOW/MEDIUM/HIGH/CRITICAL.
        """
        advertiser = random.choice(self.ADVERTISERS)

        if advertiser == "NONE":
            return Branding(
                advertiser="NONE",
                contract_hours_remaining=0,
                exposure_priority="NONE"
            )

        return Branding(
            advertiser=advertiser,
            contract_hours_remaining=random.randint(50, 500),
            exposure_priority=random.choice(["LOW", "MEDIUM", "HIGH", "CRITICAL"])
        )

    @staticmethod
    def _health_adjustment(component_health: Dict[str, float]) -> float:
        """Return the score adjustment for average component health (0.0 when empty)."""
        if not component_health:
            # No component data: apply no adjustment instead of dividing by zero.
            return 0.0
        avg_health = sum(component_health.values()) / len(component_health)
        return (avg_health - 0.5) * 0.2  # -0.1 to +0.1 for health in [0, 1]

    def calculate_readiness_score(
        self,
        fitness_certs: FitnessCertificates,
        job_cards: JobCards,
        component_health: Dict[str, float]
    ) -> float:
        """Calculate the overall readiness score for a train.

        Starts from 1.0, subtracts certificate and job-card penalties, then
        adds a component-health adjustment.

        Args:
            fitness_certs: Certificates whose ``status`` fields are inspected.
            job_cards: Provides the open count and the blocking card list.
            component_health: Component name mapped to a health value; an
                empty mapping contributes no adjustment.

        Returns:
            The score clamped to the range ``[0.0, 1.0]``.
        """
        score = 1.0

        # (certificate, penalty if expired, penalty if expiring soon)
        certificate_penalties = (
            (fitness_certs.rolling_stock, 0.4, 0.1),
            (fitness_certs.signalling, 0.3, 0.05),
            (fitness_certs.telecom, 0.2, 0.05),
        )
        for certificate, expired_penalty, expiring_penalty in certificate_penalties:
            if certificate.status == CertificateStatus.EXPIRED:
                score -= expired_penalty
            elif certificate.status == CertificateStatus.EXPIRING_SOON:
                score -= expiring_penalty

        # Job card penalties: 0.03 per open card (max 0.15) and
        # 0.1 per blocking card (max 0.25).
        if job_cards.open > 0:
            score -= min(0.15, job_cards.open * 0.03)
        if len(job_cards.blocking) > 0:
            score -= min(0.25, len(job_cards.blocking) * 0.1)

        # Component health impact
        score += self._health_adjustment(component_health)

        return max(0.0, min(1.0, score))

    def generate_depot_layout(self) -> Dict[str, List[str]]:
        """Generate the depot bay layout.

        Returns:
            A dict with ``stabling_bays``, ``ibl_bays`` and ``wash_bays``;
            each value is a fresh copy, so callers may mutate it freely.
        """
        return {
            "stabling_bays": self.DEPOT_BAYS.copy(),
            "ibl_bays": self.IBL_BAYS.copy(),
            "wash_bays": self.WASH_BAYS.copy()
        }

    def get_realistic_mileage_distribution(self, num_trains: int) -> List[int]:
        """Generate a cumulative mileage value for each of ``num_trains`` trains.

        Each value is a base of 120000 plus a random variance in
        ``[-40000, 50000]``, clamped to ``[50000, 200000]``.

        Args:
            num_trains: Number of mileage values to produce.

        Returns:
            A list of ``num_trains`` integers (empty when ``num_trains <= 0``).
        """
        base_mileage = 120000
        return [
            max(50000, min(200000, base_mileage + random.randint(-40000, 50000)))
            for _ in range(num_trains)
        ]