Spaces:

Arpit-Bansal
/

train-schedule-optimization

Sleeping

App Files Files Community

train-schedule-optimization / DataService /synthetic_base.py

Arpit-Bansal

job cards optional everywhere

c7ea7db 3 months ago

raw

history blame contribute delete

16.6 kB

	import json
	import random
	from datetime import datetime, timedelta
	from typing import Dict, List
	import uuid

	class MetroSyntheticDataGenerator:
	    """Generate synthetic data for a metro trainset scheduling system.

	    Every ``generate_*`` method draws its values from the module-level
	    ``random`` generator and computes timestamps relative to
	    ``datetime.now()``. All returned structures consist of plain dicts,
	    lists, strings, numbers, booleans and ``None`` so they can be passed
	    straight to ``json.dump``.
	    """

	    def __init__(self, num_trainsets: int = 25):
	        """Set up fleet identifiers and the shared lookup lists.

	        Args:
	            num_trainsets: Number of trainsets to simulate. Trainset IDs are
	                ``TS-001``, ``TS-002``, ... (zero-padded to three digits).
	        """
	        self.num_trainsets = num_trainsets
	        self.trainset_ids = [f"TS-{i:03d}" for i in range(1, num_trainsets + 1)]
	        self.departments = ["Rolling Stock", "Signalling", "Telecom"]
	        self.brands = ["Brand-A", "Brand-B", "Brand-C", "Brand-D", "Brand-E"]

	    @staticmethod
	    def _iso_from_now(**offset: int) -> str:
	        """Return ``datetime.now() + timedelta(**offset)`` as an ISO-8601 string."""
	        return (datetime.now() + timedelta(**offset)).isoformat()

	    def generate_trainset_status(self) -> List[Dict]:
	        """Generate current operational status for all trainsets.

	        Returns:
	            One status dict per trainset ID, in fleet order.
	        """
	        locations = ["Depot-A", "Depot-B", "In-Service", "IBL", "Cleaning-Bay"]
	        states = ["Available", "In-Service", "Maintenance", "Standby"]
	        return [
	            {
	                "trainset_id": ts_id,
	                "current_location": random.choice(locations),
	                "operational_status": random.choice(states),
	                "last_service_date": self._iso_from_now(days=-random.randint(1, 30)),
	                "total_mileage_km": random.randint(50000, 200000),
	                "daily_mileage_km": random.randint(200, 400),
	                "operational_hours": random.randint(5000, 15000),
	                "last_updated": self._iso_from_now(),
	            }
	            for ts_id in self.trainset_ids
	        ]

	    def generate_fitness_certificates(self) -> List[Dict]:
	        """Generate fitness certificates from different departments.

	        Returns:
	            One certificate dict per (trainset, department) pair.

	        Note:
	            ``status`` is drawn independently of ``expiry_date``, so the two
	            fields are not guaranteed to agree with each other.
	        """
	        # "Valid" is listed three times so it is drawn three times out of five.
	        status_pool = ["Valid", "Valid", "Valid", "Expired", "Expiring-Soon"]
	        remarks_pool = ["All systems operational", "Minor issues noted", "Requires follow-up", ""]
	        return [
	            {
	                "certificate_id": str(uuid.uuid4()),
	                "trainset_id": ts_id,
	                "department": dept,
	                "issue_date": self._iso_from_now(days=-random.randint(1, 60)),
	                # A negative offset yields an expiry date already in the past.
	                "expiry_date": self._iso_from_now(days=random.randint(-5, 90)),
	                "status": random.choice(status_pool),
	                "inspector_id": f"INS-{random.randint(100, 999)}",
	                "compliance_score": random.randint(75, 100),
	                "remarks": random.choice(remarks_pool),
	            }
	            for ts_id in self.trainset_ids
	            for dept in self.departments
	        ]

	    def generate_job_cards(self) -> List[Dict]:
	        """Generate IBM Maximo job cards.

	        Returns:
	            Between zero and five job-card dicts per trainset. IDs are random
	            numbers and are not checked for uniqueness.
	        """
	        job_types = ["Preventive", "Corrective", "Breakdown", "Inspection"]
	        priorities = ["Critical", "High", "Medium", "Low"]
	        status_pool = ["Open", "Open", "Closed", "In-Progress", "Pending-Parts"]
	        parts = ["Brakes", "HVAC", "Doors", "Bogies", "Pantograph", "Electrical"]

	        job_cards = []
	        for ts_id in self.trainset_ids:
	            # Random number of job cards per trainset
	            for _ in range(random.randint(0, 5)):
	                job_cards.append({
	                    "job_card_id": f"JC-{random.randint(10000, 99999)}",
	                    "trainset_id": ts_id,
	                    "work_order_number": f"WO-{random.randint(100000, 999999)}",
	                    "job_type": random.choice(job_types),
	                    "priority": random.choice(priorities),
	                    "status": random.choice(status_pool),
	                    "created_date": self._iso_from_now(days=-random.randint(1, 30)),
	                    "estimated_completion": self._iso_from_now(hours=random.randint(2, 48)),
	                    "assigned_technician": f"TECH-{random.randint(100, 999)}",
	                    "component": random.choice(parts),
	                    "description": "Routine maintenance required",
	                    "estimated_hours": random.randint(2, 24),
	                    "cost_estimate": random.randint(5000, 50000),
	                })
	        return job_cards

	    def generate_component_health(self) -> List[Dict]:
	        """Generate IoT sensor data for component health.

	        Returns:
	            One health dict per (trainset, component) pair; ``threshold`` and
	            ``unit`` are copied from the per-component table below.
	        """
	        components = {
	            "Bogie": {"wear_threshold": 80, "unit": "% wear"},
	            "Brake_Pad": {"wear_threshold": 70, "unit": "% remaining"},
	            "HVAC": {"wear_threshold": 85, "unit": "% efficiency"},
	            "Door_System": {"wear_threshold": 90, "unit": "cycles"},
	            "Pantograph": {"wear_threshold": 75, "unit": "% condition"},
	            "Battery": {"wear_threshold": 80, "unit": "% capacity"},
	        }
	        status_pool = ["Good", "Good", "Good", "Fair", "Warning"]
	        return [
	            {
	                "trainset_id": ts_id,
	                "component": comp,
	                "health_score": random.randint(60, 100),
	                "wear_level": random.randint(0, 100),
	                "threshold": meta["wear_threshold"],
	                "unit": meta["unit"],
	                "status": random.choice(status_pool),
	                "next_maintenance_km": random.randint(1000, 5000),
	                "last_maintenance_date": self._iso_from_now(days=-random.randint(1, 60)),
	                "predicted_failure_date": self._iso_from_now(days=random.randint(30, 180)),
	                "timestamp": self._iso_from_now(),
	            }
	            for ts_id in self.trainset_ids
	            for comp, meta in components.items()
	        ]

	    def generate_iot_sensors(self) -> List[Dict]:
	        """Generate real-time IoT sensor readings.

	        Returns:
	            One reading dict per trainset with nested ``vibration``,
	            ``pressure``, ``electrical``, ``door_cycles`` and ``gps`` groups.
	        """
	        return [
	            {
	                "trainset_id": ts_id,
	                "timestamp": self._iso_from_now(),
	                "vibration": {
	                    "bogie_1": round(random.uniform(0.5, 3.5), 2),
	                    "bogie_2": round(random.uniform(0.5, 3.5), 2),
	                    "unit": "mm/s",
	                },
	                "pressure": {
	                    "brake_system": round(random.uniform(5.5, 8.5), 2),
	                    "pneumatic_doors": round(random.uniform(6.0, 8.0), 2),
	                    "unit": "bar",
	                },
	                "electrical": {
	                    "voltage": round(random.uniform(730, 770), 1),
	                    "current": round(random.uniform(100, 400), 1),
	                    "power_consumption": round(random.uniform(200, 600), 1),
	                    "battery_voltage": round(random.uniform(70, 85), 1),
	                },
	                "door_cycles": {
	                    "door_1": random.randint(50000, 200000),
	                    "door_2": random.randint(50000, 200000),
	                    "door_3": random.randint(50000, 200000),
	                    "door_4": random.randint(50000, 200000),
	                },
	                "gps": {
	                    "latitude": round(random.uniform(9.9, 10.1), 6),
	                    "longitude": round(random.uniform(76.2, 76.4), 6),
	                    "speed_kmh": round(random.uniform(0, 80), 1),  # Max tested speed: 80 km/h
	                },
	            }
	            for ts_id in self.trainset_ids
	        ]

	    def generate_branding_contracts(self) -> List[Dict]:
	        """Generate branding/advertisement contract data.

	        Returns:
	            One contract dict for each of 10-15 distinct, randomly chosen
	            trainsets, or for the whole fleet when it is smaller than that.
	        """
	        fleet = self.trainset_ids
	        # random.sample raises ValueError when asked for more items than the
	        # population holds, so cap the draw at the fleet size.
	        wrapped_count = min(random.randint(10, 15), len(fleet))
	        status_pool = ["Active", "Active", "Active", "At-Risk", "Compliant"]
	        return [
	            {
	                "trainset_id": ts_id,
	                "brand": random.choice(self.brands),
	                "contract_id": f"ADV-{random.randint(1000, 9999)}",
	                "start_date": self._iso_from_now(days=-random.randint(30, 180)),
	                "end_date": self._iso_from_now(days=random.randint(30, 365)),
	                "contracted_exposure_hours": random.randint(2000, 5000),
	                "actual_exposure_hours": random.randint(1500, 4500),
	                "daily_target_hours": random.randint(8, 12),
	                "contract_value": random.randint(500000, 2000000),
	                "penalty_per_hour_shortfall": random.randint(500, 2000),
	                "status": random.choice(status_pool),
	                "priority_level": random.choice(["High", "Medium", "Low"]),
	            }
	            for ts_id in random.sample(fleet, wrapped_count)
	        ]

	    def generate_maintenance_schedule(self) -> List[Dict]:
	        """Generate planned maintenance schedules.

	        Returns:
	            One schedule dict per trainset, in fleet order.
	        """
	        maintenance_types = ["A-Check", "B-Check", "C-Check", "D-Check", "Overhaul"]
	        bays = ["IBL-1", "IBL-2", "Cleaning-Bay", "Workshop"]
	        return [
	            {
	                "trainset_id": ts_id,
	                "maintenance_type": random.choice(maintenance_types),
	                "scheduled_date": self._iso_from_now(days=random.randint(1, 60)),
	                "estimated_duration_hours": random.randint(4, 72),
	                "bay_required": random.choice(bays),
	                "priority": random.choice(["Mandatory", "Scheduled", "Optional"]),
	                "km_since_last_maintenance": random.randint(5000, 20000),
	                "days_since_last_maintenance": random.randint(15, 90),
	                "status": random.choice(["Scheduled", "Pending", "Overdue"]),
	            }
	            for ts_id in self.trainset_ids
	        ]

	    def generate_performance_metrics(self) -> List[Dict]:
	        """Generate historical performance data.

	        Returns:
	            Thirty daily metric dicts per trainset, ordered from today
	            backwards (``days_ago`` 0 through 29).
	        """
	        metrics = []
	        for ts_id in self.trainset_ids:
	            # Last 30 days of performance
	            for days_ago in range(30):
	                day = datetime.now() - timedelta(days=days_ago)
	                metrics.append({
	                    "trainset_id": ts_id,
	                    "date": day.date().isoformat(),
	                    # Four True entries out of five candidates.
	                    "service_availability": random.choice([True, True, True, True, False]),
	                    "punctuality_percent": round(random.uniform(95, 100), 2),
	                    "km_traveled": random.randint(150, 450),
	                    "trips_completed": random.randint(15, 35),
	                    "breakdown_count": random.randint(0, 2),
	                    "delay_minutes": random.randint(0, 30),
	                    "passenger_count": random.randint(5000, 15000),
	                    "energy_consumed_kwh": round(random.uniform(300, 800), 2),
	                    "average_speed_kmh": round(random.uniform(35, 55), 1),
	                })
	        return metrics

	    def generate_cleaning_slots(self) -> List[Dict]:
	        """Generate cleaning bay availability and schedules.

	        Returns:
	            One slot dict per (bay, shift) pair for today's date. In every
	            slot ``occupied <= capacity`` and
	            ``available == capacity - occupied``.
	        """
	        bays = ["Cleaning-Bay-1", "Cleaning-Bay-2", "Cleaning-Bay-3"]
	        shifts = ["Morning", "Afternoon", "Night"]
	        fleet = self.trainset_ids

	        slots = []
	        for bay in bays:
	            for shift in shifts:
	                capacity = random.randint(2, 4)
	                # Keep the three occupancy figures mutually consistent
	                # instead of drawing each one independently.
	                occupied = random.randint(0, min(3, capacity))
	                # Cap at the fleet size: random.sample raises ValueError
	                # when the request exceeds the population.
	                scheduled_count = min(random.randint(0, 3), len(fleet))
	                slots.append({
	                    "bay_name": bay,
	                    "date": datetime.now().date().isoformat(),
	                    "shift": shift,
	                    "capacity": capacity,
	                    "occupied": occupied,
	                    "available": capacity - occupied,
	                    "scheduled_trainsets": random.sample(fleet, scheduled_count),
	                    "manpower_available": random.randint(2, 6),
	                    "estimated_duration_hours": random.randint(2, 4),
	                })
	        return slots

	    def generate_manual_overrides(self) -> List[Dict]:
	        """Generate supervisor manual override entries.

	        Returns:
	            Between three and eight override dicts, each targeting a randomly
	            chosen trainset and expiring 24 hours from now. An empty list is
	            returned when the fleet has no trainsets.
	        """
	        if not self.trainset_ids:
	            # random.choice raises IndexError on an empty sequence.
	            return []

	        actions = ["Force-Induction", "Hold-Back", "Priority-Change", "IBL-Delay"]
	        reasons = [
	            "Emergency service requirement",
	            "VIP movement",
	            "Component inspection needed",
	            "Branding priority",
	            "Safety precaution",
	        ]
	        return [
	            {
	                "override_id": str(uuid.uuid4()),
	                "trainset_id": random.choice(self.trainset_ids),
	                "timestamp": self._iso_from_now(),
	                "supervisor_id": f"SUP-{random.randint(100, 999)}",
	                "action": random.choice(actions),
	                "reason": random.choice(reasons),
	                "priority": random.choice(["Critical", "High", "Medium"]),
	                "expiry": self._iso_from_now(hours=24),
	            }
	            for _ in range(random.randint(3, 8))
	        ]

	    def generate_external_factors(self) -> Dict:
	        """Generate external factors affecting operations.

	        Returns:
	            A single dict for today's date with ``weather``,
	            ``special_events`` (a string or ``None``), ``ridership_forecast``
	            and ``track_conditions`` entries.
	        """
	        events = [
	            None,
	            "Festival - High ridership expected",
	            "VIP visit - Route restrictions",
	            "Maintenance window - Track work",
	        ]
	        return {
	            "date": datetime.now().date().isoformat(),
	            "weather": {
	                "humidity": random.randint(60, 90),
	                "rainfall_mm": round(random.uniform(0, 50), 1),
	                "condition": random.choice(["Clear", "Cloudy", "Rainy", "Stormy"]),
	            },
	            "special_events": random.choice(events),
	            "ridership_forecast": {
	                "expected_passengers": random.randint(80000, 150000),
	                "peak_hours": ["08:00-10:00", "17:00-20:00"],
	                "load_factor": round(random.uniform(0.6, 0.9), 2),
	            },
	            "track_conditions": {
	                "status": random.choice(["Normal", "Caution", "Restricted"]),
	                "maintenance_zones": random.randint(0, 3),
	                "speed_restrictions": random.randint(0, 2),
	            },
	        }

	    def generate_complete_dataset(self, include_job_cards: bool = False) -> Dict:
	        """Generate complete synthetic dataset for metro scheduling.

	        Args:
	            include_job_cards: Whether to include job cards in the dataset.
	                Default False, in which case ``job_cards`` is an empty list.

	        Returns:
	            A dict with a ``metadata`` entry plus one entry per generator
	            method of this class.
	        """
	        return {
	            "metadata": {
	                "generated_at": self._iso_from_now(),
	                "num_trainsets": self.num_trainsets,
	                "system": "Kochi Metro Rail",
	                "data_version": "1.0",
	            },
	            "trainset_status": self.generate_trainset_status(),
	            "fitness_certificates": self.generate_fitness_certificates(),
	            "job_cards": self.generate_job_cards() if include_job_cards else [],
	            "component_health": self.generate_component_health(),
	            "iot_sensors": self.generate_iot_sensors(),
	            "branding_contracts": self.generate_branding_contracts(),
	            "maintenance_schedule": self.generate_maintenance_schedule(),
	            "performance_metrics": self.generate_performance_metrics(),
	            "cleaning_slots": self.generate_cleaning_slots(),
	            "manual_overrides": self.generate_manual_overrides(),
	            "external_factors": self.generate_external_factors(),
	        }

	    def save_to_json(self, filename: str = "metro_synthetic_data.json", include_job_cards: bool = False) -> Dict:
	        """Save generated data to JSON file.

	        Args:
	            filename: Output filename.
	            include_job_cards: Whether to include job cards in the dataset.
	                Default False.

	        Returns:
	            The dataset that was written, so callers can use it without
	            re-reading the file.
	        """
	        data = self.generate_complete_dataset(include_job_cards=include_job_cards)
	        # Explicit encoding keeps the output independent of the platform default.
	        with open(filename, "w", encoding="utf-8") as f:
	            json.dump(data, f, indent=2)
	        print(f"Synthetic data generated and saved to {filename}")
	        return data


	# Example run: build a 25-trainset dataset, write it to disk, report sizes
	if __name__ == "__main__":
	    generator = MetroSyntheticDataGenerator(num_trainsets=25)

	    # save_to_json writes the file and hands back the in-memory dataset
	    data = generator.save_to_json("metro_synthetic_data.json")

	    # (label, dataset key) pairs, printed in this order
	    summary_rows = (
	        ("Trainsets", "trainset_status"),
	        ("Fitness Certificates", "fitness_certificates"),
	        ("Job Cards", "job_cards"),
	        ("Component Health Records", "component_health"),
	        ("IoT Sensor Readings", "iot_sensors"),
	        ("Branding Contracts", "branding_contracts"),
	        ("Performance Metrics", "performance_metrics"),
	    )

	    print("\nDataset Summary:")
	    for label, key in summary_rows:
	        print(f"{label}: {len(data[key])}")