Spaces:

Suraj44
/

crowdroute

Sleeping

App Files Files Community

crowdroute / ml /prepare_data.py

UAVDETECTION

Initial CrowdRoute API deployment

90776a1 2 months ago

raw

history blame contribute delete

2.04 kB

	import pandas as pd
	import numpy as np

	def load_and_prepare(
	    filepath: str,
	    *,
	    low_threshold: float = 1000,
	    high_threshold: float = 3000,
	) -> pd.DataFrame:
	    """Load a ridership CSV and derive the feature and target columns.

	    The CSV must provide ``datetime``, ``passenger_count`` and
	    ``transport_type`` columns; a missing column raises ``KeyError``.

	    Args:
	        filepath: Location of the CSV, passed straight to ``pd.read_csv``.
	        low_threshold: Passenger counts strictly below this value are
	            labelled 0 (LOW).
	        high_threshold: Counts from ``low_threshold`` up to, but not
	            including, this value are labelled 1 (MEDIUM); all remaining
	            counts are labelled 2 (HIGH).

	    Returns:
	        The loaded frame with the added columns ``hour``, ``day_of_week``,
	        ``month``, ``is_weekend``, ``is_peak_hour``, ``crowd_level`` and
	        ``transport_encoded``, without the rows that lack an hour or a
	        passenger count.

	    Raises:
	        ValueError: If ``low_threshold`` is greater than ``high_threshold``.
	    """
	    if low_threshold > high_threshold:
	        raise ValueError("low_threshold must not exceed high_threshold")

	    df = pd.read_csv(filepath)

	    # ── Parse datetime ──────────────────────────────────────
	    df['datetime'] = pd.to_datetime(df['datetime'])
	    df['hour'] = df['datetime'].dt.hour
	    df['day_of_week'] = df['datetime'].dt.dayofweek  # 0=Mon, 6=Sun
	    df['month'] = df['datetime'].dt.month
	    df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)
	    df['is_peak_hour'] = df['hour'].isin([7, 8, 9, 17, 18, 19]).astype(int)

	    # ── Create target label ──────────────────────────────────
	    # Vectorised labelling: the first matching condition wins, so the
	    # second condition only applies to counts >= low_threshold.
	    # Missing counts match neither condition and fall to the default (2);
	    # those rows are removed by the dropna below.
	    counts = df['passenger_count']
	    df['crowd_level'] = np.select(
	        [counts < low_threshold, counts < high_threshold],
	        [0, 1],
	        default=2,
	    )

	    # ── Encode transport type ────────────────────────────────
	    transport_map = {'bus': 0, 'metro': 1, 'train': 2}
	    # Normalise case and surrounding whitespace so values such as 'Bus' or
	    # ' metro ' are not silently encoded as NaN. Transport types outside
	    # the map (including missing values) still become NaN.
	    normalised_type = df['transport_type'].astype(str).str.strip().str.lower()
	    df['transport_encoded'] = normalised_type.map(transport_map)

	    # ── Drop rows with nulls ─────────────────────────────────
	    df = df.dropna(subset=['crowd_level', 'hour', 'passenger_count'])

	    return df


	def get_features_and_target(df: pd.DataFrame):
	    """Split a prepared frame into a feature matrix and the target series.

	    Args:
	        df: Frame containing a ``crowd_level`` column plus any subset of
	            the candidate feature columns listed below.

	    Returns:
	        A ``(X, y)`` tuple: ``X`` holds the candidate feature columns that
	        are present in ``df`` (in candidate order) and ``y`` is the
	        ``crowd_level`` column.
	    """
	    candidate_columns = (
	        'hour',
	        'day_of_week',
	        'month',
	        'is_weekend',
	        'is_peak_hour',
	        'is_holiday',   # optional, used only when present
	        'temperature',  # optional, used only when present
	        'transport_encoded',
	    )
	    # Keep only the candidates the frame actually provides.
	    selected = list(filter(lambda name: name in df.columns, candidate_columns))

	    return df[selected], df['crowd_level']