Spaces:

baiganinn
/

hackdrive

Sleeping

App Files Files Community

hackdrive / app.py

baiganinn

Radical HF Spaces optimization: 10k rows limit, minimal maps, error handling

6070490 5 months ago

raw

history blame contribute delete

11.2 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import joblib
	import folium
	from folium.plugins import HeatMap

	# Загружаем модель
	MODEL_PATH = "model/optimization_model.joblib"
	try:
	model = joblib.load(MODEL_PATH)
	print("Модель загружена успешно")
	except:
	model = None
	print("Не удалось загрузить модель")

	def create_maps(file):
	"""Creating two maps from notebook: markers map and heatmap"""
	if file is None:
	return "Please upload geodata file", None, None

	try:
	# Very aggressive data limits for HF Spaces
	print("Loading data...")
	df = pd.read_csv(file.name, nrows=10000) # Drastically reduced to 10k rows
	print(f"Loaded {len(df)} data rows")

	# Check file size
	if len(df) == 0:
	return "Error: Empty file", None, None

	# Check columns
	required_cols = ['lat', 'lng', 'spd', 'alt', 'azm', 'randomized_id']
	missing_cols = [col for col in required_cols if col not in df.columns]
	if missing_cols:
	return f"Missing columns: {missing_cols}. Available: {list(df.columns)}", None, None

	# Aggressive sampling for HF Spaces memory limits
	if len(df) > 5000:
	df = df.sample(n=5000, random_state=42)
	print(f"Sampled down to {len(df)} rows for HF Spaces")

	# Simplified distance calculation
	print("Processing distances...")
	df['distance'] = 100.0 # Constant distance

	# Create coarser grid for fewer zones
	print("Creating spatial grid...")
	lat_min, lat_max = df['lat'].min(), df['lat'].max()
	lng_min, lng_max = df['lng'].min(), df['lng'].max()

	# Use larger grid cells (0.01 instead of 0.005) for fewer zones
	df['lat_bin'] = ((df['lat'] - lat_min) // 0.01).astype(int)
	df['lng_bin'] = ((df['lng'] - lng_min) // 0.01).astype(int)

	# Create string identifiers for grouping
	df['lat_grid'] = df['lat_bin'].astype(str)
	df['lng_grid'] = df['lng_bin'].astype(str)

	# Aggregate by zones as in notebook
	df_zone_stats = df.groupby(['lat_grid', 'lng_grid']).agg(
	zone_avg_spd=('spd', 'mean'),
	zone_spd_std=('spd', 'std'),
	zone_min_spd=('spd', 'min'),
	zone_max_spd=('spd', 'max'),
	zone_avg_alt=('alt', 'mean'),
	zone_alt_std=('alt', 'std'),
	zone_min_alt=('alt', 'min'),
	zone_max_alt=('alt', 'max'),
	zone_avg_azm=('azm', 'mean'),
	zone_azm_std=('azm', 'std'),
	zone_point_count=('randomized_id', 'count'),
	zone_total_distance=('distance', 'sum')
	).reset_index().fillna(0)

	# Create target variable
	zone_counts = df.groupby(['lat_grid', 'lng_grid'])['randomized_id'].nunique().reset_index(name='zone_density')
	zone_counts['target'] = np.log1p(zone_counts['zone_density'])

	# Merge data
	df_ml = pd.merge(df_zone_stats, zone_counts, on=['lat_grid', 'lng_grid'], how='inner')

	if model is None:
	return "Model not loaded", None, None

	# FIX: model expects predicted_demand in data
	# Add dummy column with value 0
	df_ml['predicted_demand'] = 0.0

	# Use all columns except identifiers and target variable
	X = df_ml.drop(['lat_grid', 'lng_grid', 'zone_density', 'target'], axis=1)

	# Predict
	predictions = model.predict(X)

	# Replace dummy values with real predictions (convert from log-scale)
	df_ml['predicted_demand'] = np.expm1(predictions)

	# Create predictions_df as in notebook
	predictions_df = df_ml[['lat_grid', 'lng_grid', 'zone_avg_alt', 'zone_avg_azm',
	'zone_point_count', 'target', 'predicted_demand']].copy()

	# Calculate zone center coordinates - use grouping of original data
	zone_centers = df.groupby(['lat_grid', 'lng_grid']).agg({
	'lat': 'mean',
	'lng': 'mean'
	}).reset_index()

	# Merge with predictions
	predictions_df = pd.merge(predictions_df, zone_centers, on=['lat_grid', 'lng_grid'], how='left')

	# Add calculations as in notebook
	predictions_df['actual_demand'] = np.expm1(predictions_df['target'])
	predictions_df['priority_score'] = predictions_df['predicted_demand'] # Already converted
	predictions_df['supply'] = predictions_df['zone_point_count'] / predictions_df['zone_point_count'].mean()
	predictions_df['demand_supply_ratio'] = predictions_df['priority_score'] / predictions_df['supply']
	predictions_df['demand_supply_difference'] = predictions_df['priority_score'] - predictions_df['supply']

	# Aggressive limits for HF Spaces
	if len(predictions_df) > 100: # Drastically reduced from 1000
	predictions_df = predictions_df.head(100)
	print(f"Limited to top 100 zones for HF Spaces")

	# Sort by priority
	predictions_df = predictions_df.sort_values(by='priority_score', ascending=False)

	# === MAP 1: Minimal markers map for HF Spaces ===
	top_n = min(5, len(predictions_df)) # Only 5 markers instead of 10
	top_zones = predictions_df.head(top_n)

	if len(top_zones) == 0:
	return "No valid zones found", None, None

	# Create minimal map
	map_center_lat = top_zones['lat'].mean()
	map_center_lng = top_zones['lng'].mean()
	m = folium.Map(
	location=[map_center_lat, map_center_lng],
	zoom_start=10, # Reduced zoom
	tiles='OpenStreetMap',
	width=600, # Fixed width
	height=400 # Fixed height
	)

	# Minimal markers with simple tooltips
	for index, row in top_zones.iterrows():
	folium.Marker(
	location=[row['lat'], row['lng']],
	popup=f"Demand: {row['priority_score']:.1f}", # Use popup instead of tooltip
	icon=folium.Icon(color='red', icon='star')
	).add_to(m)

	# Get HTML for first map (with error handling)
	try:
	markers_html = m._repr_html_()
	except:
	markers_html = "<p>Map generation failed - please try with smaller file</p>"

	# === MAP 2: Ultra-simplified heatmap for HF Spaces ===
	try:
	# Minimal heatmap data (only top 50 zones)
	heat_zones = predictions_df.head(min(50, len(predictions_df)))

	# Simple heatmap data with positive values only
	heat_data = []
	for index, row in heat_zones.iterrows():
	value = max(0.1, abs(row['demand_supply_difference'])) # Ensure positive values
	heat_data.append([row['lat'], row['lng'], value])

	# Minimal heatmap
	m_heatmap = folium.Map(
	location=[map_center_lat, map_center_lng],
	zoom_start=10,
	tiles='OpenStreetMap',
	width=600,
	height=400
	)

	# Simple heatmap with minimal settings
	if heat_data:
	HeatMap(heat_data, radius=10, blur=10).add_to(m_heatmap)

	heatmap_html = m_heatmap._repr_html_()
	except Exception as e:
	heatmap_html = f"<p>Heatmap generation failed: {str(e)}</p>"

	status = f"✅ Processed {len(predictions_df)} zones from {len(df)} data points (HF Spaces optimized)"

	return status, markers_html, heatmap_html

	except MemoryError:
	return "❌ File too large for HF Spaces. Please use a smaller dataset (< 1MB)", None, None
	except pd.errors.EmptyDataError:
	return "❌ Empty or invalid file", None, None
	except Exception as e:
	error_msg = str(e)
	if "BodyStreamBuffer" in error_msg:
	return "❌ Processing timeout. Please use a smaller file (< 5000 rows)", None, None
	return f"❌ Error: {error_msg}", None, None

	# Create beautiful Gradio interface
	with gr.Blocks(
	title="Driver Placement Optimization System",
	theme=gr.themes.Soft(),
	css="""
	.main-container {
	max-width: 1200px;
	margin: 0 auto;
	padding: 15px;
	}
	.header {
	text-align: center;
	margin-bottom: 20px;
	color: white;
	padding: 15px;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	border-radius: 10px;
	}
	.upload-section {
	background: #f8f9fa;
	padding: 15px;
	border-radius: 8px;
	margin-bottom: 15px;
	}
	.maps-container {
	gap: 15px;
	}
	.map-card {
	background: white;
	border: 1px solid #e0e0e0;
	border-radius: 8px;
	padding: 10px;
	box-shadow: 0 1px 3px rgba(0,0,0,0.1);
	max-height: 500px;
	overflow: hidden;
	}
	"""
	) as interface:

	with gr.Column(elem_classes="main-container"):
	with gr.Row(elem_classes="header"):
	gr.Markdown(
	"""
	# Driver Placement Optimization System
	### Geodata analysis for optimal placement zones
	""",
	elem_classes="header-text"
	)

	with gr.Row(elem_classes="upload-section"):
	with gr.Column():
	gr.Markdown("### Data Upload")
	gr.Markdown("⚠️ HF Spaces Limits: Max 10,000 rows, 5MB file size")
	file_input = gr.File(
	label="Select file with geodata (CSV format)",
	elem_id="file-upload"
	)
	status_output = gr.Textbox(
	label="Processing Status",
	interactive=False,
	lines=2
	)

	gr.Markdown("### Analysis Results")

	with gr.Row(elem_classes="maps-container"):
	with gr.Column(elem_classes="map-card"):
	gr.Markdown("#### Priority Zones Map")
	gr.Markdown("Displays top-10 zones with highest demand")
	map1_output = gr.HTML(
	label="Top Zones Map for Driver Placement",
	elem_id="map1"
	)

	with gr.Column(elem_classes="map-card"):
	gr.Markdown("#### Imbalance Heatmap")
	gr.Markdown("Shows difference between demand and supply")
	map2_output = gr.HTML(
	label="Demand-Supply Imbalance Heatmap",
	elem_id="map2"
	)

	file_input.change(
	fn=create_maps,
	inputs=file_input,
	outputs=[status_output, map1_output, map2_output]
	)

	if __name__ == "__main__":
	interface.launch(server_name="0.0.0.0", debug=False, show_error=True)