init
Browse files- README.md +21 -0
- app.py +245 -0
- model/optimization_model.joblib +3 -0
- requirements.txt +6 -0
README.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Driver Placement Optimization System
|
| 2 |
+
|
| 3 |
+
This Gradio web application analyzes geodata to determine optimal driver placement zones using machine learning.
|
| 4 |
+
|
| 5 |
+
## Features
|
| 6 |
+
- Upload geodata files for analysis
|
| 7 |
+
- Interactive maps showing priority zones
|
| 8 |
+
- Heatmap visualization of demand-supply imbalance
|
| 9 |
+
- Machine learning-based demand prediction
|
| 10 |
+
|
| 11 |
+
## How to use
|
| 12 |
+
1. Upload your geodata file (CSV format with lat, lng columns)
|
| 13 |
+
2. The system will process the data and generate two maps:
|
| 14 |
+
- Priority zones map with top-10 highest demand areas
|
| 15 |
+
- Heatmap showing demand-supply imbalance
|
| 16 |
+
|
| 17 |
+
## Technology Stack
|
| 18 |
+
- Gradio for web interface
|
| 19 |
+
- Folium for interactive maps
|
| 20 |
+
- scikit-learn for machine learning predictions
|
| 21 |
+
- Pandas for data processing
|
app.py
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
import pandas as pd
import numpy as np
import joblib
import folium
from folium.plugins import HeatMap

# Path to the pre-trained demand model (stored via git-lfs in model/).
MODEL_PATH = "model/optimization_model.joblib"

# Load the model eagerly at import time. On failure the app still starts:
# create_maps() checks `model is None` and reports "Model not loaded".
try:
    model = joblib.load(MODEL_PATH)
    print("Модель загружена успешно")
except Exception as e:  # FIX: was a bare `except:` that hid the failure reason
    model = None
    print(f"Не удалось загрузить модель: {e}")
|
| 16 |
+
|
| 17 |
+
def create_maps(file):
    """Process an uploaded geodata CSV and build the two result maps.

    Parameters
    ----------
    file : gradio file payload (object with a ``.name`` path), plain str
        path, or None when nothing has been uploaded yet.

    Returns
    -------
    tuple[str, str | None, str | None]
        (status message, top-zones markers map HTML, imbalance heatmap
        HTML).  The HTML slots are None on validation failure or error.
    """
    if file is None:
        return "Please upload geodata file", None, None

    try:
        # Accept both a plain path and a gradio file object, so the handler
        # keeps working across gradio versions / direct calls.
        path = file if isinstance(file, str) else file.name

        print("Loading data...")
        df = pd.read_csv(path)
        print(f"Loaded {len(df)} data rows")

        # Validate the expected schema up front.
        required_cols = ['lat', 'lng', 'spd', 'alt', 'azm', 'randomized_id']
        if not all(col in df.columns for col in required_cols):
            return f"Missing columns: {required_cols}", None, None

        # Guard empty input: mean() of an empty frame yields NaN map
        # centers, which would only surface as a cryptic folium error.
        if df.empty:
            return "Uploaded file contains no rows", None, None

        # Simplified per-point distance: 0 for the first point of each
        # track, a constant 100 for every subsequent point.
        # FIX: the previous groupby().apply(...).explode().reset_index()
        # pipeline reordered values by group key, not original row order,
        # so distances were assigned to the wrong rows.  cumcount() is
        # positionally aligned with df.
        df['distance'] = np.where(
            df.groupby('randomized_id').cumcount() == 0, 0.0, 100.0
        )

        # Build a numeric grid (avoids pd.cut's categorical dtype issues);
        # ~0.005 degrees per cell.
        lat_min = df['lat'].min()
        lng_min = df['lng'].min()
        df['lat_bin'] = ((df['lat'] - lat_min) // 0.005).astype(int)
        df['lng_bin'] = ((df['lng'] - lng_min) // 0.005).astype(int)

        # String identifiers for grouping/merging.
        df['lat_grid'] = df['lat_bin'].astype(str)
        df['lng_grid'] = df['lng_bin'].astype(str)

        # Per-zone feature aggregates (mirrors the training notebook).
        df_zone_stats = df.groupby(['lat_grid', 'lng_grid']).agg(
            zone_avg_spd=('spd', 'mean'),
            zone_spd_std=('spd', 'std'),
            zone_min_spd=('spd', 'min'),
            zone_max_spd=('spd', 'max'),
            zone_avg_alt=('alt', 'mean'),
            zone_alt_std=('alt', 'std'),
            zone_min_alt=('alt', 'min'),
            zone_max_alt=('alt', 'max'),
            zone_avg_azm=('azm', 'mean'),
            zone_azm_std=('azm', 'std'),
            zone_point_count=('randomized_id', 'count'),
            zone_total_distance=('distance', 'sum')
        ).reset_index().fillna(0)

        # Target: log1p of the number of distinct drivers seen in a zone.
        zone_counts = (
            df.groupby(['lat_grid', 'lng_grid'])['randomized_id']
            .nunique()
            .reset_index(name='zone_density')
        )
        zone_counts['target'] = np.log1p(zone_counts['zone_density'])

        df_ml = pd.merge(df_zone_stats, zone_counts,
                         on=['lat_grid', 'lng_grid'], how='inner')

        if model is None:
            return "Model not loaded", None, None

        # The trained pipeline expects a predicted_demand column in its
        # feature matrix; feed a dummy 0.0 and overwrite after predicting.
        df_ml['predicted_demand'] = 0.0

        # Features = everything except identifiers and the target.
        X = df_ml.drop(['lat_grid', 'lng_grid', 'zone_density', 'target'],
                       axis=1)

        predictions = model.predict(X)

        # Model predicts in log-space; convert back with expm1.
        df_ml['predicted_demand'] = np.expm1(predictions)

        predictions_df = df_ml[['lat_grid', 'lng_grid', 'zone_avg_alt',
                                'zone_avg_azm', 'zone_point_count',
                                'target', 'predicted_demand']].copy()

        # Zone center = mean coordinate of the raw points in the zone.
        zone_centers = df.groupby(['lat_grid', 'lng_grid']).agg({
            'lat': 'mean',
            'lng': 'mean'
        }).reset_index()

        predictions_df = pd.merge(predictions_df, zone_centers,
                                  on=['lat_grid', 'lng_grid'], how='left')

        # Derived metrics (as in the notebook).
        predictions_df['actual_demand'] = np.expm1(predictions_df['target'])
        predictions_df['priority_score'] = predictions_df['predicted_demand']
        predictions_df['supply'] = (
            predictions_df['zone_point_count']
            / predictions_df['zone_point_count'].mean()
        )
        predictions_df['demand_supply_ratio'] = (
            predictions_df['priority_score'] / predictions_df['supply']
        )
        predictions_df['demand_supply_difference'] = (
            predictions_df['priority_score'] - predictions_df['supply']
        )

        predictions_df = predictions_df.sort_values(by='priority_score',
                                                    ascending=False)

        # === MAP 1: top-N priority zones with markers ===
        top_n = 10
        top_zones = predictions_df.head(top_n)

        m = folium.Map(
            location=[top_zones['lat'].mean(), top_zones['lng'].mean()],
            zoom_start=12
        )
        for index, row in top_zones.iterrows():
            tooltip_text = f"Predicted Demand: {row['priority_score']:.2f}<br>" \
                           f"Actual Demand: {row['actual_demand']:.0f}<br>" \
                           f"Priority Score: {row['priority_score']:.2f}"
            folium.Marker(
                location=[row['lat'], row['lng']],
                tooltip=tooltip_text,
                icon=folium.Icon(color='red', icon='info-sign')
            ).add_to(m)
        markers_html = m._repr_html_()

        # === MAP 2: heatmap of demand/supply imbalance ===
        heat_data = [[row['lat'], row['lng'], row['demand_supply_difference']]
                     for index, row in predictions_df.iterrows()]
        m_heatmap = folium.Map(
            location=[predictions_df['lat'].mean(),
                      predictions_df['lng'].mean()],
            zoom_start=12
        )
        HeatMap(heat_data).add_to(m_heatmap)
        heatmap_html = m_heatmap._repr_html_()

        status = f"Processed {len(predictions_df)} zones from {len(df)} data points"
        return status, markers_html, heatmap_html

    except Exception as e:
        # Surface the failure in the status box instead of crashing the UI.
        return f"Error: {str(e)}", None, None
|
| 157 |
+
|
| 158 |
+
# Create beautiful Gradio interface: a header, an upload section, and two
# side-by-side map cards wired to create_maps().
with gr.Blocks(
    title="Driver Placement Optimization System",
    theme=gr.themes.Soft(),
    css="""
    .main-container {
        max-width: 1400px;
        margin: 0 auto;
        padding: 20px;
    }
    .header {
        text-align: center;
        margin-bottom: 30px;
        padding: 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 15px;
        color: white;
    }
    .upload-section {
        background: #f8f9fa;
        padding: 20px;
        border-radius: 10px;
        margin-bottom: 20px;
    }
    .maps-container {
        gap: 20px;
    }
    .map-card {
        background: white;
        border: 1px solid #e0e0e0;
        border-radius: 10px;
        padding: 15px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    """
) as interface:

    with gr.Column(elem_classes="main-container"):
        with gr.Row(elem_classes="header"):
            gr.Markdown(
                """
                # Driver Placement Optimization System
                ### Geodata analysis for optimal placement zones
                """,
                elem_classes="header-text"
            )

        with gr.Row(elem_classes="upload-section"):
            with gr.Column():
                gr.Markdown("### Data Upload")
                file_input = gr.File(
                    label="Select file with geodata",
                    elem_id="file-upload"
                )
                status_output = gr.Textbox(
                    label="Processing Status",
                    interactive=False,
                    lines=2
                )

        gr.Markdown("### Analysis Results")

        with gr.Row(elem_classes="maps-container"):
            with gr.Column(elem_classes="map-card"):
                gr.Markdown("#### Priority Zones Map")
                gr.Markdown("*Displays top-10 zones with highest demand*")
                map1_output = gr.HTML(
                    label="Top Zones Map for Driver Placement",
                    elem_id="map1"
                )

            with gr.Column(elem_classes="map-card"):
                gr.Markdown("#### Imbalance Heatmap")
                gr.Markdown("*Shows difference between demand and supply*")
                map2_output = gr.HTML(
                    label="Demand-Supply Imbalance Heatmap",
                    elem_id="map2"
                )

    # Re-run the analysis whenever a new file is uploaded.
    file_input.change(
        fn=create_maps,
        inputs=file_input,
        outputs=[status_output, map1_output, map2_output]
    )

if __name__ == "__main__":
    # FIX: bind to all interfaces on the conventional Spaces port 7860.
    # The previous 127.0.0.1:7870 binding is unreachable from outside the
    # hosting container, so the deployed app never responded.
    interface.launch(server_name="0.0.0.0", server_port=7860, share=False)
|
model/optimization_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab47250d98f86d183ed95f5b6aa8d4017597d0d510be8d4fb43abd623d4ae75c
|
| 3 |
+
size 409969
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==3.50.2
|
| 2 |
+
pandas
|
| 3 |
+
numpy
|
| 4 |
+
scikit-learn
|
| 5 |
+
joblib
|
| 6 |
+
folium
|