baiganinn commited on
Commit
cd5feed
·
1 Parent(s): f4de349
Files changed (4) hide show
  1. README.md +21 -0
  2. app.py +245 -0
  3. model/optimization_model.joblib +3 -0
  4. requirements.txt +6 -0
README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Driver Placement Optimization System
2
+
3
+ This Gradio web application analyzes geodata to determine optimal driver placement zones using machine learning.
4
+
5
+ ## Features
6
+ - Upload geodata files for analysis
7
+ - Interactive maps showing priority zones
8
+ - Heatmap visualization of demand-supply imbalance
9
+ - Machine learning-based demand prediction
10
+
11
+ ## How to use
12
+ 1. Upload your geodata file (CSV format with lat, lng, spd, alt, azm, randomized_id columns)
13
+ 2. The system will process the data and generate two maps:
14
+ - Priority zones map with top-10 highest demand areas
15
+ - Heatmap showing demand-supply imbalance
16
+
17
+ ## Technology Stack
18
+ - Gradio for web interface
19
+ - Folium for interactive maps
20
+ - scikit-learn for machine learning predictions
21
+ - Pandas for data processing
app.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import pandas as pd
import numpy as np
import joblib
import folium
from folium.plugins import HeatMap

# Load the pre-trained demand-prediction model once at import time.
MODEL_PATH = "model/optimization_model.joblib"
try:
    model = joblib.load(MODEL_PATH)
    print("Модель загружена успешно")
except Exception:
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are not
    # swallowed.  The app stays alive without a model; create_maps() reports
    # "Model not loaded" to the user instead of crashing at startup.
    model = None
    print("Не удалось загрузить модель")
16
+
17
def create_maps(file):
    """Build two Folium maps (top-priority markers and an imbalance heatmap)
    from an uploaded geodata CSV.

    Args:
        file: Gradio file object exposing a ``.name`` path to a CSV with the
            columns lat, lng, spd, alt, azm, randomized_id.

    Returns:
        Tuple ``(status, markers_html, heatmap_html)``.  On any failure the
        status string describes the problem and both HTML values are None.
    """
    if file is None:
        return "Please upload geodata file", None, None

    if model is None:
        # Fail fast: no point reading and aggregating the whole file when
        # there is no model to score the zones with.
        return "Model not loaded", None, None

    try:
        # Read all data without limitations
        print("Loading data...")
        df = pd.read_csv(file.name)
        print(f"Loaded {len(df)} data rows")

        # Validate the schema up front so the error message is actionable.
        required_cols = ['lat', 'lng', 'spd', 'alt', 'azm', 'randomized_id']
        if not all(col in df.columns for col in required_cols):
            return f"Missing columns: {required_cols}", None, None

        # Simplified per-point distance: 0 for the first point of each track
        # (randomized_id), a constant 100 for every subsequent point.
        # cumcount() keeps values aligned with the original row index; the
        # previous groupby().apply().explode().reset_index(drop=True) lost
        # that alignment whenever group order differed from row order.
        first_point = df.groupby('randomized_id').cumcount() == 0
        df['distance'] = np.where(first_point, 0.0, 100.0)

        # Build a regular ~0.005-degree grid using plain integer bins
        # (numeric bins instead of pd.cut's categoricals, which broke the
        # downstream merges).
        lat_min = df['lat'].min()
        lng_min = df['lng'].min()
        df['lat_bin'] = ((df['lat'] - lat_min) // 0.005).astype(int)
        df['lng_bin'] = ((df['lng'] - lng_min) // 0.005).astype(int)

        # String identifiers for grouping and merging.
        df['lat_grid'] = df['lat_bin'].astype(str)
        df['lng_grid'] = df['lng_bin'].astype(str)

        # Per-zone aggregate features, matching the training notebook.
        df_zone_stats = df.groupby(['lat_grid', 'lng_grid']).agg(
            zone_avg_spd=('spd', 'mean'),
            zone_spd_std=('spd', 'std'),
            zone_min_spd=('spd', 'min'),
            zone_max_spd=('spd', 'max'),
            zone_avg_alt=('alt', 'mean'),
            zone_alt_std=('alt', 'std'),
            zone_min_alt=('alt', 'min'),
            zone_max_alt=('alt', 'max'),
            zone_avg_azm=('azm', 'mean'),
            zone_azm_std=('azm', 'std'),
            zone_point_count=('randomized_id', 'count'),
            zone_total_distance=('distance', 'sum')
        ).reset_index().fillna(0)

        # Target: log1p of the number of distinct drivers seen in each zone.
        zone_counts = df.groupby(['lat_grid', 'lng_grid'])['randomized_id'].nunique().reset_index(name='zone_density')
        zone_counts['target'] = np.log1p(zone_counts['zone_density'])

        df_ml = pd.merge(df_zone_stats, zone_counts, on=['lat_grid', 'lng_grid'], how='inner')

        # The model was trained with a predicted_demand feature present;
        # feed a zero placeholder, then overwrite it with real predictions.
        df_ml['predicted_demand'] = 0.0

        # Feature matrix: everything except identifiers and the target.
        X = df_ml.drop(['lat_grid', 'lng_grid', 'zone_density', 'target'], axis=1)

        predictions = model.predict(X)

        # Convert predictions back from log-scale (inverse of log1p).
        df_ml['predicted_demand'] = np.expm1(predictions)

        # Keep only the columns the maps need, as in the notebook.
        predictions_df = df_ml[['lat_grid', 'lng_grid', 'zone_avg_alt', 'zone_avg_azm',
                                'zone_point_count', 'target', 'predicted_demand']].copy()

        # Zone center coordinates come from averaging the raw points.
        zone_centers = df.groupby(['lat_grid', 'lng_grid']).agg({
            'lat': 'mean',
            'lng': 'mean'
        }).reset_index()

        predictions_df = pd.merge(predictions_df, zone_centers, on=['lat_grid', 'lng_grid'], how='left')

        # Derived metrics, as in the notebook.
        predictions_df['actual_demand'] = np.expm1(predictions_df['target'])
        predictions_df['priority_score'] = predictions_df['predicted_demand']  # Already converted
        predictions_df['supply'] = predictions_df['zone_point_count'] / predictions_df['zone_point_count'].mean()
        predictions_df['demand_supply_ratio'] = predictions_df['priority_score'] / predictions_df['supply']
        predictions_df['demand_supply_difference'] = predictions_df['priority_score'] - predictions_df['supply']

        # Highest-priority zones first.
        predictions_df = predictions_df.sort_values(by='priority_score', ascending=False)

        # === MAP 1: Top zones with markers (as in notebook) ===
        top_n = 10
        top_zones = predictions_df.head(top_n)

        map_center_lat = top_zones['lat'].mean()
        map_center_lng = top_zones['lng'].mean()
        m = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=12)

        # One red marker per top zone, with a demand tooltip.
        for index, row in top_zones.iterrows():
            tooltip_text = f"Predicted Demand: {row['priority_score']:.2f}<br>" \
                           f"Actual Demand: {row['actual_demand']:.0f}<br>" \
                           f"Priority Score: {row['priority_score']:.2f}"
            folium.Marker(
                location=[row['lat'], row['lng']],
                tooltip=tooltip_text,
                icon=folium.Icon(color='red', icon='info-sign')
            ).add_to(m)

        markers_html = m._repr_html_()

        # === MAP 2: Heatmap of demand-supply imbalance (as in notebook) ===
        heat_data = [[row['lat'], row['lng'], row['demand_supply_difference']]
                     for index, row in predictions_df.iterrows()]

        map_center_lat = predictions_df['lat'].mean()
        map_center_lng = predictions_df['lng'].mean()
        m_heatmap = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=12)

        HeatMap(heat_data).add_to(m_heatmap)

        heatmap_html = m_heatmap._repr_html_()

        status = f"Processed {len(predictions_df)} zones from {len(df)} data points"

        return status, markers_html, heatmap_html

    except Exception as e:
        # Top-level boundary: surface any processing error to the UI.
        return f"Error: {str(e)}", None, None
157
+
158
# Create beautiful Gradio interface
# Layout: styled gradient header, an upload panel with a status textbox, and
# two side-by-side HTML panes that receive the Folium maps rendered by
# create_maps().  NOTE(review): the .header CSS rule declares `color: white`
# twice — harmless duplication, worth cleaning up.
with gr.Blocks(
    title="Driver Placement Optimization System",
    theme=gr.themes.Soft(),
    css="""
    .main-container {
        max-width: 1400px;
        margin: 0 auto;
        padding: 20px;
    }
    .header {
        text-align: center;
        margin-bottom: 30px;
        color: white;
        padding: 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 15px;
        color: white;
    }
    .upload-section {
        background: #f8f9fa;
        padding: 20px;
        border-radius: 10px;
        margin-bottom: 20px;
    }
    .maps-container {
        gap: 20px;
    }
    .map-card {
        background: white;
        border: 1px solid #e0e0e0;
        border-radius: 10px;
        padding: 15px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    """
) as interface:

    with gr.Column(elem_classes="main-container"):
        # Gradient title banner.
        with gr.Row(elem_classes="header"):
            gr.Markdown(
                """
                # Driver Placement Optimization System
                ### Geodata analysis for optimal placement zones
                """,
                elem_classes="header-text"
            )

        # File upload plus a read-only status line fed by create_maps().
        with gr.Row(elem_classes="upload-section"):
            with gr.Column():
                gr.Markdown("### Data Upload")
                file_input = gr.File(
                    label="Select file with geodata",
                    elem_id="file-upload"
                )
                status_output = gr.Textbox(
                    label="Processing Status",
                    interactive=False,
                    lines=2
                )

        gr.Markdown("### Analysis Results")

        # Two map cards: top-10 marker map and the imbalance heatmap.
        with gr.Row(elem_classes="maps-container"):
            with gr.Column(elem_classes="map-card"):
                gr.Markdown("#### Priority Zones Map")
                gr.Markdown("*Displays top-10 zones with highest demand*")
                map1_output = gr.HTML(
                    label="Top Zones Map for Driver Placement",
                    elem_id="map1"
                )

            with gr.Column(elem_classes="map-card"):
                gr.Markdown("#### Imbalance Heatmap")
                gr.Markdown("*Shows difference between demand and supply*")
                map2_output = gr.HTML(
                    label="Demand-Supply Imbalance Heatmap",
                    elem_id="map2"
                )

    # Re-run the full pipeline whenever a new file is uploaded.
    file_input.change(
        fn=create_maps,
        inputs=file_input,
        outputs=[status_output, map1_output, map2_output]
    )

if __name__ == "__main__":
    # Local-only server; share=False keeps the app off Gradio's public tunnel.
    interface.launch(server_name="127.0.0.1", server_port=7870, share=False)
model/optimization_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab47250d98f86d183ed95f5b6aa8d4017597d0d510be8d4fb43abd623d4ae75c
3
+ size 409969
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio==3.50.2
2
+ pandas
3
+ numpy
4
+ scikit-learn
5
+ joblib
6
+ folium