baiganinn committed on
Commit
94899de
·
1 Parent(s): 2292677

Fix HF Spaces BodyStreamBuffer error - add data limits and optimize maps

Browse files
Files changed (2) hide show
  1. app.py +46 -28
  2. requirements.txt +5 -5
app.py CHANGED
@@ -20,23 +20,32 @@ def create_maps(file):
20
  return "Please upload geodata file", None, None
21
 
22
  try:
23
- # Read all data without limitations
24
  print("Loading data...")
25
- df = pd.read_csv(file.name)
26
  print(f"Loaded {len(df)} data rows")
27
 
 
 
 
 
28
  # Check columns
29
  required_cols = ['lat', 'lng', 'spd', 'alt', 'azm', 'randomized_id']
30
- if not all(col in df.columns for col in required_cols):
31
- return f"Missing columns: {required_cols}", None, None
 
 
 
 
 
 
32
 
33
- # Calculate distances (simplified)
34
- df['distance'] = df.groupby('randomized_id').apply(
35
- lambda x: [0] + [100] * (len(x) - 1) # Simplified, constant distance
36
- ).explode().reset_index(drop=True)
37
- df['distance'] = pd.to_numeric(df['distance'], errors='coerce')
38
 
39
  # Create grid WITHOUT categorical data - fix error
 
40
  lat_min, lat_max = df['lat'].min(), df['lat'].max()
41
  lng_min, lng_max = df['lng'].min(), df['lng'].max()
42
 
@@ -107,22 +116,29 @@ def create_maps(file):
107
  predictions_df['demand_supply_ratio'] = predictions_df['priority_score'] / predictions_df['supply']
108
  predictions_df['demand_supply_difference'] = predictions_df['priority_score'] - predictions_df['supply']
109
 
 
 
 
 
 
110
  # Sort by priority
111
  predictions_df = predictions_df.sort_values(by='priority_score', ascending=False)
112
 
113
- # === MAP 1: Top zones with markers (as in notebook) ===
114
- top_n = 10
115
  top_zones = predictions_df.head(top_n)
116
 
 
 
 
117
  map_center_lat = top_zones['lat'].mean()
118
  map_center_lng = top_zones['lng'].mean()
119
- m = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=12)
 
120
 
121
- # Add markers for top zones with tooltips
122
  for index, row in top_zones.iterrows():
123
- tooltip_text = f"Predicted Demand: {row['priority_score']:.2f}<br>" \
124
- f"Actual Demand: {row['actual_demand']:.0f}<br>" \
125
- f"Priority Score: {row['priority_score']:.2f}"
126
  folium.Marker(
127
  location=[row['lat'], row['lng']],
128
  tooltip=tooltip_text,
@@ -132,23 +148,25 @@ def create_maps(file):
132
  # Save first map and get HTML
133
  markers_html = m._repr_html_()
134
 
135
- # === MAP 2: Heatmap of imbalance (as in notebook) ===
136
- # Create data for heatmap
137
- heat_data = [[row['lat'], row['lng'], row['demand_supply_difference']]
138
- for index, row in predictions_df.iterrows()]
 
139
 
140
- # Create heatmap
141
- map_center_lat = predictions_df['lat'].mean()
142
- map_center_lng = predictions_df['lng'].mean()
143
- m_heatmap = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=12)
 
144
 
145
- # Add heatmap
146
- HeatMap(heat_data).add_to(m_heatmap)
147
 
148
  # Get HTML for second map
149
  heatmap_html = m_heatmap._repr_html_()
150
 
151
- status = f"Processed {len(predictions_df)} zones from {len(df)} data points"
152
 
153
  return status, markers_html, heatmap_html
154
 
@@ -242,4 +260,4 @@ with gr.Blocks(
242
  )
243
 
244
  if __name__ == "__main__":
245
- interface.launch(server_name="0.0.0.0", debug=True, show_error=True, server_port=7870, share=True)
 
20
  return "Please upload geodata file", None, None
21
 
22
  try:
23
+ # Read data with size limit for HF Spaces
24
  print("Loading data...")
25
+ df = pd.read_csv(file.name, nrows=100000) # Limit to 100k rows for HF Spaces
26
  print(f"Loaded {len(df)} data rows")
27
 
28
+ # Check that the file contains at least one data row
29
+ if len(df) == 0:
30
+ return "Error: Empty file", None, None
31
+
32
  # Check columns
33
  required_cols = ['lat', 'lng', 'spd', 'alt', 'azm', 'randomized_id']
34
+ missing_cols = [col for col in required_cols if col not in df.columns]
35
+ if missing_cols:
36
+ return f"Missing columns: {missing_cols}. Available: {list(df.columns)}", None, None
37
+
38
+ # Sample data if too large (for HF Spaces memory limits)
39
+ if len(df) > 50000:
40
+ df = df.sample(n=50000, random_state=42)
41
+ print(f"Sampled down to {len(df)} rows for processing")
42
 
43
+ # Calculate distances (simplified to avoid memory issues)
44
+ print("Processing distances...")
45
+ df['distance'] = 100.0 # Simplified constant distance
 
 
46
 
47
  # Create grid WITHOUT categorical data - fix error
48
+ print("Creating spatial grid...")
49
  lat_min, lat_max = df['lat'].min(), df['lat'].max()
50
  lng_min, lng_max = df['lng'].min(), df['lng'].max()
51
 
 
116
  predictions_df['demand_supply_ratio'] = predictions_df['priority_score'] / predictions_df['supply']
117
  predictions_df['demand_supply_difference'] = predictions_df['priority_score'] - predictions_df['supply']
118
 
119
+ # Limit zones for HF Spaces performance
120
+ if len(predictions_df) > 1000:
121
+ predictions_df = predictions_df.head(1000)
122
+ print(f"Limited to top 1000 zones for performance")
123
+
124
  # Sort by priority
125
  predictions_df = predictions_df.sort_values(by='priority_score', ascending=False)
126
 
127
+ # === MAP 1: Top zones with markers (simplified for HF Spaces) ===
128
+ top_n = min(10, len(predictions_df))
129
  top_zones = predictions_df.head(top_n)
130
 
131
+ if len(top_zones) == 0:
132
+ return "No valid zones found", None, None
133
+
134
  map_center_lat = top_zones['lat'].mean()
135
  map_center_lng = top_zones['lng'].mean()
136
+ m = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=11,
137
+ tiles='OpenStreetMap', attr='OpenStreetMap')
138
 
139
+ # Add markers for top zones with simplified tooltips
140
  for index, row in top_zones.iterrows():
141
+ tooltip_text = f"Demand: {row['priority_score']:.1f}<br>Actual: {row['actual_demand']:.0f}"
 
 
142
  folium.Marker(
143
  location=[row['lat'], row['lng']],
144
  tooltip=tooltip_text,
 
148
  # Save first map and get HTML
149
  markers_html = m._repr_html_()
150
 
151
+ # === MAP 2: Simplified heatmap for HF Spaces ===
152
+ # Limit heatmap data to top zones for performance
153
+ heat_zones = predictions_df.head(min(500, len(predictions_df)))
154
+ heat_data = [[row['lat'], row['lng'], max(0, row['demand_supply_difference'])]
155
+ for index, row in heat_zones.iterrows()]
156
 
157
+ # Create heatmap with simplified settings
158
+ map_center_lat = heat_zones['lat'].mean()
159
+ map_center_lng = heat_zones['lng'].mean()
160
+ m_heatmap = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=11,
161
+ tiles='OpenStreetMap', attr='OpenStreetMap')
162
 
163
+ # Add heatmap with performance settings
164
+ HeatMap(heat_data, radius=15, blur=15, max_zoom=1).add_to(m_heatmap)
165
 
166
  # Get HTML for second map
167
  heatmap_html = m_heatmap._repr_html_()
168
 
169
+ status = f"Processed {len(predictions_df)} zones from {len(df)} data points"
170
 
171
  return status, markers_html, heatmap_html
172
 
 
260
  )
261
 
262
  if __name__ == "__main__":
263
+ interface.launch(server_name="0.0.0.0", debug=False, show_error=True)
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  gradio==3.50.2
2
- pandas
3
- numpy
4
- scikit-learn==1.6.1
5
- joblib
6
- folium
 
1
  gradio==3.50.2
2
+ pandas>=1.5.0
3
+ numpy>=1.21.0
4
+ scikit-learn>=1.0.0
5
+ joblib>=1.1.0
6
+ folium>=0.14.0