Fix HF Spaces BodyStreamBuffer error - add data limits and optimize maps
Browse files
- app.py +46 -28
- requirements.txt +5 -5
app.py
CHANGED
|
@@ -20,23 +20,32 @@ def create_maps(file):
|
|
| 20 |
return "Please upload geodata file", None, None
|
| 21 |
|
| 22 |
try:
|
| 23 |
-
# Read
|
| 24 |
print("Loading data...")
|
| 25 |
-
df = pd.read_csv(file.name)
|
| 26 |
print(f"Loaded {len(df)} data rows")
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# Check columns
|
| 29 |
required_cols = ['lat', 'lng', 'spd', 'alt', 'azm', 'randomized_id']
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
-
# Calculate distances (simplified)
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
).explode().reset_index(drop=True)
|
| 37 |
-
df['distance'] = pd.to_numeric(df['distance'], errors='coerce')
|
| 38 |
|
| 39 |
# Create grid WITHOUT categorical data - fix error
|
|
|
|
| 40 |
lat_min, lat_max = df['lat'].min(), df['lat'].max()
|
| 41 |
lng_min, lng_max = df['lng'].min(), df['lng'].max()
|
| 42 |
|
|
@@ -107,22 +116,29 @@ def create_maps(file):
|
|
| 107 |
predictions_df['demand_supply_ratio'] = predictions_df['priority_score'] / predictions_df['supply']
|
| 108 |
predictions_df['demand_supply_difference'] = predictions_df['priority_score'] - predictions_df['supply']
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
# Sort by priority
|
| 111 |
predictions_df = predictions_df.sort_values(by='priority_score', ascending=False)
|
| 112 |
|
| 113 |
-
# === MAP 1: Top zones with markers (
|
| 114 |
-
top_n = 10
|
| 115 |
top_zones = predictions_df.head(top_n)
|
| 116 |
|
|
|
|
|
|
|
|
|
|
| 117 |
map_center_lat = top_zones['lat'].mean()
|
| 118 |
map_center_lng = top_zones['lng'].mean()
|
| 119 |
-
m = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=
|
|
|
|
| 120 |
|
| 121 |
-
# Add markers for top zones with tooltips
|
| 122 |
for index, row in top_zones.iterrows():
|
| 123 |
-
tooltip_text = f"
|
| 124 |
-
f"Actual Demand: {row['actual_demand']:.0f}<br>" \
|
| 125 |
-
f"Priority Score: {row['priority_score']:.2f}"
|
| 126 |
folium.Marker(
|
| 127 |
location=[row['lat'], row['lng']],
|
| 128 |
tooltip=tooltip_text,
|
|
@@ -132,23 +148,25 @@ def create_maps(file):
|
|
| 132 |
# Save first map and get HTML
|
| 133 |
markers_html = m._repr_html_()
|
| 134 |
|
| 135 |
-
# === MAP 2:
|
| 136 |
-
#
|
| 137 |
-
|
| 138 |
-
|
|
|
|
| 139 |
|
| 140 |
-
# Create heatmap
|
| 141 |
-
map_center_lat =
|
| 142 |
-
map_center_lng =
|
| 143 |
-
m_heatmap = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=
|
|
|
|
| 144 |
|
| 145 |
-
# Add heatmap
|
| 146 |
-
HeatMap(heat_data).add_to(m_heatmap)
|
| 147 |
|
| 148 |
# Get HTML for second map
|
| 149 |
heatmap_html = m_heatmap._repr_html_()
|
| 150 |
|
| 151 |
-
status = f"Processed {len(predictions_df)} zones from {len(df)} data points"
|
| 152 |
|
| 153 |
return status, markers_html, heatmap_html
|
| 154 |
|
|
@@ -242,4 +260,4 @@ with gr.Blocks(
|
|
| 242 |
)
|
| 243 |
|
| 244 |
if __name__ == "__main__":
|
| 245 |
-
interface.launch(server_name="0.0.0.0", debug=
|
|
|
|
| 20 |
return "Please upload geodata file", None, None
|
| 21 |
|
| 22 |
try:
|
| 23 |
+
# Read data with size limit for HF Spaces
|
| 24 |
print("Loading data...")
|
| 25 |
+
df = pd.read_csv(file.name, nrows=100000) # Limit to 100k rows for HF Spaces
|
| 26 |
print(f"Loaded {len(df)} data rows")
|
| 27 |
|
| 28 |
+
# Check file size
|
| 29 |
+
if len(df) == 0:
|
| 30 |
+
return "Error: Empty file", None, None
|
| 31 |
+
|
| 32 |
# Check columns
|
| 33 |
required_cols = ['lat', 'lng', 'spd', 'alt', 'azm', 'randomized_id']
|
| 34 |
+
missing_cols = [col for col in required_cols if col not in df.columns]
|
| 35 |
+
if missing_cols:
|
| 36 |
+
return f"Missing columns: {missing_cols}. Available: {list(df.columns)}", None, None
|
| 37 |
+
|
| 38 |
+
# Sample data if too large (for HF Spaces memory limits)
|
| 39 |
+
if len(df) > 50000:
|
| 40 |
+
df = df.sample(n=50000, random_state=42)
|
| 41 |
+
print(f"Sampled down to {len(df)} rows for processing")
|
| 42 |
|
| 43 |
+
# Calculate distances (simplified to avoid memory issues)
|
| 44 |
+
print("Processing distances...")
|
| 45 |
+
df['distance'] = 100.0 # Simplified constant distance
|
|
|
|
|
|
|
| 46 |
|
| 47 |
# Create grid WITHOUT categorical data - fix error
|
| 48 |
+
print("Creating spatial grid...")
|
| 49 |
lat_min, lat_max = df['lat'].min(), df['lat'].max()
|
| 50 |
lng_min, lng_max = df['lng'].min(), df['lng'].max()
|
| 51 |
|
|
|
|
| 116 |
predictions_df['demand_supply_ratio'] = predictions_df['priority_score'] / predictions_df['supply']
|
| 117 |
predictions_df['demand_supply_difference'] = predictions_df['priority_score'] - predictions_df['supply']
|
| 118 |
|
| 119 |
+
# Limit zones for HF Spaces performance
|
| 120 |
+
if len(predictions_df) > 1000:
|
| 121 |
+
predictions_df = predictions_df.head(1000)
|
| 122 |
+
print(f"Limited to top 1000 zones for performance")
|
| 123 |
+
|
| 124 |
# Sort by priority
|
| 125 |
predictions_df = predictions_df.sort_values(by='priority_score', ascending=False)
|
| 126 |
|
| 127 |
+
# === MAP 1: Top zones with markers (simplified for HF Spaces) ===
|
| 128 |
+
top_n = min(10, len(predictions_df))
|
| 129 |
top_zones = predictions_df.head(top_n)
|
| 130 |
|
| 131 |
+
if len(top_zones) == 0:
|
| 132 |
+
return "No valid zones found", None, None
|
| 133 |
+
|
| 134 |
map_center_lat = top_zones['lat'].mean()
|
| 135 |
map_center_lng = top_zones['lng'].mean()
|
| 136 |
+
m = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=11,
|
| 137 |
+
tiles='OpenStreetMap', attr='OpenStreetMap')
|
| 138 |
|
| 139 |
+
# Add markers for top zones with simplified tooltips
|
| 140 |
for index, row in top_zones.iterrows():
|
| 141 |
+
tooltip_text = f"Demand: {row['priority_score']:.1f}<br>Actual: {row['actual_demand']:.0f}"
|
|
|
|
|
|
|
| 142 |
folium.Marker(
|
| 143 |
location=[row['lat'], row['lng']],
|
| 144 |
tooltip=tooltip_text,
|
|
|
|
| 148 |
# Save first map and get HTML
|
| 149 |
markers_html = m._repr_html_()
|
| 150 |
|
| 151 |
+
# === MAP 2: Simplified heatmap for HF Spaces ===
|
| 152 |
+
# Limit heatmap data to top zones for performance
|
| 153 |
+
heat_zones = predictions_df.head(min(500, len(predictions_df)))
|
| 154 |
+
heat_data = [[row['lat'], row['lng'], max(0, row['demand_supply_difference'])]
|
| 155 |
+
for index, row in heat_zones.iterrows()]
|
| 156 |
|
| 157 |
+
# Create heatmap with simplified settings
|
| 158 |
+
map_center_lat = heat_zones['lat'].mean()
|
| 159 |
+
map_center_lng = heat_zones['lng'].mean()
|
| 160 |
+
m_heatmap = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=11,
|
| 161 |
+
tiles='OpenStreetMap', attr='OpenStreetMap')
|
| 162 |
|
| 163 |
+
# Add heatmap with performance settings
|
| 164 |
+
HeatMap(heat_data, radius=15, blur=15, max_zoom=1).add_to(m_heatmap)
|
| 165 |
|
| 166 |
# Get HTML for second map
|
| 167 |
heatmap_html = m_heatmap._repr_html_()
|
| 168 |
|
| 169 |
+
status = f"✅ Processed {len(predictions_df)} zones from {len(df)} data points"
|
| 170 |
|
| 171 |
return status, markers_html, heatmap_html
|
| 172 |
|
|
|
|
| 260 |
)
|
| 261 |
|
| 262 |
if __name__ == "__main__":
|
| 263 |
+
interface.launch(server_name="0.0.0.0", debug=False, show_error=True)
|
requirements.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
gradio==3.50.2
|
| 2 |
-
pandas
|
| 3 |
-
numpy
|
| 4 |
-
scikit-learn
|
| 5 |
-
joblib
|
| 6 |
-
folium
|
|
|
|
| 1 |
gradio==3.50.2
|
| 2 |
+
pandas>=1.5.0
|
| 3 |
+
numpy>=1.21.0
|
| 4 |
+
scikit-learn>=1.0.0
|
| 5 |
+
joblib>=1.1.0
|
| 6 |
+
folium>=0.14.0
|