Spaces:

baiganinn
/

hackdrive

Sleeping

App Files Files Community

baiganinn committed on Sep 14, 2025

Commit

94899de

1 Parent(s): 2292677

Fix HF Spaces BodyStreamBuffer error - add data limits and optimize maps

Browse files

Files changed (2) hide show

app.py +46 -28
requirements.txt +5 -5

app.py CHANGED Viewed

@@ -20,23 +20,32 @@ def create_maps(file):
         return "Please upload geodata file", None, None
     try:
-        # Read all data without limitations
         print("Loading data...")
-        df = pd.read_csv(file.name)
         print(f"Loaded {len(df)} data rows")
         # Check columns
         required_cols = ['lat', 'lng', 'spd', 'alt', 'azm', 'randomized_id']
-        if not all(col in df.columns for col in required_cols):
-            return f"Missing columns: {required_cols}", None, None
-        # Calculate distances (simplified)
-        df['distance'] = df.groupby('randomized_id').apply(
-            lambda x: [0] + [100] * (len(x) - 1)  # Simplified, constant distance
-        ).explode().reset_index(drop=True)
-        df['distance'] = pd.to_numeric(df['distance'], errors='coerce')
         # Create grid WITHOUT categorical data - fix error
         lat_min, lat_max = df['lat'].min(), df['lat'].max()
         lng_min, lng_max = df['lng'].min(), df['lng'].max()
@@ -107,22 +116,29 @@ def create_maps(file):
         predictions_df['demand_supply_ratio'] = predictions_df['priority_score'] / predictions_df['supply']
         predictions_df['demand_supply_difference'] = predictions_df['priority_score'] - predictions_df['supply']
         # Sort by priority
         predictions_df = predictions_df.sort_values(by='priority_score', ascending=False)
-        # === MAP 1: Top zones with markers (as in notebook) ===
-        top_n = 10
         top_zones = predictions_df.head(top_n)
         map_center_lat = top_zones['lat'].mean()
         map_center_lng = top_zones['lng'].mean()
-        m = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=12)
-        # Add markers for top zones with tooltips
         for index, row in top_zones.iterrows():
-            tooltip_text = f"Predicted Demand: {row['priority_score']:.2f}<br>" \
-                          f"Actual Demand: {row['actual_demand']:.0f}<br>" \
-                          f"Priority Score: {row['priority_score']:.2f}"
             folium.Marker(
                 location=[row['lat'], row['lng']],
                 tooltip=tooltip_text,
@@ -132,23 +148,25 @@ def create_maps(file):
         # Save first map and get HTML
         markers_html = m._repr_html_()
-        # === MAP 2: Heatmap of imbalance (as in notebook) ===
-        # Create data for heatmap
-        heat_data = [[row['lat'], row['lng'], row['demand_supply_difference']]
-                     for index, row in predictions_df.iterrows()]
-        # Create heatmap
-        map_center_lat = predictions_df['lat'].mean()
-        map_center_lng = predictions_df['lng'].mean()
-        m_heatmap = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=12)
-        # Add heatmap
-        HeatMap(heat_data).add_to(m_heatmap)
         # Get HTML for second map
         heatmap_html = m_heatmap._repr_html_()
-        status = f"Processed {len(predictions_df)} zones from {len(df)} data points"
         return status, markers_html, heatmap_html
@@ -242,4 +260,4 @@ with gr.Blocks(
     )
 if __name__ == "__main__":
-    interface.launch(server_name="0.0.0.0", debug=True, show_error=True, server_port=7870, share=True)

         return "Please upload geodata file", None, None
     try:
+        # Read data with size limit for HF Spaces
         print("Loading data...")
+        df = pd.read_csv(file.name, nrows=100000)  # Limit to 100k rows for HF Spaces
         print(f"Loaded {len(df)} data rows")
+        # Reject input that contains no data rows
+        if len(df) == 0:
+            return "Error: Empty file", None, None
         # Check columns
         required_cols = ['lat', 'lng', 'spd', 'alt', 'azm', 'randomized_id']
+        missing_cols = [col for col in required_cols if col not in df.columns]
+        if missing_cols:
+            return f"Missing columns: {missing_cols}. Available: {list(df.columns)}", None, None
+        # Sample data if too large (for HF Spaces memory limits)
+        if len(df) > 50000:
+            df = df.sample(n=50000, random_state=42)
+            print(f"Sampled down to {len(df)} rows for processing")
+        # Calculate distances (simplified to avoid memory issues)
+        print("Processing distances...")
+        df['distance'] = 100.0  # Simplified constant distance
         # Create grid WITHOUT categorical data - fix error
+        print("Creating spatial grid...")
         lat_min, lat_max = df['lat'].min(), df['lat'].max()
         lng_min, lng_max = df['lng'].min(), df['lng'].max()
         predictions_df['demand_supply_ratio'] = predictions_df['priority_score'] / predictions_df['supply']
         predictions_df['demand_supply_difference'] = predictions_df['priority_score'] - predictions_df['supply']
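+        # Limit zones for HF Spaces performance
+        # NOTE(review): this truncation runs before the sort below, so unless
+        # predictions_df is already ordered upstream these are the first 1000
+        # rows, not the top 1000 by priority_score - confirm intended order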
+        if len(predictions_df) > 1000:
+            predictions_df = predictions_df.head(1000)
+            print(f"Limited to top 1000 zones for performance")
         # Sort by priority
         predictions_df = predictions_df.sort_values(by='priority_score', ascending=False)
+        # === MAP 1: Top zones with markers (simplified for HF Spaces) ===
+        top_n = min(10, len(predictions_df))
         top_zones = predictions_df.head(top_n)
+        if len(top_zones) == 0:
+            return "No valid zones found", None, None
         map_center_lat = top_zones['lat'].mean()
         map_center_lng = top_zones['lng'].mean()
+        m = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=11,
+                       tiles='OpenStreetMap', attr='OpenStreetMap')
+        # Add markers for top zones with simplified tooltips
         for index, row in top_zones.iterrows():
+            tooltip_text = f"Demand: {row['priority_score']:.1f}<br>Actual: {row['actual_demand']:.0f}"
             folium.Marker(
                 location=[row['lat'], row['lng']],
                 tooltip=tooltip_text,
         # Save first map and get HTML
         markers_html = m._repr_html_()
+        # === MAP 2: Simplified heatmap for HF Spaces ===
+        # Limit heatmap data to top zones for performance
+        heat_zones = predictions_df.head(min(500, len(predictions_df)))
+        heat_data = [[row['lat'], row['lng'], max(0, row['demand_supply_difference'])]
+                     for index, row in heat_zones.iterrows()]
+        # Create heatmap with simplified settings
+        map_center_lat = heat_zones['lat'].mean()
+        map_center_lng = heat_zones['lng'].mean()
+        m_heatmap = folium.Map(location=[map_center_lat, map_center_lng], zoom_start=11,
+                              tiles='OpenStreetMap', attr='OpenStreetMap')
+        # Add heatmap with performance settings
+        HeatMap(heat_data, radius=15, blur=15, max_zoom=1).add_to(m_heatmap)
         # Get HTML for second map
         heatmap_html = m_heatmap._repr_html_()
+        status = f"✅ Processed {len(predictions_df)} zones from {len(df)} data points"
         return status, markers_html, heatmap_html
     )
 if __name__ == "__main__":
+    interface.launch(server_name="0.0.0.0", debug=False, show_error=True)

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
 gradio==3.50.2
-pandas
-numpy
-scikit-learn==1.6.1
-joblib
-folium

 gradio==3.50.2
+pandas>=1.5.0
+numpy>=1.21.0
+scikit-learn>=1.0.0
+joblib>=1.1.0
+folium>=0.14.0