"""Gradio app that maps predicted driver demand per geographic grid zone.

An uploaded CSV of GPS points is bucketed into a coarse lat/lng grid, per-zone
statistics are fed to a pre-trained model, and two folium maps are rendered:
a marker map of the highest-priority zones and a demand/supply heatmap.
"""
import html

import folium
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from folium.plugins import HeatMap

MODEL_PATH = "model/optimization_model.joblib"

# Processing limits (kept deliberately small for HF Spaces memory/time budgets).
MAX_CSV_ROWS = 10000      # rows read from the uploaded CSV
MAX_SAMPLE_ROWS = 5000    # rows kept after random down-sampling
GRID_CELL_DEG = 0.01      # grid cell size in degrees of lat/lng
MAX_ZONES = 100           # zones kept after ranking by priority
TOP_MARKERS = 5           # markers shown on the priority map
MAX_HEAT_ZONES = 50       # zones fed into the heatmap
REQUIRED_COLS = ['lat', 'lng', 'spd', 'alt', 'azm', 'randomized_id']
ZONE_KEYS = ['lat_grid', 'lng_grid']

# Load the model.
# NOTE(review): joblib.load unpickles the file; only load trusted artifacts.
try:
    model = joblib.load(MODEL_PATH)
    print("Модель загружена успешно")
except Exception as exc:  # keep the app running; create_maps reports the missing model
    model = None
    print("Не удалось загрузить модель")
    print(f"Model load error: {exc!r}")


def _build_zone_table(df):
    """Assign each point to a grid zone and return per-zone features plus target.

    Adds the helper columns ``distance``, ``lat_bin``, ``lng_bin``, ``lat_grid``
    and ``lng_grid`` to *df* in place.
    """
    # Simplified distance calculation: every point contributes a constant.
    print("Processing distances...")
    df['distance'] = 100.0

    print("Creating spatial grid...")
    lat_min = df['lat'].min()
    lng_min = df['lng'].min()
    df['lat_bin'] = ((df['lat'] - lat_min) // GRID_CELL_DEG).astype(int)
    df['lng_bin'] = ((df['lng'] - lng_min) // GRID_CELL_DEG).astype(int)

    # String identifiers are used as the grouping keys.
    df['lat_grid'] = df['lat_bin'].astype(str)
    df['lng_grid'] = df['lng_bin'].astype(str)

    zone_stats = df.groupby(ZONE_KEYS).agg(
        zone_avg_spd=('spd', 'mean'),
        zone_spd_std=('spd', 'std'),
        zone_min_spd=('spd', 'min'),
        zone_max_spd=('spd', 'max'),
        zone_avg_alt=('alt', 'mean'),
        zone_alt_std=('alt', 'std'),
        zone_min_alt=('alt', 'min'),
        zone_max_alt=('alt', 'max'),
        zone_avg_azm=('azm', 'mean'),
        zone_azm_std=('azm', 'std'),
        zone_point_count=('randomized_id', 'count'),
        zone_total_distance=('distance', 'sum'),
    ).reset_index().fillna(0)  # std is NaN for single-point zones

    # Target variable: log1p of the number of distinct ids seen in the zone.
    zone_counts = (
        df.groupby(ZONE_KEYS)['randomized_id']
        .nunique()
        .reset_index(name='zone_density')
    )
    zone_counts['target'] = np.log1p(zone_counts['zone_density'])

    return pd.merge(zone_stats, zone_counts, on=ZONE_KEYS, how='inner')


def _score_zones(df, df_ml):
    """Run the model on the zone table and return the scored predictions frame."""
    # The model expects a 'predicted_demand' feature column; feed a dummy 0.
    df_ml['predicted_demand'] = 0.0
    features = df_ml.drop(columns=ZONE_KEYS + ['zone_density', 'target'])

    # Replace the dummy values with real predictions (back from log scale).
    df_ml['predicted_demand'] = np.expm1(model.predict(features))

    scored = df_ml[ZONE_KEYS + ['zone_avg_alt', 'zone_avg_azm', 'zone_point_count',
                                'target', 'predicted_demand']].copy()

    # Zone centre = mean coordinate of the original points in the zone.
    zone_centers = df.groupby(ZONE_KEYS).agg({'lat': 'mean', 'lng': 'mean'}).reset_index()
    scored = pd.merge(scored, zone_centers, on=ZONE_KEYS, how='left')

    scored['actual_demand'] = np.expm1(scored['target'])
    scored['priority_score'] = scored['predicted_demand']  # already de-logged
    scored['supply'] = scored['zone_point_count'] / scored['zone_point_count'].mean()
    scored['demand_supply_ratio'] = scored['priority_score'] / scored['supply']
    scored['demand_supply_difference'] = scored['priority_score'] - scored['supply']
    return scored


def _new_base_map(center):
    """Create the small fixed-size folium map shared by both views."""
    return folium.Map(
        location=center,
        zoom_start=10,
        tiles='OpenStreetMap',
        width=600,
        height=400,
    )


def _render_markers_map(top_zones, center):
    """Return HTML for a map with one star marker per top-priority zone."""
    marker_map = _new_base_map(center)
    for zone in top_zones.itertuples(index=False):
        folium.Marker(
            location=[zone.lat, zone.lng],
            popup=f"Demand: {zone.priority_score:.1f}",
            icon=folium.Icon(color='red', icon='star'),
        ).add_to(marker_map)

    try:
        return marker_map._repr_html_()
    except Exception:
        return "<div>Map generation failed - please try with smaller file</div>"


def _render_heatmap(heat_zones, center):
    """Return HTML for a heatmap of the absolute demand/supply difference."""
    try:
        # HeatMap weights must be positive, hence abs() and the 0.1 floor.
        heat_data = [
            [zone.lat, zone.lng, max(0.1, abs(zone.demand_supply_difference))]
            for zone in heat_zones.itertuples(index=False)
        ]
        heat_map = _new_base_map(center)
        if heat_data:
            HeatMap(heat_data, radius=10, blur=10).add_to(heat_map)
        return heat_map._repr_html_()
    except Exception as exc:
        # Escape the message: it is injected into an HTML component.
        return f"<div>Heatmap generation failed: {html.escape(str(exc))}</div>"


def create_maps(file):
    """Build the priority-marker map and the imbalance heatmap from a CSV upload.

    Args:
        file: The value delivered by ``gr.File`` - either an object exposing the
            temp-file path as ``.name`` or a plain path string - or ``None``
            when nothing is uploaded.

    Returns:
        A ``(status, markers_html, heatmap_html)`` tuple. On any failure the
        status carries the error message and both HTML values are ``None``.
    """
    if file is None:
        return "Please upload geodata file", None, None

    try:
        print("Loading data...")
        # Gradio passes a tempfile wrapper or a path string depending on version/config.
        csv_path = getattr(file, "name", file)
        df = pd.read_csv(csv_path, nrows=MAX_CSV_ROWS)
        print(f"Loaded {len(df)} data rows")

        if len(df) == 0:
            return "Error: Empty file", None, None

        missing_cols = [col for col in REQUIRED_COLS if col not in df.columns]
        if missing_cols:
            return f"Missing columns: {missing_cols}. Available: {list(df.columns)}", None, None

        # Rows without coordinates cannot be binned (NaN -> int conversion fails).
        df = df.dropna(subset=['lat', 'lng'])
        if df.empty:
            return "Error: No rows with valid lat/lng coordinates", None, None

        if len(df) > MAX_SAMPLE_ROWS:
            df = df.sample(n=MAX_SAMPLE_ROWS, random_state=42)
            print(f"Sampled down to {len(df)} rows for HF Spaces")

        df_ml = _build_zone_table(df)

        if model is None:
            return "Model not loaded", None, None

        predictions_df = _score_zones(df, df_ml)

        # Rank first, then truncate, so the kept zones really are the top ones.
        predictions_df = predictions_df.sort_values(by='priority_score', ascending=False)
        if len(predictions_df) > MAX_ZONES:
            predictions_df = predictions_df.head(MAX_ZONES)
            print(f"Limited to top {MAX_ZONES} zones for HF Spaces")

        # === MAP 1: markers for the highest-priority zones ===
        top_zones = predictions_df.head(TOP_MARKERS)
        if top_zones.empty:
            return "No valid zones found", None, None

        center = [top_zones['lat'].mean(), top_zones['lng'].mean()]
        markers_html = _render_markers_map(top_zones, center)

        # === MAP 2: demand/supply imbalance heatmap ===
        heatmap_html = _render_heatmap(predictions_df.head(MAX_HEAT_ZONES), center)

        status = f"✅ Processed {len(predictions_df)} zones from {len(df)} data points (HF Spaces optimized)"
        return status, markers_html, heatmap_html

    except MemoryError:
        return "❌ File too large for HF Spaces. Please use a smaller dataset (< 1MB)", None, None
    except pd.errors.EmptyDataError:
        return "❌ Empty or invalid file", None, None
    except Exception as e:
        error_msg = str(e)
        if "BodyStreamBuffer" in error_msg:
            return "❌ Processing timeout. Please use a smaller file (< 5000 rows)", None, None
        return f"❌ Error: {error_msg}", None, None


APP_CSS = """
.main-container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 15px;
}
.header {
    text-align: center;
    margin-bottom: 20px;
    color: white;
    padding: 15px;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border-radius: 10px;
}
.upload-section {
    background: #f8f9fa;
    padding: 15px;
    border-radius: 8px;
    margin-bottom: 15px;
}
.maps-container {
    gap: 15px;
}
.map-card {
    background: white;
    border: 1px solid #e0e0e0;
    border-radius: 8px;
    padding: 10px;
    box-shadow: 0 1px 3px rgba(0,0,0,0.1);
    max-height: 500px;
    overflow: hidden;
}
"""

# Build the Gradio interface.
with gr.Blocks(
    title="Driver Placement Optimization System",
    theme=gr.themes.Soft(),
    css=APP_CSS,
) as interface:

    with gr.Column(elem_classes="main-container"):
        with gr.Row(elem_classes="header"):
            gr.Markdown(
                "# Driver Placement Optimization System\n"
                "### Geodata analysis for optimal placement zones",
                elem_classes="header-text",
            )

        with gr.Row(elem_classes="upload-section"):
            with gr.Column():
                gr.Markdown("### Data Upload")
                gr.Markdown("⚠️ **HF Spaces Limits**: Max 10,000 rows, 5MB file size")
                file_input = gr.File(
                    label="Select file with geodata (CSV format)",
                    elem_id="file-upload",
                )
                status_output = gr.Textbox(
                    label="Processing Status",
                    interactive=False,
                    lines=2,
                )

        gr.Markdown("### Analysis Results")

        with gr.Row(elem_classes="maps-container"):
            with gr.Column(elem_classes="map-card"):
                gr.Markdown("#### Priority Zones Map")
                # Caption is derived from TOP_MARKERS so it cannot drift from the map.
                gr.Markdown(f"*Displays top-{TOP_MARKERS} zones with highest demand*")
                map1_output = gr.HTML(
                    label="Top Zones Map for Driver Placement",
                    elem_id="map1",
                )

            with gr.Column(elem_classes="map-card"):
                gr.Markdown("#### Imbalance Heatmap")
                gr.Markdown("*Shows difference between demand and supply*")
                map2_output = gr.HTML(
                    label="Demand-Supply Imbalance Heatmap",
                    elem_id="map2",
                )

    file_input.change(
        fn=create_maps,
        inputs=file_input,
        outputs=[status_output, map1_output, map2_output],
    )

if __name__ == "__main__":
    interface.launch(server_name="0.0.0.0", debug=False, show_error=True)