# Hugging Face Space: baiganinn/hackdrive (status: Sleeping)
# File size: 11,238 bytes

import gradio as gr
import pandas as pd
import numpy as np
import joblib
import folium
from folium.plugins import HeatMap

# Load the trained model once at import time.
# NOTE: joblib.load unpickles the file, so MODEL_PATH must only ever point
# at a trusted artifact shipped with the app.
MODEL_PATH = "model/optimization_model.joblib"
try:
    model = joblib.load(MODEL_PATH)
    print("Модель загружена успешно")
except Exception as exc:
    # Best-effort load: the app still starts without a model, and
    # create_maps answers "Model not loaded" when model is None.
    # Catching Exception (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate, and the reason for the failure is logged.
    model = None
    print(f"Не удалось загрузить модель: {exc}")

# Input schema and size limits used by create_maps.
_REQUIRED_COLUMNS = ['lat', 'lng', 'spd', 'alt', 'azm', 'randomized_id']
_ZONE_KEYS = ['lat_grid', 'lng_grid']
_GRID_CELL_SIZE = 0.01   # grid step applied to the raw lat/lng values
_MAX_CSV_ROWS = 10000    # rows read from the uploaded CSV
_MAX_SAMPLED_ROWS = 5000  # rows kept after random down-sampling
_MAX_ZONES = 100         # zones kept after ranking by priority
_MAX_MARKERS = 5         # markers drawn on the first map
_MAX_HEAT_ZONES = 50     # zones fed into the heatmap


def _build_zone_table(df):
    """Collapse point rows into one row of statistics per grid zone.

    ``df`` must already carry the ``lat_grid``/``lng_grid`` key columns and
    a ``distance`` column. The result has the per-zone aggregates plus
    ``zone_density`` (number of distinct ``randomized_id`` values) and
    ``target`` (``log1p`` of that density).
    """
    zone_stats = df.groupby(_ZONE_KEYS).agg(
        zone_avg_spd=('spd', 'mean'),
        zone_spd_std=('spd', 'std'),
        zone_min_spd=('spd', 'min'),
        zone_max_spd=('spd', 'max'),
        zone_avg_alt=('alt', 'mean'),
        zone_alt_std=('alt', 'std'),
        zone_min_alt=('alt', 'min'),
        zone_max_alt=('alt', 'max'),
        zone_avg_azm=('azm', 'mean'),
        zone_azm_std=('azm', 'std'),
        zone_point_count=('randomized_id', 'count'),
        zone_total_distance=('distance', 'sum')
    ).reset_index().fillna(0)  # std is NaN for single-point zones

    zone_counts = (
        df.groupby(_ZONE_KEYS)['randomized_id']
        .nunique()
        .reset_index(name='zone_density')
    )
    zone_counts['target'] = np.log1p(zone_counts['zone_density'])

    return pd.merge(zone_stats, zone_counts, on=_ZONE_KEYS, how='inner')


def _new_base_map(center_lat, center_lng):
    """Create the fixed-size OpenStreetMap canvas shared by both maps."""
    return folium.Map(
        location=[center_lat, center_lng],
        zoom_start=10,
        tiles='OpenStreetMap',
        width=600,
        height=400
    )


def create_maps(file):
    """Build the marker map and the heatmap for an uploaded geodata CSV.

    Args:
        file: The Gradio upload. Either a filesystem path string or an
            object whose ``name`` attribute is the path. ``None`` means
            nothing has been uploaded.

    Returns:
        A ``(status, markers_html, heatmap_html)`` tuple. On any failure
        ``status`` carries the message and both HTML values are ``None``.
        Exceptions are converted into status messages rather than raised.
    """
    if file is None:
        return "Please upload geodata file", None, None

    try:
        # Gradio hands over either a path string or a temp-file wrapper
        # exposing the path as ``.name``, depending on version/config.
        csv_path = file if isinstance(file, str) else file.name

        print("Loading data...")
        df = pd.read_csv(csv_path, nrows=_MAX_CSV_ROWS)
        print(f"Loaded {len(df)} data rows")

        if len(df) == 0:
            return "Error: Empty file", None, None

        missing_cols = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
        if missing_cols:
            return f"Missing columns: {missing_cols}. Available: {list(df.columns)}", None, None

        # Rows without coordinates cannot be assigned to a grid cell (the
        # integer cast below would raise on NaN), so discard them up front.
        df = df.dropna(subset=['lat', 'lng'])
        if len(df) == 0:
            return "Error: No rows with valid lat/lng coordinates", None, None

        # Down-sample deterministically to stay within memory limits.
        if len(df) > _MAX_SAMPLED_ROWS:
            df = df.sample(n=_MAX_SAMPLED_ROWS, random_state=42)
            print(f"Sampled down to {len(df)} rows for HF Spaces")

        # Simplified distance: every point contributes the same constant.
        print("Processing distances...")
        df['distance'] = 100.0

        # Bin coordinates into grid cells measured from the data's minimum
        # lat/lng; the string form of the bin index is the grouping key.
        print("Creating spatial grid...")
        lat_min = df['lat'].min()
        lng_min = df['lng'].min()
        df['lat_bin'] = ((df['lat'] - lat_min) // _GRID_CELL_SIZE).astype(int)
        df['lng_bin'] = ((df['lng'] - lng_min) // _GRID_CELL_SIZE).astype(int)
        df['lat_grid'] = df['lat_bin'].astype(str)
        df['lng_grid'] = df['lng_bin'].astype(str)

        df_ml = _build_zone_table(df)

        if model is None:
            return "Model not loaded", None, None

        # Per the original author's note, the model expects a
        # 'predicted_demand' column among its inputs, so a zero placeholder
        # is supplied and overwritten with the real prediction afterwards.
        df_ml['predicted_demand'] = 0.0
        X = df_ml.drop(_ZONE_KEYS + ['zone_density', 'target'], axis=1)
        predictions = model.predict(X)
        # Predictions are on the log1p scale of the target; convert back.
        df_ml['predicted_demand'] = np.expm1(predictions)

        predictions_df = df_ml[['lat_grid', 'lng_grid', 'zone_avg_alt', 'zone_avg_azm',
                               'zone_point_count', 'target', 'predicted_demand']].copy()

        # A zone is drawn at the mean position of the points it contains.
        zone_centers = df.groupby(_ZONE_KEYS).agg({
            'lat': 'mean',
            'lng': 'mean'
        }).reset_index()
        predictions_df = pd.merge(predictions_df, zone_centers, on=_ZONE_KEYS, how='left')

        predictions_df['actual_demand'] = np.expm1(predictions_df['target'])
        predictions_df['priority_score'] = predictions_df['predicted_demand']
        # Supply is the zone's point count relative to the average zone.
        predictions_df['supply'] = predictions_df['zone_point_count'] / predictions_df['zone_point_count'].mean()
        predictions_df['demand_supply_ratio'] = predictions_df['priority_score'] / predictions_df['supply']
        predictions_df['demand_supply_difference'] = predictions_df['priority_score'] - predictions_df['supply']

        # Rank first, then truncate, so the zones kept really are the
        # highest-priority ones rather than the first ones in group order.
        predictions_df = predictions_df.sort_values(by='priority_score', ascending=False)
        if len(predictions_df) > _MAX_ZONES:
            predictions_df = predictions_df.head(_MAX_ZONES)
            print("Limited to top 100 zones for HF Spaces")

        # === MAP 1: markers for the highest-priority zones ===
        top_zones = predictions_df.head(_MAX_MARKERS)
        if len(top_zones) == 0:
            return "No valid zones found", None, None

        map_center_lat = top_zones['lat'].mean()
        map_center_lng = top_zones['lng'].mean()
        m = _new_base_map(map_center_lat, map_center_lng)

        for zone in top_zones.itertuples(index=False):
            folium.Marker(
                location=[zone.lat, zone.lng],
                popup=f"Demand: {zone.priority_score:.1f}",
                icon=folium.Icon(color='red', icon='star')
            ).add_to(m)

        try:
            markers_html = m._repr_html_()
        except Exception as e:
            # Keep the heatmap usable even if this map cannot be rendered.
            print(f"Marker map rendering failed: {e}")
            markers_html = "<p>Map generation failed - please try with smaller file</p>"

        # === MAP 2: heatmap of the demand/supply imbalance ===
        try:
            heat_zones = predictions_df.head(_MAX_HEAT_ZONES)

            # Heat weights must be positive: use the magnitude of the
            # imbalance with a small floor.
            heat_data = [
                [lat, lng, max(0.1, abs(diff))]
                for lat, lng, diff in zip(
                    heat_zones['lat'],
                    heat_zones['lng'],
                    heat_zones['demand_supply_difference'],
                )
            ]

            m_heatmap = _new_base_map(map_center_lat, map_center_lng)
            if heat_data:
                HeatMap(heat_data, radius=10, blur=10).add_to(m_heatmap)

            heatmap_html = m_heatmap._repr_html_()
        except Exception as e:
            heatmap_html = f"<p>Heatmap generation failed: {str(e)}</p>"

        status = f"✅ Processed {len(predictions_df)} zones from {len(df)} data points (HF Spaces optimized)"

        return status, markers_html, heatmap_html

    except MemoryError:
        return "❌ File too large for HF Spaces. Please use a smaller dataset (< 1MB)", None, None
    except pd.errors.EmptyDataError:
        return "❌ Empty or invalid file", None, None
    except Exception as e:
        # UI boundary: report the failure as a status message, not a crash.
        error_msg = str(e)
        if "BodyStreamBuffer" in error_msg:
            return "❌ Processing timeout. Please use a smaller file (< 5000 rows)", None, None
        return f"❌ Error: {error_msg}", None, None

# Build the Gradio UI: a header, an upload section with a status box, and
# two side-by-side HTML panels that display the maps returned by create_maps.
with gr.Blocks(
    title="Driver Placement Optimization System",
    theme=gr.themes.Soft(),
    css="""
    .main-container {
        max-width: 1200px;
        margin: 0 auto;
        padding: 15px;
    }
    .header {
        text-align: center;
        margin-bottom: 20px;
        color: white;
        padding: 15px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 10px;
    }
    .upload-section {
        background: #f8f9fa;
        padding: 15px;
        border-radius: 8px;
        margin-bottom: 15px;
    }
    .maps-container {
        gap: 15px;
    }
    .map-card {
        background: white;
        border: 1px solid #e0e0e0;
        border-radius: 8px;
        padding: 10px;
        box-shadow: 0 1px 3px rgba(0,0,0,0.1);
        max-height: 500px;
        overflow: hidden;
    }
    """
) as interface:
    
    with gr.Column(elem_classes="main-container"):
        # Page header
        with gr.Row(elem_classes="header"):
            gr.Markdown(
                """
                # Driver Placement Optimization System
                ### Geodata analysis for optimal placement zones
                """,
                elem_classes="header-text"
            )
        
        # Upload widget and processing status
        with gr.Row(elem_classes="upload-section"):
            with gr.Column():
                gr.Markdown("### Data Upload")
                gr.Markdown("⚠️ **HF Spaces Limits**: Max 10,000 rows, 5MB file size")
                file_input = gr.File(
                    label="Select file with geodata (CSV format)",
                    elem_id="file-upload"
                )
                status_output = gr.Textbox(
                    label="Processing Status",
                    interactive=False,
                    lines=2
                )
        
        gr.Markdown("### Analysis Results")
        
        # Result panels: marker map on the left, heatmap on the right
        with gr.Row(elem_classes="maps-container"):
            with gr.Column(elem_classes="map-card"):
                gr.Markdown("#### Priority Zones Map")
                # create_maps draws at most 5 markers, so the caption says top-5.
                gr.Markdown("*Displays top-5 zones with highest demand*")
                map1_output = gr.HTML(
                    label="Top Zones Map for Driver Placement",
                    elem_id="map1"
                )
            
            with gr.Column(elem_classes="map-card"):
                gr.Markdown("#### Imbalance Heatmap")
                gr.Markdown("*Shows difference between demand and supply*")
                map2_output = gr.HTML(
                    label="Demand-Supply Imbalance Heatmap",
                    elem_id="map2"
                )
    
    # Re-run the analysis whenever the uploaded file changes; the three
    # return values of create_maps map onto status, map 1 and map 2.
    file_input.change(
        fn=create_maps,
        inputs=file_input,
        outputs=[status_output, map1_output, map2_output]
    )

# Script entry point: start the Gradio server only when run directly.
if __name__ == "__main__":
    # Bind to all interfaces, keep debug mode off, surface errors in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "debug": False,
        "show_error": True,
    }
    interface.launch(**launch_options)