Spaces:

WAQASCHANNA
/

AIforConnectivityHack

Sleeping

App Files Files Community

WAQASCHANNA committed on Feb 21, 2025

Commit

edf79c6

verified ·

1 Parent(s): ae0702a

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -26

app.py CHANGED Viewed

@@ -6,18 +6,18 @@ from sklearn.ensemble import RandomForestRegressor
 from sklearn.model_selection import train_test_split
 # ----------------------
-# Load Sample Data (Fallback)
 # ----------------------
 @st.cache_data
 def load_giga_school_data():
-    """Load synthetic school connectivity data"""
     try:
-        # Try original URL first
         url = "https://raw.githubusercontent.com/Project-Giga/public-datasets/main/school_connectivity.csv"
         df = pd.read_csv(url)
-    except:
-        # Fallback to synthetic data
-        st.warning("Using synthetic data - replace with real Giga dataset when available")
         data = {
             "school_id": [1, 2, 3, 4, 5],
             "latitude": [40.7128, 34.0522, 41.8781, 29.7604, 33.7490],
@@ -29,7 +29,7 @@ def load_giga_school_data():
 @st.cache_data
 def load_ookla_speedtest_data():
-    """Sample speedtest data"""
     return pd.DataFrame({
         'latitude': [40.7128, 34.0522, 41.8781, 29.7604, 33.7490],
         'longitude': [-74.0060, -118.2437, -87.6298, -95.3698, -84.3880],
@@ -42,9 +42,12 @@ def load_ookla_speedtest_data():
 # ----------------------
 def train_model(df):
     # Feature engineering
-    df["hour"] = np.random.randint(0, 24, len(df))  # Simulate timestamps
     X = df[["hour", "avg_latency_ms", "avg_download_mbps", "connectivity_score"]]
-    y = df["energy_kwh"] = np.random.normal(200, 50, len(df))  # Synthetic energy data
     # Model training
     model = RandomForestRegressor(n_estimators=10)
@@ -59,13 +62,22 @@ def train_model(df):
 st.set_page_config(page_title="Public Sector Energy Optimizer", layout="wide")
 st.title("🏫 AI for School Network Efficiency")
-# Load data
 giga_df = load_giga_school_data()
 ookla_df = load_ookla_speedtest_data()
-merged_df = pd.merge(giga_df, ookla_df, on=["latitude", "longitude"], how="left")
 # Train model
-df, feature_importances = train_model(merged_df)
 # ----------------------
 # Dashboard Sections
@@ -74,27 +86,49 @@ tab1, tab2, tab3 = st.tabs(["📈 Analysis", "🗺️ Map", "About"])
 with tab1:
     st.subheader("Energy vs. Connectivity Analysis")
-    fig = px.scatter(df, x="avg_download_mbps", y="energy_kwh",
-                     color="savings_kwh", title="Download Speed vs. Energy Use")
     st.plotly_chart(fig, use_container_width=True)
 with tab2:
-    st.subheader("School Locations")
-    fig = px.scatter_mapbox(df, lat="latitude", lon="longitude",
-                          color="savings_kwh", size="savings_kwh",
-                          hover_data=["connectivity_score"],
-                          mapbox_style="carto-positron",
-                          zoom=3)
     st.plotly_chart(fig, use_container_width=True)
 with tab3:
     st.write("## About")
     st.markdown("""
-    **Temporary Demo Version**
-    Currently using synthetic data. To use real data:
-    1. Request access to [Giga School Data](https://giga.global)
-    2. Replace URLs in `load_giga_school_data()`
-    3. Update column names as needed
     """)
-st.sidebar.markdown("⚠️ Note: This is a prototype using sample data")

 from sklearn.model_selection import train_test_split
 # ----------------------
+# Data Loading (Robust Version)
 # ----------------------
 @st.cache_data
 def load_giga_school_data():
+    """Load school data with fallback to synthetic data"""
     try:
+        # Try real data URL (replace with valid URL when available)
         url = "https://raw.githubusercontent.com/Project-Giga/public-datasets/main/school_connectivity.csv"
         df = pd.read_csv(url)
+        st.success("Loaded real Giga school data!")
+    except Exception as e:
+        st.warning(f"Using synthetic data - {str(e)}")
         data = {
             "school_id": [1, 2, 3, 4, 5],
             "latitude": [40.7128, 34.0522, 41.8781, 29.7604, 33.7490],
 @st.cache_data
 def load_ookla_speedtest_data():
+    """Generate synthetic speedtest data"""
     return pd.DataFrame({
         'latitude': [40.7128, 34.0522, 41.8781, 29.7604, 33.7490],
         'longitude': [-74.0060, -118.2437, -87.6298, -95.3698, -84.3880],
 # ----------------------
 def train_model(df):
     # Feature engineering
+    np.random.seed(42)
+    df["hour"] = np.random.randint(0, 24, len(df))
+    df["energy_kwh"] = np.random.normal(200, 50, len(df))
     X = df[["hour", "avg_latency_ms", "avg_download_mbps", "connectivity_score"]]
+    y = df["energy_kwh"]
     # Model training
     model = RandomForestRegressor(n_estimators=10)
 st.set_page_config(page_title="Public Sector Energy Optimizer", layout="wide")
 st.title("🏫 AI for School Network Efficiency")
+# Load and merge data
 giga_df = load_giga_school_data()
 ookla_df = load_ookla_speedtest_data()
+merged_df = pd.merge(giga_df, ookla_df, on=["latitude", "longitude"], how="inner")
 # Train model
+if not merged_df.empty:
+    df, feature_importances = train_model(merged_df)
+    # Prepare visualization parameters
+    df["size"] = np.abs(df["savings_kwh"]).clip(lower=5)
+    df["size"] = df["size"].apply(lambda x: min(x, 50))
+    df["color"] = np.where(df["savings_kwh"] > 0, "green", "red")
+else:
+    st.error("No data available for analysis!")
+    st.stop()
 # ----------------------
 # Dashboard Sections
 with tab1:
     st.subheader("Energy vs. Connectivity Analysis")
+    fig = px.scatter(df, x="avg_download_mbps", y="energy_kwh",
+                     color="savings_kwh", title="Download Speed vs. Energy Use",
+                     labels={"avg_download_mbps": "Download Speed (Mbps)",
+                             "energy_kwh": "Energy Consumption (kWh)"})
     st.plotly_chart(fig, use_container_width=True)
 with tab2:
+    st.subheader("School Energy Efficiency Map")
+    fig = px.scatter_mapbox(
+        df,
+        lat="latitude",
+        lon="longitude",
+        color="color",
+        size="size",
+        hover_name="school_id",
+        hover_data=["connectivity_score", "savings_kwh"],
+        mapbox_style="carto-positron",
+        zoom=3,
+        color_discrete_map={"green": "#2ecc71", "red": "#e74c3c"},
+        title="Energy Savings Potential"
+    )
+    fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
     st.plotly_chart(fig, use_container_width=True)
 with tab3:
     st.write("## About")
     st.markdown("""
+    **Public Sector Network Optimization Dashboard**
+    *AI for Connectivity Hackathon II Submission*
+    Features:
+    - 🎯 Identifies energy inefficiencies in school networks
+    - 🌍 Geographic visualization of savings potential
+    - 🤖 AI-powered energy consumption predictions
+    Data Sources:
+    - School locations: Giga (synthetic data in this demo)
+    - Network performance: Ookla Open Data (synthetic)
+    Next Steps:
+    1. Replace synthetic data with real school telemetry
+    2. Integrate live energy monitoring APIs
+    3. Add maintenance scheduling features
     """)
+st.sidebar.markdown("⚠️ **Note**: Demo uses synthetic data - replace with real datasets for production use")