Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,46 +2,54 @@ import streamlit as st
|
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
| 4 |
import plotly.express as px
|
| 5 |
-
import requests
|
| 6 |
from sklearn.ensemble import RandomForestRegressor
|
| 7 |
from sklearn.model_selection import train_test_split
|
| 8 |
|
| 9 |
# ----------------------
|
| 10 |
-
# Load
|
| 11 |
# ----------------------
|
| 12 |
@st.cache_data
|
| 13 |
def load_giga_school_data():
|
| 14 |
-
"""Load
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
return df
|
| 18 |
|
| 19 |
@st.cache_data
|
| 20 |
-
def load_ookla_speedtest_data(
|
| 21 |
-
"""
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
# ----------------------
|
| 27 |
-
# AI Model
|
| 28 |
# ----------------------
|
| 29 |
def train_model(df):
|
| 30 |
# Feature engineering
|
| 31 |
-
df["hour"] =
|
| 32 |
-
df["
|
| 33 |
-
|
| 34 |
-
y = df["energy_kwh"]
|
| 35 |
-
|
| 36 |
-
# Train/test split
|
| 37 |
-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
|
| 38 |
|
| 39 |
# Model training
|
| 40 |
-
model = RandomForestRegressor(n_estimators=
|
| 41 |
-
model.fit(
|
| 42 |
df["predicted_energy"] = model.predict(X)
|
| 43 |
-
|
| 44 |
-
# Calculate savings potential
|
| 45 |
df["savings_kwh"] = df["energy_kwh"] - df["predicted_energy"]
|
| 46 |
return df, model.feature_importances_
|
| 47 |
|
|
@@ -49,75 +57,44 @@ def train_model(df):
|
|
| 49 |
# Streamlit App
|
| 50 |
# ----------------------
|
| 51 |
st.set_page_config(page_title="Public Sector Energy Optimizer", layout="wide")
|
| 52 |
-
st.title("🏫 AI for School
|
| 53 |
|
| 54 |
-
# Load
|
| 55 |
giga_df = load_giga_school_data()
|
| 56 |
ookla_df = load_ookla_speedtest_data()
|
| 57 |
merged_df = pd.merge(giga_df, ookla_df, on=["latitude", "longitude"], how="left")
|
| 58 |
|
| 59 |
-
# Generate synthetic energy data (replace with real telemetry if available)
|
| 60 |
-
merged_df["energy_kwh"] = np.random.normal(200, 50, len(merged_df))
|
| 61 |
-
merged_df["traffic_pct"] = np.random.randint(0, 100, len(merged_df))
|
| 62 |
-
merged_df["timestamp"] = pd.date_range("2024-01-01", periods=len(merged_df), freq="H")
|
| 63 |
-
|
| 64 |
# Train model
|
| 65 |
df, feature_importances = train_model(merged_df)
|
| 66 |
|
| 67 |
# ----------------------
|
| 68 |
# Dashboard Sections
|
| 69 |
# ----------------------
|
| 70 |
-
tab1, tab2, tab3
|
| 71 |
|
| 72 |
with tab1:
|
| 73 |
st.subheader("Energy vs. Connectivity Analysis")
|
| 74 |
-
|
| 75 |
-
# Energy vs. Download Speed
|
| 76 |
fig = px.scatter(df, x="avg_download_mbps", y="energy_kwh",
|
| 77 |
color="savings_kwh", title="Download Speed vs. Energy Use")
|
| 78 |
st.plotly_chart(fig, use_container_width=True)
|
| 79 |
-
|
| 80 |
-
# Feature Importance
|
| 81 |
-
st.subheader("Key Drivers of Energy Consumption")
|
| 82 |
-
features = ["Hour", "Weekday", "Latency", "Traffic", "Download Speed"]
|
| 83 |
-
fig = px.bar(x=features, y=feature_importances, labels={"x": "Factor", "y": "Importance"})
|
| 84 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 85 |
|
| 86 |
with tab2:
|
| 87 |
-
st.subheader("School
|
| 88 |
-
|
| 89 |
-
# Filter inefficient devices
|
| 90 |
-
df["size"] = np.where(df["savings_kwh"] > 20, 10, 2) # Highlight high-waste locations
|
| 91 |
-
|
| 92 |
fig = px.scatter_mapbox(df, lat="latitude", lon="longitude",
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
st.plotly_chart(fig, use_container_width=True)
|
| 98 |
|
| 99 |
with tab3:
|
| 100 |
-
st.subheader("Optimization Priorities")
|
| 101 |
-
|
| 102 |
-
# Priority Schools/Hospitals
|
| 103 |
-
st.write("### Top 5 High-Impact Facilities")
|
| 104 |
-
priority_df = df.sort_values("savings_kwh", ascending=False).head(5)
|
| 105 |
-
st.dataframe(priority_df[["latitude", "longitude", "savings_kwh", "avg_download_mbps"]],
|
| 106 |
-
hide_index=True)
|
| 107 |
-
|
| 108 |
-
# Cost-Benefit Calculator
|
| 109 |
-
st.write("### Cost Savings Estimator")
|
| 110 |
-
total_savings = df["savings_kwh"].sum() * 0.25 # Assume $0.25/kWh
|
| 111 |
-
co2_reduction = df["savings_kwh"].sum() * 0.5 # 0.5 kg CO2 per kWh
|
| 112 |
-
st.metric("Monthly Cost Savings", f"${total_savings:,.0f}")
|
| 113 |
-
st.metric("CO₂ Reduction", f"{co2_reduction:,.0f} kg")
|
| 114 |
-
|
| 115 |
-
with tab4:
|
| 116 |
st.write("## About")
|
| 117 |
st.markdown("""
|
| 118 |
-
**
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
""")
|
|
|
|
|
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
| 4 |
import plotly.express as px
|
|
|
|
| 5 |
from sklearn.ensemble import RandomForestRegressor
|
| 6 |
from sklearn.model_selection import train_test_split
|
| 7 |
|
| 8 |
# ----------------------
|
| 9 |
+
# Load Sample Data (Fallback)
|
| 10 |
# ----------------------
|
| 11 |
@st.cache_data
def load_giga_school_data():
    """Load school connectivity data, falling back to a synthetic sample.

    The Project Giga CSV on GitHub is tried first. If reading it fails, a
    warning is shown in the app and a hard-coded five-row sample is
    returned instead.

    Returns:
        pandas.DataFrame: The remote CSV as parsed by ``pd.read_csv``, or,
        on fallback, a frame with the columns ``school_id``, ``latitude``,
        ``longitude`` and ``connectivity_score``.
    """
    url = "https://raw.githubusercontent.com/Project-Giga/public-datasets/main/school_connectivity.csv"
    try:
        # Try original URL first
        return pd.read_csv(url)
    except Exception:
        # `except Exception` rather than a bare `except:` keeps the
        # best-effort fallback for network/parse failures while letting
        # KeyboardInterrupt and SystemExit propagate.
        st.warning("Using synthetic data - replace with real Giga dataset when available")
        # Fallback to synthetic data
        data = {
            "school_id": [1, 2, 3, 4, 5],
            "latitude": [40.7128, 34.0522, 41.8781, 29.7604, 33.7490],
            "longitude": [-74.0060, -118.2437, -87.6298, -95.3698, -84.3880],
            "connectivity_score": [45, 72, 38, 65, 82],
        }
        return pd.DataFrame(data)
|
| 29 |
|
| 30 |
@st.cache_data
def load_ookla_speedtest_data():
    """Return a fixed five-row sample of speedtest measurements.

    Returns:
        pandas.DataFrame: Columns ``latitude``, ``longitude``,
        ``avg_download_mbps`` and ``avg_latency_ms``.
    """
    latitudes = [40.7128, 34.0522, 41.8781, 29.7604, 33.7490]
    longitudes = [-74.0060, -118.2437, -87.6298, -95.3698, -84.3880]
    download_mbps = [25.3, 45.6, 18.9, 32.1, 55.4]
    latency_ms = [45, 32, 68, 51, 28]

    sample = {
        'latitude': latitudes,
        'longitude': longitudes,
        'avg_download_mbps': download_mbps,
        'avg_latency_ms': latency_ms,
    }
    return pd.DataFrame(sample)
|
| 39 |
|
| 40 |
# ----------------------
|
| 41 |
+
# AI Model Training
|
| 42 |
# ----------------------
|
| 43 |
def train_model(df, random_state=None):
    """Fit a random-forest energy model on synthetic targets and score each row.

    A simulated ``hour`` feature and a synthetic ``energy_kwh`` target are
    generated, a ``RandomForestRegressor`` is fitted on all rows, and the
    in-sample prediction and residual are attached to the frame.

    Args:
        df: DataFrame containing the columns ``avg_latency_ms``,
            ``avg_download_mbps`` and ``connectivity_score``.
        random_state: Optional int seed. When given, the synthetic data and
            the forest are reproducible across calls; ``None`` (the default)
            keeps every call random.

    Returns:
        tuple: ``(scored_df, feature_importances)`` where ``scored_df`` is a
        copy of ``df`` with the added columns ``hour``, ``energy_kwh``,
        ``predicted_energy`` and ``savings_kwh``, and ``feature_importances``
        is the fitted model's ``feature_importances_`` array, ordered as
        hour, latency, download speed, connectivity score.
    """
    feature_cols = ["hour", "avg_latency_ms", "avg_download_mbps", "connectivity_score"]

    # Work on a copy so the caller's frame is not modified as a side effect.
    df = df.copy()
    rng = np.random.default_rng(random_state)
    n_rows = len(df)

    # Feature engineering
    df["hour"] = rng.integers(0, 24, n_rows)  # Simulate timestamps
    df["energy_kwh"] = rng.normal(200, 50, n_rows)  # Synthetic energy data
    X = df[feature_cols]
    y = df["energy_kwh"]

    # Model training
    model = RandomForestRegressor(n_estimators=10, random_state=random_state)
    model.fit(X, y)
    df["predicted_energy"] = model.predict(X)

    # Residual of actual vs. predicted; negative when the model over-predicts.
    df["savings_kwh"] = df["energy_kwh"] - df["predicted_energy"]
    return df, model.feature_importances_
|
| 55 |
|
|
|
|
| 57 |
# Streamlit App
|
| 58 |
# ----------------------
|
| 59 |
st.set_page_config(page_title="Public Sector Energy Optimizer", layout="wide")
st.title("🏫 AI for School Network Efficiency")

# Load data
giga_df = load_giga_school_data()
ookla_df = load_ookla_speedtest_data()
# Left join keeps every school even when no speedtest row shares its coordinates.
merged_df = pd.merge(giga_df, ookla_df, on=["latitude", "longitude"], how="left")

# Train model
df, feature_importances = train_model(merged_df)

# ----------------------
# Dashboard Sections
# ----------------------
tab1, tab2, tab3 = st.tabs(["📈 Analysis", "🗺️ Map", "About"])

with tab1:
    st.subheader("Energy vs. Connectivity Analysis")
    fig = px.scatter(df, x="avg_download_mbps", y="energy_kwh",
                     color="savings_kwh", title="Download Speed vs. Energy Use")
    st.plotly_chart(fig, use_container_width=True)

with tab2:
    st.subheader("School Locations")
    # savings_kwh is (actual - predicted) and is negative whenever the model
    # over-predicts. Plotly rejects negative marker sizes, so size is driven by
    # a clipped copy; the colour scale still shows the signed value.
    df["marker_size"] = df["savings_kwh"].clip(lower=1.0)
    fig = px.scatter_mapbox(df, lat="latitude", lon="longitude",
                            color="savings_kwh", size="marker_size",
                            hover_data={"connectivity_score": True,
                                        "marker_size": False},
                            mapbox_style="carto-positron",
                            zoom=3)
    st.plotly_chart(fig, use_container_width=True)

with tab3:
    st.write("## About")
    st.markdown("""
    **Temporary Demo Version**
    Currently using synthetic data. To use real data:
    1. Request access to [Giga School Data](https://giga.global)
    2. Replace URLs in `load_giga_school_data()`
    3. Update column names as needed
    """)

st.sidebar.markdown("⚠️ Note: This is a prototype using sample data")
|