Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -36,107 +36,89 @@ st.markdown("""
|
|
| 36 |
# --- 1. DATA LOADING & CLEANING ---
|
| 37 |
@st.cache_data
|
| 38 |
def load_data():
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
def clean_numeric(val):
|
| 45 |
if isinstance(val, str):
|
| 46 |
-
val = val.replace(',', '').strip()
|
| 47 |
return pd.to_numeric(val, errors='coerce')
|
| 48 |
|
| 49 |
df['Power (MW)'] = df['Power (MW)'].apply(clean_numeric)
|
| 50 |
df['Carbon Intensity'] = df['Carbon Intensity'].apply(clean_numeric)
|
| 51 |
df['Annual Million tCO2'] = df['Annual Million tCO2'].apply(clean_numeric)
|
| 52 |
|
| 53 |
-
# ---
|
| 54 |
-
#
|
| 55 |
-
|
| 56 |
-
# assume it is Kilotonnes and divide by 1000 to get Million Tonnes (Mt).
|
| 57 |
-
# Recalculate to verify: MW * 8760 * (Intensity/1000) / 1,000,000 = Mt
|
| 58 |
|
| 59 |
-
#
|
| 60 |
-
#
|
| 61 |
df['Emissions_Mt'] = df['Annual Million tCO2'].apply(lambda x: x / 1000 if x > 100 else x)
|
| 62 |
|
| 63 |
-
#
|
| 64 |
-
#
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
df.loc[df['Project'].str.contains('Vantage', case=False, na=False), ['Latitude', 'Longitude']] = [38.381, -77.495]
|
| 73 |
-
# Stargate Michigan -> Saline Township, MI
|
| 74 |
-
df.loc[df['Project'].str.contains('Stargate Michigan', case=False, na=False), ['Latitude', 'Longitude']] = [42.167, -83.850]
|
| 75 |
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
dms_str = str(dms_str).strip()
|
| 91 |
-
if not dms_str: return None
|
| 92 |
-
|
| 93 |
-
# Simple parser for format: 42°40'28"N
|
| 94 |
-
try:
|
| 95 |
-
parts = dms_str.replace('°', ' ').replace("'", ' ').replace('"', ' ').split()
|
| 96 |
-
degrees = float(parts[0])
|
| 97 |
-
minutes = float(parts[1]) if len(parts) > 1 else 0
|
| 98 |
-
seconds = float(parts[2]) if len(parts) > 2 else 0
|
| 99 |
-
direction = parts[-1] if parts[-1] in ['N','S','E','W'] else 'N' # Default N/E if missing
|
| 100 |
-
|
| 101 |
-
dd = degrees + minutes/60 + seconds/3600
|
| 102 |
-
if direction in ['S', 'W']:
|
| 103 |
-
dd *= -1
|
| 104 |
-
return dd
|
| 105 |
-
except:
|
| 106 |
-
return None # Fallback or keep original if it was already decimal
|
| 107 |
|
| 108 |
-
# Apply DMS conversion only where it looks like a string with degrees
|
| 109 |
-
# Note: The manual overrides above provided decimal, so we skip those rows
|
| 110 |
for col in ['Latitude', 'Longitude']:
|
| 111 |
-
df[col] = df[col].apply(
|
| 112 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
| 113 |
-
|
| 114 |
-
# Drop rows
|
| 115 |
df = df.dropna(subset=['Latitude', 'Longitude'])
|
| 116 |
-
|
| 117 |
-
# ---
|
| 118 |
-
# 1 MtCO2
|
| 119 |
-
|
| 120 |
-
#
|
| 121 |
-
df['Cars_Equivalent_Millions'] = (df['Emissions_Mt'] * 1000000 / 4600 / 1000000).round(2)
|
| 122 |
-
|
| 123 |
-
# Coal Plant Equivalent: Average coal plant is ~3.5 to 4 MtCO2/year
|
| 124 |
df['Coal_Plants_Equivalent'] = (df['Emissions_Mt'] / 4.0).round(1)
|
| 125 |
|
| 126 |
-
#
|
| 127 |
def get_color(status):
|
| 128 |
s = str(status).lower()
|
| 129 |
-
if 'off-grid' in s or 'gas' in s:
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
return [255, 133, 27, 200] # Orange (Transition)
|
| 133 |
-
else:
|
| 134 |
-
return [0, 116, 217, 200] # Blue (Grid)
|
| 135 |
|
| 136 |
df['color'] = df['Grid Status'].apply(get_color)
|
| 137 |
-
|
| 138 |
-
# Bubble Size (Scaled)
|
| 139 |
-
# Scale factor for visual sizing
|
| 140 |
df['radius'] = df['Emissions_Mt'].apply(lambda x: math.sqrt(x) * 15000)
|
| 141 |
|
| 142 |
return df
|
|
|
|
| 36 |
# --- 1. DATA LOADING & CLEANING ---
|
| 37 |
@st.cache_data
def load_data():
    """Load the emissions CSV and return a cleaned, map-ready DataFrame.

    Processing steps, in order:

    1. Read the CSV and strip stray whitespace from the column headers.
    2. Verify that the columns this function reads are present.
    3. Coerce the power / intensity / emissions columns to numbers.
    4. Normalise the reported emissions figure into ``Emissions_Mt``.
    5. Apply manual coordinate overrides for a few named projects.
    6. Convert degree-minute-second coordinate strings to decimal degrees
       and drop rows that still have no usable location.
    7. Add the tooltip and rendering columns.

    Returns:
        pandas.DataFrame: the rows that have a location, with the extra
        columns ``Calculated_Mt``, ``Emissions_Mt``,
        ``Cars_Equivalent_Millions``, ``Coal_Plants_Equivalent``, ``color``
        and ``radius``.

    If the CSV file is missing, or a required column is absent, an error is
    shown with ``st.error`` and the script run is halted with ``st.stop``.
    """
    # Keep the try body to the one call that can raise FileNotFoundError.
    try:
        df = pd.read_csv("Frontier AI DC Emissions - Frontier Timeline.csv")
    except FileNotFoundError:
        st.error("❌ File not found. Please ensure 'Frontier AI DC Emissions - Frontier Timeline.csv' is uploaded.")
        st.stop()

    # Remove hidden spaces in headers (e.g. "Power (MW) " -> "Power (MW)").
    df.columns = df.columns.str.strip()

    # Validate up front so a malformed sheet yields a readable message that
    # lists what was actually found, instead of a KeyError further down.
    required_cols = [
        'Power (MW)',
        'Carbon Intensity',
        'Annual Million tCO2',
        'Project',
        'Grid Status',
    ]
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        st.error(f"❌ Missing columns: {missing}. Found columns: {df.columns.tolist()}")
        st.stop()

    # --- Data cleaning ---
    def clean_numeric(val):
        """Convert one cell to a number; unparseable values become NaN."""
        if isinstance(val, str):
            # Drop thousands separators and stray quote characters.
            val = val.replace(',', '').replace('"', '').strip()
        return pd.to_numeric(val, errors='coerce')

    for col in ('Power (MW)', 'Carbon Intensity', 'Annual Million tCO2'):
        df[col] = df[col].apply(clean_numeric)

    # Cross-check figure:
    # MW * 8760 h * intensity (kg/MWh) / 1000 -> tonnes, / 1,000,000 -> Mt.
    df['Calculated_Mt'] = (df['Power (MW)'] * 8760 * df['Carbon Intensity']) / 1e9

    # Use the reported number, but normalise it (the "13,093 vs 13.1" issue):
    # values above 100 are assumed to be kilotonnes and are divided by 1000.
    # NaN compares False and therefore passes through unchanged.
    df['Emissions_Mt'] = df['Annual Million tCO2'].apply(
        lambda x: x / 1000 if x > 100 else x
    )

    # --- Geocoding: manual coordinate overrides ---
    # Keys are matched as case-insensitive substrings of the project name,
    # so each key must be specific to a single site. A bare "Stargate" key
    # would move every Stargate project onto the Michigan coordinates.
    overrides = {
        'Fermi': [35.344, -101.373],             # Amarillo, TX
        'Crane': [40.154, -76.725],              # Three Mile Island
        'CleanArc': [38.005, -77.478],           # Caroline County, VA
        'Vantage': [38.381, -77.495],            # Fredericksburg, VA
        'Stargate Michigan': [42.167, -83.850],  # Saline Township, MI
    }
    project_names = df['Project'].astype(str)
    for key, coords in overrides.items():
        mask = project_names.str.contains(key, case=False, na=False, regex=False)
        df.loc[mask, ['Latitude', 'Longitude']] = coords

    def dms_to_dd(val):
        """Convert a DMS string such as 42°40'28"N to decimal degrees.

        Values that are not strings containing a degree sign are returned
        unchanged. Minutes and seconds are optional. Returns None when no
        numeric degree component can be read.
        """
        if not (isinstance(val, str) and '°' in val):
            return val

        text = val.strip()
        cleaned = text
        # Treat ASCII and typographic minute/second marks alike.
        for mark in ('°', "'", '"', '′', '″'):
            cleaned = cleaned.replace(mark, ' ')

        # Collect up to three leading numeric components (deg, min, sec);
        # stop at the first non-numeric token, e.g. the hemisphere letter.
        components = []
        for token in cleaned.split()[:3]:
            try:
                components.append(abs(float(token)))
            except ValueError:
                break
        if not components:
            return None

        dd = sum(part / 60 ** i for i, part in enumerate(components))

        # South / west hemispheres, or an explicit leading minus sign, make
        # the value negative. The sign is applied to the whole magnitude so
        # that "-77°30'" yields -77.5 rather than -76.5.
        upper = text.upper()
        if text.startswith('-') or 'S' in upper or 'W' in upper:
            dd = -dd
        return dd

    for col in ['Latitude', 'Longitude']:
        df[col] = df[col].apply(dms_to_dd)
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Drop rows that still have no location.
    df = df.dropna(subset=['Latitude', 'Longitude'])

    # --- Enrichment for tooltip ---
    # Cars: 1 MtCO2 ≈ 217,000 cars (4.6 t per car per year); in millions.
    df['Cars_Equivalent_Millions'] = (
        df['Emissions_Mt'] * 1_000_000 / 4.6 / 1_000_000
    ).round(2)
    # Coal plants: 1 coal plant ≈ 4.0 MtCO2 per year.
    df['Coal_Plants_Equivalent'] = (df['Emissions_Mt'] / 4.0).round(1)

    # --- Visual attributes ---
    def get_color(status):
        """Map a grid-status label to an [R, G, B, A] marker colour."""
        s = str(status).lower()
        if 'off-grid' in s or 'gas' in s:
            return [255, 65, 54, 200]    # Red
        if 'hybrid' in s or 'nuclear' in s:
            return [255, 133, 27, 200]   # Orange
        return [0, 116, 217, 200]        # Blue

    df['color'] = df['Grid Status'].apply(get_color)
    # Radius grows with the square root of emissions; 15000 is the visual
    # scale factor used for the bubbles.
    df['radius'] = df['Emissions_Mt'].apply(lambda x: math.sqrt(x) * 15000)

    return df
|