import streamlit as st import pandas as pd import pydeck as pdk import math # --- PAGE CONFIGURATION --- st.set_page_config(layout="wide", page_title="Frontier AI Emissions Map") # --- CUSTOM CSS FOR METRICS & STYLE --- st.markdown(""" """, unsafe_allow_html=True) # --- 1. DATA LOADING & CLEANING --- @st.cache_data def load_data(): try: # Load data, skipping the first empty row (header=1 means Row 2 is the header) df = pd.read_csv("Frontier AI DC Emissions - Frontier Timeline.csv", header=1) # Sanitize Headers (removes hidden spaces) df.columns = df.columns.str.strip() # Validation required_cols = ['Power (MW)', 'Carbon Intensity', 'Annual Million tCO2'] missing = [c for c in required_cols if c not in df.columns] if missing: st.error(f"❌ Missing columns: {missing}. Found columns: {df.columns.tolist()}") st.stop() except FileNotFoundError: st.error("❌ File not found. Please ensure 'Frontier AI DC Emissions - Frontier Timeline.csv' is uploaded.") st.stop() # --- Data Cleaning --- def clean_numeric(val): if isinstance(val, str): val = val.replace(',', '').replace('"', '').strip() return pd.to_numeric(val, errors='coerce') df['Power (MW)'] = df['Power (MW)'].apply(clean_numeric) df['Carbon Intensity'] = df['Carbon Intensity'].apply(clean_numeric) df['Annual Million tCO2'] = df['Annual Million tCO2'].apply(clean_numeric) # --- CLEAN OWNER NAMES --- # Remove "#confident", "#likely", etc. if 'Owner' in df.columns: df['Owner'] = df['Owner'].astype(str).str.split('#').str[0].str.strip() # --- SIMPLIFY GRID STATUS --- # Create a clean category for the filter (Grid vs Off-Grid vs Hybrid) def simplify_status(status): s = str(status).lower() if 'off-grid' in s or 'gas' in s: return "Off-Grid / Fossil" elif 'hybrid' in s or 'nuclear' in s: return "Hybrid / Nuclear" elif 'grid' in s: return "Grid Connected" else: return "Unknown" df['Simple_Connection'] = df['Grid Status'].apply(simplify_status) # --- MATH CHECK --- # Formula: MW * 8760 hours * (Intensity kg/MWh / 1000 to get tonnes) / 1,000,000 to get Million Tonnes # We calculate this to double-check the CSV's reported numbers df['Calculated_Mt'] = (df['Power (MW)'] * 8760 * df['Carbon Intensity']) / 1e9 # Use the Reported number, but normalize it (Handle the 13,093 vs 13.1 issue) df['Emissions_Mt'] = df['Annual Million tCO2'].apply(lambda x: x / 1000 if x > 100 else x) # --- Geocoding (Manual Overrides for missing Lat/Long) --- overrides = { 'Fermi': [35.344, -101.373], # Amarillo, TX 'Crane': [40.154, -76.725], # Three Mile Island 'CleanArc': [38.005, -77.478], # Caroline County, VA 'Vantage': [38.381, -77.495], # Fredericksburg, VA 'Stargate': [42.167, -83.850] # Michigan } for key, coords in overrides.items(): mask = df['Project'].astype(str).str.contains(key, case=False, na=False) df.loc[mask, ['Latitude', 'Longitude']] = coords # Parse DMS coordinates def dms_to_dd(val): if isinstance(val, str) and '°' in val: try: parts = val.replace('°', ' ').replace("'", ' ').replace('"', ' ').split() dd = float(parts[0]) + float(parts[1])/60 + (float(parts[2]) if len(parts)>2 else 0)/3600 if 'S' in val or 'W' in val: dd *= -1 return dd except: return None return val for col in ['Latitude', 'Longitude']: df[col] = df[col].apply(dms_to_dd) df[col] = pd.to_numeric(df[col], errors='coerce') df = df.dropna(subset=['Latitude', 'Longitude']) # --- Enrichment for Tooltip --- # Cars: 1 MtCO2 ≈ 217,000 cars (4.6t/car/yr) df['Cars_Equivalent_Millions'] = (df['Emissions_Mt'] * 1_000_000 / 4.6 / 1_000_000).round(2) # Coal Plants: 1 Coal Plant ≈ 4.0 MtCO2 df['Coal_Plants_Equivalent'] = (df['Emissions_Mt'] / 4.0).round(1) # Visual Attributes def get_color(status): s = str(status).lower() if 'off-grid' in s or 'gas' in s: return [255, 65, 54, 200] # Red elif 'hybrid' in s or 'nuclear' in s: return [255, 133, 27, 200] # Orange else: return [0, 116, 217, 200] # Blue df['color'] = df['Grid Status'].apply(get_color) df['radius'] = df['Emissions_Mt'].apply(lambda x: math.sqrt(x) * 15000) return df df = load_data() # --- SIDEBAR CONTROLS --- st.sidebar.header("🌍 Frontier AI Emissions") st.sidebar.markdown("Filter the map to analyze the carbon footprint of planned AI infrastructure.") # Filters: Connection Type (Simplified) # We sort them to ensure consistent order connection_options = sorted(df['Simple_Connection'].unique()) grid_filter = st.sidebar.multiselect( "Connection Type", options=connection_options, default=connection_options ) # Filters: Owner (Cleaned) owner_options = sorted(df['Owner'].unique()) owner_filter = st.sidebar.multiselect( "Owner", options=owner_options, default=owner_options ) # Apply filters filtered_df = df[ (df['Simple_Connection'].isin(grid_filter)) & (df['Owner'].isin(owner_filter)) ] # --- SCORECARD METRICS --- total_power = filtered_df['Power (MW)'].sum() / 1000 # GW total_emissions = filtered_df['Emissions_Mt'].sum() total_cars = filtered_df['Cars_Equivalent_Millions'].sum() avg_intensity = filtered_df['Carbon Intensity'].mean() st.sidebar.divider() st.sidebar.markdown("### 📊 Aggregate Impact") col1, col2 = st.sidebar.columns(2) col1.metric("Total Power", f"{total_power:.1f} GW", help="Total capacity of visible projects") col2.metric("Annual Emissions", f"{total_emissions:.1f} Mt", help="Million Tonnes CO2e/year") st.sidebar.markdown(f"""