samuelrince committed
Commit fbcf9c5 · Parent(s): 67ad2e9

feat: add latency estimator based on openrouter
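In short: both calculator and expert modes previously passed a placeholder request latency (math.inf and 100000 respectively) to the impact functions; this commit replaces that with an estimate derived from per-model throughput figures, presumably averaged from OpenRouter's published tokens-per-second statistics. The estimation model is simply latency ≈ output_tokens / throughput. A minimal illustration (the token count below is made up; the 68.4 tok/s figure is the gpt-4o entry from the new data file):

    # Hedged sketch: latency is estimated as output tokens divided by
    # the model's average decoding throughput (tokens per second).
    output_tokens = 300         # illustrative completion length
    throughput_tps = 68.4       # gpt-4o entry in src/data/throughputs.json
    estimated_latency_s = output_tokens / throughput_tps  # ≈ 4.4 seconds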
src/calculator.py CHANGED

@@ -3,6 +3,7 @@ import streamlit as st
 
 from ecologits.tracers.utils import llm_impacts
 from src.impacts import display_impacts, display_equivalent_ghg, display_equivalent_energy
+from src.latency_estimator import latency_estimator
 from src.utils import format_impacts
 from src.content import WARNING_CLOSED_SOURCE, WARNING_MULTI_MODAL, WARNING_BOTH, HOW_TO_TEXT
 from src.models import load_models
@@ -74,11 +75,15 @@ def calculator_mode():
         st.warning(WARNING_BOTH, icon="⚠️")
 
     try:
+        output_tokens_count = [x[1] for x in PROMPTS if x[0] == output_tokens][0]
+        estimated_latency = latency_estimator.estimate(provider=provider_raw,
+                                                       model_name=model_raw,
+                                                       output_tokens=output_tokens_count)
         impacts = llm_impacts(
             provider=provider_raw,
             model_name=model_raw,
-            output_token_count=[x[1] for x in PROMPTS if x[0] == output_tokens][0],
-            request_latency=math.inf,
+            output_token_count=output_tokens_count,
+            request_latency=estimated_latency
         )
 
         impacts, _, _ = format_impacts(impacts)
@@ -102,4 +107,5 @@ def calculator_mode():
 
 
     except Exception as e:
-        st.error('Could not find the model in the repository. Please try another model.')
+        st.error('Could not find the model in the repository. Please try another model.')
+        raise e
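The calculator path now computes the completion length once, feeds it through the estimator, and passes the result to llm_impacts instead of math.inf; the except branch also re-raises after rendering the error, so failures surface in the app logs as well as the UI. A hedged sketch of the new lookup in isolation (the PROMPTS entries shown are hypothetical; only the (label, token_count) shape is implied by the code):

    # Hypothetical PROMPTS content; the real values live in src/constants.py.
    PROMPTS = [("Short answer", 100), ("Essay", 1000)]
    output_tokens = "Essay"  # label selected in the UI
    output_tokens_count = [x[1] for x in PROMPTS if x[0] == output_tokens][0]  # -> 1000
    # latency_estimator.estimate(...) then returns 1000 / throughput seconds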
src/data/throughputs.json ADDED
@@ -0,0 +1,244 @@
+{
+    "models": [
+        {
+            "provider": "openai",
+            "name": "gpt-3.5-turbo",
+            "throughput": 144.6
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4",
+            "throughput": 33.0
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4-turbo",
+            "throughput": 46.5
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4o",
+            "throughput": 68.4
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4o-mini",
+            "throughput": 59.5
+        },
+        {
+            "provider": "openai",
+            "name": "o1",
+            "throughput": 442.9
+        },
+        {
+            "provider": "openai",
+            "name": "o1-mini",
+            "throughput": 173.2
+        },
+        {
+            "provider": "openai",
+            "name": "o3-mini",
+            "throughput": 597.4
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4.1-nano",
+            "throughput": 91.9
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4.1-mini",
+            "throughput": 68.7
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4.1",
+            "throughput": 61.0
+        },
+        {
+            "provider": "openai",
+            "name": "o4-mini",
+            "throughput": 64.7
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-5-nano",
+            "throughput": 82.4
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-5-mini",
+            "throughput": 47.1
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-5",
+            "throughput": 41.3
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-3-5-haiku-latest",
+            "throughput": 59.6
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-3-5-sonnet-latest",
+            "throughput": 52.7
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-3-7-sonnet-latest",
+            "throughput": 51.9
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-opus-4-0",
+            "throughput": 37.0
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-opus-4-1",
+            "throughput": 38.1
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-sonnet-4-0",
+            "throughput": 60.2
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-sonnet-4-5",
+            "throughput": 61.4
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-haiku-4-5",
+            "throughput": 119.8
+        },
+        {
+            "provider": "mistralai",
+            "name": "codestral-latest",
+            "throughput": 272.8
+        },
+        {
+            "provider": "mistralai",
+            "name": "devstral-medium-latest",
+            "throughput": 106.7
+        },
+        {
+            "provider": "mistralai",
+            "name": "devstral-small-latest",
+            "throughput": 187.8
+        },
+        {
+            "provider": "mistralai",
+            "name": "magistral-medium-latest",
+            "throughput": 106.7
+        },
+        {
+            "provider": "mistralai",
+            "name": "magistral-small-latest",
+            "throughput": 187.8
+        },
+        {
+            "provider": "mistralai",
+            "name": "ministral-3b-latest",
+            "throughput": 309.6
+        },
+        {
+            "provider": "mistralai",
+            "name": "ministral-8b-latest",
+            "throughput": 213.7
+        },
+        {
+            "provider": "mistralai",
+            "name": "mistral-large-latest",
+            "throughput": 48.6
+        },
+        {
+            "provider": "mistralai",
+            "name": "mistral-medium-latest",
+            "throughput": 54.6
+        },
+        {
+            "provider": "mistralai",
+            "name": "mistral-small-latest",
+            "throughput": 158.0
+        },
+        {
+            "provider": "mistralai",
+            "name": "mistral-tiny-latest",
+            "throughput": 92.8
+        },
+        {
+            "provider": "mistralai",
+            "name": "open-mistral-7b",
+            "throughput": 121.5
+        },
+        {
+            "provider": "mistralai",
+            "name": "open-mistral-nemo",
+            "throughput": 153.2
+        },
+        {
+            "provider": "mistralai",
+            "name": "open-mixtral-8x22b",
+            "throughput": 85.7
+        },
+        {
+            "provider": "mistralai",
+            "name": "open-mixtral-8x7b",
+            "throughput": 80
+        },
+        {
+            "provider": "google_genai",
+            "name": "gemini-2.0-flash-lite",
+            "throughput": 74.1
+        },
+        {
+            "provider": "google_genai",
+            "name": "gemini-2.0-flash",
+            "throughput": 151.4
+        },
+        {
+            "provider": "google_genai",
+            "name": "gemini-2.5-flash-lite",
+            "throughput": 74.1
+        },
+        {
+            "provider": "google_genai",
+            "name": "gemini-2.5-flash",
+            "throughput": 93.2
+        },
+        {
+            "provider": "google_genai",
+            "name": "gemini-2.5-pro",
+            "throughput": 86.6
+        },
+        {
+            "provider": "cohere",
+            "name": "command-a-03-2025",
+            "throughput": 77.4
+        },
+        {
+            "provider": "cohere",
+            "name": "command-r",
+            "throughput": 125.1
+        },
+        {
+            "provider": "cohere",
+            "name": "command-r-08-2024",
+            "throughput": 67.2
+        },
+        {
+            "provider": "cohere",
+            "name": "command-r-plus-08-2024",
+            "throughput": 26.9
+        },
+        {
+            "provider": "cohere",
+            "name": "command-r7b-12-2024",
+            "throughput": 125.1
+        }
+    ]
+}
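The throughput values are average output tokens per second per (provider, name) pair, presumably taken from OpenRouter's reported model throughputs as the commit message suggests. They span roughly 27 to 600 tok/s, so estimated latencies vary by more than an order of magnitude. A quick sanity check, assuming the script runs from the repository root:

    # Sanity check of the data file; the path assumes the repo layout above.
    import json
    from pathlib import Path

    data = json.loads(Path("src/data/throughputs.json").read_text())
    tps = {(m["provider"], m["name"]): m["throughput"] for m in data["models"]}
    print(tps[("cohere", "command-r-plus-08-2024")])        # 26.9 tok/s (slowest entry)
    print(500 / tps[("cohere", "command-r-plus-08-2024")])  # ≈ 18.6 s for a 500-token completion
    print(500 / tps[("openai", "o3-mini")])                 # ≈ 0.8 s at 597.4 tok/s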
src/expert.py CHANGED

@@ -3,11 +3,13 @@ import streamlit as st
 from ecologits.electricity_mix_repository import electricity_mixes
 from ecologits.impacts.llm import compute_llm_impacts
 
+from src.latency_estimator import latency_estimator
 from src.utils import format_impacts
 from src.impacts import display_impacts
 from src.electricity_mix import COUNTRY_CODES, format_electricity_mix_criterion, format_country_name
 from src.models import load_models
 from src.constants import PROMPTS
+from src.constants import PROMPTS
 
 import plotly.express as px
 
@@ -71,32 +73,35 @@ def expert_mode():
             / 2
         )
 
+        provider_raw = df_filtered["provider"].values[0]
+        model_name_raw = df_filtered["name"].values[0]
+        tps_raw = latency_estimator.get_throughput(provider_raw, model_name_raw)
+
        ########## Model parameters ##########
 
-        active_params_col, total_params_col = st.columns(2)
+        active_params_col, total_params_col, throughput_col = st.columns(3)
 
        with active_params_col:
-            active_params = st.number_input(
-                "Active parameters (B)", 0, None, active_params
-            )
+            active_params = st.number_input("Active parameters (B)", 0, None, active_params)
 
        with total_params_col:
-            total_params = st.number_input(
-                "Total parameters (B)", 0, None, total_params
-            )
+            total_params = st.number_input("Total parameters (B)", 0, None, total_params)
+
+        with throughput_col:
+            throughput = st.number_input("Average TPS", 1.0, None, tps_raw)
 
 
    with st.container(border=True):
        st.markdown("###### Configure the prompt")
 
-        provider_col, model_col = st.columns(2)
+        prompt_col, token_col = st.columns(2)
 
-        with provider_col:
+        with prompt_col:
            output_tokens_exp = st.selectbox(
                label="Example prompt", options=[x[0] for x in PROMPTS], key=3
            )
 
-        with model_col:
+        with token_col:
            output_tokens = st.number_input(
                label="Output completion tokens",
                min_value=0,
@@ -155,11 +160,18 @@ def expert_mode():
            format="%0.3f",
        )
 
+        estimated_latency = latency_estimator.estimate(
+            provider=provider_raw,
+            model_name=model_name_raw,
+            output_tokens=output_tokens,
+            throughput=throughput
+        )
+
        impacts = compute_llm_impacts(
            model_active_parameter_count=active_params,
            model_total_parameter_count=total_params,
            output_token_count=output_tokens,
-            request_latency=100000,
+            request_latency=estimated_latency,
            if_electricity_mix_gwp=em_gwp,
            if_electricity_mix_adpe=em_adpe,
            if_electricity_mix_pe=em_pe,
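Expert mode now seeds an editable "Average TPS" field from the per-model throughput and forwards the (possibly user-overridden) value to the estimator; note that the new side also introduces a second "from src.constants import PROMPTS" line, a harmless duplicate of the import just above it. A hedged illustration of the override semantics (the model name comes from throughputs.json; the token counts are made up):

    from src.latency_estimator import latency_estimator

    # Default path: stored throughput for gpt-4o is 68.4 tok/s.
    latency_estimator.estimate("openai", "gpt-4o", output_tokens=1000)                    # 1000 / 68.4 ≈ 14.6 s
    # Override path: an explicit throughput bypasses the stored value.
    latency_estimator.estimate("openai", "gpt-4o", output_tokens=1000, throughput=120.0)  # ≈ 8.3 s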
src/latency_estimator.py ADDED
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+_BASE_PATH = Path(__file__).parent / "data" / "throughputs.json"
+
+
+class LatencyEstimator:
+    __DEFAULT_TPS = 80.0
+
+    def __init__(self, file_path: str | Path) -> None:
+        with open(file_path, "r") as fd:
+            data = json.load(fd)
+
+        self.__throughputs = {}
+        for el in data["models"]:
+            self.__throughputs[(el["provider"], el["name"])] = el["throughput"]
+
+    def get_throughput(self, provider: str, model_name: str) -> float:
+        return float(self.__throughputs.get((provider, model_name), self.__DEFAULT_TPS))
+
+    def estimate(self,
+                 provider: str,
+                 model_name: str,
+                 output_tokens: int,
+                 throughput: float | None = None) -> float:
+        if throughput is None:
+            throughput = self.__throughputs.get((provider, model_name), self.__DEFAULT_TPS)
+        return float(output_tokens / throughput)
+
+
+latency_estimator = LatencyEstimator(file_path=_BASE_PATH)
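For reference, a minimal usage sketch of the new module (the gpt-4o figure comes from throughputs.json; unknown models fall back to the 80.0 tok/s class default):

    from src.latency_estimator import latency_estimator

    # Known model: 500 tokens at gpt-4o's 68.4 tok/s ≈ 7.3 s.
    latency_estimator.estimate(provider="openai", model_name="gpt-4o", output_tokens=500)

    # Unlisted model (hypothetical name): falls back to 80.0 tok/s, i.e. 500 / 80 = 6.25 s.
    latency_estimator.estimate(provider="openai", model_name="not-in-the-file", output_tokens=500)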