samuelrince committed
Commit fbcf9c5 · Parent(s): 67ad2e9

feat: add latency estimator based on openrouter
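In short: both calculator and expert modes previously passed a placeholder request latency (math.inf and 100000 respectively) to the impact functions; this commit replaces that with an estimate derived from per-model throughput figures, presumably averaged from OpenRouter's published tokens-per-second statistics. The estimation model is simply latency ≈ output_tokens / throughput. A minimal illustration (the token count below is made up; the 68.4 tok/s figure is the gpt-4o entry from the new data file):

    # Hedged sketch: latency is estimated as output tokens divided by
    # the model's average decoding throughput (tokens per second).
    output_tokens = 300         # illustrative completion length
    throughput_tps = 68.4       # gpt-4o entry in src/data/throughputs.json
    estimated_latency_s = output_tokens / throughput_tps  # ≈ 4.4 seconds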
src/calculator.py CHANGED

@@ -3,6 +3,7 @@ import streamlit as st
 
 from ecologits.tracers.utils import llm_impacts
 from src.impacts import display_impacts, display_equivalent_ghg, display_equivalent_energy
+from src.latency_estimator import latency_estimator
 from src.utils import format_impacts
 from src.content import WARNING_CLOSED_SOURCE, WARNING_MULTI_MODAL, WARNING_BOTH, HOW_TO_TEXT
 from src.models import load_models
@@ -74,11 +75,15 @@ def calculator_mode():
         st.warning(WARNING_BOTH, icon="⚠️")
 
     try:
+        output_tokens_count = [x[1] for x in PROMPTS if x[0] == output_tokens][0]
+        estimated_latency = latency_estimator.estimate(provider=provider_raw,
+                                                       model_name=model_raw,
+                                                       output_tokens=output_tokens_count)
         impacts = llm_impacts(
             provider=provider_raw,
             model_name=model_raw,
-            output_token_count=[x[1] for x in PROMPTS if x[0] == output_tokens][0],
-            request_latency=math.inf,
+            output_token_count=output_tokens_count,
+            request_latency=estimated_latency
         )
 
         impacts, _, _ = format_impacts(impacts)
@@ -102,4 +107,5 @@ def calculator_mode():
 
 
     except Exception as e:
-        st.error('Could not find the model in the repository. Please try another model.')
+        st.error('Could not find the model in the repository. Please try another model.')
+        raise e
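The calculator path now computes the completion length once, feeds it through the estimator, and passes the result to llm_impacts instead of math.inf; the except branch also re-raises after rendering the error, so failures surface in the app logs as well as the UI. A hedged sketch of the new lookup in isolation (the PROMPTS entries shown are hypothetical; only the (label, token_count) shape is implied by the code):

    # Hypothetical PROMPTS content; the real values live in src/constants.py.
    PROMPTS = [("Short answer", 100), ("Essay", 1000)]
    output_tokens = "Essay"  # label selected in the UI
    output_tokens_count = [x[1] for x in PROMPTS if x[0] == output_tokens][0]  # -> 1000
    # latency_estimator.estimate(...) then returns 1000 / throughput seconds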
src/data/throughputs.json ADDED
@@ -0,0 +1,244 @@
+{
+    "models": [
+        {
+            "provider": "openai",
+            "name": "gpt-3.5-turbo",
+            "throughput": 144.6
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4",
+            "throughput": 33.0
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4-turbo",
+            "throughput": 46.5
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4o",
+            "throughput": 68.4
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4o-mini",
+            "throughput": 59.5
+        },
+        {
+            "provider": "openai",
+            "name": "o1",
+            "throughput": 442.9
+        },
+        {
+            "provider": "openai",
+            "name": "o1-mini",
+            "throughput": 173.2
+        },
+        {
+            "provider": "openai",
+            "name": "o3-mini",
+            "throughput": 597.4
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4.1-nano",
+            "throughput": 91.9
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4.1-mini",
+            "throughput": 68.7
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-4.1",
+            "throughput": 61.0
+        },
+        {
+            "provider": "openai",
+            "name": "o4-mini",
+            "throughput": 64.7
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-5-nano",
+            "throughput": 82.4
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-5-mini",
+            "throughput": 47.1
+        },
+        {
+            "provider": "openai",
+            "name": "gpt-5",
+            "throughput": 41.3
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-3-5-haiku-latest",
+            "throughput": 59.6
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-3-5-sonnet-latest",
+            "throughput": 52.7
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-3-7-sonnet-latest",
+            "throughput": 51.9
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-opus-4-0",
+            "throughput": 37.0
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-opus-4-1",
+            "throughput": 38.1
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-sonnet-4-0",
+            "throughput": 60.2
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-sonnet-4-5",
+            "throughput": 61.4
+        },
+        {
+            "provider": "anthropic",
+            "name": "claude-haiku-4-5",
+            "throughput": 119.8
+        },
+        {
+            "provider": "mistralai",
+            "name": "codestral-latest",
+            "throughput": 272.8
+        },
+        {
+            "provider": "mistralai",
+            "name": "devstral-medium-latest",
+            "throughput": 106.7
+        },
+        {
+            "provider": "mistralai",
+            "name": "devstral-small-latest",
+            "throughput": 187.8
+        },
+        {
+            "provider": "mistralai",
+            "name": "magistral-medium-latest",
+            "throughput": 106.7
+        },
+        {
+            "provider": "mistralai",
+            "name": "magistral-small-latest",
+            "throughput": 187.8
+        },
+        {
+            "provider": "mistralai",
+            "name": "ministral-3b-latest",
+            "throughput": 309.6
+        },
+        {
+            "provider": "mistralai",
+            "name": "ministral-8b-latest",
+            "throughput": 213.7
+        },
+        {
+            "provider": "mistralai",
+            "name": "mistral-large-latest",
+            "throughput": 48.6
+        },
+        {
+            "provider": "mistralai",
+            "name": "mistral-medium-latest",
+            "throughput": 54.6
+        },
+        {
+            "provider": "mistralai",
+            "name": "mistral-small-latest",
+            "throughput": 158.0
+        },
+        {
+            "provider": "mistralai",
+            "name": "mistral-tiny-latest",
+            "throughput": 92.8
+        },
+        {
+            "provider": "mistralai",
+            "name": "open-mistral-7b",
+            "throughput": 121.5
+        },
+        {
+            "provider": "mistralai",
+            "name": "open-mistral-nemo",
+            "throughput": 153.2
+        },
+        {
+            "provider": "mistralai",
+            "name": "open-mixtral-8x22b",
+            "throughput": 85.7
+        },
+        {
+            "provider": "mistralai",
+            "name": "open-mixtral-8x7b",
+            "throughput": 80
+        },
+        {
+            "provider": "google_genai",
+            "name": "gemini-2.0-flash-lite",
+            "throughput": 74.1
+        },
+        {
+            "provider": "google_genai",
+            "name": "gemini-2.0-flash",
+            "throughput": 151.4
+        },
+        {
+            "provider": "google_genai",
+            "name": "gemini-2.5-flash-lite",
+            "throughput": 74.1
+        },
+        {
+            "provider": "google_genai",
+            "name": "gemini-2.5-flash",
+            "throughput": 93.2
+        },
+        {
+            "provider": "google_genai",
+            "name": "gemini-2.5-pro",
+            "throughput": 86.6
+        },
+        {
+            "provider": "cohere",
+            "name": "command-a-03-2025",
+            "throughput": 77.4
+        },
+        {
+            "provider": "cohere",
+            "name": "command-r",
+            "throughput": 125.1
+        },
+        {
+            "provider": "cohere",
+            "name": "command-r-08-2024",
+            "throughput": 67.2
+        },
+        {
+            "provider": "cohere",
+            "name": "command-r-plus-08-2024",
+            "throughput": 26.9
+        },
+        {
+            "provider": "cohere",
+            "name": "command-r7b-12-2024",
+            "throughput": 125.1
+        }
+    ]
+}
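The throughput values are average output tokens per second per (provider, name) pair, presumably taken from OpenRouter's reported model throughputs as the commit message suggests. They span roughly 27 to 600 tok/s, so estimated latencies vary by more than an order of magnitude. A quick sanity check, assuming the script runs from the repository root:

    # Sanity check of the data file; the path assumes the repo layout above.
    import json
    from pathlib import Path

    data = json.loads(Path("src/data/throughputs.json").read_text())
    tps = {(m["provider"], m["name"]): m["throughput"] for m in data["models"]}
    print(tps[("cohere", "command-r-plus-08-2024")])        # 26.9 tok/s (slowest entry)
    print(500 / tps[("cohere", "command-r-plus-08-2024")])  # ≈ 18.6 s for a 500-token completion
    print(500 / tps[("openai", "o3-mini")])                 # ≈ 0.8 s at 597.4 tok/s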
src/expert.py CHANGED

@@ -3,11 +3,13 @@ import streamlit as st
 from ecologits.electricity_mix_repository import electricity_mixes
 from ecologits.impacts.llm import compute_llm_impacts
 
+from src.latency_estimator import latency_estimator
 from src.utils import format_impacts
 from src.impacts import display_impacts
 from src.electricity_mix import COUNTRY_CODES, format_electricity_mix_criterion, format_country_name
 from src.models import load_models
 from src.constants import PROMPTS
+from src.constants import PROMPTS
 
 import plotly.express as px
 
@@ -71,32 +73,35 @@ def expert_mode():
             / 2
         )
 
+        provider_raw = df_filtered["provider"].values[0]
+        model_name_raw = df_filtered["name"].values[0]
+        tps_raw = latency_estimator.get_throughput(provider_raw, model_name_raw)
+
        ########## Model parameters ##########
 
-        active_params_col, total_params_col = st.columns(2)
+        active_params_col, total_params_col, throughput_col = st.columns(3)
 
        with active_params_col:
-            active_params = st.number_input(
-                "Active parameters (B)", 0, None, active_params
-            )
+            active_params = st.number_input("Active parameters (B)", 0, None, active_params)
 
        with total_params_col:
-            total_params = st.number_input(
-                "Total parameters (B)", 0, None, total_params
-            )
+            total_params = st.number_input("Total parameters (B)", 0, None, total_params)
+
+        with throughput_col:
+            throughput = st.number_input("Average TPS", 1.0, None, tps_raw)
 
 
    with st.container(border=True):
        st.markdown("###### Configure the prompt")
 
-        provider_col, model_col = st.columns(2)
+        prompt_col, token_col = st.columns(2)
 
-        with provider_col:
+        with prompt_col:
            output_tokens_exp = st.selectbox(
                label="Example prompt", options=[x[0] for x in PROMPTS], key=3
            )
 
-        with model_col:
+        with token_col:
            output_tokens = st.number_input(
                label="Output completion tokens",
                min_value=0,
@@ -155,11 +160,18 @@ def expert_mode():
            format="%0.3f",
        )
 
+        estimated_latency = latency_estimator.estimate(
+            provider=provider_raw,
+            model_name=model_name_raw,
+            output_tokens=output_tokens,
+            throughput=throughput
+        )
+
        impacts = compute_llm_impacts(
            model_active_parameter_count=active_params,
            model_total_parameter_count=total_params,
            output_token_count=output_tokens,
-            request_latency=100000,
+            request_latency=estimated_latency,
            if_electricity_mix_gwp=em_gwp,
            if_electricity_mix_adpe=em_adpe,
            if_electricity_mix_pe=em_pe,
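Expert mode now seeds an editable "Average TPS" field from the per-model throughput and forwards the (possibly user-overridden) value to the estimator; note that the new side also introduces a second "from src.constants import PROMPTS" line, a harmless duplicate of the import just above it. A hedged illustration of the override semantics (the model name comes from throughputs.json; the token counts are made up):

    from src.latency_estimator import latency_estimator

    # Default path: stored throughput for gpt-4o is 68.4 tok/s.
    latency_estimator.estimate("openai", "gpt-4o", output_tokens=1000)                    # 1000 / 68.4 ≈ 14.6 s
    # Override path: an explicit throughput bypasses the stored value.
    latency_estimator.estimate("openai", "gpt-4o", output_tokens=1000, throughput=120.0)  # ≈ 8.3 s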
src/latency_estimator.py ADDED
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+_BASE_PATH = Path(__file__).parent / "data" / "throughputs.json"
+
+
+class LatencyEstimator:
+    __DEFAULT_TPS = 80.0
+
+    def __init__(self, file_path: str | Path) -> None:
+        with open(file_path, "r") as fd:
+            data = json.load(fd)
+
+        self.__throughputs = {}
+        for el in data["models"]:
+            self.__throughputs[(el["provider"], el["name"])] = el["throughput"]
+
+    def get_throughput(self, provider: str, model_name: str) -> float:
+        return float(self.__throughputs.get((provider, model_name), self.__DEFAULT_TPS))
+
+    def estimate(self,
+                 provider: str,
+                 model_name: str,
+                 output_tokens: int,
+                 throughput: float | None = None) -> float:
+        if throughput is None:
+            throughput = self.__throughputs.get((provider, model_name), self.__DEFAULT_TPS)
+        return float(output_tokens / throughput)
+
+
+latency_estimator = LatencyEstimator(file_path=_BASE_PATH)
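For reference, a minimal usage sketch of the new module (the gpt-4o figure comes from throughputs.json; unknown models fall back to the 80.0 tok/s class default):

    from src.latency_estimator import latency_estimator

    # Known model: 500 tokens at gpt-4o's 68.4 tok/s ≈ 7.3 s.
    latency_estimator.estimate(provider="openai", model_name="gpt-4o", output_tokens=500)

    # Unlisted model (hypothetical name): falls back to 80.0 tok/s, i.e. 500 / 80 = 6.25 s.
    latency_estimator.estimate(provider="openai", model_name="not-in-the-file", output_tokens=500)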