# patch_ecologits.py
# Ecologits contains a bug where the number of input tokens is swapped with the
# number of output tokens. This:
#     input_tokens = chunk.usage_metadata.candidates_token_count
#     output_tokens = chunk.usage_metadata.total_token_count - output_tokens
# must be replaced by:
#     output_tokens = chunk.usage_metadata.candidates_token_count
#     input_tokens = chunk.usage_metadata.total_token_count - output_tokens
# The bug is repeated in multiple parts of the Ecologits code base.
# We are fixing it here only for the Gemini code.

import time
from typing import Any, Callable

from ecologits.tracers import google_genai_tracer
from google.genai.models import Models

# Store the original function
_original_generator = google_genai_tracer._generator


def _impacts_for_usage(usage_metadata: Any, model_name: str, request_latency: float) -> Any:
    """
    Compute the impacts of one request and report them to OpenTelemetry.

    This is the single place where the input/output token fix lives, so the
    streaming and non-streaming patches cannot drift apart.

    Args:
        usage_metadata: Object exposing `candidates_token_count` and
            `total_token_count` (the `usage_metadata` of a response or chunk)
        model_name: Name of the model that served the request
        request_latency: Elapsed time of the request, from `time.perf_counter()`

    Returns:
        Whatever `google_genai_tracer.llm_impacts` returned (possibly `None`)
    """
    # The fix has been applied here
    output_tokens = usage_metadata.candidates_token_count
    input_tokens = usage_metadata.total_token_count - output_tokens

    impacts = google_genai_tracer.llm_impacts(
        provider=google_genai_tracer.PROVIDER,
        model_name=model_name,
        output_token_count=output_tokens,
        request_latency=request_latency,
        electricity_mix_zone=google_genai_tracer.EcoLogits.config.electricity_mix_zone,
    )

    # Telemetry is only recorded when impacts were computed and an
    # OpenTelemetry recorder is configured.
    if impacts is not None and google_genai_tracer.EcoLogits.config.opentelemetry:
        google_genai_tracer.EcoLogits.config.opentelemetry.record_request(
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            request_latency=request_latency,
            impacts=impacts,
            provider=google_genai_tracer.PROVIDER,
            model=model_name,
            endpoint=f"/v1beta/models/{model_name}:generateContent",
        )
    return impacts


# NOTE(review): this replacement is an *async* generator. If the upstream
# `_generator` is consumed with a plain `for` loop, this patch would break
# streaming -- confirm against the installed Ecologits version.
async def _patched_generator(stream: Any, timer_start: float, model_name: str):
    """
    Re-yield every chunk of a stream as a `GenerateContentResponse` with impacts

    Args:
        stream: Async iterable of response chunks
        timer_start: `time.perf_counter()` value taken when the request started
        model_name: Name of the model that serves the request

    Yields:
        One wrapped `GenerateContentResponse` per chunk; `impacts` is `None`
        for chunks whose first candidate has no `finish_reason`, and the
        computed impacts (possibly `None`) for the others
    """
    async for chunk in stream:
        impacts = None
        # Only a chunk carrying a finish reason triggers the computation.
        if chunk.candidates[0].finish_reason is not None:
            request_latency = time.perf_counter() - timer_start
            impacts = _impacts_for_usage(
                chunk.usage_metadata, model_name, request_latency
            )
        yield google_genai_tracer.GenerateContentResponse(
            **chunk.model_dump(), impacts=impacts
        )


def _patched_google_genai_content_wrapper(
    wrapped: Callable,
    instance: Models,  # noqa: ARG001
    args: Any,
    kwargs: Any,
):
    """
    Function that wraps Google GenAI answer with computed impacts

    Args:
        wrapped: Callable that returns the LLM response
        instance: Never used - for compatibility with `wrapt`
        args: Arguments of the callable
        kwargs: Keyword arguments of the callable (must contain `"model"`)

    Returns:
        A wrapped `GenerateContentResponse` with impacts, or the untouched
        response when no impacts could be computed
    """
    timer_start = time.perf_counter()
    response = wrapped(*args, **kwargs)
    request_latency = time.perf_counter() - timer_start
    model_name = kwargs["model"]

    impacts = _impacts_for_usage(response.usage_metadata, model_name, request_latency)
    if impacts is None:
        return response
    return google_genai_tracer.GenerateContentResponse(
        **response.model_dump(), impacts=impacts
    )


# Apply the patch
# NOTE(review): presumably this must run before Ecologits instruments the
# Google GenAI client, otherwise the already-registered wrappers may keep
# referencing the unpatched functions -- verify the import order.
google_genai_tracer._generator = _patched_generator
google_genai_tracer.google_genai_content_wrapper = _patched_google_genai_content_wrapper