Spaces:
Running on T4
Running on T4
| # patch_ecologits.py | |
| # Ecologits contains a bug where the number of input tokens is swapped with the number | |
| # of output tokens. This: | |
| # input_tokens = chunk.usage_metadata.candidates_token_count | |
| # output_tokens = chunk.usage_metadata.total_token_count - output_tokens | |
| # must be replaced by: | |
| # output_tokens = chunk.usage_metadata.candidates_token_count | |
| # input_tokens = chunk.usage_metadata.total_token_count - output_tokens | |
| # The bug is repeated in multiple parts of the Ecologits code base. | |
| # We are fixing it here only for the Gemini code. | |
| import time | |
| from typing import Any, Callable | |
| from ecologits.tracers import google_genai_tracer | |
| from google.genai.models import Models | |
# Keep references to the upstream implementations of both attributes that
# this module replaces, so the patch can be inspected or reverted later.
_original_generator = google_genai_tracer._generator
_original_google_genai_content_wrapper = (
    google_genai_tracer.google_genai_content_wrapper
)
async def _patched_generator(stream, timer_start, model_name):
    """Re-yield a Gemini response stream, attaching impacts to the final chunk.

    Replacement for ``google_genai_tracer._generator``. The output token count
    is read from ``candidates_token_count`` and the input token count is
    derived as ``total_token_count - output_tokens``.

    Args:
        stream: Async iterable of response chunks (consumed with ``async for``).
        timer_start: ``time.perf_counter()`` reading taken when the request
            started; used to compute the request latency.
        model_name: Model identifier passed to the impact computation and used
            to build the endpoint label recorded through OpenTelemetry.

    Yields:
        One ``google_genai_tracer.GenerateContentResponse`` per incoming chunk.
        ``impacts`` is ``None`` for every chunk except a finishing chunk for
        which ``llm_impacts`` returned a value.

    NOTE(review): this is an async generator, so it only fits call sites that
    iterate ``google_genai_tracer._generator`` with ``async for`` - confirm
    against the upstream stream wrappers.
    """
    tracer = google_genai_tracer
    async for chunk in stream:
        candidates = chunk.candidates
        # A chunk without candidates (None or empty list) cannot be a
        # finishing chunk; pass it through instead of failing on
        # ``candidates[0]``.
        if not candidates or candidates[0].finish_reason is None:
            yield tracer.GenerateContentResponse(
                **chunk.model_dump(), impacts=None
            )
            continue

        request_latency = time.perf_counter() - timer_start

        # Usage metadata and its counters may be None; fall back to zero so a
        # missing counter does not abort the stream with a TypeError.
        usage = chunk.usage_metadata
        output_tokens = getattr(usage, "candidates_token_count", None) or 0
        total_tokens = getattr(usage, "total_token_count", None) or 0
        # NOTE(review): the usage metadata presumably also exposes a direct
        # prompt token counter - confirm whether it should be preferred.
        input_tokens = max(total_tokens - output_tokens, 0)

        impacts = tracer.llm_impacts(
            provider=tracer.PROVIDER,
            model_name=model_name,
            output_token_count=output_tokens,
            request_latency=request_latency,
            electricity_mix_zone=tracer.EcoLogits.config.electricity_mix_zone,
        )
        if impacts is not None:
            telemetry = tracer.EcoLogits.config.opentelemetry
            if telemetry:
                telemetry.record_request(
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                    request_latency=request_latency,
                    impacts=impacts,
                    provider=tracer.PROVIDER,
                    model=model_name,
                    endpoint=f"/v1beta/models/{model_name}:generateContent",
                )
        # ``impacts`` is None here exactly when it could not be computed.
        yield tracer.GenerateContentResponse(
            **chunk.model_dump(), impacts=impacts
        )
def _patched_google_genai_content_wrapper(
    wrapped: Callable,
    instance: Models,  # noqa: ARG001
    args: Any,
    kwargs: Any,
):
    """Wrap a Google GenAI answer with its computed impacts.

    Replacement for ``google_genai_tracer.google_genai_content_wrapper``. The
    output token count is read from ``candidates_token_count`` and the input
    token count is derived as ``total_token_count - output_tokens``.

    Args:
        wrapped: Callable that returns the LLM response.
        instance: Never used - for compatibility with `wrapt`.
        args: Positional arguments forwarded to ``wrapped``.
        kwargs: Keyword arguments forwarded to ``wrapped``; must contain
            ``"model"``.

    Returns:
        A ``google_genai_tracer.GenerateContentResponse`` carrying the
        impacts, or the untouched response when ``llm_impacts`` returns
        ``None``.

    Raises:
        KeyError: If ``"model"`` is missing from ``kwargs``.
    """
    tracer = google_genai_tracer

    timer_start = time.perf_counter()
    response = wrapped(*args, **kwargs)
    request_latency = time.perf_counter() - timer_start
    model_name = kwargs["model"]

    # Usage metadata and its counters may be None; fall back to zero so a
    # successfully obtained response is not lost to a TypeError here.
    usage = response.usage_metadata
    output_tokens = getattr(usage, "candidates_token_count", None) or 0
    total_tokens = getattr(usage, "total_token_count", None) or 0
    # NOTE(review): the usage metadata presumably also exposes a direct
    # prompt token counter - confirm whether it should be preferred.
    input_tokens = max(total_tokens - output_tokens, 0)

    impacts = tracer.llm_impacts(
        provider=tracer.PROVIDER,
        model_name=model_name,
        output_token_count=output_tokens,
        request_latency=request_latency,
        electricity_mix_zone=tracer.EcoLogits.config.electricity_mix_zone,
    )
    if impacts is None:
        # No impacts available: hand back the response exactly as received.
        return response

    telemetry = tracer.EcoLogits.config.opentelemetry
    if telemetry:
        telemetry.record_request(
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            request_latency=request_latency,
            impacts=impacts,
            provider=tracer.PROVIDER,
            model=model_name,
            endpoint=f"/v1beta/models/{model_name}:generateContent",
        )
    return tracer.GenerateContentResponse(
        **response.model_dump(), impacts=impacts
    )
# Install the corrected implementations on the tracer module by rebinding
# its module-level names.
# NOTE(review): presumably this module has to be imported before EcoLogits
# is initialised for the replacements to be picked up - confirm.
google_genai_tracer.google_genai_content_wrapper = (
    _patched_google_genai_content_wrapper
)
google_genai_tracer._generator = _patched_generator