Spaces:
Sleeping
Sleeping
import os
from dataclasses import dataclass
from typing import Optional

import pandas as pd
@dataclass
class Model:
    """Description of one model entry together with its pricing information.

    The first four fields are required and are passed positionally; the
    remaining fields are optional. After construction, ``__post_init__``
    fills in the per-direction token prices and a human-readable ``cost``
    string whenever they were not supplied explicitly.
    """

    model_display_name: str
    model_name: str
    api_url: Optional[str]
    provider: str
    hourly_cost: Optional[float] = None
    cost: Optional[str] = None  # human-readable cost; derived below when omitted
    supports_functions: bool = False
    size_billion_parameters: Optional[float] = None  # in billions of parameters
    cost_per_million_tokens: Optional[float] = None
    cost_per_million_input_tokens: Optional[float] = None
    cost_per_million_output_tokens: Optional[float] = None

    def __post_init__(self):
        """Derive missing input/output token prices and the ``cost`` string."""
        # A flat per-token price is the fallback for both directions.
        self.cost_per_million_input_tokens = (
            self.cost_per_million_input_tokens or self.cost_per_million_tokens
        )
        self.cost_per_million_output_tokens = (
            self.cost_per_million_output_tokens or self.cost_per_million_tokens
        )

        # An explicitly supplied cost string always wins.
        if self.cost:
            return

        # Precedence: hourly price, then flat token price, then split prices.
        if self.hourly_cost:
            self.cost = f"${self.hourly_cost} / hour"
        elif self.cost_per_million_tokens:
            self.cost = f"${self.cost_per_million_tokens} / 1M tokens"
        elif self.cost_per_million_input_tokens and self.cost_per_million_output_tokens:
            self.cost = (
                f"${self.cost_per_million_input_tokens} / 1M input tokens, "
                f"${self.cost_per_million_output_tokens} / 1M output tokens"
            )
# Shorthand for reading optional endpoint URLs from the environment;
# returns None when the variable is not set.
env = os.environ.get

# Registry of the models and their pricing. Each model is listed once.
MODELS = [
    # source: https://openai.com/pricing
    # converted costs from dollar/1K tokens to dollar/1M for readability and together_ai comparability
    Model(
        "gpt-3.5-turbo",
        "gpt-3.5-turbo",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=1,
        cost_per_million_output_tokens=2,
    ),
    Model(
        "gpt-4-turbo",
        "gpt-4-1106-preview",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=10,
        cost_per_million_output_tokens=30,
    ),
    Model(
        "gpt-4",
        "gpt-4",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=30,
        cost_per_million_output_tokens=60,
    ),
    # we don't test gpt-4-32k because the tasks don't reach gpt-4 limitations
    # source: https://www.together.ai/pricing
    Model(
        "llama-2-70b-chat",
        "together_ai/togethercomputer/llama-2-70b-chat",
        None,
        "Together AI",
        cost_per_million_tokens=0.2,
    ),
    Model(
        "Mixtral-8x7B-Instruct-v0.1",
        "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
        None,
        "Together AI",
        size_billion_parameters=8 * 7,
        cost_per_million_tokens=0.9,
    ),
    # taken from endpoint pages
    Model(
        "zephyr-7b-beta",
        "huggingface/HuggingFaceH4/zephyr-7b-beta",
        env("ZEPHYR_7B_BETA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=1.30,
        size_billion_parameters=7,
    ),
    Model(
        "Mistral-7B-Instruct-v0.2",
        "huggingface/mistralai/Mistral-7B-Instruct-v0.2",
        env("MISTRAL_7B_BETA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=1.30,
        size_billion_parameters=7,
    ),
    Model(
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "huggingface/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        env("TINY_LLAMA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=0.60,
        size_billion_parameters=1.1,
    ),
    Model(
        "gemini-pro",
        "gemini-pro",
        None,
        "Google VertexAI",
        # https://ai.google.dev/pricing
        # Both prices are per 1M characters, matching the numeric fields below.
        cost="$0.25 / 1M input characters, $0.5 / 1M output characters (60 queries per minute are free)",
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
    Model(
        "chat-bison",
        "chat-bison",
        None,
        "Google VertexAI",
        # https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
    Model(
        "chat-bison-32k",
        "chat-bison-32k",
        None,
        "Google VertexAI",
        # https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
]
def models_costs():
    """Build a cost overview table with one row per entry in ``MODELS``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Model``, ``Provider`` and
        ``Cost``, filled from each entry's ``model_display_name``,
        ``provider`` and ``cost`` attributes, in ``MODELS`` order.
    """
    header = ["Model", "Provider", "Cost"]
    rows = []
    for entry in MODELS:
        rows.append((entry.model_display_name, entry.provider, entry.cost))
    return pd.DataFrame(rows, columns=header)