Spaces:
Sleeping
Sleeping
| """ | |
| Collect data from multiple sources and create a base dataframe for the LLMCalculator table | |
| Latency - https://github.com/clembench/clembench-runs/tree/main/Addenda/Latency | |
| Pricing - pricing.json | |
| Model info - https://github.com/kushal-10/clembench/blob/feat/registry/backends/model_registry_updated.json | |
| """ | |
| import pandas as pd | |
| import json | |
| import requests | |
| from assets.text_content import CLEMBENCH_RUNS_REPO, REGISTRY_URL, BENCHMARK_FILE, LATENCY_FOLDER, RESULT_FILE, LATENCY_SUFFIX | |
| import os | |
def validate_request(url: str, response) -> bool:
    """
    Report whether an HTTP response carries a 200 status code.

    A failure is reported on stdout together with the offending URL and the
    status code that was received.

    Args:
        url (str): The URL that was requested (used only in the failure message)
        response (requests.Response): The response object from the request

    Returns:
        bool: True when the status code is exactly 200, False otherwise
    """
    succeeded = response.status_code == 200
    if not succeeded:
        print(f"Failed to read file - {url}. Status Code: {response.status_code}")
    return succeeded
def fetch_benchmark_data(benchmark: str = "text", version_names: list = None) -> tuple:
    """
    Fetch and parse benchmark results and latency data from CSV files.

    Versions are tried in the order given. A version counts as multimodal when
    its name contains 'multimodal'; versions of the other type are skipped.
    The first version whose results and latency files both download with
    status 200 and parse as CSV is returned.

    Args:
        benchmark (str): Type of benchmark to fetch ('text' or 'multimodal')
        version_names (list): List of version names to search through, sorted
            by latest first. None (the default) is treated as an empty list.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing:
            - results_df: DataFrame with benchmark results
            - latency_df: DataFrame with latency measurements
        Returns (None, None) if no matching version is found or every
        candidate version fails to download or parse.

    Note:
        requests.RequestException, pd.errors.EmptyDataError and
        pd.errors.ParserError are caught per version, reported on stdout, and
        the next version is tried; they are not propagated to the caller.
    """
    # Local imports keep this block self-contained.
    import posixpath
    from io import StringIO

    # Without a timeout, requests.get can block forever on a stalled connection.
    timeout_seconds = 30
    want_multimodal = benchmark == "multimodal"

    for v in version_names or ():
        # Check if version matches benchmark type
        is_multimodal = 'multimodal' in v
        if want_multimodal != is_multimodal:
            continue

        # Construct URLs. posixpath.join always uses '/', whereas os.path.join
        # would insert backslashes on Windows and produce invalid URLs.
        results_url = posixpath.join(CLEMBENCH_RUNS_REPO, v, RESULT_FILE)
        latency_url = posixpath.join(CLEMBENCH_RUNS_REPO, LATENCY_FOLDER, v + LATENCY_SUFFIX)

        try:
            results = requests.get(results_url, timeout=timeout_seconds)
            if not validate_request(results_url, results):
                # No point downloading the latency file for an unusable version.
                continue

            latency = requests.get(latency_url, timeout=timeout_seconds)
            if not validate_request(latency_url, latency):
                continue

            # Convert the CSV content to pandas DataFrames
            results_df = pd.read_csv(StringIO(results.text))
            latency_df = pd.read_csv(StringIO(latency.text))
            return results_df, latency_df
        except requests.RequestException as e:
            print(f"Error fetching data for version {v}: {e}")
        except pd.errors.EmptyDataError:
            print(f"Error: Empty CSV file found for version {v}")
        except pd.errors.ParserError:
            print(f"Error: Unable to parse CSV data for version {v}")

    return None, None
def fetch_version_metadata() -> tuple:
    """
    Fetch and process benchmark metadata from the Clembench GitHub repository.

    The data is sourced from: https://github.com/clembench/clembench-runs
    Configure the repository path in src/assets/text_content/CLEMBENCH_RUNS_REPO

    The benchmark index (CLEMBENCH_RUNS_REPO + BENCHMARK_FILE) is downloaded,
    its version names are sorted newest first, and the latest usable
    multimodal and text versions are loaded via fetch_benchmark_data.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: A tuple
        containing, in this order:
            - mm_latency: Multimodal latency data
            - mm_result: Multimodal benchmark results
            - text_latency: Text latency data
            - text_result: Text benchmark results
        Returns (None, None, None, None) if the index request fails or its
        body is not valid JSON. Individual pairs may also be None when
        fetch_benchmark_data finds no usable version for that benchmark type.
    """
    failure = (None, None, None, None)
    json_url = CLEMBENCH_RUNS_REPO + BENCHMARK_FILE

    try:
        # Without a timeout, requests.get can block forever on a stalled connection.
        response = requests.get(json_url, timeout=30)

        # Check if the JSON file request was successful
        if not validate_request(json_url, response):
            return failure

        json_data = response.json()
    except requests.RequestException as e:
        print(f"Error fetching benchmark metadata: {e}")
        return failure
    except json.JSONDecodeError as e:
        print(f"Error parsing benchmark metadata JSON: {e}")
        return failure

    versions = json_data['versions']

    # Sort the versions in benchmark by latest first. Names are expected to
    # look like 'v<major>.<minor>[_suffix]'; the numeric part is compared as a
    # list of ints so that e.g. 1.10 sorts after 1.9.
    version_names = sorted(
        [ver['version'] for ver in versions],
        key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),
        reverse=True
    )

    # Latency is in seconds
    mm_result, mm_latency = fetch_benchmark_data("multimodal", version_names)
    text_result, text_latency = fetch_benchmark_data("text", version_names)

    # NOTE: latency comes before results in the returned tuple.
    return mm_latency, mm_result, text_latency, text_result
def fetch_registry_data() -> dict:
    """
    Fetch and parse model registry data from the Clembench registry URL.

    The data is sourced from the model registry defined in REGISTRY_URL.

    Returns:
        The decoded JSON body of the registry response.
        Returns None if the request fails, the status code is not 200, or
        the body is not valid JSON.

    Note:
        requests.RequestException and json.JSONDecodeError are caught and
        reported on stdout; they are not propagated to the caller.
    """
    try:
        # Without a timeout, requests.get can block forever on a stalled connection.
        response = requests.get(REGISTRY_URL, timeout=30)
        if not validate_request(REGISTRY_URL, response):
            return None
        registry = response.json()
    except requests.RequestException as e:
        print(f"Error fetching registry data: {e}")
    except json.JSONDecodeError as e:
        print(f"Error parsing registry JSON: {e}")
    else:
        return registry

    return None
if __name__ == "__main__":
    # Manual smoke test: exercise both fetchers and show the first registry entry.
    fetch_version_metadata()
    registry_data = fetch_registry_data()
    # fetch_registry_data returns None on failure; indexing None would raise
    # TypeError, so only print when something was actually loaded.
    # NOTE(review): indexing with [0] assumes the registry JSON is a list - confirm.
    if registry_data:
        print(registry_data[0])
    else:
        print("No registry data available.")