| | from huggingface_hub import list_models, model_info |
| | from datetime import datetime |
| | from datasets import Dataset, load_dataset |
| | import pandas as pd |
| | import os |
| | import globals |
| | from typing import List, Tuple |
| |
|
| |
|
def get_models_providers() -> List[Tuple[str, List[str]]]:
    """Get list of popular text generation models and associated providers from Hugging Face.

    Queries the Hub for the most-liked text-generation models (capped at
    globals.NUM_MODELS_RUN) and keeps only those that expose at least one
    inference provider mapping.

    Returns:
        List of (model_id, [provider_name, ...]) tuples.
    """
    popular = list_models(
        filter="text-generation",
        sort="likes",
        direction=-1,
        limit=globals.NUM_MODELS_RUN,
        expand="inferenceProviderMapping",
    )

    pairs: List[Tuple[str, List[str]]] = []
    for model in popular:
        # Models without a provider mapping (attribute missing or empty) are skipped.
        mapping = getattr(model, "inference_provider_mapping", None)
        if mapping:
            pairs.append((model.id, [entry.provider for entry in mapping]))
    return pairs
| |
|
| |
|
def initialize_models_providers_file(file_path: str = globals.LOCAL_CONFIG_FILE) -> str:
    """Initialize the models/providers config file with popular models and their providers.

    Fetches the current model -> providers mapping from Hugging Face and writes
    one "model_name provider_name" pair per line, preceded by a commented header.

    Args:
        file_path: Destination path; defaults to globals.LOCAL_CONFIG_FILE.

    Returns:
        A short human-readable summary of how many pairs were written.

    Raises:
        OSError: If the file cannot be written.
    """
    model_to_providers = get_models_providers()

    count = 0
    # Explicit encoding so the output is stable across platforms/locales.
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write("# Models and Providers Configuration\n")
        f.write("# Format: model_name provider_name\n")
        f.write(f"# Auto-generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        # The previous per-model try/except wrapped only f.write calls, which
        # raise no model-specific errors; a failure here is a file I/O problem
        # and should propagate instead of being printed and swallowed.
        for model_id, providers in model_to_providers:
            for provider in providers:
                f.write(f"{model_id} {provider}\n")
                count += 1

    print(f"Successfully wrote {count} model-provider combinations to {file_path}")
    return f"Initialized {count} model-provider combinations"
| |
|
| |
|
def load_models_providers(file_path: str = "models_providers.txt") -> List[Tuple[str, str]]:
    """Load (model, provider) pairs from a whitespace-delimited text file.

    Blank lines and lines starting with '#' are skipped; each remaining line
    must contain at least two whitespace-separated fields (model, provider);
    any extra fields are ignored and malformed lines are silently dropped.

    Args:
        file_path: Path to the config file. NOTE(review): this default differs
            from the globals.LOCAL_CONFIG_FILE default used by
            initialize_models_providers_file — confirm both point to the same file.

    Returns:
        List of (model, provider) tuples; empty if the file cannot be read.
    """
    models_providers: List[Tuple[str, str]] = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith('#'):
                    continue
                parts = line.split()
                if len(parts) >= 2:
                    models_providers.append((parts[0], parts[1]))
    # Only the file access can raise here (the parsing cannot), so catch
    # OSError rather than Exception to avoid hiding programming errors.
    except OSError as e:
        print(f"Error loading models_providers.txt: {str(e)}")
    return models_providers
| |
|
| |
|
def save_results() -> None:
    """Persist job results to HuggingFace dataset.

    Takes a snapshot of globals.job_results under the results lock, converts
    it to a Dataset via pandas, and pushes it to globals.RESULTS_DATASET_NAME
    using the HF_TOKEN environment variable. Errors are reported, not raised.
    """
    try:
        with globals.results_lock:
            if not globals.job_results:
                print("No results to save")
                return

            snapshot = list(globals.job_results.values())
            frame = pd.DataFrame(snapshot)
            hf_dataset = Dataset.from_pandas(frame)

            hf_dataset.push_to_hub(
                globals.RESULTS_DATASET_NAME,
                token=os.getenv("HF_TOKEN"),
                private=False,
            )
            print(f"Saved {len(snapshot)} results to dataset")
    except Exception as e:
        # Best-effort persistence: a failed push must not crash the caller.
        print(f"Error saving results to dataset: {e}")
| |
|
| |
|
def load_results() -> None:
    """Load job results from HuggingFace dataset.

    Pulls the "train" split of globals.RESULTS_DATASET_NAME and repopulates
    globals.job_results keyed by globals.get_model_provider_key. Any failure
    (missing dataset, network, auth) leaves the results empty and is reported.
    """
    try:
        dataset = load_dataset(
            globals.RESULTS_DATASET_NAME,
            split="train",
            token=os.getenv("HF_TOKEN"),
        )

        # Field order mirrors the record layout written by save_results.
        fields = ("model", "provider", "last_run", "status",
                  "current_score", "previous_score", "job_id")
        for row in dataset:
            key = globals.get_model_provider_key(row["model"], row["provider"])
            globals.job_results[key] = {name: row[name] for name in fields}

        print(f"Loaded {len(globals.job_results)} results from dataset")
    except Exception as e:
        print(f"No existing dataset found or error loading: {e}")
        print("Starting with empty results")
| |
|
def get_results_table() -> List[List[str]]:
    """Return job results as a list for Gradio DataFrame.

    Each row is [model, provider, last_run, status, current_score,
    previous_score, job_id], with numeric scores formatted to 4 decimals.
    """

    def _fmt(score):
        # Numeric scores are rendered to 4 decimal places; anything else
        # (None, "N/A", other strings) passes through unchanged.
        return f"{score:.4f}" if isinstance(score, (int, float)) else score

    with globals.results_lock:
        if not globals.job_results:
            return []

        rows = []
        for entry in globals.job_results.values():
            rows.append([
                entry["model"],
                entry["provider"],
                entry["last_run"],
                entry["status"],
                _fmt(entry.get("current_score", "N/A")),
                _fmt(entry.get("previous_score", "N/A")),
                entry.get("job_id", "N/A"),
            ])
        return rows
| |
|
| |
|