Spaces:
Sleeping
Sleeping
| from huggingface_hub import list_models, model_info | |
| from datetime import datetime | |
| from datasets import Dataset, load_dataset | |
| import pandas as pd | |
| import os | |
| import globals | |
| from typing import List, Tuple | |
def get_models_providers() -> List[Tuple[str, List[str]]]:
    """Fetch popular text-generation models from the Hugging Face Hub.

    Returns:
        A list of ``(model_id, [provider_name, ...])`` tuples, restricted to
        models that expose at least one inference-provider mapping.
    """
    hub_models = list_models(
        filter="text-generation",
        sort="likes",
        direction=-1,
        limit=globals.NUM_MODELS_RUN,
        expand="inferenceProviderMapping",
    )
    pairs: List[Tuple[str, List[str]]] = []
    for hub_model in hub_models:
        # Some entries lack a provider mapping entirely; skip those.
        mapping = getattr(hub_model, "inference_provider_mapping", None)
        if not mapping:
            continue
        provider_names = [entry.provider for entry in mapping]
        pairs.append((hub_model.id, provider_names))
    return pairs
def initialize_models_providers_file(file_path: str = globals.LOCAL_CONFIG_FILE) -> Tuple[str, str]:
    """Initialize the models_providers.txt file with popular models and their providers.

    Writes one ``model_name provider_name`` line per combination, preceded by
    a commented header.

    Args:
        file_path: Destination config file (defaults to the project config path).

    Returns:
        A ``(status_message, file_contents_string)`` tuple.

    Fixes vs. previous revision:
    - Return annotation was ``-> str`` although the function returns a 2-tuple.
    - The final reload now reads back ``file_path`` instead of always the
      default path, so a caller-supplied path round-trips correctly.
    """
    model_to_providers = get_models_providers()
    with open(file_path, 'w') as f:
        f.write("# Models and Providers Configuration\n")
        f.write("# Format: model_name provider_name\n")
        f.write(f"# Auto-generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
        count = 0
        for (model_id, providers) in model_to_providers:
            try:
                for provider in providers:
                    f.write(f"{model_id} {provider}\n")
                    count += 1
            except Exception as e:
                # Best-effort: a bad entry should not abort the whole file.
                print(f"Error processing model {model_id}: {e}")
                continue
    print(f"Successfully wrote {count} model-provider combinations to {file_path}")
    return f"Initialized {count} model-provider combinations", load_models_providers_str(file_path)
def load_models_providers_str(file_path: str = globals.LOCAL_CONFIG_FILE) -> str:
    """Render the configured pairs as one ``model : provider`` line each."""
    rendered_lines = []
    for model_id, provider_name in load_models_providers(file_path):
        rendered_lines.append(f"{model_id} : {provider_name}")
    return "\n".join(rendered_lines)
def load_models_providers(file_path: str = globals.LOCAL_CONFIG_FILE) -> List[Tuple[str, str]]:
    """Load models and providers from text file. Creates file if it doesn't exist.

    Each non-comment, non-blank line is split on whitespace; the first two
    tokens become a ``(model, provider)`` pair. Errors are logged and an
    empty (or partial) list is returned rather than raised.
    """
    pairs: List[Tuple[str, str]] = []
    try:
        # Auto-initialize the config file on first use.
        if not os.path.exists(file_path):
            print(f"Config file {file_path} not found. Initializing...")
            initialize_models_providers_file(file_path)
        with open(file_path, 'r') as fh:
            for raw_line in fh:
                entry = raw_line.strip()
                # Ignore blank lines and '#' comments.
                if not entry or entry.startswith('#'):
                    continue
                tokens = entry.split()
                if len(tokens) >= 2:
                    pairs.append((tokens[0], tokens[1]))
    except Exception as e:
        # Best-effort loader: report and fall through with what we have.
        print(f"Error loading model providers: {str(e)}")
        import traceback
        traceback.print_exc()
    return pairs
def save_results() -> None:
    """Persist job results to HuggingFace dataset.

    Serializes ``globals.job_results`` (dict of result records) into a
    dataset and pushes it to the Hub. Failures are logged, never raised.
    """
    try:
        if not globals.job_results:
            print("No results to save")
            return
        records = list(globals.job_results.values())
        frame = pd.DataFrame(records)
        # Push to HuggingFace Hub
        Dataset.from_pandas(frame).push_to_hub(
            globals.RESULTS_DATASET_NAME,
            token=os.getenv("HF_TOKEN"),
            private=False,
        )
        print(f"Saved {len(records)} results to dataset")
    except Exception as e:
        print(f"Error saving results to dataset: {e}")
def load_results() -> None:
    """Load job results from HuggingFace dataset.

    Populates ``globals.job_results`` keyed by the model/provider key. A
    missing dataset (or any load error) leaves the results empty.
    """
    try:
        # Try to load existing dataset
        dataset = load_dataset(
            globals.RESULTS_DATASET_NAME,
            split="train",
            token=os.getenv("HF_TOKEN"),
        )
        # Fields copied verbatim from each row (KeyError if absent).
        required_fields = ("model", "provider", "last_run", "status",
                           "current_score", "previous_score", "job_id")
        for row in dataset:
            record = {field: row[field] for field in required_fields}
            # Optional fields default to None / [] when not present.
            record["start_time"] = row.get("start_time")
            record["duration"] = row.get("duration")
            record["completed_at"] = row.get("completed_at")
            record["runs"] = row.get("runs", [])
            record["score_variance"] = row.get("score_variance")
            key = globals.get_model_provider_key(row["model"], row["provider"])
            globals.job_results[key] = record
        print(f"Loaded {len(globals.job_results)} results from dataset")
    except Exception as e:
        print(f"No existing dataset found or error loading: {e}")
        print("Starting with empty results")
def style_status(val):
    """Style function for status column.

    Maps a status string to a CSS background-color declaration; unknown
    statuses get no styling.
    """
    status_styles = {
        "COMPLETED": 'background-color: green',
        "ERROR": 'background-color: red',
        "RUNNING": 'background-color: blue',
    }
    return status_styles.get(val, '')
def get_summary_stats():
    """Get summary statistics of job results.

    Returns a markdown summary line of total / running / completed / failed
    job counts from ``globals.job_results``.
    """
    if not globals.job_results:
        return "📊 **Status:** No jobs yet"
    statuses = [info.get("status") for info in globals.job_results.values()]
    total = len(statuses)
    running = statuses.count("RUNNING")
    completed = statuses.count("COMPLETED")
    # Both ERROR and FAILED count as failures.
    failed = statuses.count("ERROR") + statuses.count("FAILED")
    return (f"📊 **Total:** {total} | 🔵 **Running:** {running} | "
            f"✅ **Completed:** {completed} | ❌ **Failed:** {failed}")
def get_results_table():
    """Return job results as a styled pandas DataFrame for Gradio DataFrame.

    Returns:
        An empty ``pd.DataFrame`` with the full column set when there are no
        results, otherwise a pandas ``Styler`` with the Status column colored.

    Fixes vs. previous revision:
    - The empty-results DataFrame previously had 11 columns (named
      "Latest Job Id", no "Actions") while the populated table had 12
      ("Job Id and Logs", "Actions"); both paths now share one column list.
    """
    columns = ["Model", "Provider", "Runs", "Last Run", "Status",
               "Mean Score", "Variance", "Previous Score", "Duration",
               "Completed At", "Job Id and Logs", "Actions"]
    if not globals.job_results:
        return pd.DataFrame(columns=columns)

    def _fmt_score(value, spec):
        # Fixed-precision string for numeric scores; pass through "N/A"/None.
        if value is not None and isinstance(value, (int, float)):
            return format(value, spec)
        return value

    table_data = []
    for info in globals.job_results.values():
        current_score = _fmt_score(info.get("current_score", "N/A"), ".4f")
        variance = _fmt_score(info.get("score_variance", "N/A"), ".6f")
        previous_score = _fmt_score(info.get("previous_score", "N/A"), ".4f")

        # "completed/total" run counter.
        runs = info.get("runs", [])
        if runs:
            completed_runs = sum(1 for run in runs if run.get("status") == "COMPLETED")
            runs_str = f"{completed_runs}/{len(runs)}"
        else:
            runs_str = "0/0"

        # Duration in seconds -> "Xm Ys".
        duration = info.get("duration")
        if duration is not None and isinstance(duration, (int, float)):
            duration_str = f"{int(duration // 60)}m {int(duration % 60)}s"
        else:
            duration_str = "N/A"

        completed_at = info.get("completed_at", "N/A")

        # Link the job id to its logs page on the Hub.
        job_id = info.get("job_id", "N/A")
        if job_id != "N/A":
            job_url = f"https://hf.co/jobs/{globals.NAMESPACE}/{job_id}"
            job_link = f'{job_id}: <a href="{job_url}" target="_blank">📄</a> '
        else:
            job_link = job_id

        relaunch_link = '🔄 Relaunch'

        table_data.append([
            info["model"],
            info["provider"],
            runs_str,
            info["last_run"],
            info["status"],
            current_score,
            variance,
            previous_score,
            duration_str,
            completed_at,
            job_link,
            relaunch_link,
        ])

    df = pd.DataFrame(table_data, columns=columns)
    # Color the Status cells by state (green/red/blue).
    return df.style.map(style_status, subset=['Status'])