import logging
from pathlib import Path
from typing import Dict, List, Tuple

logging.basicConfig(level=logging.INFO)

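# Expected asset layout, inferred from the path construction in BenchChecker
# below (names in angle brackets are placeholders, not real assets):
#
#   <base_path>/
#     <benchmark_name>/
#       dataset/<category>/<video>.mp4
#       CFG/<cfg_prompt>.json
#       models/<model_name>/
#         vector/video/**/<video>.npy
#         CFG/<cfg_prompt>/metric/overall_metrics.json
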
class BenchChecker:
    def __init__(self, base_path: str):
        """Initialize BenchChecker with the base assets path.

        Args:
            base_path (str): Base path to the assets directory containing benchmark folders.
        """
        self.base_path = Path(base_path)
        self.logger = logging.getLogger(__name__)

    def check_benchmark_exists(self, benchmark_name: str) -> bool:
        """Check whether the benchmark folder exists."""
        benchmark_path = self.base_path / benchmark_name
        exists = benchmark_path.exists() and benchmark_path.is_dir()
        if exists:
            self.logger.info(f"Found benchmark directory: {benchmark_name}")
        else:
            self.logger.error(f"Benchmark directory not found: {benchmark_name}")
        return exists

    def get_video_list(self, benchmark_name: str) -> List[str]:
        """Get the list of videos from the benchmark's dataset directory.

        Returns an empty list if the directory is missing or contains no videos.
        """
        dataset_path = self.base_path / benchmark_name / "dataset"
        videos = []
        if not dataset_path.exists():
            self.logger.warning(f"Dataset directory not found: {dataset_path}")
            return videos  # Return an empty list
        # Find all .mp4 files one directory level down (dataset/<category>/*.mp4)
        for category in dataset_path.glob("*"):
            if category.is_dir():
                for video_file in category.glob("*.mp4"):
                    videos.append(video_file.stem)
        self.logger.info(f"Found {len(videos)} videos in {benchmark_name} dataset")
        return videos

    def check_model_exists(self, benchmark_name: str, model_name: str) -> bool:
        """Check whether the model directory exists in the benchmark's models directory."""
        model_path = self.base_path / benchmark_name / "models" / model_name
        exists = model_path.exists() and model_path.is_dir()
        if exists:
            self.logger.info(f"Found model directory: {model_name}")
        else:
            self.logger.error(f"Model directory not found: {model_name}")
        return exists

    def check_cfg_files(self, benchmark_name: str, model_name: str, cfg_prompt: str) -> Tuple[bool, bool]:
        """Check that the CFG file/directory exists in both the benchmark and model directories."""
        # The benchmark ships a <cfg_prompt>.json file ...
        benchmark_cfg = self.base_path / benchmark_name / "CFG" / f"{cfg_prompt}.json"
        benchmark_cfg_exists = benchmark_cfg.exists() and benchmark_cfg.is_file()
        # ... while the model has a CFG/<cfg_prompt> directory
        model_cfg = self.base_path / benchmark_name / "models" / model_name / "CFG" / cfg_prompt
        model_cfg_exists = model_cfg.exists() and model_cfg.is_dir()
        if benchmark_cfg_exists:
            self.logger.info(f"Found benchmark CFG file: {cfg_prompt}.json")
        else:
            self.logger.error(f"Benchmark CFG file not found: {cfg_prompt}.json")
        if model_cfg_exists:
            self.logger.info(f"Found model CFG directory: {cfg_prompt}")
        else:
            self.logger.error(f"Model CFG directory not found: {cfg_prompt}")
        return benchmark_cfg_exists, model_cfg_exists

    def check_vector_files(self, benchmark_name: str, model_name: str, video_list: List[str]) -> bool:
        """Check that the video vectors match the dataset exactly."""
        vector_path = self.base_path / benchmark_name / "models" / model_name / "vector" / "video"
        # An empty dataset always fails the check
        if not video_list:
            self.logger.error("No videos found in dataset - cannot proceed")
            return False
        # The vector directory must exist
        if not vector_path.exists():
            self.logger.error("Vector directory doesn't exist")
            return False
        # Collect vector file stems recursively, since vectors may sit in subdirectories
        vector_files = [f.stem for f in vector_path.rglob("*.npy")]
        missing_vectors = set(video_list) - set(vector_files)
        extra_vectors = set(vector_files) - set(video_list)
        if missing_vectors:
            self.logger.error(f"Missing vectors for videos: {missing_vectors}")
            return False
        if extra_vectors:
            self.logger.error(f"Extra vectors found: {extra_vectors}")
            return False
        self.logger.info(f"Vector status: videos={len(video_list)}, vectors={len(vector_files)}")
        # The sets already match at this point; the count comparison additionally
        # catches duplicate stems (e.g. the same vector name in two subdirectories)
        return len(video_list) == len(vector_files)

    def check_metrics_file(self, benchmark_name: str, model_name: str, cfg_prompt: str) -> bool:
        """Check that overall_metrics.json exists in the model's CFG/<cfg_prompt>/metric directory."""
        metrics_path = self.base_path / benchmark_name / "models" / model_name / "CFG" / cfg_prompt / "metric" / "overall_metrics.json"
        exists = metrics_path.exists() and metrics_path.is_file()
        if exists:
            self.logger.info(f"Found overall metrics file for {model_name}")
        else:
            self.logger.error(f"Overall metrics file not found for {model_name}")
        return exists

    def check_benchmark(self, benchmark_name: str, model_name: str, cfg_prompt: str) -> Dict[str, bool]:
        """Perform all benchmark checks and return their status."""
        status = {
            'benchmark_exists': False,
            'model_exists': False,
            'cfg_files_exist': False,
            'vectors_match': False,
            'metrics_exist': False,
        }
        # Check the benchmark directory; all later checks depend on it
        status['benchmark_exists'] = self.check_benchmark_exists(benchmark_name)
        if not status['benchmark_exists']:
            return status
        # Get the video list
        video_list = self.get_video_list(benchmark_name)
        # Check the model directory
        status['model_exists'] = self.check_model_exists(benchmark_name, model_name)
        if not status['model_exists']:
            return status
        # Check the CFG files
        benchmark_cfg, model_cfg = self.check_cfg_files(benchmark_name, model_name, cfg_prompt)
        status['cfg_files_exist'] = benchmark_cfg and model_cfg
        if not status['cfg_files_exist']:
            return status
        # Check the vectors
        status['vectors_match'] = self.check_vector_files(benchmark_name, model_name, video_list)
        # Check the metrics file only if the vectors match
        if status['vectors_match']:
            status['metrics_exist'] = self.check_metrics_file(benchmark_name, model_name, cfg_prompt)
        return status

    def get_benchmark_status(self, check_status: Dict[str, bool]) -> str:
        """Determine which execution path to take based on the check results."""
        basic_checks = ['benchmark_exists', 'model_exists', 'cfg_files_exist']
        if not all(check_status[check] for check in basic_checks):
            return "cannot_execute"
        if check_status['vectors_match'] and check_status['metrics_exist']:
            return "all_passed"
        elif not check_status['vectors_match']:
            return "no_vectors"
        else:  # vectors exist but metrics are missing
            return "no_metrics"

# Example usage
if __name__ == "__main__":
    bench_checker = BenchChecker("assets")
    status = bench_checker.check_benchmark(
        benchmark_name="huggingface_benchmarks_dataset",
        model_name="MSRVTT",
        cfg_prompt="topk",
    )
    execution_path = bench_checker.get_benchmark_status(status)
    print(f"Checks completed. Execution path: {execution_path}")
    print(f"Status: {status}")