import hashlib
import logging
import os

logger = logging.getLogger("ModelVersioning")

# Global cache for model hashes.
# Maps (absolute path, algorithm name) -> ((mtime_ns, size), hex digest).
# The algorithm is part of the key so an MD5 request never returns a cached
# SHA256 digest; the stat signature lets a rewritten file invalidate its entry.
_HASH_CACHE = {}


def get_file_hash(filepath: str, hash_type: str = "sha256") -> str:
    """Return the hex digest of a file, caching results per path and algorithm.

    The file is read in 64 KiB chunks. A cached digest is reused only while
    the file's modification time and size are unchanged, so overwriting a
    file in place yields a fresh hash on the next call.

    Args:
        filepath: Path to the file to hash. Relative paths are resolved
            against the current working directory.
        hash_type: Name of any algorithm accepted by ``hashlib.new``
            (for example ``"sha256"`` or ``"md5"``); matched
            case-insensitively.

    Returns:
        The hexadecimal digest on success, otherwise one of these sentinel
        strings (this function does not raise for I/O or algorithm errors):

        * ``"unknown_no_path"`` - ``filepath`` is empty or ``None``.
        * ``"unknown_not_found"`` - the path does not exist or cannot be
          stat'ed.
        * ``"error_computing_hash"`` - the algorithm is unsupported or the
          file could not be read.
    """
    if not filepath:
        return "unknown_no_path"

    # Standardize the path so equivalent spellings share one cache entry.
    abs_path = os.path.abspath(filepath)

    try:
        stat_result = os.stat(abs_path)
    except (OSError, ValueError):
        # Same outcome os.path.exists() reports as "missing": nonexistent
        # path, inaccessible parent, or a path the OS rejects outright.
        return "unknown_not_found"

    algorithm = str(hash_type).lower()
    cache_key = (abs_path, algorithm)
    # Taken before reading: if the file changes mid-read, the next call sees
    # a different signature and recomputes instead of trusting this digest.
    signature = (stat_result.st_mtime_ns, stat_result.st_size)

    cached = _HASH_CACHE.get(cache_key)
    if cached is not None and cached[0] == signature:
        return cached[1]

    try:
        logger.info("Computing %s for %s...", hash_type, abs_path)
        # hashlib.new raises ValueError for unknown algorithm names, which is
        # reported through the error sentinel below rather than silently
        # substituting a different algorithm.
        hash_func = hashlib.new(algorithm)
        with open(abs_path, "rb") as f:
            for chunk in iter(lambda: f.read(65536), b""):
                hash_func.update(chunk)
        file_hash = hash_func.hexdigest()
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on getting a string
        # back, never an exception.
        logger.error("Error computing hash for %s: %s", abs_path, e)
        return "error_computing_hash"

    _HASH_CACHE[cache_key] = (signature, file_hash)
    logger.info("Cached hash for %s: %s", abs_path, file_hash)
    return file_hash


def get_model_metadata(model_name: str, filepath: str, version: str = "1.0.0") -> dict:
    """Return a structured metadata dictionary for a model file.

    Args:
        model_name: Value stored under the ``"name"`` key.
        filepath: Path of the model file; hashed with ``get_file_hash`` using
            its default algorithm.
        version: Value stored under the ``"version"`` key.

    Returns:
        A dict with keys ``"name"``, ``"version"`` and ``"hash"``. ``"hash"``
        holds whatever ``get_file_hash`` returns, which may be one of its
        sentinel strings when the file is missing or unreadable.
    """
    return {
        "name": model_name,
        "version": version,
        "hash": get_file_hash(filepath),
    }