| |
|
|
| |
| |
| """ |
| download_models.py |
| Ensures the MMS model files are downloaded into MODELS_DIR. |
| """ |
|
|
| import os |
| import urllib.request |
| import urllib.error |
| from tqdm.auto import tqdm |
| import sys |
|
|
|
|
def download_file(url: str, download_file_path: str, redownload: bool = False) -> bool:
    """Download a single file with urllib and a tqdm progress bar.

    The payload is streamed into ``<download_file_path>.part`` and only moved
    to ``download_file_path`` once the transfer has completed, so an
    interrupted download never leaves a truncated file that a later run would
    mistake for a finished one.

    Args:
        url: Location to fetch.
        download_file_path: Destination path. Its parent directory is created
            when the path has a directory component.
        redownload: When True, delete an existing destination file and fetch
            it again. When False, an existing non-empty file is kept and the
            download is skipped.

    Returns:
        True if the file is present after the call (downloaded or skipped),
        False if the URL could not be opened or the transfer failed.
    """
    file_name = os.path.basename(download_file_path)

    # A bare file name has an empty dirname, and os.makedirs("") raises.
    base_path = os.path.dirname(download_file_path)
    if base_path:
        os.makedirs(base_path, exist_ok=True)

    if os.path.exists(download_file_path):
        if redownload:
            os.remove(download_file_path)
            tqdm.write(f"β»οΈ Redownloading: {file_name}")
        elif os.path.getsize(download_file_path) > 0:
            tqdm.write(f"βοΈ Skipped (already exists): {file_name}")
            return True
        # An existing but empty file falls through and is downloaded again.

    try:
        response = urllib.request.urlopen(url)
    except urllib.error.URLError as e:
        print(f"β Error: Unable to open URL: {url}")
        print(f"Reason: {e.reason}")
        return False

    chunk_size = 64 * 1024
    partial_path = download_file_path + ".part"
    completed = False
    try:
        # Reuse the already-open response instead of requesting the URL twice,
        # and make sure the connection is closed when we are done.
        with response:
            total = int(response.headers.get("Content-Length", 0))
            received = 0
            with open(partial_path, "wb") as out_file, tqdm(
                total=total,
                desc=file_name,
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
            ) as progress:
                for chunk in iter(lambda: response.read(chunk_size), b""):
                    out_file.write(chunk)
                    received += len(chunk)
                    # Advance by the bytes actually read so the bar never
                    # overshoots on the final, shorter chunk.
                    progress.update(len(chunk))

        if received < total:
            # The server announced more bytes than it delivered.
            print(f"β Error: Failed to download {url}")
            print(f"Reason: retrieval incomplete: got only {received} out of {total} bytes")
            return False

        # Publish the file only after a complete transfer.
        os.replace(partial_path, download_file_path)
        completed = True
    except OSError as e:
        # URLError is an OSError; plain socket and file-system errors raised
        # mid-transfer have no ``reason`` attribute, so fall back to the
        # exception itself.
        print(f"β Error: Failed to download {url}")
        print(f"Reason: {getattr(e, 'reason', e)}")
        return False
    finally:
        if not completed and os.path.exists(partial_path):
            os.remove(partial_path)

    tqdm.write(f"β¬οΈ Downloaded: {file_name}")
    return True
|
|
|
|
def main():
    """Download the MMS model files into MODELS_DIR.

    The target directory is read from the ``MODELS_DIR`` environment variable
    and defaults to ``./models``. The process exits with status 1 when the
    target location is not writable or when any file fails to download.
    """
    models_dir = os.environ.get("MODELS_DIR", "./models")
    print(f"π Checking and downloading MMS models to: {models_dir}")

    # Probe the directory that will actually be written to. If it does not
    # exist yet, walk up to the nearest existing ancestor, since that is where
    # the missing directories will be created. abspath() also strips a
    # trailing slash, which would otherwise make dirname() return the target
    # itself.
    probe_dir = os.path.abspath(models_dir)
    while not os.path.isdir(probe_dir):
        parent = os.path.dirname(probe_dir)
        if parent == probe_dir:
            break
        probe_dir = parent

    # Creating entries in a directory needs both write and search permission.
    if not os.access(probe_dir, os.W_OK | os.X_OK):
        print(f"β No write permission to {models_dir}")
        sys.exit(1)

    # Source URL -> local destination path.
    model_urls = {
        "https://dl.fbaipublicfiles.com/mms/torchaudio/ctc_alignment_mling_uroman/dictionary.txt":
            os.path.join(models_dir, "ctc_alignment_mling_uroman_model_dict.txt"),
        "https://dl.fbaipublicfiles.com/mms/torchaudio/ctc_alignment_mling_uroman/model.pt":
            os.path.join(models_dir, "ctc_alignment_mling_uroman_model.pt"),
    }

    for url, full_path in model_urls.items():
        success = download_file(url, full_path)
        if not success:
            print(f"β Failed to fetch: {os.path.basename(full_path)}")
            sys.exit(1)

    print("✅ All model files are ready!")
|
|
# Entry point: this call is unconditional, so it also runs when the module is
# imported (there is no `if __name__ == "__main__":` guard).
main()
| |
| |
|
|