| import collections | |
| import json | |
| import os | |
| import time | |
| import requests | |
| from montreal_forced_aligner.models import MODEL_TYPES, ModelManager, ModelRelease | |
# Root of the model repository: two directory levels above this file
# (presumably this script lives in ``<root>/scripts/`` -- the token file below
# is read from that directory).
mfa_model_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# When True, an already-published release whose version matches the local
# ``meta.json`` gets its body replaced with the current README instead of
# simply being skipped.
UPDATE = False

# GitHub API token.  Surrounding whitespace is stripped because a trailing
# newline in the token file would otherwise be embedded in the
# ``Authorization`` header value.
with open(os.path.join(mfa_model_root, "scripts", "token"), "r", encoding="utf8") as f:
    token = f.read().strip()

# Only this version directory is considered for model types that are organised
# as ``<type>/<language>/<phone_set>/<version>/``.
CURRENT_VERSION = "v3.3.0"

# Release tag layout; ``version`` is taken from ``meta.json``.
tag_template = "{model_type}-{model_name}-v{version}"

manager = ModelManager(token=token)
manager.refresh_remote()

# Human-readable names for each model type.
# NOTE(review): not referenced anywhere in the visible part of this script.
model_type_names = {
    "acoustic": "Acoustic models",
    "dictionary": "Pronunciation dictionaries",
    "g2p": "G2P models",
    "language_model": "Language models",
    "ivector": "Ivector extractors",
    "corpus": "Corpora",
    "tokenizer": "Tokenizers",
}
print(manager.remote_models)

# Link to the untrained ("base") dictionary file inside the repository tree.
base_dict_template = "https://github.com/MontrealCorpusTools/mfa-models/tree/main/dictionary/{language}/{phone_set}/{version}/{model_name}.dict"

# Languages for which an acoustic model whose tag contains "mfa" has been seen.
acoustic_mfas = set()
# Model types stored as ``<type>/<language>/<version>/`` (no phone-set level).
_FLAT_MODEL_TYPES = {"ivector", "tokenizer"}
# Version directory that is never released for the flat model types.
_SKIPPED_FLAT_VERSION = "v2.0.0"
# Absolute replacement for relative corpus links found in READMEs.
_CORPUS_URL = "https://github.com/MontrealCorpusTools/mfa-models/tree/main/corpus/"
# Relative corpus prefixes, longest first: the shorter one is a suffix of the
# longer one, so it must only be tried when the longer one is absent.
_RELATIVE_CORPUS_PREFIXES = ("../../../../corpus/", "../../../corpus/")
# Pause after every GitHub API call.
_API_PAUSE_SECONDS = 5


def _github_headers(content_type=None):
    """Return the headers sent with every GitHub API request.

    ``content_type`` is added as ``Content-Type`` when given (asset uploads).
    """
    headers = {
        "Accept": "application/vnd.github.v3+json",
        "Authorization": f"token {token}",
    }
    if content_type is not None:
        headers["Content-Type"] = content_type
    return headers


def _read_model_files(version_dir):
    """Load ``meta.json`` and ``README.md`` from *version_dir*.

    Returns a ``(meta, readme)`` tuple: the parsed JSON and the README text.
    """
    with open(os.path.join(version_dir, "meta.json"), "r", encoding="utf8") as f:
        meta = json.load(f)
    with open(os.path.join(version_dir, "README.md"), "r", encoding="utf8") as f:
        readme = f.read()
    return meta, readme


def _build_release_body(readme, dict_url=None):
    """Turn a repository README into the text used as the release body.

    When *dict_url* is given, a bullet linking to the base dictionary is
    inserted just before the ``## Installation`` heading.  Relative links into
    the ``corpus`` directory are rewritten to absolute GitHub URLs.
    """
    if dict_url is not None:
        # The heading must keep the space after "##"; without it Markdown no
        # longer treats the line as a heading.
        readme = readme.replace(
            "\n\n## Installation",
            "\n- The dictionary downloadable from this release has trained pronunciation "
            f"and silence probabilities. The base dictionary is available [here]({dict_url})"
            "\n\n## Installation",
        )
    for prefix in _RELATIVE_CORPUS_PREFIXES:
        if prefix in readme:
            readme = readme.replace(prefix, _CORPUS_URL)
            break
    return readme


def _already_released(model_type, model_name, version, readme, match_version):
    """Return True when no new release should be created for this model.

    With ``match_version`` False, any existing release carrying *model_name*
    counts.  With ``match_version`` True, only a release whose version (with
    every "v" removed) equals *version* counts; in that case, and when
    ``UPDATE`` is set, the release body is replaced with *readme*.
    """
    existing_releases = manager.remote_models[model_type]
    if model_name not in existing_releases:
        return False
    if not match_version:
        # NOTE(review): the original flat-layout branch skipped here
        # unconditionally; the version comparison that followed it was
        # unreachable.  That reachable behaviour is kept as-is.
        return True
    for existing_version, model in existing_releases[model_name].items():
        if existing_version.replace("v", "") != version:
            continue
        if UPDATE:
            print("UPDATING", model.release_link)
            response = requests.patch(
                model.release_link,
                json={"body": readme},
                headers=_github_headers(),
            )
            if not response.ok:
                print("UPDATE FAILED", response.status_code, response.text)
            time.sleep(_API_PAUSE_SECONDS)
        return True
    return False


def _create_release(model_type, model_name, version, tag, readme, staging_directory, meta):
    """Create the GitHub release for *tag* and upload the staged model file.

    The staged file is ``<staging_directory>/<model_name>.dict`` for
    dictionaries and ``<model_name>.zip`` for everything else.  Nothing is
    created when that file is missing, so no asset-less release is left behind.
    """
    if model_type == "dictionary":
        ext = ".dict"
        content_type = "text/tab-separated-values"
    else:
        ext = ".zip"
        content_type = "application/zip"
    model_path = os.path.join(staging_directory, model_name + ext)
    if not os.path.isfile(model_path):
        print("SKIPPING", tag, "- missing staged file", model_path)
        return

    print(tag, len(readme))
    r = requests.post(
        manager.base_url,
        json={
            "tag_name": tag,
            "name": f"{model_name} v{version}",
            "body": readme,
            "target_commitish": "main",
            "draft": False,
            "prerelease": False,
            "generate_release_notes": False,
        },
        headers=_github_headers(),
    )
    d = r.json()
    time.sleep(_API_PAUSE_SECONDS)
    print(d)
    if "errors" in d:
        return
    # Any other failure (e.g. rejected credentials) carries no "upload_url";
    # surface it as an HTTP error instead of a bare KeyError below.
    r.raise_for_status()

    with open(model_path, "rb") as f:
        data = f.read()
    r2 = requests.post(
        # The API returns a URI template; drop the template suffix.
        d["upload_url"].replace("{?name,label}", ""),
        data=data,
        params={"name": os.path.basename(model_path)},
        headers=_github_headers(content_type),
    )
    print(r2.json())
    print(meta)
    print(tag)
    time.sleep(_API_PAUSE_SECONDS)


def _process_version_dir(model_type, lang, version_dir, version_name, staging_directory, phone_set=None):
    """Publish (or update) the release described by one version directory.

    *version_name* is the directory name (e.g. ``v3.3.0``), while the release
    version itself comes from ``meta.json``.  *phone_set* is None for the flat
    model types.
    """
    if not os.path.isdir(version_dir) or not os.listdir(version_dir):
        return
    meta, readme = _read_model_files(version_dir)
    model_name = meta["name"]
    version = meta["version"]
    tag = tag_template.format(model_type=model_type, model_name=model_name, version=version)

    if "mfa" in tag:
        if model_type == "acoustic":
            acoustic_mfas.add(lang)
        elif lang not in acoustic_mfas and model_type != "ivector":
            # No "mfa" acoustic model has been seen for this language.
            return

    dict_url = None
    if model_type == "dictionary" and phone_set is not None and ("mfa" in tag or "arpa" in tag):
        dict_url = base_dict_template.format(
            language=lang,
            phone_set=phone_set,
            version=version_name,
            model_name=model_name,
        )
    readme = _build_release_body(readme, dict_url)

    if _already_released(model_type, model_name, version, readme, match_version=phone_set is not None):
        return
    _create_release(model_type, model_name, version, tag, readme, staging_directory, meta)


for model_type, _model_class in MODEL_TYPES.items():
    model_directory = os.path.join(mfa_model_root, model_type)
    staging_directory = os.path.join(model_directory, "staging")
    for lang in os.listdir(model_directory):
        if lang == "staging":
            continue
        lang_dir = os.path.join(model_directory, lang)
        if not os.path.isdir(lang_dir):
            continue
        if model_type in _FLAT_MODEL_TYPES:
            for v in os.listdir(lang_dir):
                if v == _SKIPPED_FLAT_VERSION:
                    continue
                _process_version_dir(
                    model_type, lang, os.path.join(lang_dir, v), v, staging_directory
                )
        else:
            for phone_set in os.listdir(lang_dir):
                phone_set_dir = os.path.join(lang_dir, phone_set)
                if not os.path.isdir(phone_set_dir):
                    continue
                # Only the current version is released for these model types.
                _process_version_dir(
                    model_type,
                    lang,
                    os.path.join(phone_set_dir, CURRENT_VERSION),
                    CURRENT_VERSION,
                    staging_directory,
                    phone_set=phone_set,
                )