MFA / scripts /release_staged_models.py

MFA

2f6b10b verified 3 months ago

11.9 kB

	import collections
	import json
	import os
	import time

	import requests
	from montreal_forced_aligner.models import MODEL_TYPES, ModelManager, ModelRelease

	# Repository root: the parent of the directory that contains this script.
	mfa_model_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

	# When True, the body of an already-published release whose version matches
	# the local meta.json is re-sent (PATCH) from the local README.md.
	UPDATE = False

	# The GitHub token is read from the file "scripts/token" under the repository root.
	with open(os.path.join(mfa_model_root, "scripts", "token"), "r") as f:
	    # Strip surrounding whitespace: a trailing newline in the token file would
	    # otherwise become part of the "Authorization" header value.
	    token = f.read().strip()

	# Only version directories with exactly this name are processed for model
	# types that are organised as <language>/<phone_set>/<version>.
	CURRENT_VERSION = "v3.3.0"

	# Format of the git tag / release tag created for each model.
	tag_template = "{model_type}-{model_name}-v{version}"

	manager = ModelManager(token=token)
	manager.refresh_remote()

	# Human-readable names per model type (not referenced by the release loop below).
	model_type_names = {
	    "acoustic": "Acoustic models",
	    "dictionary": "Pronunciation dictionaries",
	    "g2p": "G2P models",
	    "language_model": "Language models",
	    "ivector": "Ivector extractors",
	    "corpus": "Corpora",
	    "tokenizer": "Tokenizers",
	}

	print(manager.remote_models)

	# URL template for the untrained ("base") dictionary that a dictionary
	# release's README links to.
	base_dict_template = "https://github.com/MontrealCorpusTools/mfa-models/tree/main/dictionary/{language}/{phone_set}/{version}/{model_name}.dict"

	# Languages for which an acoustic model with "mfa" in its tag has been seen
	# so far; other "mfa" models are only processed for these languages.
	acoustic_mfas = set()

	# Absolute location that relative corpus links in a README are rewritten to,
	# so that they still resolve when the README is shown as a release body.
	_CORPUS_URL = "https://github.com/MontrealCorpusTools/mfa-models/tree/main/corpus/"


	def _github_headers(content_type=None):
	    """Return the headers for an authenticated GitHub API request.

	    Args:
	        content_type: Optional value for the ``Content-Type`` header
	            (used when uploading a release asset).
	    """
	    headers = {
	        "Accept": "application/vnd.github.v3+json",
	        "Authorization": f"token {token}",
	    }
	    if content_type is not None:
	        headers["Content-Type"] = content_type
	    return headers


	def _prepare_readme(readme, model_type, tag, dict_url=None):
	    """Return ``readme`` adjusted for use as the body of a GitHub release.

	    Args:
	        readme: Text of the model's README.md.
	        model_type: Model type the README belongs to.
	        tag: Release tag of the model.
	        dict_url: URL of the base dictionary; when given for an "mfa"/"arpa"
	            dictionary, a note linking to it is inserted before the
	            "## Installation" heading.
	    """
	    if dict_url is not None and model_type == "dictionary" and ("mfa" in tag or "arpa" in tag):
	        # The replacement must re-emit the heading as "## Installation" (with
	        # a space); "##Installation" is not rendered as a Markdown heading.
	        readme = readme.replace(
	            "\n\n## Installation",
	            f"\n- The dictionary downloadable from this release has trained pronunciation and silence probabilities. The base dictionary is available [here]({dict_url})\n\n## Installation",
	        )
	    # Only the first matching prefix is rewritten: the deeper prefix contains
	    # the shallower one, so it has to be tried first and exclusively.
	    for relative_prefix in ("../../../../corpus/", "../../../corpus/"):
	        if relative_prefix in readme:
	            readme = readme.replace(relative_prefix, _CORPUS_URL)
	            break
	    return readme


	def _already_released(model_type, model_name, version, readme, skip_if_any_release=False):
	    """Tell whether the model needs no new release, updating its body if asked.

	    Args:
	        model_type: Key into ``manager.remote_models``.
	        model_name: Name of the model from meta.json.
	        version: Version of the model from meta.json (without a leading "v").
	        readme: Prepared release body, sent when ``UPDATE`` is set.
	        skip_if_any_release: When True, any existing release for
	            ``model_name`` counts, regardless of its version.

	    Returns:
	        True when the caller should skip creating a release.
	    """
	    existing_releases = manager.remote_models[model_type]
	    if model_name not in existing_releases:
	        return False
	    if skip_if_any_release:
	        return True
	    found_existing = False
	    # NOTE(review): the entry is iterated as a mapping of version -> release
	    # object, as the original per-phone-set code did - confirm against
	    # ModelManager.refresh_remote.
	    for existing_version, model in existing_releases[model_name].items():
	        if existing_version.replace("v", "") != version:
	            continue
	        if UPDATE:
	            # Use the matching release (not the enclosing mapping) and
	            # authenticate, as the other GitHub requests in this script do.
	            # NOTE(review): release_link is assumed to be the API URL of the
	            # release - confirm.
	            print("UPDATING", model.release_link)
	            requests.patch(
	                model.release_link,
	                json={"body": readme},
	                headers=_github_headers(),
	            )
	            time.sleep(5)
	        found_existing = True
	    return found_existing


	def _publish_release(model_type, model_name, version, tag, readme, meta, staging_directory):
	    """Create the GitHub release for a model and upload its staged file.

	    The staged file is ``<staging_directory>/<model_name>.dict`` for
	    dictionaries and ``<staging_directory>/<model_name>.zip`` otherwise.
	    Nothing is uploaded when the creation response contains "errors".
	    """
	    if model_type == "dictionary":
	        ext, content_type = ".dict", "text/tab-separated-values"
	    else:
	        ext, content_type = ".zip", "application/zip"
	    model_path = os.path.join(staging_directory, model_name + ext)
	    print(tag, len(readme))
	    print(tag)
	    r = requests.post(
	        manager.base_url,
	        json={
	            "tag_name": tag,
	            "name": f"{model_name} v{version}",
	            "body": readme,
	            "target_commitish": "main",
	            "draft": False,
	            "prerelease": False,
	            "generate_release_notes": False,
	        },
	        headers=_github_headers(),
	    )
	    d = r.json()
	    time.sleep(5)
	    print(d)
	    if "errors" in d:
	        return
	    with open(model_path, "rb") as f:
	        data = f.read()
	    r2 = requests.post(
	        # upload_url is a URI template; drop the template suffix and pass the
	        # asset name as a regular query parameter instead.
	        d["upload_url"].replace("{?name,label}", ""),
	        data=data,
	        params={"name": os.path.basename(model_path)},
	        # Send the media type that matches the uploaded file.
	        headers=_github_headers(content_type),
	    )
	    print(r2.json())
	    print(meta)
	    print(tag)
	    time.sleep(5)


	def _release_version_dir(model_type, lang, version_dir, v, staging_directory, phone_set=None):
	    """Release the model described by one version directory, if needed.

	    Args:
	        model_type: Model type being processed.
	        lang: Name of the language directory.
	        version_dir: Directory expected to hold meta.json and README.md.
	        v: Name of the version directory.
	        staging_directory: Directory holding the staged model files.
	        phone_set: Name of the phone-set directory, or None for model types
	            laid out as <language>/<version>.
	    """
	    # Skip stray files and empty placeholder directories.
	    if not os.path.isdir(version_dir) or not os.listdir(version_dir):
	        return
	    with open(os.path.join(version_dir, "meta.json"), "r", encoding="utf8") as f:
	        meta = json.load(f)
	    model_name = meta["name"]
	    version = meta["version"]
	    with open(os.path.join(version_dir, "README.md"), "r", encoding="utf8") as f:
	        readme = f.read()
	    tag = tag_template.format(model_type=model_type, model_name=model_name, version=version)
	    if "mfa" in tag:
	        if model_type == "acoustic":
	            acoustic_mfas.add(lang)
	        elif lang not in acoustic_mfas and model_type != "ivector":
	            # "mfa" models other than ivector extractors are only processed
	            # for languages with an "mfa" acoustic model seen earlier.
	            return
	    dict_url = None
	    if model_type == "dictionary" and phone_set is not None:
	        dict_url = base_dict_template.format(
	            language=lang,
	            phone_set=phone_set,
	            version=v,
	            model_name=model_name,
	        )
	    readme = _prepare_readme(readme, model_type, tag, dict_url)
	    # For the <language>/<version> layout the original guard skipped a model
	    # as soon as any release existed for its name; the lookup that followed
	    # the guard could never succeed, so that is the behaviour kept here.
	    if _already_released(
	        model_type, model_name, version, readme, skip_if_any_release=phone_set is None
	    ):
	        return
	    _publish_release(model_type, model_name, version, tag, readme, meta, staging_directory)


	# Walk <root>/<model_type>/<language>/... and release every staged model
	# that has no matching release yet.
	for model_type in MODEL_TYPES:
	    model_directory = os.path.join(mfa_model_root, model_type)
	    staging_directory = os.path.join(model_directory, "staging")
	    for lang in os.listdir(model_directory):
	        if lang == "staging":
	            continue
	        lang_dir = os.path.join(model_directory, lang)
	        if not os.path.isdir(lang_dir):
	            continue
	        if model_type in {"ivector", "tokenizer"}:
	            # Layout: <language>/<version>; every version except v2.0.0.
	            for v in os.listdir(lang_dir):
	                if v == "v2.0.0":
	                    continue
	                _release_version_dir(
	                    model_type, lang, os.path.join(lang_dir, v), v, staging_directory
	                )
	        else:
	            # Layout: <language>/<phone_set>/<version>; CURRENT_VERSION only.
	            for phone_set in os.listdir(lang_dir):
	                phone_set_dir = os.path.join(lang_dir, phone_set)
	                if not os.path.isdir(phone_set_dir):
	                    continue
	                for v in os.listdir(phone_set_dir):
	                    if v != CURRENT_VERSION:
	                        continue
	                    _release_version_dir(
	                        model_type,
	                        lang,
	                        os.path.join(phone_set_dir, v),
	                        v,
	                        staging_directory,
	                        phone_set=phone_set,
	                    )