MFA / docs /source /conf.py
niobures's picture
MFA
2f6b10b verified
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
from datetime import date
import os
import sys
from montreal_forced_aligner.utils import get_mfa_version # noqa
# -- Project information -----------------------------------------------------
# Basic project metadata.
project = "mfa model"
author = "Montreal Corpus Tools"
copyright = "2018-" + str(date.today().year) + ", Montreal Corpus Tools"

# The full version, including alpha/beta/rc tags.
release = get_mfa_version()
# Short "X.Y" version: the first two dot-separated components of the release.
version = ".".join(release.split(".")[:2])
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
# Put the directory containing this conf.py at the front of sys.path
# (presumably so the ``ipa_charts`` extension listed below can be imported
# from here -- confirm).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

extensions = [
    "sphinx_needs",
    "sphinx_design",
    "sphinx.ext.viewcode",
    "sphinx.ext.extlinks",
    "myst_parser",
    "sphinx.ext.autosectionlabel",
    "ipa_charts",
]
# MyST-Parser settings.
myst_heading_anchors = 3
myst_enable_extensions = ["colon_fence"]

# NOTE(review): this looks like a sphinx-panels option, but only
# ``sphinx_design`` is listed in ``extensions`` -- confirm it is still needed.
panels_add_bootstrap_css = False

# Settings for sphinx.ext.autosectionlabel and sphinx-needs.
autosectionlabel_prefix_document = True
needs_include_needs = True
# Need types offered by this documentation. Every type uses the "node" style;
# the rows below are (directive, title, ID prefix, colour).
needs_types = [
    {
        "directive": directive,
        "title": title,
        "prefix": prefix,
        "color": color,
        "style": "node",
    }
    for directive, title, prefix, color in (
        ("acoustic", "Acoustic model", "AM_", "#BFD8D2"),
        ("corpus", "Corpus", "", "#FEDCD2"),
        ("g2p", "G2P model", "G2P_", "#FEDCD2"),
        ("language_model", "Language model", "LM_", "#DF744A"),
        ("ivector", "Ivector Extractor", "IE_", "#DCB239"),
        ("tokenizer", "Tokenizer", "T_", "#DCB239"),
        ("dictionary", "Dictionary", "D_", "#DCB239"),
    )
]
# Location of the need templates, relative to this directory.
needs_template_folder = "_templates/needs_templates"

# Two need layouts: "not_mfa" shows only the content grid, while "mfa" adds a
# right-hand side panel containing the stacked MFA logo.
needs_layouts = {
    "not_mfa": {"grid": "content"},
    "mfa": {
        "grid": "content_side_right",
        "layout": {
            "side": [
                '<<image("https://montreal-forced-aligner.readthedocs.io/en/latest/_static/logo_stacked_light.svg")>>',
            ],
        },
    },
}

# How links and references to needs are displayed.
needs_show_link_title = True
needs_show_link_type = True
needs_role_need_template = "{title}"

# Additional per-need options and the default table presentation.
needs_extra_options = [
    "name",
    "language",
    "dialect",
    "architecture",
    "phoneset",
    "license",
]
needs_table_style = "datatables"
needs_table_columns = "ID;name;language;dialect;phoneset;tags"
# Tags that needs may carry: first the hand-written (name, description)
# pairs, then one tag per language, then a catch-all "Multilingual" tag.
needs_tags = [
    {"name": tag_name, "description": tag_description}
    for tag_name, tag_description in (
        ("MFA", "Maintained by Montreal Forced Aligner"),
        ("PROSODYLAB", "Resources developed by Prosodylab"),
        ("PINYIN", "Pinyin phone set"),
        ("CV", "Maintained by VoxCommunis"),
        ("IPA", "Based on the International Phonetic Alphabet"),
        ("Common Voice", "Corpora in Mozilla's Common Voice collection"),
        ("Google", "Corpora collected and distributed by Google"),
        ("Microsoft", "Corpora collected and distributed by Microsoft"),
        ("GlobalPhone", "Corpora in GlobalPhone collection"),
        ("MagicData", "Corpora in MagicData collection"),
        ("ICE", "Corpora in ICE collection"),
        ("Non-native", "Corpora with non-native speakers"),
        ("VoxPopuli", "Corpora in Vox Populi collection"),
        ("MediaSpeech", "Corpora in MediaSpeech collection"),
        ("Multilingual Librispeech", "Corpora in Multilingual Librispeech collection"),
        ("M-AILABS", "Corpora in M-AILABS's collections"),
        ("Multilingual TEDx", "Corpora in the Multilingual TEDx collection"),
    )
]

# Languages that each get a "<name> language" tag.
current_languages = [
    "Abkhaz",
    "Armenian",
    "Arabic",
    "Bashkir",
    "Basque",
    "Belarusian",
    "Bulgarian",
    "Chuvash",
    "Croatian",
    "Czech",
    "Dutch",
    "English",
    "French",
    "Georgian",
    "German",
    "Greek",
    "Guarani",
    "Hausa",
    "Hindi",
    "Hungarian",
    "Indonesian",
    "Italian",
    "Japanese",
    "Kazakh",
    "Korean",
    "Kurmanji",
    "Kyrgyz",
    "Maltese",
    "Mandarin",
    "Polish",
    "Portuguese",
    "Punjabi",
    "Romanian",
    "Russian",
    "Sorbian",
    "Spanish",
    "Swahili",
    "Swedish",
    "Tamil",
    "Tatar",
    "Thai",
    "Turkish",
    "Ukrainian",
    "Urdu",
    "Uyghur",
    "Uzbek",
    "Vietnamese",
]
needs_tags.extend(
    {"name": language_name, "description": f"{language_name} language"}
    for language_name in current_languages
)
needs_tags.append({"name": "Multilingual", "description": "Multiple languages"})
# Need IDs are mandatory and may contain letters, digits, spaces and . ( ) : _
needs_id_required = True
needs_id_regex = "[A-Za-z0-9 .():_]+"
needs_role_need_max_title_length = 0

# Template directories, relative to this directory.
templates_path = ["_templates"]
# Named external references: key -> (display title, URL).
xref_links = {
    "wikipedia": (
        "Wikipedia",
        "https://en.wikipedia.org/wiki/Main_Page",
    ),
    "phoible": (
        "Phoible",
        "https://phoible.org/",
    ),
    "xpf": (
        "XPF",
        "https://cohenpr-xpf.github.io/XPF/",
    ),
    "nagisa": (
        "Nagisa",
        "https://github.com/taishi-i/nagisa",
    ),
    "konlpy": (
        "KoNLPy",
        "https://konlpy.org/en/latest/",
    ),
    "spacy_pkuseg": (
        "spacy-pkuseg",
        "https://github.com/explosion/spacy-pkuseg/",
    ),
    "num2chinese": (
        "num2chinese.py",
        "https://gist.github.com/gumblex/0d65cad2ba607fd14de7",
    ),
    "hanziconv": (
        "hanziconv",
        "https://github.com/berniey/hanziconv",
    ),
    "num2words": (
        "num2words",
        "https://github.com/savoirfairelinux/num2words",
    ),
    "thai_word_segmentation": (
        "thai-word-segmentation",
        "https://github.com/sertiscorp/thai-word-segmentation",
    ),
    "mecab_ko": (
        "Mecab-KO",
        "https://bitbucket.org/eunjeon/mecab-ko/src/master/",
    ),
    "whisperx": (
        "WhisperX",
        "https://github.com/m-bain/whisperX",
    ),
    "nemo": (
        "NeMo Forced Aligner",
        "https://docs.nvidia.com/nemo-framework/user-guide/latest/nemotoolkit/tools/nemo_forced_aligner.html",
    ),
    "wav2vec2": (
        "Wav2Vec2",
        "https://pytorch.org/audio/stable/tutorials/ctc_forced_alignment_api_tutorial.html",
    ),
}
# Extra link type letting a need declare what it was built with.
needs_extra_links = [
    {
        "option": "built_with",
        "incoming": "Built with",
        "outgoing": "Built with",
        # The key must be spelled exactly "allow_dead_links"; it previously
        # carried a trailing space, which made it a different, unrecognised key.
        "allow_dead_links": True,
    }
]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []

# -- Options for HTML output -------------------------------------------------

# Theme used for the HTML pages, plus the logo (loaded from the main MFA
# documentation site) and the favicon taken from the local static folder.
html_theme = "pydata_sphinx_theme"
html_logo = "https://montreal-forced-aligner.readthedocs.io/en/latest/_static/logo.svg"
html_favicon = "_static/favicon.ico"
# Theme options: header links, logo, analytics and navigation depth.
html_theme_options = {
    "external_links": [
        {"url": "https://montreal-forced-aligner.readthedocs.io/", "name": "MFA docs"},
    ],
    "icon_links": [
        {
            "name": "GitHub",
            "url": "https://github.com/MontrealCorpusTools/mfa-models",
            "icon": "fab fa-github",
        },
    ],
    "logo": {
        "text": "Montreal Forced Aligner",
        "image_dark": "https://montreal-forced-aligner.readthedocs.io/en/latest/_static/logo_dark.svg",
    },
    "analytics": {"google_analytics_id": "G-31RXW9TT1Z"},
    "show_nav_level": 1,
    "navigation_depth": 4,
    "show_toc_level": 2,
    "collapse_navigation": False,
}
# Repository coordinates made available to the HTML templates.
html_context = dict(
    github_user="MontrealCorpusTools",
    github_repo="mfa-models",
    github_version="main",
    doc_path="docs/source",
)
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

# Style sheets: two remote ones (Font Awesome, the shared MFA theme CSS)
# followed by two local ones.
html_css_files = [
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/fontawesome.min.css",
    "https://montreal-forced-aligner.readthedocs.io/en/latest/_static/css/mfa.css",
    "css/style.css",
    "css/datatables.css",
]

html_js_files = ["main.js"]

# Sidebar templates applied to every page ("**" matches all documents).
html_sidebars = {
    "**": [
        "search-field.html",
        "sidebar-nav-bs.html",
        "sidebar-ethical-ads.html",
    ],
}
# reStructuredText snippet defining four custom inline roles, each mapped to
# one or more CSS classes. The ``:class:`` lines must be indented under
# their ``.. role::`` directive to be parsed as that directive's options.
rst_prolog = """
.. role:: manner
   :class: manner
.. role:: submanner
   :class: submanner
.. role:: lexical_set
   :class: lexical-set
.. role:: ipa_inline
   :class: ipa-inline ipa-highlight
"""
from sphinx_needs.api.configuration import add_dynamic_function
# URL for each known licence name; looked up by ``license_link``.
license_links = {
    "CC-0": "https://creativecommons.org/publicdomain/zero/1.0/",
    "CC BY 4.0": "https://creativecommons.org/licenses/by/4.0/",
    "CC BY-NC-SA 4.0": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
    "CC BY-SA 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
    "CC BY-NC-ND 4.0": "https://creativecommons.org/licenses/by-nc-nd/4.0/",
    "CC BY-NC 2.0": "https://creativecommons.org/licenses/by-nc/2.0/",
    "Microsoft Research Data License": "https://msropendata-web-api.azurewebsites.net/licenses/2f933be3-284d-500b-7ea3-2aa2fd0f1bb2/view",
    "Apache 2.0": "https://www.apache.org/licenses/LICENSE-2.0",
    "MIT": "https://opensource.org/licenses/MIT",
    "Public domain in the USA": "https://creativecommons.org/share-your-work/public-domain/cc0/",
    "M-AILABS License": "https://www.caito.de/2019/01/the-m-ailabs-speech-dataset/",
    "ELRA": "https://www.elra.info/en/services-around-lrs/distribution/licensing/",
}

# Register every licence name as a tag. The description is the name itself,
# with " License" appended unless the name already ends in " License" or
# contains "Public".
needs_tags.extend(
    {
        "name": license_name,
        "description": (
            license_name
            if license_name.endswith(" License") or "Public" in license_name
            else license_name + " License"
        ),
    }
    for license_name in license_links
)
# URL for each known phone set; looked up by ``phone_set_link``.
phone_set_links = {
    "Epitran": "https://github.com/dmort27/epitran",
    "XPF": "https://github.com/CohenPr-XPF/XPF",
    "ARPA": "https://en.wikipedia.org/wiki/ARPABET",
    "MFA": "https://mfa-models.readthedocs.io/en/refactor/mfa_phone_set.html",
}

# Register every phone set name as a "<name> phone set" tag.
needs_tags.extend(
    {"name": phone_set_name, "description": f"{phone_set_name} phone set"}
    for phone_set_name in phone_set_links
)
def name_link(app, need, needs, *args, **kwargs):
    """Return the ``target_node`` entry of ``need`` converted to a string.

    Registered with sphinx-needs as a dynamic function in ``setup``. ``app``,
    ``needs`` and any extra arguments are accepted but not used here.

    Raises:
        KeyError: if ``need`` has no ``"target_node"`` entry.
    """
    # The debugging ``print`` of the node was dropped so that this helper no
    # longer writes to stdout on every call.
    return str(need["target_node"])
def language_link(app, need, needs, *args, **kwargs):
    """Return the ``target_node`` entry of ``need`` converted to a string.

    ``app``, ``needs`` and any extra arguments are accepted but not used.
    """
    return str(need["target_node"])
def license_link(app, need, needs, license):
    """Format ``license`` as ``[name](url)`` using ``license_links``.

    A name with no entry in ``license_links`` is returned unchanged instead
    of raising ``KeyError``, matching the fallback in ``phone_set_link``.
    ``app``, ``need`` and ``needs`` are accepted but not used.
    """
    url = license_links.get(license)
    if url is None:
        # Unknown licence: fall back to plain text rather than failing.
        return license
    return f"[{license}]({url})"
def phone_set_link(app, need, needs, phone_set):
    """Format ``phone_set`` as ``[name](url)`` using ``phone_set_links``.

    A name with no entry in ``phone_set_links`` is returned unchanged.
    ``app``, ``need`` and ``needs`` are accepted but not used.
    """
    # The two debugging prints were removed; one of them read
    # ``need['language']`` and could raise for a need without that entry.
    url = phone_set_links.get(phone_set)
    if url is None:
        return phone_set
    return f"[{phone_set}]({url})"
needs_string_links = {
    # Values of the "phoneset" and "license" options written as
    # ``[title](link)`` are split into a link name and a link URL.
    "external_link": {
        "regex": r"^\[(?P<title>.+)\]\((?P<link>.+)\)$",
        "link_url": "{{link}}",
        "link_name": "{{title}}",
        "options": ["phoneset", "license"],
    },
    # Turns a bare word in the "github" option into an issue link.
    # NOTE(review): the URL points at the sphinxcontrib-needs issue tracker
    # and "github" is not among ``needs_extra_options`` -- this looks like a
    # leftover example; confirm whether it is still wanted.
    "github_link": {
        "regex": r"^(?P<value>\w+)$",
        "link_url": "https://github.com/useblocks/sphinxcontrib-needs/issues/{{value}}",
        "link_name": "GitHub #{{value}}",
        "options": ["github"],
    },
}
def setup(app):
    """Register the four link helpers as sphinx-needs dynamic functions."""
    for dynamic_function in (name_link, language_link, license_link, phone_set_link):
        add_dynamic_function(app, dynamic_function)