MFA

File size: 12,090 Bytes

2f6b10b

# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, as shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
from datetime import date
import os
import sys
from montreal_forced_aligner.utils import get_mfa_version  # noqa

# -- Project information -----------------------------------------------------

# Project identity used by Sphinx.
project = "mfa model"
author = "Montreal Corpus Tools"
# The copyright range ends at the year in which the docs are built.
copyright = f"2018-{date.today().year}, Montreal Corpus Tools"

# The full version, including alpha/beta/rc tags.
release = get_mfa_version()
# The short version: the first two dot-separated fields of the version string.
version = ".".join(get_mfa_version().split(".", maxsplit=2)[0:2])


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.

# Put the directory containing this file first on the import path
# (presumably so the local ``ipa_charts`` extension is importable — TODO confirm).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

extensions = [
    "sphinx_needs",
    "sphinx_design",
    "sphinx.ext.viewcode",
    "sphinx.ext.extlinks",
    "myst_parser",
    "sphinx.ext.autosectionlabel",
    "ipa_charts",
]

# MyST parser settings.
myst_enable_extensions = ["colon_fence"]
myst_heading_anchors = 3

# Miscellaneous extension switches.
panels_add_bootstrap_css = False
autosectionlabel_prefix_document = True
needs_include_needs = True

# Need types; each entry is a dict with the keys
# directive, title, prefix, color and style.
needs_types = [
    {"directive": "acoustic", "title": "Acoustic model", "prefix": "AM_", "color": "#BFD8D2", "style": "node"},
    {"directive": "corpus", "title": "Corpus", "prefix": "", "color": "#FEDCD2", "style": "node"},
    {"directive": "g2p", "title": "G2P model", "prefix": "G2P_", "color": "#FEDCD2", "style": "node"},
    {"directive": "language_model", "title": "Language model", "prefix": "LM_", "color": "#DF744A", "style": "node"},
    {"directive": "ivector", "title": "Ivector Extractor", "prefix": "IE_", "color": "#DCB239", "style": "node"},
    {"directive": "tokenizer", "title": "Tokenizer", "prefix": "T_", "color": "#DCB239", "style": "node"},
    {"directive": "dictionary", "title": "Dictionary", "prefix": "D_", "color": "#DCB239", "style": "node"},
]

needs_template_folder = "_templates/needs_templates"

# Two layouts: "not_mfa" uses the plain "content" grid, while "mfa" uses the
# "content_side_right" grid and places the stacked MFA logo in the side area.
needs_layouts = {
    "not_mfa": {"grid": "content"},
    "mfa": {
        "grid": "content_side_right",
        "layout": {
            "side": [
                '<<image("https://montreal-forced-aligner.readthedocs.io/en/latest/_static/logo_stacked_light.svg")>>',
            ],
        },
    },
}

# How links and need references are rendered.
needs_show_link_title = True
needs_show_link_type = True
needs_role_need_template = "{title}"

# Additional per-need options.
needs_extra_options = [
    "name",
    "language",
    "dialect",
    "architecture",
    "phoneset",
    "license",
]

# Need tables: style and the semicolon-separated default columns.
needs_table_style = "datatables"
needs_table_columns = "ID;name;language;dialect;phoneset;tags"
# Tags that needs may carry: maintainers, phone sets and corpus collections first;
# one tag per language and a "Multilingual" tag are appended below.
needs_tags = [
    {"name": "MFA", "description": "Maintained by Montreal Forced Aligner"},
    {"name": "PROSODYLAB", "description": "Resources developed by Prosodylab"},
    {"name": "PINYIN", "description": "Pinyin phone set"},
    {"name": "CV", "description": "Maintained by VoxCommunis"},
    {"name": "IPA", "description": "Based on the International Phonetic Alphabet"},
    {"name": "Common Voice", "description": "Corpora in Mozilla's Common Voice collection"},
    {"name": "Google", "description": "Corpora collected and distributed by Google"},
    {"name": "Microsoft", "description": "Corpora collected and distributed by Microsoft"},
    {"name": "GlobalPhone", "description": "Corpora in GlobalPhone collection"},
    {"name": "MagicData", "description": "Corpora in MagicData collection"},
    {"name": "ICE", "description": "Corpora in ICE collection"},
    {"name": "Non-native", "description": "Corpora with non-native speakers"},
    {"name": "VoxPopuli", "description": "Corpora in Vox Populi collection"},
    {"name": "MediaSpeech", "description": "Corpora in MediaSpeech collection"},
    {"name": "Multilingual Librispeech", "description": "Corpora in Multilingual Librispeech collection"},
    {"name": "M-AILABS", "description": "Corpora in M-AILABS's collections"},
    {"name": "Multilingual TEDx", "description": "Corpora in the Multilingual TEDx collection"},
]

# Languages that each get a "<language> language" tag.
current_languages = [
    "Abkhaz", "Armenian", "Arabic", "Bashkir", "Basque", "Belarusian", "Bulgarian",
    "Chuvash", "Croatian", "Czech", "Dutch", "English", "French", "Georgian", "German", "Greek",
    "Guarani", "Hausa", "Hindi", "Hungarian", "Indonesian", "Italian", "Japanese", "Kazakh",
    "Korean", "Kurmanji", "Kyrgyz",
    "Maltese", "Mandarin", "Polish", "Portuguese", "Punjabi", "Romanian", "Russian", "Sorbian",
    "Spanish", "Swahili", "Swedish",
    "Tamil", "Tatar", "Thai", "Turkish", "Ukrainian", "Urdu", "Uyghur", "Uzbek", "Vietnamese",
]
for lang in current_languages:
    needs_tags.append(dict(name=lang, description=f'{lang} language'))

needs_tags.append(dict(name="Multilingual", description="Multiple languages"))

# Need ID handling: the pattern IDs must match, and whether an ID is required.
needs_id_regex = "[A-Za-z0-9 .():_]+"
needs_id_required = True
needs_role_need_max_title_length = 0

# Template search paths, relative to this directory.
templates_path = ["_templates"]


# Named external references: key -> (display title, URL).
xref_links = {
    "wikipedia": (
        "Wikipedia",
        "https://en.wikipedia.org/wiki/Main_Page",
    ),
    "phoible": (
        "Phoible",
        "https://phoible.org/",
    ),
    "xpf": (
        "XPF",
        "https://cohenpr-xpf.github.io/XPF/",
    ),
    "nagisa": (
        "Nagisa",
        "https://github.com/taishi-i/nagisa",
    ),
    "konlpy": (
        "KoNLPy",
        "https://konlpy.org/en/latest/",
    ),
    "spacy_pkuseg": (
        "spacy-pkuseg",
        "https://github.com/explosion/spacy-pkuseg/",
    ),
    "num2chinese": (
        "num2chinese.py",
        "https://gist.github.com/gumblex/0d65cad2ba607fd14de7",
    ),
    "hanziconv": (
        "hanziconv",
        "https://github.com/berniey/hanziconv",
    ),
    "num2words": (
        "num2words",
        "https://github.com/savoirfairelinux/num2words",
    ),
    "thai_word_segmentation": (
        "thai-word-segmentation",
        "https://github.com/sertiscorp/thai-word-segmentation",
    ),
    "mecab_ko": (
        "Mecab-KO",
        "https://bitbucket.org/eunjeon/mecab-ko/src/master/",
    ),
    "whisperx": (
        "WhisperX",
        "https://github.com/m-bain/whisperX",
    ),
    "nemo": (
        "NeMo Forced Aligner",
        "https://docs.nvidia.com/nemo-framework/user-guide/latest/nemotoolkit/tools/nemo_forced_aligner.html",
    ),
    "wav2vec2": (
        "Wav2Vec2",
        "https://pytorch.org/audio/stable/tutorials/ctc_forced_alignment_api_tutorial.html",
    ),
}

# Extra link type between needs, exposed as the ``built_with`` option.
needs_extra_links = [
    {
        "option": "built_with",
        "incoming": "Built with",
        "outgoing": "Built with",
        # The key must be spelled exactly "allow_dead_links"; any stray
        # whitespace makes it a different, unrecognised key.
        "allow_dead_links": True,
    },
]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []


# -- Options for HTML output -------------------------------------------------

# HTML theme for the rendered site.
html_theme = "pydata_sphinx_theme"

# The logo is loaded from the main MFA documentation site; the favicon is a local static file.
html_logo = "https://montreal-forced-aligner.readthedocs.io/en/latest/_static/logo.svg"
html_favicon = "_static/favicon.ico"


# Theme options: external/icon links, logo, analytics ID and navigation depth settings.
html_theme_options = {
    "external_links": [
        {"url": "https://montreal-forced-aligner.readthedocs.io/", "name": "MFA docs"},
    ],
    "icon_links": [
        {
            "name": "GitHub",
            "url": "https://github.com/MontrealCorpusTools/mfa-models",
            "icon": "fab fa-github",
        },
    ],
    "logo": {
        "text": "Montreal Forced Aligner",
        "image_dark": "https://montreal-forced-aligner.readthedocs.io/en/latest/_static/logo_dark.svg",
    },
    "analytics": {"google_analytics_id": "G-31RXW9TT1Z"},
    "show_nav_level": 1,
    "navigation_depth": 4,
    "show_toc_level": 2,
    "collapse_navigation": False,
}

# GitHub repository coordinates and the path of the docs sources inside it.
html_context = dict(
    github_user="MontrealCorpusTools",
    github_repo="mfa-models",
    github_version="main",
    doc_path="docs/source",
)


# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

# Stylesheets: two remote ones (Font Awesome, the shared MFA theme) followed by local ones.
html_css_files = [
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/fontawesome.min.css",
    "https://montreal-forced-aligner.readthedocs.io/en/latest/_static/css/mfa.css",
    "css/style.css",
    "css/datatables.css",
]
html_js_files = ["main.js"]

# Sidebar templates applied to every page (the "**" pattern).
html_sidebars = {
    "**": [
        "search-field.html",
        "sidebar-nav-bs.html",
        "sidebar-ethical-ads.html",
    ],
}
# reStructuredText snippet declaring four custom roles (manner, submanner,
# lexical_set, ipa_inline), each mapped to the CSS class(es) given by its
# ``:class:`` option.
rst_prolog = """
.. role:: manner
   :class: manner

.. role:: submanner
   :class: submanner

.. role:: lexical_set
   :class: lexical-set

.. role:: ipa_inline
   :class: ipa-inline ipa-highlight
"""
from sphinx_needs.api.configuration import add_dynamic_function


# License display name -> URL describing that license.
license_links = {
    "CC-0": "https://creativecommons.org/publicdomain/zero/1.0/",
    "CC BY 4.0": "https://creativecommons.org/licenses/by/4.0/",
    "CC BY-NC-SA 4.0": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
    "CC BY-SA 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
    "CC BY-NC-ND 4.0": "https://creativecommons.org/licenses/by-nc-nd/4.0/",
    "CC BY-NC 2.0": "https://creativecommons.org/licenses/by-nc/2.0/",
    "Microsoft Research Data License": "https://msropendata-web-api.azurewebsites.net/licenses/2f933be3-284d-500b-7ea3-2aa2fd0f1bb2/view",
    "Apache 2.0": "https://www.apache.org/licenses/LICENSE-2.0",
    "MIT": "https://opensource.org/licenses/MIT",
    "Public domain in the USA": "https://creativecommons.org/share-your-work/public-domain/cc0/",
    "M-AILABS License": "https://www.caito.de/2019/01/the-m-ailabs-speech-dataset/",
    "ELRA": "https://www.elra.info/en/services-around-lrs/distribution/licensing/",
}

# Register every license name as a tag.  The description is the name itself,
# with " License" appended unless the name already ends in " License" or
# contains "Public".
for lic in license_links:
    desc = lic if (lic.endswith(" License") or "Public" in lic) else lic + " License"
    needs_tags.append(dict(name=lic, description=desc))

# Phone set name -> URL describing that phone set.
phone_set_links = {
    "Epitran": "https://github.com/dmort27/epitran",
    "XPF": "https://github.com/CohenPr-XPF/XPF",
    "ARPA": "https://en.wikipedia.org/wiki/ARPABET",
    "MFA": "https://mfa-models.readthedocs.io/en/refactor/mfa_phone_set.html",
}

# Register every phone set name as a tag.
# NOTE(review): needs_tags already starts with an "MFA" entry, so this appends
# a second tag named "MFA" — confirm the duplicate is intended.
for ps in phone_set_links:
    needs_tags.append(dict(name=ps, description=f'{ps} phone set'))


def name_link(app, need, needs, *args, **kwargs):
    """Return the need's ``'target_node'`` entry converted to a string.

    Registered with sphinx-needs as a dynamic function in ``setup``.
    ``app``, ``needs`` and any extra positional/keyword arguments are
    accepted but not used.

    Raises ``KeyError`` if ``need`` has no ``'target_node'`` entry.
    """
    return str(need['target_node'])

def language_link(app, need, needs, *args, **kwargs):
    """Return ``need['target_node']`` as a string.

    ``app``, ``needs`` and any additional arguments are ignored.
    """
    return str(need["target_node"])

def license_link(app, need, needs, license):
    """Format a license name as a Markdown-style ``[name](url)`` link.

    The URL is looked up in ``license_links``.  A name with no entry there is
    returned unchanged (the same fallback ``phone_set_link`` uses) rather than
    raising ``KeyError``.

    ``app``, ``need`` and ``needs`` are accepted but not used.
    """
    url = license_links.get(license)
    if url is None:
        return license
    return f"[{license}]({url})"

def phone_set_link(app, need, needs, phone_set):
    """Format a phone set name as a Markdown-style ``[name](url)`` link.

    The URL is looked up in ``phone_set_links``; a name with no entry there is
    returned unchanged.

    ``app``, ``need`` and ``needs`` are accepted but not used.
    """
    url = phone_set_links.get(phone_set)
    if url is None:
        return phone_set
    return f"[{phone_set}]({url})"

needs_string_links = {
    # Turns a "[title](link)" value of the phoneset/license options into a
    # link labelled with the title.
    "external_link": {
        "regex": r'^\[(?P<title>.+)\]\((?P<link>.+)\)$',
        "link_url": '{{link}}',
        "link_name": '{{title}}',
        "options": ["phoneset", "license"],
    },
    # Turns a single-word value of the "github" option into a link to the
    # issue with that identifier in the useblocks/sphinxcontrib-needs tracker.
    "github_link": {
        "regex": r'^(?P<value>\w+)$',
        "link_url": 'https://github.com/useblocks/sphinxcontrib-needs/issues/{{value}}',
        "link_name": 'GitHub #{{value}}',
        "options": ["github"],
    },
}
def setup(app):
    """Register this file's dynamic functions with sphinx-needs.

    The functions are registered in the order name_link, language_link,
    license_link, phone_set_link.
    """
    for dynamic_function in (name_link, language_link, license_link, phone_set_link):
        add_dynamic_function(app, dynamic_function)