Spaces:

imansarraf
/

Azari_ASR

Sleeping

App Files Files Community

imansarraf committed on Jul 30, 2025

Commit

2c77e51

verified ·

1 Parent(s): c0b9248

Upload 4 files

Browse files

Files changed (4) hide show

autosub/__init__-0.4.0.py +405 -0
autosub/__init__.py +438 -0
autosub/constants.py +118 -0
autosub/formatters.py +66 -0

autosub/__init__-0.4.0.py ADDED Viewed

	@@ -0,0 +1,405 @@

+"""
+Defines autosub's main functionality.
+"""
+#!/usr/bin/env python
+from __future__ import absolute_import, print_function, unicode_literals
+import argparse
+import audioop
+import json
+import math
+import multiprocessing
+import os
+import subprocess
+import sys
+import tempfile
+import wave
+import requests
+from googleapiclient.discovery import build
+from progressbar import ProgressBar, Percentage, Bar, ETA
+from autosub.constants import (
+    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
+)
+from autosub.formatters import FORMATTERS
+# Defaults shared by generate_subtitles() and the command-line parser in main().
+DEFAULT_SUBTITLE_FORMAT = 'srt'  # looked up in FORMATTERS
+DEFAULT_CONCURRENCY = 10  # size of the multiprocessing pool
+DEFAULT_SRC_LANGUAGE = 'en'
+DEFAULT_DST_LANGUAGE = 'en'
+def percentile(arr, percent):
+    """
+    Calculate the given percentile of arr.
+    """
+    arr = sorted(arr)
+    index = (len(arr) - 1) * percent
+    floor = math.floor(index)
+    ceil = math.ceil(index)
+    if floor == ceil:
+        return arr[int(index)]
+    low_value = arr[int(floor)] * (ceil - index)
+    high_value = arr[int(ceil)] * (index - floor)
+    return low_value + high_value
+class FLACConverter(object): # pylint: disable=too-few-public-methods
+    """
+    Class for converting a region of an input audio or video file into a FLAC audio file
+    """
+    def __init__(self, source_path, include_before=0.25, include_after=0.25):
+        self.source_path = source_path
+        self.include_before = include_before
+        self.include_after = include_after
+    def __call__(self, region):
+        try:
+            start, end = region
+            start = max(0, start - self.include_before)
+            end += self.include_after
+            temp = tempfile.NamedTemporaryFile(suffix='.flac')
+            command = ["ffmpeg", "-ss", str(start), "-t", str(end - start),
+                       "-y", "-i", self.source_path,
+                       "-loglevel", "error", temp.name]
+            use_shell = True if os.name == "nt" else False
+            subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
+            return temp.read()
+        except KeyboardInterrupt:
+            return None
+class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
+    """
+    Class for performing speech-to-text for an input FLAC file.
+    """
+    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
+        self.language = language
+        self.rate = rate
+        self.api_key = api_key
+        self.retries = retries
+    def __call__(self, data):
+        try:
+            for _ in range(self.retries):
+                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
+                headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}
+                try:
+                    resp = requests.post(url, data=data, headers=headers)
+                except requests.exceptions.ConnectionError:
+                    continue
+                for line in resp.content.decode('utf-8').split("\n"):
+                    try:
+                        line = json.loads(line)
+                        line = line['result'][0]['alternative'][0]['transcript']
+                        return line[:1].upper() + line[1:]
+                    except IndexError:
+                        # no result
+                        continue
+        except KeyboardInterrupt:
+            return None
+class Translator(object): # pylint: disable=too-few-public-methods
+    """
+    Class for translating a sentence from a one language to another.
+    """
+    def __init__(self, language, api_key, src, dst):
+        self.language = language
+        self.api_key = api_key
+        self.service = build('translate', 'v2',
+                             developerKey=self.api_key)
+        self.src = src
+        self.dst = dst
+    def __call__(self, sentence):
+        try:
+            if not sentence:
+                return None
+            result = self.service.translations().list( # pylint: disable=no-member
+                source=self.src,
+                target=self.dst,
+                q=[sentence]
+            ).execute()
+            if 'translations' in result and result['translations'] and \
+                'translatedText' in result['translations'][0]:
+                return result['translations'][0]['translatedText']
+            return None
+        except KeyboardInterrupt:
+            return None
+def which(program):
+    """
+    Return the path for a given executable.
+    """
+    def is_exe(file_path):
+        """
+        Checks whether a file is executable.
+        """
+        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
+    fpath, _ = os.path.split(program)
+    if fpath:
+        if is_exe(program):
+            return program
+    else:
+        for path in os.environ["PATH"].split(os.pathsep):
+            path = path.strip('"')
+            exe_file = os.path.join(path, program)
+            if is_exe(exe_file):
+                return exe_file
+    return None
+def extract_audio(filename, channels=1, rate=16000):
+    """
+    Extract audio from an input file to a temporary WAV file.
+    """
+    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+    if not os.path.isfile(filename):
+        print("The given file does not exist: {}".format(filename))
+        raise Exception("Invalid filepath: {}".format(filename))
+    if not which("ffmpeg"):
+        print("ffmpeg: Executable not found on machine.")
+        raise Exception("Dependency not found: ffmpeg")
+    command = ["ffmpeg", "-y", "-i", filename,
+               "-ac", str(channels), "-ar", str(rate),
+               "-loglevel", "error", temp.name]
+    use_shell = True if os.name == "nt" else False
+    subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
+    return temp.name, rate
+def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
+    """
+    Perform voice activity detection on a given audio file.
+    """
+    reader = wave.open(filename)
+    sample_width = reader.getsampwidth()
+    rate = reader.getframerate()
+    n_channels = reader.getnchannels()
+    chunk_duration = float(frame_width) / rate
+    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))
+    energies = []
+    for _ in range(n_chunks):
+        chunk = reader.readframes(frame_width)
+        energies.append(audioop.rms(chunk, sample_width * n_channels))
+    threshold = percentile(energies, 0.2)
+    elapsed_time = 0
+    regions = []
+    region_start = None
+    for energy in energies:
+        is_silence = energy <= threshold
+        max_exceeded = region_start and elapsed_time - region_start >= max_region_size
+        if (max_exceeded or is_silence) and region_start:
+            if elapsed_time - region_start >= min_region_size:
+                regions.append((region_start, elapsed_time))
+                region_start = None
+        elif (not region_start) and (not is_silence):
+            region_start = elapsed_time
+        elapsed_time += chunk_duration
+    return regions
+def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
+        source_path,
+        output=None,
+        concurrency=DEFAULT_CONCURRENCY,
+        src_language=DEFAULT_SRC_LANGUAGE,
+        dst_language=DEFAULT_DST_LANGUAGE,
+        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
+        api_key=None,
+    ):
+    """
+    Given an input audio/video file, generate subtitles in the specified language and format.
+    """
+    audio_filename, audio_rate = extract_audio(source_path)
+    regions = find_speech_regions(audio_filename)
+    pool = multiprocessing.Pool(concurrency)
+    converter = FLACConverter(source_path=audio_filename)
+    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
+                                  api_key=GOOGLE_SPEECH_API_KEY)
+    transcripts = []
+    if regions:
+        try:
+            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',
+                       ETA()]
+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+            extracted_regions = []
+            for i, extracted_region in enumerate(pool.imap(converter, regions)):
+                extracted_regions.append(extracted_region)
+                pbar.update(i)
+            pbar.finish()
+            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]
+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
+                transcripts.append(transcript)
+                pbar.update(i)
+            pbar.finish()
+            if src_language.split("-")[0] != dst_language.split("-")[0]:
+                if api_key:
+                    google_translate_api_key = api_key
+                    translator = Translator(dst_language, google_translate_api_key,
+                                            dst=dst_language,
+                                            src=src_language)
+                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
+                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
+                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+                    translated_transcripts = []
+                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
+                        translated_transcripts.append(transcript)
+                        pbar.update(i)
+                    pbar.finish()
+                    transcripts = translated_transcripts
+                else:
+                    print(
+                        "Error: Subtitle translation requires specified Google Translate API key. "
+                        "See --help for further information."
+                    )
+                    return 1
+        except KeyboardInterrupt:
+            pbar.finish()
+            pool.terminate()
+            pool.join()
+            print("Cancelling transcription")
+            raise
+    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
+    formatter = FORMATTERS.get(subtitle_file_format)
+    formatted_subtitles = formatter(timed_subtitles)
+    dest = output
+    if not dest:
+        base = os.path.splitext(source_path)[0]
+        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
+    with open(dest, 'wb') as output_file:
+        output_file.write(formatted_subtitles.encode("utf-8"))
+    os.remove(audio_filename)
+    return dest
+def validate(args):
+    """
+    Check that the CLI arguments passed to autosub are valid.
+    """
+    if args.format not in FORMATTERS:
+        print(
+            "Subtitle format not supported. "
+            "Run with --list-formats to see all supported formats."
+        )
+        return False
+    if args.src_language not in LANGUAGE_CODES.keys():
+        print(
+            "Source language not supported. "
+            "Run with --list-languages to see all supported languages."
+        )
+        return False
+    if args.dst_language not in LANGUAGE_CODES.keys():
+        print(
+            "Destination language not supported. "
+            "Run with --list-languages to see all supported languages."
+        )
+        return False
+    if not args.source_path:
+        print("Error: You need to specify a source path.")
+        return False
+    return True
+def main():
+    """
+    Run autosub as a command-line program.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",
+                        nargs='?')
+    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",
+                        type=int, default=DEFAULT_CONCURRENCY)
+    parser.add_argument('-o', '--output',
+                        help="Output path for subtitles (by default, subtitles are saved in \
+                        the same directory and name as the source path)")
+    parser.add_argument('-F', '--format', help="Destination subtitle format",
+                        default=DEFAULT_SUBTITLE_FORMAT)
+    parser.add_argument('-S', '--src-language', help="Language spoken in source file",
+                        default=DEFAULT_SRC_LANGUAGE)
+    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
+                        default=DEFAULT_DST_LANGUAGE)
+    parser.add_argument('-K', '--api-key',
+                        help="The Google Translate API key to be used. \
+                        (Required for subtitle translation)")
+    parser.add_argument('--list-formats', help="List all available subtitle formats",
+                        action='store_true')
+    parser.add_argument('--list-languages', help="List all available source/destination languages",
+                        action='store_true')
+    args = parser.parse_args()
+    if args.list_formats:
+        print("List of formats:")
+        for subtitle_format in FORMATTERS:
+            print("{format}".format(format=subtitle_format))
+        return 0
+    if args.list_languages:
+        print("List of all languages:")
+        for code, language in sorted(LANGUAGE_CODES.items()):
+            print("{code}\t{language}".format(code=code, language=language))
+        return 0
+    if not validate(args):
+        return 1
+    try:
+        subtitle_file_path = generate_subtitles(
+            source_path=args.source_path,
+            concurrency=args.concurrency,
+            src_language=args.src_language,
+            dst_language=args.dst_language,
+            api_key=args.api_key,
+            subtitle_file_format=args.format,
+            output=args.output,
+        )
+        print("Subtitles file created at {}".format(subtitle_file_path))
+    except KeyboardInterrupt:
+        return 1
+    return 0
+if __name__ == '__main__':
+    sys.exit(main())

autosub/__init__.py ADDED Viewed

	@@ -0,0 +1,438 @@

+"""
+Defines autosub's main functionality.
+"""
+#!/usr/bin/env python
+from __future__ import absolute_import, print_function, unicode_literals
+import argparse
+import audioop
+import math
+import multiprocessing
+import os
+from json import JSONDecodeError
+import subprocess
+import sys
+import tempfile
+import wave
+import json
+import requests
+try:
+    from json.decoder import JSONDecodeError
+except ImportError:
+    JSONDecodeError = ValueError
+from googleapiclient.discovery import build
+from progressbar import ProgressBar, Percentage, Bar, ETA
+from autosub.constants import (
+    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
+)
+from autosub.formatters import FORMATTERS
+from pathlib import PurePath
+# Defaults shared by generate_subtitles() and the command-line parser in main().
+DEFAULT_SUBTITLE_FORMAT = 'srt'  # looked up in FORMATTERS
+DEFAULT_CONCURRENCY = 10  # size of the multiprocessing pool
+DEFAULT_SRC_LANGUAGE = 'en'
+DEFAULT_DST_LANGUAGE = 'en'
+def percentile(arr, percent):
+    """
+    Calculate the given percentile of arr.
+    """
+    arr = sorted(arr)
+    index = (len(arr) - 1) * percent
+    floor = math.floor(index)
+    ceil = math.ceil(index)
+    if floor == ceil:
+        return arr[int(index)]
+    low_value = arr[int(floor)] * (ceil - index)
+    high_value = arr[int(ceil)] * (index - floor)
+    return low_value + high_value
+class FLACConverter(object): # pylint: disable=too-few-public-methods
+    """
+    Class for converting a region of an input audio or video file into a FLAC audio file
+    """
+    def __init__(self, source_path, include_before=0.25, include_after=0.25):
+        self.source_path = source_path
+        self.include_before = include_before
+        self.include_after = include_after
+    def __call__(self, region):
+        try:
+            start, end = region
+            start = max(0, start - self.include_before)
+            end += self.include_after
+            #delete=False necessary for running on Windows
+            temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False)
+            program_ffmpeg = which("ffmpeg")
+            command = [str(program_ffmpeg), "-ss", str(start), "-t", str(end - start),
+                       "-y", "-i", self.source_path,
+                       "-loglevel", "error", temp.name]
+            use_shell = True if os.name == "nt" else False
+            subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
+            read_data = temp.read()
+            temp.close()
+            os.unlink(temp.name)
+            return read_data
+        except KeyboardInterrupt:
+            return None
+class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
+    """
+    Class for performing speech-to-text for an input FLAC file.
+    """
+    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY, proxies=None):
+        self.language = language
+        self.rate = rate
+        self.api_key = api_key
+        self.retries = retries
+        self.proxies = proxies
+    def __call__(self, data):
+        try:
+            for _ in range(self.retries):
+                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
+                headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}
+                try:
+                    if self.proxies:
+                        resp = requests.post(url, data=data, headers=headers, proxies=self.proxies)
+                    else:
+                        resp = requests.post(url, data=data, headers=headers)
+                except requests.exceptions.ConnectionError:
+                    continue
+                for line in resp.content.decode('utf-8').split("\n"):
+                    try:
+                        line = json.loads(line)
+                        line = line['result'][0]['alternative'][0]['transcript']
+                        return line[:1].upper() + line[1:]
+                    except IndexError:
+                        # no result
+                        continue
+                    except JSONDecodeError:
+                        continue
+        except KeyboardInterrupt:
+            return None
+class Translator(object): # pylint: disable=too-few-public-methods
+    """
+    Class for translating a sentence from a one language to another.
+    """
+    def __init__(self, language, api_key, src, dst):
+        self.language = language
+        self.api_key = api_key
+        self.service = build('translate', 'v2',
+                             developerKey=self.api_key)
+        self.src = src
+        self.dst = dst
+    def __call__(self, sentence):
+        try:
+            if not sentence:
+                return None
+            result = self.service.translations().list( # pylint: disable=no-member
+                source=self.src,
+                target=self.dst,
+                q=[sentence]
+            ).execute()
+            if 'translations' in result and result['translations'] and \
+                'translatedText' in result['translations'][0]:
+                return result['translations'][0]['translatedText']
+            return None
+        except KeyboardInterrupt:
+            return None
+def which(program):
+    """
+    Return the path for a given executable.
+    """
+    def is_exe(file_path):
+        """
+        Checks whether a file is executable.
+        """
+        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
+    #necessary to run on Windows
+    if os.name == "nt":
+        program += ".exe"
+    fpath, _ = os.path.split(program)
+    if fpath:
+        if is_exe(program):
+            return program
+    else:
+        local_program_path = PurePath(__file__).parent.parent.joinpath(program)
+        str_local_program_path = str(local_program_path)
+        if is_exe(str_local_program_path):
+            return str_local_program_path
+        else:
+            for path in os.environ["PATH"].split(os.pathsep):
+                path = path.strip('"')
+                exe_file = os.path.join(path, program)
+                if is_exe(exe_file):
+                    return exe_file
+    return None
+def extract_audio(filename, channels=1, rate=16000):
+    """
+    Extract audio from an input file to a temporary WAV file.
+    """
+    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+    if not os.path.isfile(filename):
+        print("The given file does not exist: {}".format(filename))
+        raise Exception("Invalid filepath: {}".format(filename))
+    program_ffmpeg = which("ffmpeg")
+    if not program_ffmpeg:
+        print("ffmpeg: Executable not found on machine.")
+        raise Exception("Dependency not found: ffmpeg")
+    command = [str(program_ffmpeg), "-y", "-i", filename,
+               "-ac", str(channels), "-ar", str(rate),
+               "-loglevel", "error", temp.name]
+    use_shell = True if os.name == "nt" else False
+    subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
+    return temp.name, rate
+def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
+    """
+    Perform voice activity detection on a given audio file.
+    """
+    reader = wave.open(filename)
+    sample_width = reader.getsampwidth()
+    rate = reader.getframerate()
+    n_channels = reader.getnchannels()
+    chunk_duration = float(frame_width) / rate
+    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))
+    energies = []
+    for _ in range(n_chunks):
+        chunk = reader.readframes(frame_width)
+        energies.append(audioop.rms(chunk, sample_width * n_channels))
+    threshold = percentile(energies, 0.2)
+    elapsed_time = 0
+    regions = []
+    region_start = None
+    for energy in energies:
+        is_silence = energy <= threshold
+        max_exceeded = region_start and elapsed_time - region_start >= max_region_size
+        if (max_exceeded or is_silence) and region_start:
+            if elapsed_time - region_start >= min_region_size:
+                regions.append((region_start, elapsed_time))
+                region_start = None
+        elif (not region_start) and (not is_silence):
+            region_start = elapsed_time
+        elapsed_time += chunk_duration
+    return regions
+def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
+        source_path,
+        output=None,
+        concurrency=DEFAULT_CONCURRENCY,
+        src_language=DEFAULT_SRC_LANGUAGE,
+        dst_language=DEFAULT_DST_LANGUAGE,
+        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
+        api_key=None,
+        proxies=None
+    ):
+    """
+    Given an input audio/video file, generate subtitles in the specified language and format.
+    """
+    if os.name != "nt" and "Darwin" in os.uname():
+        #the default unix fork method does not work on Mac OS
+        #need to use forkserver
+        if 'forkserver' != multiprocessing.get_start_method(allow_none=True):
+            multiprocessing.set_start_method('forkserver')
+    audio_filename, audio_rate = extract_audio(source_path)
+    regions = find_speech_regions(audio_filename)
+    pool = multiprocessing.Pool(concurrency)
+    converter = FLACConverter(source_path=audio_filename)
+    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
+                                  api_key=GOOGLE_SPEECH_API_KEY, proxies=proxies)
+    transcripts = []
+    if regions:
+        try:
+            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',
+                       ETA()]
+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+            extracted_regions = []
+            for i, extracted_region in enumerate(pool.imap(converter, regions)):
+                extracted_regions.append(extracted_region)
+                pbar.update(i)
+            pbar.finish()
+            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]
+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
+                transcripts.append(transcript)
+                pbar.update(i)
+            pbar.finish()
+            if src_language.split("-")[0] != dst_language.split("-")[0]:
+                if api_key:
+                    google_translate_api_key = api_key
+                    translator = Translator(dst_language, google_translate_api_key,
+                                            dst=dst_language,
+                                            src=src_language)
+                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
+                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
+                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+                    translated_transcripts = []
+                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
+                        translated_transcripts.append(transcript)
+                        pbar.update(i)
+                    pbar.finish()
+                    transcripts = translated_transcripts
+                else:
+                    print(
+                        "Error: Subtitle translation requires specified Google Translate API key. "
+                        "See --help for further information."
+                    )
+                    return 1
+        except KeyboardInterrupt:
+            pbar.finish()
+            pool.terminate()
+            pool.join()
+            print("Cancelling transcription")
+            raise
+    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
+    formatter = FORMATTERS.get(subtitle_file_format)
+    formatted_subtitles = formatter(timed_subtitles)
+    dest = output
+    if not dest:
+        base = os.path.splitext(source_path)[0]
+        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
+    with open(dest, 'wb') as output_file:
+        output_file.write(formatted_subtitles.encode("utf-8"))
+    os.remove(audio_filename)
+    return dest
+def validate(args):
+    """
+    Check that the CLI arguments passed to autosub are valid.
+    """
+    if args.format not in FORMATTERS:
+        print(
+            "Subtitle format not supported. "
+            "Run with --list-formats to see all supported formats."
+        )
+        return False
+    if args.src_language not in LANGUAGE_CODES.keys():
+        print(
+            "Source language not supported. "
+            "Run with --list-languages to see all supported languages."
+        )
+        return False
+    if args.dst_language not in LANGUAGE_CODES.keys():
+        print(
+            "Destination language not supported. "
+            "Run with --list-languages to see all supported languages."
+        )
+        return False
+    if not args.source_path:
+        print("Error: You need to specify a source path.")
+        return False
+    return True
+def main():
+    """
+    Run autosub as a command-line program.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",
+                        nargs='?')
+    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",
+                        type=int, default=DEFAULT_CONCURRENCY)
+    parser.add_argument('-o', '--output',
+                        help="Output path for subtitles (by default, subtitles are saved in \
+                        the same directory and name as the source path)")
+    parser.add_argument('-F', '--format', help="Destination subtitle format",
+                        default=DEFAULT_SUBTITLE_FORMAT)
+    parser.add_argument('-S', '--src-language', help="Language spoken in source file",
+                        default=DEFAULT_SRC_LANGUAGE)
+    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
+                        default=DEFAULT_DST_LANGUAGE)
+    parser.add_argument('-K', '--api-key',
+                        help="The Google Translate API key to be used. \
+                        (Required for subtitle translation)")
+    parser.add_argument('--list-formats', help="List all available subtitle formats",
+                        action='store_true')
+    parser.add_argument('--list-languages', help="List all available source/destination languages",
+                        action='store_true')
+    args = parser.parse_args()
+    if args.list_formats:
+        print("List of formats:")
+        for subtitle_format in FORMATTERS:
+            print("{format}".format(format=subtitle_format))
+        return 0
+    if args.list_languages:
+        print("List of all languages:")
+        for code, language in sorted(LANGUAGE_CODES.items()):
+            print("{code}\t{language}".format(code=code, language=language))
+        return 0
+    if not validate(args):
+        return 1
+    try:
+        subtitle_file_path = generate_subtitles(
+            source_path=args.source_path,
+            concurrency=args.concurrency,
+            src_language=args.src_language,
+            dst_language=args.dst_language,
+            api_key=args.api_key,
+            subtitle_file_format=args.format,
+            output=args.output,
+        )
+        print("Subtitles file created at {}".format(subtitle_file_path))
+    except KeyboardInterrupt:
+        return 1
+    return 0
+if __name__ == '__main__':
+    sys.exit(main())

autosub/constants.py ADDED Viewed

	@@ -0,0 +1,118 @@

+"""
+Defines constants used by autosub.
+"""
+from __future__ import unicode_literals
+# NOTE(review): this API key is committed in the source, so every reader of
+# the file can see and reuse it.
+GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
+# Request URL template; the {lang} and {key} fields are filled in with
+# str.format(). NOTE(review): the scheme is plain http, so the key and the
+# uploaded audio are sent unencrypted - confirm whether https can be used.
+GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long
+LANGUAGE_CODES = {
+    'af': 'Afrikaans',
+    'ar': 'Arabic',
+    'az': 'Azerbaijani',
+    'be': 'Belarusian',
+    'bg': 'Bulgarian',
+    'bn': 'Bengali',
+    'bs': 'Bosnian',
+    'ca': 'Catalan',
+    'ceb': 'Cebuano',
+    'cs': 'Czech',
+    'cy': 'Welsh',
+    'da': 'Danish',
+    'de': 'German',
+    'el': 'Greek',
+    'en-AU': 'English (Australia)',
+    'en-CA': 'English (Canada)',
+    'en-GB': 'English (United Kingdom)',
+    'en-IN': 'English (India)',
+    'en-IE': 'English (Ireland)',
+    'en-NZ': 'English (New Zealand)',
+    'en-PH': 'English (Philippines)',
+    'en-SG': 'English (Singapore)',
+    'en-US': 'English (United States)',
+    'eo': 'Esperanto',
+    'es-AR': 'Spanish (Argentina)',
+    'es-CL': 'Spanish (Chile)',
+    'es-ES': 'Spanish (Spain)',
+    'es-US': 'Spanish (United States)',
+    'es-MX': 'Spanish (Mexico)',
+    'es': 'Spanish',
+    'et': 'Estonian',
+    'eu': 'Basque',
+    'fa': 'Persian',
+    'fi': 'Finnish',
+    'fr': 'French',
+    'ga': 'Irish',
+    'gl': 'Galician',
+    'gu': 'Gujarati',
+    'ha': 'Hausa',
+    'hi': 'Hindi',
+    'hmn': 'Hmong',
+    'hr': 'Croatian',
+    'ht': 'Haitian Creole',
+    'hu': 'Hungarian',
+    'hy': 'Armenian',
+    'id': 'Indonesian',
+    'ig': 'Igbo',
+    'is': 'Icelandic',
+    'it': 'Italian',
+    'iw': 'Hebrew',
+    'ja': 'Japanese',
+    'jw': 'Javanese',
+    'ka': 'Georgian',
+    'kk': 'Kazakh',
+    'km': 'Khmer',
+    'kn': 'Kannada',
+    'ko': 'Korean',
+    'la': 'Latin',
+    'lo': 'Lao',
+    'lt': 'Lithuanian',
+    'lv': 'Latvian',
+    'mg': 'Malagasy',
+    'mi': 'Maori',
+    'mk': 'Macedonian',
+    'ml': 'Malayalam',
+    'mn': 'Mongolian',
+    'mr': 'Marathi',
+    'ms': 'Malay',
+    'mt': 'Maltese',
+    'my': 'Myanmar (Burmese)',
+    'ne': 'Nepali',
+    'nl': 'Dutch',
+    'no': 'Norwegian',
+    'ny': 'Chichewa',
+    'pa': 'Punjabi',
+    'pl': 'Polish',
+    'pt-BR': 'Portuguese (Brazil)',
+    'pt-PT': 'Portuguese (Portugal)',
+    'ro': 'Romanian',
+    'ru': 'Russian',
+    'si': 'Sinhala',
+    'sk': 'Slovak',
+    'sl': 'Slovenian',
+    'so': 'Somali',
+    'sq': 'Albanian',
+    'sr': 'Serbian',
+    'st': 'Sesotho',
+    'su': 'Sundanese',
+    'sv': 'Swedish',
+    'sw': 'Swahili',
+    'ta': 'Tamil',
+    'te': 'Telugu',
+    'tg': 'Tajik',
+    'th': 'Thai',
+    'tl': 'Filipino',
+    'tr': 'Turkish',
+    'uk': 'Ukrainian',
+    'ur': 'Urdu',
+    'uz': 'Uzbek',
+    'vi': 'Vietnamese',
+    'yi': 'Yiddish',
+    'yo': 'Yoruba',
+    'yue-Hant-HK': 'Cantonese, (Traditional HK)',
+    'zh': 'Chinese (Simplified, China)',
+    'zh-HK': 'Chinese (Simplified, Hong Kong)',
+    'zh-TW': 'Chinese (Traditional, Taiwan)',
+    'zu': 'Zulu',
+}

autosub/formatters.py ADDED Viewed

	@@ -0,0 +1,66 @@

+"""
+Defines subtitle formatters used by autosub.
+"""
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import json
+import pysrt
+import six
+def srt_formatter(subtitles, padding_before=0, padding_after=0):
+    """
+    Serialize a list of subtitles according to the SRT format, with optional time padding.
+    """
+    sub_rip_file = pysrt.SubRipFile()
+    for i, ((start, end), text) in enumerate(subtitles, start=1):
+        item = pysrt.SubRipItem()
+        item.index = i
+        item.text = six.text_type(text)
+        item.start.seconds = max(0, start - padding_before)
+        item.end.seconds = end + padding_after
+        sub_rip_file.append(item)
+    return '\n'.join(six.text_type(item) for item in sub_rip_file)
+def vtt_formatter(subtitles, padding_before=0, padding_after=0):
+    """
+    Serialize a list of subtitles according to the VTT format, with optional time padding.
+    """
+    text = srt_formatter(subtitles, padding_before, padding_after)
+    text = 'WEBVTT\n\n' + text.replace(',', '.')
+    return text
+def json_formatter(subtitles):
+    """
+    Serialize a list of subtitles as a JSON blob.
+    """
+    subtitle_dicts = [
+        {
+            'start': start,
+            'end': end,
+            'content': text,
+        }
+        for ((start, end), text)
+        in subtitles
+    ]
+    return json.dumps(subtitle_dicts)
+def raw_formatter(subtitles):
+    """
+    Serialize a list of subtitles as a newline-delimited string.
+    """
+    return ' '.join(text for (_rng, text) in subtitles)
+# Maps each supported subtitle format name to the function that serializes it.
+FORMATTERS = {
+    'srt': srt_formatter,
+    'vtt': vtt_formatter,
+    'json': json_formatter,
+    'raw': raw_formatter,
+}