Commit
·
bec78d1
1
Parent(s):
4f772d6
change default values in submit_video. improve readme and html.
Browse files- README.md +5 -17
- cli.py +0 -53
- static/landing_page.html +2 -1
- static/submit_video.html +2 -2
README.md
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
## Multilang ASR Captioner
|
| 2 |
|
| 3 |
-
A multilingual automatic speech recognition and video captioning tool using faster whisper
|
|
|
|
|
|
|
| 4 |
|
| 5 |
<video width="400" height="300" src="https://github.com/marquesafonso/multilang-asr-captioner/assets/79766107/fcff8ac1-cdfc-4400-821c-f797d84c2d8a"></video>
|
| 6 |
|
|
@@ -26,7 +28,7 @@ Check the [landing page](http://127.0.0.1:8000).
|
|
| 26 |
|
| 27 |
From there you will see the [submit_video endpoint](http://127.0.0.1:8000/submit_video/) and the [documentation](http://127.0.0.1:8000/docs/)
|
| 28 |
|
| 29 |
-
**Tip**: on Linux or Mac localhost will resolve directly to 0.0.0.0 but on windows you will need to change it to 127.0.0.1
|
| 30 |
|
| 31 |
### Local
|
| 32 |
|
|
@@ -62,18 +64,4 @@ Then check the [landing page](http://127.0.0.1:8000).
|
|
| 62 |
|
| 63 |
From there you will see the [submit_video endpoint](http://127.0.0.1:8000/submit_video/) and the [documentation](http://127.0.0.1:8000/docs/)
|
| 64 |
|
| 65 |
-
**Tip**: on Linux or Mac localhost will resolve directly to 0.0.0.0 but on windows you will need to change it to 127.0.0.1
|
| 66 |
-
|
| 67 |
-
### Command Line Interface
|
| 68 |
-
|
| 69 |
-
Run the following code to use the CLI. The input file must be in mp4 format.
|
| 70 |
-
|
| 71 |
-
```
|
| 72 |
-
pipenv run python cli.py --invideo_filename '<your_file_name>' --max_words_per_line 8
|
| 73 |
-
```
|
| 74 |
-
|
| 75 |
-
Fontsize, Font, Background Color and Text Color arguments are available:
|
| 76 |
-
|
| 77 |
-
```
|
| 78 |
-
pipenv run python cli.py --invideo_filename '<your_file>' --max_words_per_line 8 --fontsize 28 --font "Arial-Bold" --bg_color None --text_color 'white'
|
| 79 |
-
```
|
|
|
|
| 1 |
## Multilang ASR Captioner
|
| 2 |
|
| 3 |
+
A multilingual automatic speech recognition and video captioning tool using faster whisper.
|
| 4 |
+
|
| 5 |
+
Supports real-time translation to English. Runs on a consumer-grade CPU.
|
| 6 |
|
| 7 |
<video width="400" height="300" src="https://github.com/marquesafonso/multilang-asr-captioner/assets/79766107/fcff8ac1-cdfc-4400-821c-f797d84c2d8a"></video>
|
| 8 |
|
|
|
|
| 28 |
|
| 29 |
From there you will see the [submit_video endpoint](http://127.0.0.1:8000/submit_video/) and the [documentation](http://127.0.0.1:8000/docs/)
|
| 30 |
|
| 31 |
+
**Tip**: on Linux or Mac, localhost will resolve directly to 0.0.0.0, but on Windows you will need to change it to 127.0.0.1 or localhost
|
| 32 |
|
| 33 |
### Local
|
| 34 |
|
|
|
|
| 64 |
|
| 65 |
From there you will see the [submit_video endpoint](http://127.0.0.1:8000/submit_video/) and the [documentation](http://127.0.0.1:8000/docs/)
|
| 66 |
|
| 67 |
+
**Tip**: on Linux or Mac, localhost will resolve directly to 0.0.0.0, but on Windows you will need to change it to 127.0.0.1 or localhost
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cli.py
DELETED
|
@@ -1,53 +0,0 @@
|
|
| 1 |
-
from argparse import ArgumentParser
|
| 2 |
-
from utils.transcriber import transcriber
|
| 3 |
-
from utils.subtitler import subtitler
|
| 4 |
-
from utils.convert_video_to_audio import convert_video_to_audio
|
| 5 |
-
import logging, os
|
| 6 |
-
from tqdm import tqdm
|
| 7 |
-
|
| 8 |
-
logging.basicConfig(filename='main.log',
|
| 9 |
-
encoding='utf-8',
|
| 10 |
-
level=logging.DEBUG,
|
| 11 |
-
format='%(asctime)s %(levelname)s %(message)s',
|
| 12 |
-
datefmt='%m/%d/%Y %I:%M:%S %p')
|
| 13 |
-
|
| 14 |
-
def main(invideo_filename:str,
|
| 15 |
-
max_words_per_line:int,
|
| 16 |
-
fontsize:int,
|
| 17 |
-
font:str,
|
| 18 |
-
bg_color:str,
|
| 19 |
-
text_color:str
|
| 20 |
-
):
|
| 21 |
-
INVIDEO_DIR = os.path.join('data/',invideo_filename)
|
| 22 |
-
os.makedirs(INVIDEO_DIR, exist_ok=True)
|
| 23 |
-
SRT_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.srt")
|
| 24 |
-
OUTVIDEO_PATH = os.path.join(INVIDEO_DIR, f"result_{invideo_filename}.mp4")
|
| 25 |
-
with tqdm(total=100, desc="Overall Progress") as pbar:
|
| 26 |
-
INVIDEO_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.mp4")
|
| 27 |
-
INAUDIO_PATH = os.path.join(INVIDEO_DIR, f"{invideo_filename}.m4a")
|
| 28 |
-
if not os.path.exists(INAUDIO_PATH):
|
| 29 |
-
convert_video_to_audio(INVIDEO_PATH,INAUDIO_PATH)
|
| 30 |
-
pbar.update(50)
|
| 31 |
-
if not os.path.exists(SRT_PATH):
|
| 32 |
-
transcriber(INAUDIO_PATH, SRT_PATH, max_words_per_line)
|
| 33 |
-
pbar.update(25)
|
| 34 |
-
subtitler(INVIDEO_PATH, SRT_PATH, OUTVIDEO_PATH, fontsize, font, bg_color, text_color)
|
| 35 |
-
pbar.update(25)
|
| 36 |
-
|
| 37 |
-
if __name__ == '__main__':
|
| 38 |
-
parser = ArgumentParser()
|
| 39 |
-
parser.add_argument('--invideo_filename', required=True, type=str, help='Filename to caption.')
|
| 40 |
-
parser.add_argument("--max_words_per_line", type=int, default=None, help="the maximum number of words in a segment. (int)")
|
| 41 |
-
parser.add_argument('--fontsize', required=False, default=32, type=int, help='Font size for captions (int)')
|
| 42 |
-
parser.add_argument('--font', required=False, default="FuturaPTHeavy", type=str, help='Font style for captions (str)')
|
| 43 |
-
parser.add_argument('--bg_color', required=False, default="#070a13b3", type=str, help='Hex color value for caption background colour. (str)')
|
| 44 |
-
parser.add_argument('--text_color', required=False, default="white", type=str, help='color value for caption text. (str)')
|
| 45 |
-
args = parser.parse_args()
|
| 46 |
-
# Example usage
|
| 47 |
-
main(args.invideo_filename,
|
| 48 |
-
args.max_words_per_line,
|
| 49 |
-
args.fontsize,
|
| 50 |
-
args.font,
|
| 51 |
-
args.bg_color,
|
| 52 |
-
args.text_color
|
| 53 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/landing_page.html
CHANGED
|
@@ -142,7 +142,8 @@
|
|
| 142 |
<body>
|
| 143 |
<div class="container">
|
| 144 |
<h1>Multilang-ASR-Captioner</h1>
|
| 145 |
-
<p>A multilingual automatic speech recognition and video captioning tool using faster whisper
|
|
|
|
| 146 |
<a href="/submit_video" class="button submit">Submit Video</a>
|
| 147 |
<a href="/docs" class="button docs">Documentation</a>
|
| 148 |
</div>
|
|
|
|
| 142 |
<body>
|
| 143 |
<div class="container">
|
| 144 |
<h1>Multilang-ASR-Captioner</h1>
|
| 145 |
+
<p>A multilingual automatic speech recognition and video captioning tool using faster whisper.</p>
|
| 146 |
+
<p>Supports real-time translation to English. Runs on a consumer-grade CPU.</p>
|
| 147 |
<a href="/submit_video" class="button submit">Submit Video</a>
|
| 148 |
<a href="/docs" class="button docs">Documentation</a>
|
| 149 |
</div>
|
static/submit_video.html
CHANGED
|
@@ -105,8 +105,8 @@
|
|
| 105 |
<option value="transcribe">Transcribe</option>
|
| 106 |
<option value="translate">Translate</option>
|
| 107 |
</select><br>
|
| 108 |
-
Max words per line: <input type="number" name="max_words_per_line" value="
|
| 109 |
-
Font size: <input type="number" name="fontsize" value="
|
| 110 |
Font: <input type="text" name="font" value="FuturaPTHeavy"><br>
|
| 111 |
Background color (Pro tip: #00FFFF00 = transparent): <input type="text" name="bg_color" value="#070a13b3"><br>
|
| 112 |
Text color: <input type="text" name="text_color" value="white"><br>
|
|
|
|
| 105 |
<option value="transcribe">Transcribe</option>
|
| 106 |
<option value="translate">Translate</option>
|
| 107 |
</select><br>
|
| 108 |
+
Max words per line: <input type="number" name="max_words_per_line" value="6"><br>
|
| 109 |
+
Font size: <input type="number" name="fontsize" value="42"><br>
|
| 110 |
Font: <input type="text" name="font" value="FuturaPTHeavy"><br>
|
| 111 |
Background color (Pro tip: #00FFFF00 = transparent): <input type="text" name="bg_color" value="#070a13b3"><br>
|
| 112 |
Text color: <input type="text" name="text_color" value="white"><br>
|