Spaces:

hammamiomar
/

ForeignWhispersNYU

Runtime error

App Files Files Community

hammamiomar committed on Dec 4, 2023

Commit

da9fc8c

1 Parent(s): cd0c5d8

add all

Browse files

Files changed (10) hide show

.DS_Store +0 -0
.gitignore +8 -0
app.py +57 -2
downloader.py +75 -0
movieCompile.py +61 -0
moviecompile.ipynb +422 -0
requirements.txt +9 -0
speech2text.py +25 -0
text2speech.py +62 -0
text2text.py +52 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+playlist10/
+videos/
+transcriptions/
+captions/
+translated/
+audio/
+output/
+__pycache__

app.py CHANGED Viewed

@@ -1,3 +1,58 @@
 import streamlit as st
-x = st.slider('select a value')
-st.write(x,'squared is',x*x)

 import streamlit as st
+import pandas as pd
+import downloader
+import speech2text
+import text2speech
+import text2text
+import movieCompile
+import os
+st.title("Translate English Youtube Videos to German")
+st.write("Input a youtube video URL, and if you wanna do another one click the reset button first.")
+# Add a text field for inputting the YouTube video URL
+youtube_url = st.text_input("YouTube URL")
+if st.button("Submit"):
+    # Reject an empty URL up front instead of letting the downloader fail on it.
+    if not youtube_url.strip():
+        st.error("Please enter a YouTube URL first.")
+        st.stop()
+    # The stages run strictly in order: each one reads the folder that the
+    # previous stage wrote (videos -> transcriptions -> translated -> audio -> output).
+    with st.status("Downloading video and captions") as status:
+        # download_video returns 1 when it skipped the video; without a video
+        # file every later stage would fail, so stop here with a visible error.
+        if downloader.download_video(youtube_url, "videos", "captions") != 0:
+            status.update(label="Could not download video", state="error")
+            st.stop()
+        status.update(label="Transcribing video")
+        speech2text.transcribeAndSaveFolder("videos", "transcriptions")
+        status.update(label="Translating video")
+        text2text.translateTsvFolder("transcriptions", "translated")
+        status.update(label="Creating audio")
+        # Build the side-by-side English/German table shown under the video.
+        # NOTE(review): takes whichever entry os.listdir returns first, so the
+        # folders are expected to hold exactly one file (hence the Reset button).
+        englishSubPath = os.path.join("transcriptions", os.listdir("transcriptions")[0])
+        germanSubPath = os.path.join("translated", os.listdir("translated")[0])
+        englishSub = pd.read_csv(englishSubPath, sep="\t")
+        germanSub = pd.read_csv(germanSubPath, sep="\t")
+        # Rows are paired on identical start/end values; the shared "text"
+        # column comes out of the merge as text_x (English) and text_y (German).
+        df = pd.merge(englishSub, germanSub, on=['start', 'end'], how='inner')
+        # Scale the timestamps by 1/1000 for display (presumably ms -> s; confirm).
+        df['start'] = df['start'] / 1000
+        df['end'] = df['end'] / 1000
+        df.rename(columns={'text_x': 'English', 'text_y': 'German'}, inplace=True)
+        text2speech.createAudioFolder("translated", "audio")
+        status.update(label="Creating video")
+        movieCompile.compileVideo("videos", "audio", "translated", "output")
+        status.update(label="Created video")
+    st.dataframe(df)
+    st.video("output/output_subtitled.mp4")
+resetButton = st.button("Reset")
+if resetButton:
+    # Imported here so this block brings its own dependency into scope.
+    import shutil
+    # Delete every working folder of the pipeline so the next run starts clean.
+    # shutil.rmtree replaces the former "rm -rf" shell calls: no shell is
+    # spawned and it works on platforms without rm; ignore_errors=True keeps
+    # the old tolerance for folders that do not exist yet.
+    for folder in ("videos", "captions", "transcriptions", "translated", "audio", "output"):
+        shutil.rmtree(folder, ignore_errors=True)

downloader.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import os
+import argparse
+from pytube import YouTube, Playlist
+import json
+# Credit: chatgpt assisted
+def download_video(url, video_folder, caption_folder):
+    """Download one YouTube video together with all of its caption tracks.
+
+    The video is written to ``<video_folder>/<video_id>.mp4`` and every caption
+    track to ``<caption_folder>/<video_id>/<language_code>.json``.
+
+    Args:
+        url: Watch URL of the video.
+        video_folder: Folder for the video file (created if missing).
+        caption_folder: Folder for the caption files (created if missing).
+
+    Returns:
+        0 when the video was downloaded, 1 when it was skipped because no
+        stream could be resolved (e.g. an age-restricted video).
+    """
+    # Create video and caption folders if they don't exist
+    os.makedirs(video_folder, exist_ok=True)
+    os.makedirs(caption_folder, exist_ok=True)
+    # Initialize the YouTube object
+    yt = YouTube(url)
+    # Resolve the stream. Catch Exception rather than using a bare except so
+    # that KeyboardInterrupt/SystemExit still propagate, and report the real
+    # cause instead of always blaming age restriction.
+    try:
+        video = yt.streams.get_highest_resolution()
+    except Exception as exc:
+        print(f"Could not resolve a stream ({exc!r}); the video may be age-restricted. Skipping.")
+        return 1
+    if video is None:
+        # No matching stream was returned; nothing to download.
+        print("No downloadable stream found. Skipping.")
+        return 1
+    video.download(output_path=video_folder, filename=yt.video_id + ".mp4")
+    # Save every available caption track as JSON, one file per language code.
+    caption_dir = os.path.join(caption_folder, yt.video_id)
+    for caption in yt.captions:
+        # Created lazily so videos without captions leave no empty folder behind.
+        os.makedirs(caption_dir, exist_ok=True)
+        caption_file = os.path.join(caption_dir, f"{caption.code}.json")
+        with open(caption_file, 'w', encoding='utf-8') as json_file:
+            json.dump(caption.json_captions, json_file)
+    return 0
+def download_playlist(playlist_url, num_videos, video_folder, caption_folder):
+    """Download the first ``num_videos`` downloadable videos of a playlist.
+
+    Videos that ``download_video`` skips (return value 1) do not count towards
+    ``num_videos``; the next playlist entry is tried instead. The function
+    stops quietly when the playlist has no more entries.
+
+    Args:
+        playlist_url: URL of the playlist.
+        num_videos: Number of successfully downloaded videos to aim for.
+        video_folder: Folder for the video files (created if missing).
+        caption_folder: Folder for the caption files (created if missing).
+    """
+    # Create output folders if they don't exist
+    os.makedirs(video_folder, exist_ok=True)
+    os.makedirs(caption_folder, exist_ok=True)
+    if num_videos <= 0:
+        return
+    # Initialize the playlist object
+    playlist = Playlist(playlist_url)
+    # Walk the playlist once and count successes. Iterating (instead of
+    # indexing playlist.videos with a running counter) cannot run past the end
+    # of the playlist, which previously raised IndexError.
+    downloaded = 0
+    for video in playlist.videos:
+        if download_video(video.watch_url, video_folder, caption_folder) == 0:
+            downloaded += 1
+            if downloaded >= num_videos:
+                break
+if __name__ == "__main__":
+    # Command-line entry point: download a single video, or the first
+    # --num_videos videos of a playlist when --playlist is given.
+    parser = argparse.ArgumentParser(description="Pytube Video and Caption Downloader")
+    parser.add_argument("url", help="URL of the video to download")
+    parser.add_argument("video_folder", help="Folder to save downloaded videos")
+    parser.add_argument("caption_folder", help="Folder to save downloaded captions")
+    parser.add_argument("--playlist", help="URL of the playlist to download")
+    parser.add_argument("--num_videos", type=int, default=1, help="Number of videos to download from the playlist")
+    parser.add_argument("--output_folder", default="downloads", help="Folder to save downloaded playlist videos")
+    args = parser.parse_args()
+    if args.playlist:
+        # download_playlist takes a video folder AND a caption folder; the
+        # former call passed only three arguments and raised TypeError.
+        # Playlist videos go to --output_folder, as its help text states.
+        download_playlist(args.playlist, args.num_videos, args.output_folder, args.caption_folder)
+    else:
+        download_video(args.url, args.video_folder, args.caption_folder)

movieCompile.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import subprocess
+import re
+import pandas as pd
+import os
+# Credit to ChatGPT for a lot of help
+def tsv_to_srt(input_file, output_file):
+    """Convert a tab-separated caption table into an SRT subtitle file.
+
+    The table must provide ``start``, ``end`` and ``text`` columns. ``start``
+    and ``end`` are divided by 1000 before being formatted as SRT timestamps.
+
+    Args:
+        input_file: Path of the TSV file to read.
+        output_file: Path of the SRT file to write (UTF-8).
+    """
+    captions = pd.read_csv(input_file, sep='\t')
+    with open(output_file, 'w', encoding='utf-8') as out:
+        for label, cue in captions.iterrows():
+            begin = convert_to_srt_time_format(cue['start'] / 1000.0)
+            finish = convert_to_srt_time_format(cue['end'] / 1000.0)
+            # One SRT cue: counter line, time-range line, text, blank separator.
+            out.write(f"{label + 1}\n{begin} --> {finish}\n{cue['text']}\n\n")
+def convert_to_srt_time_format(seconds):
+    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.
+
+    Args:
+        seconds: Non-negative duration in seconds (int or float).
+
+    Returns:
+        The timestamp string, rounded to the nearest millisecond.
+    """
+    # Convert to whole milliseconds first. Splitting the float and truncating
+    # the fractional part lost a millisecond for many inputs (1.001 - 1 is
+    # slightly below 0.001, so 1.001 s was rendered as ",000").
+    total_ms = int(round(seconds * 1000))
+    total_seconds, milliseconds = divmod(total_ms, 1000)
+    hours, remainder = divmod(total_seconds, 3600)
+    minutes, secs = divmod(remainder, 60)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
+def replace_audio_and_add_subtitles(input_video, input_audio, output_video, subtitles_file):
+    """Swap a video's audio track and burn subtitles into it using ffmpeg.
+
+    Two ffmpeg passes are run: the first copies the video stream of
+    ``input_video`` and muxes it with ``input_audio`` (encoded as AAC) into
+    ``output_video``; the second renders ``subtitles_file`` onto that result
+    and writes it next to ``output_video`` with a ``_subtitled.mp4`` suffix.
+
+    Args:
+        input_video: Path of the source video.
+        input_audio: Path of the replacement audio.
+        output_video: Path of the intermediate video with the new audio.
+        subtitles_file: Path of the subtitle file to burn in.
+
+    Raises:
+        subprocess.CalledProcessError: If either ffmpeg pass exits non-zero.
+    """
+    # Replace audio
+    replace_audio_command = [
+        'ffmpeg',
+        '-y',  # overwrite leftovers of a previous run instead of prompting
+        '-i', input_video,
+        '-i', input_audio,
+        '-c:v', 'copy',
+        '-c:a', 'aac',
+        '-strict', 'experimental',
+        '-map', '0:v:0',  # video from the first input
+        '-map', '1:a:0',  # audio from the second input
+        '-max_muxing_queue_size', '9999',
+        output_video
+    ]
+    # check=True: a failed pass must not go unnoticed, otherwise the next pass
+    # runs on a missing or stale file.
+    subprocess.run(replace_audio_command, check=True)
+    # Add subtitles
+    # splitext instead of slicing off four characters, so extensions of any
+    # length are handled.
+    subtitled_video = os.path.splitext(output_video)[0] + '_subtitled.mp4'
+    add_subtitles_command = [
+        'ffmpeg',
+        '-y',
+        '-i', output_video,
+        # NOTE(review): the path is inserted into the filter string unescaped;
+        # paths containing filter metacharacters may need escaping - confirm.
+        '-vf', f'subtitles={subtitles_file}',
+        subtitled_video  # Output file with subtitles
+    ]
+    subprocess.run(add_subtitles_command, check=True)
+def _first_file(folder):
+    """Return the path of the first non-hidden file in ``folder`` (sorted by name)."""
+    names = sorted(
+        name for name in os.listdir(folder)
+        if not name.startswith('.') and os.path.isfile(os.path.join(folder, name))
+    )
+    if not names:
+        raise FileNotFoundError(f"No input file found in {folder!r}")
+    return os.path.join(folder, names[0])
+def compileVideo(videoFolder,audioFolder,captionFolder,savepath):
+    """Build the dubbed, subtitled video from one file of each input folder.
+
+    The caption TSV is converted to ``<savepath>/subtitles.srt``; the video
+    with the replaced audio is written to ``<savepath>/output.mp4`` and the
+    subtitled version to ``<savepath>/output_subtitled.mp4``.
+
+    Args:
+        videoFolder: Folder holding the source video.
+        audioFolder: Folder holding the replacement audio.
+        captionFolder: Folder holding the caption TSV.
+        savepath: Output folder (created if missing).
+
+    Raises:
+        FileNotFoundError: If one of the input folders holds no usable file.
+    """
+    # Pick inputs deterministically and skip hidden entries (e.g. .DS_Store),
+    # rather than taking whatever os.listdir happens to return first.
+    audiopath = _first_file(audioFolder)
+    videopath = _first_file(videoFolder)
+    captionpath = _first_file(captionFolder)
+    os.makedirs(savepath, exist_ok=True)
+    subtitlepath = os.path.join(savepath, 'subtitles.srt')
+    tsv_to_srt(captionpath, subtitlepath)
+    replace_audio_and_add_subtitles(videopath, audiopath, os.path.join(savepath, 'output.mp4'), subtitlepath)

moviecompile.ipynb ADDED Viewed

	@@ -0,0 +1,422 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from moviepy.editor import VideoFileClip, AudioFileClip, TextClip, CompositeVideoClip\n",
+    "from moviepy.video.io.VideoFileClip import VideoFileClip\n",
+    "import pandas as pd\n",
+    "from math import floor\n",
+    "import speech2text\n",
+    "\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compileVideo(videoFolder,audioFolder,captionFolder,savepath):\n",
+    "    audiopath =os.path.join(audioFolder,os.listdir(audioFolder)[0])\n",
+    "    videopath = os.path.join(videoFolder,os.listdir(videoFolder)[0])\n",
+    "    captionpath = os.path.join(captionFolder,os.listdir(captionFolder)[0])\n",
+    "    os.makedirs(savepath,exist_ok=True)\n",
+    "\n",
+    "    video=VideoFileClip(videopath)\n",
+    "    video=video.set_duration(video.duration)\n",
+    "    audio=AudioFileClip(audiopath)\n",
+    "\n",
+    "    video=video.set_audio(audio)\n",
+    "\n",
+    "    captions=pd.read_csv(captionpath,delimiter='\\t')\n",
+    "    captions['start'] /= 1000\n",
+    "    captions['end'] /= 1000\n",
+    "\n",
+    "    captioned_clips=[]\n",
+    "    for index,row in captions.iterrows():\n",
+    "        text=row['text']\n",
+    "        start=row['start']\n",
+    "        end=row['end']\n",
+    "\n",
+    "       # Create a TextClip with the caption\n",
+    "        caption_clip = TextClip(text, fontsize=20, color='white', bg_color='black')\n",
+    "\n",
+    "        # Add the caption to the video at the specified time\n",
+    "        captioned_clip = CompositeVideoClip([video.subclip(start, end),\\\n",
+    "                                             caption_clip.set_pos(('center', 'bottom'))])\n",
+    "\n",
+    "        # Append the captioned clip to the list\n",
+    "        captioned_clips.append(captioned_clip)\n",
+    "\n",
+    "    # Concatenate all captioned clips\n",
+    "    final_clip = CompositeVideoClip(captioned_clips)\n",
+    "    final_clip = final_clip.set_duration(video.duration)\n",
+    "\n",
+    "    # Write the final video with captions\n",
+    "    final_clip.write_videofile(os.path.join(savepath,\"output.mp4\"), codec='libx264', audio_codec='aac',threads=8)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import subprocess\n",
+    "import tempfile\n",
+    "import re\n",
+    "def remove_weird_characters(text):\n",
+    "    # Define the regular expression to match unwanted characters\n",
+    "    pattern = r'[^a-zA-Z0-9 ]'\n",
+    "    # Use str.replace() to replace matched characters with an empty string\n",
+    "    cleaned_text = ''.join(re.sub(pattern, '', str(text)))\n",
+    "    return cleaned_text\n",
+    "def compileVideo(videoFolder,audioFolder,captionFolder,savepath):\n",
+    "    audiopath =os.path.join(audioFolder,os.listdir(audioFolder)[0])\n",
+    "    videopath = os.path.join(videoFolder,os.listdir(videoFolder)[0])\n",
+    "    captionpath = os.path.join(captionFolder,os.listdir(captionFolder)[0])\n",
+    "    os.makedirs(savepath,exist_ok=True)\n",
+    "\n",
+    "    \n",
+    "\n",
+    "    captions=pd.read_csv(captionpath,delimiter='\\t')\n",
+    "    captions['start'] /= 1000\n",
+    "    captions['end'] /= 1000\n",
+    "    captions['text'] = captions['text'].apply(remove_weird_characters)\n",
+    "\n",
+    "    temp_subtitles_file_path = tempfile.mktemp(suffix='.srt')\n",
+    "    temp_subtitles_file = open(temp_subtitles_file_path, 'w')\n",
+    "\n",
+    "    for index,row in captions.iterrows():\n",
+    "        text=row['text']\n",
+    "        start=row['start']\n",
+    "        end=row['end']\n",
+    "        temp_subtitles_file.write(f\"{index + 1}\\n{start} --> {end}\\n{text}\\n\\n\")\n",
+    "    temp_subtitles_file.close()\n",
+    "\n",
+    "# Run ffmpeg command to replace audio and add captions\n",
+    "    ffmpeg_command = [\n",
+    "    'ffmpeg',\n",
+    "    '-i', videopath,\n",
+    "    '-i', audiopath,\n",
+    "    '-vf', f\"subtitles={temp_subtitles_file_path}:force_style='Fontsize=20,PrimaryColour=&HFFFFFF,BackColour=&H000000'\",\n",
+    "    '-c:v', 'libx264',\n",
+    "    '-c:a', 'aac',\n",
+    "    '-strict', 'experimental',\n",
+    "    os.path.join(savepath,\"output.mp4\")\n",
+    "]\n",
+    "\n",
+    "\n",
+    "    # Execute the ffmpeg command using subprocess\n",
+    "    subprocess.run(ffmpeg_command)\n",
+    "    os.remove(temp_subtitles_file.name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "ffmpeg version 6.0 Copyright (c) 2000-2023 the FFmpeg developers\n",
+      "  built with clang version 15.0.7\n",
+      "  configuration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1696213807101/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-libdav1d --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1696213807101/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-libass --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libopus --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1696213807101/_build_env/bin/pkg-config\n",
+      "  libavutil      58.  2.100 / 58.  2.100\n",
+      "  libavcodec     60.  3.100 / 60.  3.100\n",
+      "  libavformat    60.  3.100 / 60.  3.100\n",
+      "  libavdevice    60.  1.100 / 60.  1.100\n",
+      "  libavfilter     9.  3.100 /  9.  3.100\n",
+      "  libswscale      7.  1.100 /  7.  1.100\n",
+      "  libswresample   4. 10.100 /  4. 10.100\n",
+      "  libpostproc    57.  1.100 / 57.  1.100\n",
+      "Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'videos/qrvK_KuIeJk.mp4':\n",
+      "  Metadata:\n",
+      "    major_brand     : mp42\n",
+      "    minor_version   : 0\n",
+      "    compatible_brands: isommp42\n",
+      "    creation_time   : 2023-10-09T21:47:52.000000Z\n",
+      "  Duration: 00:13:11.89, start: 0.000000, bitrate: 347 kb/s\n",
+      "  Stream #0:0[0x1](und): Video: h264 (Main) (avc1 / 0x31637661), yuv420p(tv, bt709, progressive), 1280x720 [SAR 1:1 DAR 16:9], 215 kb/s, 29.97 fps, 29.97 tbr, 30k tbn (default)\n",
+      "    Metadata:\n",
+      "      creation_time   : 2023-10-09T21:47:52.000000Z\n",
+      "      handler_name    : ISO Media file produced by Google Inc. Created on: 10/09/2023.\n",
+      "      vendor_id       : [0][0][0][0]\n",
+      "  Stream #0:1[0x2](und): Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, stereo, fltp, 127 kb/s (default)\n",
+      "    Metadata:\n",
+      "      creation_time   : 2023-10-09T21:47:52.000000Z\n",
+      "      handler_name    : ISO Media file produced by Google Inc. Created on: 10/09/2023.\n",
+      "      vendor_id       : [0][0][0][0]\n",
+      "Guessed Channel Layout for Input Stream #1.0 : mono\n",
+      "Input #1, wav, from 'audio/merged_audio.wav':\n",
+      "  Duration: 00:13:05.89, bitrate: 352 kb/s\n",
+      "  Stream #1:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 22050 Hz, 1 channels, s16, 352 kb/s\n",
+      "Stream mapping:\n",
+      "  Stream #0:0 -> #0:0 (h264 (native) -> h264 (libx264))\n",
+      "  Stream #0:1 -> #0:1 (aac (native) -> aac (native))\n",
+      "Press [q] to stop, [?] for help\n",
+      "[Parsed_subtitles_0 @ 0x125f05de0] libass API version: 0x1701000\n",
+      "[Parsed_subtitles_0 @ 0x125f05de0] libass source: tarball: 0.17.1\n",
+      "[Parsed_subtitles_0 @ 0x125f05de0] Shaper: FriBidi 1.0.10 (SIMPLE) HarfBuzz-ng 8.2.1 (COMPLEX)\n",
+      "[Parsed_subtitles_0 @ 0x125f05de0] Unable to open /var/folders/1t/_whn9tx16w5f68951h3wrfyr0000gn/T/tmpgueqw446.srt\n",
+      "[AVFilterGraph @ 0x125f05af0] Error initializing filters\n",
+      "Error reinitializing filters!\n",
+      "Failed to inject frame into filter network: Invalid data found when processing input\n",
+      "Error while processing the decoded data for stream #0:0\n",
+      "Conversion failed!\n"
+     ]
+    }
+   ],
+   "source": [
+    "compileVideo('videos','audio','translated','output')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "ffmpeg version 6.0 Copyright (c) 2000-2023 the FFmpeg developers\n",
+      "  built with clang version 15.0.7\n",
+      "  configuration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1696213807101/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-libdav1d --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1696213807101/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-libass --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libopus --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1696213807101/_build_env/bin/pkg-config\n",
+      "  libavutil      58.  2.100 / 58.  2.100\n",
+      "  libavcodec     60.  3.100 / 60.  3.100\n",
+      "  libavformat    60.  3.100 / 60.  3.100\n",
+      "  libavdevice    60.  1.100 / 60.  1.100\n",
+      "  libavfilter     9.  3.100 /  9.  3.100\n",
+      "  libswscale      7.  1.100 /  7.  1.100\n",
+      "  libswresample   4. 10.100 /  4. 10.100\n",
+      "  libpostproc    57.  1.100 / 57.  1.100\n",
+      "Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'videos/qrvK_KuIeJk.mp4':\n",
+      "  Metadata:\n",
+      "    major_brand     : mp42\n",
+      "    minor_version   : 0\n",
+      "    compatible_brands: isommp42\n",
+      "    creation_time   : 2023-10-09T21:47:52.000000Z\n",
+      "  Duration: 00:13:11.89, start: 0.000000, bitrate: 347 kb/s\n",
+      "  Stream #0:0[0x1](und): Video: h264 (Main) (avc1 / 0x31637661), yuv420p(tv, bt709, progressive), 1280x720 [SAR 1:1 DAR 16:9], 215 kb/s, 29.97 fps, 29.97 tbr, 30k tbn (default)\n",
+      "    Metadata:\n",
+      "      creation_time   : 2023-10-09T21:47:52.000000Z\n",
+      "      handler_name    : ISO Media file produced by Google Inc. Created on: 10/09/2023.\n",
+      "      vendor_id       : [0][0][0][0]\n",
+      "  Stream #0:1[0x2](und): Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, stereo, fltp, 127 kb/s (default)\n",
+      "    Metadata:\n",
+      "      creation_time   : 2023-10-09T21:47:52.000000Z\n",
+      "      handler_name    : ISO Media file produced by Google Inc. Created on: 10/09/2023.\n",
+      "      vendor_id       : [0][0][0][0]\n",
+      "Guessed Channel Layout for Input Stream #1.0 : mono\n",
+      "Input #1, wav, from 'audio/merged_audio.wav':\n",
+      "  Duration: 00:13:05.89, bitrate: 352 kb/s\n",
+      "  Stream #1:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 22050 Hz, 1 channels, s16, 352 kb/s\n",
+      "Stream mapping:\n",
+      "  Stream #0:0 -> #0:0 (copy)\n",
+      "  Stream #1:0 -> #0:1 (pcm_s16le (native) -> aac (native))\n",
+      "Press [q] to stop, [?] for help\n",
+      "Output #0, mp4, to 'output/output_video.mp4':\n",
+      "  Metadata:\n",
+      "    major_brand     : mp42\n",
+      "    minor_version   : 0\n",
+      "    compatible_brands: isommp42\n",
+      "    encoder         : Lavf60.3.100\n",
+      "  Stream #0:0(und): Video: h264 (Main) (avc1 / 0x31637661), yuv420p(tv, bt709, progressive), 1280x720 [SAR 1:1 DAR 16:9], q=2-31, 215 kb/s, 29.97 fps, 29.97 tbr, 30k tbn (default)\n",
+      "    Metadata:\n",
+      "      creation_time   : 2023-10-09T21:47:52.000000Z\n",
+      "      handler_name    : ISO Media file produced by Google Inc. Created on: 10/09/2023.\n",
+      "      vendor_id       : [0][0][0][0]\n",
+      "  Stream #0:1: Audio: aac (LC) (mp4a / 0x6134706D), 22050 Hz, mono, fltp, 69 kb/s\n",
+      "    Metadata:\n",
+      "      encoder         : Lavc60.3.100 aac\n",
+      "frame=23733 fps=4178 q=-1.0 Lsize=   27885kB time=00:13:11.82 bitrate= 288.5kbits/s speed= 139x    \n",
+      "video:20816kB audio:6477kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 2.167844%\n",
+      "[aac @ 0x152709120] Qavg: 12239.573\n",
+      "ffmpeg version 6.0 Copyright (c) 2000-2023 the FFmpeg developers\n",
+      "  built with clang version 15.0.7\n",
+      "  configuration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1696213807101/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-libdav1d --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1696213807101/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-libass --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libopus --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1696213807101/_build_env/bin/pkg-config\n",
+      "  libavutil      58.  2.100 / 58.  2.100\n",
+      "  libavcodec     60.  3.100 / 60.  3.100\n",
+      "  libavformat    60.  3.100 / 60.  3.100\n",
+      "  libavdevice    60.  1.100 / 60.  1.100\n",
+      "  libavfilter     9.  3.100 /  9.  3.100\n",
+      "  libswscale      7.  1.100 /  7.  1.100\n",
+      "  libswresample   4. 10.100 /  4. 10.100\n",
+      "  libpostproc    57.  1.100 / 57.  1.100\n",
+      "Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'output/output_video.mp4':\n",
+      "  Metadata:\n",
+      "    major_brand     : isom\n",
+      "    minor_version   : 512\n",
+      "    compatible_brands: isomiso2avc1mp41\n",
+      "    encoder         : Lavf60.3.100\n",
+      "  Duration: 00:13:11.89, start: 0.000000, bitrate: 288 kb/s\n",
+      "  Stream #0:0[0x1](und): Video: h264 (Main) (avc1 / 0x31637661), yuv420p(tv, bt709, progressive), 1280x720 [SAR 1:1 DAR 16:9], 215 kb/s, 29.97 fps, 29.97 tbr, 30k tbn (default)\n",
+      "    Metadata:\n",
+      "      handler_name    : ISO Media file produced by Google Inc. Created on: 10/09/2023.\n",
+      "      vendor_id       : [0][0][0][0]\n",
+      "  Stream #0:1[0x2](und): Audio: aac (LC) (mp4a / 0x6134706D), 22050 Hz, mono, fltp, 67 kb/s (default)\n",
+      "    Metadata:\n",
+      "      handler_name    : SoundHandler\n",
+      "      vendor_id       : [0][0][0][0]\n",
+      "Stream mapping:\n",
+      "  Stream #0:0 -> #0:0 (h264 (native) -> h264 (libx264))\n",
+      "  Stream #0:1 -> #0:1 (aac (native) -> aac (native))\n",
+      "Press [q] to stop, [?] for help\n",
+      "[Parsed_subtitles_0 @ 0x125807250] libass API version: 0x1701000\n",
+      "[Parsed_subtitles_0 @ 0x125807250] libass source: tarball: 0.17.1\n",
+      "[Parsed_subtitles_0 @ 0x125807250] Shaper: FriBidi 1.0.10 (SIMPLE) HarfBuzz-ng 8.2.1 (COMPLEX)\n",
+      "[Parsed_subtitles_0 @ 0x125807250] Using font provider coretext\n",
+      "[Parsed_subtitles_0 @ 0x125807250] fontselect: (Arial, 400, 0) -> /System/Library/Fonts/Supplemental/Arial.ttf, -1, ArialMT\n",
+      "[libx264 @ 0x123f39fc0] using SAR=1/1\n",
+      "[libx264 @ 0x123f39fc0] using cpu capabilities: ARMv8 NEON\n",
+      "[libx264 @ 0x123f39fc0] profile High, level 3.1, 4:2:0, 8-bit\n",
+      "[libx264 @ 0x123f39fc0] 264 - core 164 r3095 baee400 - H.264/MPEG-4 AVC codec - Copyleft 2003-2022 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=15 lookahead_threads=2 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00\n",
+      "Output #0, mp4, to 'output/output_video_subtitled.mp4':\n",
+      "  Metadata:\n",
+      "    major_brand     : isom\n",
+      "    minor_version   : 512\n",
+      "    compatible_brands: isomiso2avc1mp41\n",
+      "    encoder         : Lavf60.3.100\n",
+      "  Stream #0:0(und): Video: h264 (avc1 / 0x31637661), yuv420p(tv, bt709, progressive), 1280x720 [SAR 1:1 DAR 16:9], q=2-31, 29.97 fps, 30k tbn (default)\n",
+      "    Metadata:\n",
+      "      handler_name    : ISO Media file produced by Google Inc. Created on: 10/09/2023.\n",
+      "      vendor_id       : [0][0][0][0]\n",
+      "      encoder         : Lavc60.3.100 libx264\n",
+      "    Side data:\n",
+      "      cpb: bitrate max/min/avg: 0/0/0 buffer size: 0 vbv_delay: N/A\n",
+      "  Stream #0:1(und): Audio: aac (LC) (mp4a / 0x6134706D), 22050 Hz, mono, fltp, 69 kb/s (default)\n",
+      "    Metadata:\n",
+      "      handler_name    : SoundHandler\n",
+      "      vendor_id       : [0][0][0][0]\n",
+      "      encoder         : Lavc60.3.100 aac\n",
+      "frame=23733 fps=436 q=-1.0 Lsize=   58121kB time=00:13:11.79 bitrate= 601.3kbits/s speed=14.6x    \n",
+      "video:51102kB audio:6407kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 1.064471%\n",
+      "[libx264 @ 0x123f39fc0] frame I:106   Avg QP:15.86  size: 43407\n",
+      "[libx264 @ 0x123f39fc0] frame P:7968  Avg QP:20.48  size:  4366\n",
+      "[libx264 @ 0x123f39fc0] frame B:15659 Avg QP:22.31  size:   826\n",
+      "[libx264 @ 0x123f39fc0] consecutive B-frames:  0.9% 22.0% 34.2% 43.0%\n",
+      "[libx264 @ 0x123f39fc0] mb I  I16..4: 25.1% 53.0% 21.9%\n",
+      "[libx264 @ 0x123f39fc0] mb P  I16..4:  1.9%  3.3%  0.4%  P16..4: 21.2%  3.8%  1.3%  0.0%  0.0%    skip:68.1%\n",
+      "[libx264 @ 0x123f39fc0] mb B  I16..4:  0.1%  0.2%  0.0%  B16..8: 15.4%  0.5%  0.1%  direct: 0.1%  skip:83.7%  L0:40.5% L1:57.9% BI: 1.5%\n",
+      "[libx264 @ 0x123f39fc0] 8x8 transform intra:57.0% inter:79.7%\n",
+      "[libx264 @ 0x123f39fc0] coded y,uvDC,uvAC intra: 32.1% 26.2% 6.0% inter: 1.7% 1.1% 0.0%\n",
+      "[libx264 @ 0x123f39fc0] i16 v,h,dc,p: 33% 25% 10% 31%\n",
+      "[libx264 @ 0x123f39fc0] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 31% 21% 30%  2%  3%  3%  3%  3%  3%\n",
+      "[libx264 @ 0x123f39fc0] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 31% 25% 15%  4%  6%  5%  6%  4%  4%\n",
+      "[libx264 @ 0x123f39fc0] i8c dc,h,v,p: 69% 15% 13%  3%\n",
+      "[libx264 @ 0x123f39fc0] Weighted P-Frames: Y:1.1% UV:0.4%\n",
+      "[libx264 @ 0x123f39fc0] ref P L0: 67.2% 12.6% 14.7%  5.5%  0.0%\n",
+      "[libx264 @ 0x123f39fc0] ref B L0: 81.9% 15.7%  2.4%\n",
+      "[libx264 @ 0x123f39fc0] ref B L1: 97.7%  2.3%\n",
+      "[libx264 @ 0x123f39fc0] kb/s:528.63\n",
+      "[aac @ 0x123f38490] Qavg: 12473.627\n"
+     ]
+    }
+   ],
+   "source": [
+    "def replace_audio_and_add_subtitles(input_video, input_audio, output_video, subtitles_file):\n",
+    "    # Replace audio\n",
+    "    replace_audio_command = [\n",
+    "        'ffmpeg',\n",
+    "        '-i', input_video,\n",
+    "        '-i', input_audio,\n",
+    "        '-c:v', 'copy',\n",
+    "        '-c:a', 'aac',\n",
+    "        '-strict', 'experimental',\n",
+    "        '-map', '0:v:0',\n",
+    "        '-map', '1:a:0',\n",
+    "        '-max_muxing_queue_size', '9999',\n",
+    "\n",
+    "        output_video\n",
+    "    ]\n",
+    "    subprocess.run(replace_audio_command)\n",
+    "\n",
+    "    # Add subtitles\n",
+    "    add_subtitles_command = [\n",
+    "        'ffmpeg',\n",
+    "        '-i', output_video,\n",
+    "        '-vf', f'subtitles={subtitles_file}',    \n",
+    "        output_video[:-4] + '_subtitled.mp4'  # Output file with subtitles\n",
+    "    ]\n",
+    "    subprocess.run(add_subtitles_command)\n",
+    "\n",
+    "input_video = 'videos/qrvK_KuIeJk.mp4'\n",
+    "input_audio = 'audio/merged_audio.wav'\n",
+    "output_video = 'output/output_video.mp4'\n",
+    "subtitles_file = 'translated/German_qrvK_KuIeJk.srt'\n",
+    "\n",
+    "replace_audio_and_add_subtitles(input_video, input_audio, output_video, subtitles_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def tsv_to_srt(input_file, output_file):\n",
+    "    df = pd.read_csv(input_file, sep='\\t')\n",
+    "    \n",
+    "    with open(output_file, 'w', encoding='utf-8') as srt_file:\n",
+    "        for index, row in df.iterrows():\n",
+    "            start_time = row['start'] / 1000.0\n",
+    "            end_time = row['end'] / 1000.0\n",
+    "            text = row['text']\n",
+    "\n",
+    "            srt_file.write(f\"{index + 1}\\n\")\n",
+    "            srt_file.write(f\"{convert_to_srt_time_format(start_time)} --> {convert_to_srt_time_format(end_time)}\\n\")\n",
+    "            srt_file.write(f\"{text}\\n\\n\")\n",
+    "\n",
+    "def convert_to_srt_time_format(seconds):\n",
+    "    hours, remainder = divmod(seconds, 3600)\n",
+    "    minutes, seconds = divmod(remainder, 60)\n",
+    "    milliseconds = int((seconds - int(seconds)) * 1000)\n",
+    "\n",
+    "    return f\"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{milliseconds:03d}\"\n",
+    "\n",
+    "\n",
+    "\n",
+    "tsv_file = 'translated/German_qrvK_KuIeJk.tsv'\n",
+    "srt_file = 'translated/German_qrvK_KuIeJk.srt'\n",
+    "\n",
+    "tsv_to_srt(tsv_file, srt_file)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "sports",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+pytube
+openai-whisper  # PyPI name of OpenAI Whisper; the unrelated "whisper" package has no load_model
+torch
+transformers
+TTS
+pydub
+soundfile
+pyrubberband
+moviepy

speech2text.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import os, re
+import whisper
+from whisper.utils import get_writer
+# Load the Whisper "base" checkpoint once at import time and move it to the CPU;
+# transcribeAndSave() reuses this module-level model for every file it handles.
+model=whisper.load_model("base").to('cpu')
+def get_file_stem(file_name):
+    # Use a regular expression to match and extract the file stem
+    match = re.match(r'^(.*/)?([^.]+)\.\w+$', file_name)
+    if match:
+        return match.group(2)
+    else:
+        return None
+def transcribeAndSave(file,outputPath):
+    result = model.transcribe(file)
+    writer = get_writer('tsv',outputPath)
+    writer(result,get_file_stem(file),None)
+def transcribeAndSaveFolder(fileFolder,outputpath):
+    os.makedirs(outputpath, exist_ok=True)
+    for file in os.listdir(fileFolder):
+        filepath=os.path.join(fileFolder,file)
+        transcribeAndSave(filepath,outputpath)

text2speech.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import os
+from pyrubberband import time_stretch
+from TTS.api import TTS
+import torch
+import pandas as pd
+from pydub import AudioSegment
+import soundfile as sf
+def mergeAudio(outputFolder,length):
+    audio_files = [f'{outputFolder}/adjusted_audio_{i}.wav' for i in range(length)]
+    combined = AudioSegment.from_wav(audio_files[0])
+    for file in audio_files[1:]:
+        segment = AudioSegment.from_wav(file)
+        combined = combined + segment
+    #remove all files in the folder
+    for file in audio_files:
+        os.remove(file)
+    combined.export(f'{outputFolder}/merged_audio.wav', format='wav')
+def generate_audio(tsvFile, outputFolder):
+    os.makedirs(outputFolder, exist_ok=True)
+    df = pd.read_csv(tsvFile, sep='\t')
+    tts = TTS("tts_models/de/thorsten/vits").to("cpu")
+    for index, row in df.iterrows():
+        start_time = int(row['start'])
+        end_time = int(row['end'])
+        text = row['text']
+        tts.tts_to_file(text=text, file_path=f'{outputFolder}/audio_{index}.wav')
+        audio =AudioSegment.from_wav(f'{outputFolder}/audio_{index}.wav')
+        y,sr = sf.read(f'{outputFolder}/audio_{index}.wav')
+        speed_ratio = len(audio) / (end_time - start_time)
+        # Time-stretch using pyrubberband
+        y_stretch = time_stretch(y, sr, speed_ratio)
+        # Create a new AudioSegment from the stretched audio data
+        # Save the adjusted audio
+        sf.write(f'{outputFolder}/adjusted_audio_{index}.wav',y_stretch,sr)
+        os.remove(f'{outputFolder}/audio_{index}.wav')
+        # Play the audio for verification
+    mergeAudio(outputFolder,len(df))
+def createAudioFolder(tsvFolder, outputFolder):
+    os.makedirs(outputFolder, exist_ok=True)
+    for file in os.listdir(tsvFolder):
+        filepath = os.path.join(tsvFolder, file)
+        generate_audio(filepath, outputFolder)

text2text.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import os
+from transformers import T5Tokenizer, T5ForConditionalGeneration
+def unpackTsv(tsvFile):
+    with open(tsvFile, 'r') as f:
+        lines = f.readlines()
+        lines = [l.strip().split('\t') for l in lines]
+        return lines
+def translateSentences(sentences,languageSecond="German",languageFirst="English"):
+    tokenizer = T5Tokenizer.from_pretrained("t5-small")
+    model = T5ForConditionalGeneration.from_pretrained("t5-small")
+    task_prefix = f"translate {languageFirst} to {languageSecond}: "
+    # use different length sentences to test batching
+    inputs = tokenizer([task_prefix + sentence for sentence in sentences], return_tensors="pt", padding=True)
+    output_sequences = model.generate(
+        input_ids=inputs["input_ids"],
+        attention_mask=inputs["attention_mask"],
+        do_sample=False,  # disable sampling to test if batching affects output
+    )
+    return (tokenizer.batch_decode(output_sequences, skip_special_tokens=True))
+def translateTsv(tsvFile,languageSecond="German",languageFirst="English"):
+    lines=unpackTsv(tsvFile)
+    sentences=[l[2] for l in lines[1:]]
+    translatedSentences=translateSentences(sentences,languageSecond,languageFirst)
+    for i in range(1,len(lines)):
+        lines[i][2]=translatedSentences[i-1]
+    return lines
+def writeTsv(lines,outputFile):
+    with open(outputFile, 'w') as f:
+        for l in lines:
+            f.write('\t'.join(l)+'\n')
+def translateTsvFile(tsvFile, outputFile, languageSecond="German", languageFirst="English"):
+    lines = translateTsv(tsvFile, languageSecond, languageFirst)
+    outputFilePath = os.path.join(outputFile, f"{languageSecond}_{os.path.basename(tsvFile)}")
+    writeTsv(lines, outputFilePath)
+def translateTsvFolder(tsvFolder, outputFolder, languageSecond="German", languageFirst="English"):
+    os.makedirs(outputFolder, exist_ok=True)
+    for file in os.listdir(tsvFolder):
+        filepath = os.path.join(tsvFolder, file)
+        translateTsvFile(filepath, outputFolder, languageSecond, languageFirst)