Spaces:
Sleeping
Sleeping
| # coding=utf8 | |
| # Youtube Video Translator | |
| # Developed by Ruslan Magana Vsevolodovna | |
| # https://ruslanmv.com/ | |
| # importing all necessary libraries | |
| import pathlib | |
| import sys, os | |
| from gtts import gTTS | |
| import gradio as gr | |
| import os | |
| import speech_recognition as sr | |
| from googletrans import Translator, constants | |
| from pprint import pprint | |
| from moviepy.editor import * | |
| from pytube import YouTube | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from utils import * | |
def download_video(url):
    """Download the first progressive MP4 stream of a YouTube video.

    Args:
        url: Address of the YouTube video to download.

    Returns:
        Whatever the selected stream's ``download()`` call returns (used by
        the caller as the path of the local video file).

    Raises:
        RuntimeError: If the video offers no progressive MP4 stream.
    """
    print("Downloading...")
    stream = (
        YouTube(url)
        .streams.filter(progressive=True, file_extension="mp4")
        .first()
    )
    # first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError raised on None.
    if stream is None:
        raise RuntimeError(
            "No progressive mp4 stream available for " + str(url)
        )
    local_file = stream.download()
    print("Downloaded")
    return local_file
def validate_youtube(url):
    """Tell whether a YouTube URL has to be rejected.

    Note the polarity: the result is True when the video must NOT be
    processed.

    Args:
        url: Address of the YouTube video to check.

    Returns:
        True if the video cannot be loaded or is longer than 600 seconds,
        False otherwise.
    """
    try:
        # The length lookup stays inside the try as well: a failure while
        # reading the video's metadata is reported the same way as a URL
        # that cannot be turned into a YouTube object.
        video_length = YouTube(url).length
    except Exception:
        print("Hi there URL seems invalid")
        return True

    # The length is expressed in seconds; 600 s is the 10 minute limit.
    if video_length > 600:
        print("Your video is larger than 10 minutes")
        return True
    print("Your video is less than 10 minutes")
    return False
def validate_url(url):
    """Tell whether ``url`` fails the ``validators.url`` check.

    Returns:
        True when the URL is rejected (a message is printed), False when it
        is accepted.
    """
    import validators

    verdict = validators.url(url)
    if verdict:
        return False
    print("Hi there URL seems invalid ")
    return True
def cleanup():
    """Delete every ``*.mp4`` and ``*.wav`` entry in the current directory.

    Deletion is best-effort: an entry that cannot be removed is reported and
    skipped, so the remaining files are still deleted.
    """
    import glob
    import pathlib

    patterns = ('*.mp4', '*.wav')
    junks = [name for pattern in patterns for name in glob.glob(pattern)]
    for junk in junks:
        print("Deleting", junk)
        try:
            pathlib.Path(junk).unlink()
        except OSError:
            # Guard each file separately; one locked or undeletable entry
            # must not stop the rest of the cleanup.
            print("I cannot delete the file because it is being used by another process")
def getSize(filename):
    """Return the size of ``filename`` in bytes."""
    return os.path.getsize(filename)
def _extract_video_id(url):
    """Return the video id carried by a YouTube URL."""
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)
    # Regular watch URLs: the id is the "v" query parameter, wherever it
    # appears and whatever other parameters follow it.
    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]
    # Short links and embed/shorts URLs carry the id as the last path segment.
    segments = [part for part in parsed.path.split("/") if part]
    short_host = parsed.netloc.lower().endswith("youtu.be")
    if segments and (short_host or segments[0] in ("embed", "shorts", "live", "v")):
        return segments[-1]
    raise ValueError("Cannot find a video id in URL: " + str(url))


def generate_transcript(url, lang_api):
    """Fetch the transcript of a YouTube video as one string.

    Args:
        url: Address of the YouTube video.
        lang_api: Language code requested from the transcript API.

    Returns:
        The transcript entries joined together, each followed by a single
        space; '[Music]' entries are left out.

    Raises:
        ValueError: If no video id can be found in ``url``.
        Exception: Whatever ``YouTubeTranscriptApi.get_transcript`` raises
            when the transcript cannot be fetched.
    """
    video_id = _extract_video_id(url)
    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang_api])
    return "".join(
        entry["text"] + " "
        for entry in transcript
        if entry["text"] != '[Music]'
    )
# Remember the directory the app was started from and a "temp" folder below it.
home_dir = os.getcwd()
temp_dir = os.path.join(home_dir, "temp")
# Make sure the temp folder exists (no error when it is already there).
os.makedirs(temp_dir, exist_ok=True)
# Publish both paths as environment variables; video_to_translate reads
# them back with os.getenv.
os.environ.update(home_dir=home_dir, temp_dir=temp_dir)
# UI language name -> (speech-recognition locale, transcript language code).
_SOURCE_LANGUAGES = {
    "English": ("en-US", "en"),
    "Italian": ("it-IT", "it"),
    "Spanish": ("es-MX", "es"),
    # The transcript code used to be 'rus'; every other entry uses the
    # two-letter code, so Russian now does too.
    "Russian": ("ru-RU", "ru"),
    "German": ("de-DE", "de"),
    "Japanese": ("ja-JP", "ja"),
}

# UI language name -> language code passed to the translator and to gTTS.
_TARGET_LANGUAGES = {
    "English": "en",
    "Italian": "it",
    "Spanish": "es",
    "Russian": "ru",
    "German": "de",
    "Japanese": "ja",
}


def _speech_to_text(videoclip, lang_in):
    """Recognize the speech in the clip's audio track.

    The audio is written to ``audio.wav`` and sent to
    ``Recognizer.recognize_google`` with ``lang_in`` as the language.

    Returns:
        The recognized text, or None when recognition raised an error.
    """
    videoclip.audio.write_audiofile("audio.wav", codec='pcm_s16le')
    recognizer = sr.Recognizer()
    with sr.AudioFile("audio.wav") as source:
        # Load the whole recording into memory.
        audio_data = recognizer.record(source)
    print("Recognize from ", lang_in)

    # The original note here cites a 10 MB per-request API limit, but the
    # threshold actually applied is 50,000,000 bytes.
    if getSize("audio.wav") <= 50000000:
        try:
            return recognizer.recognize_google(audio_data, language=lang_in)
        except Exception:
            return None

    print("The wav is too large")
    text = ""
    for chunk in split_audio_wav("audio.wav"):
        print("Converting audio to text", chunk)
        try:
            # NOTE(review): every iteration sends the full `audio_data`, not
            # `chunk`. Left unchanged because the return type of
            # split_audio_wav is not visible here - confirm and fix.
            text_chunk = recognizer.recognize_google(audio_data, language=lang_in)
        except Exception:
            return None
        text = text + text_chunk + " "
    return text


def _dub_video(videoclip, url, lang_in, lang_api, lang, home_dir):
    """Replace the clip's audio with a spoken translation and render it.

    Returns:
        The name of the rendered video file, or None when the speech could
        not be recognized or the text could not be translated.
    """
    try:
        # Prefer an existing transcript over speech recognition.
        text = generate_transcript(url, lang_api)
    except Exception:
        print("No Transcript Found")
        text = _speech_to_text(videoclip, lang_in)
        if text is None:
            print("This video cannot be recognized")
            return None
    else:
        print("Transcript Found")

    print("Destination language ", lang)
    translator = Translator()
    try:
        translation = translator.translate(text, dest=lang)
    except Exception:
        print("This text cannot be translated")
        return None

    # Synthesize the translated text; this overwrites audio.wav.
    speech = gTTS(text=translation.text, lang=lang, slow=False)
    speech.save("audio.wav")

    audioclip = AudioFileClip("audio.wav")
    try:
        videoclip.audio = CompositeAudioClip([audioclip])
        new_video = "video_translated_" + lang + ".mp4"
        # Render from the directory the app was started in.
        os.chdir(home_dir)
        print('Final directory', os.getcwd())
        videoclip.write_videofile(new_video)
    finally:
        audioclip.close()
    return new_video


def video_to_translate(url, initial_language, final_language):
    """Translate the speech of a YouTube video and return the dubbed video.

    The video is downloaded, its text is taken from the YouTube transcript
    (or from speech recognition when no transcript can be fetched),
    translated, converted to speech and written back as the audio track.

    Args:
        url: Address of the YouTube video.
        initial_language: Language spoken in the video; one of the keys of
            ``_SOURCE_LANGUAGES``.
        final_language: Language to translate to; one of the keys of
            ``_TARGET_LANGUAGES``.

    Returns:
        Path of the translated video. "./demo/tryagain2.mp4" is returned
        when the URL is rejected by ``validate_youtube`` and
        "./demo/tryagain.mp4" when recognition or translation fails.

    Raises:
        ValueError: If either language name is not supported.
    """
    print('Checking the url')
    if validate_youtube(url) is True:
        return "./demo/tryagain2.mp4"

    # Reject unknown languages before downloading anything; previously this
    # surfaced later as an unbound local variable.
    if initial_language not in _SOURCE_LANGUAGES:
        raise ValueError(
            "Unsupported initial language: {!r}".format(initial_language)
        )
    if final_language not in _TARGET_LANGUAGES:
        raise ValueError(
            "Unsupported final language: {!r}".format(final_language)
        )
    lang_in, lang_api = _SOURCE_LANGUAGES[initial_language]
    lang = _TARGET_LANGUAGES[final_language]

    home_dir = os.getenv('home_dir')
    temp_dir = os.getenv('temp_dir')
    print('Initial directory:', home_dir)
    # Remove the mp4/wav files left in the working directory by earlier runs.
    cleanup()
    print('Temporal directory:', temp_dir)

    file_obj = download_video(url)
    print(file_obj)
    videoclip = VideoFileClip(file_obj)
    try:
        new_video = _dub_video(videoclip, url, lang_in, lang_api, lang, home_dir)
    finally:
        # Close the clip on every path, so the download is no longer held
        # open when cleanup() tries to delete it.
        videoclip.close()

    if new_video is None:
        cleanup()
        return "./demo/tryagain.mp4"
    return new_video
# Static texts and sample inputs shown on the page.
_APP_TITLE = 'Video Youtube Translator'
_APP_DESCRIPTION = 'A simple application that translates Youtube small videos from English, Italian, Japanese, Russian, Spanish, and German to Italian, Spanish, Russian, English and Japanese. Wait one minute to process.'
_APP_ARTICLE = '''<div>
<p style="text-align: center"> All you need to do is to paste the Youtube link and hit submit,, then wait for compiling. After that click on Play/Pause for listing to the video. The video is saved in an mp4 format.
The lenght video limit is 10 minutes. For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>
</p>
</div>'''
_APP_EXAMPLES = [
    ["https://www.youtube.com/watch?v=Cu3R5it4cQs&list", "English", "Italian"],
    ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English", "Spanish"],
    ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English", "Russian"],
    ["https://www.youtube.com/watch?v=QbkhvLrlex4", "Russian", "English"],
    ["https://www.youtube.com/watch?v=qzzweIQoIOU", "Japanese", "English"],
    ["https://www.youtube.com/watch?v=nOGZvu6tJFE", "German", "Spanish"],
]

# Input widgets: source language, target language and the video address.
initial_language = gr.inputs.Dropdown(
    ["English", "Italian", "Japanese", "Russian", "Spanish", "German"]
)
final_language = gr.inputs.Dropdown(
    ["Russian", "Italian", "Spanish", "German", "English", "Japanese"]
)
url = gr.inputs.Textbox(label="Enter the YouTube URL below:")

# Wire the widgets to video_to_translate and start the web app.
demo = gr.Interface(
    fn=video_to_translate,
    inputs=[url, initial_language, final_language],
    outputs='video',
    verbose=True,
    title=_APP_TITLE,
    description=_APP_DESCRIPTION,
    article=_APP_ARTICLE,
    examples=_APP_EXAMPLES,
)
demo.launch()