salomonsky committed on
Commit
0242288
·
1 Parent(s): 983a76a

Update dub.py

Browse files
Files changed (1) hide show
  1. dub.py +44 -25
dub.py CHANGED
@@ -1,34 +1,53 @@
1
  import os
 
2
  from gtts import gTTS
3
- import speech_recognition as sr
4
  from pydub import AudioSegment
5
- from pydub.playback import play
 
 
6
 
7
- def text_to_speech(text, language='es'):
8
- tts = gTTS(text=text, lang=language, slow=False)
9
- tts.save('output.mp3')
10
- return 'output.mp3'
 
11
 
12
  def play_audio(audio_file):
13
- os.system(f'start {audio_file}')
 
 
14
 
15
  def transcribe_and_dub(video_file):
16
- recognizer = sr.Recognizer()
17
- audio_clip = AudioSegment.from_file(video_file, format="mp4")
18
- audio_wav = audio_clip.export("audio.wav", format="wav")
 
 
 
 
 
 
 
 
19
 
20
- with sr.AudioFile("audio.wav") as source:
21
- audio_data = recognizer.record(source)
22
-
23
- try:
24
- recognized_text = recognizer.recognize_google(audio_data, language="es")
25
- audio_file = text_to_speech(recognized_text)
26
- dubbed_file = f"{video_file.replace('.mp4', '_dubbed.mp4')}"
27
- video_clip = VideoFileClip(video_file)
28
- dubbed_audio = AudioSegment.from_file(audio_file, format="mp3")
29
- video_clip = video_clip.set_audio(dubbed_audio)
30
- video_clip.write_videofile(dubbed_file, codec="libx264", audio_codec="aac", verbose=False)
31
- play_audio(dubbed_file)
32
- return dubbed_file
33
- except sr.UnknownValueError:
34
- return None
 
 
 
 
 
 
 
1
  import os
2
+ import tempfile
3
  from gtts import gTTS
 
4
  from pydub import AudioSegment
5
+ import moviepy.editor as mp
6
+ from vosk import Model, KaldiRecognizer
7
+ import json
8
 
9
def text_to_speech(text, lang='es'):
    """Synthesise *text* with gTTS and return the path of the saved MP3.

    Args:
        text: Text to speak.
        lang: Language code handed to gTTS (defaults to Spanish, ``'es'``).

    Returns:
        Path of a newly created ``.mp3`` file in the system temp directory.
        The file is not deleted automatically; the caller owns it.

    Raises:
        Whatever ``gTTS`` raises while building or saving the speech; the
        placeholder file is removed before the error propagates.
    """
    tts = gTTS(text=text, lang=lang, slow=False)

    # Reserve a unique file name, then close the handle straight away: gTTS
    # reopens the path itself, and on Windows a path cannot be opened a second
    # time while the NamedTemporaryFile handle is still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        mp3_path = temp_file.name

    try:
        tts.save(mp3_path)
    except Exception:
        # Do not leave an empty placeholder behind when synthesis fails.
        os.remove(mp3_path)
        raise
    return mp3_path
14
 
def play_audio(audio_file):
    """Convert an MP3 to WAV and open it with the platform's default player.

    Args:
        audio_file: Path of an MP3 file that pydub can decode.

    The WAV copy is written to a uniquely named file in the system temp
    directory (rather than a fixed ``temp.wav`` in the working directory) and
    is left on disk, because the external player may still be reading it
    after this function returns.

    Raises:
        FileNotFoundError: On non-Windows systems when the ``open`` /
            ``xdg-open`` launcher is not installed.
    """
    import subprocess
    import sys

    sound = AudioSegment.from_mp3(audio_file)

    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    # export() hands back the open output file; close it so the player can
    # open the finished file without a dangling handle on our side.
    sound.export(wav_path, format="wav").close()

    # "start" is a cmd.exe built-in, so it only ever worked on Windows; pick
    # the equivalent "open with default application" mechanism per platform.
    if sys.platform.startswith("win"):
        os.startfile(wav_path)
    elif sys.platform == "darwin":
        subprocess.run(["open", wav_path], check=False)
    else:
        subprocess.run(["xdg-open", wav_path], check=False)
19
 
def _run_ffmpeg(*args):
    """Run ffmpeg with *args*, raising ``CalledProcessError`` if it fails."""
    import subprocess

    # An argument list (no shell) keeps paths containing spaces or quotes
    # intact; "-y" avoids ffmpeg blocking on an overwrite prompt.
    subprocess.run(["ffmpeg", "-y", *args], check=True)


def _transcribe_wav(wav_path, recognizer):
    """Return all text *recognizer* finds in the mono PCM WAV at *wav_path*."""
    import wave

    pieces = []
    # Reading through ``wave`` strips the RIFF header, so only PCM frames are
    # fed to the recognizer.
    with wave.open(wav_path, "rb") as wav:
        while True:
            frames = wav.readframes(4000)
            if not frames:
                break
            # AcceptWaveform() returns True when an utterance has ended; its
            # text must be collected now or it is lost once more audio is fed.
            if recognizer.AcceptWaveform(frames):
                pieces.append(json.loads(recognizer.Result()).get("text", ""))
    # FinalResult() flushes whatever audio is still buffered.
    pieces.append(json.loads(recognizer.FinalResult()).get("text", ""))
    return " ".join(piece for piece in pieces if piece)


def transcribe_and_dub(video_file):
    """Transcribe a video's speech with Vosk and mux a gTTS re-reading of it.

    The audio track is extracted, resampled to 16 kHz mono, transcribed with
    the Vosk model loaded from ``vosk-model-es-0.10``, re-synthesised through
    ``text_to_speech`` and finally combined with the original video stream.

    Args:
        video_file: Path of the input video.

    Returns:
        Path of the dubbed ``.mp4`` inside a fresh temporary directory (the
        caller owns that directory), or ``None`` when the video has no audio
        track or no speech was recognised.

    Raises:
        subprocess.CalledProcessError: If an ffmpeg invocation fails.
        FileNotFoundError: If the ``ffmpeg`` executable is not on ``PATH``.
    """
    import shutil

    temp_folder = tempfile.mkdtemp()
    recognizer = KaldiRecognizer(Model("vosk-model-es-0.10"), 16000)

    extracted_audio = os.path.join(temp_folder, "audio.wav")
    resampled_audio = os.path.join(temp_folder, "audio16000.wav")
    dubbed_audio_wav = os.path.join(temp_folder, "dubbed_audio.wav")
    dubbed_video_file = os.path.join(temp_folder, "dubbed_video.mp4")

    # Open the video once: dump its audio and remember how long it is.
    with mp.VideoFileClip(video_file) as video:
        has_audio = video.audio is not None
        if has_audio:
            video.audio.write_audiofile(extracted_audio)
            # pydub slices in milliseconds; moviepy durations are in seconds.
            audio_ms = int(video.audio.duration * 1000)
    if not has_audio:
        shutil.rmtree(temp_folder, ignore_errors=True)
        return None

    # The recognizer above was created for 16 kHz input, so resample to match.
    _run_ffmpeg("-i", extracted_audio, "-ar", "16000", "-ac", "1", resampled_audio)

    text = _transcribe_wav(resampled_audio, recognizer)
    if not text:
        # Nothing was recognised, so there is nothing to synthesise.
        shutil.rmtree(temp_folder, ignore_errors=True)
        return None

    dubbed_audio_file = text_to_speech(text)
    # Trim the synthetic speech so it never runs past the original audio.
    dubbed_audio = AudioSegment.from_mp3(dubbed_audio_file)[:audio_ms]
    dubbed_audio.export(dubbed_audio_wav, format="wav").close()
    try:
        # Best-effort cleanup of the intermediate MP3; the WAV copy is what
        # gets muxed, so a failure here does not affect the result.
        os.remove(dubbed_audio_file)
    except OSError:
        pass

    # Copy the video stream untouched and replace the audio with the dub.
    _run_ffmpeg(
        "-i", video_file,
        "-i", dubbed_audio_wav,
        "-c:v", "copy",
        "-c:a", "aac",
        "-strict", "experimental",
        "-map", "0:v:0",
        "-map", "1:a:0",
        dubbed_video_file,
    )

    return dubbed_video_file