salomonsky committed on
Commit
b33151f
·
1 Parent(s): c56787e

Update dub.py

Browse files
Files changed (1) hide show
  1. dub.py +9 -26
dub.py CHANGED
@@ -3,26 +3,13 @@ import tempfile
3
  from gtts import gTTS
4
  from pydub import AudioSegment
5
  import moviepy.editor as mp
6
- from vosk import Model, KaldiRecognizer
7
- import json
8
-
9
- model = Model("vosk-model-es-0.10")
10
-
11
- def text_to_speech(text, lang='es'):
12
- tts = gTTS(text=text, lang=lang, slow=False)
13
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
14
- tts.save(temp_file.name)
15
- return temp_file.name
16
-
17
- def play_audio(audio_file):
18
- sound = AudioSegment.from_mp3(audio_file)
19
- sound.export("temp.wav", format="wav")
20
- os.system("start temp.wav")
21
 
22
  def transcribe_and_dub(video_file):
23
  temp_folder = tempfile.mkdtemp()
24
- recognizer = KaldiRecognizer(Model("vosk-model-es-0.10"), 16000)
25
 
 
 
26
  with mp.VideoFileClip(video_file) as video:
27
  audio_file = os.path.join(temp_folder, "audio.wav")
28
  video.audio.write_audiofile(audio_file)
@@ -32,18 +19,14 @@ def transcribe_and_dub(video_file):
32
 
33
  audio_file = os.path.join(temp_folder, "audio16000.wav")
34
 
35
- with open(audio_file, 'rb') as f:
36
- data = f.read(1024)
37
- while data:
38
- recognizer.AcceptWaveform(data)
39
- data = f.read(1024)
40
-
41
- result = json.loads(recognizer.Result())
42
- text = result.get("text", "")
43
-
44
  dubbed_audio_file = text_to_speech(text)
45
  dubbed_video_file = os.path.join(temp_folder, "dubbed_video.mp4")
46
-
47
  with mp.VideoFileClip(video_file) as original_video:
48
  dubbed_audio = AudioSegment.from_mp3(dubbed_audio_file)
49
  dubbed_audio = dubbed_audio[:len(original_video.audio)]
 
3
  from gtts import gTTS
4
  from pydub import AudioSegment
5
  import moviepy.editor as mp
6
+ import speech_recognition as sr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  def transcribe_and_dub(video_file):
9
  temp_folder = tempfile.mkdtemp()
 
10
 
11
+ recognizer = sr.Recognizer()
12
+
13
  with mp.VideoFileClip(video_file) as video:
14
  audio_file = os.path.join(temp_folder, "audio.wav")
15
  video.audio.write_audiofile(audio_file)
 
19
 
20
  audio_file = os.path.join(temp_folder, "audio16000.wav")
21
 
22
+ with sr.AudioFile(audio_file) as source:
23
+ audio = recognizer.record(source)
24
+
25
+ text = recognizer.recognize_google(audio, language='es')
26
+
 
 
 
 
27
  dubbed_audio_file = text_to_speech(text)
28
  dubbed_video_file = os.path.join(temp_folder, "dubbed_video.mp4")
29
+
30
  with mp.VideoFileClip(video_file) as original_video:
31
  dubbed_audio = AudioSegment.from_mp3(dubbed_audio_file)
32
  dubbed_audio = dubbed_audio[:len(original_video.audio)]