salomonsky committed on
Commit
3a78a97
·
1 Parent(s): 90de24b

Update dub.py

Browse files
Files changed (1) hide show
  1. dub.py +25 -44
dub.py CHANGED
@@ -1,20 +1,24 @@
1
  import os
2
  import tempfile
3
- import json
4
- import requests
5
  from gtts import gTTS
6
  from pydub import AudioSegment
7
  import moviepy.editor as mp
8
- from vosk import Model, KaldiRecognizer
9
- from moviepy.editor import VideoFileClip, AudioFileClip
10
 
11
- def download_vosk_model(model_url, destination_folder):
12
- os.makedirs(destination_folder, exist_ok=True)
13
- model_zip = os.path.join(destination_folder, "vosk-model-es-0.42.zip")
14
- response = requests.get(model_url)
15
- with open(model_zip, 'wb') as f:
16
- f.write(response.content)
17
- return model_zip
 
 
 
 
 
 
 
18
 
19
  def text_to_speech(text, lang='es'):
20
  tts = gTTS(text=text, lang=lang, slow=False)
@@ -25,27 +29,21 @@ def text_to_speech(text, lang='es'):
25
  def transcribe_and_dub(video_file):
26
  temp_folder = tempfile.mkdtemp()
27
 
28
- vosk_model_url = "https://alphacephei.com/vosk/models/vosk-model-es-0.42.zip"
29
- model_path = os.path.join(temp_folder, "vosk-model-es-0.42")
30
- model_zip = download_vosk_model(vosk_model_url, temp_folder)
31
-
32
  try:
33
- import zipfile
34
- with zipfile.ZipFile(model_zip, 'r') as zip_ref:
35
- zip_ref.extractall(model_path)
36
-
37
  with mp.VideoFileClip(video_file) as video:
38
  audio_file = os.path.join(temp_folder, "audio.wav")
39
- video.audio.write_audiofile(audio_file, codec='pcm_s16le')
40
 
41
- model = Model(model_path)
42
- recognizer = KaldiRecognizer(model, AudioFileClip(audio_file).raw_data)
43
 
44
- result = json.loads(recognizer.result())
 
45
 
46
- text = result['text']
 
47
 
48
  dubbed_audio_file = text_to_speech(text)
 
49
 
50
  original_audio = AudioSegment.from_file(audio_file, format="wav")
51
  dubbed_audio = AudioSegment.from_mp3(dubbed_audio_file)
@@ -56,27 +54,10 @@ def transcribe_and_dub(video_file):
56
 
57
  dubbed_video_file = os.path.join(temp_folder, "dubbed_video.mp4")
58
  video_with_dubbed_audio.write_videofile(dubbed_video_file, codec="libx264", audio_codec="aac", verbose=False)
 
59
 
60
  return dubbed_video_file
61
 
62
  except Exception as e:
63
- print(f"Error during transcription and dubbing: {str(e)}")
64
- return None
65
-
66
- finally:
67
- os.remove(model_zip)
68
-
69
- def merge_video_audio(video_file, audio_file, output_file):
70
- video_clip = VideoFileClip(video_file)
71
-
72
- if audio_file is not None:
73
- audio_clip = AudioFileClip(audio_file)
74
-
75
- if audio_clip.duration < video_clip.duration:
76
- audio_clip = audio_clip.set_duration(video_clip.duration)
77
- else:
78
- audio_clip = audio_clip.subclip(0, video_clip.duration)
79
-
80
- video_clip = video_clip.set_audio(audio_clip)
81
-
82
- video_clip.write_videofile(output_file, codec="libx264", audio_codec="aac", verbose=False)
 
1
  import os
2
  import tempfile
 
 
3
  from gtts import gTTS
4
  from pydub import AudioSegment
5
  import moviepy.editor as mp
6
+ import speech_recognition as sr
 
7
 
8
+ def merge_video_audio(video_file, audio_file, output_file):
9
+ video_clip = mp.VideoFileClip(video_file)
10
+
11
+ if audio_file is not None:
12
+ audio_clip = mp.AudioFileClip(audio_file)
13
+
14
+ if audio_clip.duration < video_clip.duration:
15
+ audio_clip = audio_clip.set_duration(video_clip.duration)
16
+ else:
17
+ audio_clip = audio_clip.subclip(0, video_clip.duration)
18
+
19
+ video_clip = video_clip.set_audio(audio_clip)
20
+
21
+ video_clip.write_videofile(output_file, codec="libx264", audio_codec="aac", verbose=False)
22
 
23
  def text_to_speech(text, lang='es'):
24
  tts = gTTS(text=text, lang=lang, slow=False)
 
29
  def transcribe_and_dub(video_file):
30
  temp_folder = tempfile.mkdtemp()
31
 
 
 
 
 
32
  try:
 
 
 
 
33
  with mp.VideoFileClip(video_file) as video:
34
  audio_file = os.path.join(temp_folder, "audio.wav")
35
+ video.audio.write_audiofile(audio_file)
36
 
37
+ recognizer = sr.Recognizer()
 
38
 
39
+ with sr.AudioFile(audio_file) as source:
40
+ audio = recognizer.record(source)
41
 
42
+ text = recognizer.recognize_google(audio, language='es')
43
+ print(f"Texto reconocido: {text}")
44
 
45
  dubbed_audio_file = text_to_speech(text)
46
+ print(f"Archivo de audio doblado: {dubbed_audio_file}")
47
 
48
  original_audio = AudioSegment.from_file(audio_file, format="wav")
49
  dubbed_audio = AudioSegment.from_mp3(dubbed_audio_file)
 
54
 
55
  dubbed_video_file = os.path.join(temp_folder, "dubbed_video.mp4")
56
  video_with_dubbed_audio.write_videofile(dubbed_video_file, codec="libx264", audio_codec="aac", verbose=False)
57
+ print(f"Archivo de video doblado: {dubbed_video_file}")
58
 
59
  return dubbed_video_file
60
 
61
  except Exception as e:
62
+ print(f"Error durante la transcripción y el doblaje: {str(e)}")
63
+ return None