Spaces:

Curify
/

Studio_V0

Sleeping

App Files Files Community

qqwjq1981 committed on Jan 16, 2025

Commit

9c80aa8

verified ·

1 Parent(s): aeaf04e

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -22

app.py CHANGED Viewed

@@ -16,6 +16,8 @@ import json
 from nltk.tokenize import sent_tokenize
 import logging
 from textblob import TextBlob
 # Configure logging
 logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -30,27 +32,23 @@ def transcribe_video(video_path):
     video = VideoFileClip(video_path)
     audio_path = "audio.wav"
     video.audio.write_audiofile(audio_path)
-    # Initialize recognizer class (for recognizing the speech)
-    recognizer = sr.Recognizer()
-    # Use SpeechRecognition to transcribe audio
-    with sr.AudioFile(audio_path) as source:
-        audio_text = recognizer.record(source)
-        transcript = recognizer.recognize_google(audio_text)
-    # Split transcript into sentences
-    sentences = split_into_sentences(transcript)
-    # Create a list of timestamps for each sentence
-    timestamps = []
-    duration_per_sentence = len(audio_text.frame_data) / len(sentences) / 44100  # Approximate duration per sentence in seconds
-    for i, sentence in enumerate(sentences):
-        start_time = i * duration_per_sentence
-        timestamps.append({"start": start_time, "text": sentence})
-    return timestamps
 # Function to get the appropriate translation model based on target language
 def get_translation_model(target_language):
@@ -79,7 +77,8 @@ def translate_text(transcription_json, target_language):
         translated_json.append({
             "start": entry["start"],
             "original": original_text,
-            "translated": translated_text
         })
     # Return the translated timestamps as a JSON string
@@ -94,10 +93,10 @@ def add_transcript_to_video(video_path, timestamps, output_path):
     for entry in timestamps:
         # Create a text clip for each sentence
-        txt_clip = TextClip(entry["text"], fontsize=24, color='white', bg_color='black', size=video.size)
         # Set the start time and duration for each text clip
-        txt_clip = txt_clip.set_start(entry["start"]).set_duration(3).set_position(('bottom')).set_opacity(0.7)  # Display each sentence for 3 seconds
         # Append the text clip to the list
         text_clips.append(txt_clip)

 from nltk.tokenize import sent_tokenize
 import logging
 from textblob import TextBlob
+import whisper
 # Configure logging
 logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
     video = VideoFileClip(video_path)
     audio_path = "audio.wav"
     video.audio.write_audiofile(audio_path)
+    # Load Whisper model
+    model = whisper.load_model("base")  # Options: tiny, base, small, medium, large
+    # Transcribe with Whisper
+    result = model.transcribe(audio_path, word_timestamps=True)
+    # Extract timestamps and text
+    transcript_with_timestamps = [
+        {
+            "start": segment["start"],
+            "end": segment["end"],
+            "text": segment["text"]
+        }
+        for segment in result["segments"]
+    ]
+    return transcript_with_timestamps
 # Function to get the appropriate translation model based on target language
 def get_translation_model(target_language):
         translated_json.append({
             "start": entry["start"],
             "original": original_text,
+            "translated": translated_text,
+            "end": entry["end"]
         })
     # Return the translated timestamps as a JSON string
     for entry in timestamps:
         # Create a text clip for each sentence
+        txt_clip = TextClip(entry["translated"], fontsize=24, color='white', bg_color='black', size=video.size)
         # Set the start time and duration for each text clip
+        txt_clip = txt_clip.set_start(entry["start"]).set_duration(entry["end"] - entry["start"]).set_position(('bottom')).set_opacity(0.7)  # Display each sentence for the length of its transcribed segment (end - start)
         # Append the text clip to the list
         text_clips.append(txt_clip)