Spaces:

Curify
/

Studio_V0

Sleeping

App Files Files Community

qqwjq1981 committed on Jan 16, 2025

Commit

9d68248

verified ·

1 Parent(s): 7b0ce86

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -35

app.py CHANGED Viewed

@@ -3,28 +3,41 @@ from datetime import datetime
 import random
 from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
-# Initialize the Whisper pipeline
-whisper_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-medium")
-def transcribe_audio_from_file(file_path):
-    """
-    Transcribes audio from a local file using the Whisper pipeline.
-    Args:
-        file_path (str): Path to the local media file.
-    Returns:
-        str: Transcription text if successful, otherwise None.
-    """
-    try:
-        # Transcribe the audio using Whisper
-        transcription = whisper_pipeline(file_path, return_timestamps=True)
-        logger.debug(f"Transcription: {transcription['text']}")
-        return transcription["text"]
-    except Exception as e:
-        logger.error(f"An error occurred during transcription: {e}")
-        return None
 # Initialize the translation pipeline
 translation_pipeline = pipeline("translation", model="Helsinki-NLP/opus-mt-en-{target_language}")
@@ -40,11 +53,52 @@ def get_translation_model(target_language):
     }
     return model_map.get(target_language, "Helsinki-NLP/opus-mt-en-fr")  # Default to French if not found
-# Example usage in your application
-def translate_text(text, target_language):
     translation_model_id = get_translation_model(target_language)
     translator = pipeline("translation", model=translation_model_id)
-    return translator(text)[0]['translation_text']
 # Mock functions for platform actions and analytics
 def mock_post_to_platform(platform, content_title):
@@ -61,19 +115,27 @@ def upload_and_manage(file, platform, language):
     if file is None:
         return "Please upload a video/audio file.", None, None, None
-    # Transcribe audio from uploaded media file
-    transcription = transcribe_audio_from_media_file(file.name)
-    # Translate transcription to the selected language
-    translation = translate_text(transcription, language)
-    # Mock posting action
     post_message = mock_post_to_platform(platform, file.name)
     # Mock analytics generation
     analytics = mock_analytics()
-    return post_message, transcription, translation, analytics
 def generate_dashboard(analytics):
     if not analytics:
@@ -100,12 +162,12 @@ def build_interface():
             with gr.Row():
                 post_output = gr.Textbox(label="Posting Status", interactive=False)
-                transcription_output = gr.Textbox(label="Transcription", interactive=False)
-                translation_output = gr.Textbox(label="Translation", interactive=False)
             submit_button.click(upload_and_manage,
                                 inputs=[file_input, platform_input, language_input],
-                                outputs=[post_output, transcription_output, translation_output, gr.State()])
         with gr.Tab("Analytics Dashboard"):
             gr.Markdown("## Content Performance Analytics")
@@ -116,5 +178,6 @@ def build_interface():
     return demo
 demo = build_interface()
-demo.launch()

 import random
 from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
+import moviepy.editor as mp
+import speech_recognition as sr
+import json
+from nltk.tokenize import sent_tokenize
+def transcribe_video(video_path):
+    """Transcribe a video's speech and estimate a start time per sentence.
+
+    The audio track is written to ``audio.wav`` in the working directory,
+    recognized with ``Recognizer.recognize_google`` and split into sentences
+    with ``sent_tokenize``. The recognizer returns plain text without timing,
+    so start times are an approximation: the audio length is shared evenly
+    between the sentences.
+
+    Args:
+        video_path: Path of the video file to transcribe.
+
+    Returns:
+        A list of ``{"start": seconds, "text": sentence}`` dicts in spoken
+        order, or an empty list when the transcript contains no sentences.
+    """
+    audio_path = "audio.wav"
+    # Extract the audio track, then release the reader held by the clip even
+    # if writing the audio file fails.
+    video = mp.VideoFileClip(video_path)
+    try:
+        video.audio.write_audiofile(audio_path)
+    finally:
+        video.close()
+    # Initialize recognizer class (for recognizing the speech)
+    recognizer = sr.Recognizer()
+    # Use SpeechRecognition to transcribe audio
+    with sr.AudioFile(audio_path) as source:
+        audio_data = recognizer.record(source)
+        transcript = recognizer.recognize_google(audio_data)
+    # Split transcript into sentences
+    sentences = sent_tokenize(transcript)
+    if not sentences:
+        # Nothing to time; also avoids dividing by zero below.
+        return []
+    # frame_data holds raw PCM bytes, so its length must be divided by the
+    # actual sample rate AND the bytes per sample to get seconds. Dividing by
+    # a fixed 44100 alone mis-scales every start time.
+    bytes_per_second = audio_data.sample_rate * audio_data.sample_width
+    total_seconds = len(audio_data.frame_data) / bytes_per_second
+    duration_per_sentence = total_seconds / len(sentences)  # Approximate duration per sentence in seconds
+    return [
+        {"start": index * duration_per_sentence, "text": sentence}
+        for index, sentence in enumerate(sentences)
+    ]
+def save_transcript_to_json(timestamps, json_file):
+    """Write *timestamps* to the path *json_file* as JSON indented by 4 spaces.
+
+    The file is opened in write mode, so existing content is replaced.
+    """
+    with open(json_file, 'w') as out:
+        out.write(json.dumps(timestamps, indent=4))
 # Initialize the translation pipeline
 translation_pipeline = pipeline("translation", model="Helsinki-NLP/opus-mt-en-{target_language}")
     }
     return model_map.get(target_language, "Helsinki-NLP/opus-mt-en-fr")  # Default to French if not found
+def translate_text(timestamps_json, target_language):
+    """Translate every sentence of a JSON transcript into *target_language*.
+
+    Args:
+        timestamps_json: JSON string encoding a list of objects that each
+            have a ``"start"`` value and a ``"text"`` sentence.
+        target_language: Language key passed to ``get_translation_model`` to
+            choose the translation model.
+
+    Returns:
+        A JSON string (4-space indent) encoding a list of objects with the
+        keys ``"start"``, ``"original"`` and ``"translated"``, in input order.
+    """
+    # Load the translation model for the specified target language
     translation_model_id = get_translation_model(target_language)
     translator = pipeline("translation", model=translation_model_id)
+
+    def _translate_entry(entry):
+        # Translate one sentence and keep it paired with its start time.
+        source_text = entry["text"]
+        rendered = translator(source_text)[0]['translation_text']
+        return {
+            "start": entry["start"],
+            "original": source_text,
+            "translated": rendered,
+        }
+
+    # Parse the input JSON, translate entry by entry, and serialize the result
+    translated_timestamps = [_translate_entry(item) for item in json.loads(timestamps_json)]
+    return json.dumps(translated_timestamps, indent=4)
+def add_transcript_to_video(video_path, timestamps, output_path):
+    """Overlay transcript sentences on a video and write the result to disk.
+
+    Each entry is shown for 3 seconds from its ``"start"`` time. The output
+    is kept at the length of the source video, so a caption that starts near
+    the end is cut off instead of extending the file.
+
+    Args:
+        video_path: Path of the source video.
+        timestamps: Iterable of dicts with a ``"start"`` time in seconds and
+            the ``"text"`` to display from that time.
+        output_path: Destination file, encoded with the ``libx264`` video
+            codec and the ``aac`` audio codec.
+    """
+    # Load the video file
+    video = mp.VideoFileClip(video_path)
+    try:
+        # NOTE(review): size=video.size makes every caption clip as large as
+        # the frame, so its black background spans the whole picture --
+        # confirm this is the intended look.
+        text_clips = [
+            mp.TextClip(entry["text"], fontsize=24, color='white', bg_color='black', size=video.size)
+            .set_start(entry["start"])
+            .set_duration(3)  # Display each sentence for 3 seconds
+            .set_position(('bottom'))
+            .set_opacity(0.7)
+            for entry in timestamps
+        ]
+        # Overlay all text clips on the original video. The composite would
+        # otherwise last until the latest caption ends, which can be past
+        # the end of the source video, so pin it to the source duration.
+        final_video = mp.CompositeVideoClip([video] + text_clips).set_duration(video.duration)
+        # Write the result to a file
+        final_video.write_videofile(output_path, codec='libx264', audio_codec='aac')
+    finally:
+        # Release the reader held by the source clip, even on failure.
+        video.close()
 # Mock functions for platform actions and analytics
 def mock_post_to_platform(platform, content_title):
     if file is None:
         return "Please upload a video/audio file.", None, None, None
+    # Define paths for audio and output files
+    audio_path = "audio.wav"
+    json_file = "transcript.json"
+    output_video_path = "output_video.mp4"
+    # Transcribe audio from uploaded media file and get timestamps
+    timestamps = transcribe_video(file.name)
+    # Save transcript to JSON
+    save_transcript_to_json(timestamps, json_file)
+    # Add transcript to video based on timestamps
+    add_transcript_to_video(file.name, timestamps, output_video_path)
+    # Mock posting action (you can implement this as needed)
     post_message = mock_post_to_platform(platform, file.name)
     # Mock analytics generation
     analytics = mock_analytics()
+    return post_message, timestamps, json_file, analytics
 def generate_dashboard(analytics):
     if not analytics:
             with gr.Row():
                 post_output = gr.Textbox(label="Posting Status", interactive=False)
+                transcription_output = gr.Textbox(label="Transcription Timestamps (JSON)", interactive=False)
+                json_output = gr.Textbox(label="Transcript JSON File", interactive=False)
             submit_button.click(upload_and_manage,
                                 inputs=[file_input, platform_input, language_input],
+                                outputs=[post_output, transcription_output, json_output, gr.State()])
         with gr.Tab("Analytics Dashboard"):
             gr.Markdown("## Content Performance Analytics")
     return demo
+# Launch the Gradio interface
 demo = build_interface()
+demo.launch()