File size: 2,990 Bytes
d219730
62e2507
9e51fe9
2974e01
e6ed917
2974e01
e6ed917
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d219730
 
 
62e2507
d219730
 
 
62e2507
d219730
62e2507
 
 
 
 
e6ed917
62e2507
 
 
 
 
 
 
e6ed917
62e2507
 
 
 
 
 
 
 
 
e6ed917
62e2507
 
 
 
 
e6ed917
62e2507
e6ed917
 
 
 
 
62e2507
 
e6ed917
 
62e2507
e6ed917
 
62e2507
93476c0
 
 
e6ed917
93476c0
 
 
e6ed917
93476c0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from transformers import pipeline
from gtts import gTTS
import subprocess
import streamlit as st
import os

# Step 1: Extract audio from the video
def extract_audio_from_video(video_path, audio_path="extracted_audio.mp3"):
    """Extract the audio track of a video into an MP3 file using FFmpeg.

    Parameters
    ----------
    video_path : str
        Path to the input video file.
    audio_path : str
        Destination path for the extracted MP3 (default "extracted_audio.mp3").

    Returns
    -------
    str
        The path of the written audio file (same as ``audio_path``).

    Raises
    ------
    subprocess.CalledProcessError
        If FFmpeg exits with a non-zero status.
    """
    ffmpeg_command = [
        "ffmpeg",
        "-y",                       # Overwrite existing output; without this FFmpeg
                                    # prompts interactively and hangs the app on re-run
        "-i", video_path,           # Input video
        "-vn",                      # Disable video processing
        "-acodec", "libmp3lame",    # Set audio codec to mp3
        "-ar", "44100",             # Set audio sample rate
        "-ac", "2",                 # Set number of audio channels
        audio_path,
    ]
    # check=True: fail loudly if FFmpeg errors, instead of silently continuing
    # the pipeline with a missing/partial audio file.
    subprocess.run(ffmpeg_command, check=True)
    print(f"Audio extracted to {audio_path}")
    return audio_path

# Step 2: Extract text from the audio using Hugging Face Transformers (Whisper)
def extract_text_from_audio(audio_path):
    """Transcribe an audio file to text with a Whisper ASR pipeline.

    Also writes the transcript to ``video_text.txt`` as a side effect.

    Parameters
    ----------
    audio_path : str
        Path to the audio file to transcribe.

    Returns
    -------
    str
        The transcribed text.
    """
    # Load the ASR pipeline from Hugging Face with a Whisper-like model
    transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base")

    # Transcribe the audio file
    transcription = transcriber(audio_path)
    text = transcription["text"]

    # Save transcribed text to a file (optional).
    # encoding="utf-8": transcripts can contain non-ASCII characters, and the
    # platform default encoding (e.g. cp1252 on Windows) would raise
    # UnicodeEncodeError on them.
    with open("video_text.txt", "w", encoding="utf-8") as f:
        f.write(text)

    return text

# Step 3: Generate voice-over using gTTS
def generate_voice_over(text, output_audio_path="voice_over.mp3"):
    """Synthesize an English voice-over MP3 from *text* using gTTS.

    Parameters
    ----------
    text : str
        The text to speak.
    output_audio_path : str
        Where to save the generated MP3 (default "voice_over.mp3").

    Returns
    -------
    str
        The path of the saved audio file.
    """
    speech = gTTS(text=text, lang="en")
    speech.save(output_audio_path)
    print(f"Voice-over saved as {output_audio_path}")
    return output_audio_path

# Step 4: Combine voice-over with original video using FFmpeg
def add_voice_over_to_video(video_path, audio_path, output_video_path="output_video_with_voice.mp4"):
    """Mux a new audio track onto a video using FFmpeg, copying the video stream.

    Parameters
    ----------
    video_path : str
        Path to the source video (its video stream is copied unchanged).
    audio_path : str
        Path to the replacement audio track (e.g. the generated voice-over).
    output_video_path : str
        Destination path for the combined file
        (default "output_video_with_voice.mp4").

    Raises
    ------
    subprocess.CalledProcessError
        If FFmpeg exits with a non-zero status.
    """
    ffmpeg_command = [
        "ffmpeg",
        "-y",              # Overwrite existing output; without this FFmpeg
                           # prompts interactively and hangs the app on re-run
        "-i", video_path,
        "-i", audio_path,
        "-c:v", "copy",    # Copy the video stream without re-encoding
        "-map", "0:v:0",   # Video from the first input
        "-map", "1:a:0",   # Audio from the second input
        "-shortest",       # Ensure the video ends when the audio ends
        output_video_path,
    ]
    # check=True: surface FFmpeg failures instead of printing success anyway.
    subprocess.run(ffmpeg_command, check=True)
    print(f"Final video with voice-over saved as {output_video_path}")

# Step 5: Run the complete process
def main(video_path):
    """Run the full pipeline on *video_path*.

    Steps: extract the audio track, transcribe it, synthesize a voice-over
    from the transcript, then mux the voice-over back onto the video.
    """
    extracted_audio = extract_audio_from_video(video_path)

    transcript = extract_text_from_audio(extracted_audio)
    print("Extracted Text:", transcript)

    narration_path = generate_voice_over(transcript)

    add_voice_over_to_video(video_path, narration_path)

# Streamlit interface: let the user upload an MP4, then run the pipeline on it.
uploaded_file = st.file_uploader("Upload a video file", type=["mp4"])
if uploaded_file is not None:
    # Persist the upload to disk so FFmpeg can read it by path.
    with open("input_video.mp4", "wb") as video_out:
        video_out.write(uploaded_file.getbuffer())
    # Kick off the full extract/transcribe/voice-over pipeline.
    main("input_video.mp4")