# Video voice-over pipeline: extract audio from a video, transcribe it with
# Whisper, synthesize a voice-over with gTTS, and remux it onto the video.
from transformers import pipeline
from gtts import gTTS
import subprocess
import streamlit as st
import os
# Step 1: Extract audio from the video
def extract_audio_from_video(video_path, audio_path="extracted_audio.mp3"):
    """Extract the audio track from a video into an MP3 file using FFmpeg.

    Args:
        video_path: Path to the input video file.
        audio_path: Destination path for the extracted MP3.

    Returns:
        The path of the written audio file (same as ``audio_path``).

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    ffmpeg_command = [
        "ffmpeg",
        "-y",                     # Overwrite existing output; without this FFmpeg
                                  # prompts interactively and the script hangs
        "-i", video_path,         # Input video
        "-vn",                    # Disable video processing
        "-acodec", "libmp3lame",  # Set audio codec to mp3
        "-ar", "44100",           # Set audio sample rate
        "-ac", "2",               # Set number of audio channels
        audio_path,
    ]
    # check=True surfaces FFmpeg failures instead of silently returning a
    # path to a missing or partial audio file.
    subprocess.run(ffmpeg_command, check=True)
    print(f"Audio extracted to {audio_path}")
    return audio_path
# Step 2: Extract text from the audio using Hugging Face Transformers (Whisper)
def extract_text_from_audio(audio_path):
    """Transcribe an audio file to text with a Whisper ASR pipeline.

    Args:
        audio_path: Path to the audio file to transcribe.

    Returns:
        The transcribed text as a string. Also written to ``video_text.txt``
        as a side effect.
    """
    # Load the ASR pipeline from Hugging Face with a Whisper model.
    # NOTE(review): the model is re-downloaded/re-loaded on every call;
    # callers in a hot path may want to cache this pipeline.
    transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base")
    # Transcribe the audio file
    transcription = transcriber(audio_path)
    text = transcription["text"]
    # Save transcribed text to a file (optional). Explicit UTF-8 avoids
    # UnicodeEncodeError on platforms whose default encoding (e.g. cp1252)
    # cannot represent all transcript characters.
    with open("video_text.txt", "w", encoding="utf-8") as f:
        f.write(text)
    return text
# Step 3: Generate voice-over using gTTS
def generate_voice_over(text, output_audio_path="voice_over.mp3"):
    """Synthesize an English voice-over MP3 from *text* using Google TTS.

    Args:
        text: The text to speak.
        output_audio_path: Destination path for the generated MP3.

    Returns:
        The path of the saved audio file (same as ``output_audio_path``).
    """
    speech = gTTS(text=text, lang="en")
    speech.save(output_audio_path)
    print(f"Voice-over saved as {output_audio_path}")
    return output_audio_path
# Step 4: Combine voice-over with original video using FFmpeg
def add_voice_over_to_video(video_path, audio_path, output_video_path="output_video_with_voice.mp4"):
    """Replace a video's audio track with the given voice-over using FFmpeg.

    The video stream is copied without re-encoding; the new audio is mapped
    in place of the original track.

    Args:
        video_path: Path to the input video file.
        audio_path: Path to the voice-over audio to mux in.
        output_video_path: Destination path for the combined video.

    Returns:
        The path of the written output video (same as ``output_video_path``).

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    ffmpeg_command = [
        "ffmpeg",
        "-y",                # Overwrite existing output; avoids an interactive prompt hang
        "-i", video_path,
        "-i", audio_path,
        "-c:v", "copy",      # Copy the video stream without re-encoding
        "-map", "0:v:0",     # Take video from the first input
        "-map", "1:a:0",     # Take audio from the second input (the voice-over)
        "-shortest",         # Ensure the video ends when the audio ends
        output_video_path,
    ]
    # check=True surfaces FFmpeg failures instead of silently continuing.
    subprocess.run(ffmpeg_command, check=True)
    print(f"Final video with voice-over saved as {output_video_path}")
    # Return the path for consistency with the other pipeline steps.
    return output_video_path
# Step 5: Run the complete process
def main(video_path):
    """Run the full pipeline on *video_path*: extract audio, transcribe it,
    synthesize a voice-over, and mux it back onto the original video."""
    extracted_audio = extract_audio_from_video(video_path)   # Step 1: pull audio out
    transcript = extract_text_from_audio(extracted_audio)    # Step 2: speech -> text
    print("Extracted Text:", transcript)
    narration = generate_voice_over(transcript)              # Step 3: text -> speech
    add_voice_over_to_video(video_path, narration)           # Step 4: remux onto video
# Streamlit interface to upload video file
uploaded_file = st.file_uploader("Upload a video file", type=["mp4"])
if uploaded_file is not None:
    # Persist the upload to disk so FFmpeg can address it by path.
    with open("input_video.mp4", "wb") as video_file:
        video_file.write(uploaded_file.getbuffer())
    # Kick off the full pipeline on the saved upload.
    main("input_video.mp4")