# Hugging Face Space: minishorts / app.py (author: yasvanthkumar, commit 88bd56c)
# πŸ‘‡ COPY FROM HERE
import os
import tempfile
import streamlit as st
import cv2
import numpy as np
import whisper
from moviepy.editor import *
from moviepy.video.fx.all import crop
from scenedetect import detect, ContentDetector
import warnings
warnings.filterwarnings("ignore")
os.environ["TOKENIZERS_PARALLELISM"] = "false"
@st.cache_resource
def load_whisper_model():
    """Load the Whisper 'base' speech-to-text model, cached for the session.

    The st.cache_resource decorator ensures the model is loaded once per
    Streamlit server process and reused across reruns.
    """
    model = whisper.load_model("base")
    return model
def detect_engaging_scenes(video_path, num_scenes=5):
    """Run content-based scene detection and return up to *num_scenes* spans.

    Each span is a (start_seconds, end_seconds) tuple taken from the first
    detected scenes of the video.
    """
    spans = []
    for start_tc, end_tc in detect(video_path, ContentDetector(min_scene_len=15)):
        spans.append((start_tc.get_seconds(), end_tc.get_seconds()))
    return spans[:num_scenes]
def detect_face_center(frame):
    """Return the (x, y) pixel center of the first face found in a BGR frame.

    Returns None when no face is detected. The Haar cascade classifier is
    created once and cached on the function object — the original re-parsed
    the cascade XML from disk on every call, which is pure waste when this
    runs once per scene.
    """
    cascade = getattr(detect_face_center, "_cascade", None)
    if cascade is None:
        cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        detect_face_center._cascade = cascade
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = cascade.detectMultiScale(gray, 1.1, 4)
    if len(faces) > 0:
        # Keep the original behavior: use the first detection reported.
        x, y, w, h = faces[0]
        return (x + w // 2, y + h // 2)
    return None
def create_vertical_short(original_clip, scene, transcript, bg_music_path, idx):
    """Render one 9:16 vertical short for a single scene of the source clip.

    Crops around a detected face (falling back to a centered crop), resizes
    to 1080x1920, overlays up to two caption lines taken from the Whisper
    transcript segments fully contained in the scene, mixes low-volume
    background music under the original audio, and writes short_{idx+1}.mp4.

    Returns the output file path.
    """
    start_time, end_time = scene
    duration = end_time - start_time
    clip = original_clip.subclip(start_time, end_time)

    # Sample the middle frame of the scene to look for a face to crop around.
    mid_time = start_time + duration / 2
    try:
        frame = original_clip.get_frame(mid_time)
        # moviepy yields RGB frames; OpenCV's cascade expects BGR input.
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        face_center = detect_face_center(frame)
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit. Face detection is best-effort only.
        face_center = None

    # Compute the largest 9:16 crop window that fits inside the source frame.
    w, h = clip.size
    target_width = min(w, int(h * 9 / 16))
    target_height = min(h, int(w * 16 / 9))
    if face_center:
        x_center, y_center = face_center
        x1 = max(0, x_center - target_width // 2)
        x2 = min(w, x_center + target_width // 2)
        y1 = max(0, y_center - target_height // 2)
        y2 = min(h, y_center + target_height // 2)
    else:
        x1 = (w - target_width) // 2
        x2 = x1 + target_width
        y1 = (h - target_height) // 2
        y2 = y1 + target_height
    cropped_clip = crop(clip, x1=x1, y1=y1, x2=x2, y2=y2)
    vertical_clip = cropped_clip.resize((1080, 1920))

    # Gather caption text from transcript segments fully inside this scene.
    caption_text = ""
    for seg in transcript['segments']:
        if seg['start'] >= start_time and seg['end'] <= end_time:
            caption_text += seg['text'] + " "

    # Split long captions into two stacked lines.
    words = caption_text.split()
    if len(words) > 8:
        line1 = " ".join(words[:len(words) // 2])
        line2 = " ".join(words[len(words) // 2:])
    else:
        line1 = caption_text
        line2 = ""

    # BUG FIX: only build TextClips for non-empty lines — the original always
    # created the first caption clip, and TextClip raises on empty text when
    # no transcript segment falls inside the scene.
    caption_clips = []
    for text, y_pos in ((line1, 1400), (line2, 1480)):
        if text.strip():
            caption_clips.append(
                TextClip(text, fontsize=60, color='white', font='Arial-Bold',
                         stroke_color='black', stroke_width=2, size=(1000, 100)
                         ).set_position(('center', y_pos)).set_duration(duration))

    # Background music at low volume, looped or trimmed to the scene length.
    bg_music = AudioFileClip(bg_music_path).volumex(0.2)
    if bg_music.duration < duration:
        bg_music = afx.audio_loop(bg_music, duration=duration)
    else:
        bg_music = bg_music.subclip(0, duration)

    # BUG FIX: a source video without an audio track left vertical_clip.audio
    # as None, crashing on .volumex; fall back to music-only in that case.
    if vertical_clip.audio is not None:
        original_audio = vertical_clip.audio.volumex(0.8)
        final_audio = CompositeAudioClip([original_audio, bg_music])
    else:
        final_audio = CompositeAudioClip([bg_music])

    final_clip = CompositeVideoClip([vertical_clip] + caption_clips)
    final_clip = final_clip.set_audio(final_audio)
    output_path = f"short_{idx+1}.mp4"
    # ultrafast preset + CRF 28 trades quality for render speed on CPU hosts.
    final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac",
                               fps=24, threads=4, preset='ultrafast',
                               ffmpeg_params=["-crf", "28"])
    return output_path
def main():
    """Streamlit entry point: upload an .mp4 and generate up to five 9:16 shorts.

    Pipeline: persist the upload to a temp file -> scene detection (with a
    time-slice fallback) -> Whisper transcription for captions -> render one
    vertical short per scene -> offer downloads -> clean up temp artifacts.
    """
    st.set_page_config(page_title="YouTube Shorts Generator", page_icon="🎬", layout="centered")
    st.title("🎬 YouTube Shorts Generator")
    st.subheader("Create Vertical AI Shorts from Any Video")
    st.info("""
**🎥 Can't paste YouTube link directly?**
👉 Use our [Google Colab YouTube Downloader](https://colab.research.google.com/drive/1Sy52KiOtN-l7N2rZ8A2JcmW0gYOYFMLx)
Paste your link, download `.mp4`, then upload it below 👇
""")
    uploaded_file = st.file_uploader("📁 Upload a YouTube video (.mp4)", type=["mp4"])
    if st.button("Generate Shorts", type="primary", use_container_width=True):
        if not uploaded_file:
            st.warning("Please upload a video first.")
            return
        bg_music_path = "background_music.mp3"
        if not os.path.exists(bg_music_path):
            st.error("Missing background music: background_music.mp3")
            return
        # Persist the upload so moviepy/scenedetect can read it from disk.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            tmp.write(uploaded_file.read())
            video_path = tmp.name
        with st.status("Processing...", expanded=True) as status:
            try:
                original_clip = VideoFileClip(video_path)
                duration = original_clip.duration
                st.success(f"Video loaded: {duration:.1f} sec")
                scenes = detect_engaging_scenes(video_path)
                if len(scenes) < 5:
                    st.warning("Less than 5 scenes detected, using time slices instead")
                    # BUG FIX: only emit slices that start inside the video.
                    # The old code produced slices past the end of videos
                    # shorter than 75s, making subclip() fail downstream.
                    scenes = [(i * 15, min((i + 1) * 15, duration))
                              for i in range(5) if i * 15 < duration]
                else:
                    scenes = scenes[:5]
                st.success("Scenes selected")
                # Extract the audio track and transcribe it for captions.
                audio_path = video_path.replace(".mp4", ".wav")
                original_clip.audio.write_audiofile(audio_path, logger=None)
                model = load_whisper_model()
                transcript = model.transcribe(audio_path)
                st.success(f"Transcribed {len(transcript['segments'])} segments")
                output_files = []
                for i, scene in enumerate(scenes):
                    st.write(f"Creating Short {i+1}/5...")
                    output = create_vertical_short(original_clip, scene, transcript, bg_music_path, i)
                    output_files.append(output)
                    st.success(f"Short {i+1} done ✅")
                status.update(label="All Shorts Ready!", state="complete")
            except Exception as e:
                st.error(f"Error: {str(e)}")
                # BUG FIX: the temp upload was leaked on the error path.
                if os.path.exists(video_path):
                    os.remove(video_path)
                return
        st.balloons()
        st.subheader("🎉 Download Your Shorts")
        # BUG FIX: size the column row to the number of shorts actually
        # produced — the time-slice fallback can yield fewer than 5, and a
        # hard-coded st.columns(5) would still work but leave dead columns,
        # while fewer files than columns previously risked an index mismatch.
        cols = st.columns(len(output_files))
        for i, file in enumerate(output_files):
            with open(file, "rb") as f:
                cols[i].download_button(f"Short {i+1}", f, file_name=file, mime="video/mp4")
        # Best-effort cleanup of all temp artifacts on the success path.
        original_clip.close()
        os.remove(video_path)
        if os.path.exists(audio_path):
            os.remove(audio_path)
        for file in output_files:
            os.remove(file)

if __name__ == "__main__":
    main()
# πŸ‘† TO HERE