# 👇 COPY FROM HERE
import os
import tempfile
import warnings

import cv2
import numpy as np
import streamlit as st
import whisper
from moviepy.editor import *
from moviepy.video.fx.all import crop
from scenedetect import detect, ContentDetector

warnings.filterwarnings("ignore")
os.environ["TOKENIZERS_PARALLELISM"] = "false"


@st.cache_resource
def load_whisper_model():
    """Load and cache the Whisper 'base' speech-to-text model (one load per session)."""
    return whisper.load_model("base")


def detect_engaging_scenes(video_path, num_scenes=5):
    """Return up to *num_scenes* (start_sec, end_sec) tuples from content-based scene detection.

    Uses PySceneDetect's ContentDetector with a 15-frame minimum scene length.
    """
    scene_list = detect(video_path, ContentDetector(min_scene_len=15))
    return [(s[0].get_seconds(), s[1].get_seconds()) for s in scene_list][:num_scenes]


def detect_face_center(frame):
    """Return the (x, y) center of the first face found in a BGR frame, or None.

    Uses OpenCV's bundled Haar frontal-face cascade; only the first detection
    is used, so multi-face frames center on an arbitrary face.
    """
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
    if len(faces) > 0:
        x, y, w, h = faces[0]
        return (x + w // 2, y + h // 2)
    return None


def _crop_window(center, size, limit):
    """Return (lo, hi) for a window of exactly *size* centered near *center*, clamped to [0, limit].

    Shifts the window instead of shrinking it, so the crop always keeps its
    full target size (and therefore the intended aspect ratio).
    """
    lo = max(0, min(center - size // 2, limit - size))
    return lo, lo + size


def create_vertical_short(original_clip, scene, transcript, bg_music_path, idx):
    """Render one 1080x1920 vertical short for *scene* = (start_sec, end_sec).

    Crops a 9:16 window around a detected face (or the frame center), burns
    matching transcript text in as up to two caption lines, mixes quiet
    background music under the original audio, and writes ``short_{idx+1}.mp4``.

    Returns the output file path.
    """
    start_time, end_time = scene
    duration = end_time - start_time
    clip = original_clip.subclip(start_time, end_time)

    # Sample the middle frame of the scene to find a face to center the crop on.
    mid_time = start_time + duration / 2
    try:
        frame = original_clip.get_frame(mid_time)
        # moviepy yields RGB frames; OpenCV expects BGR.
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        face_center = detect_face_center(frame)
    except Exception:
        # Face detection is best-effort; fall back to a center crop.
        face_center = None

    w, h = clip.size
    # Largest 9:16 window that fits inside the source frame.
    target_width = min(w, int(h * 9 / 16))
    target_height = min(h, int(w * 16 / 9))

    if face_center:
        x_center, y_center = face_center
        # Fix: shift (rather than independently clamp) the window when the face
        # is near an edge, so the crop keeps its exact 9:16 size and the final
        # resize to 1080x1920 stays undistorted.
        x1, x2 = _crop_window(x_center, target_width, w)
        y1, y2 = _crop_window(y_center, target_height, h)
    else:
        x1 = (w - target_width) // 2
        x2 = x1 + target_width
        y1 = (h - target_height) // 2
        y2 = y1 + target_height

    cropped_clip = crop(clip, x1=x1, y1=y1, x2=x2, y2=y2)
    vertical_clip = cropped_clip.resize((1080, 1920))

    # Collect transcript segments that fall entirely inside this scene.
    # NOTE(review): assumes Whisper segment times share the full-video clock
    # with scene times — true here since both come from the same source file.
    caption_text = ""
    for seg in transcript['segments']:
        if seg['start'] >= start_time and seg['end'] <= end_time:
            caption_text += seg['text'] + " "

    # Split long captions across two stacked lines.
    words = caption_text.split()
    if len(words) > 8:
        line1 = " ".join(words[:len(words) // 2])
        line2 = " ".join(words[len(words) // 2:])
    else:
        line1 = caption_text
        line2 = ""

    txt_clip1 = TextClip(line1, fontsize=60, color='white', font='Arial-Bold',
                         stroke_color='black', stroke_width=2,
                         size=(1000, 100)).set_position(('center', 1400)).set_duration(duration)
    caption_clips = [txt_clip1]
    if line2:
        txt_clip2 = TextClip(line2, fontsize=60, color='white', font='Arial-Bold',
                             stroke_color='black', stroke_width=2,
                             size=(1000, 100)).set_position(('center', 1480)).set_duration(duration)
        caption_clips.append(txt_clip2)

    # Quiet background music, looped or trimmed to exactly the scene length.
    bg_music = AudioFileClip(bg_music_path).volumex(0.2)
    if bg_music.duration < duration:
        bg_music = afx.audio_loop(bg_music, duration=duration)
    else:
        bg_music = bg_music.subclip(0, duration)

    # Fix: some uploads have no audio track; .volumex on None would raise.
    if vertical_clip.audio is not None:
        original_audio = vertical_clip.audio.volumex(0.8)
        final_audio = CompositeAudioClip([original_audio, bg_music])
    else:
        final_audio = bg_music

    final_clip = CompositeVideoClip([vertical_clip] + caption_clips)
    final_clip = final_clip.set_audio(final_audio)

    output_path = f"short_{idx+1}.mp4"
    final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac",
                               fps=24, threads=4, preset='ultrafast',
                               ffmpeg_params=["-crf", "28"])
    return output_path


def main():
    """Streamlit entry point: upload an .mp4, cut up to 5 vertical shorts, offer downloads."""
    st.set_page_config(page_title="YouTube Shorts Generator", page_icon="🎬", layout="centered")
    st.title("🎬 YouTube Shorts Generator")
    st.subheader("Create Vertical AI Shorts from Any Video")
    st.info("""
    **🎥 Can't paste YouTube link directly?**
    👉 Use our [Google Colab YouTube Downloader](https://colab.research.google.com/drive/1Sy52KiOtN-l7N2rZ8A2JcmW0gYOYFMLx)
    Paste your link, download `.mp4`, then upload it below 👇
    """)

    uploaded_file = st.file_uploader("📁 Upload a YouTube video (.mp4)", type=["mp4"])

    if st.button("Generate Shorts", type="primary", use_container_width=True):
        if not uploaded_file:
            st.warning("Please upload a video first.")
            return

        bg_music_path = "background_music.mp3"
        if not os.path.exists(bg_music_path):
            st.error("Missing background music: background_music.mp3")
            return

        # Persist the upload to disk; moviepy/scenedetect need a real file path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            tmp.write(uploaded_file.read())
            video_path = tmp.name

        with st.status("Processing...", expanded=True) as status:
            try:
                original_clip = VideoFileClip(video_path)
                duration = original_clip.duration
                st.success(f"Video loaded: {duration:.1f} sec")

                scenes = detect_engaging_scenes(video_path)
                if len(scenes) < 5:
                    st.warning("Less than 5 scenes detected, using time slices instead")
                    # Fix: only emit slices that actually start inside the video;
                    # otherwise subclip() fails on uploads shorter than 75 s.
                    scenes = [(i * 15, min((i + 1) * 15, duration))
                              for i in range(5) if i * 15 < duration]
                else:
                    scenes = scenes[:5]
                st.success("Scenes selected")

                # Transcribe the audio once for the whole video; shorts reuse it.
                audio_path = video_path.replace(".mp4", ".wav")
                original_clip.audio.write_audiofile(audio_path, logger=None)
                model = load_whisper_model()
                transcript = model.transcribe(audio_path)
                st.success(f"Transcribed {len(transcript['segments'])} segments")

                output_files = []
                for i, scene in enumerate(scenes):
                    st.write(f"Creating Short {i+1}/5...")
                    output = create_vertical_short(original_clip, scene, transcript,
                                                   bg_music_path, i)
                    output_files.append(output)
                    st.success(f"Short {i+1} done ✅")

                status.update(label="All Shorts Ready!", state="complete")
            except Exception as e:
                st.error(f"Error: {str(e)}")
                return

        st.balloons()
        st.subheader("🎉 Download Your Shorts")
        cols = st.columns(5)
        for i, file in enumerate(output_files):
            with open(file, "rb") as f:
                cols[i].download_button(f"Short {i+1}", f, file_name=file, mime="video/mp4")

        # Clean up temp artifacts (Streamlit buffers the download data at render time).
        original_clip.close()
        os.remove(video_path)
        if os.path.exists(audio_path):
            os.remove(audio_path)
        for file in output_files:
            os.remove(file)


if __name__ == "__main__":
    main()
# 👆 TO HERE