#!/usr/bin/env python
#
# YouTube to X (Twitter) Thread Generator
# This Gradio app automates the process of turning a YouTube video
# into a multi-part X thread with corresponding video clips.
#
# --- 1. Installation ---
# Ensure you have all necessary packages installed:
# pip install gradio supadata google-generativeai pydantic yt-dlp moviepy tweepy pandas

# --- 2. Imports ---
import gradio as gr
import os
import re
import threading
import time
import glob
from supadata import Supadata
import google.generativeai as genai
from pydantic import BaseModel, Field
from datetime import timedelta
import yt_dlp
from moviepy.video.io.VideoFileClip import VideoFileClip
import tweepy
import pandas as pd
import traceback

# --- 3. Video Cleanup System ---

# Files older than this many seconds (15 minutes) are considered stale.
VIDEO_MAX_AGE_SECONDS = 900


def cleanup_old_videos():
    """Delete stale video files (older than 15 minutes) from the working directory.

    The glob patterns overlap (``*.mp4`` also matches ``downloaded_video.mp4``
    and every ``clip_*.mp4``), so matches are first collected into a set.
    Without the dedup step the same path could be removed twice, and the
    second ``os.remove`` would raise and log a spurious failure.

    Cleanup is best-effort: every error is logged and swallowed so a failed
    pass can never crash the app.
    """
    try:
        now = time.time()
        video_patterns = ["*.mp4", "*.webm", "*.mkv", "downloaded_video.*", "clip_*"]
        # Deduplicate — several patterns can match the same file.
        candidates = {
            path
            for pattern in video_patterns
            for path in glob.glob(pattern)
        }
        for file_path in candidates:
            try:
                # Remove only files past the age threshold.
                file_age = now - os.path.getmtime(file_path)
                if file_age > VIDEO_MAX_AGE_SECONDS:
                    os.remove(file_path)
                    print(f"🗑️ Cleaned up old video file: {file_path}")
            except FileNotFoundError:
                # File vanished between glob and stat/remove — nothing to do.
                pass
            except Exception as e:
                print(f"Failed to remove {file_path}: {e}")
    except Exception as e:
        print(f"Cleanup error: {e}")


def start_cleanup_scheduler():
    """Start a daemon thread that runs cleanup_old_videos() every 15 minutes."""

    def cleanup_loop():
        # Sleep first: nothing to clean immediately after startup.
        while True:
            time.sleep(VIDEO_MAX_AGE_SECONDS)
            cleanup_old_videos()

    # daemon=True so the thread never blocks interpreter shutdown.
    cleanup_thread = threading.Thread(target=cleanup_loop, daemon=True)
    cleanup_thread.start()
    print("🧹 Video cleanup scheduler started (runs every 15 minutes)")
# --- 4. Pydantic Model for Structured LLM Output ---

class StructuredXPosts(BaseModel):
    """Defines the expected JSON structure from the AI model."""
    # Text for each post in the thread, in order.
    post_contents: list[str] = Field(description="A list of content for X posts.")
    # One 'HH:MM:SS-HH:MM:SS' range per post, used to cut the matching clip.
    timestamps: list[str] = Field(description="Timestamps in 'HH:MM:SS-HH:MM:SS' format for each post.")


# --- 5. Helper Functions ---

def get_youtube_id(url: str) -> str | None:
    """Extracts the 11-character YouTube video ID from various URL formats."""
    regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
    match = re.search(regex, url)
    return match.group(1) if match else None


def ms_to_hhmmss(ms: int) -> str:
    """Converts milliseconds to a zero-padded HH:MM:SS string.

    Uses explicit divmod arithmetic rather than ``str(timedelta(...))``:
    timedelta renders as 'H:MM:SS' (no zero padding) and switches to a
    'N days, H:MM:SS' form past 24 hours, which breaks the 'HH:MM:SS'
    contract promised to the LLM prompt and parsed by time_to_seconds().
    """
    total_sec = ms // 1000
    hours, remainder = divmod(total_sec, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"


def time_to_seconds(t: str) -> float:
    """Converts a 'HH:MM:SS', 'MM:SS', or 'SS' string to total seconds."""
    parts = [float(p) for p in t.strip().split(":")]
    if len(parts) == 3:
        return parts[0] * 3600 + parts[1] * 60 + parts[2]
    if len(parts) == 2:
        return parts[0] * 60 + parts[1]
    return parts[0]


# --- 6. AI Prompt Template ---
HEAD_PROMPT_TEMPLATE = """
Below is a transcript of a [VIDEO_TYPE] video. I want to create a X thread with this format.
The first post will be the opener with a video clip of the [SUBJECT_TYPE].

Opener Post Format:
[MAIN_HOOK_STATEMENT]:
[KEY_POINT_1]
[KEY_POINT_2]
[KEY_POINT_3]
[CONTEXT_OR_SETUP]
[INTRIGUING_HOOK_LINE] 🧵

Follow-up Posts Format:
Each follow-up post should:
Start with an engaging hook related to the subject.
Present 2-4 key points or insights from the transcript.
Maintain narrative flow toward the conclusion.

Closing Post Format:
[KEY_TAKEAWAYS_OR_ADVICE]:
[ACTIONABLE_POINT_1]
[ACTIONABLE_POINT_2]
[ACTIONABLE_POINT_3]
[MEMORABLE_CLOSING_LINE]

CRITICAL INSTRUCTIONS:
1. Do not include any markdown formatting in the posts. But include line breaks for better readability.
2. Do not include any hashtags in the posts.
3. Only the first post should have the 🧵 emoji.
4. Each post must be less than 280 characters.
5. Provide timestamps for video extraction from the transcript for each post. The timestamp range should be 30 seconds to 1 minute.
"""


# --- 7. Main Processing Function ---
def create_video_thread(
    youtube_url: str,
    num_posts: int,
    video_type: str,
    subject_type: str,
    post_to_x: bool,
    twitter_api_key: str,
    twitter_api_secret: str,
    twitter_access_token: str,
    twitter_access_secret: str,
    progress=gr.Progress(track_tqdm=True)
):
    """
    The main workflow function that powers the Gradio app.

    Orchestrates transcript fetching, AI content generation, video clipping,
    and (optionally) posting the resulting thread to X.

    Returns a 4-tuple matching the Gradio outputs:
        (status message, posts DataFrame, list of clip paths, tweet-links update)
    On any error it returns an error status with empty outputs instead of raising.
    """
    # SECURITY: these keys were previously hard-coded. Environment variables
    # now take precedence; the literal fallbacks are kept only for backward
    # compatibility and should be rotated and removed before any public deploy.
    supadata_api_key = os.environ.get("SUPADATA_API_KEY", "sd_f5d8d8c915ea3cd8d96ed0a12840635d")
    gemini_api_key = os.environ.get("GEMINI_API_KEY", "AIzaSyCoGuPenJnmvOYasBLFhH4_TtCVUZj1kdQ")

    try:
        # --- Stage 0: Validation & Setup ---
        progress(0, desc="🚀 Starting...")
        if not all([youtube_url, num_posts, video_type, subject_type]):
            raise gr.Error("Please fill in all required fields: URL, Number of Posts, Video Type, and Subject Type.")
        if post_to_x and not all([twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret]):
            raise gr.Error("To post to X, all four X API keys are required.")

        yt_video_id = get_youtube_id(youtube_url)
        if not yt_video_id:
            raise gr.Error("Invalid YouTube URL. Could not extract video ID.")

        # --- Stage 1: Get Transcript ---
        progress(0.1, desc="📄 Fetching video transcript...")
        supadata = Supadata(api_key=supadata_api_key)
        transcript = supadata.youtube.transcript(video_id=yt_video_id, lang="en")
        if not transcript.content:
            raise gr.Error("Could not fetch transcript. The video might not have one, or it could be private.")

        # Annotate every chunk with its [start - end] time range so the model
        # can return timestamps we can cut clips from.
        transcript_arr = [
            "{} [{} - {}]".format(
                chunk.text.strip().replace("\n", " "),
                ms_to_hhmmss(int(chunk.offset)),
                ms_to_hhmmss(int(chunk.offset) + int(chunk.duration))
            )
            for chunk in transcript.content
        ]

        # --- Stage 2: Generate Posts with LLM ---
        progress(0.25, desc="🤖 Generating X thread with AI...")
        genai.configure(api_key=gemini_api_key)
        head_prompt = HEAD_PROMPT_TEMPLATE.replace("[VIDEO_TYPE]", video_type).replace("[SUBJECT_TYPE]", subject_type)
        full_prompt = f"""{head_prompt}\nInstructions: You should create {num_posts} such posts.\n\nTranscript:\n{transcript_arr}\n\nPlease provide your response as a JSON object that strictly adheres to the following schema: {StructuredXPosts.model_json_schema()}"""

        model = genai.GenerativeModel('gemini-1.5-flash')
        # Force a JSON response so it can be validated against the schema.
        response = model.generate_content(
            full_prompt,
            generation_config=genai.types.GenerationConfig(response_mime_type="application/json")
        )
        structured_data = StructuredXPosts.model_validate_json(response.text)
        all_post_contents = structured_data.post_contents
        all_timestamps = structured_data.timestamps
        if not all_post_contents or not all_timestamps:
            raise gr.Error("AI failed to generate posts. The transcript might be too short or the topic unclear.")

        # --- Stage 3: Download Video ---
        progress(0.5, desc="📥 Downloading original YouTube video (this may take a moment)...")
        video_url_full = f"https://www.youtube.com/watch?v={yt_video_id}"
        output_path_template = "downloaded_video.%(ext)s"
        ydl_opts = {
            'format': 'bestvideo[height<=720]+bestaudio/best[height<=720]',
            'outtmpl': output_path_template,
            'merge_output_format': 'mp4',
            'quiet': True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url_full, download=True)
            # merge_output_format forces .mp4 regardless of the source container.
            base, _ = os.path.splitext(ydl.prepare_filename(result))
            downloaded_filepath = base + '.mp4'
        if not os.path.exists(downloaded_filepath):
            raise gr.Error(f"Failed to download video file. Expected at: {downloaded_filepath}")

        # --- Stage 4: Clip Videos ---
        progress(0.7, desc="✂️ Slicing video into clips...")
        # Collect post text, timestamp, and clip path TOGETHER so that a
        # skipped/invalid timestamp cannot shift the pairing. (Previously the
        # DataFrame took the first len(output_clips) posts, so any skipped
        # clip paired later posts with the wrong videos.)
        kept_posts = []
        kept_timestamps = []
        output_clips = []
        pairs = list(zip(all_post_contents, all_timestamps))
        video = VideoFileClip(downloaded_filepath)
        try:
            for i, (post_text, r) in enumerate(progress.tqdm(pairs, desc="Clipping")):
                try:
                    start_str, end_str = r.split("-")
                    start_sec = time_to_seconds(start_str.strip())
                    end_sec = time_to_seconds(end_str.strip())
                    # Skip degenerate or out-of-bounds ranges.
                    if start_sec >= end_sec or end_sec > video.duration:
                        continue
                    # NOTE(review): subclip/verbose are moviepy 1.x API
                    # (renamed subclipped / removed in 2.x) — pin moviepy<2.
                    subclip = video.subclip(start_sec, end_sec)
                    clip_output_path = f"clip_{yt_video_id}_{i+1}.mp4"
                    subclip.write_videofile(clip_output_path, codec="libx264", audio_codec="aac", verbose=False, logger=None)
                    output_clips.append(clip_output_path)
                    kept_posts.append(post_text)
                    kept_timestamps.append(r)
                except Exception as e:
                    print(f"Skipping clip for timestamp '{r}' due to error: {e}")
                    continue
        finally:
            # Always release the ffmpeg reader, even if a clip write raised.
            video.close()

        df = pd.DataFrame({
            "Post Content": kept_posts,
            "Timestamp": kept_timestamps
        })

        # --- Stage 5: Post to X (Optional) ---
        tweet_links_md = "### Tweet URLs\n*Posting to X was not selected.*"
        if post_to_x:
            progress(0.9, desc="🕊️ Posting thread to X...")
            # v2 client for tweeting, v1.1 API for chunked media upload.
            client = tweepy.Client(
                consumer_key=twitter_api_key, consumer_secret=twitter_api_secret,
                access_token=twitter_access_token, access_token_secret=twitter_access_secret
            )
            auth = tweepy.OAuth1UserHandler(
                consumer_key=twitter_api_key, consumer_secret=twitter_api_secret,
                access_token=twitter_access_token, access_token_secret=twitter_access_secret
            )
            api = tweepy.API(auth)

            previous_tweet_id = None
            tweet_links = []
            user_info = client.get_me(user_fields=["username"]).data
            username = user_info.username
            for i in progress.tqdm(range(len(output_clips)), desc="Tweeting"):
                media = api.media_upload(filename=output_clips[i], media_category='tweet_video', chunked=True)
                # Reply to the previous tweet to chain the thread.
                tweet = client.create_tweet(
                    text=df["Post Content"].iloc[i],
                    media_ids=[media.media_id],
                    in_reply_to_tweet_id=previous_tweet_id
                )
                previous_tweet_id = tweet.data['id']
                tweet_links.append(f"https://x.com/{username}/status/{previous_tweet_id}")
            # Close the thread with an attribution reply.
            client.create_tweet(text=f"Source video: {youtube_url}", in_reply_to_tweet_id=previous_tweet_id)
            tweet_links_md = "### ✅ Successfully Posted Tweet URLs\n" + "\n".join([f"* [Tweet {i+1}]({url})" for i, url in enumerate(tweet_links)])

        progress(1, desc="🎉 Done!")

        # Clean up the main downloaded video immediately.
        if os.path.exists(downloaded_filepath):
            os.remove(downloaded_filepath)
        # Note: Clip files will be automatically cleaned up by the background scheduler.

        return "Generation Complete!", df, output_clips, gr.update(value=tweet_links_md, visible=True)

    except Exception as e:
        # Surface the failure in the UI instead of crashing the app.
        traceback.print_exc()
        error_message = f"An error occurred: {e}"
        return error_message, pd.DataFrame(), [], gr.update(visible=False)


# --- 8. Gradio UI Layout ---
with gr.Blocks(theme=gr.themes.Soft(), title="YouTube to X Thread Generator") as app:
    gr.Markdown("# 🚀 YouTube to X Thread Generator")
    gr.Markdown("Turn any YouTube video into an engaging, multi-part X (Twitter) thread with video clips.")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### 1. Input Video & Content Details")
            youtube_url = gr.Textbox(label="YouTube Video URL", placeholder="e.g., https://www.youtube.com/watch?v=VISDGlpX0WI")
            num_posts = gr.Slider(minimum=3, maximum=15, value=8, step=1, label="Number of Posts in the Thread")
            with gr.Row():
                video_type = gr.Textbox(label="Video Type", placeholder="e.g., 'podcast', 'documentary'")
                subject_type = gr.Textbox(label="Subject Type", placeholder="e.g., 'CEO', 'historical event'")
            with gr.Accordion("🔑 X/Twitter API Keys (Optional)", open=False):
                gr.Markdown("*Enter your X/Twitter keys below ONLY if you want to post the thread directly.*")
                twitter_api_key = gr.Textbox(label="X API Key", type="password")
                twitter_api_secret = gr.Textbox(label="X API Key Secret", type="password")
                twitter_access_token = gr.Textbox(label="X Access Token", type="password")
                twitter_access_secret = gr.Textbox(label="X Access Token Secret", type="password")
            with gr.Row(elem_id="action_buttons"):
                post_to_x_checkbox = gr.Checkbox(label="✅ Post Thread directly to X?", value=False)
                submit_btn = gr.Button("Generate Thread", variant="primary")

        with gr.Column(scale=3):
            gr.Markdown("### 2. Generated Content & Clips")
            status_output = gr.Textbox(label="Status", interactive=False, show_copy_button=True)
            posts_output = gr.DataFrame(headers=["Post Content", "Timestamp"], label="Generated Posts", interactive=False, wrap=True)
            clips_output = gr.Gallery(label="Generated Video Clips", show_label=False, elem_id="gallery", columns=[3], rows=[2], object_fit="contain", height="auto")
            tweet_urls_output = gr.Markdown("### Tweet URLs\n*No tweets posted yet.*", visible=False)

    submit_btn.click(
        fn=create_video_thread,
        inputs=[
            youtube_url, num_posts, video_type, subject_type,
            post_to_x_checkbox,
            twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret
        ],
        outputs=[status_output, posts_output, clips_output, tweet_urls_output]
    )

if __name__ == "__main__":
    # Start the automatic video cleanup scheduler.
    start_cleanup_scheduler()
    # Launch the app.
    app.launch(debug=True, share=True)