Spaces:

LordPatil
/

ThreadX_demo

Runtime error

App Files Files Community

LordPatil committed on Jun 27, 2025

Commit

1f1864e

verified ·

1 Parent(s): 373d198

Create app.py

Browse files

Files changed (1) hide show

app.py +332 -0

app.py ADDED Viewed

	@@ -0,0 +1,332 @@

+#!/usr/bin/env python
+#
+# YouTube to X (Twitter) Thread Generator
+# This Gradio app automates the process of turning a YouTube video
+# into a multi-part X thread with corresponding video clips.
+#
+# --- 1. Installation ---
+# Ensure you have all necessary packages installed:
+# pip install gradio supadata google-generativeai pydantic yt-dlp moviepy tweepy pandas
+# --- 2. Imports ---
+import gradio as gr
+import os
+import re
+import threading
+import time
+import glob
+from supadata import Supadata
+import google.generativeai as genai
+from pydantic import BaseModel, Field
+from datetime import timedelta
+import yt_dlp
+from moviepy.video.io.VideoFileClip import VideoFileClip
+import tweepy
+import pandas as pd
+import traceback
+# --- 3. Video Cleanup System ---
+def cleanup_old_videos():
+    """Clean up video files older than 15 minutes"""
+    try:
+        current_time = time.time()
+        # Find all video files
+        video_patterns = ["*.mp4", "*.webm", "*.mkv", "downloaded_video.*", "clip_*"]
+        for pattern in video_patterns:
+            for file_path in glob.glob(pattern):
+                try:
+                    # Check if file is older than 15 minutes (900 seconds)
+                    file_age = current_time - os.path.getmtime(file_path)
+                    if file_age > 900:  # 15 minutes = 900 seconds
+                        os.remove(file_path)
+                        print(f"🗑️ Cleaned up old video file: {file_path}")
+                except Exception as e:
+                    print(f"Failed to remove {file_path}: {e}")
+    except Exception as e:
+        print(f"Cleanup error: {e}")
def start_cleanup_scheduler(interval_seconds: float = 900) -> threading.Thread:
    """Start a daemon thread that periodically calls ``cleanup_old_videos``.

    Args:
        interval_seconds: Seconds to sleep before each cleanup pass. Defaults
            to 900 (15 minutes). The first pass runs after one full interval.

    Returns:
        The started daemon thread, so callers can inspect it if they wish.
        Being a daemon, it does not keep the process alive on exit.
    """
    def cleanup_loop():
        while True:
            time.sleep(interval_seconds)
            cleanup_old_videos()

    cleanup_thread = threading.Thread(target=cleanup_loop, name="video-cleanup", daemon=True)
    cleanup_thread.start()
    # ":g" drops the trailing ".0", so the default prints "15 minutes".
    print(f"🧹 Video cleanup scheduler started (runs every {interval_seconds / 60:g} minutes)")
    return cleanup_thread
+# --- 4. Pydantic Model for Structured LLM Output ---
# NOTE: the class docstring and the Field descriptions below are emitted by
# model_json_schema(), which create_video_thread embeds in the AI prompt.
# Treat them as runtime text: editing them changes what the model is told.
class StructuredXPosts(BaseModel):
    """Defines the expected JSON structure from the AI model."""
    # Text of each post in the thread.
    post_contents: list[str] = Field(description="A list of content for X posts.")
    # One "start-end" clip range per post; the caller splits each entry on "-".
    timestamps: list[str] = Field(description="Timestamps in 'HH:MM:SS-HH:MM:SS' format for each post.")
+# --- 5. Helper Functions ---
+def get_youtube_id(url: str) -> str | None:
+    """Extracts the YouTube video ID from various URL formats."""
+    regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
+    match = re.search(regex, url)
+    return match.group(1) if match else None
def ms_to_hhmmss(ms: int) -> str:
    """Convert a millisecond offset to a zero-padded ``HH:MM:SS`` string.

    Args:
        ms: Offset in milliseconds. Sub-second remainders are truncated and
            negative values are treated as zero.

    Returns:
        The offset as ``HH:MM:SS``. Hours are not wrapped at 24, so 25 hours
        is ``"25:00:00"``.
    """
    total_seconds = max(int(ms), 0) // 1000
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
def time_to_seconds(t: str) -> float:
    """Convert an ``HH:MM:SS``, ``MM:SS`` or ``SS`` string to total seconds.

    Args:
        t: Colon-separated time string; surrounding whitespace is ignored and
            each component may be fractional.

    Returns:
        The total number of seconds as a float.

    Raises:
        ValueError: If a component is not numeric, or if there are more than
            three components (previously such input silently returned only
            the first component).
    """
    parts = [float(p) for p in t.strip().split(":")]
    if len(parts) > 3:
        raise ValueError(f"Unsupported time format: {t!r}")
    # Fold left to right: each step promotes the running total by one unit
    # (hours -> minutes -> seconds) before adding the next component.
    total = 0.0
    for part in parts:
        total = total * 60 + part
    return total
+# --- 6. AI Prompt Template ---
# Prompt preamble sent to the AI model. create_video_thread substitutes the
# [VIDEO_TYPE] and [SUBJECT_TYPE] placeholders via str.replace; the other
# bracketed names are left in place as format slots for the model to fill.
# This is runtime text: any edit changes what the model is asked to do.
HEAD_PROMPT_TEMPLATE = """
Below is a transcript of a [VIDEO_TYPE] video.
I want to create a X thread with this format. The first post will be the opener with a video clip of the [SUBJECT_TYPE].
Opener Post Format:
[MAIN_HOOK_STATEMENT]:
[KEY_POINT_1]
[KEY_POINT_2]
[KEY_POINT_3]
[CONTEXT_OR_SETUP]
[INTRIGUING_HOOK_LINE] 🧵
Follow-up Posts Format:
Each follow-up post should:
Start with an engaging hook related to the subject.
Present 2-4 key points or insights from the transcript.
Maintain narrative flow toward the conclusion.
Closing Post Format:
[KEY_TAKEAWAYS_OR_ADVICE]:
[ACTIONABLE_POINT_1]
[ACTIONABLE_POINT_2]
[ACTIONABLE_POINT_3]
[MEMORABLE_CLOSING_LINE]
CRITICAL INSTRUCTIONS:
1. Do not include any markdown formatting in the posts. But include line breaks for better readability.
2. Do not include any hashtags in the posts.
3. Only the first post should have the 🧵 emoji.
4. Each post must be less than 280 characters.
5. Provide timestamps for video extraction from the transcript for each post. The timestamp range should be 30 seconds to 1 minute.
"""
+# --- 7. Main Processing Function ---
def create_video_thread(
    youtube_url: str,
    num_posts: int,
    video_type: str,
    subject_type: str,
    post_to_x: bool,
    twitter_api_key: str,
    twitter_api_secret: str,
    twitter_access_token: str,
    twitter_access_secret: str,
    progress=gr.Progress(track_tqdm=True)
):
    """
    Turn a YouTube video into an X thread with one video clip per post.

    Stages: validate inputs, fetch the transcript (Supadata), ask Gemini for
    post texts and clip timestamps, download the video (yt-dlp), cut the
    clips (MoviePy) and, if requested, post the thread to X (tweepy).

    Args:
        youtube_url: URL of the source video.
        num_posts: Number of posts the AI model is asked to write.
        video_type: Replaces ``[VIDEO_TYPE]`` in the prompt template.
        subject_type: Replaces ``[SUBJECT_TYPE]`` in the prompt template.
        post_to_x: When true, upload the clips and post the thread.
        twitter_api_key, twitter_api_secret, twitter_access_token,
        twitter_access_secret: X credentials; required only if ``post_to_x``.
        progress: Gradio progress tracker.

    Returns:
        A 4-tuple ``(status_text, posts_dataframe, clip_paths, markdown_update)``.
        Failures are not raised: they are printed with a traceback and
        reported as ``("An error occurred: ...", empty DataFrame, [], hidden)``.

    Environment:
        ``SUPADATA_API_KEY`` and ``GEMINI_API_KEY`` must be set. Service keys
        are deliberately not stored in the source file.
    """
    downloaded_filepath = ""
    video = None
    try:
        # --- Stage 0: Validation & Setup ---
        progress(0, desc="🚀 Starting...")
        if not all([youtube_url, num_posts, video_type, subject_type]):
            raise gr.Error("Please fill in all required fields: URL, Number of Posts, Video Type, and Subject Type.")
        if post_to_x and not all([twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret]):
            raise gr.Error("To post to X, all four X API keys are required.")
        yt_video_id = get_youtube_id(youtube_url)
        if not yt_video_id:
            raise gr.Error("Invalid YouTube URL. Could not extract video ID.")
        # Service credentials come from the environment, never from source.
        supadata_api_key = os.environ.get("SUPADATA_API_KEY")
        gemini_api_key = os.environ.get("GEMINI_API_KEY")
        if not supadata_api_key or not gemini_api_key:
            raise gr.Error("Server configuration error: set the SUPADATA_API_KEY and GEMINI_API_KEY environment variables.")

        # --- Stage 1: Get Transcript ---
        progress(0.1, desc="📄 Fetching video transcript...")
        supadata = Supadata(api_key=supadata_api_key)
        transcript = supadata.youtube.transcript(video_id=yt_video_id, lang="en")
        if not transcript.content:
            raise gr.Error("Could not fetch transcript. The video might not have one, or it could be private.")
        # Built with plain statements: reusing the outer quote type or a
        # backslash inside an f-string expression is a SyntaxError before
        # Python 3.12.
        transcript_arr = []
        for chunk in transcript.content:
            chunk_text = chunk.text.strip().replace("\n", " ")
            chunk_start = int(chunk.offset)
            chunk_end = chunk_start + int(chunk.duration)
            transcript_arr.append(f"{chunk_text} [{ms_to_hhmmss(chunk_start)} - {ms_to_hhmmss(chunk_end)}]")

        # --- Stage 2: Generate Posts with LLM ---
        progress(0.25, desc="🤖 Generating X thread with AI...")
        genai.configure(api_key=gemini_api_key)
        head_prompt = HEAD_PROMPT_TEMPLATE.replace("[VIDEO_TYPE]", video_type).replace("[SUBJECT_TYPE]", subject_type)
        full_prompt = f"""{head_prompt}\nInstructions: You should create {num_posts} such posts.\n\nTranscript:\n{transcript_arr}\n\nPlease provide your response as a JSON object that strictly adheres to the following schema: {StructuredXPosts.model_json_schema()}"""
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(
            full_prompt,
            generation_config=genai.types.GenerationConfig(response_mime_type="application/json")
        )
        structured_data = StructuredXPosts.model_validate_json(response.text)
        all_post_contents = structured_data.post_contents
        all_timestamps = structured_data.timestamps
        if not all_post_contents or not all_timestamps:
            raise gr.Error("AI failed to generate posts. The transcript might be too short or the topic unclear.")

        # --- Stage 3: Download Video ---
        progress(0.5, desc="📥 Downloading original YouTube video (this may take a moment)...")
        video_url_full = f"https://www.youtube.com/watch?v={yt_video_id}"
        output_path_template = "downloaded_video.%(ext)s"
        ydl_opts = {
            'format': 'bestvideo[height<=720]+bestaudio/best[height<=720]',
            'outtmpl': output_path_template,
            'merge_output_format': 'mp4',
            'quiet': True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url_full, download=True)
            # The merged output is always .mp4, whatever the source extension.
            base, _ = os.path.splitext(ydl.prepare_filename(result))
            downloaded_filepath = base + '.mp4'
        if not os.path.exists(downloaded_filepath):
            raise gr.Error(f"Failed to download video file. Expected at: {downloaded_filepath}")

        # --- Stage 4: Clip Videos ---
        progress(0.7, desc="✂️ Slicing video into clips...")
        video = VideoFileClip(downloaded_filepath)
        # MoviePy 2.x renamed subclip() to subclipped() and removed the
        # `verbose` argument of write_videofile(); support both generations.
        if hasattr(video, "subclipped"):
            make_subclip = video.subclipped
            write_kwargs = {"logger": None}
        else:
            make_subclip = video.subclip
            write_kwargs = {"verbose": False, "logger": None}
        # Keep text, timestamp and clip path together so that a skipped clip
        # cannot shift later posts onto the wrong video.
        kept_contents = []
        kept_timestamps = []
        output_clips = []
        post_pairs = list(zip(all_post_contents, all_timestamps))
        for i, (content, r) in enumerate(progress.tqdm(post_pairs, desc="Clipping")):
            try:
                start_str, end_str = r.split("-")
                start_sec = time_to_seconds(start_str.strip())
                end_sec = time_to_seconds(end_str.strip())
                if start_sec >= end_sec or end_sec > video.duration:
                    continue
                subclip = make_subclip(start_sec, end_sec)
                clip_output_path = f"clip_{yt_video_id}_{i+1}.mp4"
                subclip.write_videofile(clip_output_path, codec="libx264", audio_codec="aac", **write_kwargs)
                output_clips.append(clip_output_path)
                kept_contents.append(content)
                kept_timestamps.append(r)
            except Exception as e:
                print(f"Skipping clip for timestamp '{r}' due to error: {e}")
                continue
        # Release the reader before the (possibly slow) upload stage.
        video.close()
        video = None
        df = pd.DataFrame({
            "Post Content": kept_contents,
            "Timestamp": kept_timestamps
        })

        # --- Stage 5: Post to X (Optional) ---
        tweet_links_md = "###  Tweet URLs\n*Posting to X was not selected.*"
        if post_to_x and not output_clips:
            # Without clips there is no thread; do not post a lone source link.
            tweet_links_md = "### Tweet URLs\n*No clips could be generated, so nothing was posted to X.*"
        elif post_to_x:
            progress(0.9, desc="🕊️ Posting thread to X...")
            # `client` creates the tweets; `api` is a separate OAuth1 handle
            # used here only for the chunked media upload.
            client = tweepy.Client(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            auth = tweepy.OAuth1UserHandler(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            api = tweepy.API(auth)
            previous_tweet_id = None
            tweet_links = []
            user_info = client.get_me(user_fields=["username"]).data
            username = user_info.username
            thread_items = list(zip(output_clips, kept_contents))
            for clip_path, content in progress.tqdm(thread_items, desc="Tweeting"):
                media = api.media_upload(filename=clip_path, media_category='tweet_video', chunked=True)
                tweet = client.create_tweet(
                    text=content,
                    media_ids=[media.media_id],
                    in_reply_to_tweet_id=previous_tweet_id
                )
                # Each post replies to the previous one to form the thread.
                previous_tweet_id = tweet.data['id']
                tweet_links.append(f"https://x.com/{username}/status/{previous_tweet_id}")
            client.create_tweet(text=f"Source video: {youtube_url}", in_reply_to_tweet_id=previous_tweet_id)
            tweet_links_md = "### ✅ Successfully Posted Tweet URLs\n" + "\n".join([f"* [Tweet {i+1}]({url})" for i, url in enumerate(tweet_links)])
        progress(1, desc="🎉 Done!")
        # Note: Clip files will be automatically cleaned up by the background scheduler
        return "Generation Complete!", df, output_clips, gr.update(value=tweet_links_md, visible=True)
    except Exception as e:
        traceback.print_exc()
        error_message = f"An error occurred: {e}"
        return error_message, pd.DataFrame(), [], gr.update(visible=False)
    finally:
        # Runs on success and failure alike: close the reader if an error
        # left it open, then delete the full-length download.
        if video is not None:
            try:
                video.close()
            except Exception as close_error:
                print(f"Failed to close video: {close_error}")
        if downloaded_filepath and os.path.exists(downloaded_filepath):
            try:
                os.remove(downloaded_filepath)
            except OSError as remove_error:
                print(f"Failed to remove {downloaded_filepath}: {remove_error}")
+# --- 8. Gradio UI Layout ---
with gr.Blocks(theme=gr.themes.Soft(), title="YouTube to X Thread Generator") as app:
    # Page header.
    gr.Markdown("# 🚀 YouTube to X Thread Generator")
    gr.Markdown("Turn any YouTube video into an engaging, multi-part X (Twitter) thread with video clips.")

    with gr.Row():
        # Left column: everything the user fills in.
        with gr.Column(scale=2):
            gr.Markdown("### 1. Input Video & Content Details")
            youtube_url = gr.Textbox(
                label="YouTube Video URL",
                placeholder="e.g., https://www.youtube.com/watch?v=VISDGlpX0WI",
            )
            num_posts = gr.Slider(
                minimum=3,
                maximum=15,
                value=8,
                step=1,
                label="Number of Posts in the Thread",
            )
            with gr.Row():
                video_type = gr.Textbox(
                    label="Video Type",
                    placeholder="e.g., 'podcast', 'documentary'",
                )
                subject_type = gr.Textbox(
                    label="Subject Type",
                    placeholder="e.g., 'CEO', 'historical event'",
                )
            # Credentials are collapsed by default; needed only for posting.
            with gr.Accordion("🔑 X/Twitter API Keys (Optional)", open=False):
                gr.Markdown("*Enter your X/Twitter keys below ONLY if you want to post the thread directly.*")
                twitter_api_key = gr.Textbox(label="X API Key", type="password")
                twitter_api_secret = gr.Textbox(label="X API Key Secret", type="password")
                twitter_access_token = gr.Textbox(label="X Access Token", type="password")
                twitter_access_secret = gr.Textbox(label="X Access Token Secret", type="password")
            with gr.Row(elem_id="action_buttons"):
                post_to_x_checkbox = gr.Checkbox(label="✅ Post Thread directly to X?", value=False)
                submit_btn = gr.Button("Generate Thread", variant="primary")

        # Right column: results produced by create_video_thread.
        with gr.Column(scale=3):
            gr.Markdown("### 2. Generated Content & Clips")
            status_output = gr.Textbox(label="Status", interactive=False, show_copy_button=True)
            posts_output = gr.DataFrame(
                headers=["Post Content", "Timestamp"],
                label="Generated Posts",
                interactive=False,
                wrap=True,
            )
            clips_output = gr.Gallery(
                label="Generated Video Clips",
                show_label=False,
                elem_id="gallery",
                columns=[3],
                rows=[2],
                object_fit="contain",
                height="auto",
            )
            tweet_urls_output = gr.Markdown("### Tweet URLs\n*No tweets posted yet.*", visible=False)

    # Order must match create_video_thread's positional parameters.
    thread_inputs = [
        youtube_url,
        num_posts,
        video_type,
        subject_type,
        post_to_x_checkbox,
        twitter_api_key,
        twitter_api_secret,
        twitter_access_token,
        twitter_access_secret,
    ]
    # Order must match the 4-tuple create_video_thread returns.
    thread_outputs = [status_output, posts_output, clips_output, tweet_urls_output]
    submit_btn.click(fn=create_video_thread, inputs=thread_inputs, outputs=thread_outputs)
# Script entry point: nothing below runs when this module is imported.
if __name__ == "__main__":
    # Start the background thread that deletes old video files, so clips
    # left behind by earlier requests do not accumulate on disk.
    start_cleanup_scheduler()
    # Launch the Gradio app with debug mode on and a public share link requested.
    app.launch(debug=True, share=True)