#!/usr/bin/env python
#
# YouTube to X (Twitter) Thread Generator
# This Gradio app automates the process of turning a YouTube video
# into a multi-part X thread with corresponding video clips.
#
# --- 1. Installation ---
# Ensure you have all necessary packages installed:
# pip install gradio supadata google-generativeai pydantic yt-dlp moviepy tweepy pandas
# --- 2. Imports ---
import gradio as gr
import os
import re
import threading
import time
import glob
from supadata import Supadata
import google.generativeai as genai
from pydantic import BaseModel, Field
from datetime import timedelta
import yt_dlp
from moviepy.video.io.VideoFileClip import VideoFileClip
import tweepy
import pandas as pd
import traceback
# --- 3. Video Cleanup System ---
def cleanup_old_videos():
    """Delete leftover video files in the working directory older than 15 minutes.

    Scans for downloaded source videos and generated clips; a file is removed
    once its mtime is more than 900 seconds in the past. Errors are logged and
    swallowed so the background scheduler thread never dies.
    """
    try:
        current_time = time.time()
        # Find all video files. The patterns overlap (e.g. "clip_x.mp4"
        # matches "*.mp4" AND "clip_*"), so collect matches into a set —
        # otherwise the second pattern re-attempts an already-deleted file
        # and logs a spurious "Failed to remove" error.
        video_patterns = ["*.mp4", "*.webm", "*.mkv", "downloaded_video.*", "clip_*"]
        candidates = set()
        for pattern in video_patterns:
            candidates.update(glob.glob(pattern))
        for file_path in candidates:
            try:
                # Check if file is older than 15 minutes (900 seconds)
                file_age = current_time - os.path.getmtime(file_path)
                if file_age > 900:  # 15 minutes = 900 seconds
                    os.remove(file_path)
                    print(f"ποΈ Cleaned up old video file: {file_path}")
            except Exception as e:
                print(f"Failed to remove {file_path}: {e}")
    except Exception as e:
        print(f"Cleanup error: {e}")
def start_cleanup_scheduler():
    """Spawn a daemon thread that purges stale video files every 15 minutes."""
    def _periodic_sweep():
        # Sleep first, sweep second: nothing can be stale at startup.
        while True:
            time.sleep(900)  # 15 minutes between sweeps
            cleanup_old_videos()

    worker = threading.Thread(target=_periodic_sweep, daemon=True)
    worker.start()
    print("π§Ή Video cleanup scheduler started (runs every 15 minutes)")
# --- 4. Pydantic Model for Structured LLM Output ---
class StructuredXPosts(BaseModel):
    """Defines the expected JSON structure from the AI model."""
    # Parallel lists: timestamps[i] gives the clip range for post_contents[i]
    # (the clip loop and tweet loop below pair them by index).
    post_contents: list[str] = Field(description="A list of content for X posts.")
    timestamps: list[str] = Field(description="Timestamps in 'HH:MM:SS-HH:MM:SS' format for each post.")
# --- 5. Helper Functions ---
def get_youtube_id(url: str) -> str | None:
"""Extracts the YouTube video ID from various URL formats."""
regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
match = re.search(regex, url)
return match.group(1) if match else None
def ms_to_hhmmss(ms: int) -> str:
    """Convert milliseconds to a zero-padded HH:MM:SS string.

    The previous implementation used str(timedelta(...)), which renders
    hours without zero padding ("0:01:05"); format explicitly so the output
    always matches the documented HH:MM:SS shape used in the LLM prompt.
    """
    total_sec = ms // 1000
    hours, rem = divmod(total_sec, 3600)
    minutes, seconds = divmod(rem, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
def time_to_seconds(t: str) -> float:
    """Convert "HH:MM:SS", "MM:SS", or a bare seconds string to total seconds."""
    fields = [float(piece) for piece in t.strip().split(":")]
    if len(fields) == 3:
        hours, minutes, seconds = fields
        return hours * 3600 + minutes * 60 + seconds
    if len(fields) == 2:
        minutes, seconds = fields
        return minutes * 60 + seconds
    # Single field (or unrecognized shape): treat the first field as seconds.
    return fields[0]
# --- 6. AI Prompt Template ---
# Prompt skeleton for Gemini. The [VIDEO_TYPE] and [SUBJECT_TYPE] tokens are
# substituted via str.replace() in create_video_thread; the other bracketed
# placeholders are instructions for the model itself, not substitution slots.
# NOTE: this is a runtime string fed to the LLM — do not reformat its contents.
HEAD_PROMPT_TEMPLATE = """
Below is a transcript of a [VIDEO_TYPE] video.
I want to create a X thread with this format. The first post will be the opener with a video clip of the [SUBJECT_TYPE].
Opener Post Format:
[MAIN_HOOK_STATEMENT]:
[KEY_POINT_1]
[KEY_POINT_2]
[KEY_POINT_3]
[CONTEXT_OR_SETUP]
[INTRIGUING_HOOK_LINE] π§΅
Follow-up Posts Format:
Each follow-up post should:
Start with an engaging hook related to the subject.
Present 2-4 key points or insights from the transcript.
Maintain narrative flow toward the conclusion.
Closing Post Format:
[KEY_TAKEAWAYS_OR_ADVICE]:
[ACTIONABLE_POINT_1]
[ACTIONABLE_POINT_2]
[ACTIONABLE_POINT_3]
[MEMORABLE_CLOSING_LINE]
CRITICAL INSTRUCTIONS:
1. Do not include any markdown formatting in the posts. But include line breaks for better readability.
2. Do not include any hashtags in the posts.
3. Only the first post should have the π§΅ emoji.
4. Each post must be less than 280 characters.
5. Provide timestamps for video extraction from the transcript for each post. The timestamp range should be 30 seconds to 1 minute.
"""
# --- 7. Main Processing Function ---
def create_video_thread(
    youtube_url: str,
    num_posts: int,
    video_type: str,
    subject_type: str,
    post_to_x: bool,
    twitter_api_key: str,
    twitter_api_secret: str,
    twitter_access_token: str,
    twitter_access_secret: str,
    progress=gr.Progress(track_tqdm=True)
):
    """End-to-end pipeline: transcript -> AI thread -> video clips -> optional X posting.

    Args:
        youtube_url: Any standard YouTube URL (watch/embed/share/youtu.be forms).
        num_posts: How many thread posts the AI should generate.
        video_type: Free-text video category substituted into the prompt.
        subject_type: Free-text subject substituted into the prompt.
        post_to_x: When True, the thread is posted via the X API.
        twitter_api_key / twitter_api_secret / twitter_access_token /
        twitter_access_secret: X credentials (required only when post_to_x).
        progress: Gradio progress tracker (also wraps the tqdm loops below).

    Returns:
        Tuple of (status message, DataFrame of posts+timestamps, list of clip
        file paths, gr.update for the tweet-links markdown panel). On any
        exception, returns an error message with empty outputs instead of
        raising, so the UI always gets a well-formed response.
    """
    # Credentials: prefer environment variables; fall back to the legacy
    # hardcoded keys for backward compatibility.
    # WARNING: hardcoded keys in source are a security risk for public
    # applications — rotate these and rely on env vars instead.
    supadata_api_key = os.environ.get("SUPADATA_API_KEY", "sd_f5d8d8c915ea3cd8d96ed0a12840635d")
    gemini_api_key = os.environ.get("GEMINI_API_KEY", "AIzaSyCoGuPenJnmvOYasBLFhH4_TtCVUZj1kdQ")
    try:
        # --- Stage 0: Validation & Setup ---
        progress(0, desc="π Starting...")
        if not all([youtube_url, num_posts, video_type, subject_type]):
            raise gr.Error("Please fill in all required fields: URL, Number of Posts, Video Type, and Subject Type.")
        if post_to_x and not all([twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret]):
            raise gr.Error("To post to X, all four X API keys are required.")
        yt_video_id = get_youtube_id(youtube_url)
        if not yt_video_id:
            raise gr.Error("Invalid YouTube URL. Could not extract video ID.")

        # --- Stage 1: Get Transcript ---
        progress(0.1, desc="π Fetching video transcript...")
        supadata = Supadata(api_key=supadata_api_key)
        transcript = supadata.youtube.transcript(video_id=yt_video_id, lang="en")
        if not transcript.content:
            raise gr.Error("Could not fetch transcript. The video might not have one, or it could be private.")
        # Each entry: "<text> [<start> - <end>]" so the LLM can cite timestamps.
        transcript_arr = [
            "{} [{} - {}]".format(
                chunk.text.strip().replace("\n", " "),
                ms_to_hhmmss(int(chunk.offset)),
                ms_to_hhmmss(int(chunk.offset) + int(chunk.duration))
            )
            for chunk in transcript.content
        ]

        # --- Stage 2: Generate Posts with LLM ---
        progress(0.25, desc="π€ Generating X thread with AI...")
        genai.configure(api_key=gemini_api_key)
        head_prompt = HEAD_PROMPT_TEMPLATE.replace("[VIDEO_TYPE]", video_type).replace("[SUBJECT_TYPE]", subject_type)
        full_prompt = f"""{head_prompt}\nInstructions: You should create {num_posts} such posts.\n\nTranscript:\n{transcript_arr}\n\nPlease provide your response as a JSON object that strictly adheres to the following schema: {StructuredXPosts.model_json_schema()}"""
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(
            full_prompt,
            generation_config=genai.types.GenerationConfig(response_mime_type="application/json")
        )
        structured_data = StructuredXPosts.model_validate_json(response.text)
        all_post_contents = structured_data.post_contents
        all_timestamps = structured_data.timestamps
        if not all_post_contents or not all_timestamps:
            raise gr.Error("AI failed to generate posts. The transcript might be too short or the topic unclear.")

        # --- Stage 3: Download Video ---
        progress(0.5, desc="π₯ Downloading original YouTube video (this may take a moment)...")
        video_url_full = f"https://www.youtube.com/watch?v={yt_video_id}"
        ydl_opts = {
            'format': 'bestvideo[height<=720]+bestaudio/best[height<=720]',
            'outtmpl': "downloaded_video.%(ext)s",
            'merge_output_format': 'mp4',
            'quiet': True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url_full, download=True)
            # merge_output_format forces mp4, so swap whatever extension
            # yt-dlp reports for ".mp4".
            base, _ = os.path.splitext(ydl.prepare_filename(result))
            downloaded_filepath = base + '.mp4'
        if not os.path.exists(downloaded_filepath):
            raise gr.Error(f"Failed to download video file. Expected at: {downloaded_filepath}")

        # --- Stage 4: Clip Videos ---
        progress(0.7, desc="βοΈ Slicing video into clips...")
        # NOTE(review): moviepy>=2.0 renamed subclip() to subclipped() and
        # removed write_videofile(verbose=...); this code assumes moviepy 1.x —
        # confirm the pinned version.
        video = VideoFileClip(downloaded_filepath)
        output_clips, kept_posts, kept_timestamps = [], [], []
        try:
            # Pair posts with their timestamps BEFORE filtering: when a clip is
            # skipped, its post is skipped too, so the DataFrame, the gallery,
            # and the tweet loop stay index-aligned. (Previously, posts were
            # truncated to len(output_clips), which paired posts with the
            # wrong clips whenever a middle timestamp failed.)
            pairs = list(zip(all_post_contents, all_timestamps))
            for i, (post, r) in enumerate(progress.tqdm(pairs, desc="Clipping")):
                try:
                    start_str, end_str = r.split("-")
                    start_sec = time_to_seconds(start_str.strip())
                    end_sec = time_to_seconds(end_str.strip())
                    # Reject empty/inverted ranges and ranges past the video end.
                    if start_sec >= end_sec or end_sec > video.duration:
                        continue
                    subclip = video.subclip(start_sec, end_sec)
                    clip_output_path = f"clip_{yt_video_id}_{i+1}.mp4"
                    subclip.write_videofile(clip_output_path, codec="libx264", audio_codec="aac", verbose=False, logger=None)
                    output_clips.append(clip_output_path)
                    kept_posts.append(post)
                    kept_timestamps.append(r)
                except Exception as e:
                    print(f"Skipping clip for timestamp '{r}' due to error: {e}")
                    continue
        finally:
            # Always release the source video handle, even if clipping fails.
            video.close()
        df = pd.DataFrame({
            "Post Content": kept_posts,
            "Timestamp": kept_timestamps
        })

        # --- Stage 5: Post to X (Optional) ---
        tweet_links_md = "### Tweet URLs\n*Posting to X was not selected.*"
        if post_to_x:
            progress(0.9, desc="ποΈ Posting thread to X...")
            # v2 Client for tweeting; v1.1 API is still needed for chunked
            # video media upload.
            client = tweepy.Client(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            auth = tweepy.OAuth1UserHandler(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            api = tweepy.API(auth)
            previous_tweet_id = None
            tweet_links = []
            username = client.get_me(user_fields=["username"]).data.username
            for i in progress.tqdm(range(len(output_clips)), desc="Tweeting"):
                media = api.media_upload(filename=output_clips[i], media_category='tweet_video', chunked=True)
                tweet = client.create_tweet(
                    text=df["Post Content"].iloc[i],
                    media_ids=[media.media_id],
                    in_reply_to_tweet_id=previous_tweet_id
                )
                previous_tweet_id = tweet.data['id']
                tweet_links.append(f"https://x.com/{username}/status/{previous_tweet_id}")
            # Final reply credits the source video.
            client.create_tweet(text=f"Source video: {youtube_url}", in_reply_to_tweet_id=previous_tweet_id)
            # String reconstructed: the scraped source had this literal broken
            # across two lines.
            tweet_links_md = "### ✅ Successfully Posted Tweet URLs\n" + "\n".join(
                [f"* [Tweet {i+1}]({url})" for i, url in enumerate(tweet_links)]
            )

        progress(1, desc="π Done!")
        # Remove the large source download immediately; clip files are left for
        # the UI gallery and reaped later by the background cleanup scheduler.
        if os.path.exists(downloaded_filepath):
            os.remove(downloaded_filepath)
        return "Generation Complete!", df, output_clips, gr.update(value=tweet_links_md, visible=True)
    except Exception as e:
        traceback.print_exc()
        error_message = f"An error occurred: {e}"
        return error_message, pd.DataFrame(), [], gr.update(visible=False)
# --- 8. Gradio UI Layout ---
# --- Gradio UI Layout ---
# Two-column layout: inputs + API keys on the left, generated outputs on the
# right. Indentation and the checkbox label literal are reconstructed — the
# scraped source was flattened and had the label string broken across lines.
with gr.Blocks(theme=gr.themes.Soft(), title="YouTube to X Thread Generator") as app:
    gr.Markdown("# π YouTube to X Thread Generator")
    gr.Markdown("Turn any YouTube video into an engaging, multi-part X (Twitter) thread with video clips.")
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### 1. Input Video & Content Details")
            youtube_url = gr.Textbox(label="YouTube Video URL", placeholder="e.g., https://www.youtube.com/watch?v=VISDGlpX0WI")
            num_posts = gr.Slider(minimum=3, maximum=15, value=8, step=1, label="Number of Posts in the Thread")
            with gr.Row():
                video_type = gr.Textbox(label="Video Type", placeholder="e.g., 'podcast', 'documentary'")
                subject_type = gr.Textbox(label="Subject Type", placeholder="e.g., 'CEO', 'historical event'")
            with gr.Accordion("π X/Twitter API Keys (Optional)", open=False):
                gr.Markdown("*Enter your X/Twitter keys below ONLY if you want to post the thread directly.*")
                twitter_api_key = gr.Textbox(label="X API Key", type="password")
                twitter_api_secret = gr.Textbox(label="X API Key Secret", type="password")
                twitter_access_token = gr.Textbox(label="X Access Token", type="password")
                twitter_access_secret = gr.Textbox(label="X Access Token Secret", type="password")
            with gr.Row(elem_id="action_buttons"):
                post_to_x_checkbox = gr.Checkbox(label="✅ Post Thread directly to X?", value=False)
                submit_btn = gr.Button("Generate Thread", variant="primary")
        with gr.Column(scale=3):
            gr.Markdown("### 2. Generated Content & Clips")
            status_output = gr.Textbox(label="Status", interactive=False, show_copy_button=True)
            posts_output = gr.DataFrame(headers=["Post Content", "Timestamp"], label="Generated Posts", interactive=False, wrap=True)
            clips_output = gr.Gallery(label="Generated Video Clips", show_label=False, elem_id="gallery", columns=[3], rows=[2], object_fit="contain", height="auto")
            tweet_urls_output = gr.Markdown("### Tweet URLs\n*No tweets posted yet.*", visible=False)
    # Event binding lives inside the Blocks context so it is registered
    # with the app.
    submit_btn.click(
        fn=create_video_thread,
        inputs=[
            youtube_url, num_posts, video_type, subject_type,
            post_to_x_checkbox,
            twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret
        ],
        outputs=[status_output, posts_output, clips_output, tweet_urls_output]
    )
if __name__ == "__main__":
    # Start the automatic video cleanup scheduler (daemon thread; reaps
    # stale .mp4/.webm/.mkv files every 15 minutes)
    start_cleanup_scheduler()
    # Launch the app
    # NOTE(review): share=True spawns a public tunnel and debug=True blocks
    # the main thread — confirm both are intended for this deployment.
    app.launch(debug=True, share=True)