# ThreadX_demo / app.py
#!/usr/bin/env python
#
# YouTube to X (Twitter) Thread Generator
# This Gradio app automates the process of turning a YouTube video
# into a multi-part X thread with corresponding video clips.
#
# --- 1. Installation ---
# Ensure you have all necessary packages installed:
# pip install gradio supadata google-generativeai pydantic yt-dlp moviepy tweepy pandas
# --- 2. Imports ---
import gradio as gr
import os
import re
import threading
import time
import glob
from supadata import Supadata
import google.generativeai as genai
from pydantic import BaseModel, Field
from datetime import timedelta
import yt_dlp
from moviepy.video.io.VideoFileClip import VideoFileClip
import tweepy
import pandas as pd
import traceback
# --- 3. Video Cleanup System ---
def cleanup_old_videos():
    """Delete generated video files in the working directory older than 15 minutes."""
    try:
        now = time.time()
        # Patterns covering both downloaded source videos and generated clips.
        patterns = ("*.mp4", "*.webm", "*.mkv", "downloaded_video.*", "clip_*")
        candidates = [path for pattern in patterns for path in glob.glob(pattern)]
        for path in candidates:
            try:
                # 900 seconds == 15 minutes; mtime is the age reference.
                if now - os.path.getmtime(path) > 900:
                    os.remove(path)
                    print(f"πŸ—‘οΈ Cleaned up old video file: {path}")
            except Exception as e:
                # Best-effort: a single failed removal must not stop the sweep.
                print(f"Failed to remove {path}: {e}")
    except Exception as e:
        print(f"Cleanup error: {e}")
def start_cleanup_scheduler():
    """Launch a daemon thread that purges stale video files every 15 minutes."""
    def _sweep_forever():
        # Sleep first so startup is not delayed by an immediate sweep.
        while True:
            time.sleep(900)  # Wait 15 minutes (900 seconds)
            cleanup_old_videos()

    worker = threading.Thread(target=_sweep_forever, daemon=True)
    worker.start()
    print("🧹 Video cleanup scheduler started (runs every 15 minutes)")
# --- 4. Pydantic Model for Structured LLM Output ---
class StructuredXPosts(BaseModel):
    """Defines the expected JSON structure from the AI model.

    Used twice in create_video_thread: its JSON schema is embedded in the
    prompt, and the model's raw JSON reply is parsed with model_validate_json.
    """
    # One entry per X post, in thread order.
    post_contents: list[str] = Field(description="A list of content for X posts.")
    # Parallel list: clip range for the post at the same index.
    timestamps: list[str] = Field(description="Timestamps in 'HH:MM:SS-HH:MM:SS' format for each post.")
# --- 5. Helper Functions ---
def get_youtube_id(url: str) -> str | None:
"""Extracts the YouTube video ID from various URL formats."""
regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
match = re.search(regex, url)
return match.group(1) if match else None
def ms_to_hhmmss(ms: int) -> str:
    """Convert milliseconds to a zero-padded HH:MM:SS string.

    BUGFIX: the previous str(timedelta(seconds=sec)) rendering produced
    'H:MM:SS' (single-digit hours) and 'N days, H:MM:SS' past 24 hours,
    violating the HH:MM:SS format this function documents and that the LLM
    prompt's timestamp instructions rely on.

    Args:
        ms: Duration/offset in milliseconds (non-negative).

    Returns:
        The duration formatted as 'HH:MM:SS' (hours are not capped at 24).
    """
    sec = ms // 1000
    hours, rem = divmod(sec, 3600)
    minutes, seconds = divmod(rem, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
def time_to_seconds(t: str) -> float:
    """Convert a 'HH:MM:SS' or 'MM:SS' string (or bare seconds) to total seconds."""
    fields = [float(chunk) for chunk in t.strip().split(":")]
    if len(fields) == 3:
        hours, minutes, seconds = fields
        return hours * 3600 + minutes * 60 + seconds
    if len(fields) == 2:
        minutes, seconds = fields
        return minutes * 60 + seconds
    # Single field (or unexpected shapes): treat the first field as seconds.
    return fields[0]
# --- 6. AI Prompt Template ---
# Prompt skeleton sent to Gemini.  Only [VIDEO_TYPE] and [SUBJECT_TYPE] are
# substituted by this program (via str.replace in create_video_thread); the
# other bracketed tokens are formatting instructions for the model itself.
HEAD_PROMPT_TEMPLATE = """
Below is a transcript of a [VIDEO_TYPE] video.
I want to create a X thread with this format. The first post will be the opener with a video clip of the [SUBJECT_TYPE].
Opener Post Format:
[MAIN_HOOK_STATEMENT]:
[KEY_POINT_1]
[KEY_POINT_2]
[KEY_POINT_3]
[CONTEXT_OR_SETUP]
[INTRIGUING_HOOK_LINE] 🧡
Follow-up Posts Format:
Each follow-up post should:
Start with an engaging hook related to the subject.
Present 2-4 key points or insights from the transcript.
Maintain narrative flow toward the conclusion.
Closing Post Format:
[KEY_TAKEAWAYS_OR_ADVICE]:
[ACTIONABLE_POINT_1]
[ACTIONABLE_POINT_2]
[ACTIONABLE_POINT_3]
[MEMORABLE_CLOSING_LINE]
CRITICAL INSTRUCTIONS:
1. Do not include any markdown formatting in the posts. But include line breaks for better readability.
2. Do not include any hashtags in the posts.
3. Only the first post should have the 🧡 emoji.
4. Each post must be less than 280 characters.
5. Provide timestamps for video extraction from the transcript for each post. The timestamp range should be 30 seconds to 1 minute.
"""
# --- 7. Main Processing Function ---
def create_video_thread(
    youtube_url: str,
    num_posts: int,
    video_type: str,
    subject_type: str,
    post_to_x: bool,
    twitter_api_key: str,
    twitter_api_secret: str,
    twitter_access_token: str,
    twitter_access_secret: str,
    progress=gr.Progress(track_tqdm=True)
):
    """
    The main workflow function that powers the Gradio app.

    Orchestrates transcript fetching, AI content generation, video clipping,
    and (optionally) posting the resulting thread to X.

    Returns a 4-tuple matching the Gradio outputs:
        (status message, DataFrame of posts/timestamps, list of clip file
        paths, gr.update for the tweet-URL markdown panel).
    On any failure a readable error tuple is returned instead of raising, so
    the UI always receives valid outputs.
    """
    # --- API KEYS ---
    # SECURITY: keys are now read from the environment first.  The hard-coded
    # fallbacks preserve the original behavior for existing deployments but
    # are a security risk for public applications -- rotate and remove them.
    supadata_api_key = os.environ.get("SUPADATA_API_KEY", "sd_f5d8d8c915ea3cd8d96ed0a12840635d")
    gemini_api_key = os.environ.get("GEMINI_API_KEY", "AIzaSyCoGuPenJnmvOYasBLFhH4_TtCVUZj1kdQ")
    try:
        # --- Stage 0: Validation & Setup ---
        progress(0, desc="πŸš€ Starting...")
        if not all([youtube_url, num_posts, video_type, subject_type]):
            raise gr.Error("Please fill in all required fields: URL, Number of Posts, Video Type, and Subject Type.")
        if post_to_x and not all([twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret]):
            raise gr.Error("To post to X, all four X API keys are required.")
        yt_video_id = get_youtube_id(youtube_url)
        if not yt_video_id:
            raise gr.Error("Invalid YouTube URL. Could not extract video ID.")

        # --- Stage 1: Get Transcript ---
        progress(0.1, desc="πŸ“„ Fetching video transcript...")
        supadata = Supadata(api_key=supadata_api_key)
        transcript = supadata.youtube.transcript(video_id=yt_video_id, lang="en")
        if not transcript.content:
            raise gr.Error("Could not fetch transcript. The video might not have one, or it could be private.")
        # Flatten each transcript chunk to "text [start - end]" so the model
        # can cite timestamp ranges for clip extraction.
        transcript_arr = [
            "{} [{} - {}]".format(
                chunk.text.strip().replace("\n", " "),
                ms_to_hhmmss(int(chunk.offset)),
                ms_to_hhmmss(int(chunk.offset) + int(chunk.duration))
            )
            for chunk in transcript.content
        ]

        # --- Stage 2: Generate Posts with LLM ---
        progress(0.25, desc="πŸ€– Generating X thread with AI...")
        genai.configure(api_key=gemini_api_key)
        head_prompt = HEAD_PROMPT_TEMPLATE.replace("[VIDEO_TYPE]", video_type).replace("[SUBJECT_TYPE]", subject_type)
        full_prompt = f"""{head_prompt}\nInstructions: You should create {num_posts} such posts.\n\nTranscript:\n{transcript_arr}\n\nPlease provide your response as a JSON object that strictly adheres to the following schema: {StructuredXPosts.model_json_schema()}"""
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(
            full_prompt,
            generation_config=genai.types.GenerationConfig(response_mime_type="application/json")
        )
        structured_data = StructuredXPosts.model_validate_json(response.text)
        all_post_contents = structured_data.post_contents
        all_timestamps = structured_data.timestamps
        if not all_post_contents or not all_timestamps:
            raise gr.Error("AI failed to generate posts. The transcript might be too short or the topic unclear.")
        # Guard against the model returning lists of different lengths; only
        # indices that have BOTH a post and a timestamp are usable.
        usable_count = min(len(all_post_contents), len(all_timestamps))

        # --- Stage 3: Download Video ---
        progress(0.5, desc="πŸ“₯ Downloading original YouTube video (this may take a moment)...")
        video_url_full = f"https://www.youtube.com/watch?v={yt_video_id}"
        output_path_template = "downloaded_video.%(ext)s"
        ydl_opts = {
            'format': 'bestvideo[height<=720]+bestaudio/best[height<=720]',
            'outtmpl': output_path_template,
            'merge_output_format': 'mp4',
            'quiet': True,
        }
        downloaded_filepath = ""
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url_full, download=True)
            # prepare_filename may report the pre-merge extension; the merged
            # output is always .mp4 per merge_output_format above.
            base, _ = os.path.splitext(ydl.prepare_filename(result))
            downloaded_filepath = base + '.mp4'
        if not os.path.exists(downloaded_filepath):
            raise gr.Error(f"Failed to download video file. Expected at: {downloaded_filepath}")

        # --- Stage 4: Clip Videos ---
        progress(0.7, desc="βœ‚οΈ Slicing video into clips...")
        video = VideoFileClip(downloaded_filepath)
        output_clips = []
        kept_indices = []  # indices of timestamps that produced a clip
        try:
            for i, r in enumerate(progress.tqdm(all_timestamps[:usable_count], desc="Clipping")):
                try:
                    start_str, end_str = r.split("-")
                    start_sec = time_to_seconds(start_str.strip())
                    end_sec = time_to_seconds(end_str.strip())
                    # Skip ranges that are inverted, empty, or past video end.
                    if start_sec >= end_sec or end_sec > video.duration:
                        continue
                    subclip = video.subclip(start_sec, end_sec)
                    clip_output_path = f"clip_{yt_video_id}_{i+1}.mp4"
                    subclip.write_videofile(clip_output_path, codec="libx264", audio_codec="aac", verbose=False, logger=None)
                    output_clips.append(clip_output_path)
                    kept_indices.append(i)
                except Exception as e:
                    print(f"Skipping clip for timestamp '{r}' due to error: {e}")
                    continue
        finally:
            # BUGFIX: always release the reader, even if clipping raises
            # outside the per-clip handler (the old code leaked it on error).
            video.close()
        # BUGFIX: pair each clip with ITS OWN post and timestamp.  The old
        # code took the first len(output_clips) posts, which misaligned posts
        # and clips whenever a middle timestamp was skipped.
        df = pd.DataFrame({
            "Post Content": [all_post_contents[i] for i in kept_indices],
            "Timestamp": [all_timestamps[i] for i in kept_indices]
        })

        # --- Stage 5: Post to X (Optional) ---
        tweet_links_md = "### Tweet URLs\n*Posting to X was not selected.*"
        if post_to_x:
            progress(0.9, desc="πŸ•ŠοΈ Posting thread to X...")
            client = tweepy.Client(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            # The v1.1 API (OAuth1) is still required for chunked video upload.
            auth = tweepy.OAuth1UserHandler(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            api = tweepy.API(auth)
            previous_tweet_id = None
            tweet_links = []
            user_info = client.get_me(user_fields=["username"]).data
            username = user_info.username
            for i in progress.tqdm(range(len(output_clips)), desc="Tweeting"):
                media = api.media_upload(filename=output_clips[i], media_category='tweet_video', chunked=True)
                tweet = client.create_tweet(
                    text=df["Post Content"].iloc[i],
                    media_ids=[media.media_id],
                    in_reply_to_tweet_id=previous_tweet_id
                )
                # Chain each tweet onto the previous one to form a thread.
                previous_tweet_id = tweet.data['id']
                tweet_links.append(f"https://x.com/{username}/status/{previous_tweet_id}")
            # Close the thread with a link back to the source video.
            client.create_tweet(text=f"Source video: {youtube_url}", in_reply_to_tweet_id=previous_tweet_id)
            tweet_links_md = "### βœ… Successfully Posted Tweet URLs\n" + "\n".join([f"* [Tweet {i+1}]({url})" for i, url in enumerate(tweet_links)])

        progress(1, desc="πŸŽ‰ Done!")
        # Clean up the main downloaded video immediately; the clips must stay
        # on disk so Gradio can serve them (the scheduler removes them later).
        if os.path.exists(downloaded_filepath):
            os.remove(downloaded_filepath)
        return "Generation Complete!", df, output_clips, gr.update(value=tweet_links_md, visible=True)
    except Exception as e:
        # Boundary handler: log the full traceback server-side and surface a
        # readable message to the UI instead of crashing the request.
        traceback.print_exc()
        error_message = f"An error occurred: {e}"
        return error_message, pd.DataFrame(), [], gr.update(visible=False)
# --- 8. Gradio UI Layout ---
# Top-level UI definition: creating components inside the Blocks context
# registers them with `app`, which is launched in the __main__ guard below.
with gr.Blocks(theme=gr.themes.Soft(), title="YouTube to X Thread Generator") as app:
    gr.Markdown("# πŸš€ YouTube to X Thread Generator")
    gr.Markdown("Turn any YouTube video into an engaging, multi-part X (Twitter) thread with video clips.")
    with gr.Row():
        # Left column: all user inputs.
        with gr.Column(scale=2):
            gr.Markdown("### 1. Input Video & Content Details")
            youtube_url = gr.Textbox(label="YouTube Video URL", placeholder="e.g., https://www.youtube.com/watch?v=VISDGlpX0WI")
            num_posts = gr.Slider(minimum=3, maximum=15, value=8, step=1, label="Number of Posts in the Thread")
            with gr.Row():
                video_type = gr.Textbox(label="Video Type", placeholder="e.g., 'podcast', 'documentary'")
                subject_type = gr.Textbox(label="Subject Type", placeholder="e.g., 'CEO', 'historical event'")
            # X credentials are optional; only needed when posting directly.
            with gr.Accordion("πŸ”‘ X/Twitter API Keys (Optional)", open=False):
                gr.Markdown("*Enter your X/Twitter keys below ONLY if you want to post the thread directly.*")
                twitter_api_key = gr.Textbox(label="X API Key", type="password")
                twitter_api_secret = gr.Textbox(label="X API Key Secret", type="password")
                twitter_access_token = gr.Textbox(label="X Access Token", type="password")
                twitter_access_secret = gr.Textbox(label="X Access Token Secret", type="password")
            with gr.Row(elem_id="action_buttons"):
                post_to_x_checkbox = gr.Checkbox(label="βœ… Post Thread directly to X?", value=False)
                submit_btn = gr.Button("Generate Thread", variant="primary")
        # Right column: generated outputs.
        with gr.Column(scale=3):
            gr.Markdown("### 2. Generated Content & Clips")
            status_output = gr.Textbox(label="Status", interactive=False, show_copy_button=True)
            posts_output = gr.DataFrame(headers=["Post Content", "Timestamp"], label="Generated Posts", interactive=False, wrap=True)
            clips_output = gr.Gallery(label="Generated Video Clips", show_label=False, elem_id="gallery", columns=[3], rows=[2], object_fit="contain", height="auto")
            tweet_urls_output = gr.Markdown("### Tweet URLs\n*No tweets posted yet.*", visible=False)
    # Wire the button: the inputs list must match create_video_thread's
    # parameter order, and outputs must match its 4-tuple return value.
    submit_btn.click(
        fn=create_video_thread,
        inputs=[
            youtube_url, num_posts, video_type, subject_type,
            post_to_x_checkbox,
            twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret
        ],
        outputs=[status_output, posts_output, clips_output, tweet_urls_output]
    )
if __name__ == "__main__":
    # Start the automatic video cleanup scheduler (daemon thread, 15-min sweep)
    start_cleanup_scheduler()
    # Launch the app; share=True exposes a public Gradio tunnel URL
    app.launch(debug=True, share=True)