# ThreadX_demo / app.py
#!/usr/bin/env python
#
# YouTube to X (Twitter) Thread Generator
# This Gradio app automates the process of turning a YouTube video
# into a multi-part X thread with corresponding video clips.
#
# --- 1. Installation ---
# Ensure you have all necessary packages installed:
# pip install gradio supadata google-generativeai pydantic yt-dlp moviepy tweepy pandas
# --- 2. Imports ---
import gradio as gr
import os
import re
import threading
import time
import glob
from supadata import Supadata
import google.generativeai as genai
from pydantic import BaseModel, Field
from datetime import timedelta
import yt_dlp
from moviepy.video.io.VideoFileClip import VideoFileClip
import tweepy
import pandas as pd
import traceback
# --- 3. Video Cleanup System ---
def cleanup_old_videos():
    """Delete generated video files in the working directory older than 15 minutes."""
    try:
        now = time.time()
        # Patterns covering both downloaded source videos and generated clips.
        patterns = ("*.mp4", "*.webm", "*.mkv", "downloaded_video.*", "clip_*")
        candidates = [path for pattern in patterns for path in glob.glob(pattern)]
        for path in candidates:
            try:
                # 900 seconds == 15 minutes; mtime is the age reference.
                if now - os.path.getmtime(path) > 900:
                    os.remove(path)
                    print(f"πŸ—‘οΈ Cleaned up old video file: {path}")
            except Exception as e:
                # Best-effort: a single failed removal must not stop the sweep.
                print(f"Failed to remove {path}: {e}")
    except Exception as e:
        print(f"Cleanup error: {e}")
def start_cleanup_scheduler():
    """Launch a daemon thread that purges stale video files every 15 minutes."""
    def _sweep_forever():
        # Sleep first so startup is not delayed by an immediate sweep.
        while True:
            time.sleep(900)  # Wait 15 minutes (900 seconds)
            cleanup_old_videos()

    worker = threading.Thread(target=_sweep_forever, daemon=True)
    worker.start()
    print("🧹 Video cleanup scheduler started (runs every 15 minutes)")
# --- 4. Pydantic Model for Structured LLM Output ---
class StructuredXPosts(BaseModel):
    """Defines the expected JSON structure from the AI model.

    Used twice in create_video_thread: its JSON schema is embedded in the
    prompt, and the model's raw JSON reply is parsed with model_validate_json.
    """
    # One entry per X post, in thread order.
    post_contents: list[str] = Field(description="A list of content for X posts.")
    # Parallel list: clip range for the post at the same index.
    timestamps: list[str] = Field(description="Timestamps in 'HH:MM:SS-HH:MM:SS' format for each post.")
# --- 5. Helper Functions ---
def get_youtube_id(url: str) -> str | None:
"""Extracts the YouTube video ID from various URL formats."""
regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
match = re.search(regex, url)
return match.group(1) if match else None
def ms_to_hhmmss(ms: int) -> str:
    """Convert milliseconds to a zero-padded HH:MM:SS string.

    BUGFIX: the previous str(timedelta(seconds=sec)) rendering produced
    'H:MM:SS' (single-digit hours) and 'N days, H:MM:SS' past 24 hours,
    violating the HH:MM:SS format this function documents and that the LLM
    prompt's timestamp instructions rely on.

    Args:
        ms: Duration/offset in milliseconds (non-negative).

    Returns:
        The duration formatted as 'HH:MM:SS' (hours are not capped at 24).
    """
    sec = ms // 1000
    hours, rem = divmod(sec, 3600)
    minutes, seconds = divmod(rem, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
def time_to_seconds(t: str) -> float:
    """Convert a 'HH:MM:SS' or 'MM:SS' string (or bare seconds) to total seconds."""
    fields = [float(chunk) for chunk in t.strip().split(":")]
    if len(fields) == 3:
        hours, minutes, seconds = fields
        return hours * 3600 + minutes * 60 + seconds
    if len(fields) == 2:
        minutes, seconds = fields
        return minutes * 60 + seconds
    # Single field (or unexpected shapes): treat the first field as seconds.
    return fields[0]
# --- 6. AI Prompt Template ---
# Prompt skeleton sent to Gemini.  Only [VIDEO_TYPE] and [SUBJECT_TYPE] are
# substituted by this program (via str.replace in create_video_thread); the
# other bracketed tokens are formatting instructions for the model itself.
HEAD_PROMPT_TEMPLATE = """
Below is a transcript of a [VIDEO_TYPE] video.
I want to create a X thread with this format. The first post will be the opener with a video clip of the [SUBJECT_TYPE].
Opener Post Format:
[MAIN_HOOK_STATEMENT]:
[KEY_POINT_1]
[KEY_POINT_2]
[KEY_POINT_3]
[CONTEXT_OR_SETUP]
[INTRIGUING_HOOK_LINE] 🧡
Follow-up Posts Format:
Each follow-up post should:
Start with an engaging hook related to the subject.
Present 2-4 key points or insights from the transcript.
Maintain narrative flow toward the conclusion.
Closing Post Format:
[KEY_TAKEAWAYS_OR_ADVICE]:
[ACTIONABLE_POINT_1]
[ACTIONABLE_POINT_2]
[ACTIONABLE_POINT_3]
[MEMORABLE_CLOSING_LINE]
CRITICAL INSTRUCTIONS:
1. Do not include any markdown formatting in the posts. But include line breaks for better readability.
2. Do not include any hashtags in the posts.
3. Only the first post should have the 🧡 emoji.
4. Each post must be less than 280 characters.
5. Provide timestamps for video extraction from the transcript for each post. The timestamp range should be 30 seconds to 1 minute.
"""
# --- 7. Main Processing Function ---
def create_video_thread(
    youtube_url: str,
    num_posts: int,
    video_type: str,
    subject_type: str,
    post_to_x: bool,
    twitter_api_key: str,
    twitter_api_secret: str,
    twitter_access_token: str,
    twitter_access_secret: str,
    progress=gr.Progress(track_tqdm=True)
):
    """
    The main workflow function that powers the Gradio app.

    Orchestrates transcript fetching, AI content generation, video clipping,
    and (optionally) posting the resulting thread to X.

    Returns a 4-tuple matching the Gradio outputs:
        (status message, DataFrame of posts/timestamps, list of clip file
        paths, gr.update for the tweet-URL markdown panel).
    On any failure a readable error tuple is returned instead of raising, so
    the UI always receives valid outputs.
    """
    # --- API KEYS ---
    # SECURITY: keys are now read from the environment first.  The hard-coded
    # fallbacks preserve the original behavior for existing deployments but
    # are a security risk for public applications -- rotate and remove them.
    supadata_api_key = os.environ.get("SUPADATA_API_KEY", "sd_f5d8d8c915ea3cd8d96ed0a12840635d")
    gemini_api_key = os.environ.get("GEMINI_API_KEY", "AIzaSyCoGuPenJnmvOYasBLFhH4_TtCVUZj1kdQ")
    try:
        # --- Stage 0: Validation & Setup ---
        progress(0, desc="πŸš€ Starting...")
        if not all([youtube_url, num_posts, video_type, subject_type]):
            raise gr.Error("Please fill in all required fields: URL, Number of Posts, Video Type, and Subject Type.")
        if post_to_x and not all([twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret]):
            raise gr.Error("To post to X, all four X API keys are required.")
        yt_video_id = get_youtube_id(youtube_url)
        if not yt_video_id:
            raise gr.Error("Invalid YouTube URL. Could not extract video ID.")

        # --- Stage 1: Get Transcript ---
        progress(0.1, desc="πŸ“„ Fetching video transcript...")
        supadata = Supadata(api_key=supadata_api_key)
        transcript = supadata.youtube.transcript(video_id=yt_video_id, lang="en")
        if not transcript.content:
            raise gr.Error("Could not fetch transcript. The video might not have one, or it could be private.")
        # Flatten each transcript chunk to "text [start - end]" so the model
        # can cite timestamp ranges for clip extraction.
        transcript_arr = [
            "{} [{} - {}]".format(
                chunk.text.strip().replace("\n", " "),
                ms_to_hhmmss(int(chunk.offset)),
                ms_to_hhmmss(int(chunk.offset) + int(chunk.duration))
            )
            for chunk in transcript.content
        ]

        # --- Stage 2: Generate Posts with LLM ---
        progress(0.25, desc="πŸ€– Generating X thread with AI...")
        genai.configure(api_key=gemini_api_key)
        head_prompt = HEAD_PROMPT_TEMPLATE.replace("[VIDEO_TYPE]", video_type).replace("[SUBJECT_TYPE]", subject_type)
        full_prompt = f"""{head_prompt}\nInstructions: You should create {num_posts} such posts.\n\nTranscript:\n{transcript_arr}\n\nPlease provide your response as a JSON object that strictly adheres to the following schema: {StructuredXPosts.model_json_schema()}"""
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(
            full_prompt,
            generation_config=genai.types.GenerationConfig(response_mime_type="application/json")
        )
        structured_data = StructuredXPosts.model_validate_json(response.text)
        all_post_contents = structured_data.post_contents
        all_timestamps = structured_data.timestamps
        if not all_post_contents or not all_timestamps:
            raise gr.Error("AI failed to generate posts. The transcript might be too short or the topic unclear.")
        # Guard against the model returning lists of different lengths; only
        # indices that have BOTH a post and a timestamp are usable.
        usable_count = min(len(all_post_contents), len(all_timestamps))

        # --- Stage 3: Download Video ---
        progress(0.5, desc="πŸ“₯ Downloading original YouTube video (this may take a moment)...")
        video_url_full = f"https://www.youtube.com/watch?v={yt_video_id}"
        output_path_template = "downloaded_video.%(ext)s"
        ydl_opts = {
            'format': 'bestvideo[height<=720]+bestaudio/best[height<=720]',
            'outtmpl': output_path_template,
            'merge_output_format': 'mp4',
            'quiet': True,
        }
        downloaded_filepath = ""
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url_full, download=True)
            # prepare_filename may report the pre-merge extension; the merged
            # output is always .mp4 per merge_output_format above.
            base, _ = os.path.splitext(ydl.prepare_filename(result))
            downloaded_filepath = base + '.mp4'
        if not os.path.exists(downloaded_filepath):
            raise gr.Error(f"Failed to download video file. Expected at: {downloaded_filepath}")

        # --- Stage 4: Clip Videos ---
        progress(0.7, desc="βœ‚οΈ Slicing video into clips...")
        video = VideoFileClip(downloaded_filepath)
        output_clips = []
        kept_indices = []  # indices of timestamps that produced a clip
        try:
            for i, r in enumerate(progress.tqdm(all_timestamps[:usable_count], desc="Clipping")):
                try:
                    start_str, end_str = r.split("-")
                    start_sec = time_to_seconds(start_str.strip())
                    end_sec = time_to_seconds(end_str.strip())
                    # Skip ranges that are inverted, empty, or past video end.
                    if start_sec >= end_sec or end_sec > video.duration:
                        continue
                    subclip = video.subclip(start_sec, end_sec)
                    clip_output_path = f"clip_{yt_video_id}_{i+1}.mp4"
                    subclip.write_videofile(clip_output_path, codec="libx264", audio_codec="aac", verbose=False, logger=None)
                    output_clips.append(clip_output_path)
                    kept_indices.append(i)
                except Exception as e:
                    print(f"Skipping clip for timestamp '{r}' due to error: {e}")
                    continue
        finally:
            # BUGFIX: always release the reader, even if clipping raises
            # outside the per-clip handler (the old code leaked it on error).
            video.close()
        # BUGFIX: pair each clip with ITS OWN post and timestamp.  The old
        # code took the first len(output_clips) posts, which misaligned posts
        # and clips whenever a middle timestamp was skipped.
        df = pd.DataFrame({
            "Post Content": [all_post_contents[i] for i in kept_indices],
            "Timestamp": [all_timestamps[i] for i in kept_indices]
        })

        # --- Stage 5: Post to X (Optional) ---
        tweet_links_md = "### Tweet URLs\n*Posting to X was not selected.*"
        if post_to_x:
            progress(0.9, desc="πŸ•ŠοΈ Posting thread to X...")
            client = tweepy.Client(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            # The v1.1 API (OAuth1) is still required for chunked video upload.
            auth = tweepy.OAuth1UserHandler(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            api = tweepy.API(auth)
            previous_tweet_id = None
            tweet_links = []
            user_info = client.get_me(user_fields=["username"]).data
            username = user_info.username
            for i in progress.tqdm(range(len(output_clips)), desc="Tweeting"):
                media = api.media_upload(filename=output_clips[i], media_category='tweet_video', chunked=True)
                tweet = client.create_tweet(
                    text=df["Post Content"].iloc[i],
                    media_ids=[media.media_id],
                    in_reply_to_tweet_id=previous_tweet_id
                )
                # Chain each tweet onto the previous one to form a thread.
                previous_tweet_id = tweet.data['id']
                tweet_links.append(f"https://x.com/{username}/status/{previous_tweet_id}")
            # Close the thread with a link back to the source video.
            client.create_tweet(text=f"Source video: {youtube_url}", in_reply_to_tweet_id=previous_tweet_id)
            tweet_links_md = "### βœ… Successfully Posted Tweet URLs\n" + "\n".join([f"* [Tweet {i+1}]({url})" for i, url in enumerate(tweet_links)])

        progress(1, desc="πŸŽ‰ Done!")
        # Clean up the main downloaded video immediately; the clips must stay
        # on disk so Gradio can serve them (the scheduler removes them later).
        if os.path.exists(downloaded_filepath):
            os.remove(downloaded_filepath)
        return "Generation Complete!", df, output_clips, gr.update(value=tweet_links_md, visible=True)
    except Exception as e:
        # Boundary handler: log the full traceback server-side and surface a
        # readable message to the UI instead of crashing the request.
        traceback.print_exc()
        error_message = f"An error occurred: {e}"
        return error_message, pd.DataFrame(), [], gr.update(visible=False)
# --- 8. Gradio UI Layout ---
# Top-level UI definition: creating components inside the Blocks context
# registers them with `app`, which is launched in the __main__ guard below.
with gr.Blocks(theme=gr.themes.Soft(), title="YouTube to X Thread Generator") as app:
    gr.Markdown("# πŸš€ YouTube to X Thread Generator")
    gr.Markdown("Turn any YouTube video into an engaging, multi-part X (Twitter) thread with video clips.")
    with gr.Row():
        # Left column: all user inputs.
        with gr.Column(scale=2):
            gr.Markdown("### 1. Input Video & Content Details")
            youtube_url = gr.Textbox(label="YouTube Video URL", placeholder="e.g., https://www.youtube.com/watch?v=VISDGlpX0WI")
            num_posts = gr.Slider(minimum=3, maximum=15, value=8, step=1, label="Number of Posts in the Thread")
            with gr.Row():
                video_type = gr.Textbox(label="Video Type", placeholder="e.g., 'podcast', 'documentary'")
                subject_type = gr.Textbox(label="Subject Type", placeholder="e.g., 'CEO', 'historical event'")
            # X credentials are optional; only needed when posting directly.
            with gr.Accordion("πŸ”‘ X/Twitter API Keys (Optional)", open=False):
                gr.Markdown("*Enter your X/Twitter keys below ONLY if you want to post the thread directly.*")
                twitter_api_key = gr.Textbox(label="X API Key", type="password")
                twitter_api_secret = gr.Textbox(label="X API Key Secret", type="password")
                twitter_access_token = gr.Textbox(label="X Access Token", type="password")
                twitter_access_secret = gr.Textbox(label="X Access Token Secret", type="password")
            with gr.Row(elem_id="action_buttons"):
                post_to_x_checkbox = gr.Checkbox(label="βœ… Post Thread directly to X?", value=False)
                submit_btn = gr.Button("Generate Thread", variant="primary")
        # Right column: generated outputs.
        with gr.Column(scale=3):
            gr.Markdown("### 2. Generated Content & Clips")
            status_output = gr.Textbox(label="Status", interactive=False, show_copy_button=True)
            posts_output = gr.DataFrame(headers=["Post Content", "Timestamp"], label="Generated Posts", interactive=False, wrap=True)
            clips_output = gr.Gallery(label="Generated Video Clips", show_label=False, elem_id="gallery", columns=[3], rows=[2], object_fit="contain", height="auto")
            tweet_urls_output = gr.Markdown("### Tweet URLs\n*No tweets posted yet.*", visible=False)
    # Wire the button: the inputs list must match create_video_thread's
    # parameter order, and outputs must match its 4-tuple return value.
    submit_btn.click(
        fn=create_video_thread,
        inputs=[
            youtube_url, num_posts, video_type, subject_type,
            post_to_x_checkbox,
            twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret
        ],
        outputs=[status_output, posts_output, clips_output, tweet_urls_output]
    )
if __name__ == "__main__":
    # Start the automatic video cleanup scheduler (daemon thread, 15-min sweep)
    start_cleanup_scheduler()
    # Launch the app; share=True exposes a public Gradio tunnel URL
    app.launch(debug=True, share=True)