File size: 14,809 Bytes
1f1864e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1e20198
 
 
 
 
1f1864e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
#!/usr/bin/env python

#
# YouTube to X (Twitter) Thread Generator
# This Gradio app automates the process of turning a YouTube video
# into a multi-part X thread with corresponding video clips.
#

# --- 1. Installation ---
# Ensure you have all necessary packages installed:
# pip install gradio supadata google-generativeai pydantic yt-dlp moviepy tweepy pandas

# --- 2. Imports ---
import gradio as gr
import os
import re
import threading
import time
import glob
from supadata import Supadata
import google.generativeai as genai
from pydantic import BaseModel, Field
from datetime import timedelta
import yt_dlp
from moviepy.video.io.VideoFileClip import VideoFileClip
import tweepy
import pandas as pd
import traceback

# --- 3. Video Cleanup System ---
def cleanup_old_videos():
    """Delete leftover video files in the working directory older than 15 minutes.

    Scans for the files produced by the download and clipping stages.
    A failure on one file (already deleted, permission denied, ...) is
    logged and skipped so a single bad entry cannot stop the sweep.
    """
    try:
        current_time = time.time()
        # Find all video files
        video_patterns = ["*.mp4", "*.webm", "*.mkv", "downloaded_video.*", "clip_*"]

        # The patterns overlap (e.g. "clip_x.mp4" matches both "*.mp4" and
        # "clip_*"), so collect matches into a set to handle each path once.
        candidates = {path for pattern in video_patterns for path in glob.glob(pattern)}

        for file_path in candidates:
            try:
                # Skip directories that happen to match a pattern;
                # os.remove would raise on them.
                if not os.path.isfile(file_path):
                    continue
                # Check if file is older than 15 minutes (900 seconds)
                file_age = current_time - os.path.getmtime(file_path)
                if file_age > 900:  # 15 minutes = 900 seconds
                    os.remove(file_path)
                    print(f"πŸ—‘οΈ Cleaned up old video file: {file_path}")
            except Exception as e:
                print(f"Failed to remove {file_path}: {e}")
    except Exception as e:
        print(f"Cleanup error: {e}")

def start_cleanup_scheduler():
    """Spawn a daemon thread that purges stale video files every 15 minutes."""
    def _periodic_cleanup():
        # Sleep first so the initial sweep never races the app's startup.
        while True:
            time.sleep(900)  # 15-minute interval between sweeps
            cleanup_old_videos()

    # daemon=True lets the interpreter exit without waiting on this loop.
    threading.Thread(target=_periodic_cleanup, daemon=True).start()
    print("🧹 Video cleanup scheduler started (runs every 15 minutes)")

# --- 4. Pydantic Model for Structured LLM Output ---
class StructuredXPosts(BaseModel):
    """Defines the expected JSON structure from the AI model.

    The JSON schema of this model is embedded verbatim in the Gemini prompt
    (via model_json_schema()) and the model's JSON reply is parsed back with
    model_validate_json(). The two lists are parallel: timestamps[i] is the
    clip range used for post_contents[i].
    """
    # One entry per X post, in thread order.
    post_contents: list[str] = Field(description="A list of content for X posts.")
    # One "HH:MM:SS-HH:MM:SS" range per post, used to cut the video clip.
    timestamps: list[str] = Field(description="Timestamps in 'HH:MM:SS-HH:MM:SS' format for each post.")

# --- 5. Helper Functions ---
def get_youtube_id(url: str) -> str | None:
    """Extracts the YouTube video ID from various URL formats."""
    regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
    match = re.search(regex, url)
    return match.group(1) if match else None

def ms_to_hhmmss(ms: int) -> str:
    """Convert milliseconds to a zero-padded HH:MM:SS string.

    The previous str(timedelta(seconds=sec)) produced "0:00:05" (hour
    field not zero-padded) and "1 day, 0:00:00" past 24 hours — neither
    matches the HH:MM:SS format the prompt promises the LLM, and the
    "N day(s)," form would break time_to_seconds on the way back.
    """
    sec = ms // 1000
    hours, remainder = divmod(sec, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

def time_to_seconds(t: str) -> float:
    """Convert a 'HH:MM:SS' or 'MM:SS' (or bare seconds) string to seconds."""
    values = [float(piece) for piece in t.strip().split(":")]
    if len(values) == 3:
        hours, minutes, seconds = values
        return hours * 3600 + minutes * 60 + seconds
    if len(values) == 2:
        minutes, seconds = values
        return minutes * 60 + seconds
    # Single number: already seconds.
    return values[0]

# --- 6. AI Prompt Template ---
# Prompt skeleton sent to Gemini. Only [VIDEO_TYPE] and [SUBJECT_TYPE] are
# substituted in Python (via str.replace in create_video_thread); every other
# [BRACKETED] token is an instruction for the model itself and is left as-is.
# This is a runtime string — do not reword it without re-testing outputs.
HEAD_PROMPT_TEMPLATE = """
Below is a transcript of a [VIDEO_TYPE] video.
I want to create a X thread with this format. The first post will be the opener with a video clip of the [SUBJECT_TYPE].

Opener Post Format:
[MAIN_HOOK_STATEMENT]:

[KEY_POINT_1]
[KEY_POINT_2]
[KEY_POINT_3]
[CONTEXT_OR_SETUP]
[INTRIGUING_HOOK_LINE] 🧡

Follow-up Posts Format:
Each follow-up post should:
Start with an engaging hook related to the subject.
Present 2-4 key points or insights from the transcript.
Maintain narrative flow toward the conclusion.

Closing Post Format:
[KEY_TAKEAWAYS_OR_ADVICE]:

[ACTIONABLE_POINT_1]
[ACTIONABLE_POINT_2]
[ACTIONABLE_POINT_3]
[MEMORABLE_CLOSING_LINE]

CRITICAL INSTRUCTIONS:
1. Do not include any markdown formatting in the posts. But include line breaks for better readability.
2. Do not include any hashtags in the posts.
3. Only the first post should have the 🧡 emoji.
4. Each post must be less than 280 characters.
5. Provide timestamps for video extraction from the transcript for each post. The timestamp range should be 30 seconds to 1 minute.
"""

# --- 7. Main Processing Function ---
def create_video_thread(
    youtube_url: str,
    num_posts: int,
    video_type: str,
    subject_type: str,
    post_to_x: bool,
    twitter_api_key: str,
    twitter_api_secret: str,
    twitter_access_token: str,
    twitter_access_secret: str,
    progress=gr.Progress(track_tqdm=True)
):
    """
    The main workflow function that powers the Gradio app.
    Orchestrates transcript fetching, AI content generation, video clipping, and posting.

    Parameters:
        youtube_url: Full YouTube URL; the 11-char video ID is extracted from it.
        num_posts: Number of thread posts requested from the LLM.
        video_type / subject_type: Substituted into the prompt template's
            [VIDEO_TYPE] / [SUBJECT_TYPE] placeholders.
        post_to_x: When True, the thread is posted; all four twitter_*
            credentials are then required.
        twitter_*: X/Twitter OAuth 1.0a user-context credentials.
        progress: Gradio progress tracker (track_tqdm mirrors tqdm loops).

    Returns a 4-tuple matching the Gradio outputs wiring:
        (status message, posts DataFrame, list of clip file paths,
         gr.update for the tweet-links Markdown).
    On any failure the exception is printed and a safe error tuple is
    returned instead of propagating (so the UI always updates).
    """
    # --- HARDCODED API KEYS ---
    # WARNING: This is a security risk for public applications.
    # NOTE(review): these are live-looking keys committed in source, and the
    # app launches with share=True (public URL) — they should be moved to
    # environment variables and rotated.
    supadata_api_key = "sd_f5d8d8c915ea3cd8d96ed0a12840635d"
    gemini_api_key = "AIzaSyCoGuPenJnmvOYasBLFhH4_TtCVUZj1kdQ"
    
    try:
        # --- Stage 0: Validation & Setup ---
        progress(0, desc="πŸš€ Starting...")
        if not all([youtube_url, num_posts, video_type, subject_type]):
            raise gr.Error("Please fill in all required fields: URL, Number of Posts, Video Type, and Subject Type.")
        if post_to_x and not all([twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret]):
            raise gr.Error("To post to X, all four X API keys are required.")

        yt_video_id = get_youtube_id(youtube_url)
        if not yt_video_id:
            raise gr.Error("Invalid YouTube URL. Could not extract video ID.")

        # --- Stage 1: Get Transcript ---
        progress(0.1, desc="πŸ“„ Fetching video transcript...")
        supadata = Supadata(api_key=supadata_api_key)
        transcript = supadata.youtube.transcript(video_id=yt_video_id, lang="en")
        if not transcript.content:
            raise gr.Error("Could not fetch transcript. The video might not have one, or it could be private.")
        
        # Flatten each transcript chunk into "text [start - end]" so the LLM
        # can quote timestamps back. chunk.offset/duration appear to be
        # milliseconds (they are fed through ms_to_hhmmss) — TODO confirm
        # against the Supadata API reference.
        transcript_arr = [
            "{} [{} - {}]".format(
                chunk.text.strip().replace("\n", " "),
                ms_to_hhmmss(int(chunk.offset)),
                ms_to_hhmmss(int(chunk.offset) + int(chunk.duration))
            )
            for chunk in transcript.content
        ]

        # --- Stage 2: Generate Posts with LLM ---
        progress(0.25, desc="πŸ€– Generating X thread with AI...")
        genai.configure(api_key=gemini_api_key)
        
        # Placeholder substitution is plain str.replace; the pydantic JSON
        # schema is appended so the model knows the exact output shape.
        head_prompt = HEAD_PROMPT_TEMPLATE.replace("[VIDEO_TYPE]", video_type).replace("[SUBJECT_TYPE]", subject_type)
        full_prompt = f"""{head_prompt}\nInstructions: You should create {num_posts} such posts.\n\nTranscript:\n{transcript_arr}\n\nPlease provide your response as a JSON object that strictly adheres to the following schema: {StructuredXPosts.model_json_schema()}"""

        model = genai.GenerativeModel('gemini-1.5-flash')
        # response_mime_type="application/json" forces JSON-mode output so
        # model_validate_json below can parse it directly.
        response = model.generate_content(
            full_prompt,
            generation_config=genai.types.GenerationConfig(response_mime_type="application/json")
        )
        
        structured_data = StructuredXPosts.model_validate_json(response.text)
        all_post_contents = structured_data.post_contents
        all_timestamps = structured_data.timestamps

        if not all_post_contents or not all_timestamps:
            raise gr.Error("AI failed to generate posts. The transcript might be too short or the topic unclear.")

        # --- Stage 3: Download Video ---
        progress(0.5, desc="πŸ“₯ Downloading original YouTube video (this may take a moment)...")
        video_url_full = f"https://www.youtube.com/watch?v={yt_video_id}"
        output_path_template = "downloaded_video.%(ext)s"
        # Cap at 720p to limit download size; merge_output_format guarantees
        # the merged file ends up as .mp4 regardless of source containers.
        ydl_opts = {
            'format': 'bestvideo[height<=720]+bestaudio/best[height<=720]',
            'outtmpl': output_path_template,
            'merge_output_format': 'mp4',
            'quiet': True,
        }
        downloaded_filepath = ""
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url_full, download=True)
            # prepare_filename reflects the pre-merge extension; force .mp4
            # because merge_output_format above fixes the final container.
            base, _ = os.path.splitext(ydl.prepare_filename(result))
            downloaded_filepath = base + '.mp4'
        
        if not os.path.exists(downloaded_filepath):
             raise gr.Error(f"Failed to download video file. Expected at: {downloaded_filepath}")

        # --- Stage 4: Clip Videos ---
        progress(0.7, desc="βœ‚οΈ Slicing video into clips...")
        video = VideoFileClip(downloaded_filepath)
        output_clips = []
        for i, r in enumerate(progress.tqdm(all_timestamps, desc="Clipping")):
            try:
                # Each timestamp is "start-end"; skip ranges that are inverted
                # or run past the end of the video.
                start_str, end_str = r.split("-")
                start_sec = time_to_seconds(start_str.strip())
                end_sec = time_to_seconds(end_str.strip())
                
                if start_sec >= end_sec or end_sec > video.duration: continue

                # NOTE(review): subclip() and the verbose= kwarg are moviepy
                # 1.x APIs (2.x renamed subclip to subclipped and dropped
                # verbose) — confirm the pinned moviepy version.
                subclip = video.subclip(start_sec, end_sec)
                clip_output_path = f"clip_{yt_video_id}_{i+1}.mp4"
                subclip.write_videofile(clip_output_path, codec="libx264", audio_codec="aac", verbose=False, logger=None)
                output_clips.append(clip_output_path)
            except Exception as e:
                print(f"Skipping clip for timestamp '{r}' due to error: {e}")
                continue
        
        video.close()
        # NOTE(review): posts are truncated to the number of clips that
        # succeeded, but clips keep their original indices — if a *middle*
        # timestamp is skipped, post text and clip fall out of alignment.
        df = pd.DataFrame({
            "Post Content": all_post_contents[:len(output_clips)],
            "Timestamp": all_timestamps[:len(output_clips)]
        })
        
        # --- Stage 5: Post to X (Optional) ---
        tweet_links_md = "###  Tweet URLs\n*Posting to X was not selected.*"
        if post_to_x:
            progress(0.9, desc="πŸ•ŠοΈ Posting thread to X...")
            # Two clients are built from the same credentials: the v2 Client
            # for tweet creation, and the v1.1 API for chunked video upload.
            client = tweepy.Client(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            auth = tweepy.OAuth1UserHandler(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            api = tweepy.API(auth)
            previous_tweet_id = None
            tweet_links = []
            # Username is needed only to build the public status URLs below.
            user_info = client.get_me(user_fields=["username"]).data
            username = user_info.username

            for i in progress.tqdm(range(len(output_clips)), desc="Tweeting"):
                # media_category='tweet_video' + chunked=True is required for
                # video uploads; each tweet replies to the previous one to
                # form the thread (the first has previous_tweet_id=None).
                media = api.media_upload(filename=output_clips[i], media_category='tweet_video', chunked=True)
                tweet = client.create_tweet(
                    text=df["Post Content"].iloc[i],
                    media_ids=[media.media_id],
                    in_reply_to_tweet_id=previous_tweet_id
                )
                previous_tweet_id = tweet.data['id']
                tweet_links.append(f"https://x.com/{username}/status/{previous_tweet_id}")

            # Final reply credits the source video at the end of the thread.
            client.create_tweet(text=f"Source video: {youtube_url}", in_reply_to_tweet_id=previous_tweet_id)
            tweet_links_md = "### βœ… Successfully Posted Tweet URLs\n" + "\n".join([f"* [Tweet {i+1}]({url})" for i, url in enumerate(tweet_links)])

        progress(1, desc="πŸŽ‰ Done!")
        # Clean up the main downloaded video immediately
        if os.path.exists(downloaded_filepath): 
            os.remove(downloaded_filepath)
        
        # Note: Clip files will be automatically cleaned up by the background scheduler
        # (they must survive this function so the Gallery can display them).

        return "Generation Complete!", df, output_clips, gr.update(value=tweet_links_md, visible=True)

    except Exception as e:
        # Catch-all boundary: log the full traceback server-side and hand the
        # UI a safe 4-tuple so every output component still gets a value.
        traceback.print_exc()
        error_message = f"An error occurred: {e}"
        return error_message, pd.DataFrame(), [], gr.update(visible=False)

# --- 8. Gradio UI Layout ---
# Declarative UI: left column collects inputs, right column shows results.
# Component variables below are wired to create_video_thread via submit_btn.click.
with gr.Blocks(theme=gr.themes.Soft(), title="YouTube to X Thread Generator") as app:
    gr.Markdown("# πŸš€ YouTube to X Thread Generator")
    gr.Markdown("Turn any YouTube video into an engaging, multi-part X (Twitter) thread with video clips.")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### 1. Input Video & Content Details")
            youtube_url = gr.Textbox(label="YouTube Video URL", placeholder="e.g., https://www.youtube.com/watch?v=VISDGlpX0WI")
            num_posts = gr.Slider(minimum=3, maximum=15, value=8, step=1, label="Number of Posts in the Thread")
            with gr.Row():
                video_type = gr.Textbox(label="Video Type", placeholder="e.g., 'podcast', 'documentary'")
                subject_type = gr.Textbox(label="Subject Type", placeholder="e.g., 'CEO', 'historical event'")

            # Credentials are optional; create_video_thread validates that all
            # four are present only when the post-to-X checkbox is ticked.
            with gr.Accordion("πŸ”‘ X/Twitter API Keys (Optional)", open=False):
                 gr.Markdown("*Enter your X/Twitter keys below ONLY if you want to post the thread directly.*")
                 twitter_api_key = gr.Textbox(label="X API Key", type="password")
                 twitter_api_secret = gr.Textbox(label="X API Key Secret", type="password")
                 twitter_access_token = gr.Textbox(label="X Access Token", type="password")
                 twitter_access_secret = gr.Textbox(label="X Access Token Secret", type="password")
            
            with gr.Row(elem_id="action_buttons"):
                post_to_x_checkbox = gr.Checkbox(label="βœ… Post Thread directly to X?", value=False)
                submit_btn = gr.Button("Generate Thread", variant="primary")

        with gr.Column(scale=3):
            gr.Markdown("### 2. Generated Content & Clips")
            status_output = gr.Textbox(label="Status", interactive=False, show_copy_button=True)
            posts_output = gr.DataFrame(headers=["Post Content", "Timestamp"], label="Generated Posts", interactive=False, wrap=True)
            clips_output = gr.Gallery(label="Generated Video Clips", show_label=False, elem_id="gallery", columns=[3], rows=[2], object_fit="contain", height="auto")
            # Hidden until a successful run; create_video_thread toggles
            # visibility via gr.update in its return tuple.
            tweet_urls_output = gr.Markdown("### Tweet URLs\n*No tweets posted yet.*", visible=False)

    # Input/output order here must match create_video_thread's signature and
    # its 4-tuple return, respectively.
    submit_btn.click(
        fn=create_video_thread,
        inputs=[
            youtube_url, num_posts, video_type, subject_type,
            post_to_x_checkbox,
            twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret
        ],
        outputs=[status_output, posts_output, clips_output, tweet_urls_output]
    )

if __name__ == "__main__":
    # Start the automatic video cleanup scheduler
    start_cleanup_scheduler()
    
    # Launch the app
    # NOTE(review): share=True opens a public Gradio tunnel; combined with
    # the API keys hardcoded in create_video_thread this exposes paid APIs
    # to anyone with the link — reconsider for any non-private deployment.
    app.launch(debug=True, share=True)