LordPatil committed on
Commit
1f1864e
·
verified ·
1 Parent(s): 373d198

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +332 -0
app.py ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ #
4
+ # YouTube to X (Twitter) Thread Generator
5
+ # This Gradio app automates the process of turning a YouTube video
6
+ # into a multi-part X thread with corresponding video clips.
7
+ #
8
+
9
+ # --- 1. Installation ---
10
+ # Ensure you have all necessary packages installed:
11
+ # pip install gradio supadata google-generativeai pydantic yt-dlp moviepy tweepy pandas
12
+
13
+ # --- 2. Imports ---
14
+ import gradio as gr
15
+ import os
16
+ import re
17
+ import threading
18
+ import time
19
+ import glob
20
+ from supadata import Supadata
21
+ import google.generativeai as genai
22
+ from pydantic import BaseModel, Field
23
+ from datetime import timedelta
24
+ import yt_dlp
25
+ from moviepy.video.io.VideoFileClip import VideoFileClip
26
+ import tweepy
27
+ import pandas as pd
28
+ import traceback
29
+
30
+ # --- 3. Video Cleanup System ---
31
def cleanup_old_videos(max_age_seconds: float = 900, patterns=None):
    """Delete stale video files from the current working directory.

    Args:
        max_age_seconds: Files whose modification time is older than this many
            seconds are removed. Defaults to 900 (15 minutes).
        patterns: Iterable of glob patterns to scan. Defaults to the video
            extensions and file prefixes this app produces.

    Returns:
        None. Failures are printed instead of raised so that a background
        caller is never interrupted by a single bad file.
    """
    if patterns is None:
        patterns = ("*.mp4", "*.webm", "*.mkv", "downloaded_video.*", "clip_*")

    try:
        current_time = time.time()

        # A file can match several patterns (e.g. "clip_x.mp4" matches both
        # "*.mp4" and "clip_*"); a set makes sure it is handled only once.
        candidates = set()
        for pattern in patterns:
            candidates.update(glob.glob(pattern))

        for file_path in sorted(candidates):
            try:
                # Directories that happen to match a pattern are left alone.
                if not os.path.isfile(file_path):
                    continue
                file_age = current_time - os.path.getmtime(file_path)
                if file_age > max_age_seconds:
                    os.remove(file_path)
                    print(f"🗑️ Cleaned up old video file: {file_path}")
            except OSError as e:
                print(f"Failed to remove {file_path}: {e}")
    except Exception as e:
        # Best-effort by design: never propagate out of the cleanup routine.
        print(f"Cleanup error: {e}")
50
+
51
def start_cleanup_scheduler():
    """Launch a daemon thread that purges stale video files every 15 minutes."""

    def _sweep_forever():
        # Sleep first, so the first sweep runs 15 minutes (900 s) after startup.
        while True:
            time.sleep(900)
            cleanup_old_videos()

    # Daemon thread: it must not keep the process alive on shutdown.
    worker = threading.Thread(target=_sweep_forever, daemon=True)
    worker.start()
    print("🧹 Video cleanup scheduler started (runs every 15 minutes)")
61
+
62
+ # --- 4. Pydantic Model for Structured LLM Output ---
63
class StructuredXPosts(BaseModel):
    """Schema that the AI model's JSON reply is validated against."""

    # Text of each post in the thread.
    post_contents: list[str] = Field(
        description="A list of content for X posts.",
    )
    # Clip range for each post.
    timestamps: list[str] = Field(
        description="Timestamps in 'HH:MM:SS-HH:MM:SS' format for each post.",
    )
67
+
68
+ # --- 5. Helper Functions ---
69
+ def get_youtube_id(url: str) -> str | None:
70
+ """Extracts the YouTube video ID from various URL formats."""
71
+ regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
72
+ match = re.search(regex, url)
73
+ return match.group(1) if match else None
74
+
75
def ms_to_hhmmss(ms: int) -> str:
    """Convert a millisecond offset to a zero-padded ``HH:MM:SS`` string.

    Hours are cumulative (25 hours renders as ``25:00:00``), so the output
    always has the same colon-separated shape regardless of duration.

    Args:
        ms: Offset in milliseconds. Sub-second precision is truncated and
            negative values are clamped to zero.

    Returns:
        The offset formatted as ``HH:MM:SS``.
    """
    total_seconds = max(int(ms) // 1000, 0)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
79
+
80
def time_to_seconds(t: str) -> float:
    """Convert an ``HH:MM:SS``, ``MM:SS`` or ``SS`` string to total seconds.

    Args:
        t: Colon-separated time string; surrounding whitespace is ignored and
            each component may be fractional.

    Returns:
        The total number of seconds as a float.

    Raises:
        ValueError: If a component is not numeric or there are more than
            three colon-separated components.
    """
    parts = [float(p) for p in t.strip().split(":")]
    if len(parts) > 3:
        # Previously only the first component was returned, silently
        # producing a wrong offset.
        raise ValueError(f"Unsupported time format: {t!r}")

    # Align the weights with however many components were supplied.
    weights = (3600, 60, 1)[-len(parts):]
    return sum(value * weight for value, weight in zip(parts, weights))
88
+
89
+ # --- 6. AI Prompt Template ---
90
# Prompt skeleton sent to the LLM. "[VIDEO_TYPE]" and "[SUBJECT_TYPE]" are
# substituted with user input before use; the remaining bracketed tokens are
# placeholders the model is expected to fill in itself.
HEAD_PROMPT_TEMPLATE = """
Below is a transcript of a [VIDEO_TYPE] video.
I want to create a X thread with this format. The first post will be the opener with a video clip of the [SUBJECT_TYPE].

Opener Post Format:
[MAIN_HOOK_STATEMENT]:

[KEY_POINT_1]
[KEY_POINT_2]
[KEY_POINT_3]
[CONTEXT_OR_SETUP]
[INTRIGUING_HOOK_LINE] 🧵

Follow-up Posts Format:
Each follow-up post should:
Start with an engaging hook related to the subject.
Present 2-4 key points or insights from the transcript.
Maintain narrative flow toward the conclusion.

Closing Post Format:
[KEY_TAKEAWAYS_OR_ADVICE]:

[ACTIONABLE_POINT_1]
[ACTIONABLE_POINT_2]
[ACTIONABLE_POINT_3]
[MEMORABLE_CLOSING_LINE]

CRITICAL INSTRUCTIONS:
1. Do not include any markdown formatting in the posts. But include line breaks for better readability.
2. Do not include any hashtags in the posts.
3. Only the first post should have the 🧵 emoji.
4. Each post must be less than 280 characters.
5. Provide timestamps for video extraction from the transcript for each post. The timestamp range should be 30 seconds to 1 minute.
"""
124
+
125
+ # --- 7. Main Processing Function ---
126
def create_video_thread(
    youtube_url: str,
    num_posts: int,
    video_type: str,
    subject_type: str,
    post_to_x: bool,
    twitter_api_key: str,
    twitter_api_secret: str,
    twitter_access_token: str,
    twitter_access_secret: str,
    progress=gr.Progress(track_tqdm=True)
):
    """
    The main workflow function that powers the Gradio app.

    Orchestrates transcript fetching, AI content generation, video download,
    clipping, and (optionally) posting the thread to X.

    Args:
        youtube_url: URL of the source YouTube video.
        num_posts: Number of posts the AI should write.
        video_type: Free-text description substituted for "[VIDEO_TYPE]".
        subject_type: Free-text description substituted for "[SUBJECT_TYPE]".
        post_to_x: When true, upload the clips and publish the thread.
        twitter_api_key: X consumer key (required only when posting).
        twitter_api_secret: X consumer secret (required only when posting).
        twitter_access_token: X access token (required only when posting).
        twitter_access_secret: X access token secret (required only when posting).
        progress: Gradio progress tracker used to report each stage.

    Returns:
        A 4-tuple ``(status message, posts DataFrame, list of clip paths,
        gr.update for the tweet-URL markdown)``. Any failure is reported
        through the status message with an empty DataFrame and clip list
        rather than raised.

    Environment:
        SUPADATA_API_KEY and GEMINI_API_KEY must be set. Service credentials
        are read from the environment instead of being embedded in source.
    """
    downloaded_filepath = ""

    try:
        # --- Stage 0: Validation & Setup ---
        progress(0, desc="🚀 Starting...")
        if not all([youtube_url, num_posts, video_type, subject_type]):
            raise gr.Error("Please fill in all required fields: URL, Number of Posts, Video Type, and Subject Type.")
        if post_to_x and not all([twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret]):
            raise gr.Error("To post to X, all four X API keys are required.")

        supadata_api_key = os.environ.get("SUPADATA_API_KEY")
        gemini_api_key = os.environ.get("GEMINI_API_KEY")
        if not supadata_api_key or not gemini_api_key:
            raise gr.Error("Server configuration error: the SUPADATA_API_KEY and GEMINI_API_KEY environment variables must be set.")

        yt_video_id = get_youtube_id(youtube_url)
        if not yt_video_id:
            raise gr.Error("Invalid YouTube URL. Could not extract video ID.")

        # --- Stage 1: Get Transcript ---
        progress(0.1, desc="📄 Fetching video transcript...")
        supadata = Supadata(api_key=supadata_api_key)
        transcript = supadata.youtube.transcript(video_id=yt_video_id, lang="en")
        if not transcript.content:
            raise gr.Error("Could not fetch transcript. The video might not have one, or it could be private.")

        # Built with plain statements: reusing the enclosing quote type or a
        # backslash inside an f-string expression is a SyntaxError before
        # Python 3.12.
        transcript_arr = []
        for chunk in transcript.content:
            start_ms = int(chunk.offset)
            end_ms = start_ms + int(chunk.duration)
            chunk_text = chunk.text.strip().replace("\n", " ")
            transcript_arr.append(
                f"{chunk_text} [{ms_to_hhmmss(start_ms)} - {ms_to_hhmmss(end_ms)}]"
            )

        # --- Stage 2: Generate Posts with LLM ---
        progress(0.25, desc="🤖 Generating X thread with AI...")
        genai.configure(api_key=gemini_api_key)

        head_prompt = HEAD_PROMPT_TEMPLATE.replace("[VIDEO_TYPE]", video_type).replace("[SUBJECT_TYPE]", subject_type)
        full_prompt = f"""{head_prompt}\nInstructions: You should create {num_posts} such posts.\n\nTranscript:\n{transcript_arr}\n\nPlease provide your response as a JSON object that strictly adheres to the following schema: {StructuredXPosts.model_json_schema()}"""

        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(
            full_prompt,
            generation_config=genai.types.GenerationConfig(response_mime_type="application/json")
        )

        structured_data = StructuredXPosts.model_validate_json(response.text)
        all_post_contents = structured_data.post_contents
        all_timestamps = structured_data.timestamps

        if not all_post_contents or not all_timestamps:
            raise gr.Error("AI failed to generate posts. The transcript might be too short or the topic unclear.")

        # --- Stage 3: Download Video ---
        progress(0.5, desc="📥 Downloading original YouTube video (this may take a moment)...")
        video_url_full = f"https://www.youtube.com/watch?v={yt_video_id}"
        output_path_template = "downloaded_video.%(ext)s"
        ydl_opts = {
            'format': 'bestvideo[height<=720]+bestaudio/best[height<=720]',
            'outtmpl': output_path_template,
            'merge_output_format': 'mp4',
            'quiet': True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url_full, download=True)
            # The merged output is always .mp4 regardless of the source ext.
            base, _ = os.path.splitext(ydl.prepare_filename(result))
            downloaded_filepath = base + '.mp4'

        if not os.path.exists(downloaded_filepath):
            raise gr.Error(f"Failed to download video file. Expected at: {downloaded_filepath}")

        # --- Stage 4: Clip Videos ---
        progress(0.7, desc="✂️ Slicing video into clips...")
        # Each entry keeps a post, its timestamp and its clip together, so a
        # skipped clip can never shift later posts onto the wrong video.
        # zip() also tolerates the model returning lists of unequal length.
        thread_rows = []
        post_timestamp_pairs = list(zip(all_post_contents, all_timestamps))
        video = VideoFileClip(downloaded_filepath)
        try:
            for i, (post_text, r) in enumerate(progress.tqdm(post_timestamp_pairs, desc="Clipping")):
                try:
                    start_str, end_str = r.split("-")
                    start_sec = time_to_seconds(start_str.strip())
                    end_sec = time_to_seconds(end_str.strip())

                    if start_sec >= end_sec or end_sec > video.duration:
                        print(f"Skipping clip for timestamp '{r}': range is empty or beyond the end of the video")
                        continue

                    subclip = video.subclip(start_sec, end_sec)
                    clip_output_path = f"clip_{yt_video_id}_{i+1}.mp4"
                    subclip.write_videofile(clip_output_path, codec="libx264", audio_codec="aac", verbose=False, logger=None)
                    thread_rows.append((post_text, r, clip_output_path))
                except Exception as e:
                    print(f"Skipping clip for timestamp '{r}' due to error: {e}")
                    continue
        finally:
            # Release the file handle even when clipping fails.
            video.close()

        output_clips = [clip_path for _, _, clip_path in thread_rows]
        df = pd.DataFrame({
            "Post Content": [post_text for post_text, _, _ in thread_rows],
            "Timestamp": [timestamp for _, timestamp, _ in thread_rows]
        })

        # --- Stage 5: Post to X (Optional) ---
        tweet_links_md = "### Tweet URLs\n*Posting to X was not selected.*"
        if post_to_x and not thread_rows:
            tweet_links_md = "### Tweet URLs\n*No clips could be generated, so nothing was posted.*"
        elif post_to_x:
            progress(0.9, desc="🕊️ Posting thread to X...")
            # The v2 client creates tweets; media upload still requires the
            # v1.1 API object.
            client = tweepy.Client(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            auth = tweepy.OAuth1UserHandler(
                consumer_key=twitter_api_key,
                consumer_secret=twitter_api_secret,
                access_token=twitter_access_token,
                access_token_secret=twitter_access_secret
            )
            api = tweepy.API(auth)
            previous_tweet_id = None
            tweet_links = []
            user_info = client.get_me(user_fields=["username"]).data
            username = user_info.username

            for post_text, _, clip_path in progress.tqdm(thread_rows, desc="Tweeting"):
                media = api.media_upload(filename=clip_path, media_category='tweet_video', chunked=True)
                tweet = client.create_tweet(
                    text=post_text,
                    media_ids=[media.media_id],
                    in_reply_to_tweet_id=previous_tweet_id
                )
                previous_tweet_id = tweet.data['id']
                tweet_links.append(f"https://x.com/{username}/status/{previous_tweet_id}")

            client.create_tweet(text=f"Source video: {youtube_url}", in_reply_to_tweet_id=previous_tweet_id)
            tweet_links_md = "### ✅ Successfully Posted Tweet URLs\n" + "\n".join([f"* [Tweet {i+1}]({url})" for i, url in enumerate(tweet_links)])

        progress(1, desc="🎉 Done!")

        # Note: Clip files will be automatically cleaned up by the background scheduler

        return "Generation Complete!", df, output_clips, gr.update(value=tweet_links_md, visible=True)

    except Exception as e:
        traceback.print_exc()
        error_message = f"An error occurred: {e}"
        return error_message, pd.DataFrame(), [], gr.update(visible=False)

    finally:
        # Remove the full-length download on success and on failure.
        if downloaded_filepath and os.path.exists(downloaded_filepath):
            try:
                os.remove(downloaded_filepath)
            except OSError as cleanup_error:
                print(f"Failed to remove {downloaded_filepath}: {cleanup_error}")
284
+
285
+ # --- 8. Gradio UI Layout ---
286
# Two-column layout: inputs and credentials on the left, results on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="YouTube to X Thread Generator") as app:
    gr.Markdown("# 🚀 YouTube to X Thread Generator")
    gr.Markdown("Turn any YouTube video into an engaging, multi-part X (Twitter) thread with video clips.")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### 1. Input Video & Content Details")
            youtube_url = gr.Textbox(label="YouTube Video URL", placeholder="e.g., https://www.youtube.com/watch?v=VISDGlpX0WI")
            num_posts = gr.Slider(minimum=3, maximum=15, value=8, step=1, label="Number of Posts in the Thread")
            with gr.Row():
                video_type = gr.Textbox(label="Video Type", placeholder="e.g., 'podcast', 'documentary'")
                subject_type = gr.Textbox(label="Subject Type", placeholder="e.g., 'CEO', 'historical event'")

            # Credentials are only needed when the user opts in to posting.
            with gr.Accordion("🔑 X/Twitter API Keys (Optional)", open=False):
                gr.Markdown("*Enter your X/Twitter keys below ONLY if you want to post the thread directly.*")
                twitter_api_key = gr.Textbox(label="X API Key", type="password")
                twitter_api_secret = gr.Textbox(label="X API Key Secret", type="password")
                twitter_access_token = gr.Textbox(label="X Access Token", type="password")
                twitter_access_secret = gr.Textbox(label="X Access Token Secret", type="password")

            with gr.Row(elem_id="action_buttons"):
                post_to_x_checkbox = gr.Checkbox(label="✅ Post Thread directly to X?", value=False)
                submit_btn = gr.Button("Generate Thread", variant="primary")

        with gr.Column(scale=3):
            gr.Markdown("### 2. Generated Content & Clips")
            status_output = gr.Textbox(label="Status", interactive=False, show_copy_button=True)
            posts_output = gr.DataFrame(headers=["Post Content", "Timestamp"], label="Generated Posts", interactive=False, wrap=True)
            clips_output = gr.Gallery(label="Generated Video Clips", show_label=False, elem_id="gallery", columns=[3], rows=[2], object_fit="contain", height="auto")
            # Hidden until create_video_thread returns an update that shows it.
            tweet_urls_output = gr.Markdown("### Tweet URLs\n*No tweets posted yet.*", visible=False)

    # The input order must match create_video_thread's positional parameters.
    submit_btn.click(
        fn=create_video_thread,
        inputs=[
            youtube_url, num_posts, video_type, subject_type,
            post_to_x_checkbox,
            twitter_api_key, twitter_api_secret, twitter_access_token, twitter_access_secret
        ],
        outputs=[status_output, posts_output, clips_output, tweet_urls_output]
    )
326
+
327
if __name__ == "__main__":
    # Start the background sweeper first so stale clips get purged while serving.
    start_cleanup_scheduler()

    # Serve the UI in debug mode and request a public share link.
    app.launch(debug=True, share=True)