SeaWolf-AI commited on
Commit
a8fdab7
·
verified ·
1 Parent(s): 5d2e47d

Deploy MoneyPrinterV2 YouTube Shorts Generator to HF Spaces

Browse files
app.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MoneyPrinterV2 — Hugging Face Spaces Gradio UI
3
+
4
+ Generates YouTube Shorts (video only, no upload) using:
5
+ - HF Inference API for LLM text generation
6
+ - Gemini API (Nano Banana 2) for AI image generation
7
+ - KittenTTS for text-to-speech
8
+ - faster-whisper for subtitle generation
9
+ - MoviePy for video assembly
10
+ """
11
+
12
+ import os
13
+ import sys
14
+ import json
15
+ import tempfile
16
+ import shutil
17
+ import traceback
18
+
19
+ # Ensure src/ is importable (same trick as src/main.py)
20
+ _root = os.path.dirname(os.path.abspath(__file__))
21
+ _src = os.path.join(_root, "src")
22
+ if _src not in sys.path:
23
+ sys.path.insert(0, _src)
24
+ # Set sys.path[0] so config.ROOT_DIR resolves correctly
25
+ if sys.path[0] != _src:
26
+ sys.path.insert(0, _src)
27
+
28
+ import gradio as gr
29
+ from config import assert_folder_structure, ROOT_DIR
30
+ from llm_provider import select_model, list_models, generate_text
31
+ from status import info, success, error, warning
32
+
33
+ # Ensure .mp directory exists
34
+ assert_folder_structure()
35
+
36
+ # Available TTS voices (KittenTTS)
37
+ TTS_VOICES = ["Jasper", "Bella", "Luna", "Bruno", "Rosie", "Hugo", "Kiki", "Leo"]
38
+
39
+ LANGUAGES = ["English", "Korean", "Spanish", "French", "German", "Japanese", "Chinese", "Portuguese", "Russian", "Arabic"]
40
+
41
+ # LLM model choices
42
+ LLM_MODELS = list_models()
43
+
44
+
45
def generate_short(
    niche: str,
    language: str,
    llm_model: str,
    tts_voice: str,
    sentence_length: int,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the full Shorts pipeline for the Gradio UI.

    Args:
        niche: Channel niche/topic entered by the user.
        language: Output language for the script.
        llm_model: Model id passed to select_model().
        tts_voice: KittenTTS voice name (read by config via env).
        sentence_length: Number of sentences for the script.
        progress: Gradio progress tracker (tracks tqdm bars in deps).

    Returns:
        tuple: (video_path or None, metadata dict, log text).
    """
    log_lines = []

    def log(msg):
        # Collected messages are returned as the UI progress log.
        log_lines.append(msg)

    # FIX: normalize the input — the original validated niche.strip() but
    # then passed the unstripped value into the pipeline.
    niche = niche.strip()
    if not niche:
        return None, {}, "Please enter a niche/topic."

    # Select the LLM model for all subsequent generate_text() calls.
    select_model(llm_model)
    log(f"Using LLM model: {llm_model}")

    # config reads these env vars, so set them before the pipeline runs.
    os.environ["TTS_VOICE"] = tts_voice
    os.environ["SCRIPT_SENTENCE_LENGTH"] = str(int(sentence_length))

    try:
        # Imported lazily so the UI still loads if heavy deps are missing.
        from classes.YouTube import YouTube
        from classes.Tts import TTS

        log("Initializing YouTube pipeline (browser-free)...")
        yt = YouTube(
            account_uuid="gradio-session",
            account_nickname="gradio-user",
            fp_profile_path="",
            niche=niche,
            language=language,
            use_browser=False,
        )

        # Step 1: Generate topic
        log("Generating topic...")
        topic = yt.generate_topic()
        log(f"Topic: {topic}")

        # Step 2: Generate script
        log("Generating script...")
        script = yt.generate_script()
        log(f"Script: {script[:200]}...")

        # Step 3: Generate metadata
        log("Generating metadata (title, description)...")
        metadata = yt.generate_metadata()
        # FIX: .get() so a malformed metadata dict can't crash the logger.
        log(f"Title: {metadata.get('title', '')}")

        # Step 4: Generate image prompts
        log("Generating image prompts...")
        prompts = yt.generate_prompts()
        log(f"Generated {len(prompts)} image prompts")

        # Step 5: Generate images
        log("Generating images...")
        generated_count = 0
        for i, prompt in enumerate(prompts):
            log(f"  Image {i+1}/{len(prompts)}: {prompt[:80]}...")
            if yt.generate_image(prompt):
                generated_count += 1
        log(f"Generated {generated_count}/{len(prompts)} images")

        if generated_count == 0:
            return None, metadata, "\n".join(
                log_lines
                + ["ERROR: No images were generated. Check your GEMINI_API_KEY."]
            )

        # Step 6: TTS
        log("Generating speech (TTS)...")
        tts = TTS()
        yt.generate_script_to_speech(tts)
        log("TTS complete")

        # Step 7: Combine into video
        log("Combining into final video (this may take a few minutes)...")
        video_path = yt.combine()
        log(f"Video generated: {video_path}")

        full_metadata = {
            "title": metadata.get("title", ""),
            "description": metadata.get("description", ""),
            "topic": topic,
            "script": script,
            "image_prompts": prompts,
            "images_generated": generated_count,
        }

        return video_path, full_metadata, "\n".join(log_lines)

    except Exception as e:
        # Surface the full traceback in the UI log instead of crashing.
        log_lines.append(f"ERROR: {e}")
        log_lines.append(traceback.format_exc())
        return None, {}, "\n".join(log_lines)
145
+
146
+
147
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------

with gr.Blocks(title="MoneyPrinterV2 — YouTube Shorts Generator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# MoneyPrinterV2 — YouTube Shorts Generator")
    gr.Markdown(
        "Generate YouTube Shorts videos automatically using AI. "
        "The pipeline generates a topic, script, images, speech, subtitles, and assembles them into a video."
    )

    with gr.Row():
        # Left column: pipeline configuration inputs.
        with gr.Column(scale=1):
            niche_box = gr.Textbox(
                label="Niche / Topic",
                placeholder="e.g. 'artificial intelligence', 'cooking tips', 'space exploration'",
                lines=2,
            )
            language_dd = gr.Dropdown(
                choices=LANGUAGES,
                value="English",
                label="Language",
            )
            model_dd = gr.Dropdown(
                choices=LLM_MODELS,
                value=LLM_MODELS[0] if LLM_MODELS else "",
                label="LLM Model",
            )
            voice_dd = gr.Dropdown(
                choices=TTS_VOICES,
                value="Jasper",
                label="TTS Voice",
            )
            sentences_slider = gr.Slider(
                minimum=2,
                maximum=8,
                value=4,
                step=1,
                label="Script Sentence Count",
            )
            run_btn = gr.Button("Generate Video", variant="primary", size="lg")

        # Right column: generated artifacts and the progress log.
        with gr.Column(scale=2):
            video_out = gr.Video(label="Generated Video")
            meta_out = gr.JSON(label="Metadata")
            log_out = gr.Textbox(label="Progress Log", lines=15, interactive=False)

    run_btn.click(
        fn=generate_short,
        inputs=[niche_box, language_dd, model_dd, voice_dd, sentences_slider],
        outputs=[video_out, meta_out, log_out],
    )

    gr.Markdown(
        "---\n"
        "**Required HF Space Secrets:** `HF_TOKEN` (for LLM), `GEMINI_API_KEY` (for image generation)\n\n"
        "**Note:** This demo generates videos only. YouTube upload requires browser automation and is not available on HF Spaces."
    )


if __name__ == "__main__":
    demo.launch()
fonts/bold_font.ttf ADDED
Binary file (28.9 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio>=4.0
2
+ huggingface_hub>=0.20.0
3
+ termcolor
4
+ requests
5
+ kittentts @ https://github.com/KittenML/KittenTTS/releases/download/0.8.1/kittentts-0.8.1-py3-none-any.whl
6
+ soundfile
7
+ moviepy
8
+ Pillow>=10.0.0
9
+ faster-whisper
10
+ srt_equalizer
src/__init__.py ADDED
File without changes
src/classes/Tts.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import soundfile as sf
3
+ from kittentts import KittenTTS as KittenModel
4
+
5
+ from config import ROOT_DIR, get_tts_voice
6
+
7
# Hugging Face model id and output sample rate for KittenTTS.
KITTEN_MODEL = "KittenML/kitten-tts-mini-0.8"
KITTEN_SAMPLE_RATE = 24000

class TTS:
    """Thin wrapper around KittenTTS for script narration."""

    def __init__(self) -> None:
        # Load the model once per instance; the voice comes from config
        # (which reads the TTS_VOICE environment variable).
        self._model = KittenModel(KITTEN_MODEL)
        self._voice = get_tts_voice()

    def synthesize(self, text, output_file=os.path.join(ROOT_DIR, ".mp", "audio.wav")):
        """Render *text* to a WAV file and return the file's path."""
        waveform = self._model.generate(text, voice=self._voice)
        sf.write(output_file, waveform, KITTEN_SAMPLE_RATE)
        return output_file
src/classes/YouTube.py ADDED
@@ -0,0 +1,928 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import base64
3
+ import json
4
+ import time
5
+ import os
6
+ import requests
7
+
8
+ from utils import *
9
+ from .Tts import TTS
10
+ from llm_provider import generate_text
11
+ from config import *
12
+ from status import *
13
+ from uuid import uuid4
14
+ from typing import List
15
+ from termcolor import colored
16
+ from datetime import datetime
17
+
18
+ # Lazy imports for browser-dependent modules
19
# Lazy imports for browser-dependent modules. Guarded by a module-level flag
# so the heavy selenium/assemblyai stack only loads when upload features run.
_browser_imports_done = False

def _ensure_browser_imports():
    """Import browser/upload dependencies on first use.

    Publishes selenium, assemblyai, the YouTube Studio element constants and
    the cache helper as module-level globals so the rest of the file can use
    them as if imported at the top. No-op after the first successful call.
    """
    global _browser_imports_done
    if _browser_imports_done:
        return
    # Declare every name we are about to publish at module scope.
    global aai, webdriver, By, Service, Options, GeckoDriverManager
    global YOUTUBE_TEXTBOX_ID, YOUTUBE_MADE_FOR_KIDS_NAME, YOUTUBE_NOT_MADE_FOR_KIDS_NAME
    global YOUTUBE_NEXT_BUTTON_ID, YOUTUBE_RADIO_BUTTON_XPATH, YOUTUBE_DONE_BUTTON_ID
    global get_youtube_cache_path
    import assemblyai as aai
    import selenium_firefox  # noqa: F401
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.firefox.service import Service
    from selenium.webdriver.firefox.options import Options
    from webdriver_manager.firefox import GeckoDriverManager
    import constants
    # Re-export the Studio upload-dialog selectors under their short names.
    YOUTUBE_TEXTBOX_ID = constants.YOUTUBE_TEXTBOX_ID
    YOUTUBE_MADE_FOR_KIDS_NAME = constants.YOUTUBE_MADE_FOR_KIDS_NAME
    YOUTUBE_NOT_MADE_FOR_KIDS_NAME = constants.YOUTUBE_NOT_MADE_FOR_KIDS_NAME
    YOUTUBE_NEXT_BUTTON_ID = constants.YOUTUBE_NEXT_BUTTON_ID
    YOUTUBE_RADIO_BUTTON_XPATH = constants.YOUTUBE_RADIO_BUTTON_XPATH
    YOUTUBE_DONE_BUTTON_ID = constants.YOUTUBE_DONE_BUTTON_ID
    from cache import get_youtube_cache_path
    # Only mark done after every import succeeded, so a failed attempt retries.
    _browser_imports_done = True
45
+
46
+ # MoviePy imports (always needed for video generation)
47
+ from moviepy.editor import (
48
+ ImageClip, AudioFileClip, TextClip, CompositeVideoClip,
49
+ CompositeAudioClip, concatenate_videoclips, afx,
50
+ )
51
+ from moviepy.video.fx.all import crop
52
+ from moviepy.config import change_settings
53
+ from moviepy.video.tools.subtitles import SubtitlesClip
54
+
55
+ # Set ImageMagick Path
56
+ imgk = get_imagemagick_path()
57
+ if imgk:
58
+ change_settings({"IMAGEMAGICK_BINARY": imgk})
59
+
60
+
61
+ class YouTube:
62
+ """
63
+ Class for YouTube Automation.
64
+
65
+ Steps to create a YouTube Short:
66
+ 1. Generate a topic [DONE]
67
+ 2. Generate a script [DONE]
68
+ 3. Generate metadata (Title, Description, Tags) [DONE]
69
+ 4. Generate AI Image Prompts [DONE]
70
+ 4. Generate Images based on generated Prompts [DONE]
71
+ 5. Convert Text-to-Speech [DONE]
72
+ 6. Show images each for n seconds, n: Duration of TTS / Amount of images [DONE]
73
+ 7. Combine Concatenated Images with the Text-to-Speech [DONE]
74
+ """
75
+
76
    def __init__(
        self,
        account_uuid: str,
        account_nickname: str,
        fp_profile_path: str,
        niche: str,
        language: str,
        use_browser: bool = True,
    ) -> None:
        """
        Constructor for YouTube Class.

        Args:
            account_uuid (str): The unique identifier for the YouTube account.
            account_nickname (str): The nickname for the YouTube account.
            fp_profile_path (str): Path to the firefox profile that is logged into the specified YouTube Account.
            niche (str): The niche of the provided YouTube Channel.
            language (str): The language of the Automation.
            use_browser (bool): If False, skip Selenium initialization (for headless video generation).

        Returns:
            None
        """
        self._account_uuid: str = account_uuid
        self._account_nickname: str = account_nickname
        self._fp_profile_path: str = fp_profile_path
        self._niche: str = niche
        self._language: str = language
        self._use_browser: bool = use_browser

        # Paths of images generated for the current video (filled by _persist_image).
        self.images = []

        # Browser-free mode: no Selenium; browser-dependent methods must not be called.
        if not self._use_browser:
            self.browser = None
            return

        # Load selenium/assemblyai and upload constants on demand.
        _ensure_browser_imports()

        # Initialize the Firefox profile
        self.options = Options()

        # Set headless state of browser
        if get_headless():
            self.options.add_argument("--headless")

        # Fail fast on a bad profile path before spawning a driver process.
        if not os.path.isdir(self._fp_profile_path):
            raise ValueError(
                f"Firefox profile path does not exist or is not a directory: {self._fp_profile_path}"
            )

        self.options.add_argument("-profile")
        self.options.add_argument(self._fp_profile_path)

        # Set the service (downloads/locates geckodriver automatically)
        self.service = Service(GeckoDriverManager().install())

        # Initialize the browser
        self.browser = webdriver.Firefox(
            service=self.service, options=self.options
        )
136
+
137
+ @property
138
+ def niche(self) -> str:
139
+ """
140
+ Getter Method for the niche.
141
+
142
+ Returns:
143
+ niche (str): The niche
144
+ """
145
+ return self._niche
146
+
147
+ @property
148
+ def language(self) -> str:
149
+ """
150
+ Getter Method for the language to use.
151
+
152
+ Returns:
153
+ language (str): The language
154
+ """
155
+ return self._language
156
+
157
+ def generate_response(self, prompt: str, model_name: str = None) -> str:
158
+ """
159
+ Generates an LLM Response based on a prompt and the user-provided model.
160
+
161
+ Args:
162
+ prompt (str): The prompt to use in the text generation.
163
+
164
+ Returns:
165
+ response (str): The generated AI Repsonse.
166
+ """
167
+ return generate_text(prompt, model_name=model_name)
168
+
169
+ def generate_topic(self) -> str:
170
+ """
171
+ Generates a topic based on the YouTube Channel niche.
172
+
173
+ Returns:
174
+ topic (str): The generated topic.
175
+ """
176
+ completion = self.generate_response(
177
+ f"Please generate a specific video idea that takes about the following topic: {self.niche}. Make it exactly one sentence. Only return the topic, nothing else."
178
+ )
179
+
180
+ if not completion:
181
+ error("Failed to generate Topic.")
182
+
183
+ self.subject = completion
184
+
185
+ return completion
186
+
187
+ def generate_script(self) -> str:
188
+ """
189
+ Generate a script for a video, depending on the subject of the video, the number of paragraphs, and the AI model.
190
+
191
+ Returns:
192
+ script (str): The script of the video.
193
+ """
194
+ sentence_length = get_script_sentence_length()
195
+ prompt = f"""
196
+ Generate a script for a video in {sentence_length} sentences, depending on the subject of the video.
197
+
198
+ The script is to be returned as a string with the specified number of paragraphs.
199
+
200
+ Here is an example of a string:
201
+ "This is an example string."
202
+
203
+ Do not under any circumstance reference this prompt in your response.
204
+
205
+ Get straight to the point, don't start with unnecessary things like, "welcome to this video".
206
+
207
+ Obviously, the script should be related to the subject of the video.
208
+
209
+ YOU MUST NOT EXCEED THE {sentence_length} SENTENCES LIMIT. MAKE SURE THE {sentence_length} SENTENCES ARE SHORT.
210
+ YOU MUST NOT INCLUDE ANY TYPE OF MARKDOWN OR FORMATTING IN THE SCRIPT, NEVER USE A TITLE.
211
+ YOU MUST WRITE THE SCRIPT IN THE LANGUAGE SPECIFIED IN [LANGUAGE].
212
+ ONLY RETURN THE RAW CONTENT OF THE SCRIPT. DO NOT INCLUDE "VOICEOVER", "NARRATOR" OR SIMILAR INDICATORS OF WHAT SHOULD BE SPOKEN AT THE BEGINNING OF EACH PARAGRAPH OR LINE. YOU MUST NOT MENTION THE PROMPT, OR ANYTHING ABOUT THE SCRIPT ITSELF. ALSO, NEVER TALK ABOUT THE AMOUNT OF PARAGRAPHS OR LINES. JUST WRITE THE SCRIPT
213
+
214
+ Subject: {self.subject}
215
+ Language: {self.language}
216
+ """
217
+ max_retries = 3
218
+ for attempt in range(max_retries):
219
+ completion = self.generate_response(prompt)
220
+ completion = re.sub(r"\*", "", completion)
221
+
222
+ if not completion:
223
+ error("The generated script is empty.")
224
+ return
225
+
226
+ if len(completion) <= 5000:
227
+ self.script = completion
228
+ return completion
229
+
230
+ if get_verbose():
231
+ warning(f"Generated Script is too long (attempt {attempt + 1}/{max_retries}). Retrying...")
232
+
233
+ self.script = completion
234
+ return completion
235
+
236
+ def generate_metadata(self) -> dict:
237
+ """
238
+ Generates Video metadata for the to-be-uploaded YouTube Short (Title, Description).
239
+
240
+ Returns:
241
+ metadata (dict): The generated metadata.
242
+ """
243
+ max_retries = 3
244
+ title = ""
245
+ for attempt in range(max_retries):
246
+ title = self.generate_response(
247
+ f"Please generate a YouTube Video Title for the following subject, including hashtags: {self.subject}. Only return the title, nothing else. Limit the title under 100 characters."
248
+ )
249
+ if len(title) <= 100:
250
+ break
251
+ if get_verbose():
252
+ warning(f"Generated Title is too long (attempt {attempt + 1}/{max_retries}). Retrying...")
253
+
254
+ description = self.generate_response(
255
+ f"Please generate a YouTube Video Description for the following script: {self.script}. Only return the description, nothing else."
256
+ )
257
+
258
+ self.metadata = {"title": title, "description": description}
259
+
260
+ return self.metadata
261
+
262
+ def generate_prompts(self) -> List[str]:
263
+ """
264
+ Generates AI Image Prompts based on the provided Video Script.
265
+
266
+ Returns:
267
+ image_prompts (List[str]): Generated List of image prompts.
268
+ """
269
+ n_prompts = len(self.script) / 3
270
+
271
+ prompt = f"""
272
+ Generate {n_prompts} Image Prompts for AI Image Generation,
273
+ depending on the subject of a video.
274
+ Subject: {self.subject}
275
+
276
+ The image prompts are to be returned as
277
+ a JSON-Array of strings.
278
+
279
+ Each search term should consist of a full sentence,
280
+ always add the main subject of the video.
281
+
282
+ Be emotional and use interesting adjectives to make the
283
+ Image Prompt as detailed as possible.
284
+
285
+ YOU MUST ONLY RETURN THE JSON-ARRAY OF STRINGS.
286
+ YOU MUST NOT RETURN ANYTHING ELSE.
287
+ YOU MUST NOT RETURN THE SCRIPT.
288
+
289
+ The search terms must be related to the subject of the video.
290
+ Here is an example of a JSON-Array of strings:
291
+ ["image prompt 1", "image prompt 2", "image prompt 3"]
292
+
293
+ For context, here is the full text:
294
+ {self.script}
295
+ """
296
+
297
+ completion = (
298
+ str(self.generate_response(prompt))
299
+ .replace("```json", "")
300
+ .replace("```", "")
301
+ )
302
+
303
+ image_prompts = []
304
+
305
+ if "image_prompts" in completion:
306
+ image_prompts = json.loads(completion)["image_prompts"]
307
+ else:
308
+ try:
309
+ image_prompts = json.loads(completion)
310
+ if get_verbose():
311
+ info(f" => Generated Image Prompts: {image_prompts}")
312
+ except Exception:
313
+ if get_verbose():
314
+ warning(
315
+ "LLM returned an unformatted response. Attempting to clean..."
316
+ )
317
+
318
+ # Get everything between [ and ], and turn it into a list
319
+ r = re.compile(r"\[.*\]")
320
+ image_prompts = r.findall(completion)
321
+ if len(image_prompts) == 0:
322
+ if get_verbose():
323
+ warning("Failed to generate Image Prompts.")
324
+ image_prompts = [self.subject]
325
+
326
+ if len(image_prompts) > n_prompts:
327
+ image_prompts = image_prompts[: int(n_prompts)]
328
+
329
+ self.image_prompts = image_prompts
330
+
331
+ success(f"Generated {len(image_prompts)} Image Prompts.")
332
+
333
+ return image_prompts
334
+
335
+ def _persist_image(self, image_bytes: bytes, provider_label: str) -> str:
336
+ """
337
+ Writes generated image bytes to a PNG file in .mp.
338
+
339
+ Args:
340
+ image_bytes (bytes): Image payload
341
+ provider_label (str): Label for logging
342
+
343
+ Returns:
344
+ path (str): Absolute image path
345
+ """
346
+ image_path = os.path.join(ROOT_DIR, ".mp", str(uuid4()) + ".png")
347
+
348
+ with open(image_path, "wb") as image_file:
349
+ image_file.write(image_bytes)
350
+
351
+ if get_verbose():
352
+ info(f' => Wrote image from {provider_label} to "{image_path}"')
353
+
354
+ self.images.append(image_path)
355
+ return image_path
356
+
357
+ def generate_image_nanobanana2(self, prompt: str) -> str:
358
+ """
359
+ Generates an AI Image using Nano Banana 2 API (Gemini image API).
360
+
361
+ Args:
362
+ prompt (str): Prompt for image generation
363
+
364
+ Returns:
365
+ path (str): The path to the generated image.
366
+ """
367
+ print(f"Generating Image using Nano Banana 2 API: {prompt}")
368
+
369
+ api_key = get_nanobanana2_api_key()
370
+ if not api_key:
371
+ error("nanobanana2_api_key is not configured.")
372
+ return None
373
+
374
+ base_url = get_nanobanana2_api_base_url().rstrip("/")
375
+ model = get_nanobanana2_model()
376
+ aspect_ratio = get_nanobanana2_aspect_ratio()
377
+
378
+ endpoint = f"{base_url}/models/{model}:generateContent"
379
+ payload = {
380
+ "contents": [{"parts": [{"text": prompt}]}],
381
+ "generationConfig": {
382
+ "responseModalities": ["IMAGE"],
383
+ "imageConfig": {"aspectRatio": aspect_ratio},
384
+ },
385
+ }
386
+
387
+ try:
388
+ response = requests.post(
389
+ endpoint,
390
+ headers={"x-goog-api-key": api_key, "Content-Type": "application/json"},
391
+ json=payload,
392
+ timeout=300,
393
+ )
394
+ response.raise_for_status()
395
+ body = response.json()
396
+
397
+ candidates = body.get("candidates", [])
398
+ for candidate in candidates:
399
+ content = candidate.get("content", {})
400
+ for part in content.get("parts", []):
401
+ inline_data = part.get("inlineData") or part.get("inline_data")
402
+ if not inline_data:
403
+ continue
404
+ data = inline_data.get("data")
405
+ mime_type = inline_data.get("mimeType") or inline_data.get("mime_type", "")
406
+ if data and str(mime_type).startswith("image/"):
407
+ image_bytes = base64.b64decode(data)
408
+ return self._persist_image(image_bytes, "Nano Banana 2 API")
409
+
410
+ if get_verbose():
411
+ warning(f"Nano Banana 2 did not return an image payload. Response: {body}")
412
+ return None
413
+ except Exception as e:
414
+ if get_verbose():
415
+ warning(f"Failed to generate image with Nano Banana 2 API: {str(e)}")
416
+ return None
417
+
418
+ def generate_image(self, prompt: str) -> str:
419
+ """
420
+ Generates an AI Image based on the given prompt using Nano Banana 2.
421
+
422
+ Args:
423
+ prompt (str): Reference for image generation
424
+
425
+ Returns:
426
+ path (str): The path to the generated image.
427
+ """
428
+ return self.generate_image_nanobanana2(prompt)
429
+
430
+ def generate_script_to_speech(self, tts_instance: TTS) -> str:
431
+ """
432
+ Converts the generated script into Speech using KittenTTS and returns the path to the wav file.
433
+
434
+ Args:
435
+ tts_instance (tts): Instance of TTS Class.
436
+
437
+ Returns:
438
+ path_to_wav (str): Path to generated audio (WAV Format).
439
+ """
440
+ path = os.path.join(ROOT_DIR, ".mp", str(uuid4()) + ".wav")
441
+
442
+ # Clean script, remove every character that is not a word character, a space, a period, a question mark, or an exclamation mark.
443
+ self.script = re.sub(r"[^\w\s.?!]", "", self.script)
444
+
445
+ tts_instance.synthesize(self.script, path)
446
+
447
+ self.tts_path = path
448
+
449
+ if get_verbose():
450
+ info(f' => Wrote TTS to "{path}"')
451
+
452
+ return path
453
+
454
+ def add_video(self, video: dict) -> None:
455
+ """
456
+ Adds a video to the cache.
457
+
458
+ Args:
459
+ video (dict): The video to add
460
+
461
+ Returns:
462
+ None
463
+ """
464
+ _ensure_browser_imports()
465
+ videos = self.get_videos()
466
+ videos.append(video)
467
+
468
+ cache = get_youtube_cache_path()
469
+
470
+ with open(cache, "r") as file:
471
+ previous_json = json.loads(file.read())
472
+
473
+ # Find our account
474
+ accounts = previous_json["accounts"]
475
+ for account in accounts:
476
+ if account["id"] == self._account_uuid:
477
+ account["videos"].append(video)
478
+
479
+ # Commit changes
480
+ with open(cache, "w") as f:
481
+ f.write(json.dumps(previous_json))
482
+
483
+ def generate_subtitles(self, audio_path: str) -> str:
484
+ """
485
+ Generates subtitles for the audio using the configured STT provider.
486
+
487
+ Args:
488
+ audio_path (str): The path to the audio file.
489
+
490
+ Returns:
491
+ path (str): The path to the generated SRT File.
492
+ """
493
+ provider = str(get_stt_provider() or "local_whisper").lower()
494
+
495
+ if provider == "local_whisper":
496
+ return self.generate_subtitles_local_whisper(audio_path)
497
+
498
+ if provider == "third_party_assemblyai":
499
+ return self.generate_subtitles_assemblyai(audio_path)
500
+
501
+ warning(f"Unknown stt_provider '{provider}'. Falling back to local_whisper.")
502
+ return self.generate_subtitles_local_whisper(audio_path)
503
+
504
+ def generate_subtitles_assemblyai(self, audio_path: str) -> str:
505
+ """
506
+ Generates subtitles using AssemblyAI.
507
+
508
+ Args:
509
+ audio_path (str): Audio file path
510
+
511
+ Returns:
512
+ path (str): Path to SRT file
513
+ """
514
+ aai.settings.api_key = get_assemblyai_api_key()
515
+ config = aai.TranscriptionConfig()
516
+ transcriber = aai.Transcriber(config=config)
517
+ transcript = transcriber.transcribe(audio_path)
518
+ subtitles = transcript.export_subtitles_srt()
519
+
520
+ srt_path = os.path.join(ROOT_DIR, ".mp", str(uuid4()) + ".srt")
521
+
522
+ with open(srt_path, "w") as file:
523
+ file.write(subtitles)
524
+
525
+ return srt_path
526
+
527
+ def _format_srt_timestamp(self, seconds: float) -> str:
528
+ """
529
+ Formats a timestamp in seconds to SRT format.
530
+
531
+ Args:
532
+ seconds (float): Seconds
533
+
534
+ Returns:
535
+ ts (str): HH:MM:SS,mmm
536
+ """
537
+ total_millis = max(0, int(round(seconds * 1000)))
538
+ hours = total_millis // 3600000
539
+ minutes = (total_millis % 3600000) // 60000
540
+ secs = (total_millis % 60000) // 1000
541
+ millis = total_millis % 1000
542
+ return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
543
+
544
+ def generate_subtitles_local_whisper(self, audio_path: str) -> str:
545
+ """
546
+ Generates subtitles using local Whisper (faster-whisper).
547
+
548
+ Args:
549
+ audio_path (str): Audio file path
550
+
551
+ Returns:
552
+ path (str): Path to SRT file
553
+ """
554
+ try:
555
+ from faster_whisper import WhisperModel
556
+ except ImportError:
557
+ error(
558
+ "Local STT selected but 'faster-whisper' is not installed. "
559
+ "Install it or switch stt_provider to third_party_assemblyai."
560
+ )
561
+ raise
562
+
563
+ model = WhisperModel(
564
+ get_whisper_model(),
565
+ device=get_whisper_device(),
566
+ compute_type=get_whisper_compute_type(),
567
+ )
568
+ segments, _ = model.transcribe(audio_path, vad_filter=True)
569
+
570
+ lines = []
571
+ for idx, segment in enumerate(segments, start=1):
572
+ start = self._format_srt_timestamp(segment.start)
573
+ end = self._format_srt_timestamp(segment.end)
574
+ text = str(segment.text).strip()
575
+
576
+ if not text:
577
+ continue
578
+
579
+ lines.append(str(idx))
580
+ lines.append(f"{start} --> {end}")
581
+ lines.append(text)
582
+ lines.append("")
583
+
584
+ subtitles = "\n".join(lines)
585
+ srt_path = os.path.join(ROOT_DIR, ".mp", str(uuid4()) + ".srt")
586
+ with open(srt_path, "w", encoding="utf-8") as file:
587
+ file.write(subtitles)
588
+
589
+ return srt_path
590
+
591
+ def combine(self) -> str:
592
+ """
593
+ Combines everything into the final video.
594
+
595
+ Returns:
596
+ path (str): The path to the generated MP4 File.
597
+ """
598
+ combined_image_path = os.path.join(ROOT_DIR, ".mp", str(uuid4()) + ".mp4")
599
+ threads = get_threads()
600
+ tts_clip = AudioFileClip(self.tts_path)
601
+ max_duration = tts_clip.duration
602
+ req_dur = max_duration / len(self.images)
603
+
604
+ # Make a generator that returns a TextClip when called with consecutive
605
+ generator = lambda txt: TextClip(
606
+ txt,
607
+ font=os.path.join(get_fonts_dir(), get_font()),
608
+ fontsize=100,
609
+ color="#FFFF00",
610
+ stroke_color="black",
611
+ stroke_width=5,
612
+ size=(1080, 1920),
613
+ method="caption",
614
+ )
615
+
616
+ print(colored("[+] Combining images...", "blue"))
617
+
618
+ clips = []
619
+ tot_dur = 0
620
+ # Add downloaded clips over and over until the duration of the audio (max_duration) has been reached
621
+ while tot_dur < max_duration:
622
+ for image_path in self.images:
623
+ clip = ImageClip(image_path)
624
+ clip.duration = req_dur
625
+ clip = clip.set_fps(30)
626
+
627
+ # Not all images are same size,
628
+ # so we need to resize them
629
+ if round((clip.w / clip.h), 4) < 0.5625:
630
+ if get_verbose():
631
+ info(f" => Resizing Image: {image_path} to 1080x1920")
632
+ clip = crop(
633
+ clip,
634
+ width=clip.w,
635
+ height=round(clip.w / 0.5625),
636
+ x_center=clip.w / 2,
637
+ y_center=clip.h / 2,
638
+ )
639
+ else:
640
+ if get_verbose():
641
+ info(f" => Resizing Image: {image_path} to 1920x1080")
642
+ clip = crop(
643
+ clip,
644
+ width=round(0.5625 * clip.h),
645
+ height=clip.h,
646
+ x_center=clip.w / 2,
647
+ y_center=clip.h / 2,
648
+ )
649
+ clip = clip.resize((1080, 1920))
650
+
651
+ # FX (Fade In)
652
+ # clip = clip.fadein(2)
653
+
654
+ clips.append(clip)
655
+ tot_dur += clip.duration
656
+
657
+ final_clip = concatenate_videoclips(clips)
658
+ final_clip = final_clip.set_fps(30)
659
+ random_song = choose_random_song()
660
+
661
+ subtitles = None
662
+ try:
663
+ subtitles_path = self.generate_subtitles(self.tts_path)
664
+ equalize_subtitles(subtitles_path, 10)
665
+ subtitles = SubtitlesClip(subtitles_path, generator)
666
+ subtitles.set_pos(("center", "center"))
667
+ except Exception as e:
668
+ warning(f"Failed to generate subtitles, continuing without subtitles: {e}")
669
+
670
+ random_song_clip = AudioFileClip(random_song).set_fps(44100)
671
+
672
+ # Turn down volume
673
+ random_song_clip = random_song_clip.fx(afx.volumex, 0.1)
674
+ comp_audio = CompositeAudioClip([tts_clip.set_fps(44100), random_song_clip])
675
+
676
+ final_clip = final_clip.set_audio(comp_audio)
677
+ final_clip = final_clip.set_duration(tts_clip.duration)
678
+
679
+ if subtitles is not None:
680
+ final_clip = CompositeVideoClip([final_clip, subtitles])
681
+
682
+ final_clip.write_videofile(combined_image_path, threads=threads)
683
+
684
+ success(f'Wrote Video to "{combined_image_path}"')
685
+
686
+ return combined_image_path
687
+
688
+ def generate_video(self, tts_instance: TTS) -> str:
689
+ """
690
+ Generates a YouTube Short based on the provided niche and language.
691
+
692
+ Args:
693
+ tts_instance (TTS): Instance of TTS Class.
694
+
695
+ Returns:
696
+ path (str): The path to the generated MP4 File.
697
+ """
698
+ # Generate the Topic
699
+ self.generate_topic()
700
+
701
+ # Generate the Script
702
+ self.generate_script()
703
+
704
+ # Generate the Metadata
705
+ self.generate_metadata()
706
+
707
+ # Generate the Image Prompts
708
+ self.generate_prompts()
709
+
710
+ # Generate the Images
711
+ for prompt in self.image_prompts:
712
+ self.generate_image(prompt)
713
+
714
+ # Generate the TTS
715
+ self.generate_script_to_speech(tts_instance)
716
+
717
+ # Combine everything
718
+ path = self.combine()
719
+
720
+ if get_verbose():
721
+ info(f" => Generated Video: {path}")
722
+
723
+ self.video_path = os.path.abspath(path)
724
+
725
+ return path
726
+
727
+ def _require_browser(self):
728
+ if not self._use_browser or self.browser is None:
729
+ raise RuntimeError(
730
+ "Browser is not available. This method requires use_browser=True."
731
+ )
732
+
733
+ def get_channel_id(self) -> str:
734
+ """
735
+ Gets the Channel ID of the YouTube Account.
736
+
737
+ Returns:
738
+ channel_id (str): The Channel ID.
739
+ """
740
+ self._require_browser()
741
+ driver = self.browser
742
+ driver.get("https://studio.youtube.com")
743
+ time.sleep(2)
744
+ channel_id = driver.current_url.split("/")[-1]
745
+ self.channel_id = channel_id
746
+
747
+ return channel_id
748
+
749
    def upload_video(self) -> bool:
        """
        Uploads the video to YouTube via browser automation of Studio.

        Drives the youtube.com/upload flow with Selenium: picks the file at
        ``self.video_path``, fills title/description from ``self.metadata``,
        sets the "made for kids" flag, publishes as unlisted, then scrapes
        the new video's URL from Studio and caches it via ``add_video``.

        Returns:
            success (bool): Whether the upload was successful or not.
        """
        self._require_browser()
        _ensure_browser_imports()
        try:
            # Populates self.channel_id, needed later to list uploads.
            self.get_channel_id()

            driver = self.browser
            verbose = get_verbose()

            # Go to youtube.com/upload
            driver.get("https://www.youtube.com/upload")

            # Set video file via the hidden <input> inside the file picker.
            FILE_PICKER_TAG = "ytcp-uploads-file-picker"
            file_picker = driver.find_element(By.TAG_NAME, FILE_PICKER_TAG)
            INPUT_TAG = "input"
            file_input = file_picker.find_element(By.TAG_NAME, INPUT_TAG)
            file_input.send_keys(self.video_path)

            # Wait for upload to finish
            # NOTE(review): fixed sleeps throughout assume each UI step
            # completes in time — consider WebDriverWait; verify on slow links.
            time.sleep(5)

            # Set title
            # First textbox is the title, last one is the description.
            textboxes = driver.find_elements(By.ID, YOUTUBE_TEXTBOX_ID)

            title_el = textboxes[0]
            description_el = textboxes[-1]

            if verbose:
                info("\t=> Setting title...")

            title_el.click()
            time.sleep(1)
            title_el.clear()
            title_el.send_keys(self.metadata["title"])

            if verbose:
                info("\t=> Setting description...")

            # Set description
            time.sleep(10)
            description_el.click()
            time.sleep(0.5)
            description_el.clear()
            description_el.send_keys(self.metadata["description"])

            time.sleep(0.5)

            # Set `made for kids` option
            if verbose:
                info("\t=> Setting `made for kids` option...")

            is_for_kids_checkbox = driver.find_element(
                By.NAME, YOUTUBE_MADE_FOR_KIDS_NAME
            )
            is_not_for_kids_checkbox = driver.find_element(
                By.NAME, YOUTUBE_NOT_MADE_FOR_KIDS_NAME
            )

            if not get_is_for_kids():
                is_not_for_kids_checkbox.click()
            else:
                is_for_kids_checkbox.click()

            time.sleep(0.5)

            # Click next (details -> video elements -> checks -> visibility).
            if verbose:
                info("\t=> Clicking next...")

            next_button = driver.find_element(By.ID, YOUTUBE_NEXT_BUTTON_ID)
            next_button.click()

            # Click next again
            if verbose:
                info("\t=> Clicking next again...")
            next_button = driver.find_element(By.ID, YOUTUBE_NEXT_BUTTON_ID)
            next_button.click()

            # Wait for 2 seconds
            time.sleep(2)

            # Click next again
            if verbose:
                info("\t=> Clicking next again...")
            next_button = driver.find_element(By.ID, YOUTUBE_NEXT_BUTTON_ID)
            next_button.click()

            # Set as unlisted
            # NOTE(review): assumes radio order public/private/unlisted with
            # unlisted at index 2 — confirm against current Studio markup.
            if verbose:
                info("\t=> Setting as unlisted...")

            radio_button = driver.find_elements(By.XPATH, YOUTUBE_RADIO_BUTTON_XPATH)
            radio_button[2].click()

            if verbose:
                info("\t=> Clicking done button...")

            # Click done button
            done_button = driver.find_element(By.ID, YOUTUBE_DONE_BUTTON_ID)
            done_button.click()

            # Wait for 2 seconds
            time.sleep(2)

            # Get latest video
            if verbose:
                info("\t=> Getting video URL...")

            # Get the latest uploaded video URL from the Shorts listing;
            # newest upload is assumed to be the first row.
            driver.get(
                f"https://studio.youtube.com/channel/{self.channel_id}/videos/short"
            )
            time.sleep(2)
            videos = driver.find_elements(By.TAG_NAME, "ytcp-video-row")
            first_video = videos[0]
            anchor_tag = first_video.find_element(By.TAG_NAME, "a")
            href = anchor_tag.get_attribute("href")
            if verbose:
                info(f"\t=> Extracting video ID from URL: {href}")
            # Studio links end in .../<video_id>/edit, hence index -2.
            video_id = href.split("/")[-2]

            # Build URL
            url = build_url(video_id)

            self.uploaded_video_url = url

            if verbose:
                success(f" => Uploaded Video: {url}")

            # Add video to cache
            self.add_video(
                {
                    "title": self.metadata["title"],
                    "description": self.metadata["description"],
                    "url": url,
                    "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                }
            )

            # Close the browser
            driver.quit()

            return True
        except Exception as e:
            # Best-effort cleanup: report, close the browser, signal failure.
            error(f"Failed to upload video: {e}")
            self.browser.quit()
            return False
903
+
904
+ def get_videos(self) -> List[dict]:
905
+ """
906
+ Gets the uploaded videos from the YouTube Channel.
907
+
908
+ Returns:
909
+ videos (List[dict]): The uploaded videos.
910
+ """
911
+ _ensure_browser_imports()
912
+ if not os.path.exists(get_youtube_cache_path()):
913
+ # Create the cache file
914
+ with open(get_youtube_cache_path(), "w") as file:
915
+ json.dump({"videos": []}, file, indent=4)
916
+ return []
917
+
918
+ videos = []
919
+ # Read the cache file
920
+ with open(get_youtube_cache_path(), "r") as file:
921
+ previous_json = json.loads(file.read())
922
+ # Find our account
923
+ accounts = previous_json["accounts"]
924
+ for account in accounts:
925
+ if account["id"] == self._account_uuid:
926
+ videos = account["videos"]
927
+
928
+ return videos
src/classes/__init__.py ADDED
File without changes
src/config.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+ import srt_equalizer
5
+
6
+ from termcolor import colored
7
+
8
+ ROOT_DIR = os.path.dirname(sys.path[0])
9
+
10
+
11
def is_running_in_spaces() -> bool:
    """Returns True when running inside a Hugging Face Space."""
    # Spaces always export SPACE_ID; empty/missing means "not a Space".
    space_id = os.environ.get("SPACE_ID", "")
    return len(space_id) > 0
14
+
15
+
16
def _load_config() -> dict:
    """
    Loads config.json if available; falls back to environment variables
    when running on HF Spaces or when the file is missing.

    Returns:
        dict: The parsed config, or a minimal env-derived config.
    """
    config_path = os.path.join(ROOT_DIR, "config.json")
    if os.path.exists(config_path):
        # FIX: read with an explicit encoding — the platform default may not
        # be UTF-8 (e.g. Windows cp1252), breaking non-ASCII config values.
        with open(config_path, "r", encoding="utf-8") as f:
            return json.load(f)

    # Fallback: build minimal config from environment variables.
    return {
        "verbose": os.environ.get("VERBOSE", "true").lower() == "true",
        "firefox_profile": "",
        "headless": True,
        "ollama_base_url": os.environ.get("OLLAMA_BASE_URL", "http://127.0.0.1:11434"),
        "ollama_model": os.environ.get("OLLAMA_MODEL", ""),
        "twitter_language": os.environ.get("TWITTER_LANGUAGE", "English"),
        "nanobanana2_api_base_url": os.environ.get(
            "NANOBANANA2_API_BASE_URL",
            "https://generativelanguage.googleapis.com/v1beta",
        ),
        "nanobanana2_api_key": os.environ.get("GEMINI_API_KEY", ""),
        "nanobanana2_model": os.environ.get(
            "NANOBANANA2_MODEL", "gemini-3.1-flash-image-preview"
        ),
        "nanobanana2_aspect_ratio": os.environ.get("NANOBANANA2_ASPECT_RATIO", "9:16"),
        "threads": int(os.environ.get("THREADS", "2")),
        "zip_url": os.environ.get("ZIP_URL", ""),
        "is_for_kids": False,
        "stt_provider": os.environ.get("STT_PROVIDER", "local_whisper"),
        "whisper_model": os.environ.get("WHISPER_MODEL", "tiny"),
        "whisper_device": os.environ.get("WHISPER_DEVICE", "cpu"),
        "whisper_compute_type": os.environ.get("WHISPER_COMPUTE_TYPE", "int8"),
        "assembly_ai_api_key": os.environ.get("ASSEMBLYAI_API_KEY", ""),
        "tts_voice": os.environ.get("TTS_VOICE", "Jasper"),
        "font": os.environ.get("FONT", "bold_font.ttf"),
        "imagemagick_path": os.environ.get("IMAGEMAGICK_PATH", "/usr/bin/convert"),
        "script_sentence_length": int(os.environ.get("SCRIPT_SENTENCE_LENGTH", "4")),
        "email": {"smtp_server": "", "smtp_port": 587, "username": "", "password": ""},
        "post_bridge": {
            "enabled": False,
            "api_key": "",
            "platforms": [],
            "account_ids": [],
            "auto_crosspost": False,
        },
    }
64
+
65
+
66
def assert_folder_structure() -> None:
    """
    Make sure that the necessary folder structure is present.

    Creates the ``.mp`` working directory under ROOT_DIR if missing.

    Returns:
        None
    """
    # Hoist the repeated os.path.join; use exist_ok=True so a concurrent
    # creator (or a race between exists() and makedirs()) cannot crash us.
    mp_dir = os.path.join(ROOT_DIR, ".mp")
    if not os.path.exists(mp_dir):
        if get_verbose():
            print(colored(f"=> Creating .mp folder at {mp_dir}", "green"))
        os.makedirs(mp_dir, exist_ok=True)
78
+
79
def get_first_time_running() -> bool:
    """
    Checks if the program is running for the first time by checking if .mp folder exists.

    Returns:
        exists (bool): True if the program is running for the first time, False otherwise
    """
    mp_dir = os.path.join(ROOT_DIR, ".mp")
    return not os.path.exists(mp_dir)
87
+
88
def get_email_credentials() -> dict:
    """Returns the SMTP credentials block used for outreach e-mails."""
    # FIX: use .get() with the env-fallback default so a hand-written
    # config.json missing the key does not raise KeyError.
    return _load_config().get(
        "email", {"smtp_server": "", "smtp_port": 587, "username": "", "password": ""}
    )

def get_verbose() -> bool:
    """Returns whether verbose console logging is enabled."""
    return _load_config().get("verbose", True)

def get_firefox_profile_path() -> str:
    """Returns the Firefox profile path used by Selenium ("" = default)."""
    return _load_config().get("firefox_profile", "")

def get_headless() -> bool:
    """Returns whether the automation browser should run headless."""
    return _load_config().get("headless", True)

def get_ollama_base_url() -> str:
    """Returns the base URL of the Ollama server."""
    return _load_config().get("ollama_base_url", "http://127.0.0.1:11434")

def get_ollama_model() -> str:
    """Returns the configured Ollama model name ("" = unset)."""
    return _load_config().get("ollama_model", "")

def get_twitter_language() -> str:
    """Returns the language used for generated tweets."""
    return _load_config().get("twitter_language", "English")
108
+
109
def get_nanobanana2_api_base_url() -> str:
    """Returns the base URL for the Gemini image-generation API."""
    default_url = "https://generativelanguage.googleapis.com/v1beta"
    return _load_config().get("nanobanana2_api_base_url", default_url)

def get_nanobanana2_api_key() -> str:
    """Returns the Gemini API key: config value first, else GEMINI_API_KEY env."""
    key = _load_config().get("nanobanana2_api_key", "")
    if key:
        return key
    return os.environ.get("GEMINI_API_KEY", "")

def get_nanobanana2_model() -> str:
    """Returns the Gemini image model name."""
    return _load_config().get("nanobanana2_model", "gemini-3.1-flash-image-preview")

def get_nanobanana2_aspect_ratio() -> str:
    """Returns the aspect ratio requested from the image model."""
    return _load_config().get("nanobanana2_aspect_ratio", "9:16")
124
+
125
def get_threads() -> int:
    """Returns how many threads MoviePy may use when rendering."""
    return _load_config().get("threads", 2)

def get_zip_url() -> str:
    """Returns the URL of the songs zip archive ("" = unset)."""
    return _load_config().get("zip_url", "")

def get_is_for_kids() -> bool:
    """Returns whether uploads should be marked "made for kids"."""
    return _load_config().get("is_for_kids", False)

def get_google_maps_scraper_zip_url() -> str:
    """Returns the download URL of the Google Maps scraper archive."""
    # FIX: these scraper/outreach keys are absent from the env-fallback
    # config built by _load_config(), so bare indexing raised KeyError on
    # HF Spaces; .get() with a neutral default keeps callers working.
    return _load_config().get("google_maps_scraper", "")

def get_google_maps_scraper_niche() -> str:
    """Returns the niche/query passed to the Google Maps scraper."""
    return _load_config().get("google_maps_scraper_niche", "")

def get_scraper_timeout() -> int:
    """Returns the scraper timeout in seconds (defaults to 300)."""
    return _load_config().get("scraper_timeout") or 300

def get_outreach_message_subject() -> str:
    """Returns the subject line used for outreach e-mails."""
    return _load_config().get("outreach_message_subject", "")

def get_outreach_message_body_file() -> str:
    """Returns the path of the file holding the outreach e-mail body."""
    return _load_config().get("outreach_message_body_file", "")
148
+
149
def get_tts_voice() -> str:
    """Returns the KittenTTS voice name."""
    return _load_config().get("tts_voice", "Jasper")

def get_assemblyai_api_key() -> str:
    """Returns the AssemblyAI API key ("" = unset)."""
    # FIX: was _load_config()["assembly_ai_api_key"] — KeyError when the key
    # is missing from a hand-written config.json; siblings all use .get().
    return _load_config().get("assembly_ai_api_key", "")

def get_stt_provider() -> str:
    """Returns the speech-to-text provider identifier."""
    return _load_config().get("stt_provider", "local_whisper")

def get_whisper_model() -> str:
    """Returns the faster-whisper model size."""
    # NOTE(review): default here is "base" while the env fallback in
    # _load_config() defaults to "tiny" — confirm which is intended.
    return _load_config().get("whisper_model", "base")

def get_whisper_device() -> str:
    """Returns the device faster-whisper should run on."""
    return _load_config().get("whisper_device", "auto")

def get_whisper_compute_type() -> str:
    """Returns the faster-whisper compute type (e.g. int8, float16)."""
    return _load_config().get("whisper_compute_type", "int8")
166
+
167
def equalize_subtitles(srt_path: str, max_chars: int = 10) -> None:
    """
    Equalizes the subtitles in a SRT file (rewrites it in place).

    Args:
        srt_path (str): The path to the SRT file
        max_chars (int): The maximum amount of characters in a subtitle

    Returns:
        None
    """
    srt_equalizer.equalize_srt_file(srt_path, srt_path, max_chars)

def get_font() -> str:
    """Returns the subtitle font file name (looked up under fonts/)."""
    # FIX: .get() with the env-fallback default instead of bare indexing,
    # so a config.json without "font" cannot raise KeyError.
    return _load_config().get("font", "bold_font.ttf")

def get_fonts_dir() -> str:
    """Returns the absolute path of the bundled fonts directory."""
    return os.path.join(ROOT_DIR, "fonts")

def get_imagemagick_path() -> str:
    """Returns the ImageMagick binary path MoviePy should use."""
    path = _load_config().get("imagemagick_path", "")
    # On HF Spaces ImageMagick is installed at the distro default location.
    if not path and is_running_in_spaces():
        return "/usr/bin/convert"
    return path

def get_script_sentence_length() -> int:
    """Returns how many sentences a generated script should contain."""
    # Explicit None check so a JSON null falls back to 4 but 0 is kept.
    val = _load_config().get("script_sentence_length")
    return val if val is not None else 4
195
+
196
def get_post_bridge_config() -> dict:
    """
    Returns the normalized post-bridge cross-posting configuration.

    Reads the raw ``post_bridge`` section from config and sanitizes it:
    platforms are lower-cased, deduplicated and restricted to the supported
    set; account IDs are coerced to ints (invalid entries dropped); the API
    key falls back to the POST_BRIDGE_API_KEY environment variable.

    Returns:
        dict: Keys ``enabled``, ``api_key``, ``platforms``, ``account_ids``,
            ``auto_crosspost`` — always present with safe types.
    """
    defaults = {
        "enabled": False,
        "api_key": "",
        "platforms": ["tiktok", "instagram"],
        "account_ids": [],
        "auto_crosspost": False,
    }
    supported_platforms = {"tiktok", "instagram"}

    config_json = _load_config()

    # Tolerate a wrong-typed section (e.g. a string) by ignoring it.
    raw_config = config_json.get("post_bridge", {})
    if not isinstance(raw_config, dict):
        raw_config = {}

    raw_platforms = raw_config.get("platforms")
    normalized_platforms = []
    seen_platforms = set()

    # Key absent -> defaults; present list -> filtered/deduped; any other
    # type (including an explicit non-list) -> no platforms at all.
    if raw_platforms is None:
        normalized_platforms = defaults["platforms"].copy()
    elif isinstance(raw_platforms, list):
        for platform in raw_platforms:
            normalized_platform = str(platform).strip().lower()
            if (
                normalized_platform in supported_platforms
                and normalized_platform not in seen_platforms
            ):
                normalized_platforms.append(normalized_platform)
                seen_platforms.add(normalized_platform)
    else:
        normalized_platforms = []

    raw_account_ids = raw_config.get("account_ids", defaults["account_ids"])
    normalized_account_ids = []
    if isinstance(raw_account_ids, list):
        for account_id in raw_account_ids:
            # Drop entries that cannot be coerced to int (None, "abc", ...).
            try:
                normalized_account_ids.append(int(account_id))
            except (TypeError, ValueError):
                continue

    # Environment variable is the fallback when no key is configured.
    api_key = str(raw_config.get("api_key", "")).strip()
    if not api_key:
        api_key = os.environ.get("POST_BRIDGE_API_KEY", "").strip()

    return {
        "enabled": bool(raw_config.get("enabled", defaults["enabled"])),
        "api_key": api_key,
        "platforms": normalized_platforms,
        "account_ids": normalized_account_ids,
        "auto_crosspost": bool(
            raw_config.get("auto_crosspost", defaults["auto_crosspost"])
        ),
    }
src/llm_provider.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from config import is_running_in_spaces
4
+
5
+ _selected_model: str | None = None
6
+
7
+
8
def _use_hf_backend() -> bool:
    """Use HF Inference API when running on Spaces or when HF_TOKEN is set and Ollama is absent."""
    if is_running_in_spaces():
        return True
    has_hf_token = bool(os.environ.get("HF_TOKEN"))
    has_ollama = bool(os.environ.get("OLLAMA_BASE_URL"))
    return has_hf_token and not has_ollama
15
+
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # HF Inference API backend
19
+ # ---------------------------------------------------------------------------
20
+
21
def _hf_client():
    """Builds an InferenceClient authenticated with HF_TOKEN (if set)."""
    # Imported lazily so the Ollama-only path never needs huggingface_hub.
    from huggingface_hub import InferenceClient
    return InferenceClient(token=os.environ.get("HF_TOKEN", ""))
25
+
26
+
27
+ def _hf_list_models() -> list[str]:
28
+ return [
29
+ "meta-llama/Llama-3.1-8B-Instruct",
30
+ "mistralai/Mistral-7B-Instruct-v0.3",
31
+ "google/gemma-2-9b-it",
32
+ ]
33
+
34
+
35
def _hf_generate_text(prompt: str, model: str) -> str:
    """Runs one single-turn chat completion against the HF Inference API."""
    chat_messages = [{"role": "user", "content": prompt}]
    completion = _hf_client().chat_completion(
        model=model,
        messages=chat_messages,
        max_tokens=2048,
    )
    return completion.choices[0].message.content.strip()
42
+
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # Ollama backend (original)
46
+ # ---------------------------------------------------------------------------
47
+
48
def _ollama_client():
    """Builds an Ollama client pointed at the configured base URL."""
    # Lazy imports: only the Ollama code path requires these.
    import ollama
    from config import get_ollama_base_url
    host = get_ollama_base_url()
    return ollama.Client(host=host)
52
+
53
+
54
def _ollama_list_models() -> list[str]:
    """Returns the locally available Ollama model names, sorted."""
    listing = _ollama_client().list()
    names = [entry.model for entry in listing.models]
    names.sort()
    return names
57
+
58
+
59
def _ollama_generate_text(prompt: str, model: str) -> str:
    """Runs one single-turn chat against the local Ollama server."""
    chat_messages = [{"role": "user", "content": prompt}]
    reply = _ollama_client().chat(model=model, messages=chat_messages)
    return reply["message"]["content"].strip()
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # Public API (unchanged interface)
69
+ # ---------------------------------------------------------------------------
70
+
71
def list_models() -> list[str]:
    """Lists the selectable model names from whichever backend is active."""
    backend = _hf_list_models if _use_hf_backend() else _ollama_list_models
    return backend()
75
+
76
+
77
+ def select_model(model: str) -> None:
78
+ global _selected_model
79
+ _selected_model = model
80
+
81
+
82
+ def get_active_model() -> str | None:
83
+ return _selected_model
84
+
85
+
86
def generate_text(prompt: str, model_name: str | None = None) -> str:
    """
    Generates text for *prompt* using the active backend.

    Args:
        prompt (str): The user prompt, sent as a single chat message.
        model_name (str | None): Optional model override; falls back to the
            model previously chosen via select_model().

    Returns:
        str: The stripped model response text.

    Raises:
        RuntimeError: If no model is selected and none is passed.
    """
    model = model_name or _selected_model
    if not model:
        raise RuntimeError(
            "No model selected. Call select_model() first or pass model_name."
        )

    # Backend choice is re-evaluated on every call (env vars may change).
    if _use_hf_backend():
        return _hf_generate_text(prompt, model)
    return _ollama_generate_text(prompt, model)
src/status.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from termcolor import colored
2
+
3
def _emit(message: str, color: str, emoji: str, show_emoji: bool) -> None:
    """Shared printer: optional emoji prefix, colorized via termcolor."""
    prefix = emoji if show_emoji else ""
    print(colored(f"{prefix} {message}", color))


def error(message: str, show_emoji: bool = True) -> None:
    """
    Prints an error message.

    Args:
        message (str): The error message
        show_emoji (bool): Whether to show the emoji

    Returns:
        None
    """
    _emit(message, "red", "❌", show_emoji)


def success(message: str, show_emoji: bool = True) -> None:
    """
    Prints a success message.

    Args:
        message (str): The success message
        show_emoji (bool): Whether to show the emoji

    Returns:
        None
    """
    _emit(message, "green", "✅", show_emoji)


def info(message: str, show_emoji: bool = True) -> None:
    """
    Prints an info message.

    Args:
        message (str): The info message
        show_emoji (bool): Whether to show the emoji

    Returns:
        None
    """
    _emit(message, "magenta", "ℹ️", show_emoji)


def warning(message: str, show_emoji: bool = True) -> None:
    """
    Prints a warning message.

    Args:
        message (str): The warning message
        show_emoji (bool): Whether to show the emoji

    Returns:
        None
    """
    _emit(message, "yellow", "⚠️", show_emoji)
58
+
59
def question(message: str, show_emoji: bool = True) -> str:
    """
    Prints a question message and returns the user's input.

    Args:
        message (str): The question message
        show_emoji (bool): Whether to show the emoji

    Returns:
        user_input (str): The user's input
    """
    prefix = "❓" if show_emoji else ""
    return input(colored(f"{prefix} {message}", "magenta"))
src/utils.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+ import zipfile
4
+ import requests
5
+ import platform
6
+
7
+ from status import *
8
+ from config import *
9
+
10
+ DEFAULT_SONG_ARCHIVE_URLS = []
11
+
12
+
13
def close_running_selenium_instances() -> None:
    """
    Closes any running Selenium instances.

    Returns:
        None
    """
    try:
        info(" => Closing running Selenium instances...")

        # Selenium drives Firefox here, so kill every Firefox process.
        kill_command = (
            "taskkill /f /im firefox.exe"
            if platform.system() == "Windows"
            else "pkill firefox"
        )
        os.system(kill_command)

        success(" => Closed running Selenium instances.")

    except Exception as e:
        error(f"Error occurred while closing running Selenium instances: {str(e)}")
33
+
34
+
35
def build_url(youtube_video_id: str) -> str:
    """
    Builds the URL to the YouTube video.

    Args:
        youtube_video_id (str): The YouTube video ID.

    Returns:
        url (str): The URL to the YouTube video.
    """
    return "https://www.youtube.com/watch?v=" + youtube_video_id
46
+
47
+
48
def rem_temp_files() -> None:
    """
    Removes temporary (non-JSON) files in the `.mp` directory.

    JSON files are kept because they hold persistent cache/account state.

    Returns:
        None
    """
    # Path to the `.mp` directory
    mp_dir = os.path.join(ROOT_DIR, ".mp")

    for name in os.listdir(mp_dir):
        full_path = os.path.join(mp_dir, name)
        # BUG FIX: the original called os.remove() on every non-JSON entry,
        # which raises IsADirectoryError if a subdirectory ever appears in
        # .mp — only delete regular files.
        if name.endswith(".json") or not os.path.isfile(full_path):
            continue
        os.remove(full_path)
63
+
64
+
65
def fetch_songs() -> None:
    """
    Downloads songs into songs/ directory to use with geneated videos.

    No-ops when the Songs/ directory already contains audio files. Otherwise
    tries the configured zip URL first, then any DEFAULT_SONG_ARCHIVE_URLS,
    extracting only audio entries with safe (non-traversal) paths.

    Returns:
        None
    """
    try:
        info(f" => Fetching songs...")

        files_dir = os.path.join(ROOT_DIR, "Songs")
        if not os.path.exists(files_dir):
            os.mkdir(files_dir)
            if get_verbose():
                info(f" => Created directory: {files_dir}")
        else:
            # Already populated with usable audio -> nothing to download.
            existing_audio_files = [
                name
                for name in os.listdir(files_dir)
                if os.path.isfile(os.path.join(files_dir, name))
                and name.lower().endswith((".mp3", ".wav", ".m4a", ".aac", ".ogg"))
            ]
            if len(existing_audio_files) > 0:
                return

        # Candidate URLs: configured URL first, then built-in fallbacks.
        configured_url = get_zip_url().strip()
        download_urls = [configured_url] if configured_url else []
        download_urls.extend(DEFAULT_SONG_ARCHIVE_URLS)

        archive_path = os.path.join(files_dir, "songs.zip")
        downloaded = False

        for download_url in download_urls:
            try:
                # NOTE(review): whole archive is buffered in memory via
                # response.content — fine for small zips, confirm for large.
                response = requests.get(download_url, timeout=60)
                response.raise_for_status()

                with open(archive_path, "wb") as file:
                    file.write(response.content)

                # Extract only audio entries; the path checks guard against
                # zip-slip (writing outside files_dir via ".." or absolute paths).
                SAFE_EXTENSIONS = (".mp3", ".wav", ".m4a", ".aac", ".ogg", ".flac")
                with zipfile.ZipFile(archive_path, "r") as zf:
                    for member in zf.namelist():
                        basename = os.path.basename(member)
                        if not basename or not basename.lower().endswith(SAFE_EXTENSIONS):
                            warning(f"Skipping non-audio file in archive: {member}")
                            continue
                        if ".." in member or member.startswith("/"):
                            warning(f"Skipping suspicious path in archive: {member}")
                            continue
                        zf.extract(member, files_dir)

                downloaded = True
                break
            except Exception as err:
                # Best-effort: log and fall through to the next candidate URL.
                warning(f"Failed to fetch songs from {download_url}: {err}")

        if not downloaded:
            raise RuntimeError(
                "Could not download a valid songs archive from any configured URL"
            )

        # Remove the zip file
        if os.path.exists(archive_path):
            os.remove(archive_path)

        success(" => Downloaded Songs to ../Songs.")

    except Exception as e:
        # Swallows the failure after logging; callers see an empty Songs dir.
        error(f"Error occurred while fetching songs: {str(e)}")
135
+
136
+
137
def choose_random_song() -> str:
    """
    Chooses a random song from the songs/ directory.

    Returns:
        str: The path to the chosen song.
    """
    try:
        songs_dir = os.path.join(ROOT_DIR, "Songs")
        audio_extensions = (".mp3", ".wav", ".m4a", ".aac", ".ogg")
        candidates = []
        for name in os.listdir(songs_dir):
            if not os.path.isfile(os.path.join(songs_dir, name)):
                continue
            if name.lower().endswith(audio_extensions):
                candidates.append(name)
        if len(candidates) == 0:
            raise RuntimeError("No audio files found in Songs directory")
        song = random.choice(candidates)
        success(f" => Chose song: {song}")
        return os.path.join(ROOT_DIR, "Songs", song)
    except Exception as e:
        # Log, then re-raise so callers can react to the missing music.
        error(f"Error occurred while choosing random song: {str(e)}")
        raise
159
+ raise