SeaWolf-AI commited on
Commit
a8fdab7
·
verified ·
1 Parent(s): 5d2e47d

Deploy MoneyPrinterV2 YouTube Shorts Generator to HF Spaces

Browse files
app.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MoneyPrinterV2 — Hugging Face Spaces Gradio UI
3
+
4
+ Generates YouTube Shorts (video only, no upload) using:
5
+ - HF Inference API for LLM text generation
6
+ - Gemini API (Nano Banana 2) for AI image generation
7
+ - KittenTTS for text-to-speech
8
+ - faster-whisper for subtitle generation
9
+ - MoviePy for video assembly
10
+ """
11
+
12
+ import os
13
+ import sys
14
+ import json
15
+ import tempfile
16
+ import shutil
17
+ import traceback
18
+
19
+ # Ensure src/ is importable (same trick as src/main.py)
20
+ _root = os.path.dirname(os.path.abspath(__file__))
21
+ _src = os.path.join(_root, "src")
22
+ if _src not in sys.path:
23
+ sys.path.insert(0, _src)
24
+ # Set sys.path[0] so config.ROOT_DIR resolves correctly
25
+ if sys.path[0] != _src:
26
+ sys.path.insert(0, _src)
27
+
28
+ import gradio as gr
29
+ from config import assert_folder_structure, ROOT_DIR
30
+ from llm_provider import select_model, list_models, generate_text
31
+ from status import info, success, error, warning
32
+
33
+ # Ensure .mp directory exists
34
+ assert_folder_structure()
35
+
36
+ # Available TTS voices (KittenTTS)
37
+ TTS_VOICES = ["Jasper", "Bella", "Luna", "Bruno", "Rosie", "Hugo", "Kiki", "Leo"]
38
+
39
+ LANGUAGES = ["English", "Korean", "Spanish", "French", "German", "Japanese", "Chinese", "Portuguese", "Russian", "Arabic"]
40
+
41
+ # LLM model choices
42
+ LLM_MODELS = list_models()
43
+
44
+
45
def generate_short(
    niche: str,
    language: str,
    llm_model: str,
    tts_voice: str,
    sentence_length: int,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the full Shorts pipeline for the Gradio UI.

    Args:
        niche: Channel niche/topic entered by the user.
        language: Output language for the script.
        llm_model: Model id passed to select_model().
        tts_voice: KittenTTS voice name (read by config via env).
        sentence_length: Number of sentences for the script.
        progress: Gradio progress tracker (tracks tqdm bars in deps).

    Returns:
        tuple: (video_path or None, metadata dict, log text).
    """
    log_lines = []

    def log(msg):
        # Collected messages are returned as the UI progress log.
        log_lines.append(msg)

    # FIX: normalize the input — the original validated niche.strip() but
    # then passed the unstripped value into the pipeline.
    niche = niche.strip()
    if not niche:
        return None, {}, "Please enter a niche/topic."

    # Select the LLM model for all subsequent generate_text() calls.
    select_model(llm_model)
    log(f"Using LLM model: {llm_model}")

    # config reads these env vars, so set them before the pipeline runs.
    os.environ["TTS_VOICE"] = tts_voice
    os.environ["SCRIPT_SENTENCE_LENGTH"] = str(int(sentence_length))

    try:
        # Imported lazily so the UI still loads if heavy deps are missing.
        from classes.YouTube import YouTube
        from classes.Tts import TTS

        log("Initializing YouTube pipeline (browser-free)...")
        yt = YouTube(
            account_uuid="gradio-session",
            account_nickname="gradio-user",
            fp_profile_path="",
            niche=niche,
            language=language,
            use_browser=False,
        )

        # Step 1: Generate topic
        log("Generating topic...")
        topic = yt.generate_topic()
        log(f"Topic: {topic}")

        # Step 2: Generate script
        log("Generating script...")
        script = yt.generate_script()
        log(f"Script: {script[:200]}...")

        # Step 3: Generate metadata
        log("Generating metadata (title, description)...")
        metadata = yt.generate_metadata()
        # FIX: .get() so a malformed metadata dict can't crash the logger.
        log(f"Title: {metadata.get('title', '')}")

        # Step 4: Generate image prompts
        log("Generating image prompts...")
        prompts = yt.generate_prompts()
        log(f"Generated {len(prompts)} image prompts")

        # Step 5: Generate images
        log("Generating images...")
        generated_count = 0
        for i, prompt in enumerate(prompts):
            log(f"  Image {i+1}/{len(prompts)}: {prompt[:80]}...")
            if yt.generate_image(prompt):
                generated_count += 1
        log(f"Generated {generated_count}/{len(prompts)} images")

        if generated_count == 0:
            return None, metadata, "\n".join(
                log_lines
                + ["ERROR: No images were generated. Check your GEMINI_API_KEY."]
            )

        # Step 6: TTS
        log("Generating speech (TTS)...")
        tts = TTS()
        yt.generate_script_to_speech(tts)
        log("TTS complete")

        # Step 7: Combine into video
        log("Combining into final video (this may take a few minutes)...")
        video_path = yt.combine()
        log(f"Video generated: {video_path}")

        full_metadata = {
            "title": metadata.get("title", ""),
            "description": metadata.get("description", ""),
            "topic": topic,
            "script": script,
            "image_prompts": prompts,
            "images_generated": generated_count,
        }

        return video_path, full_metadata, "\n".join(log_lines)

    except Exception as e:
        # Surface the full traceback in the UI log instead of crashing.
        log_lines.append(f"ERROR: {e}")
        log_lines.append(traceback.format_exc())
        return None, {}, "\n".join(log_lines)
145
+
146
+
147
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------

with gr.Blocks(title="MoneyPrinterV2 — YouTube Shorts Generator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# MoneyPrinterV2 — YouTube Shorts Generator")
    gr.Markdown(
        "Generate YouTube Shorts videos automatically using AI. "
        "The pipeline generates a topic, script, images, speech, subtitles, and assembles them into a video."
    )

    with gr.Row():
        # Left column: pipeline configuration inputs.
        with gr.Column(scale=1):
            niche_box = gr.Textbox(
                label="Niche / Topic",
                placeholder="e.g. 'artificial intelligence', 'cooking tips', 'space exploration'",
                lines=2,
            )
            language_dd = gr.Dropdown(
                choices=LANGUAGES,
                value="English",
                label="Language",
            )
            model_dd = gr.Dropdown(
                choices=LLM_MODELS,
                value=LLM_MODELS[0] if LLM_MODELS else "",
                label="LLM Model",
            )
            voice_dd = gr.Dropdown(
                choices=TTS_VOICES,
                value="Jasper",
                label="TTS Voice",
            )
            sentences_slider = gr.Slider(
                minimum=2,
                maximum=8,
                value=4,
                step=1,
                label="Script Sentence Count",
            )
            run_btn = gr.Button("Generate Video", variant="primary", size="lg")

        # Right column: generated artifacts and the progress log.
        with gr.Column(scale=2):
            video_out = gr.Video(label="Generated Video")
            meta_out = gr.JSON(label="Metadata")
            log_out = gr.Textbox(label="Progress Log", lines=15, interactive=False)

    run_btn.click(
        fn=generate_short,
        inputs=[niche_box, language_dd, model_dd, voice_dd, sentences_slider],
        outputs=[video_out, meta_out, log_out],
    )

    gr.Markdown(
        "---\n"
        "**Required HF Space Secrets:** `HF_TOKEN` (for LLM), `GEMINI_API_KEY` (for image generation)\n\n"
        "**Note:** This demo generates videos only. YouTube upload requires browser automation and is not available on HF Spaces."
    )


if __name__ == "__main__":
    demo.launch()
fonts/bold_font.ttf ADDED
Binary file (28.9 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio>=4.0
2
+ huggingface_hub>=0.20.0
3
+ termcolor
4
+ requests
5
+ kittentts @ https://github.com/KittenML/KittenTTS/releases/download/0.8.1/kittentts-0.8.1-py3-none-any.whl
6
+ soundfile
7
+ moviepy
8
+ Pillow>=10.0.0
9
+ faster-whisper
10
+ srt_equalizer
src/__init__.py ADDED
File without changes
src/classes/Tts.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import soundfile as sf
3
+ from kittentts import KittenTTS as KittenModel
4
+
5
+ from config import ROOT_DIR, get_tts_voice
6
+
7
# Hugging Face model id and output sample rate for KittenTTS.
KITTEN_MODEL = "KittenML/kitten-tts-mini-0.8"
KITTEN_SAMPLE_RATE = 24000

class TTS:
    """Thin wrapper around KittenTTS for script narration."""

    def __init__(self) -> None:
        # Load the model once per instance; the voice comes from config
        # (which reads the TTS_VOICE environment variable).
        self._model = KittenModel(KITTEN_MODEL)
        self._voice = get_tts_voice()

    def synthesize(self, text, output_file=os.path.join(ROOT_DIR, ".mp", "audio.wav")):
        """Render *text* to a WAV file and return the file's path."""
        waveform = self._model.generate(text, voice=self._voice)
        sf.write(output_file, waveform, KITTEN_SAMPLE_RATE)
        return output_file
src/classes/YouTube.py ADDED
@@ -0,0 +1,928 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import base64
3
+ import json
4
+ import time
5
+ import os
6
+ import requests
7
+
8
+ from utils import *
9
+ from .Tts import TTS
10
+ from llm_provider import generate_text
11
+ from config import *
12
+ from status import *
13
+ from uuid import uuid4
14
+ from typing import List
15
+ from termcolor import colored
16
+ from datetime import datetime
17
+
18
+ # Lazy imports for browser-dependent modules
19
# Lazy imports for browser-dependent modules. Guarded by a module-level flag
# so the heavy selenium/assemblyai stack only loads when upload features run.
_browser_imports_done = False

def _ensure_browser_imports():
    """Import browser/upload dependencies on first use.

    Publishes selenium, assemblyai, the YouTube Studio element constants and
    the cache helper as module-level globals so the rest of the file can use
    them as if imported at the top. No-op after the first successful call.
    """
    global _browser_imports_done
    if _browser_imports_done:
        return
    # Declare every name we are about to publish at module scope.
    global aai, webdriver, By, Service, Options, GeckoDriverManager
    global YOUTUBE_TEXTBOX_ID, YOUTUBE_MADE_FOR_KIDS_NAME, YOUTUBE_NOT_MADE_FOR_KIDS_NAME
    global YOUTUBE_NEXT_BUTTON_ID, YOUTUBE_RADIO_BUTTON_XPATH, YOUTUBE_DONE_BUTTON_ID
    global get_youtube_cache_path
    import assemblyai as aai
    import selenium_firefox  # noqa: F401
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.firefox.service import Service
    from selenium.webdriver.firefox.options import Options
    from webdriver_manager.firefox import GeckoDriverManager
    import constants
    # Re-export the Studio upload-dialog selectors under their short names.
    YOUTUBE_TEXTBOX_ID = constants.YOUTUBE_TEXTBOX_ID
    YOUTUBE_MADE_FOR_KIDS_NAME = constants.YOUTUBE_MADE_FOR_KIDS_NAME
    YOUTUBE_NOT_MADE_FOR_KIDS_NAME = constants.YOUTUBE_NOT_MADE_FOR_KIDS_NAME
    YOUTUBE_NEXT_BUTTON_ID = constants.YOUTUBE_NEXT_BUTTON_ID
    YOUTUBE_RADIO_BUTTON_XPATH = constants.YOUTUBE_RADIO_BUTTON_XPATH
    YOUTUBE_DONE_BUTTON_ID = constants.YOUTUBE_DONE_BUTTON_ID
    from cache import get_youtube_cache_path
    # Only mark done after every import succeeded, so a failed attempt retries.
    _browser_imports_done = True
45
+
46
+ # MoviePy imports (always needed for video generation)
47
+ from moviepy.editor import (
48
+ ImageClip, AudioFileClip, TextClip, CompositeVideoClip,
49
+ CompositeAudioClip, concatenate_videoclips, afx,
50
+ )
51
+ from moviepy.video.fx.all import crop
52
+ from moviepy.config import change_settings
53
+ from moviepy.video.tools.subtitles import SubtitlesClip
54
+
55
+ # Set ImageMagick Path
56
+ imgk = get_imagemagick_path()
57
+ if imgk:
58
+ change_settings({"IMAGEMAGICK_BINARY": imgk})
59
+
60
+
61
+ class YouTube:
62
+ """
63
+ Class for YouTube Automation.
64
+
65
+ Steps to create a YouTube Short:
66
+ 1. Generate a topic [DONE]
67
+ 2. Generate a script [DONE]
68
+ 3. Generate metadata (Title, Description, Tags) [DONE]
69
+ 4. Generate AI Image Prompts [DONE]
70
+ 4. Generate Images based on generated Prompts [DONE]
71
+ 5. Convert Text-to-Speech [DONE]
72
+ 6. Show images each for n seconds, n: Duration of TTS / Amount of images [DONE]
73
+ 7. Combine Concatenated Images with the Text-to-Speech [DONE]
74
+ """
75
+
76
    def __init__(
        self,
        account_uuid: str,
        account_nickname: str,
        fp_profile_path: str,
        niche: str,
        language: str,
        use_browser: bool = True,
    ) -> None:
        """
        Constructor for YouTube Class.

        Args:
            account_uuid (str): The unique identifier for the YouTube account.
            account_nickname (str): The nickname for the YouTube account.
            fp_profile_path (str): Path to the firefox profile that is logged into the specified YouTube Account.
            niche (str): The niche of the provided YouTube Channel.
            language (str): The language of the Automation.
            use_browser (bool): If False, skip Selenium initialization (for headless video generation).

        Returns:
            None
        """
        self._account_uuid: str = account_uuid
        self._account_nickname: str = account_nickname
        self._fp_profile_path: str = fp_profile_path
        self._niche: str = niche
        self._language: str = language
        self._use_browser: bool = use_browser

        # Paths of images generated for the current video (filled by _persist_image).
        self.images = []

        # Browser-free mode: no Selenium; browser-dependent methods must not be called.
        if not self._use_browser:
            self.browser = None
            return

        # Load selenium/assemblyai and upload constants on demand.
        _ensure_browser_imports()

        # Initialize the Firefox profile
        self.options = Options()

        # Set headless state of browser
        if get_headless():
            self.options.add_argument("--headless")

        # Fail fast on a bad profile path before spawning a driver process.
        if not os.path.isdir(self._fp_profile_path):
            raise ValueError(
                f"Firefox profile path does not exist or is not a directory: {self._fp_profile_path}"
            )

        self.options.add_argument("-profile")
        self.options.add_argument(self._fp_profile_path)

        # Set the service (downloads/locates geckodriver automatically)
        self.service = Service(GeckoDriverManager().install())

        # Initialize the browser
        self.browser = webdriver.Firefox(
            service=self.service, options=self.options
        )
136
+
137
+ @property
138
+ def niche(self) -> str:
139
+ """
140
+ Getter Method for the niche.
141
+
142
+ Returns:
143
+ niche (str): The niche
144
+ """
145
+ return self._niche
146
+
147
+ @property
148
+ def language(self) -> str:
149
+ """
150
+ Getter Method for the language to use.
151
+
152
+ Returns:
153
+ language (str): The language
154
+ """
155
+ return self._language
156
+
157
+ def generate_response(self, prompt: str, model_name: str = None) -> str:
158
+ """
159
+ Generates an LLM Response based on a prompt and the user-provided model.
160
+
161
+ Args:
162
+ prompt (str): The prompt to use in the text generation.
163
+
164
+ Returns:
165
+ response (str): The generated AI Repsonse.
166
+ """
167
+ return generate_text(prompt, model_name=model_name)
168
+
169
+ def generate_topic(self) -> str:
170
+ """
171
+ Generates a topic based on the YouTube Channel niche.
172
+
173
+ Returns:
174
+ topic (str): The generated topic.
175
+ """
176
+ completion = self.generate_response(
177
+ f"Please generate a specific video idea that takes about the following topic: {self.niche}. Make it exactly one sentence. Only return the topic, nothing else."
178
+ )
179
+
180
+ if not completion:
181
+ error("Failed to generate Topic.")
182
+
183
+ self.subject = completion
184
+
185
+ return completion
186
+
187
+ def generate_script(self) -> str:
188
+ """
189
+ Generate a script for a video, depending on the subject of the video, the number of paragraphs, and the AI model.
190
+
191
+ Returns:
192
+ script (str): The script of the video.
193
+ """
194
+ sentence_length = get_script_sentence_length()
195
+ prompt = f"""
196
+ Generate a script for a video in {sentence_length} sentences, depending on the subject of the video.
197
+
198
+ The script is to be returned as a string with the specified number of paragraphs.
199
+
200
+ Here is an example of a string:
201
+ "This is an example string."
202
+
203
+ Do not under any circumstance reference this prompt in your response.
204
+
205
+ Get straight to the point, don't start with unnecessary things like, "welcome to this video".
206
+
207
+ Obviously, the script should be related to the subject of the video.
208
+
209
+ YOU MUST NOT EXCEED THE {sentence_length} SENTENCES LIMIT. MAKE SURE THE {sentence_length} SENTENCES ARE SHORT.
210
+ YOU MUST NOT INCLUDE ANY TYPE OF MARKDOWN OR FORMATTING IN THE SCRIPT, NEVER USE A TITLE.
211
+ YOU MUST WRITE THE SCRIPT IN THE LANGUAGE SPECIFIED IN [LANGUAGE].
212
+ ONLY RETURN THE RAW CONTENT OF THE SCRIPT. DO NOT INCLUDE "VOICEOVER", "NARRATOR" OR SIMILAR INDICATORS OF WHAT SHOULD BE SPOKEN AT THE BEGINNING OF EACH PARAGRAPH OR LINE. YOU MUST NOT MENTION THE PROMPT, OR ANYTHING ABOUT THE SCRIPT ITSELF. ALSO, NEVER TALK ABOUT THE AMOUNT OF PARAGRAPHS OR LINES. JUST WRITE THE SCRIPT
213
+
214
+ Subject: {self.subject}
215
+ Language: {self.language}
216
+ """
217
+ max_retries = 3
218
+ for attempt in range(max_retries):
219
+ completion = self.generate_response(prompt)
220
+ completion = re.sub(r"\*", "", completion)
221
+
222
+ if not completion:
223
+ error("The generated script is empty.")
224
+ return
225
+
226
+ if len(completion) <= 5000:
227
+ self.script = completion
228
+ return completion
229
+
230
+ if get_verbose():
231
+ warning(f"Generated Script is too long (attempt {attempt + 1}/{max_retries}). Retrying...")
232
+
233
+ self.script = completion
234
+ return completion
235
+
236
+ def generate_metadata(self) -> dict:
237
+ """
238
+ Generates Video metadata for the to-be-uploaded YouTube Short (Title, Description).
239
+
240
+ Returns:
241
+ metadata (dict): The generated metadata.
242
+ """
243
+ max_retries = 3
244
+ title = ""
245
+ for attempt in range(max_retries):
246
+ title = self.generate_response(
247
+ f"Please generate a YouTube Video Title for the following subject, including hashtags: {self.subject}. Only return the title, nothing else. Limit the title under 100 characters."
248
+ )
249
+ if len(title) <= 100:
250
+ break
251
+ if get_verbose():
252
+ warning(f"Generated Title is too long (attempt {attempt + 1}/{max_retries}). Retrying...")
253
+
254
+ description = self.generate_response(
255
+ f"Please generate a YouTube Video Description for the following script: {self.script}. Only return the description, nothing else."
256
+ )
257
+
258
+ self.metadata = {"title": title, "description": description}
259
+
260
+ return self.metadata
261
+
262
+ def generate_prompts(self) -> List[str]:
263
+ """
264
+ Generates AI Image Prompts based on the provided Video Script.
265
+
266
+ Returns:
267
+ image_prompts (List[str]): Generated List of image prompts.
268
+ """
269
+ n_prompts = len(self.script) / 3
270
+
271
+ prompt = f"""
272
+ Generate {n_prompts} Image Prompts for AI Image Generation,
273
+ depending on the subject of a video.
274
+ Subject: {self.subject}
275
+
276
+ The image prompts are to be returned as
277
+ a JSON-Array of strings.
278
+
279
+ Each search term should consist of a full sentence,
280
+ always add the main subject of the video.
281
+
282
+ Be emotional and use interesting adjectives to make the
283
+ Image Prompt as detailed as possible.
284
+
285
+ YOU MUST ONLY RETURN THE JSON-ARRAY OF STRINGS.
286
+ YOU MUST NOT RETURN ANYTHING ELSE.
287
+ YOU MUST NOT RETURN THE SCRIPT.
288
+
289
+ The search terms must be related to the subject of the video.
290
+ Here is an example of a JSON-Array of strings:
291
+ ["image prompt 1", "image prompt 2", "image prompt 3"]
292
+
293
+ For context, here is the full text:
294
+ {self.script}
295
+ """
296
+
297
+ completion = (
298
+ str(self.generate_response(prompt))
299
+ .replace("```json", "")
300
+ .replace("```", "")
301
+ )
302
+
303
+ image_prompts = []
304
+
305
+ if "image_prompts" in completion:
306
+ image_prompts = json.loads(completion)["image_prompts"]
307
+ else:
308
+ try:
309
+ image_prompts = json.loads(completion)
310
+ if get_verbose():
311
+ info(f" => Generated Image Prompts: {image_prompts}")
312
+ except Exception:
313
+ if get_verbose():
314
+ warning(
315
+ "LLM returned an unformatted response. Attempting to clean..."
316
+ )
317
+
318
+ # Get everything between [ and ], and turn it into a list
319
+ r = re.compile(r"\[.*\]")
320
+ image_prompts = r.findall(completion)
321
+ if len(image_prompts) == 0:
322
+ if get_verbose():
323
+ warning("Failed to generate Image Prompts.")
324
+ image_prompts = [self.subject]
325
+
326
+ if len(image_prompts) > n_prompts:
327
+ image_prompts = image_prompts[: int(n_prompts)]
328
+
329
+ self.image_prompts = image_prompts
330
+
331
+ success(f"Generated {len(image_prompts)} Image Prompts.")
332
+
333
+ return image_prompts
334
+
335
+ def _persist_image(self, image_bytes: bytes, provider_label: str) -> str:
336
+ """
337
+ Writes generated image bytes to a PNG file in .mp.
338
+
339
+ Args:
340
+ image_bytes (bytes): Image payload
341
+ provider_label (str): Label for logging
342
+
343
+ Returns:
344
+ path (str): Absolute image path
345
+ """
346
+ image_path = os.path.join(ROOT_DIR, ".mp", str(uuid4()) + ".png")
347
+
348
+ with open(image_path, "wb") as image_file:
349
+ image_file.write(image_bytes)
350
+
351
+ if get_verbose():
352
+ info(f' => Wrote image from {provider_label} to "{image_path}"')
353
+
354
+ self.images.append(image_path)
355
+ return image_path
356
+
357
+ def generate_image_nanobanana2(self, prompt: str) -> str:
358
+ """
359
+ Generates an AI Image using Nano Banana 2 API (Gemini image API).
360
+
361
+ Args:
362
+ prompt (str): Prompt for image generation
363
+
364
+ Returns:
365
+ path (str): The path to the generated image.
366
+ """
367
+ print(f"Generating Image using Nano Banana 2 API: {prompt}")
368
+
369
+ api_key = get_nanobanana2_api_key()
370
+ if not api_key:
371
+ error("nanobanana2_api_key is not configured.")
372
+ return None
373
+
374
+ base_url = get_nanobanana2_api_base_url().rstrip("/")
375
+ model = get_nanobanana2_model()
376
+ aspect_ratio = get_nanobanana2_aspect_ratio()
377
+
378
+ endpoint = f"{base_url}/models/{model}:generateContent"
379
+ payload = {
380
+ "contents": [{"parts": [{"text": prompt}]}],
381
+ "generationConfig": {
382
+ "responseModalities": ["IMAGE"],
383
+ "imageConfig": {"aspectRatio": aspect_ratio},
384
+ },
385
+ }
386
+
387
+ try:
388
+ response = requests.post(
389
+ endpoint,
390
+ headers={"x-goog-api-key": api_key, "Content-Type": "application/json"},
391
+ json=payload,
392
+ timeout=300,
393
+ )
394
+ response.raise_for_status()
395
+ body = response.json()
396
+
397
+ candidates = body.get("candidates", [])
398
+ for candidate in candidates:
399
+ content = candidate.get("content", {})
400
+ for part in content.get("parts", []):
401
+ inline_data = part.get("inlineData") or part.get("inline_data")
402
+ if not inline_data:
403
+ continue
404
+ data = inline_data.get("data")
405
+ mime_type = inline_data.get("mimeType") or inline_data.get("mime_type", "")
406
+ if data and str(mime_type).startswith("image/"):
407
+ image_bytes = base64.b64decode(data)
408
+ return self._persist_image(image_bytes, "Nano Banana 2 API")
409
+
410
+ if get_verbose():
411
+ warning(f"Nano Banana 2 did not return an image payload. Response: {body}")
412
+ return None
413
+ except Exception as e:
414
+ if get_verbose():
415
+ warning(f"Failed to generate image with Nano Banana 2 API: {str(e)}")
416
+ return None
417
+
418
+ def generate_image(self, prompt: str) -> str:
419
+ """
420
+ Generates an AI Image based on the given prompt using Nano Banana 2.
421
+
422
+ Args:
423
+ prompt (str): Reference for image generation
424
+
425
+ Returns:
426
+ path (str): The path to the generated image.
427
+ """
428
+ return self.generate_image_nanobanana2(prompt)
429
+
430
+ def generate_script_to_speech(self, tts_instance: TTS) -> str:
431
+ """
432
+ Converts the generated script into Speech using KittenTTS and returns the path to the wav file.
433
+
434
+ Args:
435
+ tts_instance (tts): Instance of TTS Class.
436
+
437
+ Returns:
438
+ path_to_wav (str): Path to generated audio (WAV Format).
439
+ """
440
+ path = os.path.join(ROOT_DIR, ".mp", str(uuid4()) + ".wav")
441
+
442
+ # Clean script, remove every character that is not a word character, a space, a period, a question mark, or an exclamation mark.
443
+ self.script = re.sub(r"[^\w\s.?!]", "", self.script)
444
+
445
+ tts_instance.synthesize(self.script, path)
446
+
447
+ self.tts_path = path
448
+
449
+ if get_verbose():
450
+ info(f' => Wrote TTS to "{path}"')
451
+
452
+ return path
453
+
454
+ def add_video(self, video: dict) -> None:
455
+ """
456
+ Adds a video to the cache.
457
+
458
+ Args:
459
+ video (dict): The video to add
460
+
461
+ Returns:
462
+ None
463
+ """
464
+ _ensure_browser_imports()
465
+ videos = self.get_videos()
466
+ videos.append(video)
467
+
468
+ cache = get_youtube_cache_path()
469
+
470
+ with open(cache, "r") as file:
471
+ previous_json = json.loads(file.read())
472
+
473
+ # Find our account
474
+ accounts = previous_json["accounts"]
475
+ for account in accounts:
476
+ if account["id"] == self._account_uuid:
477
+ account["videos"].append(video)
478
+
479
+ # Commit changes
480
+ with open(cache, "w") as f:
481
+ f.write(json.dumps(previous_json))
482
+
483
+ def generate_subtitles(self, audio_path: str) -> str:
484
+ """
485
+ Generates subtitles for the audio using the configured STT provider.
486
+
487
+ Args:
488
+ audio_path (str): The path to the audio file.
489
+
490
+ Returns:
491
+ path (str): The path to the generated SRT File.
492
+ """
493
+ provider = str(get_stt_provider() or "local_whisper").lower()
494
+
495
+ if provider == "local_whisper":
496
+ return self.generate_subtitles_local_whisper(audio_path)
497
+
498
+ if provider == "third_party_assemblyai":
499
+ return self.generate_subtitles_assemblyai(audio_path)
500
+
501
+ warning(f"Unknown stt_provider '{provider}'. Falling back to local_whisper.")
502
+ return self.generate_subtitles_local_whisper(audio_path)
503
+
504
+ def generate_subtitles_assemblyai(self, audio_path: str) -> str:
505
+ """
506
+ Generates subtitles using AssemblyAI.
507
+
508
+ Args:
509
+ audio_path (str): Audio file path
510
+
511
+ Returns:
512
+ path (str): Path to SRT file
513
+ """
514
+ aai.settings.api_key = get_assemblyai_api_key()
515
+ config = aai.TranscriptionConfig()
516
+ transcriber = aai.Transcriber(config=config)
517
+ transcript = transcriber.transcribe(audio_path)
518
+ subtitles = transcript.export_subtitles_srt()
519
+
520
+ srt_path = os.path.join(ROOT_DIR, ".mp", str(uuid4()) + ".srt")
521
+
522
+ with open(srt_path, "w") as file:
523
+ file.write(subtitles)
524
+
525
+ return srt_path
526
+
527
+ def _format_srt_timestamp(self, seconds: float) -> str:
528
+ """
529
+ Formats a timestamp in seconds to SRT format.
530
+
531
+ Args:
532
+ seconds (float): Seconds
533
+
534
+ Returns:
535
+ ts (str): HH:MM:SS,mmm
536
+ """
537
+ total_millis = max(0, int(round(seconds * 1000)))
538
+ hours = total_millis // 3600000
539
+ minutes = (total_millis % 3600000) // 60000
540
+ secs = (total_millis % 60000) // 1000
541
+ millis = total_millis % 1000
542
+ return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
543
+
544
+ def generate_subtitles_local_whisper(self, audio_path: str) -> str:
545
+ """
546
+ Generates subtitles using local Whisper (faster-whisper).
547
+
548
+ Args:
549
+ audio_path (str): Audio file path
550
+
551
+ Returns:
552
+ path (str): Path to SRT file
553
+ """
554
+ try:
555
+ from faster_whisper import WhisperModel
556
+ except ImportError:
557
+ error(
558
+ "Local STT selected but 'faster-whisper' is not installed. "
559
+ "Install it or switch stt_provider to third_party_assemblyai."
560
+ )
561
+ raise
562
+
563
+ model = WhisperModel(
564
+ get_whisper_model(),
565
+ device=get_whisper_device(),
566
+ compute_type=get_whisper_compute_type(),
567
+ )
568
+ segments, _ = model.transcribe(audio_path, vad_filter=True)
569
+
570
+ lines = []
571
+ for idx, segment in enumerate(segments, start=1):
572
+ start = self._format_srt_timestamp(segment.start)
573
+ end = self._format_srt_timestamp(segment.end)
574
+ text = str(segment.text).strip()
575
+
576
+ if not text:
577
+ continue
578
+
579
+ lines.append(str(idx))
580
+ lines.append(f"{start} --> {end}")
581
+ lines.append(text)
582
+ lines.append("")
583
+
584
+ subtitles = "\n".join(lines)
585
+ srt_path = os.path.join(ROOT_DIR, ".mp", str(uuid4()) + ".srt")
586
+ with open(srt_path, "w", encoding="utf-8") as file:
587
+ file.write(subtitles)
588
+
589
+ return srt_path
590
+
591
+ def combine(self) -> str:
592
+ """
593
+ Combines everything into the final video.
594
+
595
+ Returns:
596
+ path (str): The path to the generated MP4 File.
597
+ """
598
+ combined_image_path = os.path.join(ROOT_DIR, ".mp", str(uuid4()) + ".mp4")
599
+ threads = get_threads()
600
+ tts_clip = AudioFileClip(self.tts_path)
601
+ max_duration = tts_clip.duration
602
+ req_dur = max_duration / len(self.images)
603
+
604
+ # Make a generator that returns a TextClip when called with consecutive
605
+ generator = lambda txt: TextClip(
606
+ txt,
607
+ font=os.path.join(get_fonts_dir(), get_font()),
608
+ fontsize=100,
609
+ color="#FFFF00",
610
+ stroke_color="black",
611
+ stroke_width=5,
612
+ size=(1080, 1920),
613
+ method="caption",
614
+ )
615
+
616
+ print(colored("[+] Combining images...", "blue"))
617
+
618
+ clips = []
619
+ tot_dur = 0
620
+ # Add downloaded clips over and over until the duration of the audio (max_duration) has been reached
621
+ while tot_dur < max_duration:
622
+ for image_path in self.images:
623
+ clip = ImageClip(image_path)
624
+ clip.duration = req_dur
625
+ clip = clip.set_fps(30)
626
+
627
+ # Not all images are same size,
628
+ # so we need to resize them
629
+ if round((clip.w / clip.h), 4) < 0.5625:
630
+ if get_verbose():
631
+ info(f" => Resizing Image: {image_path} to 1080x1920")
632
+ clip = crop(
633
+ clip,
634
+ width=clip.w,
635
+ height=round(clip.w / 0.5625),
636
+ x_center=clip.w / 2,
637
+ y_center=clip.h / 2,
638
+ )
639
+ else:
640
+ if get_verbose():
641
+ info(f" => Resizing Image: {image_path} to 1920x1080")
642
+ clip = crop(
643
+ clip,
644
+ width=round(0.5625 * clip.h),
645
+ height=clip.h,
646
+ x_center=clip.w / 2,
647
+ y_center=clip.h / 2,
648
+ )
649
+ clip = clip.resize((1080, 1920))
650
+
651
+ # FX (Fade In)
652
+ # clip = clip.fadein(2)
653
+
654
+ clips.append(clip)
655
+ tot_dur += clip.duration
656
+
657
+ final_clip = concatenate_videoclips(clips)
658
+ final_clip = final_clip.set_fps(30)
659
+ random_song = choose_random_song()
660
+
661
+ subtitles = None
662
+ try:
663
+ subtitles_path = self.generate_subtitles(self.tts_path)
664
+ equalize_subtitles(subtitles_path, 10)
665
+ subtitles = SubtitlesClip(subtitles_path, generator)
666
+ subtitles.set_pos(("center", "center"))
667
+ except Exception as e:
668
+ warning(f"Failed to generate subtitles, continuing without subtitles: {e}")
669
+
670
+ random_song_clip = AudioFileClip(random_song).set_fps(44100)
671
+
672
+ # Turn down volume
673
+ random_song_clip = random_song_clip.fx(afx.volumex, 0.1)
674
+ comp_audio = CompositeAudioClip([tts_clip.set_fps(44100), random_song_clip])
675
+
676
+ final_clip = final_clip.set_audio(comp_audio)
677
+ final_clip = final_clip.set_duration(tts_clip.duration)
678
+
679
+ if subtitles is not None:
680
+ final_clip = CompositeVideoClip([final_clip, subtitles])
681
+
682
+ final_clip.write_videofile(combined_image_path, threads=threads)
683
+
684
+ success(f'Wrote Video to "{combined_image_path}"')
685
+
686
+ return combined_image_path
687
+
688
+ def generate_video(self, tts_instance: TTS) -> str:
689
+ """
690
+ Generates a YouTube Short based on the provided niche and language.
691
+
692
+ Args:
693
+ tts_instance (TTS): Instance of TTS Class.
694
+
695
+ Returns:
696
+ path (str): The path to the generated MP4 File.
697
+ """
698
+ # Generate the Topic
699
+ self.generate_topic()
700
+
701
+ # Generate the Script
702
+ self.generate_script()
703
+
704
+ # Generate the Metadata
705
+ self.generate_metadata()
706
+
707
+ # Generate the Image Prompts
708
+ self.generate_prompts()
709
+
710
+ # Generate the Images
711
+ for prompt in self.image_prompts:
712
+ self.generate_image(prompt)
713
+
714
+ # Generate the TTS
715
+ self.generate_script_to_speech(tts_instance)
716
+
717
+ # Combine everything
718
+ path = self.combine()
719
+
720
+ if get_verbose():
721
+ info(f" => Generated Video: {path}")
722
+
723
+ self.video_path = os.path.abspath(path)
724
+
725
+ return path
726
+
727
+ def _require_browser(self):
728
+ if not self._use_browser or self.browser is None:
729
+ raise RuntimeError(
730
+ "Browser is not available. This method requires use_browser=True."
731
+ )
732
+
733
+ def get_channel_id(self) -> str:
734
+ """
735
+ Gets the Channel ID of the YouTube Account.
736
+
737
+ Returns:
738
+ channel_id (str): The Channel ID.
739
+ """
740
+ self._require_browser()
741
+ driver = self.browser
742
+ driver.get("https://studio.youtube.com")
743
+ time.sleep(2)
744
+ channel_id = driver.current_url.split("/")[-1]
745
+ self.channel_id = channel_id
746
+
747
+ return channel_id
748
+
749
    def upload_video(self) -> bool:
        """
        Uploads the video to YouTube via browser automation of Studio.

        Drives the youtube.com/upload flow with Selenium: picks the file at
        ``self.video_path``, fills title/description from ``self.metadata``,
        sets the "made for kids" flag, publishes as unlisted, then scrapes
        the new video's URL from Studio and caches it via ``add_video``.

        Returns:
            success (bool): Whether the upload was successful or not.
        """
        self._require_browser()
        _ensure_browser_imports()
        try:
            # Populates self.channel_id, needed later to list uploads.
            self.get_channel_id()

            driver = self.browser
            verbose = get_verbose()

            # Go to youtube.com/upload
            driver.get("https://www.youtube.com/upload")

            # Set video file via the hidden <input> inside the file picker.
            FILE_PICKER_TAG = "ytcp-uploads-file-picker"
            file_picker = driver.find_element(By.TAG_NAME, FILE_PICKER_TAG)
            INPUT_TAG = "input"
            file_input = file_picker.find_element(By.TAG_NAME, INPUT_TAG)
            file_input.send_keys(self.video_path)

            # Wait for upload to finish
            # NOTE(review): fixed sleeps throughout assume each UI step
            # completes in time — consider WebDriverWait; verify on slow links.
            time.sleep(5)

            # Set title
            # First textbox is the title, last one is the description.
            textboxes = driver.find_elements(By.ID, YOUTUBE_TEXTBOX_ID)

            title_el = textboxes[0]
            description_el = textboxes[-1]

            if verbose:
                info("\t=> Setting title...")

            title_el.click()
            time.sleep(1)
            title_el.clear()
            title_el.send_keys(self.metadata["title"])

            if verbose:
                info("\t=> Setting description...")

            # Set description
            time.sleep(10)
            description_el.click()
            time.sleep(0.5)
            description_el.clear()
            description_el.send_keys(self.metadata["description"])

            time.sleep(0.5)

            # Set `made for kids` option
            if verbose:
                info("\t=> Setting `made for kids` option...")

            is_for_kids_checkbox = driver.find_element(
                By.NAME, YOUTUBE_MADE_FOR_KIDS_NAME
            )
            is_not_for_kids_checkbox = driver.find_element(
                By.NAME, YOUTUBE_NOT_MADE_FOR_KIDS_NAME
            )

            if not get_is_for_kids():
                is_not_for_kids_checkbox.click()
            else:
                is_for_kids_checkbox.click()

            time.sleep(0.5)

            # Click next (details -> video elements -> checks -> visibility).
            if verbose:
                info("\t=> Clicking next...")

            next_button = driver.find_element(By.ID, YOUTUBE_NEXT_BUTTON_ID)
            next_button.click()

            # Click next again
            if verbose:
                info("\t=> Clicking next again...")
            next_button = driver.find_element(By.ID, YOUTUBE_NEXT_BUTTON_ID)
            next_button.click()

            # Wait for 2 seconds
            time.sleep(2)

            # Click next again
            if verbose:
                info("\t=> Clicking next again...")
            next_button = driver.find_element(By.ID, YOUTUBE_NEXT_BUTTON_ID)
            next_button.click()

            # Set as unlisted
            # NOTE(review): assumes radio order public/private/unlisted with
            # unlisted at index 2 — confirm against current Studio markup.
            if verbose:
                info("\t=> Setting as unlisted...")

            radio_button = driver.find_elements(By.XPATH, YOUTUBE_RADIO_BUTTON_XPATH)
            radio_button[2].click()

            if verbose:
                info("\t=> Clicking done button...")

            # Click done button
            done_button = driver.find_element(By.ID, YOUTUBE_DONE_BUTTON_ID)
            done_button.click()

            # Wait for 2 seconds
            time.sleep(2)

            # Get latest video
            if verbose:
                info("\t=> Getting video URL...")

            # Get the latest uploaded video URL from the Shorts listing;
            # newest upload is assumed to be the first row.
            driver.get(
                f"https://studio.youtube.com/channel/{self.channel_id}/videos/short"
            )
            time.sleep(2)
            videos = driver.find_elements(By.TAG_NAME, "ytcp-video-row")
            first_video = videos[0]
            anchor_tag = first_video.find_element(By.TAG_NAME, "a")
            href = anchor_tag.get_attribute("href")
            if verbose:
                info(f"\t=> Extracting video ID from URL: {href}")
            # Studio links end in .../<video_id>/edit, hence index -2.
            video_id = href.split("/")[-2]

            # Build URL
            url = build_url(video_id)

            self.uploaded_video_url = url

            if verbose:
                success(f" => Uploaded Video: {url}")

            # Add video to cache
            self.add_video(
                {
                    "title": self.metadata["title"],
                    "description": self.metadata["description"],
                    "url": url,
                    "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                }
            )

            # Close the browser
            driver.quit()

            return True
        except Exception as e:
            # Best-effort cleanup: report, close the browser, signal failure.
            error(f"Failed to upload video: {e}")
            self.browser.quit()
            return False
903
+
904
+ def get_videos(self) -> List[dict]:
905
+ """
906
+ Gets the uploaded videos from the YouTube Channel.
907
+
908
+ Returns:
909
+ videos (List[dict]): The uploaded videos.
910
+ """
911
+ _ensure_browser_imports()
912
+ if not os.path.exists(get_youtube_cache_path()):
913
+ # Create the cache file
914
+ with open(get_youtube_cache_path(), "w") as file:
915
+ json.dump({"videos": []}, file, indent=4)
916
+ return []
917
+
918
+ videos = []
919
+ # Read the cache file
920
+ with open(get_youtube_cache_path(), "r") as file:
921
+ previous_json = json.loads(file.read())
922
+ # Find our account
923
+ accounts = previous_json["accounts"]
924
+ for account in accounts:
925
+ if account["id"] == self._account_uuid:
926
+ videos = account["videos"]
927
+
928
+ return videos
src/classes/__init__.py ADDED
File without changes
src/config.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+ import srt_equalizer
5
+
6
+ from termcolor import colored
7
+
8
+ ROOT_DIR = os.path.dirname(sys.path[0])
9
+
10
+
11
def is_running_in_spaces() -> bool:
    """Returns True when running inside a Hugging Face Space."""
    # Spaces always export SPACE_ID; empty/missing means "not a Space".
    space_id = os.environ.get("SPACE_ID", "")
    return len(space_id) > 0
14
+
15
+
16
def _load_config() -> dict:
    """
    Loads config.json if available; falls back to environment variables
    when running on HF Spaces or when the file is missing.

    Returns:
        dict: The parsed config, or a minimal env-derived config.
    """
    config_path = os.path.join(ROOT_DIR, "config.json")
    if os.path.exists(config_path):
        # FIX: read with an explicit encoding — the platform default may not
        # be UTF-8 (e.g. Windows cp1252), breaking non-ASCII config values.
        with open(config_path, "r", encoding="utf-8") as f:
            return json.load(f)

    # Fallback: build minimal config from environment variables.
    return {
        "verbose": os.environ.get("VERBOSE", "true").lower() == "true",
        "firefox_profile": "",
        "headless": True,
        "ollama_base_url": os.environ.get("OLLAMA_BASE_URL", "http://127.0.0.1:11434"),
        "ollama_model": os.environ.get("OLLAMA_MODEL", ""),
        "twitter_language": os.environ.get("TWITTER_LANGUAGE", "English"),
        "nanobanana2_api_base_url": os.environ.get(
            "NANOBANANA2_API_BASE_URL",
            "https://generativelanguage.googleapis.com/v1beta",
        ),
        "nanobanana2_api_key": os.environ.get("GEMINI_API_KEY", ""),
        "nanobanana2_model": os.environ.get(
            "NANOBANANA2_MODEL", "gemini-3.1-flash-image-preview"
        ),
        "nanobanana2_aspect_ratio": os.environ.get("NANOBANANA2_ASPECT_RATIO", "9:16"),
        "threads": int(os.environ.get("THREADS", "2")),
        "zip_url": os.environ.get("ZIP_URL", ""),
        "is_for_kids": False,
        "stt_provider": os.environ.get("STT_PROVIDER", "local_whisper"),
        "whisper_model": os.environ.get("WHISPER_MODEL", "tiny"),
        "whisper_device": os.environ.get("WHISPER_DEVICE", "cpu"),
        "whisper_compute_type": os.environ.get("WHISPER_COMPUTE_TYPE", "int8"),
        "assembly_ai_api_key": os.environ.get("ASSEMBLYAI_API_KEY", ""),
        "tts_voice": os.environ.get("TTS_VOICE", "Jasper"),
        "font": os.environ.get("FONT", "bold_font.ttf"),
        "imagemagick_path": os.environ.get("IMAGEMAGICK_PATH", "/usr/bin/convert"),
        "script_sentence_length": int(os.environ.get("SCRIPT_SENTENCE_LENGTH", "4")),
        "email": {"smtp_server": "", "smtp_port": 587, "username": "", "password": ""},
        "post_bridge": {
            "enabled": False,
            "api_key": "",
            "platforms": [],
            "account_ids": [],
            "auto_crosspost": False,
        },
    }
64
+
65
+
66
def assert_folder_structure() -> None:
    """
    Make sure that the necessary folder structure is present.

    Creates the ``.mp`` working directory under ROOT_DIR if missing.

    Returns:
        None
    """
    # Hoist the repeated os.path.join; use exist_ok=True so a concurrent
    # creator (or a race between exists() and makedirs()) cannot crash us.
    mp_dir = os.path.join(ROOT_DIR, ".mp")
    if not os.path.exists(mp_dir):
        if get_verbose():
            print(colored(f"=> Creating .mp folder at {mp_dir}", "green"))
        os.makedirs(mp_dir, exist_ok=True)
78
+
79
def get_first_time_running() -> bool:
    """
    Checks if the program is running for the first time by checking if .mp folder exists.

    Returns:
        exists (bool): True if the program is running for the first time, False otherwise
    """
    mp_dir = os.path.join(ROOT_DIR, ".mp")
    return not os.path.exists(mp_dir)
87
+
88
def get_email_credentials() -> dict:
    """Returns the SMTP credentials block used for outreach e-mails."""
    # FIX: use .get() with the env-fallback default so a hand-written
    # config.json missing the key does not raise KeyError.
    return _load_config().get(
        "email", {"smtp_server": "", "smtp_port": 587, "username": "", "password": ""}
    )

def get_verbose() -> bool:
    """Returns whether verbose console logging is enabled."""
    return _load_config().get("verbose", True)

def get_firefox_profile_path() -> str:
    """Returns the Firefox profile path used by Selenium ("" = default)."""
    return _load_config().get("firefox_profile", "")

def get_headless() -> bool:
    """Returns whether the automation browser should run headless."""
    return _load_config().get("headless", True)

def get_ollama_base_url() -> str:
    """Returns the base URL of the Ollama server."""
    return _load_config().get("ollama_base_url", "http://127.0.0.1:11434")

def get_ollama_model() -> str:
    """Returns the configured Ollama model name ("" = unset)."""
    return _load_config().get("ollama_model", "")

def get_twitter_language() -> str:
    """Returns the language used for generated tweets."""
    return _load_config().get("twitter_language", "English")
108
+
109
def get_nanobanana2_api_base_url() -> str:
    """Returns the base URL for the Gemini image-generation API."""
    default_url = "https://generativelanguage.googleapis.com/v1beta"
    return _load_config().get("nanobanana2_api_base_url", default_url)

def get_nanobanana2_api_key() -> str:
    """Returns the Gemini API key: config value first, else GEMINI_API_KEY env."""
    key = _load_config().get("nanobanana2_api_key", "")
    if key:
        return key
    return os.environ.get("GEMINI_API_KEY", "")

def get_nanobanana2_model() -> str:
    """Returns the Gemini image model name."""
    return _load_config().get("nanobanana2_model", "gemini-3.1-flash-image-preview")

def get_nanobanana2_aspect_ratio() -> str:
    """Returns the aspect ratio requested from the image model."""
    return _load_config().get("nanobanana2_aspect_ratio", "9:16")
124
+
125
def get_threads() -> int:
    """Returns how many threads MoviePy may use when rendering."""
    return _load_config().get("threads", 2)

def get_zip_url() -> str:
    """Returns the URL of the songs zip archive ("" = unset)."""
    return _load_config().get("zip_url", "")

def get_is_for_kids() -> bool:
    """Returns whether uploads should be marked "made for kids"."""
    return _load_config().get("is_for_kids", False)

def get_google_maps_scraper_zip_url() -> str:
    """Returns the download URL of the Google Maps scraper archive."""
    # FIX: these scraper/outreach keys are absent from the env-fallback
    # config built by _load_config(), so bare indexing raised KeyError on
    # HF Spaces; .get() with a neutral default keeps callers working.
    return _load_config().get("google_maps_scraper", "")

def get_google_maps_scraper_niche() -> str:
    """Returns the niche/query passed to the Google Maps scraper."""
    return _load_config().get("google_maps_scraper_niche", "")

def get_scraper_timeout() -> int:
    """Returns the scraper timeout in seconds (defaults to 300)."""
    return _load_config().get("scraper_timeout") or 300

def get_outreach_message_subject() -> str:
    """Returns the subject line used for outreach e-mails."""
    return _load_config().get("outreach_message_subject", "")

def get_outreach_message_body_file() -> str:
    """Returns the path of the file holding the outreach e-mail body."""
    return _load_config().get("outreach_message_body_file", "")
148
+
149
def get_tts_voice() -> str:
    """Returns the KittenTTS voice name."""
    return _load_config().get("tts_voice", "Jasper")

def get_assemblyai_api_key() -> str:
    """Returns the AssemblyAI API key ("" = unset)."""
    # FIX: was _load_config()["assembly_ai_api_key"] — KeyError when the key
    # is missing from a hand-written config.json; siblings all use .get().
    return _load_config().get("assembly_ai_api_key", "")

def get_stt_provider() -> str:
    """Returns the speech-to-text provider identifier."""
    return _load_config().get("stt_provider", "local_whisper")

def get_whisper_model() -> str:
    """Returns the faster-whisper model size."""
    # NOTE(review): default here is "base" while the env fallback in
    # _load_config() defaults to "tiny" — confirm which is intended.
    return _load_config().get("whisper_model", "base")

def get_whisper_device() -> str:
    """Returns the device faster-whisper should run on."""
    return _load_config().get("whisper_device", "auto")

def get_whisper_compute_type() -> str:
    """Returns the faster-whisper compute type (e.g. int8, float16)."""
    return _load_config().get("whisper_compute_type", "int8")
166
+
167
def equalize_subtitles(srt_path: str, max_chars: int = 10) -> None:
    """
    Equalizes the subtitles in a SRT file (rewrites it in place).

    Args:
        srt_path (str): The path to the SRT file
        max_chars (int): The maximum amount of characters in a subtitle

    Returns:
        None
    """
    srt_equalizer.equalize_srt_file(srt_path, srt_path, max_chars)

def get_font() -> str:
    """Returns the subtitle font file name (looked up under fonts/)."""
    # FIX: .get() with the env-fallback default instead of bare indexing,
    # so a config.json without "font" cannot raise KeyError.
    return _load_config().get("font", "bold_font.ttf")

def get_fonts_dir() -> str:
    """Returns the absolute path of the bundled fonts directory."""
    return os.path.join(ROOT_DIR, "fonts")

def get_imagemagick_path() -> str:
    """Returns the ImageMagick binary path MoviePy should use."""
    path = _load_config().get("imagemagick_path", "")
    # On HF Spaces ImageMagick is installed at the distro default location.
    if not path and is_running_in_spaces():
        return "/usr/bin/convert"
    return path

def get_script_sentence_length() -> int:
    """Returns how many sentences a generated script should contain."""
    # Explicit None check so a JSON null falls back to 4 but 0 is kept.
    val = _load_config().get("script_sentence_length")
    return val if val is not None else 4
195
+
196
def get_post_bridge_config() -> dict:
    """
    Returns the normalized post-bridge cross-posting configuration.

    Reads the raw ``post_bridge`` section from config and sanitizes it:
    platforms are lower-cased, deduplicated and restricted to the supported
    set; account IDs are coerced to ints (invalid entries dropped); the API
    key falls back to the POST_BRIDGE_API_KEY environment variable.

    Returns:
        dict: Keys ``enabled``, ``api_key``, ``platforms``, ``account_ids``,
            ``auto_crosspost`` — always present with safe types.
    """
    defaults = {
        "enabled": False,
        "api_key": "",
        "platforms": ["tiktok", "instagram"],
        "account_ids": [],
        "auto_crosspost": False,
    }
    supported_platforms = {"tiktok", "instagram"}

    config_json = _load_config()

    # Tolerate a wrong-typed section (e.g. a string) by ignoring it.
    raw_config = config_json.get("post_bridge", {})
    if not isinstance(raw_config, dict):
        raw_config = {}

    raw_platforms = raw_config.get("platforms")
    normalized_platforms = []
    seen_platforms = set()

    # Key absent -> defaults; present list -> filtered/deduped; any other
    # type (including an explicit non-list) -> no platforms at all.
    if raw_platforms is None:
        normalized_platforms = defaults["platforms"].copy()
    elif isinstance(raw_platforms, list):
        for platform in raw_platforms:
            normalized_platform = str(platform).strip().lower()
            if (
                normalized_platform in supported_platforms
                and normalized_platform not in seen_platforms
            ):
                normalized_platforms.append(normalized_platform)
                seen_platforms.add(normalized_platform)
    else:
        normalized_platforms = []

    raw_account_ids = raw_config.get("account_ids", defaults["account_ids"])
    normalized_account_ids = []
    if isinstance(raw_account_ids, list):
        for account_id in raw_account_ids:
            # Drop entries that cannot be coerced to int (None, "abc", ...).
            try:
                normalized_account_ids.append(int(account_id))
            except (TypeError, ValueError):
                continue

    # Environment variable is the fallback when no key is configured.
    api_key = str(raw_config.get("api_key", "")).strip()
    if not api_key:
        api_key = os.environ.get("POST_BRIDGE_API_KEY", "").strip()

    return {
        "enabled": bool(raw_config.get("enabled", defaults["enabled"])),
        "api_key": api_key,
        "platforms": normalized_platforms,
        "account_ids": normalized_account_ids,
        "auto_crosspost": bool(
            raw_config.get("auto_crosspost", defaults["auto_crosspost"])
        ),
    }
src/llm_provider.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from config import is_running_in_spaces
4
+
5
+ _selected_model: str | None = None
6
+
7
+
8
def _use_hf_backend() -> bool:
    """Use HF Inference API when running on Spaces or when HF_TOKEN is set and Ollama is absent."""
    if is_running_in_spaces():
        return True
    has_hf_token = bool(os.environ.get("HF_TOKEN"))
    has_ollama = bool(os.environ.get("OLLAMA_BASE_URL"))
    return has_hf_token and not has_ollama
15
+
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # HF Inference API backend
19
+ # ---------------------------------------------------------------------------
20
+
21
def _hf_client():
    """Builds an InferenceClient authenticated with HF_TOKEN (if set)."""
    # Imported lazily so the Ollama-only path never needs huggingface_hub.
    from huggingface_hub import InferenceClient
    return InferenceClient(token=os.environ.get("HF_TOKEN", ""))
25
+
26
+
27
+ def _hf_list_models() -> list[str]:
28
+ return [
29
+ "meta-llama/Llama-3.1-8B-Instruct",
30
+ "mistralai/Mistral-7B-Instruct-v0.3",
31
+ "google/gemma-2-9b-it",
32
+ ]
33
+
34
+
35
def _hf_generate_text(prompt: str, model: str) -> str:
    """Runs one single-turn chat completion against the HF Inference API."""
    chat_messages = [{"role": "user", "content": prompt}]
    completion = _hf_client().chat_completion(
        model=model,
        messages=chat_messages,
        max_tokens=2048,
    )
    return completion.choices[0].message.content.strip()
42
+
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # Ollama backend (original)
46
+ # ---------------------------------------------------------------------------
47
+
48
def _ollama_client():
    """Builds an Ollama client pointed at the configured base URL."""
    # Lazy imports: only the Ollama code path requires these.
    import ollama
    from config import get_ollama_base_url
    host = get_ollama_base_url()
    return ollama.Client(host=host)
52
+
53
+
54
def _ollama_list_models() -> list[str]:
    """Returns the locally available Ollama model names, sorted."""
    listing = _ollama_client().list()
    names = [entry.model for entry in listing.models]
    names.sort()
    return names
57
+
58
+
59
def _ollama_generate_text(prompt: str, model: str) -> str:
    """Runs one single-turn chat against the local Ollama server."""
    chat_messages = [{"role": "user", "content": prompt}]
    reply = _ollama_client().chat(model=model, messages=chat_messages)
    return reply["message"]["content"].strip()
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # Public API (unchanged interface)
69
+ # ---------------------------------------------------------------------------
70
+
71
def list_models() -> list[str]:
    """Lists the selectable model names from whichever backend is active."""
    backend = _hf_list_models if _use_hf_backend() else _ollama_list_models
    return backend()
75
+
76
+
77
+ def select_model(model: str) -> None:
78
+ global _selected_model
79
+ _selected_model = model
80
+
81
+
82
+ def get_active_model() -> str | None:
83
+ return _selected_model
84
+
85
+
86
def generate_text(prompt: str, model_name: str | None = None) -> str:
    """
    Generates text for *prompt* using the active backend.

    Args:
        prompt (str): The user prompt, sent as a single chat message.
        model_name (str | None): Optional model override; falls back to the
            model previously chosen via select_model().

    Returns:
        str: The stripped model response text.

    Raises:
        RuntimeError: If no model is selected and none is passed.
    """
    model = model_name or _selected_model
    if not model:
        raise RuntimeError(
            "No model selected. Call select_model() first or pass model_name."
        )

    # Backend choice is re-evaluated on every call (env vars may change).
    if _use_hf_backend():
        return _hf_generate_text(prompt, model)
    return _ollama_generate_text(prompt, model)
src/status.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from termcolor import colored
2
+
3
def _emit(message: str, color: str, emoji: str, show_emoji: bool) -> None:
    """Shared printer: optional emoji prefix, colorized via termcolor."""
    prefix = emoji if show_emoji else ""
    print(colored(f"{prefix} {message}", color))


def error(message: str, show_emoji: bool = True) -> None:
    """
    Prints an error message.

    Args:
        message (str): The error message
        show_emoji (bool): Whether to show the emoji

    Returns:
        None
    """
    _emit(message, "red", "❌", show_emoji)


def success(message: str, show_emoji: bool = True) -> None:
    """
    Prints a success message.

    Args:
        message (str): The success message
        show_emoji (bool): Whether to show the emoji

    Returns:
        None
    """
    _emit(message, "green", "✅", show_emoji)


def info(message: str, show_emoji: bool = True) -> None:
    """
    Prints an info message.

    Args:
        message (str): The info message
        show_emoji (bool): Whether to show the emoji

    Returns:
        None
    """
    _emit(message, "magenta", "ℹ️", show_emoji)


def warning(message: str, show_emoji: bool = True) -> None:
    """
    Prints a warning message.

    Args:
        message (str): The warning message
        show_emoji (bool): Whether to show the emoji

    Returns:
        None
    """
    _emit(message, "yellow", "⚠️", show_emoji)
58
+
59
def question(message: str, show_emoji: bool = True) -> str:
    """
    Prints a question message and returns the user's input.

    Args:
        message (str): The question message
        show_emoji (bool): Whether to show the emoji

    Returns:
        user_input (str): The user's input
    """
    prefix = "❓" if show_emoji else ""
    return input(colored(f"{prefix} {message}", "magenta"))
src/utils.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+ import zipfile
4
+ import requests
5
+ import platform
6
+
7
+ from status import *
8
+ from config import *
9
+
10
+ DEFAULT_SONG_ARCHIVE_URLS = []
11
+
12
+
13
def close_running_selenium_instances() -> None:
    """
    Closes any running Selenium instances.

    Returns:
        None
    """
    try:
        info(" => Closing running Selenium instances...")

        # Selenium drives Firefox here, so kill every Firefox process.
        kill_command = (
            "taskkill /f /im firefox.exe"
            if platform.system() == "Windows"
            else "pkill firefox"
        )
        os.system(kill_command)

        success(" => Closed running Selenium instances.")

    except Exception as e:
        error(f"Error occurred while closing running Selenium instances: {str(e)}")
33
+
34
+
35
def build_url(youtube_video_id: str) -> str:
    """
    Builds the URL to the YouTube video.

    Args:
        youtube_video_id (str): The YouTube video ID.

    Returns:
        url (str): The URL to the YouTube video.
    """
    return "https://www.youtube.com/watch?v=" + youtube_video_id
46
+
47
+
48
def rem_temp_files() -> None:
    """
    Removes temporary (non-JSON) files in the `.mp` directory.

    JSON files are kept because they hold persistent cache/account state.

    Returns:
        None
    """
    # Path to the `.mp` directory
    mp_dir = os.path.join(ROOT_DIR, ".mp")

    for name in os.listdir(mp_dir):
        full_path = os.path.join(mp_dir, name)
        # BUG FIX: the original called os.remove() on every non-JSON entry,
        # which raises IsADirectoryError if a subdirectory ever appears in
        # .mp — only delete regular files.
        if name.endswith(".json") or not os.path.isfile(full_path):
            continue
        os.remove(full_path)
63
+
64
+
65
def fetch_songs() -> None:
    """
    Downloads songs into songs/ directory to use with geneated videos.

    No-ops when the Songs/ directory already contains audio files. Otherwise
    tries the configured zip URL first, then any DEFAULT_SONG_ARCHIVE_URLS,
    extracting only audio entries with safe (non-traversal) paths.

    Returns:
        None
    """
    try:
        info(f" => Fetching songs...")

        files_dir = os.path.join(ROOT_DIR, "Songs")
        if not os.path.exists(files_dir):
            os.mkdir(files_dir)
            if get_verbose():
                info(f" => Created directory: {files_dir}")
        else:
            # Already populated with usable audio -> nothing to download.
            existing_audio_files = [
                name
                for name in os.listdir(files_dir)
                if os.path.isfile(os.path.join(files_dir, name))
                and name.lower().endswith((".mp3", ".wav", ".m4a", ".aac", ".ogg"))
            ]
            if len(existing_audio_files) > 0:
                return

        # Candidate URLs: configured URL first, then built-in fallbacks.
        configured_url = get_zip_url().strip()
        download_urls = [configured_url] if configured_url else []
        download_urls.extend(DEFAULT_SONG_ARCHIVE_URLS)

        archive_path = os.path.join(files_dir, "songs.zip")
        downloaded = False

        for download_url in download_urls:
            try:
                # NOTE(review): whole archive is buffered in memory via
                # response.content — fine for small zips, confirm for large.
                response = requests.get(download_url, timeout=60)
                response.raise_for_status()

                with open(archive_path, "wb") as file:
                    file.write(response.content)

                # Extract only audio entries; the path checks guard against
                # zip-slip (writing outside files_dir via ".." or absolute paths).
                SAFE_EXTENSIONS = (".mp3", ".wav", ".m4a", ".aac", ".ogg", ".flac")
                with zipfile.ZipFile(archive_path, "r") as zf:
                    for member in zf.namelist():
                        basename = os.path.basename(member)
                        if not basename or not basename.lower().endswith(SAFE_EXTENSIONS):
                            warning(f"Skipping non-audio file in archive: {member}")
                            continue
                        if ".." in member or member.startswith("/"):
                            warning(f"Skipping suspicious path in archive: {member}")
                            continue
                        zf.extract(member, files_dir)

                downloaded = True
                break
            except Exception as err:
                # Best-effort: log and fall through to the next candidate URL.
                warning(f"Failed to fetch songs from {download_url}: {err}")

        if not downloaded:
            raise RuntimeError(
                "Could not download a valid songs archive from any configured URL"
            )

        # Remove the zip file
        if os.path.exists(archive_path):
            os.remove(archive_path)

        success(" => Downloaded Songs to ../Songs.")

    except Exception as e:
        # Swallows the failure after logging; callers see an empty Songs dir.
        error(f"Error occurred while fetching songs: {str(e)}")
135
+
136
+
137
def choose_random_song() -> str:
    """
    Chooses a random song from the songs/ directory.

    Returns:
        str: The path to the chosen song.
    """
    try:
        songs_dir = os.path.join(ROOT_DIR, "Songs")
        audio_extensions = (".mp3", ".wav", ".m4a", ".aac", ".ogg")
        candidates = []
        for name in os.listdir(songs_dir):
            if not os.path.isfile(os.path.join(songs_dir, name)):
                continue
            if name.lower().endswith(audio_extensions):
                candidates.append(name)
        if len(candidates) == 0:
            raise RuntimeError("No audio files found in Songs directory")
        song = random.choice(candidates)
        success(f" => Chose song: {song}")
        return os.path.join(ROOT_DIR, "Songs", song)
    except Exception as e:
        # Log, then re-raise so callers can react to the missing music.
        error(f"Error occurred while choosing random song: {str(e)}")
        raise
159
+ raise