diff --git "a/requirements.txt" "b/requirements.txt"
--- "a/requirements.txt"
+++ "b/requirements.txt"
@@ -0,0 +1,16 @@
+gradio==3.50.2
+g4f==0.1.9.0
+moviepy==1.0.3
+assemblyai==0.17.0
+requests==2.31.0
+google-generativeai==0.3.1
+python-dotenv==1.0.0
+Pillow==10.0.0
+openai==1.3.5
+edge-tts==6.1.9
+bark==0.0.1
+tensorflow==2.12.0
+soundfile==0.12.1
+TTS==0.21.1
+rvc-engine==0.0.1
+termcolor==2.3.0
diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -3,278 +3,990 @@
-Generate short videos based on a niche and language
-
+import os
+import re
+import g4f
+import json
+import time
+import random
+import tempfile
+import requests
+import assemblyai as aai
+from moviepy.editor import *
+from datetime import datetime
+import gradio as gr
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
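+# Optional keys read from a local .env file (set whichever services you use):
+#   GEMINI_API_KEY=...
+#   ASSEMBLYAI_API_KEY=...
+#   ELEVENLABS_API_KEY=...
+#   SEGMIND_API_KEY=...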
+
+# Constants
+CACHE_DIR = os.path.join(tempfile.gettempdir(), "yt_shorts_generator")
+os.makedirs(CACHE_DIR, exist_ok=True)
+
+# Helper functions
+def info(message):
+ print(f"[INFO] {message}")
+ return f"[INFO] {message}"
+
+def success(message):
+ print(f"[SUCCESS] {message}")
+ return f"[SUCCESS] {message}"
+
+def warning(message):
+ print(f"[WARNING] {message}")
+ return f"[WARNING] {message}"
+
+def error(message):
+ print(f"[ERROR] {message}")
+ return f"[ERROR] {message}"
+
+class YouTube:
+ def __init__(self, niche, language, text_gen="gemini", image_gen="prodia", tts_engine="elevenlabs",
+ tts_voice="Sarah", subtitle_font="Helvetica-Bold", font_size=80,
+ text_color="white", highlight_color="blue", api_keys=None):
+ info(f"Initializing YouTube class")
+ self._niche = niche
+ self._language = language
+ self.text_gen = text_gen
+ self.image_gen = image_gen
+ self.tts_engine = tts_engine
+ self.tts_voice = tts_voice
+ self.subtitle_font = subtitle_font
+ self.font_size = font_size
+ self.text_color = text_color
+ self.highlight_color = highlight_color
+ self.api_keys = api_keys or {}
+ self.images = []
+ self.logs = []
+
+ # Set API keys
+ if 'gemini' in self.api_keys and self.api_keys['gemini']:
+ os.environ["GEMINI_API_KEY"] = self.api_keys['gemini']
+ if 'assemblyai' in self.api_keys and self.api_keys['assemblyai']:
+ os.environ["ASSEMBLYAI_API_KEY"] = self.api_keys['assemblyai']
+ if 'elevenlabs' in self.api_keys and self.api_keys['elevenlabs']:
+ os.environ["ELEVENLABS_API_KEY"] = self.api_keys['elevenlabs']
+ if 'segmind' in self.api_keys and self.api_keys['segmind']:
+ os.environ["SEGMIND_API_KEY"] = self.api_keys['segmind']
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ info(f"Niche: {niche}, Language: {language}")
+ self.log(f"Initialized with niche: {niche}, language: {language}")
+ self.log(f"Text generator: {text_gen}, Image generator: {image_gen}, TTS engine: {tts_engine}")
+
+ def log(self, message):
+ """Add a log message to the logs list"""
+ timestamp = datetime.now().strftime("%H:%M:%S")
+ log_entry = f"[{timestamp}] {message}"
+ self.logs.append(log_entry)
+ return log_entry
+
+ @property
+ def niche(self):
+ return self._niche
+
+ @property
+ def language(self):
+ return self._language
+
+ def generate_response(self, prompt, model=None):
+ self.log(f"Generating response for prompt: {prompt[:50]}...")
+ if self.text_gen == "gemini":
+ self.log("Using Google's Gemini model")
+ import google.generativeai as genai
+ genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
+            gemini_model = genai.GenerativeModel('gemini-2.0-flash')
+            response = gemini_model.generate_content(prompt).text
+ else:
+ model_name = model if model else "gpt-3.5-turbo"
+ self.log(f"Using G4F model: {model_name}")
+ response = g4f.ChatCompletion.create(
+ model=model_name,
+ messages=[{"role": "user", "content": prompt}]
+ )
+ self.log(f"Response generated successfully, length: {len(response)} characters")
+ return response
+
+ def generate_topic(self):
+ self.log("Generating topic based on niche")
+ completion = self.generate_response(
+ f"Please generate a specific video idea that takes about the following topic: {self.niche}. "
+ f"Make it exactly one sentence. Only return the topic, nothing else."
+ )
+
+ if not completion:
+ self.log(error("Failed to generate Topic."))
+ return None
+
+ self.subject = completion
+ self.log(success(f"Generated topic: {completion}"))
+ return completion
+
+ def generate_script(self):
+ self.log("Generating script for video")
+ prompt = f"""
+        Generate a script for a YouTube Shorts video, based on the subject of the video.
+
+        The script is to be returned as a plain string.
+
+ Here is an example of a string:
+ "This is an example string."
+
+ Do not under any circumstance reference this prompt in your response.
+
+        Get straight to the point, don't start with unnecessary things like "welcome to this video".
+
+ Obviously, the script should be related to the subject of the video.
+
+ YOU MUST NOT INCLUDE ANY TYPE OF MARKDOWN OR FORMATTING IN THE SCRIPT, NEVER USE A TITLE.
+ YOU MUST WRITE THE SCRIPT IN THE LANGUAGE SPECIFIED IN [LANGUAGE].
+ ONLY RETURN THE RAW CONTENT OF THE SCRIPT. DO NOT INCLUDE "VOICEOVER", "NARRATOR" OR SIMILAR INDICATORS.
+
+ Subject: {self.subject}
+ Language: {self.language}
+ """
+        completion = self.generate_response(prompt)
+
+        if not completion:
+            self.log(error("The generated script is empty."))
+            return None
+
+        # Strip any stray markdown asterisks from the response
+        completion = re.sub(r"\*", "", completion)
+
+ if len(completion) > 5000:
+ self.log(warning("Generated Script is too long. Retrying..."))
+ return self.generate_script()
+
+ self.script = completion
+ self.log(success(f"Generated script ({len(completion)} chars)"))
+ return completion
+
+ def generate_metadata(self):
+ self.log("Generating metadata (title and description)")
+ title = self.generate_response(
+ f"Please generate a YouTube Video Title for the following subject, including hashtags: "
+ f"{self.subject}. Only return the title, nothing else. Limit the title under 100 characters."
+ )
+
+ if len(title) > 100:
+ self.log(warning("Generated Title is too long. Retrying..."))
+ return self.generate_metadata()
+
+ description = self.generate_response(
+ f"Please generate a YouTube Video Description for the following script: {self.script}. "
+ f"Only return the description, nothing else."
+ )
+
+ self.metadata = {
+ "title": title,
+ "description": description
+ }
+
+ self.log(success(f"Generated title: {title}"))
+ self.log(success(f"Generated description: {description}"))
+ return self.metadata
+
+ def generate_prompts(self, count=5):
+ self.log(f"Generating {count} image prompts")
+ prompt = f"""
+        Generate {count} Image Prompts for AI Image Generation,
+        based on the subject of a video.
+ Subject: {self.subject}
+
+ The image prompts are to be returned as
+ a JSON-Array of strings.
+
+ Each search term should consist of a full sentence,
+ always add the main subject of the video.
+
+ Be emotional and use interesting adjectives to make the
+ Image Prompt as detailed as possible.
+
+ YOU MUST ONLY RETURN THE JSON-ARRAY OF STRINGS.
+ YOU MUST NOT RETURN ANYTHING ELSE.
+ YOU MUST NOT RETURN THE SCRIPT.
+
+ The search terms must be related to the subject of the video.
+ Here is an example of a JSON-Array of strings:
+ ["image prompt 1", "image prompt 2", "image prompt 3"]
+
+ For context, here is the full text:
+ {self.script}
+ """
+
+ completion = str(self.generate_response(prompt))\
+ .replace("```json", "") \
+ .replace("```", "")
+
+        image_prompts = []
+
+        try:
+            parsed = json.loads(completion)
+            # Accept either a bare JSON array or an object with an "image_prompts" key
+            image_prompts = parsed["image_prompts"] if isinstance(parsed, dict) else parsed
+            self.log(f"Generated Image Prompts: {image_prompts}")
+        except Exception:
+            self.log(warning("The model returned an unformatted response. Attempting to clean..."))
+
+            # Get everything between [ and ], and turn it into a list
+            r = re.compile(r"\[.*\]", re.DOTALL)
+            matches = r.findall(completion)
+            if len(matches) == 0:
+                self.log(warning("Failed to generate Image Prompts. Retrying..."))
+                return self.generate_prompts(count)
+
+            try:
+                image_prompts = json.loads(matches[0])
+            except Exception:
+                self.log(error("Failed to parse image prompts JSON"))
+                # Fall back to a set of generic prompts
+                image_prompts = [
+                    f"A beautiful image showing {self.subject}",
+                    f"A detailed visualization of {self.subject}",
+                    f"An artistic representation of {self.subject}",
+                    f"A photorealistic image about {self.subject}",
+                    f"A dramatic scene related to {self.subject}"
+                ]
+
+ self.image_prompts = image_prompts[:count] # Limit to requested count
+ self.log(success(f"Generated {len(self.image_prompts)} Image Prompts"))
+ for i, prompt in enumerate(self.image_prompts):
+ self.log(f"Image Prompt {i+1}: {prompt}")
+ return self.image_prompts
+
+ def generate_image(self, prompt):
+ self.log(f"Generating image for prompt: {prompt[:50]}...")
+
+ if self.image_gen == "prodia":
+ self.log("Using Prodia provider for image generation")
+ s = requests.Session()
+ headers = {
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+ }
-
-
- Title
-
-
-
- Description
-
-
-
+ # Generate job
+ self.log("Sending generation request to Prodia API")
+ resp = s.get(
+ "https://api.prodia.com/generate",
+ params={
+ "new": "true",
+ "prompt": prompt,
+ "model": "sdxl", # Default model
+ "negative_prompt": "verybadimagenegative_v1.3",
+ "steps": "20",
+ "cfg": "7",
+ "seed": random.randint(1, 10000),
+ "sample": "DPM++ 2M Karras",
+ "aspect_ratio": "square"
+ },
+ headers=headers
+ )
-
- Script
-
-
+ job_id = resp.json()['job']
+ self.log(f"Job created with ID: {job_id}")
-
- Image Prompts
-
-
+ # For demo purposes, simulate waiting
+ self.log("Waiting for image generation to complete...")
+ time.sleep(3) # Simulate API call
+
+ # In a real implementation we would poll until completion
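+            # A hedged polling sketch (not executed in this demo); the job-status
+            # endpoint and the images.prodia.xyz URL pattern are assumptions about
+            # Prodia's job API rather than confirmed behavior:
+            #
+            #   while True:
+            #       job = s.get(f"https://api.prodia.com/job/{job_id}", headers=headers).json()
+            #       if job.get("status") == "succeeded":
+            #           image_url = f"https://images.prodia.xyz/{job_id}.png"
+            #           break
+            #       time.sleep(1)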
+ # For demo, we'll just create a placeholder image
+ image_path = os.path.join(CACHE_DIR, f"image_{len(self.images)}.png")
+
+            # We can't generate a real image in this demo, so return an example URL
+            # standing in for what the API would produce in a real implementation
+            image_url = "https://images.unsplash.com/photo-1579546929518-9e396f3cc809"
+ self.log(success(f"Image generated and saved (placeholder for demo)"))
+ self.images.append(image_url)
+ return image_url
+
+ elif self.image_gen == "hercai":
+ self.log("Using Hercai provider for image generation")
+ # For demo purposes, simulate API call
+ time.sleep(2)
+ image_url = "https://images.unsplash.com/photo-1513151233558-d860c5398176"
+ self.log(success(f"Image generated and saved (placeholder for demo)"))
+ self.images.append(image_url)
+ return image_url
+
+ elif self.image_gen == "segmind":
+ self.log("Using Segmind provider for image generation")
+ # For demo purposes, simulate API call
+ time.sleep(2)
+ image_url = "https://images.unsplash.com/photo-1618005182384-a83a8bd57fbe"
+ self.log(success(f"Image generated and saved (placeholder for demo)"))
+ self.images.append(image_url)
+ return image_url
+
+ elif self.image_gen == "pollinations":
+ self.log("Using Pollinations provider for image generation")
+ # For demo purposes, simulate API call
+ time.sleep(2)
+ image_url = "https://images.unsplash.com/photo-1550859492-d5da9d8e45f3"
+ self.log(success(f"Image generated and saved (placeholder for demo)"))
+ self.images.append(image_url)
+ return image_url
+
+ else: # Default or g4f
+ self.log("Using default provider for image generation")
+ # For demo purposes, simulate API call
+ time.sleep(2)
+ image_url = "https://images.unsplash.com/photo-1541701494587-cb58502866ab"
+ self.log(success(f"Image generated and saved (placeholder for demo)"))
+ self.images.append(image_url)
+ return image_url
+
+ def generate_speech(self, text, output_format='mp3'):
+ self.log("Generating speech from text")
+
+        # Strip characters that commonly trip up TTS engines, keeping basic punctuation
+        text = re.sub(r"[^\w\s.,'?!-]", '', text)
+
+ self.log(f"Using TTS Engine: {self.tts_engine}, Voice: {self.tts_voice}")
+
+ audio_path = os.path.join(CACHE_DIR, f"speech.{output_format}")
+
+ if self.tts_engine == "elevenlabs":
+ self.log("Using ElevenLabs provider for speech generation")
+ # For demo purposes, we'll just simulate the API call
+ self.log("Simulating ElevenLabs API call (would use real API in production)")
+ time.sleep(3) # Simulate API call
+
+ elif self.tts_engine == 'bark':
+ self.log("Using Bark provider for speech generation")
+ # For demo purposes, simulate API call
+ time.sleep(3)
+
+ elif self.tts_engine == "gtts":
+ self.log("Using Google TTS provider for speech generation")
+ # For demo purposes, simulate API call
+ time.sleep(2)
+
+ elif self.tts_engine == "openai":
+ self.log("Using OpenAI provider for speech generation")
+ # For demo purposes, simulate API call
+ time.sleep(3)
+
+ elif self.tts_engine == "edge":
+ self.log("Using Edge TTS provider for speech generation")
+ # For demo purposes, simulate API call
+ time.sleep(2)
+
+ else:
+ self.log(f"Using default TTS engine (would use {self.tts_engine} in production)")
+ # For demo purposes, simulate API call
+ time.sleep(2)
+
+ self.log(success(f"Speech generated and saved to: {audio_path}"))
+ self.tts_path = audio_path
+ return audio_path
+
+ def generate_subtitles(self, audio_path):
+ self.log("Generating word-level subtitles for video")
+
+ # Define constants
+ FONT = self.subtitle_font
+ FONTSIZE = self.font_size
+ COLOR = self.text_color
+ BG_COLOR = self.highlight_color
+ FRAME_SIZE = (1080, 1920)
+ MAX_CHARS = 30
+ MAX_DURATION = 3.0
+ MAX_GAP = 2.5
+
+ try:
+ # In a real implementation, we would use AssemblyAI to transcribe
+ self.log("In a production environment, this would use AssemblyAI for transcription")
+
+ # For demo purposes, we'll simulate the word-level data
+ self.log("Simulating transcription with word-level timing")
+ words = self.script.split()
+ total_duration = 60 # Assume 60 seconds for demo
+ avg_word_duration = total_duration / len(words)
+
+ wordlevel_info = []
+ current_time = 0
+
+ for word in words:
+ # Calculate a slightly randomized duration based on word length
+ word_duration = avg_word_duration * (0.5 + (len(word) / 10))
+
+ word_data = {
+ "word": word.strip(),
+ "start": current_time,
+ "end": current_time + word_duration
+ }
+ wordlevel_info.append(word_data)
+ current_time += word_duration
+
+ self.log(success(f"Generated word-level timing for {len(wordlevel_info)} words"))
+
+ # Process into line-level data (simplified for demo)
+ subtitles = []
+ line = []
+ line_duration = 0
+
+ for idx, word_data in enumerate(wordlevel_info):
+ word = word_data["word"]
+ start = word_data["start"]
+ end = word_data["end"]
+
+ line.append(word_data)
+ line_duration += end - start
+ temp = " ".join(item["word"] for item in line)
+ new_line_chars = len(temp)
+ duration_exceeded = line_duration > MAX_DURATION
+ chars_exceeded = new_line_chars > MAX_CHARS
+
+ if idx > 0:
+ gap = word_data['start'] - wordlevel_info[idx - 1]['end']
+ maxgap_exceeded = gap > MAX_GAP
+ else:
+ maxgap_exceeded = False
+
+ # Check if any condition is exceeded to finalize the current line
+ if duration_exceeded or chars_exceeded or maxgap_exceeded:
+ if line:
+ subtitle_line = {
+ "text": " ".join(item["word"] for item in line),
+ "start": line[0]["start"],
+ "end": line[-1]["end"],
+ "words": line
+ }
+ subtitles.append(subtitle_line)
+ line = []
+ line_duration = 0
+
+ # Add the remaining words as the last subtitle line if any
+ if line:
+ subtitle_line = {
+ "text": " ".join(item["word"] for item in line),
+ "start": line[0]["start"],
+ "end": line[-1]["end"],
+ "words": line
+ }
+ subtitles.append(subtitle_line)
+
+ self.log(success(f"Generated {len(subtitles)} subtitle lines"))
+
+ # In a real implementation, we would create TextClips for MoviePy
+ # For the demo, we'll just return the subtitle data
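+            # A hedged MoviePy sketch for rendering one subtitle line (not executed
+            # here); the position value is an illustrative assumption:
+            #
+            #   clip = (TextClip(sub["text"], font=FONT, fontsize=FONTSIZE, color=COLOR)
+            #           .set_start(sub["start"]).set_end(sub["end"])
+            #           .set_position(("center", 0.8), relative=True))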
+ return {
+ "wordlevel": wordlevel_info,
+ "linelevel": subtitles
+ }
+
+ except Exception as e:
+ self.log(error(f"Subtitle generation failed: {str(e)}"))
+ return None
+
+ def combine(self):
+ self.log("Combining images and audio into final video")
+
+ # For demonstration purposes, we're simulating the video creation process
+ combined_video_path = os.path.join(CACHE_DIR, "output.mp4")
+
+ # In a real implementation, this would:
+ # 1. Create ImageClips from each image
+ # 2. Create an audio clip from the speech
+ # 3. Add background music
+ # 4. Add word-level subtitles
+ # 5. Combine everything into a final video
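+        # A hedged MoviePy sketch of steps 1, 2 and 5 (not executed here); it assumes
+        # local image files rather than this demo's placeholder URLs, and splits the
+        # audio duration evenly across images:
+        #
+        #   audio = AudioFileClip(self.tts_path)
+        #   per_image = audio.duration / len(self.images)
+        #   clips = [ImageClip(img).set_duration(per_image) for img in self.images]
+        #   video = concatenate_videoclips(clips, method="compose").set_audio(audio)
+        #   video.write_videofile(combined_video_path, fps=30)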
+
+ self.log("This would create a vertical (9:16) video with:")
+ self.log(f"- {len(self.images)} images as a slideshow")
+ self.log("- TTS audio as the main audio track")
+ self.log("- Background music at low volume")
+ self.log("- Word-level subtitles that highlight as words are spoken")
+
+ # For demo purposes, simulate video processing
+ self.log("Processing video (simulated for demo)...")
+ time.sleep(3)
+
+ success_msg = f"Video successfully created at: {combined_video_path}"
+ self.log(success(success_msg))
+ self.video_path = combined_video_path
+
+ # For the demo, we'll return a mock result
+ return {
+ 'video_path': combined_video_path,
+ 'images': self.images,
+ 'audio_path': self.tts_path,
+ 'metadata': self.metadata
+ }
+
+ def generate_video(self):
+ """Generate complete video with all components"""
+ self.log("Starting video generation process")
+
+ # Step 1: Generate topic
+ self.log("Generating topic")
+ self.generate_topic()
+
+ # Step 2: Generate script
+ self.log("Generating script")
+ self.generate_script()
+
+ # Step 3: Generate metadata
+ self.log("Generating metadata")
+ self.generate_metadata()
+
+ # Step 4: Generate image prompts
+ self.log("Generating image prompts")
+ self.generate_prompts()
+
+ # Step 5: Generate images
+ self.log("Generating images")
+ for i, prompt in enumerate(self.image_prompts, 1):
+ self.log(f"Generating image {i}/{len(self.image_prompts)}")
+ self.generate_image(prompt)
+
+ # Step 6: Generate speech
+ self.log("Generating speech")
+ self.generate_speech(self.script)
+
+ # Step 7: Generate subtitles
+ self.log("Generating subtitles")
+ self.generate_subtitles(self.tts_path)
+
+ # Step 8: Combine all elements into final video
+ self.log("Combining all elements into final video")
+ result = self.combine()
+
+ self.log(f"Video generation complete.")
+
+ return {
+ 'video_path': result['video_path'],
+ 'images': result['images'],
+ 'audio_path': self.tts_path,
+ 'title': self.metadata['title'],
+ 'description': self.metadata['description'],
+ 'subject': self.subject,
+ 'script': self.script,
+ 'logs': self.logs
+ }
+
+# Gradio interface
+def create_youtube_short(niche, language, gemini_api_key="", assemblyai_api_key="",
+ elevenlabs_api_key="", segmind_api_key="", text_gen="gemini",
+ image_gen="prodia", tts_engine="elevenlabs", tts_voice="Sarah",
+ subtitle_font="Helvetica-Bold", font_size=80, text_color="white",
+ highlight_color="blue"):
+
+ # Create API keys dictionary
+ api_keys = {
+ 'gemini': gemini_api_key,
+ 'assemblyai': assemblyai_api_key,
+ 'elevenlabs': elevenlabs_api_key,
+ 'segmind': segmind_api_key
+ }
+
+ # Initialize YouTube class
+ yt = YouTube(
+ niche=niche,
+ language=language,
+ text_gen=text_gen,
+ image_gen=image_gen,
+ tts_engine=tts_engine,
+ tts_voice=tts_voice,
+ subtitle_font=subtitle_font,
+ font_size=font_size,
+ text_color=text_color,
+ highlight_color=highlight_color,
+ api_keys=api_keys
+ )
+
+ # Generate video
+ result = yt.generate_video()
+
+ # In a real implementation we would return the actual video file
+ # For demo, we'll just simulate it with a placeholder
+ demo_video = "https://sample-videos.com/video123/mp4/720/big_buck_bunny_720p_1mb.mp4"
+
+    # Return the outputs in the order expected by the Gradio click handler
+    return (
+        demo_video,
+        result['title'],
+        result['description'],
+        result['script'],
+        "\n".join(result['logs'])
+    )
+
+# Create Gradio app
+with gr.Blocks() as demo:
+ gr.Markdown("# YouTube Shorts Generator")
+ gr.Markdown("Generate short videos based on a niche and language")
+
+ with gr.Row():
+ with gr.Column(scale=1):
+ with gr.Group():
+ gr.Markdown("### Required Inputs")
+ niche = gr.Textbox(label="Niche/Topic", placeholder="E.g., Fitness tips, Technology facts")
+ language = gr.Dropdown(
+ choices=["English", "Spanish", "French", "German", "Italian", "Portuguese",
+ "Russian", "Japanese", "Chinese", "Hindi"],
+ label="Language",
+ value="English"
+ )
+
+ with gr.Accordion("API Keys", open=False):
+ gemini_api_key = gr.Textbox(label="Gemini API Key", type="password")
+ assemblyai_api_key = gr.Textbox(label="AssemblyAI API Key", type="password")
+ elevenlabs_api_key = gr.Textbox(label="ElevenLabs API Key", type="password")
+ segmind_api_key = gr.Textbox(label="Segmind API Key", type="password")
+
+ with gr.Accordion("Model Selection", open=False):
+ text_gen = gr.Dropdown(
+ choices=["gemini", "g4f"],
+ label="Text Generator",
+ value="gemini"
+ )
+ image_gen = gr.Dropdown(
+ choices=["prodia", "hercai", "g4f", "segmind", "pollinations"],
+ label="Image Generator",
+ value="prodia"
+ )
+ tts_engine = gr.Dropdown(
+ choices=["elevenlabs", "bark", "gtts", "openai", "edge", "local_tts", "xtts", "rvc"],
+ label="Text-to-Speech Engine",
+ value="elevenlabs"
+ )
+ tts_voice = gr.Textbox(
+ label="TTS Voice",
+ placeholder="E.g., Sarah, Brian, Lily, Monika Sogam",
+ value="Sarah"
+ )
+
+ with gr.Accordion("Subtitle Options", open=False):
+ subtitle_font = gr.Dropdown(
+ choices=["Helvetica-Bold", "Arial-Bold", "Impact", "Comic-Sans-MS"],
+ label="Font",
+ value="Helvetica-Bold"
+ )
+ font_size = gr.Slider(
+ minimum=40,
+ maximum=120,
+ value=80,
+ step=5,
+ label="Font Size"
+ )
+ with gr.Row():
+ text_color = gr.ColorPicker(label="Text Color", value="#FFFFFF")
+ highlight_color = gr.ColorPicker(label="Highlight Color", value="#0000FF")
+
+ generate_btn = gr.Button("Generate Video", variant="primary")
+
+ with gr.Column(scale=1):
+ video_output = gr.Video(label="Generated Video")
+ title_output = gr.Textbox(label="Title")
+ description_output = gr.Textbox(label="Description", lines=3)
+ script_output = gr.Textbox(label="Script", lines=5)
+ log_output = gr.Textbox(label="Process Log", lines=10)
+
+ # Set up the function to call when the generate button is clicked
+ generate_btn.click(
+ fn=create_youtube_short,
+ inputs=[
+ niche, language, gemini_api_key, assemblyai_api_key, elevenlabs_api_key,
+ segmind_api_key, text_gen, image_gen, tts_engine, tts_voice,
+ subtitle_font, font_size, text_color, highlight_color
+ ],
+        outputs=[video_output, title_output, description_output, script_output, log_output]
+ )
+
+# Launch the app
+if __name__ == "__main__":
+ demo.launch()
+