Spaces:

lolhaha002
/

redditbot

Paused

App Files Files Community

redditbot / app.py

lolhaha002

Rename app2.py to app.py

40efc14 verified 9 months ago

raw

history blame contribute delete

53.5 kB

	import os
	import praw
	import time
	import logging
	import gradio as gr
	from dotenv import load_dotenv
	import re
	from pydub import AudioSegment
	import asyncio
	import tempfile
	import edge_tts
	import random
	import assemblyai as aai
	from moviepy.config import change_settings
	from moviepy.editor import *
	from moviepy.editor import TextClip, CompositeVideoClip, AudioFileClip, ColorClip
	from PIL import Image, ImageDraw, ImageFont
	import numpy as np
	from huggingface_hub import HfApi, login
	import requests

	# Configure root logging once: INFO level, timestamped records on a stream handler.
	logging.basicConfig(
	    level=logging.INFO,
	    format="%(asctime)s [%(levelname)s] %(message)s",
	    handlers=[logging.StreamHandler()],
	)
	# Single module-level logger; the former second basicConfig()/getLogger()
	# round was redundant (basicConfig is a no-op once the root has handlers).
	logger = logging.getLogger(__name__)

	# Best-effort install of system fonts; the exit status is not checked.
	os.system("apt-get update && apt-get install -y fonts-dejavu")

	# Font used when rendering subtitle images; fail fast at import if it is missing.
	FONT_PATH = "MouldyCheeseRegular-WyMWG.ttf"
	if not os.path.exists(FONT_PATH):
	    raise FileNotFoundError(f"Font file not found: {FONT_PATH}")

	print(f"Using font at: {FONT_PATH}")

	change_settings({"IMAGEMAGICK_BINARY": None})  # Disable ImageMagick
	# Load environment variables (Hugging Face Secrets)
	load_dotenv()
	REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
	REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET")
	REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT")
	PEXELS_API_KEY = os.getenv("PEXELS_API_KEY")

	# Initialize Reddit client.
	# Bind the name up front so it always exists, even when construction fails;
	# later code then fails with a catchable error instead of a NameError.
	reddit = None
	try:
	    reddit = praw.Reddit(
	        client_id=REDDIT_CLIENT_ID,
	        client_secret=REDDIT_CLIENT_SECRET,
	        user_agent=REDDIT_USER_AGENT,
	    )
	    # Test the connection (one call is enough; its result is printed).
	    print(reddit.user.me())
	except Exception as e:
	    # Startup continues on failure; the problem is only logged here.
	    logger.error(f"Failed to initialize Reddit client: {e}")
	    logger.error("Please check your Reddit API credentials in the .env file")
	    logger.error("Required environment variables: REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT")
	# Selectable TTS voices: voice ID -> human-readable description.
	VOICE_OPTIONS = {
	    # --- American English ---
	    "en-US-GuyNeural": "Male (American)",
	    "en-US-JennyNeural": "Female (American)",
	    "en-US-AriaNeural": "Female (American)",

	    # --- British English ---
	    "en-GB-RyanNeural": "Male (British)",
	    "en-GB-SoniaNeural": "Female (British)",
	    "en-GB-LibbyNeural": "Female (British)",
	    "en-GB-AlfieNeural": "Male (British)",
	    "en-GB-ElliotNeural": "Male (British)",

	    # --- Australian English ---
	    "en-AU-NatashaNeural": "Female (Australian)",
	    "en-AU-WilliamNeural": "Male (Australian)",
	    "en-AU-AnnetteNeural": "Female (Australian)",
	    "en-AU-CarlyNeural": "Female (Australian)",
	    "en-AU-DuncanNeural": "Male (Australian)",

	    # --- Indian English ---
	    "en-IN-NeerjaNeural": "Female (Indian)",
	    "en-IN-PrabhatNeural": "Male (Indian)",

	    # --- Irish English ---
	    "en-IE-ConnorNeural": "Male (Irish)",
	    "en-IE-EmilyNeural": "Female (Irish)",

	    # --- Canadian English ---
	    "en-CA-ClaraNeural": "Female (Canadian)",
	    "en-CA-LiamNeural": "Male (Canadian)",
	}

	# Selectable video backgrounds: option name -> short description.
	BACKGROUND_OPTIONS = {
	    "Green": "Solid green background",
	    "Black": "Solid black background",
	    "Minecraft": "Minecraft gameplay",
	    "Cake Making": "Oddly Satisfying Cake Making",
	    "Satisfying ART": "Satisfying Art background",
	    "Pexels": "Use stock videos based on keywords from Pexels",
	}

	# Directory to save audio files
	os.makedirs("audio_outputs", exist_ok=True)

	HF_TOKEN = os.getenv("HF_TOKEN")  # Fetch the token from Hugging Face Secrets
	if HF_TOKEN:
	    login(HF_TOKEN)
	else:
	    # Calling login() without a token would fall back to an interactive
	    # prompt, which cannot be answered in a headless deployment.
	    logging.getLogger(__name__).warning("HF_TOKEN is not set; skipping Hugging Face login")

	# Initialize Hugging Face API
	hf_api = HfApi()

	def clean_text(text):
	    """Strip emoji and other unsupported characters from *text*.

	    Two passes are applied: first every run of code points in the range
	    U+10000..U+10FFFF is dropped, then every character that is not a word
	    character, whitespace, or one of ``. , ! ? ' " -`` is dropped.
	    """
	    without_emoji = re.sub(r'[\U00010000-\U0010FFFF]+', '', text)
	    return re.sub(r'[^\w\s.,!?\'"-]', '', without_emoji)

	def load_nsfw_words(file_path="nsfw_words.txt"):
	    """Load NSFW words from a file.

	    Args:
	        file_path: Path to a text file with one word or phrase per line.

	    Returns:
	        A list of the non-blank lines, stripped and lower-cased. An empty
	        list is returned (and a warning logged) when the file does not exist.
	    """
	    if not os.path.exists(file_path):
	        logger.warning(f"NSFW words file not found: {file_path}")
	        return []
	    # Decode explicitly as UTF-8 so the result does not depend on the
	    # platform's default locale encoding.
	    with open(file_path, "r", encoding="utf-8") as f:
	        return [line.strip().lower() for line in f if line.strip()]

	# Load the NSFW word list once at import time; filter_nsfw_words() and
	# contains_nsfw_words() read this module-level list.
	NSFW_WORDS = load_nsfw_words()

	def filter_nsfw_words(text):
	    """Replace every whole-word occurrence of an NSFW word with ``[beep]``.

	    Matching is case-insensitive. A match only counts when the characters
	    directly before and after it are not alphanumeric (or the match touches
	    the start/end of the text), so "class" is untouched when "ass" is listed.

	    All words are replaced in a single regex pass over the original text.
	    This avoids re-scanning inserted "[beep]" markers (which looped forever
	    when "beep" itself was listed) and avoids using indices computed on a
	    lower-cased copy whose length can differ from the original.

	    Args:
	        text: Text to censor; falsy values are returned unchanged.

	    Returns:
	        The censored text.
	    """
	    if not text:
	        return text

	    # De-duplicate while keeping order, then try longer entries first so a
	    # longer phrase wins over a shorter entry that is its prefix.
	    unique_words = [word for word in dict.fromkeys(NSFW_WORDS) if word]
	    if not unique_words:
	        return text
	    unique_words.sort(key=len, reverse=True)
	    alternatives = "|".join(re.escape(word) for word in unique_words)

	    # [^\W_] is "alphanumeric": a word character that is not an underscore.
	    pattern = re.compile(rf"(?<![^\W_])(?:{alternatives})(?![^\W_])", re.IGNORECASE)
	    return pattern.sub("[beep]", text)

	def contains_nsfw_words(text):
	    """Check if text contains NSFW words.

	    Uses the same rule as filter_nsfw_words(): a case-insensitive match that
	    is not directly surrounded by alphanumeric characters. Plain substring
	    matching would flag innocent words such as "class" for "ass".

	    Args:
	        text: Text to inspect; ``None`` or an empty string yields ``False``.

	    Returns:
	        ``True`` when at least one listed word occurs as a whole word.
	    """
	    if not text:
	        return False
	    return any(
	        re.search(rf"(?<![^\W_]){re.escape(word)}(?![^\W_])", text, re.IGNORECASE)
	        for word in NSFW_WORDS
	        if word
	    )

	# Word counter backing the story input box.
	def count_words(text):
	    """Return a label such as ``"12 words"`` for the given story text.

	    Falsy input (``None`` or an empty string) counts as zero words.
	    """
	    if not text:
	        return "0 words"
	    tokens = text.split()
	    return f"{len(tokens)} words"

	def fetch_top_post_and_comments(subreddit_url, filter_type="hot", time_filter="day", max_duration=45, min_duration=30, max_retries=10):
	    """Pick a random non-NSFW post from a subreddit plus comments that fit a duration window.

	    Up to 20 posts are fetched ("hot", or "top" for *time_filter*). Posts are
	    tried in random order, at most *max_retries* times, until one is found
	    whose title/body plus leading top-level comments has an estimated
	    narration time within [*min_duration*, *max_duration*] seconds.

	    Args:
	        subreddit_url: Subreddit URL; the last path segment is used as the name.
	        filter_type: "hot" selects the hot listing, anything else the top listing.
	        time_filter: Time filter passed to the top listing.
	        max_duration: Upper bound on the estimated duration, in seconds.
	        min_duration: Lower bound on the estimated duration, in seconds.
	        max_retries: Maximum number of posts to try.

	    Returns:
	        ``(post_content, top_comments)``: the cleaned post text and a
	        non-empty list of cleaned comment texts.

	    Raises:
	        ValueError: On any failure (no suitable post, Reddit API error, ...).
	    """
	    # Texts that remain after clean_text() for deleted/removed comments
	    # (clean_text strips the square brackets).
	    placeholder_texts = ("deleted", "[deleted]", "removed", "[removed]")

	    try:
	        # Extract subreddit name
	        subreddit_name = subreddit_url.rstrip("/").split("/")[-1]
	        subreddit = reddit.subreddit(subreddit_name)
	        logger.info(f"Fetching posts from subreddit: {subreddit_name}")

	        # Fetch posts based on filter type
	        if filter_type == "hot":
	            listing = subreddit.hot(limit=20)
	        else:
	            listing = subreddit.top(limit=20, time_filter=time_filter)

	        # Keep only posts that are NOT marked NSFW; this is the candidate pool.
	        candidates = [post for post in listing if not post.over_18]

	        for attempt in range(1, max_retries + 1):
	            logger.info(f"Attempt {attempt}/{max_retries} to find suitable post")

	            if not candidates:
	                raise ValueError("No suitable posts found.")

	            # Randomly select a post and take it out of the pool
	            top_post = candidates.pop(random.randrange(len(candidates)))

	            # Clean and filter title and selftext
	            title = filter_nsfw_words(clean_text(top_post.title))
	            selftext = filter_nsfw_words(clean_text(top_post.selftext))

	            post_content = f"{title}"
	            if selftext:
	                post_content += f". {selftext}"

	            total_duration = estimate_audio_duration(post_content)
	            logger.info(f"Post content duration: {total_duration}s")

	            if total_duration > max_duration:
	                logger.debug(f"Post content too long ({total_duration}s), trying another")
	                continue

	            # Collect comments while staying within duration limits
	            top_comments = []
	            top_post.comments.replace_more(limit=0)

	            for comment in top_post.comments:
	                if not isinstance(comment, praw.models.Comment):
	                    continue

	                comment_text = clean_text(comment.body)

	                # Skip deleted/removed placeholders and comments that are
	                # empty once cleaned, so they are not narrated.
	                normalized = comment_text.strip().lower()
	                if not normalized or normalized in placeholder_texts:
	                    continue

	                # Filter NSFW words
	                comment_text = filter_nsfw_words(comment_text)

	                comment_duration = estimate_audio_duration(comment_text)
	                logger.debug(f"Comment duration: {comment_duration}s")

	                # Stop at the first comment that would overshoot the limit.
	                if total_duration + comment_duration > max_duration:
	                    break

	                top_comments.append(comment_text)
	                total_duration += comment_duration

	            # Verify final duration
	            if min_duration <= total_duration <= max_duration and top_comments:
	                logger.info(f"Found suitable content with {len(top_comments)} comments")
	                return post_content, top_comments

	            logger.warning(f"Content duration ({total_duration}s) outside acceptable range or no valid comments")

	        raise ValueError("Unable to find suitable post within retry limit.")
	    except praw.exceptions.PRAWException as e:
	        logger.error(f"Reddit API error: {e}")
	        raise ValueError(f"Failed to fetch Reddit content: {e}") from e
	    except Exception as e:
	        # Also wraps the ValueErrors raised above, so callers see one error type.
	        logger.error(f"Unexpected error fetching content: {e}")
	        raise ValueError(f"Failed to fetch Reddit content: {e}") from e

	def fetch_post_and_comments_from_url(post_url, max_duration=45, min_duration=30):
	    """Fetch a specific Reddit post and its comments using the post URL.

	    Top-level comments are added in order until the estimated narration time
	    reaches *min_duration*; comments that would push the total past
	    *max_duration* are skipped in favour of shorter ones.

	    Args:
	        post_url: URL of the Reddit post.
	        max_duration: Upper bound on the estimated duration, in seconds.
	        min_duration: Lower bound on the estimated duration, in seconds.

	    Returns:
	        ``(post_content, top_comments)``: the cleaned post text and a
	        non-empty list of cleaned comment texts.

	    Raises:
	        ValueError: If the post is NSFW, the post alone is too long, the
	            collected content is too short, or no usable comment was found.
	    """
	    # Texts that remain after clean_text() for deleted/removed comments
	    # (clean_text strips the square brackets).
	    placeholder_texts = ("deleted", "[deleted]", "removed", "[removed]")

	    reddit = praw.Reddit(
	        client_id=REDDIT_CLIENT_ID,
	        client_secret=REDDIT_CLIENT_SECRET,
	        user_agent=REDDIT_USER_AGENT,
	    )

	    # Fetch the submission (post) using the URL
	    submission = reddit.submission(url=post_url)
	    logger.info(f"Fetching post from URL: {post_url}")

	    if submission.over_18:
	        logger.warning("Post is marked as NSFW")
	        raise ValueError("The post is marked as NSFW and cannot be processed.")

	    # Clean and filter title and selftext
	    title = filter_nsfw_words(clean_text(submission.title))
	    selftext = filter_nsfw_words(clean_text(submission.selftext))

	    post_content = f"{title}"
	    if selftext:
	        post_content += f". {selftext}"

	    total_duration = estimate_audio_duration(post_content)
	    logger.info(f"Post content duration: {total_duration}s")

	    # A post that alone exceeds the maximum duration is rejected (not truncated).
	    if total_duration > max_duration:
	        logger.warning(f"Post content too long ({total_duration}s > {max_duration}s)")
	        raise ValueError(f"Post content alone exceeds maximum duration ({total_duration}s > {max_duration}s)")

	    # Fetch comments while respecting duration constraints
	    top_comments = []
	    submission.comments.replace_more(limit=0)

	    # Keep collecting comments until we hit minimum duration or run out of comments
	    for comment in submission.comments:
	        if not isinstance(comment, praw.models.Comment):
	            continue

	        comment_text = clean_text(comment.body)

	        # Skip deleted/removed placeholders and comments that are empty
	        # once cleaned, so they are not narrated.
	        normalized = comment_text.strip().lower()
	        if not normalized or normalized in placeholder_texts:
	            continue

	        # Filter NSFW words
	        comment_text = filter_nsfw_words(comment_text)

	        comment_duration = estimate_audio_duration(comment_text)
	        logger.debug(f"Potential comment duration: {comment_duration}s")

	        # Check if adding this comment would exceed max duration
	        if total_duration + comment_duration > max_duration:
	            if total_duration >= min_duration:
	                logger.info(f"Reached sufficient duration ({total_duration}s), skipping remaining comments")
	                break
	            logger.debug(f"Comment would exceed max duration ({total_duration + comment_duration}s > {max_duration}s), but haven't reached min duration yet. Looking for shorter comments...")
	            continue

	        top_comments.append(comment_text)
	        total_duration += comment_duration
	        logger.debug(f"Added comment. New total duration: {total_duration}s")

	        # If we've reached minimum duration, we can stop
	        if total_duration >= min_duration:
	            logger.info(f"Reached minimum duration ({total_duration}s)")
	            break

	    # Now check if we have enough content
	    if total_duration < min_duration:
	        logger.warning(f"Content too short ({total_duration}s < {min_duration}s)")
	        raise ValueError(f"Content is too short ({total_duration}s) to generate audio in the desired duration range ({min_duration}s-{max_duration}s).")

	    if not top_comments:
	        logger.warning("No valid comments found")
	        raise ValueError("No valid comments found in the post.")

	    logger.info(f"Successfully fetched post with {len(top_comments)} comments. Total duration: {total_duration}s")
	    return post_content, top_comments

	def create_video_from_story(story_text, selected_voice, rate, pitch, background, pexels_keywords=None):
	    """Turn free-form story text into a narrated, subtitled video.

	    The text is cleaned and NSFW-filtered, converted to speech, transcribed
	    into subtitle clips, and composed over the chosen background.

	    Args:
	        story_text: Raw story text entered by the user.
	        selected_voice: Voice passed to text_to_speech().
	        rate: Speech rate passed to text_to_speech().
	        pitch: Speech pitch passed to text_to_speech().
	        background: Background option passed to create_video_with_background().
	        pexels_keywords: Optional keywords for the Pexels background.

	    Returns:
	        ``(video_path, video_url, message)``; the first two are ``None``
	        when a step fails and *message* names the failed step.
	    """
	    cleaned_story = filter_nsfw_words(clean_text(story_text))

	    audio_path = asyncio.run(text_to_speech(cleaned_story, voice=selected_voice, rate=rate, pitch=pitch))
	    if not audio_path:
	        return None, None, "Failed to generate audio from story"

	    try:
	        subtitles = generate_subtitles(audio_path)
	        if subtitles is None:
	            return None, None, "Failed to generate subtitles"

	        timestamp = int(time.time())
	        output_path = f"/tmp/story_{timestamp}.mp4"
	        video_path, video_url = create_video_with_background(
	            audio_path=audio_path,
	            subtitles=subtitles,
	            subreddit_url="story",
	            selected_font="Mouldy Cheese",
	            background=background,
	            output_path=output_path,
	            pexels_keywords=pexels_keywords,
	        )

	        if not video_path:
	            return None, None, "Failed to create video"

	        return video_path, video_url, "Video generated successfully from story!"
	    finally:
	        # Remove the intermediate audio on every exit path (previously it was
	        # only removed on success and leaked whenever a later step failed).
	        try:
	            if os.path.exists(audio_path):
	                os.remove(audio_path)
	        except OSError as e:
	            logger.warning(f"Failed to clean up audio: {e}")

	def estimate_audio_duration(text, words_per_second=3.5, pause_per_sentence=1.0, pause_per_comment=1.5):
	    """Return a rough narration time in seconds for *text*.

	    The estimate is the speaking time (whitespace-separated words divided by
	    *words_per_second*) plus one *pause_per_sentence* for each ``.``, ``!``
	    or ``?`` and one *pause_per_comment* for each ``". Comments: "`` marker.
	    """
	    speaking_time = len(text.split()) / words_per_second
	    sentence_endings = sum(text.count(mark) for mark in ".!?")
	    comment_transitions = text.count('. Comments: ')  # comment hand-over markers
	    return speaking_time + (sentence_endings * pause_per_sentence) + (comment_transitions * pause_per_comment)

	async def text_to_speech(text, voice="en-US-GuyNeural - Male (American)", rate=0, pitch=0):
	    """Convert text to speech using edge-tts.

	    Args:
	        text: Text to narrate.
	        voice: Voice ID, optionally followed by " - <description>" as shown
	            in the UI; only the part before " - " is used.
	        rate: Speech rate adjustment in percent; rounded to an integer.
	        pitch: Pitch adjustment in Hz; rounded to an integer.

	    Returns:
	        Path of the generated MP3 under ``temp/``, or ``None`` on any failure
	        (errors are logged, not raised).
	    """
	    try:
	        # Extract voice ID from the display string (e.g., "en-US-GuyNeural - Male (American)" -> "en-US-GuyNeural")
	        voice_id = voice.split(" - ")[0]

	        # Format rate and pitch as required by edge-tts. The ":+d" format only
	        # accepts integers, so float (or missing) inputs are coerced first.
	        rate_str = f"{int(round(rate or 0)):+d}%"
	        pitch_str = f"{int(round(pitch or 0)):+d}Hz"

	        # Create output directory if it doesn't exist
	        os.makedirs("temp", exist_ok=True)

	        # Nanosecond timestamp so two requests in the same second do not
	        # write to the same file.
	        timestamp = time.time_ns()
	        output_file = f"temp/speech_{timestamp}.mp3"

	        logger.info(f"Generating TTS with voice: {voice_id}, rate: {rate_str}, pitch: {pitch_str}")

	        # Configure voice settings
	        communicate = edge_tts.Communicate(text, voice_id, rate=rate_str, volume='+0%', pitch=pitch_str)

	        # Convert to audio
	        await communicate.save(output_file)

	        if os.path.exists(output_file):
	            logger.info(f"Successfully generated audio file: {output_file}")
	            return output_file

	        logger.error("Failed to generate audio file")
	        return None

	    except Exception as e:
	        logger.error(f"Error in text_to_speech: {e}")
	        return None

	def generate_subtitles(audio_path):
	    """Transcribe *audio_path* and build one subtitle image clip per spoken word.

	    The audio is transcribed with AssemblyAI. For every word an image is
	    rendered that shows a small window of neighbouring words with the current
	    word in yellow and the rest in white, and the image is wrapped in an
	    ImageClip timed to that word.

	    Args:
	        audio_path: Path to the narration audio file.

	    Returns:
	        A list of timed ImageClip objects, or ``None`` on any failure
	        (missing file, missing API key, empty transcript, no clip created).
	    """
	    try:
	        if not audio_path or not os.path.exists(audio_path):
	            logger.error(f"Invalid audio path: {audio_path}")
	            return None

	        # Initialize AssemblyAI
	        aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
	        if not aai.settings.api_key:
	            logger.error("AssemblyAI API key not found")
	            return None

	        transcriber = aai.Transcriber()
	        logger.info(f"Uploading audio file: {audio_path}")
	        with open(audio_path, "rb") as audio_file:
	            transcript = transcriber.transcribe(audio_file)

	        transcript.wait_for_completion()
	        words = transcript.words
	        if not words:
	            logger.error("No words found in transcript")
	            return None

	        logger.info(f"Received {len(words)} words from transcription")

	        # Load the font once instead of once per word.
	        try:
	            font = ImageFont.truetype(FONT_PATH, 72)
	        except Exception:
	            logger.warning(f"Failed to load font from {FONT_PATH}, using default")
	            font = ImageFont.load_default()

	        def create_text_image(context_words, highlight_index, size=(1000, 150)):
	            """Render *context_words* on a transparent canvas, highlighting one by position."""
	            # RGBA canvas, fully transparent background
	            image = Image.new('RGBA', size, (0, 0, 0, 0))
	            draw = ImageDraw.Draw(image)

	            # Measure the text as drawn (no marker characters) to centre it.
	            full_text = " ".join(context_words)
	            bbox = draw.textbbox((0, 0), full_text, font=font)
	            text_width = bbox[2] - bbox[0]
	            text_height = bbox[3] - bbox[1]

	            # Calculate position to center the text
	            x = (size[0] - text_width) // 2
	            y = (size[1] - text_height) // 2

	            # Draw each word with appropriate color
	            current_x = x
	            for position, word in enumerate(context_words):
	                word_with_space = word + " "
	                word_bbox = draw.textbbox((0, 0), word_with_space, font=font)
	                word_width = word_bbox[2] - word_bbox[0]

	                # Highlight by position, not by text: comparing text would
	                # colour every repeat of the word in the window ("the the").
	                if position == highlight_index:
	                    color = (255, 255, 0, 255)  # yellow, full opacity
	                else:
	                    color = (255, 255, 255, 255)  # white, full opacity
	                draw.text((current_x, y), word_with_space, font=font, fill=color)
	                current_x += word_width

	            # RGBA pixel array (alpha channel included)
	            return np.array(image)

	        subtitles = []
	        window_size = 4  # Controls how many neighbouring words are shown

	        for i, current_word in enumerate(words):
	            try:
	                # Get context words (previous and next words)
	                start_idx = max(0, i - window_size // 2)
	                end_idx = min(len(words), i + window_size // 2 + 1)
	                context_words = [w.text for w in words[start_idx:end_idx]]

	                # Create image with text; i - start_idx is the current word's
	                # position inside the window.
	                img_array = create_text_image(context_words, i - start_idx)

	                # Convert to ImageClip
	                clip = ImageClip(img_array)

	                # Set timing: word timestamps are divided by 1000
	                # (presumably milliseconds -> seconds; confirm against the SDK).
	                start_time = float(current_word.start) / 1000
	                duration = float(current_word.end - current_word.start) / 1000

	                # Position and time the clip
	                clip = (clip
	                        .set_position(('center', 'center'))
	                        .set_start(start_time)
	                        .set_duration(duration))

	                subtitles.append(clip)
	                logger.info(f"Successfully created clip for word: {current_word.text}")

	            except Exception as clip_error:
	                # One bad word must not abort the whole subtitle track.
	                logger.error(f"Failed to create clip for word '{current_word.text}'. Error: {clip_error}")
	                continue

	        if not subtitles:
	            logger.error("No valid subtitle clips were created")
	            return None

	        logger.info(f"Successfully generated {len(subtitles)} subtitle clips")
	        return subtitles

	    except Exception as e:
	        logger.error(f"Error generating subtitles: {e}")
	        logger.exception("Full traceback:")
	        return None

	def is_post_url(url):
	    """Return True when *url* points at an individual Reddit post.

	    The URL must start with ``http(s)://[www.]reddit.com/r/<sub>/comments/<id>``;
	    falsy input yields ``False``.
	    """
	    if url:
	        return re.match(r'https?://(?:www\.)?reddit\.com/r/\w+/comments/\w+/?', url) is not None
	    return False

	def generate_audio_from_reddit(url, filter_type, time_filter, selected_voice, rate, pitch):
	    """Fetch Reddit content for *url* and narrate it to an audio file.

	    A direct post URL is fetched as that post; any other URL is treated as a
	    subreddit from which a suitable post is picked.

	    Args:
	        url: Reddit post URL or subreddit URL.
	        filter_type: Listing type used for subreddit URLs.
	        time_filter: Time filter used for subreddit URLs.
	        selected_voice: Voice passed to text_to_speech().
	        rate: Speech rate passed to text_to_speech().
	        pitch: Speech pitch passed to text_to_speech().

	    Returns:
	        Path of the generated audio file, or ``None`` on any failure
	        (errors are logged, not raised).
	    """
	    try:
	        # Get content based on URL type
	        try:
	            if is_post_url(url):
	                logger.info("Processing direct post URL...")
	                post_content, comments = fetch_post_and_comments_from_url(url)
	            else:
	                logger.info("Processing subreddit URL...")
	                post_content, comments = fetch_top_post_and_comments(url, filter_type, time_filter)
	        except ValueError as e:
	            logger.error(f"Error fetching content: {e}")
	            return None
	        except Exception as e:
	            logger.error(f"Unexpected error fetching content: {e}")
	            return None

	        if not post_content or not comments:
	            logger.error("Failed to get Reddit content")
	            return None

	        # Combine content. A space follows the "Hey, Listen:" cue so the first
	        # comment is not glued onto the colon.
	        narrated_comments = " ".join(f"{comment}." for comment in comments)
	        combined_content = f"{post_content}. Hey, Listen: {narrated_comments}"

	        # Generate audio
	        audio_path = asyncio.run(text_to_speech(combined_content, voice=selected_voice, rate=rate, pitch=pitch))

	        if not audio_path:
	            logger.error("Failed to generate audio")
	            return None

	        logger.info(f"Successfully generated audio at: {audio_path}")
	        return audio_path

	    except Exception as e:
	        logger.error(f"Error in generate_audio_from_reddit: {e}")
	        return None

	def search_pexels_video(query, page=1, per_page=5):
	    """Search Pexels for *query* and return a download link for one random hit.

	    A random video is chosen from the requested result page. Among its files,
	    those at least 720 px wide are preferred (all files if none qualify) and
	    the one with the largest pixel area is returned.

	    Args:
	        query: Search keyword.
	        page: Result page to request.
	        per_page: Number of results per page.

	    Returns:
	        The file's download URL, or ``None`` when the API key is missing, the
	        request fails, or no usable video file is found.
	    """
	    if not PEXELS_API_KEY:
	        logger.error("PEXELS_API_KEY is missing from environment variables")
	        return None

	    logger.info(f"Searching Pexels for: '{query}', page: {page}, per_page: {per_page}")

	    headers = {"Authorization": PEXELS_API_KEY}
	    params = {"query": query, "per_page": per_page, "page": page}
	    try:
	        # Bounded wait so a stalled connection cannot hang video generation.
	        response = requests.get("https://api.pexels.com/videos/search", headers=headers, params=params, timeout=30)
	    except requests.exceptions.RequestException as e:
	        logger.error(f"Pexels API request failed: {e}")
	        return None

	    logger.debug(f"Pexels API response status: {response.status_code} for query '{query}' page {page}")
	    if response.status_code != 200:
	        logger.error(f"Pexels API error: {response.text}")
	        return None

	    try:
	        data = response.json()
	    except ValueError as e:
	        logger.error(f"Pexels API returned invalid JSON: {e}")
	        return None

	    videos_on_page = data.get("videos", [])
	    logger.info(f"Found {len(videos_on_page)} videos for query: {query}")
	    if not videos_on_page:
	        logger.warning(f"No videos found on page {page} for query: {query}")
	        return None

	    # Pick a random video from the page for variety instead of always the first.
	    selected_video_data = random.choice(videos_on_page)

	    video_files = selected_video_data.get("video_files", [])
	    if not video_files:
	        logger.warning(f"Selected video (ID: {selected_video_data.get('id')}) has no video_files.")
	        return None

	    def width_of(file_info):
	        # Missing or null dimensions count as 0 instead of raising TypeError.
	        return file_info.get("width") or 0

	    def height_of(file_info):
	        return file_info.get("height") or 0

	    # Prefer files at least 720 px wide; fall back to all files if none qualify.
	    filtered_files = [f for f in video_files if width_of(f) >= 720]
	    files_to_consider = filtered_files or video_files

	    # Pick the highest resolution (largest pixel area) from the considered files.
	    best_file = max(files_to_consider, key=lambda f: width_of(f) * height_of(f))

	    logger.info(f"Selected video file: {best_file['link']} ({best_file.get('width')}x{best_file.get('height')}) for query '{query}' page {page}")
	    return best_file["link"]

	def _fetch_and_process_single_pexels_video(keyword_query, page_number, target_resolution=(1080, 1920)):
	    """Download one Pexels video for a keyword and fit it to the target frame.

	    The video is saved to a temporary ``.mp4`` under ``/tmp``, cut to at most
	    3 seconds, scaled so it covers *target_resolution*, and centre-cropped.

	    Returns:
	        ``(clip, temp_path)`` on success, ``(None, None)`` on failure. On
	        success the temporary file is kept and the source clip stays open;
	        cleaning both up is left to the caller.
	    """
	    url = search_pexels_video(keyword_query, page=page_number)
	    if not url:
	        logger.warning(f"No Pexels URL found for '{keyword_query}'.")
	        return None, None

	    # Reserve a unique temp path; only the path is needed, not the descriptor.
	    temp_dir = "/tmp"
	    os.makedirs(temp_dir, exist_ok=True)
	    temp_fd, temp_path = tempfile.mkstemp(suffix=".mp4", dir=temp_dir, prefix="pexels_")
	    os.close(temp_fd)

	    def discard_temp_file():
	        # Remove the download if it is still on disk.
	        if os.path.exists(temp_path):
	            os.remove(temp_path)

	    source_clip = None
	    try:
	        logger.info(f"Downloading Pexels clip from {url} to {temp_path}")
	        with requests.get(url, stream=True, timeout=30) as r:
	            r.raise_for_status()
	            with open(temp_path, "wb") as f:
	                for chunk in r.iter_content(chunk_size=8192):
	                    f.write(chunk)
	        logger.info(f"Pexels clip downloaded to {temp_path}")

	        if not os.path.exists(temp_path) or os.path.getsize(temp_path) == 0:
	            logger.error(f"Downloaded Pexels clip is missing or empty: {temp_path}")
	            discard_temp_file()
	            return None, None

	        source_clip = VideoFileClip(temp_path)

	        # Use at most the first 3 seconds of the download.
	        if source_clip.duration > 3.0:
	            logger.info(f"Original Pexels clip for '{keyword_query}' is {source_clip.duration:.2f}s long. Subclipping to 3.0s.")
	            segment = source_clip.subclip(0, 3.0)
	        else:
	            segment = source_clip

	        # Scale along the axis that makes the clip cover the whole target frame.
	        clip_aspect_ratio = segment.w / segment.h
	        target_aspect_ratio = target_resolution[0] / target_resolution[1]
	        if clip_aspect_ratio > target_aspect_ratio:
	            scaled = segment.resize(height=target_resolution[1])
	        else:
	            scaled = segment.resize(width=target_resolution[0])

	        # Centre-crop the overflow down to the exact target size.
	        fitted = scaled.crop(
	            x_center=scaled.w / 2,
	            y_center=scaled.h / 2,
	            width=target_resolution[0],
	            height=target_resolution[1],
	        )

	        # The source clip is intentionally left open: the returned clip is
	        # derived from it, so closing is the caller's responsibility.
	        logger.info(f"Pexels clip for '{keyword_query}' downloaded and processed. Path: {temp_path}, Final Segment Duration: {fitted.duration:.2f}s")
	        return fitted, temp_path
	    except requests.exceptions.RequestException as e:
	        logger.error(f"Failed to download Pexels video for '{keyword_query}': {e}")
	        discard_temp_file()
	        return None, None
	    except Exception as e:
	        logger.error(f"Failed to process Pexels video: {e}")
	        if source_clip:
	            source_clip.close()
	        discard_temp_file()
	        return None, None

	def get_video_clip(background_type, duration, target_resolution=(1080, 1920), pexels_keywords=None):
	    """Build the background clip for a video of the given duration.

	    Args:
	        background_type: "Pexels", "Black", "Green", or the name of a bundled
	            background video ("Minecraft", "Cake Making", "Satisfying ART").
	        duration: Desired clip length in seconds.
	        target_resolution: ``(width, height)`` of the output frame.
	        pexels_keywords: List of search keywords; only used for "Pexels".

	    Returns:
	        ``(clip, temp_files)``: the background clip and a list of temporary
	        file paths the caller should delete when done (non-empty only for
	        Pexels). A solid green clip is returned on any error, unknown
	        background, or missing file. A Pexels clip may be shorter than
	        *duration* when too few clips were found.
	    """
	    logger.info(f"[DEBUG] background_type: '{background_type}', pexels_keywords: '{pexels_keywords}'")

	    def solid_background(color=(0, 255, 0)):
	        # Solid-colour clip (green by default) with no temp files to clean up.
	        return ColorClip(size=target_resolution, color=color).set_duration(duration), []

	    try:
	        if background_type == "Pexels":
	            if not pexels_keywords or not isinstance(pexels_keywords, list) or not any(kw.strip() for kw in pexels_keywords):
	                logger.warning("No valid Pexels keywords provided or list is empty. Using solid green background.")
	                return solid_background()

	            collected_clips = []
	            temp_files_to_delete_later = []
	            current_total_duration = 0.0

	            # Cycle through keywords, fetching one clip per keyword per cycle,
	            # until the requested duration is covered.
	            keyword_page_trackers = {kw: 1 for kw in pexels_keywords}  # next result page per keyword
	            MAX_FAILURES_PER_KEYWORD_TOTAL = 3  # consecutive failures before a keyword is dropped
	            keyword_failure_counts = {kw: 0 for kw in pexels_keywords}

	            active_keywords = list(pexels_keywords)  # keywords still being tried

	            while current_total_duration < duration and any(active_keywords):
	                keyword_processed_in_cycle = False
	                for keyword in list(active_keywords):  # iterate over a copy for safe removal
	                    if current_total_duration >= duration:
	                        break

	                    if keyword_failure_counts[keyword] >= MAX_FAILURES_PER_KEYWORD_TOTAL:
	                        if keyword in active_keywords:
	                            active_keywords.remove(keyword)  # stop trying this keyword
	                        continue

	                    current_page = keyword_page_trackers[keyword]
	                    logger.info(f"Trying keyword '{keyword}', page {current_page}. Total duration: {current_total_duration:.2f}s / {duration:.2f}s")
	                    clip_segment, temp_file_path = _fetch_and_process_single_pexels_video(keyword, current_page, target_resolution)

	                    # Either way the next attempt for this keyword uses the next page.
	                    keyword_page_trackers[keyword] += 1

	                    if clip_segment and temp_file_path:
	                        collected_clips.append(clip_segment)
	                        temp_files_to_delete_later.append(temp_file_path)
	                        current_total_duration += clip_segment.duration
	                        keyword_failure_counts[keyword] = 0  # reset failure count on success
	                        keyword_processed_in_cycle = True
	                        logger.info(f"Added clip for '{keyword}' (page {current_page}). Segment: {clip_segment.duration:.2f}s. Total: {current_total_duration:.2f}s")
	                    else:
	                        logger.warning(f"No clip found for '{keyword}' on page {current_page}.")
	                        keyword_failure_counts[keyword] += 1
	                        if keyword_failure_counts[keyword] >= MAX_FAILURES_PER_KEYWORD_TOTAL:
	                            logger.info(f"Max total failures reached for keyword '{keyword}'. Removing from active list.")
	                            if keyword in active_keywords:
	                                active_keywords.remove(keyword)

	                # A full cycle that produced nothing means further cycles are
	                # unlikely to help; give up instead of spinning.
	                if not keyword_processed_in_cycle and any(active_keywords):
	                    logger.info("A full cycle through active keywords yielded no new clips. Stopping Pexels search.")
	                    break

	            if not collected_clips:
	                logger.warning("No Pexels clips were collected. Using solid green background.")
	                return solid_background()

	            # Concatenate all collected clips
	            logger.info(f"Concatenating {len(collected_clips)} Pexels clips.")
	            final_pexels_video = concatenate_videoclips(collected_clips, method="compose")

	            if final_pexels_video.duration == 0:  # should not happen if clips were collected
	                logger.error("Concatenated Pexels video has zero duration. This should not happen if clips were collected. Using solid green.")
	                for p_clip in collected_clips:
	                    p_clip.close()
	                for f_path in temp_files_to_delete_later:
	                    if os.path.exists(f_path):
	                        os.remove(f_path)
	                return solid_background()

	            if final_pexels_video.duration > duration:
	                logger.info(f"Subclipping concatenated Pexels video (duration {final_pexels_video.duration:.2f}s) to target duration ({duration:.2f}s)")
	                final_pexels_video = final_pexels_video.subclip(0, duration)
	            else:
	                logger.info(f"Collected Pexels video duration {final_pexels_video.duration:.2f}s. Target audio duration is {duration:.2f}s. Video will be adjusted to audio duration during final composition if shorter.")

	            return final_pexels_video, temp_files_to_delete_later

	        # --- Other background types (solid colours, bundled videos) ---
	        # Bundled videos are looked up next to this script.
	        script_dir = os.path.dirname(os.path.abspath(__file__))
	        video_paths = {
	            "Minecraft": os.path.join(script_dir, "Minecraft.mp4"),
	            "Cake Making": os.path.join(script_dir, "A Collection OF CAKE Oddly Satisfying Chocolate Cake You Never Seen _ Awesome Cake Decorating Ideas.mp4"),
	            "Satisfying ART": os.path.join(script_dir, "TOP 80 Satisfying Art Videos _ Best of The Year Quantastic.mp4"),
	        }

	        # Handle solid color backgrounds
	        if background_type == "Black":
	            logger.info("Creating solid black background")
	            return solid_background((0, 0, 0))
	        if background_type == "Green":
	            logger.info("Creating solid green background")
	            return solid_background()

	        # Handle predefined video backgrounds
	        if background_type not in video_paths:
	            logger.error(f"Invalid background type: {background_type}")
	            logger.info(f"Available backgrounds: {list(video_paths.keys())}")
	            return solid_background()  # default to green

	        video_path = video_paths[background_type]
	        if not os.path.exists(video_path):
	            logger.error(f"Background video not found: {video_path}")
	            logger.info(f"Looking in directory: {script_dir}")
	            logger.info(f"Available files: {os.listdir(script_dir)}")
	            return solid_background()

	        logger.info(f"Loading background video: {video_path}")
	        # NOTE: 'video' is not closed here because the returned clip is derived
	        # from it; nothing in this function releases it later.
	        video = VideoFileClip(video_path)

	        # Random window of the requested length inside the source video.
	        max_start = max(0, video.duration - duration)
	        start_time = random.uniform(0, max_start) if max_start > 0 else 0
	        logger.info(f"Selected start time: {start_time:.2f}s")

	        # Never cut past the end of the source: a source shorter than the
	        # requested duration is looped below instead.
	        end_time = min(start_time + duration, video.duration)
	        clip = video.subclip(start_time, end_time)

	        # Centre-crop to the target aspect ratio, then scale to the target size.
	        target_aspect = target_resolution[0] / target_resolution[1]
	        clip_aspect = clip.w / clip.h

	        if clip_aspect > target_aspect:
	            # Too wide: trim left and right.
	            new_width = int(clip.h * target_aspect)
	            x_center = clip.w // 2
	            x1 = x_center - (new_width // 2)
	            x2 = x_center + (new_width // 2)
	            clip = clip.crop(x1=x1, x2=x2)
	        else:
	            # Too tall (or exact): trim top and bottom.
	            new_height = int(clip.w / target_aspect)
	            y_center = clip.h // 2
	            y1 = y_center - (new_height // 2)
	            y2 = y_center + (new_height // 2)
	            clip = clip.crop(y1=y1, y2=y2)

	        clip = clip.resize(target_resolution)

	        if clip.duration < duration:
	            clip = clip.loop(duration=duration)

	        logger.info("Background video clip created successfully")
	        return clip, []  # no temp files for bundled videos

	    except Exception as e:
	        logger.error(f"Error creating video background: {e}")
	        logger.exception("Full traceback:")
	        return solid_background()

	def _close_clip_quietly(clip_obj):
	    """Close one MoviePy clip; log a warning instead of raising on failure."""
	    if clip_obj is None:
	        return
	    try:
	        clip_obj.close()
	    except Exception as e:
	        logger.warning(f"Error during cleanup: {e}")


	def _remove_pexels_temp_files(temp_paths):
	    """Delete the given temp files; a failed delete is logged, never raised."""
	    for temp_f_path in temp_paths:
	        try:
	            if os.path.exists(temp_f_path):
	                os.remove(temp_f_path)
	                logger.info(f"Cleaned up Pexels temp file: {temp_f_path}")
	        except Exception as e_del:
	            logger.warning(f"Error deleting Pexels temp file {temp_f_path}: {e_del}")


	def _render_video_file(audio_path, subtitles, background, output_path, pexels_keywords):
	    """Compose background + subtitles + audio and write the result to ``output_path``.

	    Returns True when ``write_videofile`` completed without raising, False
	    otherwise (the failing stage is logged). Every clip opened here, the
	    subtitle clips, and any temp files reported by ``get_video_clip`` are
	    released in the ``finally`` block, whichever stage failed.
	    """
	    audio = None
	    background_clip = None
	    video = None
	    # Initialised up front so cleanup never hits an undefined name, even when
	    # get_video_clip returns a bare clip instead of a (clip, temp_files) tuple.
	    pexels_temp_files_to_clean = []
	    try:
	        logger.info(f"Loading audio file: {audio_path}")
	        try:
	            audio = AudioFileClip(audio_path)
	            duration = audio.duration
	            logger.info(f"Audio duration: {duration} seconds")
	        except Exception as e:
	            logger.error(f"Error loading audio file: {e}")
	            return False

	        try:
	            logger.info(f"Creating background clip with {background} background...")
	            result_bg = get_video_clip(background, duration, target_resolution=(1080, 1920), pexels_keywords=pexels_keywords)
	            if isinstance(result_bg, tuple):
	                background_clip, temp_files = result_bg
	                pexels_temp_files_to_clean = list(temp_files or [])
	            else:
	                # Tolerate a bare clip return; there are no temp files then.
	                background_clip = result_bg

	            if background_clip is None:
	                logger.error("Failed to create background clip")
	                return False
	            logger.info("Background clip created successfully")
	        except Exception as e:
	            logger.error(f"Error creating background clip: {e}")
	            return False

	        try:
	            logger.info(f"Combining {len(subtitles)} subtitle clips with background...")
	            video = CompositeVideoClip([background_clip] + subtitles, size=(1080, 1920))
	            logger.info("Clips combined successfully")
	        except Exception as e:
	            logger.error(f"Error combining clips: {e}")
	            return False

	        try:
	            logger.info("Setting audio to video...")
	            video = video.set_audio(audio)
	            logger.info("Audio set successfully")
	        except Exception as e:
	            logger.error(f"Error setting audio: {e}")
	            return False

	        try:
	            logger.info(f"Writing video to file: {output_path}")
	            video.write_videofile(
	                output_path,
	                fps=30,
	                codec='libx264',
	                audio_codec='aac',
	                temp_audiofile='temp-audio.m4a',
	                remove_temp=True,
	                logger=None,
	            )
	            logger.info("Video written successfully")
	        except Exception as e:
	            logger.error(f"Error writing video file: {e}")
	            logger.exception("Full traceback:")
	            return False

	        return True
	    finally:
	        # Each resource is closed independently so one failing close() cannot
	        # prevent the remaining ones (or the temp-file removal) from running.
	        _close_clip_quietly(video)
	        _close_clip_quietly(audio)
	        _close_clip_quietly(background_clip)
	        # A Pexels background may be a composite; close its segment clips too.
	        pexels_segments = getattr(background_clip, 'clips', None)
	        if background == "Pexels" and pexels_segments:
	            logger.info(f"Attempting to close {len(pexels_segments)} Pexels sub-clips.")
	            for pexels_segment_clip in pexels_segments:
	                _close_clip_quietly(pexels_segment_clip)
	        for subtitle_clip in subtitles:
	            _close_clip_quietly(subtitle_clip)
	        _remove_pexels_temp_files(pexels_temp_files_to_clean)


	def _upload_video_to_dataset(output_path):
	    """Upload the rendered file to the Hugging Face dataset and return its public URL."""
	    hf_api = HfApi()
	    dataset_repo = "lolhaha002/redditbotdata"  # Change this to your dataset name

	    logger.info(f"Uploading video to Hugging Face Dataset: {output_path}")
	    video_filename = os.path.basename(output_path)
	    hf_api.upload_file(
	        path_or_fileobj=output_path,
	        path_in_repo=f"videos/{video_filename}",
	        repo_id=dataset_repo,
	        repo_type="dataset",
	    )

	    video_url = f"https://huggingface.co/datasets/{dataset_repo}/resolve/main/videos/{video_filename}"
	    logger.info(f"Video uploaded successfully: {video_url}")
	    return video_url


	def create_video_with_background(audio_path, subtitles, subreddit_url, selected_font="Mouldy Cheese", background="Green", output_path="/tmp/output_video.mp4", pexels_keywords=None):
	    """Render the narrated video and upload it to the Hugging Face dataset.

	    Args:
	        audio_path: Path of an existing audio file used as the soundtrack; its
	            duration is passed to ``get_video_clip`` as the background length.
	        subtitles: List of clips layered on top of the background. They are
	            closed by this function once rendering has been attempted.
	        subreddit_url: Accepted for interface compatibility; not used here.
	        selected_font: Accepted for interface compatibility; not used here.
	        background: Background type forwarded to ``get_video_clip``.
	        output_path: Destination of the rendered MP4 file.
	        pexels_keywords: Forwarded unchanged to ``get_video_clip``.

	    Returns:
	        ``(output_path, video_url)`` on success, ``(None, None)`` on any
	        failure. Failures are logged rather than raised.
	    """
	    logger.info(f"[DEBUG] create_video_with_background received pexels_keywords: '{pexels_keywords}'")
	    try:
	        logger.info("Starting video creation process...")

	        # Validate inputs before opening any resource.
	        if not isinstance(audio_path, str) or not os.path.exists(audio_path):
	            logger.error(f"Invalid audio path: {audio_path}")
	            return None, None

	        if not isinstance(subtitles, list):
	            logger.error(f"Invalid subtitles format: {type(subtitles)}")
	            return None, None

	        if not _render_video_file(audio_path, subtitles, background, output_path, pexels_keywords):
	            return None, None

	        if not os.path.exists(output_path):
	            logger.error("Video file not found after creation")
	            return None, None

	        logger.info(f"Video successfully created at: {output_path}")
	        video_url = _upload_video_to_dataset(output_path)
	        return output_path, video_url

	    except Exception as e:
	        # Top-level boundary: covers upload failures and anything unexpected.
	        logger.error(f"Unexpected error in video creation: {e}")
	        logger.exception("Full traceback:")
	        return None, None

	def _split_keyword_csv(raw_keywords):
	    """Turn a comma-separated string into a list of stripped, non-empty keywords.

	    Anything that is not a string (e.g. ``None``) yields an empty list.
	    """
	    if not isinstance(raw_keywords, str):
	        return []
	    return [kw.strip() for kw in raw_keywords.split(",") if kw.strip()]


	def tts_interface(subreddit_url, story_text, filter_type, time_filter, selected_voice, rate, pitch, background, pexels_keywords):
	    """Gradio callback: build a video from a pasted story or from Reddit content.

	    When ``story_text`` is non-empty it takes precedence and the result of
	    ``create_video_from_story`` is returned as-is. Otherwise audio is generated
	    from ``subreddit_url``, subtitles are created from that audio, and the
	    video is rendered and uploaded via ``create_video_with_background``.

	    Args:
	        subreddit_url: Subreddit or post URL (may be empty/None in story mode).
	        story_text: Free-form story text (may be empty/None).
	        filter_type, time_filter: Forwarded to ``generate_audio_from_reddit``.
	        selected_voice, rate, pitch: Voice settings forwarded to the audio step.
	        background: Background type for the video.
	        pexels_keywords: Comma-separated keywords string from the UI.

	    Returns:
	        On the Reddit path, ``(video_path, video_url, status_message)``;
	        the first two are ``None`` on failure.
	    """
	    logger.info(f"[DEBUG] tts_interface received pexels_keywords: '{pexels_keywords}'")
	    audio_path = None  # set once audio exists so `finally` knows what to delete
	    try:
	        logger.info("Starting TTS interface process...")
	        logger.info(f"Selected background: {background}")

	        # Story Logic
	        story_text = (story_text or "").strip()
	        subreddit_url = (subreddit_url or "").strip()

	        if not story_text and not subreddit_url:
	            return None, None, "Please provide either a story or a Reddit URL."

	        if story_text:
	            keywords_list_story = _split_keyword_csv(pexels_keywords)
	            logger.info(f"[DEBUG] Parsed keywords list for story: {keywords_list_story}")
	            return create_video_from_story(story_text, selected_voice, rate, pitch, background, pexels_keywords=keywords_list_story)

	        # Generate audio
	        logger.info("Generating audio from Reddit content...")
	        generated_audio = generate_audio_from_reddit(subreddit_url, filter_type, time_filter, selected_voice, rate, pitch)
	        if not generated_audio or not isinstance(generated_audio, str):
	            logger.error(f"Invalid audio path returned: {generated_audio}")
	            return None, None, "Failed to generate audio: Content not suitable or contains NSFW material"
	        audio_path = generated_audio

	        logger.info(f"Audio generated successfully: {audio_path}")

	        # Generate subtitles
	        logger.info(f"Generating subtitles")
	        subtitles = generate_subtitles(audio_path)
	        if subtitles is None:
	            logger.error("Failed to generate subtitles")
	            return None, None, "Failed to generate subtitles"
	        logger.info(f"Generated {len(subtitles)} subtitle clips")

	        # Create unique output path. Only the leading run of [A-Za-z0-9_] after
	        # "/r/" is used, so query strings or odd characters in the URL never
	        # end up in the file name or the upload path; empty -> "unknown".
	        timestamp = int(time.time())
	        name_match = re.search(r"reddit\.com/r/([A-Za-z0-9_]+)", subreddit_url)
	        subreddit_name = name_match.group(1) if name_match else "unknown"
	        video_filename = f"{subreddit_name}_{timestamp}.mp4"
	        output_path = f"/tmp/{video_filename}"
	        logger.info(f"Creating video with output path: {output_path}")

	        # Create video
	        logger.info(f"[DEBUG] Raw pexels_keywords input: '{pexels_keywords}'")
	        keywords_list = _split_keyword_csv(pexels_keywords)
	        logger.info(f"[DEBUG] Parsed keywords list: {keywords_list}")
	        if not keywords_list:
	            logger.warning("No valid Pexels keywords found after parsing.")
	        else:
	            for i, kw in enumerate(keywords_list, start=1):
	                logger.info(f"[DEBUG] Keyword {i}: '{kw}'")

	        video_path, video_url = create_video_with_background(
	            audio_path=audio_path,
	            subtitles=subtitles,
	            subreddit_url=subreddit_url,
	            selected_font="Mouldy Cheese",
	            background=background,
	            output_path=output_path,
	            pexels_keywords=keywords_list,
	        )

	        if video_path is None:
	            logger.error("Failed to create video")
	            return None, None, "Failed to create video"

	        logger.info(f"Video created at: {video_path}")
	        logger.info(f"Video URL: {video_url}")

	        logger.info("Video generation process completed successfully")
	        return video_path, video_url, "Video generated and uploaded successfully!"

	    except Exception as e:
	        logger.error(f"Error in TTS interface: {e}")
	        logger.exception("Full traceback:")
	        return None, None, f"Error: {str(e)}"

	    finally:
	        # Clean up the audio file on every exit path (success or failure);
	        # the video file is kept for preview.
	        if audio_path:
	            try:
	                if os.path.exists(audio_path):
	                    os.remove(audio_path)
	                    logger.info(f"Cleaned up temporary audio file: {audio_path}")
	            except Exception as e:
	                logger.warning(f"Failed to clean up temporary files: {e}")

	if __name__ == "__main__":
	    # Page header text, rendered as Markdown at the top of the app.
	    intro_markdown = """
	# Reddit to Video Generator

	Enter either:
	- A subreddit URL (e.g., https://www.reddit.com/r/AskReddit/) to get top posts
	- A direct post URL (e.g., https://www.reddit.com/r/AskReddit/comments/abc123/post_title/) to use that specific post

	If you are looking for a bulk shorts creator and/or wants to provide a support, please checkout my fiverr gigs https://www.fiverr.com/s/dDdbGXZ
	"""

	    # Dropdown entries are shown as "<voice id> - <description>".
	    voice_choices = [f"{voice_id} - {voice_label}" for voice_id, voice_label in VOICE_OPTIONS.items()]
	    background_choices = list(BACKGROUND_OPTIONS)
	    # The speed and pitch sliders share the same range, default and step.
	    percent_slider = {"minimum": -100, "maximum": 100, "value": 0, "step": 10}

	    with gr.Blocks() as demo:
	        gr.Markdown(intro_markdown)

	        with gr.Row():
	            # Left column: every input control.
	            with gr.Column():
	                url_input = gr.Textbox(label="Reddit URL", placeholder="Enter subreddit URL or direct post URL")
	                story_input = gr.Textbox(
	                    label="Or Enter Your Own Story",
	                    placeholder="Paste or write your story here (no Reddit needed)",
	                    lines=6,
	                )
	                word_count_display = gr.Textbox(label="Word Count", interactive=False)

	                # Refresh the word count whenever the story text changes.
	                story_input.change(fn=count_words, inputs=story_input, outputs=word_count_display)

	                filter_type = gr.Dropdown(["hot", "top", "new"], label="Filter Type (for subreddit URLs only)", value="hot")
	                time_filter = gr.Dropdown(
	                    ["hour", "day", "week", "month", "year", "all"],
	                    label="Time Filter (for subreddit URLs only)",
	                    value="day",
	                )
	                selected_voice = gr.Dropdown(choices=voice_choices, value="en-US-GuyNeural - Male (American)", label="Voice")
	                rate = gr.Slider(label="Voice Speed", **percent_slider)
	                pitch = gr.Slider(label="Voice Pitch", **percent_slider)
	                background = gr.Dropdown(background_choices, label="Background", value="Green")
	                pexels_keywords = gr.Textbox(
	                    label="Pexels Keywords (for video background)",
	                    placeholder="e.g., nature, city night, forest",
	                )

	            # Right column: results and the trigger button.
	            with gr.Column():
	                video_preview = gr.Video(label="Video Preview")
	                dataset_url = gr.Textbox(label="Dataset URL (Click to View)")
	                status_text = gr.Textbox(label="Status")
	                submit_btn = gr.Button("Generate Video")

	        # Order must match the positional parameters of tts_interface.
	        generation_inputs = [url_input, story_input, filter_type, time_filter, selected_voice, rate, pitch, background, pexels_keywords]
	        submit_btn.click(
	            fn=tts_interface,
	            inputs=generation_inputs,
	            outputs=[video_preview, dataset_url, status_text],
	            queue=True,
	        )

	        # Debug hook on the keywords box. No explicit triggers are given, so
	        # when it fires is left to Gradio's default for gr.on (see Gradio docs).
	        @gr.on(inputs=[pexels_keywords])
	        def debug_pexels_input(text):
	            logger.debug(f"[DEBUGUI] Received Pexels keywords via UI event: '{text}'")

	    demo.launch(share=True)