# NOTE: the text that preceded the shebang in this copy was web-page residue
# from a Hugging Face Spaces file view ("No application file", "File size:
# 20,716 Bytes", commit hashes 0a5bec3 / 808f27e, and a 1-411 line-number
# gutter). It is not part of the program.
#!/usr/bin/env python
# pylint: disable=unused-argument, wrong-import-position
# This program is dedicated to the public domain under the MIT license.
import logging
import os
import re
import asyncio
import yt_dlp
import subprocess
from telegram import Update, InputFile
from telegram import BotCommand
from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
from telegram.constants import ParseMode
from dotenv import load_dotenv
load_dotenv() # take environment variables
# Enable logging
# Root logger format: timestamp, logger name, level, message; INFO and above.
logging.basicConfig(
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO
)
# set higher logging level for httpx to avoid all GET and POST requests being logged
logging.getLogger("httpx").setLevel(logging.WARNING)
# Module-level logger shared by the functions in this file.
logger = logging.getLogger(__name__)
# --- Configuration ---
# The token is read from the environment variable literally named
# "YOUR_TELEGRAM_BOT_TOKEN" (load_dotenv() above can populate it from a .env
# file). os.getenv returns None when the variable is not set.
TELEGRAM_BOT_TOKEN = os.getenv("YOUR_TELEGRAM_BOT_TOKEN") # Set this environment variable to your bot token
DOWNLOAD_PATH = "video_downloads/" # Folder to store downloaded videos temporarily
MAX_FILE_SIZE_MB = 49 # Telegram's typical bot upload limit is 50MB, stay slightly under
# --- !! IMPORTANT !! ---
# Paths to your Netscape format cookies files.
# main() checks at startup whether each of these files exists and logs the result.
YOUTUBE_COOKIES_FILE = "cookies/youtube.txt"
INSTAGRAM_COOKIES_FILE = "cookies/instagram.txt"
# --- Helper Functions ---
def ensure_download_path_exists():
    """Create the download directory (DOWNLOAD_PATH) if it is not already there.

    Failures are logged rather than raised, so the bot still starts; downloads
    will then most likely fail later.
    """
    if os.path.isdir(DOWNLOAD_PATH):
        return
    try:
        # exist_ok=True closes the window between the isdir() check and the
        # creation. If DOWNLOAD_PATH exists but is a regular file, makedirs
        # still raises (FileExistsError is an OSError) and the problem is
        # logged instead of being mistaken for "directory already present".
        os.makedirs(DOWNLOAD_PATH, exist_ok=True)
    except OSError as e:
        logger.error(f"Error creating download directory {DOWNLOAD_PATH}: {e}")
        return
    logger.info(f"Created download directory: {DOWNLOAD_PATH}")
async def send_typing_action(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Show a "busy" chat action in the chat the update came from.

    Despite the function's name, the action sent is 'upload_video'.
    """
    target_chat_id = update.effective_chat.id
    await context.bot.send_chat_action(
        chat_id=target_chat_id,
        action='upload_video',
    )
def is_time_like(text: str) -> bool:
    """Return True if *text* is a plain timestamp: ``SS``, ``MM:SS`` or ``HH:MM:SS``.

    The leading field may have any number of ASCII digits, each following
    field has one or two digits, and a fractional-seconds suffix such as
    ``.5`` is allowed. Empty or None input returns False.

    A bare "contains ':'" test would also accept URLs and arbitrary words, and
    ``str.isdigit`` accepts non-ASCII digits, so the whole string is matched
    against an explicit pattern instead.
    """
    if not text:
        return False
    return re.fullmatch(r'[0-9]+(?::[0-9]{1,2}){0,2}(?:\.[0-9]+)?', text) is not None
def is_youtube_url(url: str) -> bool:
    """Return True if *url* looks like a YouTube video link.

    Matches from the start of the string: an optional scheme, an optional
    ``www.``, ``m.`` or ``music.`` host prefix, a youtube / youtu /
    youtube-nocookie domain ending in .com or .be, and an 11-character run
    after the path prefix. Empty or None input returns False.
    """
    if not url:
        return False
    # Every fragment is a raw string so that `\.` and `\?` reach the regex
    # engine untouched instead of being treated as (invalid) string escapes.
    youtube_regex = (
        r'(https?://)?(www\.|m\.|music\.)?'
        r'(youtube|youtu|youtube-nocookie)\.(com|be)/'
        r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
    )
    return bool(re.match(youtube_regex, url))
def is_instagram_url(url: str) -> bool:
    """Tell whether *url* starts like an Instagram post, reel or TV link.

    The match is anchored at the beginning of the string; scheme and ``www.``
    are optional, and the path must be ``/p/``, ``/reel/`` or ``/tv/``
    followed by a non-empty identifier.
    """
    pattern = re.compile(
        r'(https?://)?(www\.)?instagram\.com/(p|reel|tv)/([^/?#&]+)'
    )
    found = pattern.match(url)
    return found is not None
# --- Bot Command Handlers ---
async def start_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Send a welcome message when the /start command is issued.

    The message is sent with ``reply_html``, so it must be valid Telegram
    HTML: command examples are wrapped in ``<code>`` and the angle brackets of
    placeholders are written as ``&lt;`` / ``&gt;``. Markdown backticks are not
    interpreted in HTML mode, and a raw ``<URL>`` would be read as a tag.
    """
    user = update.effective_user
    welcome_message = (
        f"👋 Hello {user.mention_html()}!\n\n"
        "I'm your video downloading bot.\n"
        "Use <code>/download &lt;URL&gt; [START_TIME] [END_TIME]</code> to fetch a video or a segment.\n"
        "Times are optional (e.g., <code>MM:SS</code> or <code>HH:MM:SS</code>).\n\n"
        "Example (full video): <code>/download &lt;your_video_url&gt;</code>\n"
        "Example (segment): <code>/download &lt;your_video_url&gt; 00:10 00:50</code>\n\n"
        "Type /help for more detailed information."
    )
    await update.message.reply_html(welcome_message)
async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Send the usage instructions when the /help command is issued.

    The text is sent with ``ParseMode.MARKDOWN`` (Telegram's legacy Markdown),
    where bold is written with single asterisks and inline code with
    backticks. Placeholders containing ``<``, ``>`` or ``_`` are kept inside
    backticks so they are shown literally.
    """
    help_text = (
        "ℹ️ *How to use me:*\n"
        "Use the `/download` command followed by a video URL.\n"
        "You can optionally specify a start and/or end time for the segment.\n\n"
        "*Formats:*\n"
        "1. `/download <VIDEO_URL>`\n"
        "   Downloads the full video.\n\n"
        "2. `/download <VIDEO_URL> <START_TIME>`\n"
        "   Downloads from `START_TIME` to the end of the video.\n"
        "   Example: `/download <url> 01:20` (starts at 1 min 20 secs)\n\n"
        "3. `/download <VIDEO_URL> <START_TIME> <END_TIME>`\n"
        "   Downloads the segment between `START_TIME` and `END_TIME`.\n"
        "   Example: `/download <url> 00:30 02:15`\n\n"
        "Time format can be `MM:SS` or `HH:MM:SS` (e.g., `1:23` or `00:01:23`).\n"
        "Use `0` or `00:00` for the beginning if specifying an end time only (e.g. `/download <url> 0 00:55`).\n\n"
        "*Supported Sites:*\n"
        "Most sites supported by `yt-dlp` (YouTube, Vimeo, Twitter, etc.).\n\n"
        "*File Size Limit:*\n"
        "Telegram bots can only send files up to ~50MB. I'll try to get a version under this. "
        "Segments are more likely to fit!"
    )
    await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
def _build_ytdlp_command(url: str, start_time_str: str, end_time_str: str,
                         output_template: str) -> list:
    """Return the yt-dlp argv for downloading *url*, optionally one segment of it."""
    command = [
        "yt-dlp",
        "--output", output_template,
        "--no-playlist",  # Download only the single video if a playlist URL is given
        "--merge-output-format", "mp4",  # Ensure output is mp4 if merging is needed
        "--max-filesize", str(MAX_FILE_SIZE_MB * 1024 * 1024),
    ]
    # The format selector is left out for Instagram URLs (existing workaround
    # for an Instagram download issue).
    if "instagram" not in url:
        command += [
            "--format",
            f'bestvideo[ext=mp4][filesize<{MAX_FILE_SIZE_MB}M]+bestaudio[ext=m4a]'
            f'/best[ext=mp4][filesize<{MAX_FILE_SIZE_MB}M]/best[filesize<{MAX_FILE_SIZE_MB}M]',
        ]
    if start_time_str:
        # A time range is always written START-END; an open-ended segment
        # uses "inf" as the end.
        section_end = end_time_str or "inf"
        command += [
            "--download-sections", f"*{start_time_str}-{section_end}",
            "--force-keyframes-at-cuts",
        ]
    # Pass the configured cookie jar for the matching site when the file exists.
    if is_youtube_url(url):
        cookies_file = YOUTUBE_COOKIES_FILE
    elif "instagram" in url:
        cookies_file = INSTAGRAM_COOKIES_FILE
    else:
        cookies_file = None
    if cookies_file and os.path.exists(cookies_file):
        command += ["--cookies", cookies_file]
    command.append(url)
    return command


def _find_tagged_download(tag: str):
    """Return the newest .mp4 in DOWNLOAD_PATH whose name contains ``[tag]``, or None."""
    marker = f"[{tag}]"
    candidates = [
        os.path.join(DOWNLOAD_PATH, name)
        for name in os.listdir(DOWNLOAD_PATH)
        if marker in name and name.endswith(".mp4")
    ]
    if not candidates:
        return None
    return max(candidates, key=os.path.getctime)


async def downloader_segment(update: Update, context: ContextTypes.DEFAULT_TYPE, url: str,
                             start_time_str: str, end_time_str: str) -> tuple:
    """Download *url* (or one segment of it) with the yt-dlp command-line tool.

    Args:
        update: The incoming update; replies are sent to ``update.message``.
        context: Handler context, used for the chat action.
        url: Video URL; must start with ``http://`` or ``https://``.
        start_time_str: Optional segment start; None or empty for the full video.
        end_time_str: Optional segment end; only used together with a start.

    Returns:
        A ``(downloaded_file_path, message)`` tuple. ``downloaded_file_path``
        is None when nothing usable was produced. ``message`` is the status
        message sent to the user: the "processing" reply, or the warning reply
        when the URL was rejected. A tuple is returned on every path so that
        callers can always unpack the result.
    """
    chat_id = update.effective_chat.id
    if not url.startswith(('http://', 'https://')):
        warning_message = await update.message.reply_text(
            "⚠️ That doesn't look like a valid URL. Please send a direct link to a video."
        )
        return None, warning_message

    processing_message = await update.message.reply_text(
        "🔍 Got your link! Processing and trying to download the video..."
    )
    await send_typing_action(update, context)

    # Tag the output file with chat id + message id so the file produced by
    # THIS request can be found afterwards, even when other downloads (or
    # leftovers from earlier ones) share the directory.
    request_tag = f"{chat_id}-{update.message.message_id}"
    # %(title).200B truncates the title to 200 bytes to avoid overly long filenames.
    output_template = os.path.join(DOWNLOAD_PATH, f"%(title).200B [{request_tag}].%(ext)s")
    command = _build_ytdlp_command(url, start_time_str, end_time_str, output_template)

    if start_time_str and end_time_str:
        logger.info(f"Segment download requested for {url}: {start_time_str} to {end_time_str}")
    elif start_time_str:
        logger.info(f"Segment download requested for {url}: {start_time_str} to end")
    else:
        logger.info(f"Full video download requested for {url}")
    logger.info(f"Running: {command}")

    downloaded_file_path = None
    try:
        # Run yt-dlp as an asyncio subprocess so the event loop (and therefore
        # every other chat) keeps being served while the download runs.
        process = await asyncio.create_subprocess_exec(*command)
        return_code = await process.wait()
        if return_code != 0:
            logger.error(f"yt-dlp exited with code {return_code} for URL {url}")
        downloaded_file_path = _find_tagged_download(request_tag)
        if downloaded_file_path is None:
            logger.error("Could not determine downloaded file path after download.")
        else:
            file_size = os.path.getsize(downloaded_file_path)
            logger.info(f"File downloaded: {downloaded_file_path}, Size: {file_size / (1024*1024):.2f} MB")
    except Exception as e:
        # Boundary of the download step: report and fall through with whatever
        # path (possibly None) was determined so the caller can tell the user.
        logger.error(f"An overarching unexpected error occurred while downloading URL {url}: {e}", exc_info=True)
    return downloaded_file_path, processing_message
# --- Main Video Processing Logic ---
async def download_command_handler(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle ``/download <URL> [start] [end]``: download the video and send it back.

    Steps: validate the arguments, download through ``downloader_segment``,
    refuse files above ``MAX_FILE_SIZE_MB``, upload the file to the chat, and
    always delete the local file afterwards.
    """
    chat_id = update.effective_chat.id
    if not context.args:
        await update.message.reply_text("⚠️ URL missing. Usage: /download <URL> [start] [end]")
        return
    url = context.args[0]  # The first argument after /download
    # Reject non-http(s) input here so the download step is never started for it.
    if not url.startswith(('http://', 'https://')):
        await update.message.reply_text("⚠️ That doesn't look like a valid URL. Please send a direct link to a video.")
        return

    # Parse optional start and end times
    start_time_str = None
    end_time_str = None
    if len(context.args) >= 2:
        potential_start_time = context.args[1]
        if is_time_like(potential_start_time):
            start_time_str = potential_start_time
        else:
            # The download proceeds without a segment; say so unambiguously.
            await update.message.reply_text(f"⚠️ '{potential_start_time}' doesn't look like a valid start time (e.g., MM:SS). Downloading the full video instead.")
    if len(context.args) >= 3 and start_time_str:  # Only look for end_time if start_time was plausible
        potential_end_time = context.args[2]
        if is_time_like(potential_end_time):
            end_time_str = potential_end_time
        else:
            await update.message.reply_text(f"⚠️ '{potential_end_time}' doesn't look like a valid end time. Will download from {start_time_str} to end if applicable.")

    result = await downloader_segment(update, context, url, start_time_str, end_time_str)
    if not result:
        # The download step returned nothing to unpack; there is no status
        # message to update, so stop here.
        return
    downloaded_file_path, processing_message = result

    max_bytes = MAX_FILE_SIZE_MB * 1024 * 1024
    try:
        if downloaded_file_path and os.path.exists(downloaded_file_path):
            file_size = os.path.getsize(downloaded_file_path)
            if file_size > max_bytes:
                logger.warning(f"Video {downloaded_file_path} is too large: {file_size / (1024*1024):.2f} MB")
                await processing_message.edit_text(
                    f"⚠️ The downloaded video is too large ({file_size / (1024*1024):.2f} MB) "
                    f"for me to send via Telegram (max ~{MAX_FILE_SIZE_MB}MB). I tried to get a smaller version."
                )
                return  # The finally block below still removes the oversized file
            logger.info(f"PRE-SEND CHECK: Attempting to send video from path: '{downloaded_file_path}'")
            logger.info(f"PRE-SEND CHECK: File size is {file_size / (1024*1024):.2f} MB")
            await processing_message.edit_text("✅ Download complete! Now uploading to Telegram...")
            await send_typing_action(update, context)
            try:
                with open(downloaded_file_path, 'rb') as video_file:
                    logger.info("Attempting to send video using the file object directly...")
                    await context.bot.send_video(
                        chat_id=chat_id,
                        video=video_file,
                        filename=os.path.basename(downloaded_file_path),
                        caption=f"🎬 Here's your video!\nOriginal URL: {url}",
                        supports_streaming=True,
                        read_timeout=180,
                        write_timeout=180,
                        connect_timeout=60
                    )
                await processing_message.delete()  # Delete "Processing..." message
                logger.info(f"Video sent to chat_id {chat_id}: {downloaded_file_path}")
            except Exception as e:  # Catch errors during Telegram upload
                logger.error(f"Error sending video to Telegram: {e}")
                await processing_message.edit_text(f"❌ Failed to upload video to Telegram: {str(e)}")
        else:
            if not context.chat_data.get(f'download_error_{chat_id}'):  # If no specific download error was already reported
                await processing_message.edit_text("❌ Download failed or no file was produced. Please check the URL or try again.")
            # Clear any error flag
            context.chat_data.pop(f'download_error_{chat_id}', None)
    except Exception as e:
        logger.error(f"An overarching unexpected error occurred for URL {url}: {e}", exc_info=True)
        try:
            await processing_message.edit_text("❌ An unexpected error occurred. Please try again later.")
        except Exception as edit_error:
            # Reporting is best-effort; the status message may already be gone.
            logger.error(f"Could not report the failure to chat {chat_id}: {edit_error}")
    finally:
        # Clean up the downloaded file
        if downloaded_file_path and os.path.exists(downloaded_file_path):
            try:
                os.remove(downloaded_file_path)
                logger.info(f"Cleaned up downloaded file: {downloaded_file_path}")
            except OSError as e:
                logger.error(f"Error deleting file {downloaded_file_path}: {e}")
        # Clear any per-chat state left behind for this download
        context.chat_data.pop(f'download_filename_{chat_id}', None)
        context.chat_data.pop(f'download_error_{chat_id}', None)
        context.chat_data.pop(f'last_progress_msg_{chat_id}', None)
# --- yt-dlp Progress Hook ---
# Keep track of messages to edit for progress (to avoid spamming)
progress_message_ids = {}  # chat_id: message_id


async def _edit_status_message(context, chat_id, message_id, text) -> bool:
    """Best-effort edit of a status message; return True if the edit succeeded."""
    try:
        await context.bot.edit_message_text(text=text, chat_id=chat_id, message_id=message_id)
    except Exception as e:
        # Editing can fail for harmless reasons (e.g. message not found or too
        # old), so the failure is only recorded at debug level.
        logger.debug(f"Could not edit status message {message_id} in chat {chat_id}: {e}")
        return False
    return True


async def download_progress_hook(d, update: Update, context: ContextTypes.DEFAULT_TYPE, initial_message_id: int):
    """Mirror a yt-dlp progress dictionary into a Telegram status message.

    Args:
        d: Progress dictionary; its 'status' entry selects the branch
            ('downloading', 'finished' or 'error'). Other statuses, or a
            missing 'status', are ignored.
        update: Update whose chat receives the status edits.
        context: Handler context; ``chat_data`` stores the last progress text,
            the final filename and the error flag under per-chat keys.
        initial_message_id: Id of the message that is edited in place.
    """
    chat_id = update.effective_chat.id
    status = d.get('status')
    if status == 'downloading':
        # Strip ANSI colour sequences from the percentage string.
        percent_str = re.sub(r'\x1b\[[0-9;]*m', '', d.get('_percent_str', 'N/A'))
        total_bytes_str = d.get('_total_bytes_str', 'N/A')
        speed_str = d.get('_speed_str', 'N/A')
        eta_str = d.get('_eta_str', 'N/A')
        # NOTE: this edits on every hook call whose text differs, which might
        # be too frequent for Telegram's rate limits; a timer or an "every N
        # percent" rule would be gentler.
        current_progress_message = f"Downloading...\nProgress: {percent_str}\nSize: {total_bytes_str}\nSpeed: {speed_str}\nETA: {eta_str}"
        last_message = context.chat_data.get(f'last_progress_msg_{chat_id}', "")
        if last_message != current_progress_message:  # Avoid identical edits
            if await _edit_status_message(context, chat_id, initial_message_id, current_progress_message):
                context.chat_data[f'last_progress_msg_{chat_id}'] = current_progress_message
    elif status == 'finished':
        # The filename is looked up in two places: d['filename'] first, then
        # d['info_dict']['_filename'].
        final_filename = d.get('filename')
        if not final_filename and d.get('info_dict'):
            final_filename = d['info_dict'].get('_filename')
        logger.info(f"yt-dlp finished processing for chat {chat_id}. Filename: {final_filename}")
        # Store the final filename in chat_data to be picked up by the main handler
        if final_filename:
            context.chat_data[f'download_filename_{chat_id}'] = final_filename
        await _edit_status_message(
            context, chat_id, initial_message_id,
            "✅ Download finished by yt-dlp. Preparing to send...",
        )
        # Clean up last progress message cache
        context.chat_data.pop(f'last_progress_msg_{chat_id}', None)
    elif status == 'error':
        logger.error(f"yt-dlp reported an error for chat {chat_id}.")
        context.chat_data[f'download_error_{chat_id}'] = True  # Flag an error
        await _edit_status_message(
            context, chat_id, initial_message_id,
            "❌ yt-dlp encountered an error during download.",
        )
async def post_init(application: Application) -> None:
    """Register the bot's command list with Telegram after initialization."""
    command_specs = (
        ("start", "Starts the bot and shows a welcome message."),
        ("help", "Shows the help message with instructions."),
        ("download", "Downloads a video or segment. Usage: /download <URL> [start] [end]"),
    )
    command_menu = [BotCommand(name, description) for name, description in command_specs]
    await application.bot.set_my_commands(command_menu)
    logger.info("Bot commands have been set programmatically.")
# --- Main Bot Execution ---
def main() -> None:
    """Validate the configuration, build the application and run it with polling.

    Returns early (after logging an error) when no bot token is configured.
    """
    # os.getenv yields None when the variable is unset, so test for a missing
    # value as well as for the literal placeholder.
    if not TELEGRAM_BOT_TOKEN or TELEGRAM_BOT_TOKEN == "YOUR_TELEGRAM_BOT_TOKEN":
        logger.error(
            "CRITICAL: Bot token is not set! Please set the 'YOUR_TELEGRAM_BOT_TOKEN' "
            "environment variable (e.g. in a .env file) to your actual bot token."
        )
        return

    # Report, per site, whether the configured cookies file is present.
    configured_cookies = (
        ("YouTube", YOUTUBE_COOKIES_FILE),
        ("Instagram", INSTAGRAM_COOKIES_FILE),
    )
    for site, cookies_file in configured_cookies:
        if not cookies_file:
            continue
        if os.path.exists(cookies_file):
            logger.info(f"{site} cookies file found at '{cookies_file}'. Will be used for {site} URLs.")
        else:
            logger.warning(f"{site} cookies file configured but not found at '{cookies_file}'. {site} downloads might fail.")

    ensure_download_path_exists()

    # Create the Application and pass it your bot's token.
    application = Application.builder().token(TELEGRAM_BOT_TOKEN).post_init(post_init).build()

    # Add command handlers
    application.add_handler(CommandHandler("start", start_command))
    application.add_handler(CommandHandler("help", help_command))
    application.add_handler(CommandHandler("download", download_command_handler))

    # Run the bot until the user presses Ctrl-C
    logger.info("Bot starting...")
    application.run_polling(allowed_updates=Update.ALL_TYPES)
# Script entry point: start the bot only when the file is run directly.
if __name__ == "__main__":
    main()