# ViralCutterPRO / scripts/download_video.py
# (Hugging Face file-viewer residue kept for provenance:
#  "RafaG's picture" - "Upload 24 files" - commit ac8854d, verified)
import os
import re
import yt_dlp
import sys
from i18n.i18n import I18nAuto
# Module-wide translator instance. Throughout this file it is called as
# i18n("English message") and the result is printed or .format()-ed, so it
# presumably returns the localized string for that key (verify in i18n/i18n.py).
i18n = I18nAuto()
def sanitize_filename(name):
    """Return *name* with characters removed that break Windows paths or consoles.

    Three passes are applied, in order:

    1. Characters reserved by the file system (``\\ / * ? : " < > |``) and
       ASCII control characters (0-31, also illegal in Windows file names)
       are dropped.
    2. Anything that cannot be encoded as CP1252 is dropped. This keeps
       Western accents (á, ç, é) but removes emojis and other symbols the
       Windows console cannot print.
    3. Leading and trailing whitespace is stripped.

    The result may be an empty string (e.g. for a name made only of emojis
    or non-Latin script); callers must handle that case.
    """
    cleaned = re.sub(r'[\\/*?:"<>|\x00-\x1f]', "", name)

    try:
        cleaned = cleaned.encode('cp1252', 'ignore').decode('cp1252')
    except LookupError:
        # With errors='ignore' the only way encoding can fail is the codec
        # itself being unavailable; fall back to plain ASCII (drops accents).
        cleaned = cleaned.encode('ascii', 'ignore').decode('ascii')

    return cleaned.strip()
def progress_hook(d):
    """Print a one-line progress report for a yt-dlp progress event.

    Registered through the ``progress_hooks`` option. *d* is the status
    dictionary passed by yt-dlp; only the keys ``status``, ``_percent_str``,
    ``_eta_str`` and ``filename`` are read here. Events with any other
    status are ignored.
    """
    status = d.get('status')

    if status == 'downloading':
        try:
            percent = d.get('_percent_str', '').replace('%', '')
            print(f"[download] {percent}% - {d.get('_eta_str', 'N/A')} remaining", flush=True)
        except Exception:
            # Progress output is best-effort: a console/encoding problem must
            # not abort the download. `Exception` (not a bare except) so that
            # Ctrl-C / SystemExit still propagate.
            pass
    elif status == 'finished':
        message = f"[download] Download concluído: {d.get('filename', '')}"
        try:
            print(message, flush=True)
        except UnicodeEncodeError:
            # Same ASCII fallback the rest of this module uses for consoles
            # that cannot encode the text.
            print(message.encode('ascii', 'replace').decode('ascii'), flush=True)
# Lines of a YouTube VTT file that carry no caption text.
_VTT_HEADER_PREFIXES = ("WEBVTT", "X-TIMESTAMP", "NOTE", "Kind:", "Language:")
# Inline VTT markup such as <00:00:00.560> or <c>...</c>.
_VTT_TAG_RE = re.compile(r'<[^>]+>')


def _safe_print(template, *values):
    """Print ``template.format(*values)``; retry with ASCII-only values if the console cannot encode it."""
    try:
        print(template.format(*values))
    except UnicodeEncodeError:
        ascii_values = [str(v).encode('ascii', 'replace').decode('ascii') for v in values]
        print(template.format(*ascii_values))


def _remove_quietly(path):
    """Delete *path* on a best-effort basis; a missing or locked file is ignored."""
    try:
        os.remove(path)
    except OSError:
        pass


def _fetch_title_and_id(url):
    """Return ``(title, video_id)`` for *url*; each is None when it could not be extracted."""
    title = None
    video_id = None

    # Attempt 1: reuse the Chrome cookie jar.
    try:
        with yt_dlp.YoutubeDL({'quiet': True, 'no_warnings': True, 'cookiesfrombrowser': ('chrome',)}) as ydl:
            info = ydl.extract_info(url, download=False)
            title = info.get('title')
            video_id = info.get('id')
    except Exception as e:
        try:
            print(i18n("Warning: Failed to extract info with cookies: {}").format(e))
        except UnicodeEncodeError:
            print(i18n("Warning: Failed to extract info with cookies: [Encoding Error in Message]"))

    # Attempt 2: no cookies.
    if not title:
        try:
            with yt_dlp.YoutubeDL({'quiet': True, 'no_warnings': True}) as ydl:
                info = ydl.extract_info(url, download=False)
                title = info.get('title')
                video_id = info.get('id') or video_id
        except Exception as e:
            try:
                print(i18n("Error getting video info (without cookies): {}").format(e))
            except UnicodeEncodeError:
                print(i18n("Error getting video info (without cookies): [Encoding Error in Message]"))

    return title, video_id


def _vtt_time_to_srt(stamp):
    """Convert a VTT timestamp to SRT form: comma decimal separator, hours always present."""
    stamp = stamp.replace('.', ',')
    if stamp.count(':') == 1:
        stamp = "00:" + stamp
    return stamp


def _vtt_lines_to_srt(lines):
    """Turn the lines of a VTT file into a list of SRT text chunks (index, timing, text)."""
    srt_chunks = []
    counter = 1
    last_text = ""
    cue_start = None
    cue_end = None

    for raw_line in lines:
        line = raw_line.strip()

        # Skip VTT/YouTube headers and metadata.
        if line.startswith(_VTT_HEADER_PREFIXES):
            continue

        if "-->" in line:
            parts = line.split("-->")
            cue_start = _vtt_time_to_srt(parts[0].strip())
            # Drop positioning settings such as "align:start position:0%".
            cue_end = _vtt_time_to_srt(parts[1].strip().split(' ')[0])
            continue

        if not line:
            continue

        # Text seen before the first timing line has no cue to attach to.
        if cue_start is None:
            continue

        # Strip karaoke-style inline tags to get the plain text.
        text = _VTT_TAG_RE.sub('', line).strip()
        if not text:
            continue

        # Roll-up captions repeat the previous line; drop consecutive duplicates.
        if text == last_text:
            continue

        srt_chunks.append(f"{counter}\n")
        srt_chunks.append(f"{cue_start} --> {cue_end}\n")
        srt_chunks.append(f"{text}\n\n")
        last_text = text
        counter += 1

    return srt_chunks


def _standardize_subtitles(project_folder):
    """Rename/convert the first downloaded subtitle in *project_folder* to ``input.srt`` and delete the rest."""
    import glob

    # The folder name comes from a video title and may contain glob
    # metacharacters such as "[" and "]"; escape it so only the file-name
    # part of the pattern is treated as a wildcard.
    folder_pattern = glob.escape(project_folder)
    potential_subs = (
        glob.glob(os.path.join(folder_pattern, "input.*.vtt"))
        + glob.glob(os.path.join(folder_pattern, "input.*.srt"))
    )
    if not potential_subs:
        return

    # Take the first one found.
    best_sub = potential_subs[0]
    ext = os.path.splitext(best_sub)[1]
    new_name = os.path.join(project_folder, "input.srt")  # everything is standardized to .srt

    if ext.lower() == '.vtt':
        _safe_print(i18n("Formatting complex VTT subtitle ({}) to clean SRT..."), os.path.basename(best_sub))
        try:
            with open(best_sub, 'r', encoding='utf-8') as f:
                lines = f.readlines()
            srt_content = _vtt_lines_to_srt(lines)
            with open(new_name, 'w', encoding='utf-8') as f_out:
                f_out.writelines(srt_content)
            _safe_print(i18n("Subtitle converted and cleaned: {}"), new_name)
            _remove_quietly(best_sub)
        except Exception as e_conv:
            _safe_print(i18n("Failed to convert VTT: {}. Keeping original."), e_conv)
            # Fallback: keep the VTT, just give it the standard name.
            new_name_fallback = os.path.join(project_folder, "input.vtt")
            if os.path.exists(new_name_fallback) and new_name_fallback != best_sub:
                _remove_quietly(new_name_fallback)
            os.rename(best_sub, new_name_fallback)
    else:
        # Already SRT: rename only.
        if os.path.exists(new_name) and new_name != best_sub:
            _remove_quietly(new_name)
        os.rename(best_sub, new_name)
        _safe_print(i18n("SRT subtitle renamed to: {}"), new_name)

    # Remove the subtitle files that were not selected.
    for extra in potential_subs[1:]:
        _remove_quietly(extra)


def _run_download(url, ydl_opts, download_subs):
    """Run yt-dlp with *ydl_opts*; on a subtitle-related failure retry once without subtitles."""
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except yt_dlp.utils.DownloadError as e:
        error_str = str(e)
        if "No address associated with hostname" in error_str or "Failed to resolve" in error_str:
            print(i18n("\n[CRITICAL ERROR] Connection Failure: Could not access YouTube."))
            print(i18n("Check your internet connection or if there is any DNS block."))
            _safe_print(i18n("Details: {}"), e)
            sys.exit(1)
        elif download_subs and ("Unable to download video subtitles" in error_str or "429" in error_str):
            _safe_print(i18n("\nWarning: Error downloading subtitles ({})."), e)
            print(i18n("Retrying ONLY the video (without subtitles)..."))

            retry_opts = dict(ydl_opts)
            retry_opts['writesubtitles'] = False
            retry_opts['writeautomaticsub'] = False
            retry_opts['postprocessors'] = [
                p for p in ydl_opts.get('postprocessors', [])
                if 'Subtitle' not in p.get('key', '')
            ]
            try:
                with yt_dlp.YoutubeDL(retry_opts) as ydl:
                    ydl.download([url])
            except Exception as e2:
                _safe_print(i18n("Fatal error on second attempt: {}"), e2)
                raise
        elif "is not a valid URL" in error_str:
            print(i18n("Error: the entered link is not valid."))
            raise
        else:
            _safe_print(i18n("Download error: {}"), e)
            raise
    except Exception as e:
        _safe_print(i18n("Unexpected error: {}"), e)
        raise


def download(url, base_root="VIRALS", download_subs=True, quality="best"):
    """Download a video into its own project folder and normalize its subtitles.

    The video is stored as ``<base_root>/<sanitized title>/input.mp4``. If
    that file already exists and is larger than 1 KiB the download is
    skipped and the existing file is reused.

    Args:
        url: Address of the video, passed to yt-dlp.
        base_root: Directory under which the project folder is created.
        download_subs: When true, manual and automatic subtitles are
            requested and converted to SRT.
        quality: One of ``"best"``, ``"1080p"``, ``"720p"``, ``"480p"``;
            any other value behaves like ``"best"``.

    Returns:
        A ``(final_video_path, project_folder)`` tuple.

    Raises:
        SystemExit: With code 1 when the host cannot be resolved.
        Exception: Any other yt-dlp/download error is printed and re-raised.
    """
    # 1. Fetch the video metadata to derive the folder name.
    print(i18n("Extracting video information..."))
    title, video_id = _fetch_title_and_id(url)

    if title:
        safe_title = sanitize_filename(title)
        if not safe_title:
            # A title with no CP1252-representable characters sanitizes to
            # "", which would make project_folder == base_root and let
            # unrelated videos share one input.mp4. Use the video id instead.
            fallback_id = sanitize_filename(str(video_id)) if video_id else ""
            safe_title = fallback_id or i18n("Unknown_Video")
        _safe_print(i18n("Detected title: {}"), title)
    else:
        print(i18n("WARNING: Title could not be obtained. Using 'Unknown_Video'."))
        safe_title = i18n("Unknown_Video")

    # 2. Create the folder structure.
    project_folder = os.path.join(base_root, safe_title)
    os.makedirs(project_folder, exist_ok=True)

    output_filename = 'input'
    output_path_base = os.path.join(project_folder, output_filename)
    final_video_path = f"{output_path_base}.mp4"

    # Reuse an existing download unless it looks truncated.
    if os.path.exists(final_video_path):
        if os.path.getsize(final_video_path) > 1024:
            _safe_print(i18n("Video already exists at: {}"), final_video_path)
            print(i18n("Skipping download and reusing local file."))
            return final_video_path, project_folder
        print(i18n("Existing file found but seems corrupted/empty. Downloading again..."))
        _remove_quietly(final_video_path)

    # Remove a stale temporary file from a previous run.
    _remove_quietly(f"{output_path_base}.temp.mp4")

    # Map the quality label to a yt-dlp format selector.
    quality_map = {
        "best": 'bestvideo+bestaudio/best',
        "1080p": 'bestvideo[height<=1080]+bestaudio/best[height<=1080]',
        "720p": 'bestvideo[height<=720]+bestaudio/best[height<=720]',
        "480p": 'bestvideo[height<=480]+bestaudio/best[height<=480]',
    }
    selected_format = quality_map.get(quality, 'bestvideo+bestaudio/best')
    print(i18n("Configuring download quality: {} -> {}").format(quality, selected_format))

    ydl_opts = {
        'format': selected_format,
        'overwrites': True,
        'outtmpl': output_path_base,
        'postprocessor_args': [
            '-movflags', 'faststart'
        ],
        'merge_output_format': 'mp4',
        'progress_hooks': [progress_hook],
        # Subtitle options
        'writesubtitles': download_subs,
        'writeautomaticsub': download_subs,
        # NOTE(review): 'sp' is not a standard language code (Spanish is
        # 'es'), so 'sp.*' most likely never matches. Left unchanged because
        # fixing it alters which subtitle track ends up as input.srt.
        'subtitleslangs': ['pt.*', 'en.*', 'sp.*'],
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        },
        'skip_download': False,
        'quiet': False,
        'no_warnings': False,
        'force_ipv4': True,
    }
    if download_subs:
        ydl_opts['postprocessors'] = [{
            'key': 'FFmpegSubtitlesConvertor',
            'format': 'srt',
        }]

    _safe_print(i18n("Downloading video to: {}..."), project_folder)
    _run_download(url, ydl_opts, download_subs)

    # Standardize the subtitle name (input.srt, or input.vtt if conversion
    # fails). Subtitle problems are reported but never fail the download.
    try:
        _standardize_subtitles(project_folder)
    except Exception as e_ren:
        _safe_print(i18n("Error processing subtitles: {}"), e_ren)

    return final_video_path, project_folder