Spaces:
Sleeping
Sleeping
File size: 14,557 Bytes
f7598da ac8854d f7598da ac8854d f7598da ac8854d f7598da ac8854d f7598da ac8854d f7598da ac8854d f7598da ac8854d f7598da ac8854d f7598da ac8854d f7598da ac8854d f7598da 80b326d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 |
import os
import re
import yt_dlp
import sys
from i18n.i18n import I18nAuto
# Shared translator instance. Throughout this module it is called as
# i18n("message") and the result is printed (presumably the localized text
# for the given English key - confirm against i18n.i18n.I18nAuto).
i18n = I18nAuto()
def sanitize_filename(name):
    """Return *name* with characters that are unsafe in file names removed.

    The result is meant to be used as a folder name and to be printable on a
    Windows console without encoding errors.

    Steps:
      1. Drop file-system reserved characters (``\\ / * ? : " < > |``) and
         ASCII control characters (newline, tab, ...), which Windows also
         rejects in file names.
      2. Drop everything that cannot be represented in CP1252. This keeps
         accented Latin letters (á, ç, é) but removes emojis and similar.
      3. Strip leading/trailing whitespace.

    Args:
        name: The raw text (for example a video title).

    Returns:
        The cleaned string. It may be empty when *name* consisted only of
        removed characters; callers must handle that case.
    """
    # Reserved characters plus control characters (0x00-0x1F).
    cleaned = re.sub(r'[\\/*?:"<>|\x00-\x1f]', "", name)

    try:
        cleaned = cleaned.encode('cp1252', 'ignore').decode('cp1252')
    except LookupError:
        # The CP1252 codec is not available in this Python build: fall back
        # to plain ASCII (this also removes accented characters).
        cleaned = cleaned.encode('ascii', 'ignore').decode('ascii')

    return cleaned.strip()
def progress_hook(d):
    """Print download progress; intended for yt-dlp's ``progress_hooks`` list.

    Args:
        d: Status dictionary supplied by the downloader. This function reads
            ``d['status']`` and, depending on it, ``_percent_str`` /
            ``_eta_str`` (while downloading) or ``filename`` (when finished).

    Progress reporting is best-effort: a malformed status dictionary or a
    console that cannot encode the message must never abort the download.
    """
    status = d['status']

    if status == 'downloading':
        try:
            # The percent sign is stripped and re-added so the output always
            # carries exactly one '%'.
            percent = d.get('_percent_str', '').replace('%', '')
            print(f"[download] {percent}% - {d.get('_eta_str', 'N/A')} remaining", flush=True)
        except Exception:
            # Deliberately ignored (best-effort), but no longer swallows
            # KeyboardInterrupt / SystemExit like a bare ``except:`` would.
            pass
    elif status == 'finished':
        message = f"[download] Download concluído: {d['filename']}"
        try:
            print(message, flush=True)
        except UnicodeEncodeError:
            # Same ASCII fallback the rest of this module uses for consoles
            # that cannot encode accented characters.
            print(message.encode('ascii', 'replace').decode('ascii'), flush=True)
# Lines starting with these prefixes are WebVTT/YouTube headers or metadata,
# not cue text, and are skipped during conversion.
_VTT_METADATA_PREFIXES = ("WEBVTT", "X-TIMESTAMP", "NOTE", "Kind:", "Language:")

# An existing "input.mp4" at or below this size is treated as corrupted/empty.
_MIN_VALID_VIDEO_BYTES = 1024


def _print_with_ascii_fallback(template, value):
    """Print ``template.format(value)``; retry with an ASCII-only value on encoding errors.

    Some Windows consoles raise UnicodeEncodeError for emojis and similar
    characters; in that case unsupported characters in *value* become '?'.
    """
    try:
        print(template.format(value))
    except UnicodeEncodeError:
        print(template.format(value.encode('ascii', 'replace').decode('ascii')))


def _remove_if_possible(path):
    """Delete *path*, ignoring file-system errors (best-effort cleanup)."""
    try:
        os.remove(path)
    except OSError:
        pass


def _fetch_title(url):
    """Return the title reported for *url*, or a falsy value if it cannot be obtained.

    First tries with Chrome browser cookies, then without cookies.
    """
    title = None

    # Attempt 1: with browser cookies.
    try:
        with yt_dlp.YoutubeDL({'quiet': True, 'no_warnings': True, 'cookiesfrombrowser': ('chrome',)}) as ydl:
            info = ydl.extract_info(url, download=False)
            title = info.get('title')
    except Exception as e:
        try:
            print(i18n("Warning: Failed to extract info with cookies: {}").format(e))
        except UnicodeEncodeError:
            print(i18n("Warning: Failed to extract info with cookies: [Encoding Error in Message]"))

    # Attempt 2: without cookies.
    if not title:
        try:
            with yt_dlp.YoutubeDL({'quiet': True, 'no_warnings': True}) as ydl:
                info = ydl.extract_info(url, download=False)
                title = info.get('title')
        except Exception as e:
            try:
                print(i18n("Error getting video info (without cookies): {}").format(e))
            except UnicodeEncodeError:
                print(i18n("Error getting video info (without cookies): [Encoding Error in Message]"))

    return title


def _vtt_time_to_srt(timestamp):
    """Convert a WebVTT timestamp to SRT form (comma decimals, always with hours)."""
    timestamp = timestamp.replace('.', ',')
    if timestamp.count(':') == 1:
        # "MM:SS,mmm" -> "00:MM:SS,mmm"
        timestamp = "00:" + timestamp
    return timestamp


def _vtt_lines_to_srt(lines):
    """Convert the lines of a WebVTT file into a list of SRT text chunks.

    Inline tags (e.g. the ``<00:00:00.560><c>`` karaoke markers) are removed
    and a text line identical to the previously emitted one is dropped, since
    roll-up style captions repeat the previous line. Every remaining text line
    becomes its own SRT entry using the most recent cue timing.
    """
    srt_content = []
    counter = 1
    last_text = ""
    current_start = None
    current_end = None

    for line in lines:
        clean_line = line.strip()
        if not clean_line or clean_line.startswith(_VTT_METADATA_PREFIXES):
            continue

        if "-->" in clean_line:
            parts = clean_line.split("-->")
            start = parts[0].strip()
            # Drop positioning settings such as "align:start position:0%".
            end = parts[1].strip().split(' ')[0]
            current_start = _vtt_time_to_srt(start)
            current_end = _vtt_time_to_srt(end)
            continue

        if current_start is None:
            # Text before the first cue timing (e.g. style blocks or cue
            # identifiers) has no timestamp to attach to; skip it instead of
            # failing the whole conversion.
            continue

        text = re.sub(r'<[^>]+>', '', clean_line).strip()
        if not text or text == last_text:
            continue

        srt_content.append(f"{counter}\n")
        srt_content.append(f"{current_start} --> {current_end}\n")
        srt_content.append(f"{text}\n\n")
        last_text = text
        counter += 1

    return srt_content


def _standardize_subtitles(project_folder):
    """Normalize downloaded subtitles in *project_folder* to a single ``input.srt``.

    Looks for ``input.<something>.vtt`` / ``input.<something>.srt``. The first
    match is converted (VTT) or renamed (SRT) to ``input.srt``; the other
    matches are deleted. If VTT conversion fails the file is kept as
    ``input.vtt``.
    """
    import glob

    # The folder name comes from the video title and may contain glob
    # metacharacters such as "[" and "]"; escape it so they match literally.
    search_root = glob.escape(project_folder)
    potential_subs = (
        glob.glob(os.path.join(search_root, "input.*.vtt"))
        + glob.glob(os.path.join(search_root, "input.*.srt"))
    )
    if not potential_subs:
        return

    # Use the first file found.
    best_sub = potential_subs[0]
    ext = os.path.splitext(best_sub)[1]
    new_name = os.path.join(project_folder, "input.srt")  # everything is standardized to .srt

    if ext.lower() == '.vtt':
        _print_with_ascii_fallback(
            i18n("Formatting complex VTT subtitle ({}) to clean SRT..."),
            os.path.basename(best_sub),
        )
        try:
            with open(best_sub, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            srt_content = _vtt_lines_to_srt(lines)

            with open(new_name, 'w', encoding='utf-8') as f_out:
                f_out.writelines(srt_content)

            _print_with_ascii_fallback(i18n("Subtitle converted and cleaned: {}"), new_name)
            _remove_if_possible(best_sub)
        except Exception as e_conv:
            print(i18n("Failed to convert VTT: {}. Keeping original.").format(e_conv))
            # Fallback: only rename the original file.
            new_name_fallback = os.path.join(project_folder, "input.vtt")
            if os.path.exists(new_name_fallback) and new_name_fallback != best_sub:
                _remove_if_possible(new_name_fallback)
            os.rename(best_sub, new_name_fallback)
    else:
        # Already SRT: just rename. The target is removed first because
        # os.rename does not overwrite an existing file on Windows.
        if os.path.exists(new_name) and new_name != best_sub:
            _remove_if_possible(new_name)
        os.rename(best_sub, new_name)
        _print_with_ascii_fallback(i18n("SRT subtitle renamed to: {}"), new_name)

    # Remove leftover subtitle files.
    for extra in potential_subs[1:]:
        _remove_if_possible(extra)


def download(url, base_root="VIRALS", download_subs=True, quality="best"):
    """Download a video (and optionally subtitles) into a per-title project folder.

    The video is stored as ``<base_root>/<sanitized title>/input.mp4``. If a
    file larger than 1 KiB already exists there, the download is skipped and
    the existing file is reused.

    Args:
        url: Video URL handed to yt-dlp.
        base_root: Directory under which the project folder is created.
        download_subs: When true, request manual and automatic subtitles and
            convert them to SRT.
        quality: One of ``"best"``, ``"1080p"``, ``"720p"``, ``"480p"``; any
            other value behaves like ``"best"``.

    Returns:
        A tuple ``(final_video_path, project_folder)``.

    Raises:
        SystemExit: When the download fails because the host name cannot be
            resolved (exit code 1).
        yt_dlp.utils.DownloadError: For other download failures, including a
            failed retry without subtitles.
        Exception: Any other unexpected error during download is re-raised.
    """
    # 1. Extract video information to obtain the title.
    print(i18n("Extracting video information..."))
    title = _fetch_title(url)

    if title:
        safe_title = sanitize_filename(title)
        _print_with_ascii_fallback(i18n("Detected title: {}"), title)
    else:
        print(i18n("WARNING: Title could not be obtained. Using 'Unknown_Video'."))
        safe_title = i18n("Unknown_Video")

    if not safe_title:
        # The title contained only characters that sanitizing removes (e.g.
        # only emojis). Without this, every such video would be written
        # directly into base_root and overwrite each other.
        safe_title = i18n("Unknown_Video")

    # 2. Create the folder structure.
    project_folder = os.path.join(base_root, safe_title)
    os.makedirs(project_folder, exist_ok=True)

    # Final video path.
    output_filename = 'input'
    output_path_base = os.path.join(project_folder, output_filename)
    final_video_path = f"{output_path_base}.mp4"

    # Reuse an existing download unless it looks corrupted/empty.
    if os.path.exists(final_video_path):
        if os.path.getsize(final_video_path) > _MIN_VALID_VIDEO_BYTES:
            _print_with_ascii_fallback(i18n("Video already exists at: {}"), final_video_path)
            print(i18n("Skipping download and reusing local file."))
            return final_video_path, project_folder
        print(i18n("Existing file found but seems corrupted/empty. Downloading again..."))
        _remove_if_possible(final_video_path)

    # Clean up a temp file left by a previous run.
    temp_path = f"{output_path_base}.temp.mp4"
    if os.path.exists(temp_path):
        _remove_if_possible(temp_path)

    # Quality mapping.
    quality_map = {
        "best": 'bestvideo+bestaudio/best',
        "1080p": 'bestvideo[height<=1080]+bestaudio/best[height<=1080]',
        "720p": 'bestvideo[height<=720]+bestaudio/best[height<=720]',
        "480p": 'bestvideo[height<=480]+bestaudio/best[height<=480]',
    }
    selected_format = quality_map.get(quality, 'bestvideo+bestaudio/best')
    print(i18n("Configuring download quality: {} -> {}").format(quality, selected_format))

    ydl_opts = {
        'format': selected_format,
        'overwrites': True,
        # The path contains the video title; a literal '%' must be written as
        # '%%' or yt-dlp would try to interpret it as a template field.
        'outtmpl': output_path_base.replace('%', '%%'),
        'postprocessor_args': [
            '-movflags', 'faststart'
        ],
        'merge_output_format': 'mp4',
        'progress_hooks': [progress_hook],
        # Subtitle options.
        'writesubtitles': download_subs,
        'writeautomaticsub': download_subs,
        # Patterns matched against language codes: Portuguese, English and
        # Spanish variants. Spanish codes start with "es" (the previous
        # pattern "sp.*" matched nothing).
        'subtitleslangs': ['pt.*', 'en.*', 'es.*'],
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        },
        'skip_download': False,
        'quiet': False,
        'no_warnings': False,
        'force_ipv4': True,
    }
    if download_subs:
        ydl_opts['postprocessors'] = [{
            'key': 'FFmpegSubtitlesConvertor',
            'format': 'srt',
        }]

    _print_with_ascii_fallback(i18n("Downloading video to: {}..."), project_folder)

    # Attempt 1: with the original configuration.
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except yt_dlp.utils.DownloadError as e:
        error_str = str(e)
        if "No address associated with hostname" in error_str or "Failed to resolve" in error_str:
            print(i18n("\n[CRITICAL ERROR] Connection Failure: Could not access YouTube."))
            print(i18n("Check your internet connection or if there is any DNS block."))
            print(i18n("Details: {}").format(e))
            sys.exit(1)
        elif download_subs and ("Unable to download video subtitles" in error_str or "429" in error_str):
            # Subtitles failed (possibly rate limited): retry the video alone.
            print(i18n("\nWarning: Error downloading subtitles ({}).").format(e))
            print(i18n("Retrying ONLY the video (without subtitles)..."))
            ydl_opts['writesubtitles'] = False
            ydl_opts['writeautomaticsub'] = False
            ydl_opts['postprocessors'] = [
                p for p in ydl_opts.get('postprocessors', []) if 'Subtitle' not in p.get('key', '')
            ]
            try:
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    ydl.download([url])
            except Exception as e2:
                print(i18n("Fatal error on second attempt: {}").format(e2))
                raise
        elif "is not a valid URL" in error_str:
            print(i18n("Error: the entered link is not valid."))
            raise
        else:
            print(i18n("Download error: {}").format(e))
            raise
    except Exception as e:
        print(i18n("Unexpected error: {}").format(e))
        raise

    # Rename/convert the subtitle to the standard name (input.srt). Subtitle
    # problems are reported but never fail the download.
    try:
        _standardize_subtitles(project_folder)
    except Exception as e_ren:
        print(i18n("Error processing subtitles: {}").format(e_ren))

    return final_video_path, project_folder