Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import yt_dlp | |
| import sys | |
| from i18n.i18n import I18nAuto | |
| i18n = I18nAuto() | |
def sanitize_filename(name):
    """Return *name* reduced to characters that are safe in a file or folder name.

    Steps, in order:

    1. Drop the characters reserved by common file systems: ``\\ / * ? : " < > |``.
    2. Drop everything that cannot be encoded as CP1252 (emojis, for example)
       while keeping accented Latin letters. If the CP1252 codec is not
       available, fall back to plain ASCII, which also drops accents.
    3. Trim surrounding whitespace, then trailing periods and spaces, because
       Windows does not allow a directory name to end with either.

    The result may be an empty string (e.g. for an emoji-only title); callers
    must handle that case.
    """
    # Characters reserved by the file system.
    cleaned = re.sub(r'[\\/*?:"<>|]', "", name)

    try:
        # 'ignore' silently drops unencodable characters, so the only failure
        # possible here is the codec itself being unavailable.
        cleaned = cleaned.encode('cp1252', 'ignore').decode('cp1252')
    except LookupError:
        cleaned = cleaned.encode('ascii', 'ignore').decode('ascii')

    # "Wait for it..." -> "Wait for it": a trailing dot or space in a folder
    # name is rejected or silently altered by Windows.
    return cleaned.strip().rstrip(". ")
def progress_hook(d):
    """Print one progress line per status update received from the downloader.

    *d* is the status dictionary passed to a progress hook. The keys read
    here are ``status``, ``_percent_str``, ``_eta_str`` and ``filename``.
    Missing keys never raise: an exception escaping a hook would propagate
    into the download that invoked it.
    """
    status = d.get('status')
    if status == 'downloading':
        try:
            # The percent sign is stripped from the value and re-added by the
            # format string below.
            p = d.get('_percent_str', '').replace('%', '')
            print(f"[download] {p}% - {d.get('_eta_str', 'N/A')} remaining", flush=True)
        except Exception:
            # Progress output is best-effort (e.g. a console that cannot
            # encode the text). KeyboardInterrupt/SystemExit still propagate.
            pass
    elif status == 'finished':
        print(f"[download] Download concluído: {d.get('filename', 'N/A')}", flush=True)
# Lines starting with one of these prefixes are VTT/YouTube headers or
# metadata and carry no subtitle text.
_VTT_HEADER_PREFIXES = ("WEBVTT", "X-TIMESTAMP", "NOTE", "Kind:", "Language:")

# Preferred subtitle languages, most wanted first (Portuguese, English, Spanish).
_SUBTITLE_LANG_PRIORITY = ("pt", "en", "es")


def _print_console_safe(message, value):
    """Print ``message.format(value)``, retrying with an ASCII-only *value*.

    Some consoles raise UnicodeEncodeError on characters such as emojis; in
    that case the unencodable characters of *value* are replaced by ``?``.
    """
    try:
        print(message.format(value))
    except UnicodeEncodeError:
        print(message.format(str(value).encode('ascii', 'replace').decode('ascii')))


def _remove_quietly(path):
    """Delete *path* on a best-effort basis; a failure leaves the file in place."""
    try:
        os.remove(path)
    except OSError:
        pass


def _fetch_title(url, ydl_options):
    """Return the title reported for *url* without downloading (may be None)."""
    with yt_dlp.YoutubeDL(ydl_options) as ydl:
        info = ydl.extract_info(url, download=False)
        return info.get('title')


def _to_srt_timestamp(stamp):
    """Convert a VTT timestamp to SRT form: comma decimals and an hours field."""
    stamp = stamp.replace('.', ',')
    if stamp.count(':') == 1:
        stamp = "00:" + stamp
    return stamp


def _vtt_to_srt_entries(lines):
    """Turn the lines of a VTT file into a list of SRT text chunks.

    Inline tags (``<00:00:00.560><c>`` karaoke markup) are removed and a text
    line identical to the previously emitted one is skipped, which collapses
    the "roll-up" repetition found in YouTube captions. Text that appears
    before the first timestamp line is ignored.
    """
    entries = []
    counter = 1
    last_text = ""
    cue_start = cue_end = None

    for raw_line in lines:
        line = raw_line.strip()
        if line.startswith(_VTT_HEADER_PREFIXES):
            continue

        if "-->" in line:
            parts = line.split("-->")
            cue_start = _to_srt_timestamp(parts[0].strip())
            # Drop positioning settings such as "align:start position:0%".
            cue_end = _to_srt_timestamp(parts[1].strip().split(' ')[0])
        elif line:
            text = re.sub(r'<[^>]+>', '', line).strip()
            if not text or cue_start is None:
                continue
            # Consecutive-duplicate filter.
            if text == last_text:
                continue
            entries.append(f"{counter}\n")
            entries.append(f"{cue_start} --> {cue_end}\n")
            entries.append(f"{text}\n\n")
            last_text = text
            counter += 1

    return entries


def _subtitle_rank(path):
    """Sort key: preferred language first, then .vtt before .srt, then name."""
    name = os.path.basename(path)
    stem, ext = os.path.splitext(name)
    # The glob pattern guarantees the stem looks like "input.<lang>".
    lang = stem[len("input."):].lower()
    rank = len(_SUBTITLE_LANG_PRIORITY)
    for position, prefix in enumerate(_SUBTITLE_LANG_PRIORITY):
        if lang.startswith(prefix):
            rank = position
            break
    return (rank, ext.lower() != '.vtt', name)


def _standardize_subtitles(project_folder):
    """Keep a single subtitle in *project_folder*, stored as ``input.srt``.

    The best candidate among ``input.<lang>.vtt`` / ``input.<lang>.srt`` is
    converted (VTT) or renamed (SRT) to ``input.srt``. If the VTT conversion
    fails the file is kept as ``input.vtt``. Every other candidate is deleted.
    """
    import glob

    # Escape the folder so characters such as "[" in a video title are not
    # interpreted as glob wildcards.
    folder_pattern = glob.escape(project_folder)
    candidates = (
        glob.glob(os.path.join(folder_pattern, "input.*.vtt"))
        + glob.glob(os.path.join(folder_pattern, "input.*.srt"))
    )
    if not candidates:
        return

    candidates.sort(key=_subtitle_rank)
    best_sub = candidates[0]
    srt_path = os.path.join(project_folder, "input.srt")  # everything is standardized to .srt

    if os.path.splitext(best_sub)[1].lower() == '.vtt':
        _print_console_safe(
            i18n("Formatting complex VTT subtitle ({}) to clean SRT..."),
            os.path.basename(best_sub),
        )
        try:
            with open(best_sub, 'r', encoding='utf-8') as f:
                lines = f.readlines()
            with open(srt_path, 'w', encoding='utf-8') as f_out:
                f_out.writelines(_vtt_to_srt_entries(lines))
            _print_console_safe(i18n("Subtitle converted and cleaned: {}"), srt_path)
            _remove_quietly(best_sub)
        except Exception as e_conv:
            print(i18n("Failed to convert VTT: {}. Keeping original.").format(e_conv))
            # Fallback: keep the original subtitle under the standard name.
            os.replace(best_sub, os.path.join(project_folder, "input.vtt"))
    else:
        # Already SRT: only rename (os.replace overwrites an existing target).
        os.replace(best_sub, srt_path)
        _print_console_safe(i18n("SRT subtitle renamed to: {}"), srt_path)

    # Remove the leftovers.
    for extra in candidates[1:]:
        _remove_quietly(extra)


def download(url, base_root="VIRALS", download_subs=True, quality="best"):
    """Download the video at *url* into ``<base_root>/<sanitized title>/input.mp4``.

    Args:
        url: Address of the video to download.
        base_root: Directory under which the per-video folder is created.
        download_subs: When true, regular and automatic subtitles are
            requested and converted to SRT; the result is stored as
            ``input.srt`` in the project folder.
        quality: One of ``"best"``, ``"1080p"``, ``"720p"``, ``"480p"``.
            Any other value behaves like ``"best"``.

    Returns:
        A tuple ``(final_video_path, project_folder)``. An existing
        ``input.mp4`` larger than 1 KiB is reused without downloading.

    Raises:
        yt_dlp.utils.DownloadError: If the download fails, including the
            retry without subtitles.
        SystemExit: With status 1 when the error message indicates a DNS /
            name-resolution failure.
    """
    # 1. Extract the video information to obtain the title.
    print(i18n("Extracting video information..."))
    title = None

    # Attempt 1: with browser cookies.
    try:
        title = _fetch_title(
            url,
            {'quiet': True, 'no_warnings': True, 'cookiesfrombrowser': ('chrome',)},
        )
    except Exception as e:
        try:
            print(i18n("Warning: Failed to extract info with cookies: {}").format(e))
        except UnicodeEncodeError:
            print(i18n("Warning: Failed to extract info with cookies: [Encoding Error in Message]"))

    # Attempt 2: without cookies.
    if not title:
        try:
            title = _fetch_title(url, {'quiet': True, 'no_warnings': True})
        except Exception as e:
            try:
                print(i18n("Error getting video info (without cookies): {}").format(e))
            except UnicodeEncodeError:
                print(i18n("Error getting video info (without cookies): [Encoding Error in Message]"))

    safe_title = ""
    if title:
        safe_title = sanitize_filename(title)
        _print_console_safe(i18n("Detected title: {}"), title)

    # Final fallback. This also covers a title made only of characters that
    # sanitizing removes, which would otherwise make the project folder equal
    # to base_root itself.
    if not safe_title:
        print(i18n("WARNING: Title could not be obtained. Using 'Unknown_Video'."))
        safe_title = i18n("Unknown_Video")

    # 2. Create the folder structure.
    project_folder = os.path.join(base_root, safe_title)
    os.makedirs(project_folder, exist_ok=True)

    # Final path of the video.
    output_filename = 'input'
    output_path_base = os.path.join(project_folder, output_filename)
    final_video_path = f"{output_path_base}.mp4"

    # Reuse a previous download unless it is suspiciously small.
    if os.path.exists(final_video_path):
        if os.path.getsize(final_video_path) > 1024:
            _print_console_safe(i18n("Video already exists at: {}"), final_video_path)
            print(i18n("Skipping download and reusing local file."))
            return final_video_path, project_folder
        print(i18n("Existing file found but seems corrupted/empty. Downloading again..."))
        _remove_quietly(final_video_path)

    # Clean up a leftover temporary file.
    temp_path = f"{output_path_base}.temp.mp4"
    if os.path.exists(temp_path):
        _remove_quietly(temp_path)

    # Quality mapping.
    quality_map = {
        "best": 'bestvideo+bestaudio/best',
        "1080p": 'bestvideo[height<=1080]+bestaudio/best[height<=1080]',
        "720p": 'bestvideo[height<=720]+bestaudio/best[height<=720]',
        "480p": 'bestvideo[height<=480]+bestaudio/best[height<=480]',
    }
    selected_format = quality_map.get(quality, 'bestvideo+bestaudio/best')
    print(i18n("Configuring download quality: {} -> {}").format(quality, selected_format))

    ydl_opts = {
        'format': selected_format,
        'overwrites': True,
        'outtmpl': output_path_base,
        'postprocessor_args': [
            '-movflags', 'faststart'
        ],
        'merge_output_format': 'mp4',
        'progress_hooks': [progress_hook],
        # Subtitle options.
        'writesubtitles': download_subs,
        'writeautomaticsub': download_subs,
        # Portuguese, English and Spanish variants. Spanish is "es"; the
        # previous "sp.*" pattern matched no language.
        'subtitleslangs': ['pt.*', 'en.*', 'es.*'],
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        },
        'skip_download': False,
        'quiet': False,
        'no_warnings': False,
        'force_ipv4': True,
    }
    if download_subs:
        ydl_opts['postprocessors'] = [{
            'key': 'FFmpegSubtitlesConvertor',
            'format': 'srt',
        }]

    _print_console_safe(i18n("Downloading video to: {}..."), project_folder)

    # Attempt 1: with the original configuration.
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except yt_dlp.utils.DownloadError as e:
        error_str = str(e)
        if "No address associated with hostname" in error_str or "Failed to resolve" in error_str:
            print(i18n("\n[CRITICAL ERROR] Connection Failure: Could not access YouTube."))
            print(i18n("Check your internet connection or if there is any DNS block."))
            print(i18n("Details: {}").format(e))
            sys.exit(1)
        elif download_subs and ("Unable to download video subtitles" in error_str or "429" in error_str):
            print(i18n("\nWarning: Error downloading subtitles ({}).").format(e))
            print(i18n("Retrying ONLY the video (without subtitles)..."))
            ydl_opts['writesubtitles'] = False
            ydl_opts['writeautomaticsub'] = False
            ydl_opts['postprocessors'] = [
                p for p in ydl_opts.get('postprocessors', [])
                if 'Subtitle' not in p.get('key', '')
            ]
            try:
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    ydl.download([url])
            except Exception as e2:
                print(i18n("Fatal error on second attempt: {}").format(e2))
                raise
        elif "is not a valid URL" in error_str:
            print(i18n("Error: the entered link is not valid."))
            raise
        else:
            print(i18n("Download error: {}").format(e))
            raise
    except Exception as e:
        print(i18n("Unexpected error: {}").format(e))
        raise

    # Rename the subtitle to the standard name (input.srt); a VTT file is
    # converted to SRT for compatibility. Subtitle problems never fail the
    # download itself.
    try:
        _standardize_subtitles(project_folder)
    except Exception as e_ren:
        print(i18n("Error processing subtitles: {}").format(e_ren))

    return final_video_path, project_folder