"""Helpers for probing media duration and downloading files to disk."""

import hashlib
import os
import re
import subprocess
import tempfile
from pathlib import Path

import gdown
import requests


def get_duration(path: str) -> float:
    """Return the duration reported by ``ffprobe`` for the file at *path*.

    The command is executed without a shell, so characters such as quotes,
    ``$`` or ``;`` in *path* are passed to ffprobe verbatim instead of being
    interpreted (and possibly executed) by the shell.

    Args:
        path: Location of the media file handed to ffprobe.

    Returns:
        The ``format=duration`` value printed by ffprobe, parsed as a float.

    Raises:
        subprocess.CalledProcessError: ffprobe exited with a non-zero status,
            or the ffprobe executable could not be found.
        ValueError: ffprobe's output could not be parsed as a float.
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "csv=p=0",
        str(path),
    ]
    try:
        raw = subprocess.check_output(cmd)
    except FileNotFoundError as err:
        # A POSIX shell reports a missing executable as exit status 127,
        # which check_output turns into CalledProcessError.  Keep raising
        # that type so callers written against the shell-based version
        # still catch the failure.
        raise subprocess.CalledProcessError(127, cmd) from err
    return float(raw.decode().strip())


def is_url(string: str) -> bool:
    """Return True when *string* starts with ``http://`` or ``https://``."""
    return string.startswith(("http://", "https://"))


def detect_id(url: str) -> str | None:
    """Extract the identifier following ``d/`` or ``folders/`` in *url*.

    Returns:
        The run of word characters and hyphens that follows the first
        ``d/`` or ``folders/`` found in *url*, or None when there is none.
    """
    m = re.search(r"(?:d/|folders/)([\w-]+)", url)
    if m:
        return m.group(1)
    return None


def download_by_id(id, output) -> str:
    """Download the item with the given id via ``gdown`` unless *output* exists.

    Args:
        id: Identifier passed to ``gdown.download(id=...)``.  The name
            shadows the builtin but is kept so keyword callers keep working.
        output: Destination path; when it already exists nothing is fetched.

    Returns:
        *output*, unchanged.
    """
    if not Path(output).exists():
        gdown.download(id=id, output=output)
    return output


def download(
    url,
    output,
    *,
    timeout: float | tuple[float, float] | None = 30,
) -> str:
    """Download *url* to *output* unless *output* already exists.

    URLs containing ``drive.google.com`` are delegated to ``gdown``; every
    other URL is streamed with ``requests``.  The streamed body is written
    to ``<output>.part`` first and only moved into place once the transfer
    finished, so an interrupted download never leaves a truncated file that
    a later call would mistake for a completed one.

    Args:
        url: Address to fetch.
        output: Destination path.
        timeout: Passed to ``requests.get``; seconds to wait for the
            connection and between received bytes.  ``None`` waits forever.

    Returns:
        *output*, unchanged.

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.RequestException: The request failed or timed out.
        OSError: The file could not be written or moved into place.
    """
    if Path(output).exists():
        return output

    if 'drive.google.com' in url:
        gdown.download(url, output=output)
        return output

    partial = Path(f"{os.fspath(output)}.part")
    try:
        # The context manager releases the connection even if writing fails.
        with requests.get(url, stream=True, timeout=timeout) as res:
            res.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in res.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial, output)
    except BaseException:
        # Also covers KeyboardInterrupt: remove the incomplete file, then
        # let the original error propagate untouched.
        partial.unlink(missing_ok=True)
        raise
    return output


def download_drive_file(url: str, output: str) -> str:
    """Download the item referenced by *url* to *output*.

    The identifier is taken from *url* with ``detect_id`` and fetched with
    ``download_by_id``.

    Returns:
        *output*, unchanged.

    Raises:
        ValueError: No identifier could be found in *url*.
    """
    file_id = detect_id(url)
    if not file_id:
        raise ValueError("Link invalid")
    return download_by_id(file_id, output)


def md5(s: str) -> str:
    """Return the hexadecimal MD5 digest of *s* encoded with the default UTF-8."""
    return hashlib.md5(s.encode()).hexdigest()