#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Use a Hugging Face dataset repository as a simple file store ("netdisk").

Provides :class:`NetdiskClient` (upload / list / delete / download) plus a few
script entry points that exercise it against ``DEFAULT_REPO_ID``.
"""
import argparse
from pathlib import Path
import os
import shutil
from typing import Dict, List, Optional, Set, Tuple

from huggingface_hub.hf_api import CommitInfo

from project_settings import project_path
from toolbox.hf_netdisk.netdisk.repo_client import RepoClient


# Dataset repository every script entry point in this file operates on.
DEFAULT_REPO_ID = "tianxing1994/hf_netdisk"


class NetdiskClient(RepoClient):
    """File-level operations on a Hugging Face ``dataset`` repository.

    All methods go through ``self.hf_api``, which is provided by
    :class:`RepoClient` (not visible in this file).
    """

    def __init__(self, hf_token: str):
        super(NetdiskClient, self).__init__(hf_token=hf_token)

    def upload_file(self, src_file, tgt_file, repo_id: str) -> str:
        """Upload ``src_file`` to ``tgt_file`` inside the dataset repo ``repo_id``.

        ``make_sure_repo`` is inherited from ``RepoClient``; presumably it
        creates the repository when it does not exist yet -- TODO confirm.

        :return: whatever ``HfApi.upload_file`` returns for the commit.
        """
        self.make_sure_repo(repo_id)
        tgt_file_url = self.hf_api.upload_file(
            path_or_fileobj=src_file,
            path_in_repo=tgt_file,
            repo_id=repo_id,
            repo_type="dataset",
        )
        return tgt_file_url

    def query_files(self, repo_id: str) -> List[str]:
        """Return the paths of all files in the dataset repo ``repo_id``."""
        result: List[str] = self.hf_api.list_repo_files(
            repo_id=repo_id,
            repo_type="dataset",
        )
        return result

    def delete_file(self, path_in_repo: str, repo_id: str) -> CommitInfo:
        """Delete ``path_in_repo`` from the dataset repo ``repo_id``.

        :return: the ``CommitInfo`` of the deletion commit.
        """
        result: CommitInfo = self.hf_api.delete_file(
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type="dataset",
        )
        return result

    def download_file(self,
                      remote_file: str,
                      local_file: str,
                      repo_id: str,
                      cache_dir: Optional[str] = None,
                      ) -> Path:
        """Download ``remote_file`` from ``repo_id`` and move it to ``local_file``.

        The file is first fetched with ``hf_hub_download`` (into ``cache_dir``
        when given), then moved out of that location to ``local_file``; parent
        directories of ``local_file`` are created as needed.

        :return: ``local_file`` as a :class:`~pathlib.Path`.
        """
        file_path = self.hf_api.hf_hub_download(
            filename=remote_file,
            repo_id=repo_id,
            repo_type="dataset",
            cache_dir=cache_dir,
            local_dir_use_symlinks=False,
        )
        # Resolve symlinks so the real file (not a link to it) is what gets moved.
        file_path = Path(file_path).resolve()

        local_file = Path(local_file)
        local_file.parent.mkdir(parents=True, exist_ok=True)

        shutil.move(file_path, local_file.as_posix())
        return local_file


def _split_remote_path(remote_file: str) -> Optional[Tuple[Path, str, str, str, str]]:
    """Split a repo path of the form ``.../platform/user_type/user_name/date/name``.

    :return: ``(path, platform, user_type, user_name, date)``, or ``None`` when
        the path has fewer than five components.
    """
    path = Path(remote_file)
    if len(path.parts) < 5:
        return None
    platform, user_type, user_name, date = path.parts[-5:-1]
    return path, platform, user_type, user_name, date


def get_args():
    """Parse the command-line arguments shared by the script entry points."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--key_of_credentials",
        default="hf_miyuki_write_token",
        type=str,
    )
    parser.add_argument(
        "--src_file",
        default=(project_path / "README.md").as_posix(),
        type=str,
    )
    parser.add_argument(
        "--tgt_file",
        default="video_platform/upload/README.md",
        type=str,
    )
    args = parser.parse_args()
    return args


def main():
    """Smoke test: upload ``--src_file``, list the repo, then delete the upload."""
    args = get_args()

    import log
    from project_settings import environment, log_directory, time_zone_info

    log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)

    hf_token = environment.get(key=args.key_of_credentials)

    client = NetdiskClient(
        hf_token=hf_token,
    )

    result = client.upload_file(
        repo_id=DEFAULT_REPO_ID,
        src_file=args.src_file,
        tgt_file=args.tgt_file,
    )
    print(result)

    result = client.query_files(
        repo_id=DEFAULT_REPO_ID,
    )
    print(result)

    # Example download (disabled):
    # result = client.download_file(
    #     remote_file="<platform>/<user_type>/<user_name>/<date>/<name>.mp4",
    #     local_file="temp.mp4",
    #     repo_id=DEFAULT_REPO_ID,
    # )
    # print(result)

    result = client.delete_file(
        path_in_repo=args.tgt_file,
        repo_id=DEFAULT_REPO_ID,
    )
    print(result)


def main2():
    """Download every file of one user into ``temp_directory``, grouped by year/month."""
    args = get_args()

    import log
    from project_settings import environment, temp_directory, log_directory, time_zone_info

    log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)

    hf_token = environment.get(key=args.key_of_credentials)

    client = NetdiskClient(
        hf_token=hf_token,
    )

    file_list = client.query_files(
        repo_id=DEFAULT_REPO_ID,
    )

    cache_dir = temp_directory / "hf_cache_dir"

    for remote_file in file_list:
        parsed = _split_remote_path(remote_file)
        if parsed is None:
            continue
        filename, platform, user_type, user_name, date = parsed
        name = filename.name

        if platform not in ("xianyu",):
            continue
        if user_type not in ("customers",):
            continue
        if user_name not in ("找光小吕",):
            continue

        # The date component is sliced as YYYYMM...; only year and month are
        # used in the local layout.
        year = date[:4]
        month = date[4:6]

        target_file = temp_directory / f"{platform}/{user_type}/{user_name}/{year}/{month}/{name}"
        target_file.parent.mkdir(parents=True, exist_ok=True)

        print(f"downloading; src_file: {filename.as_posix()}, tgt_file: {target_file.as_posix()}")
        client.download_file(
            remote_file=filename.as_posix(),
            local_file=target_file.as_posix(),
            repo_id=DEFAULT_REPO_ID,
            cache_dir=cache_dir.as_posix(),
        )


def main3():
    """Download one user's clips for one date and write an ffmpeg concat list.

    For every matching remote file the clip is stored as ``<name[1:16]>.mp4``
    and listed in ``file.txt`` next to it, so the clips can be joined with:

        ffmpeg -f concat -safe 0 -i file.txt -c copy output.mp4

    ``file.txt`` is rebuilt from scratch on every run and each clip is listed
    once, so re-running the script does not duplicate entries. Clips that
    already exist locally are not downloaded again.
    """
    args = get_args()

    import log
    from project_settings import environment, temp_directory, log_directory, time_zone_info

    log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)

    hf_token = environment.get(key=args.key_of_credentials)

    client = NetdiskClient(
        hf_token=hf_token,
    )

    file_list = client.query_files(
        repo_id=DEFAULT_REPO_ID,
    )

    cache_dir = temp_directory / "hf_cache_dir"

    # Concat-list file -> clip names already written to it during this run.
    listed_clips: Dict[Path, Set[str]] = {}

    for remote_file in file_list:
        parsed = _split_remote_path(remote_file)
        if parsed is None:
            continue
        filename, platform, user_type, user_name, date = parsed
        name = filename.name

        if platform not in ("xianyu",):
            continue
        if user_type not in ("customers",):
            continue
        if user_name not in ("找光小吕",):
            continue
        if date not in ("20260119",):
            continue

        # Characters 1..15 of the remote name become the local clip name;
        # presumably a YYYYMMDD_HHMMSS timestamp -- TODO confirm against the
        # uploader's naming scheme.
        clip_name = f"{name[1:16]}.mp4"

        target_file = temp_directory / f"{platform}/{user_type}/{user_name}/{date}/{clip_name}"
        target_file.parent.mkdir(parents=True, exist_ok=True)

        map_target_file = temp_directory / f"{platform}/{user_type}/{user_name}/{date}/file.txt"

        # First touch in this run truncates the list; later touches append.
        is_new_list = map_target_file not in listed_clips
        clips = listed_clips.setdefault(map_target_file, set())
        if clip_name not in clips:
            clips.add(clip_name)
            mode = "w" if is_new_list else "a"
            with open(map_target_file.as_posix(), mode, encoding="utf-8") as f:
                f.write(f"file '{clip_name}'\n")

        if target_file.exists():
            continue

        print(f"downloading; src_file: {filename.as_posix()}, tgt_file: {target_file.as_posix()}")
        client.download_file(
            remote_file=filename.as_posix(),
            local_file=target_file.as_posix(),
            repo_id=DEFAULT_REPO_ID,
            cache_dir=cache_dir.as_posix(),
        )


if __name__ == "__main__":
    main3()