Spaces:
Paused
Paused
| #!/usr/bin/python3 | |
| # -*- coding: utf-8 -*- | |
| import argparse | |
| from pathlib import Path | |
| import os | |
| import shutil | |
| from typing import List | |
| from huggingface_hub.hf_api import CommitInfo | |
| from project_settings import project_path | |
| from toolbox.hf_netdisk.netdisk.repo_client import RepoClient | |
class NetdiskClient(RepoClient):
    """Use a Hugging Face dataset repository as a simple remote file store.

    Every method delegates to ``self.hf_api`` with ``repo_type="dataset"``.
    ``self.hf_api`` and ``self.make_sure_repo`` are not defined in this class;
    they are expected to come from ``RepoClient``.
    """

    def __init__(self, hf_token: str):
        """Forward the access token to ``RepoClient``.

        Args:
            hf_token: Hugging Face access token passed through unchanged.
        """
        super().__init__(hf_token=hf_token)

    def upload_file(self, src_file, tgt_file, repo_id: str) -> str:
        """Upload one local file into the dataset repository.

        Args:
            src_file: Passed as ``path_or_fileobj`` to ``hf_api.upload_file``.
            tgt_file: Destination path inside the repository.
            repo_id: Repository identifier, e.g. ``"user/name"``.

        Returns:
            Whatever ``hf_api.upload_file`` returns (annotated as ``str`` by
            the original author).
        """
        # Defined on RepoClient; presumably creates the repo when it is
        # missing -- TODO confirm against RepoClient.
        self.make_sure_repo(repo_id)
        tgt_file_url = self.hf_api.upload_file(
            path_or_fileobj=src_file,
            path_in_repo=tgt_file,
            repo_id=repo_id,
            repo_type="dataset",
        )
        return tgt_file_url

    def query_files(self, repo_id: str) -> List[str]:
        """Return the file paths that ``hf_api.list_repo_files`` reports for the repository.

        Args:
            repo_id: Repository identifier.
        """
        result: List[str] = self.hf_api.list_repo_files(
            repo_id=repo_id,
            repo_type="dataset",
        )
        return result

    def delete_file(self, path_in_repo: str, repo_id: str) -> CommitInfo:
        """Delete one file from the dataset repository.

        Args:
            path_in_repo: Path of the file inside the repository.
            repo_id: Repository identifier.

        Returns:
            The ``CommitInfo`` returned by ``hf_api.delete_file``.
        """
        result: CommitInfo = self.hf_api.delete_file(
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type="dataset",
        )
        return result

    def download_file(self, remote_file: str, local_file: str, repo_id: str, cache_dir: str = None) -> Path:
        """Download one repository file and move it to ``local_file``.

        Args:
            remote_file: Path of the file inside the repository.
            local_file: Destination path on the local file system; missing
                parent directories are created.
            repo_id: Repository identifier.
            cache_dir: Forwarded to ``hf_hub_download``; ``None`` leaves the
                choice of cache location to the library.

        Returns:
            ``local_file`` as a ``Path``.
        """
        # NOTE(review): local_dir_use_symlinks is kept exactly as in the
        # original call; recent huggingface_hub releases may deprecate or
        # ignore it -- confirm against the installed version.
        file_path = self.hf_api.hf_hub_download(
            filename=remote_file,
            repo_id=repo_id,
            repo_type="dataset",
            cache_dir=cache_dir,
            local_dir_use_symlinks=False,
        )
        # resolve() follows symlinks, so if the returned path is a link into
        # the cache the underlying file is what gets moved.
        file_path = Path(file_path).resolve()

        local_file = Path(local_file)
        local_file.parent.mkdir(parents=True, exist_ok=True)

        shutil.move(file_path, local_file.as_posix())
        return local_file
def get_args():
    """Parse the command-line options used by the entry points in this file.

    Returns:
        The ``argparse`` namespace with three string options:
        ``key_of_credentials``, ``src_file`` and ``tgt_file``.
    """
    # (flag, default) pairs; every option is a plain string.
    option_defaults = (
        ("--key_of_credentials", "hf_miyuki_write_token"),
        ("--src_file", (project_path / "README.md").as_posix()),
        ("--tgt_file", "video_platform/upload/README.md"),
    )

    parser = argparse.ArgumentParser()
    for flag, default_value in option_defaults:
        parser.add_argument(flag, default=default_value, type=str)

    return parser.parse_args()
def main():
    """Round-trip check: upload a file, list the repository, then delete the upload.

    The result of each of the three client calls is printed.
    """
    args = get_args()

    import log
    from project_settings import environment, project_path, log_directory, time_zone_info

    log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)

    # The token is looked up by the key name given on the command line.
    netdisk = NetdiskClient(
        hf_token=environment.get(key=args.key_of_credentials),
    )
    repo_id = "tianxing1994/hf_netdisk"

    uploaded = netdisk.upload_file(
        src_file=args.src_file,
        tgt_file=args.tgt_file,
        repo_id=repo_id,
    )
    print(uploaded)

    listing = netdisk.query_files(repo_id=repo_id)
    print(listing)

    # A download_file() call was present here in disabled (commented-out)
    # form in the original; it is intentionally not executed.

    deleted = netdisk.delete_file(
        path_in_repo=args.tgt_file,
        repo_id=repo_id,
    )
    print(deleted)
    return
def main2():
    """Download every repository file of one hard-coded platform / user type / user.

    Remote paths are read as ``.../<platform>/<user_type>/<user_name>/<date>/<name>``;
    paths with fewer than five components are ignored. Each match is stored
    under ``temp_directory`` in a ``<year>/<month>`` folder derived from the
    first six characters of the date component.
    """
    args = get_args()

    import log
    from project_settings import environment, project_path, temp_directory, log_directory, time_zone_info

    log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)

    client = NetdiskClient(
        hf_token=environment.get(key=args.key_of_credentials),
    )
    repo_id = "tianxing1994/hf_netdisk"
    cache_dir = temp_directory / "hf_cache_dir"

    for entry in client.query_files(repo_id=repo_id):
        remote_path = Path(entry)
        if len(remote_path.parts) < 5:
            continue

        # The four components just before the file name.
        platform, user_type, user_name, date = remote_path.parts[-5:-1]
        name = remote_path.name

        wanted = (
            platform in ("xianyu",)
            and user_type in ("customers",)
            and user_name in ("找光小吕",)
        )
        if not wanted:
            continue

        # date is sliced as YYYYMM...; only year and month are used in the path.
        year, month = date[:4], date[4:6]

        target_file = temp_directory / f"{platform}/{user_type}/{user_name}/{year}/{month}/{name}"
        target_file.parent.mkdir(parents=True, exist_ok=True)

        print(f"downloading; src_file: {remote_path.as_posix()}, tgt_file: {target_file.as_posix()}")
        client.download_file(
            remote_file=remote_path.as_posix(),
            local_file=target_file.as_posix(),
            repo_id=repo_id,
            cache_dir=cache_dir.as_posix(),
        )
    return
def main3():
    """Download one day's files for one hard-coded user and write an ffmpeg concat list.

    Remote paths are read as ``.../<platform>/<user_type>/<user_name>/<date>/<name>``;
    paths with fewer than five components, or not matching the hard-coded
    platform / user type / user / date, are ignored. Each match is stored as
    ``<date>/<name[1:16]>.mp4`` under ``temp_directory`` and listed in
    ``<date>/file.txt`` as a ``file '<name>.mp4'`` line.

    The list file is rewritten from scratch on every run and each local
    target is listed once, so re-running does not add duplicate entries.
    Files that already exist locally are not downloaded again.
    """
    args = get_args()

    import log
    from project_settings import environment, project_path, temp_directory, log_directory, time_zone_info

    log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)

    hf_token = environment.get(key=args.key_of_credentials)

    client = NetdiskClient(
        hf_token=hf_token,
    )

    repo_id = "tianxing1994/hf_netdisk"
    cache_dir = temp_directory / "hf_cache_dir"

    # local target -> remote path. A dict keeps first-seen order and lets
    # two remote files that map to the same local name be listed only once.
    downloads = {}
    # concat list file -> its entry lines, in listing order.
    concat_lists = {}

    for filename in client.query_files(repo_id=repo_id):
        filename = Path(filename)
        if len(filename.parts) < 5:
            continue

        # The four components just before the file name.
        platform, user_type, user_name, date = filename.parts[-5:-1]

        if platform not in ("xianyu",):
            continue
        if user_type not in ("customers",):
            continue
        if user_name not in ("找光小吕",):
            continue
        if date not in ("20260119",):
            continue

        # Characters 1..15 of the remote file name become the local stem.
        segment_name = f"{filename.name[1:16]}.mp4"
        segment_dir = temp_directory / f"{platform}/{user_type}/{user_name}/{date}"
        target_file = segment_dir / segment_name

        if target_file in downloads:
            continue
        downloads[target_file] = filename
        concat_lists.setdefault(segment_dir / "file.txt", []).append(f"file '{segment_name}'\n")

    # The list is consumed with:
    # ffmpeg -f concat -safe 0 -i file.txt -c copy output.mp4
    for map_target_file, entries in concat_lists.items():
        map_target_file.parent.mkdir(parents=True, exist_ok=True)
        # "w" (not "a+") so a re-run replaces the list instead of appending
        # to it; explicit UTF-8 keeps the output independent of the locale.
        with open(map_target_file.as_posix(), "w", encoding="utf-8") as f:
            f.writelines(entries)

    for target_file, filename in downloads.items():
        if target_file.exists():
            continue

        print(f"downloading; src_file: {filename.as_posix()}, tgt_file: {target_file.as_posix()}")
        client.download_file(
            remote_file=filename.as_posix(),
            local_file=target_file.as_posix(),
            repo_id=repo_id,
            cache_dir=cache_dir.as_posix(),
        )
    return
# Script entry point: only main3() is invoked; main() and main2() are
# defined in this file but not called from here.
if __name__ == "__main__":
    main3()