qgyd2021's picture
update
ea8b1fb
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import os
import shutil
from pathlib import Path
from typing import List, Optional

from huggingface_hub.hf_api import CommitInfo

from project_settings import project_path
from toolbox.hf_netdisk.netdisk.repo_client import RepoClient
class NetdiskClient(RepoClient):
    """File-store style helper on top of a Hugging Face *dataset* repository.

    Every call in this class passes ``repo_type="dataset"``. The Hub API
    handle (``self.hf_api``) and ``make_sure_repo`` are not defined here;
    they are expected to come from ``RepoClient``.
    """

    def __init__(self, hf_token: str):
        """Forward ``hf_token`` to the ``RepoClient`` initializer."""
        super().__init__(hf_token=hf_token)

    def upload_file(self, src_file, tgt_file, repo_id: str) -> str:
        """Upload ``src_file`` to the path ``tgt_file`` inside ``repo_id``.

        ``make_sure_repo(repo_id)`` is called first (defined in
        ``RepoClient``; presumably it creates the repository when it is
        missing -- confirm there).

        :param src_file: passed to the Hub API as ``path_or_fileobj``.
        :param tgt_file: destination path inside the repository.
        :param repo_id: repository identifier, e.g. ``"user/name"``.
        :return: whatever ``hf_api.upload_file`` returns.
        """
        self.make_sure_repo(repo_id)
        return self.hf_api.upload_file(
            path_or_fileobj=src_file,
            path_in_repo=tgt_file,
            repo_id=repo_id,
            repo_type="dataset",
        )

    def query_files(self, repo_id: str) -> List[str]:
        """Return the file paths reported by ``hf_api.list_repo_files`` for ``repo_id``."""
        result: List[str] = self.hf_api.list_repo_files(
            repo_id=repo_id,
            repo_type="dataset",
        )
        return result

    def delete_file(self, path_in_repo: str, repo_id: str) -> CommitInfo:
        """Delete ``path_in_repo`` from ``repo_id``.

        :return: the ``CommitInfo`` produced by ``hf_api.delete_file``.
        """
        result: CommitInfo = self.hf_api.delete_file(
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type="dataset",
        )
        return result

    def download_file(
        self,
        remote_file: str,
        local_file: str,
        repo_id: str,
        cache_dir: Optional[str] = None,
    ) -> Path:
        """Download ``remote_file`` from ``repo_id`` and move it to ``local_file``.

        The file is fetched through ``hf_api.hf_hub_download`` and then
        *moved* (not copied) to ``local_file``; missing parent directories
        of ``local_file`` are created.

        :param remote_file: path of the file inside the repository.
        :param local_file: destination path on the local file system.
        :param repo_id: repository identifier.
        :param cache_dir: forwarded to ``hf_hub_download``; ``None`` leaves
            the choice to the library.
        :return: ``local_file`` as a ``Path``.
        """
        # NOTE(review): ``local_dir_use_symlinks`` appears to be deprecated in
        # recent huggingface_hub releases -- confirm whether it is still needed.
        file_path = self.hf_api.hf_hub_download(
            filename=remote_file,
            repo_id=repo_id,
            repo_type="dataset",
            cache_dir=cache_dir,
            local_dir_use_symlinks=False,
        )
        # resolve() follows symlinks, so the real file is what gets moved
        # if the returned path happens to be a link.
        file_path = Path(file_path).resolve()

        local_file = Path(local_file)
        local_file.parent.mkdir(parents=True, exist_ok=True)

        shutil.move(file_path, local_file.as_posix())
        return local_file
def get_args():
    """Parse the command line and return the resulting namespace.

    Options (all strings):

    * ``--key_of_credentials`` -- defaults to ``"hf_miyuki_write_token"``.
    * ``--src_file`` -- defaults to ``README.md`` under ``project_path``.
    * ``--tgt_file`` -- defaults to ``"video_platform/upload/README.md"``.
    """
    default_src_file = (project_path / "README.md").as_posix()

    # (flag, default) pairs; every option is a plain string.
    option_table = (
        ("--key_of_credentials", "hf_miyuki_write_token"),
        ("--src_file", default_src_file),
        ("--tgt_file", "video_platform/upload/README.md"),
    )

    parser = argparse.ArgumentParser()
    for flag, default_value in option_table:
        parser.add_argument(flag, default=default_value, type=str)

    return parser.parse_args()
def main():
    """Round-trip check: upload ``--src_file``, list the repo, delete the upload.

    The result of each client call is printed. The token is looked up in
    ``environment`` under the key given by ``--key_of_credentials``.
    """
    args = get_args()

    import log
    from project_settings import environment, project_path, log_directory, time_zone_info

    log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)

    repo_id = "tianxing1994/hf_netdisk"
    client = NetdiskClient(hf_token=environment.get(key=args.key_of_credentials))

    print(client.upload_file(repo_id=repo_id, src_file=args.src_file, tgt_file=args.tgt_file))

    print(client.query_files(repo_id=repo_id))

    # A download can be tried at this point as well, e.g.
    # client.download_file(remote_file=<path in repo>, local_file="temp.mp4", repo_id=repo_id)

    print(client.delete_file(path_in_repo=args.tgt_file, repo_id=repo_id))
def main2():
    """Download one hard-coded user's files, grouped by year and month.

    Repository paths are interpreted by their last five components as
    ``platform/user_type/user_name/date/name``; shorter paths are skipped.
    Only entries whose platform, user type and user name match the
    hard-coded values are downloaded, to
    ``temp_directory/platform/user_type/user_name/<date[:4]>/<date[4:6]>/name``.
    """
    args = get_args()

    import log
    from project_settings import environment, project_path, temp_directory, log_directory, time_zone_info

    log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)

    repo_id = "tianxing1994/hf_netdisk"
    client = NetdiskClient(hf_token=environment.get(key=args.key_of_credentials))

    # Same cache location for every download.
    cache_dir = temp_directory / "hf_cache_dir"

    for remote_path in map(Path, client.query_files(repo_id=repo_id)):
        if len(remote_path.parts) < 5:
            continue

        platform, user_type, user_name, date = remote_path.parts[-5:-1]
        name = remote_path.name

        if (platform, user_type, user_name) != ("xianyu", "customers", "找光小吕"):
            continue

        year, month = date[:4], date[4:6]
        target_file = temp_directory / f"{platform}/{user_type}/{user_name}/{year}/{month}/{name}"
        target_file.parent.mkdir(parents=True, exist_ok=True)

        print(f"downloading; src_file: {remote_path.as_posix()}, tgt_file: {target_file.as_posix()}")
        client.download_file(
            remote_file=remote_path.as_posix(),
            local_file=target_file.as_posix(),
            repo_id=repo_id,
            cache_dir=cache_dir.as_posix(),
        )
def _append_concat_entry(list_file, entry, known_entries):
    """Append ``entry`` to the concat list ``list_file`` unless already listed.

    ``known_entries`` maps a list path to the set of lines known to be in
    that file; it is filled from disk the first time a path is seen and
    updated on every append. Returns ``True`` when a line was written.
    """
    lines = known_entries.get(list_file)
    if lines is None:
        lines = set()
        if list_file.exists():
            # errors="replace": a list written earlier in another encoding
            # must not abort the run; such lines simply will not match.
            with open(list_file.as_posix(), "r", encoding="utf-8", errors="replace") as f:
                lines.update(line.rstrip("\n") for line in f)
        known_entries[list_file] = lines

    if entry in lines:
        return False

    with open(list_file.as_posix(), "a", encoding="utf-8") as f:
        f.write(f"{entry}\n")
    lines.add(entry)
    return True


def main3():
    """Download one user's files for the selected date(s) and build a concat list.

    Repository paths are interpreted by their last five components as
    ``platform/user_type/user_name/date/name``; shorter paths are skipped.
    For every match the file is stored as
    ``temp_directory/platform/user_type/user_name/date/<name[1:16]>.mp4``
    (``name[1:16]`` looks like a timestamp embedded in the remote file
    name -- confirm against the naming scheme) and a ``file '<...>.mp4'``
    line is recorded in ``file.txt`` in the same directory, for use with::

        ffmpeg -f concat -safe 0 -i file.txt -c copy output.mp4

    Each entry is written to ``file.txt`` at most once, so re-running the
    script does not duplicate segments in the list. Files that already
    exist locally are not downloaded again.
    """
    args = get_args()

    import log
    from project_settings import environment, temp_directory, log_directory, time_zone_info

    log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)

    hf_token = environment.get(key=args.key_of_credentials)
    client = NetdiskClient(
        hf_token=hf_token,
    )

    repo_id = "tianxing1994/hf_netdisk"
    cache_dir = temp_directory / "hf_cache_dir"
    # Lines already present in each concat list, keyed by list path.
    known_entries = {}

    file_list = client.query_files(
        repo_id=repo_id,
    )
    for filename in file_list:
        filename = Path(filename)
        if len(filename.parts) < 5:
            continue

        platform, user_type, user_name, date = filename.parts[-5:-1]
        name = filename.name

        if platform not in ("xianyu",):
            continue
        if user_type not in ("customers",):
            continue
        if user_name not in ("找光小吕",):
            continue
        if date not in ("20260119",):
            continue

        target_file = temp_directory / f"{platform}/{user_type}/{user_name}/{date}/{name[1:16]}.mp4"
        target_file.parent.mkdir(parents=True, exist_ok=True)

        map_target_file = temp_directory / f"{platform}/{user_type}/{user_name}/{date}/file.txt"
        # Recorded before the download, and also for files that are already
        # present locally, so the list covers every matching remote file.
        _append_concat_entry(map_target_file, f"file '{name[1:16]}.mp4'", known_entries)

        if target_file.exists():
            continue

        print(f"downloading; src_file: {filename.as_posix()}, tgt_file: {target_file.as_posix()}")
        client.download_file(
            remote_file=filename.as_posix(),
            local_file=target_file.as_posix(),
            repo_id=repo_id,
            cache_dir=cache_dir.as_posix(),
        )
    return
# Script entry point. Only main3() is run; main() and main2() are defined
# above but are not called from here.
if __name__ == "__main__":
    main3()