# Source: audio-separator-models / assets / process_dirtylist-txt.py
# Uploaded by: lainlives
# Commit message: Add files using upload-large-folder tool
# Commit: 34d5737 (verified)
import argparse
import hashlib
import json
import os
import re
import subprocess
import sys
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
import requests
from huggingface_hub import HfApi, snapshot_download
import model_tools as mt
from model_tools import download_files_from_txt, download_hf_repo
# Owner and name of the target Hugging Face repository; the functions below
# join them as f"{REPO_OWNER}/{REPO_NAME}" to build the repo id.
REPO_OWNER = "lainlives"
REPO_NAME = "audio-separator-models"
# Repository type handed to the mt.push_* upload helpers.
repo_type = "model"
# Access token read from the environment once, at import time; used as the
# default value of the --token flag. None when HF_TOKEN is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
def uploadthething():
    """Upload to the Hugging Face repo ``REPO_OWNER/REPO_NAME``.

    Command-line flags (read from ``sys.argv``):

    * ``--large``: upload with ``mt.push_large_folder_to_hf`` instead of
      ``mt.push_to_hf``.
    * ``--token``: access token; defaults to the module-level ``HF_TOKEN``
      (the ``HF_TOKEN`` environment variable captured at import time).

    Flags this parser does not define are ignored rather than rejected,
    because the script entry point runs the download and upload steps in the
    same process and both parse the same ``sys.argv``.
    """
    repo_id = f"{REPO_OWNER}/{REPO_NAME}"

    parser = argparse.ArgumentParser(description="Model Upload Tool")
    parser.add_argument(
        "--large",
        action="store_true",
        help="Upload using upload-large-folder",
    )
    parser.add_argument(
        "--token",
        type=str,
        default=HF_TOKEN,
        dest="HF_TOKEN",
        help="Your access token.",
    )
    # parse_known_args() instead of parse_args(): a flag meant for the
    # download parser (e.g. --fromtxt) must not abort the upload step.
    args, _unknown = parser.parse_known_args()

    # The mt.push_* helpers take no token argument, so the only way to make
    # --token effective is to export it to the environment.
    # NOTE(review): assumes the helpers authenticate through huggingface_hub,
    # which reads HF_TOKEN from the environment -- confirm in model_tools.
    if args.HF_TOKEN:
        os.environ["HF_TOKEN"] = args.HF_TOKEN

    if args.large:
        mt.push_large_folder_to_hf(repo_id, repo_type)
    else:
        mt.push_to_hf(repo_id, repo_type)
def downloadthething():
    """Download model files into the current working directory.

    Command-line flags (read from ``sys.argv``):

    * ``--fromtxt``: call ``mt.download_files_from_txt`` with
      ``assets/links.txt`` instead of fetching the Hugging Face repo
      ``REPO_OWNER/REPO_NAME`` with ``mt.download_hf_repo``.

    The token passed to ``mt.download_hf_repo`` is the ``HF_TOKEN``
    environment variable as read when this function is called (``None`` if
    unset).

    Flags this parser does not define are ignored rather than rejected,
    because the script entry point runs the download and upload steps in the
    same process and both parse the same ``sys.argv``.
    """
    token = os.getenv("HF_TOKEN")
    repo_id = f"{REPO_OWNER}/{REPO_NAME}"
    local_dir = os.getcwd()

    parser = argparse.ArgumentParser(description="Model Download Tool")
    parser.add_argument(
        "--fromtxt",
        action="store_true",
        help="Download or process based on a text file instead of cloning Hugging Face repo",
    )
    # parse_known_args() instead of parse_args(): a flag meant for the
    # upload parser (e.g. --large, --token) must not abort the download step.
    args, _unknown = parser.parse_known_args()

    if args.fromtxt:
        # NOTE(review): the __main__ pipeline writes /tmp/sorted_links.txt,
        # while this reads assets/links.txt -- confirm that is intended.
        mt.download_files_from_txt("assets/links.txt", local_dir)
    else:
        mt.download_hf_repo(repo_id, local_dir, token)
if __name__ == "__main__":
    # Script pipeline: pull links out of dirtylist.txt with two extractors,
    # merge the two result files, clean/dedupe/sort the merged list, then run
    # the download step followed by the upload step.
    input_filename = "dirtylist.txt"
    characters_to_remove = [",", '"', "}"]

    html_part = "/tmp/extracted_html"
    other_part = "/tmp/extracted_other"
    merged_path = "/tmp/extracted_urls.txt"
    deduped_path = "/tmp/extracted_urls_deduped.txt"

    mt.extract_links_to_txt(input_filename, html_part)
    mt.extract_urls_from_file(input_filename, other_part)

    # Concatenate both extraction results, HTML-derived links first.
    with open(merged_path, "w") as outfile:
        for part_path in (html_part, other_part):
            with open(part_path, "r") as infile:
                outfile.write(infile.read())

    mt.remove_chars_from_file(merged_path, characters_to_remove)
    mt.remove_duplicate_lines(merged_path, deduped_path)
    mt.sort_links_by_extension(deduped_path, "/tmp/sorted_links.txt")

    downloadthething()
    uploadthething()