| | |
| | from huggingface_hub import login, logout, get_token, whoami, repo_exists |
| | import os |
| | import sys |
| | import subprocess |
| | import glob |
| | import time |
| |
|
| | |
# Shell commands and path conventions for each supported os.name value.
# Tuple order: clear-screen, move, recursive delete, copy,
# virtualenv interpreter, path separator.
_PLATFORM_COMMANDS = {
    'nt': ('cls', 'move', 'rmdir /s /q', 'copy', 'venv\\scripts\\python.exe', '\\'),
    'posix': ('clear', 'mv', 'rm -rf', 'cp', './venv/bin/python', '/'),
}

oname = os.name
if oname not in _PLATFORM_COMMANDS:
    # Neither Windows nor POSIX: no command set is defined, so stop here.
    sys.exit('This script is not compatible with your machine.')
osclear, osmv, osrmd, oscp, pyt, slsh = _PLATFORM_COMMANDS[oname]
def clear_screen():
    """Clear the terminal by running the platform's clear command (`osclear`)."""
    command = osclear
    os.system(command)
| |
|
| | |
# Authenticate with the Hugging Face Hub.
# `tfound` records how credentials were obtained during this run:
#   'true'                  - an already-stored token was accepted
#   'false'                 - a token was typed in at one of the prompts below
#   'false but logged out'  - the stored token was rejected and the replacement
#                             prompt failed or was left empty
#   'false but skipped'     - no token was stored and the prompt failed or was
#                             left empty
if os.environ.get('HF_TOKEN') is not None:
    # A token rejected here came from the environment; the script exits
    # rather than prompting, and asks the user to fix the variable.
    try:
        login(get_token())
    except ValueError:
        print("You have an invalid token set in your environment variable HF_TOKEN. This will cause issues with this script\nRemove the variable or set it to a valid token.")
        sys.exit("Exiting...")

stored_token = get_token()
if stored_token is not None:
    tfound = 'true'
    try:
        login(stored_token)
    except ValueError:
        # Stored token was rejected: ask for a replacement.
        tfound = 'false'
        try:
            login(input("API token is no longer valid. Enter your new HuggingFace token (empty to logout): "))
        except Exception:
            # `Exception` (not a bare except) so Ctrl+C still aborts the
            # script instead of being treated as "log out".
            logout()
            print("Logging out... (Unable to access private or gated models)")
            tfound = 'false but logged out'
            time.sleep(3)
else:
    # Nothing stored: offer an optional login.
    tfound = "false"
    try:
        login(input("API token not detected. Enter your HuggingFace token (empty to skip): "))
    except Exception:
        # Same reasoning as above: keep the best-effort skip, but let
        # KeyboardInterrupt / SystemExit propagate.
        print("Skipping login... (Unable to access private or gated models)")
        tfound = "false but skipped"
        time.sleep(3)
clear_screen()
| |
|
| | |
# Ask which Hub repository to quantize and derive the local names from it.
# Surrounding whitespace is dropped so a stray space does not become part
# of the repo id or of the directory names built from it.
repo_url = input("Enter unquantized model repository (User/Repo): ").strip()

try:
    repo_found = repo_exists(repo_url)
except ValueError:
    # NOTE(review): huggingface_hub is expected to reject malformed repo ids
    # with a ValueError subclass (HFValidationError) - confirm against the
    # installed version. Treat that the same as "not found".
    repo_found = False
if not repo_found:
    print(f"Model repo doesn't exist at https://huggingface.co/{repo_url}")
    sys.exit("Exiting...")

# Directory name under models/: the repo id with '/' replaced by '_'.
model = repo_url.replace("/", "_")
# Repo name without the owner. Taking the last component (instead of index 1)
# gives the same result for "User/Repo" and no longer raises IndexError when
# the id has no owner prefix.
modelname = repo_url.split("/")[-1]
clear_screen()
| |
|
| | |
# Ask how many quants to build. A non-integer answer re-prompts instead of
# ending the script with a ValueError traceback.
count_prompt = "Enter the number of quants you want to create: "
while True:
    try:
        qmount = int(input(count_prompt))
    except ValueError:
        count_prompt = "Please enter a whole number: "
    else:
        break
# Stored as count + 1 so it can be used directly as the exclusive upper bound
# of a 1-based range().
qmount += 1
clear_screen()
| |
|
| | |
# Collect one bits-per-weight value per requested quant, keyed "bpw1", "bpw2", ...
print(f"Type the BPW for the following {qmount - 1} quants. Recommend staying over 2.4 BPW. Use the vram calculator to find the best BPW values: https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator")
qnum = {}
for i in range(1, qmount):
    bpw_prompt = f"Enter BPW for quant {i} (2.00-8.00): "
    # Re-prompt on input that is not a number rather than crashing and
    # losing the values already entered.
    while True:
        try:
            qnum[f"bpw{i}"] = float(input(bpw_prompt))
        except ValueError:
            bpw_prompt = f"Please enter a number for quant {i} (2.00-8.00): "
        else:
            break
clear_screen()
| |
|
| | |
# Requested BPW values as a list, ordered from smallest to largest.
bpwvalue = sorted(qnum.values())
| |
|
| | |
# Ask whether the source model directory should be removed once all quants
# are done. An empty answer counts as 'n'; anything other than y/n re-prompts.
delmodel = input("Do you want to delete the original model? (Won't delete if paused or failed) (y/N): ").lower() or 'n'
while delmodel not in ('y', 'n'):
    delmodel = input("Please enter 'y' or 'n': ").lower() or 'n'
if delmodel == 'y':
    print(f"Deleting dir models/{model} after quants are finished.")
    # Pause so the notice can be read before the screen is cleared.
    time.sleep(3)
clear_screen()
| |
|
| | |
# Download the model unless the `converted-st` file already exists inside
# its directory under models/.
converted_marker = f"models{slsh}{model}{slsh}converted-st"
needs_download = not os.path.exists(converted_marker)
if needs_download:
    result = subprocess.run(f"{pyt} download-model.py {repo_url}", shell=True)
    if result.returncode != 0:
        print("Download failed.")
        sys.exit("Exiting...")
    clear_screen()
| |
|
| | |
# If the model directory holds no *.safetensors files, offer to convert the
# weights; quantization cannot proceed without them.
if not glob.glob(f"models/{model}/*.safetensors"):
    convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ").lower()
    while convertst not in ('y', 'n'):
        convertst = input("Please enter 'y' or 'n': ").lower()
    if convertst != 'y':
        sys.exit("Can't quantize a non-safetensors model. Exiting...")

    convusebf16 = input("Would you like to use bf16 loading? Will reduce ram usage (y/n): ").lower()
    while convusebf16 not in ('y', 'n'):
        convusebf16 = input("Please enter 'y' or 'n': ").lower()
    usingbf16 = "--bf16" if convusebf16 == 'y' else ""

    print("Converting weights to safetensors, please wait...")
    # The converter writes to a sibling "<model>-st" directory.
    result = subprocess.run(f"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st {usingbf16}", shell=True)
    if result.returncode != 0:
        print("Converting failed. Please look for a safetensors model or convert model manually.")
        sys.exit("Exiting...")

    # Replace the original directory with the converted one, then drop an
    # empty `converted-st` file into it.
    subprocess.run(f"{osrmd} models{slsh}{model}", shell=True)
    subprocess.run(f"{osmv} models{slsh}{model}-st models{slsh}{model}", shell=True)
    with open(f"models{slsh}{model}{slsh}converted-st", 'w'):
        pass
    print("Finished converting")
    clear_screen()
| |
|
| | |
# Run exllamav2's convert.py once per requested BPW, smallest first.
measure_dir = f"measurements{slsh}{model}-measure"
measure_file = f"{measure_dir}{slsh}measurement.json"
for bpw in bpwvalue:
    work_dir = f"{model}-exl2-{bpw}bpw-WD"
    out_dir = f"{modelname}-exl2-quants{slsh}{modelname}-exl2-{bpw}bpw"

    # When a saved measurement.json exists it is passed to convert.py via -m;
    # otherwise this pass's measurement is saved afterwards (cmdir is True).
    cmdir = not os.path.exists(measure_file)
    mskip = "" if cmdir else f" -m {measure_file}"

    print(f"Starting quantization for BPW {bpw}")
    os.makedirs(work_dir, exist_ok=True)
    os.makedirs(out_dir, exist_ok=True)
    subprocess.run(f"{oscp} models{slsh}{model}{slsh}config.json {work_dir}", shell=True)

    result = subprocess.run(f"{pyt} exllamav2/convert.py -i models/{model} -o {work_dir} -cf {out_dir} -b {bpw}{mskip}", shell=True)
    if result.returncode != 0:
        print("Quantization failed.")
        sys.exit("Exiting...")

    if cmdir:
        # Keep the measurement produced by this pass, plus an empty file
        # whose name tells the user when the folder can be removed.
        os.makedirs(measure_dir, exist_ok=True)
        subprocess.run(f"{oscp} {work_dir}{slsh}measurement.json {measure_dir}", shell=True)
        with open(f"{measure_dir}/Delete folder when no more quants are needed from this model", 'w'):
            pass

    # The working directory is only scratch space for this BPW.
    subprocess.run(f"{osrmd} {work_dir}", shell=True)
| | |
| | |
# Remove the source model directory if the user asked for it.
if delmodel == 'y':
    model_dir = f"models{slsh}{model}"
    subprocess.run(f"{osrmd} {model_dir}", shell=True)
    # Report success only if the directory is actually gone; the delete
    # command's outcome was previously ignored.
    if os.path.exists(model_dir):
        print(f"Could not delete models/{model}. Please remove it manually.")
    else:
        print(f"Deleted models/{model}")
| |
|
| | |
# `tfound == 'false'` means a token was entered at a prompt during this run,
# so tell the user which account is now active and how to undo the login.
# The message is built from explicit lines so its layout does not depend on
# how this block is indented in the source.
if tfound == 'false':
    print(
        f"\nYou are now logged in as {whoami().get('fullname', None)}.\n"
        "\n"
        "To logout, use the hf command line interface 'huggingface-cli logout'\n"
        "To view your active account, use 'huggingface-cli whoami'\n"
    )

print("Finished quantizing. Exiting...")
| |
|