Oleg Shulyakov committed
Commit c96815e · Parent(s): 67040c8

Move base model creation into separate method
app.py
CHANGED
@@ -15,8 +15,25 @@ from apscheduler.schedulers.background import BackgroundScheduler
 SPACE_ID = os.environ.get("SPACE_ID")
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
-#
-
+# Folder
+DOWNLOAD_FOLDER = "./downloads"
+OUTPUT_FOLDER = "./outputs"
+
+def create_folder(folder_name: str):
+    if not os.path.exists(folder_name):
+        print(f"Creating folder: {folder_name}")
+        os.makedirs(folder_name)
+
+def is_valid_token(oauth_token):
+    if oauth_token is None or oauth_token.token is None:
+        return False
+
+    try:
+        whoami(oauth_token.token)
+    except Exception as e:
+        return False
+
+    return True
 
 # escape HTML for logging
 def escape(s: str) -> str:
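
The new is_valid_token helper folds the None-check and the whoami() round-trip into a single predicate, so every entry point can guard itself with one call. A minimal usage sketch (the handler name is illustrative, not part of the commit; gr and whoami come from app.py's existing imports):

def my_handler(oauth_token: gr.OAuthToken | None):
    # reject early if the token is missing or the Hub rejects it
    if is_valid_token(oauth_token) is False:
        raise gr.Error("You have to be logged in.")
    # safe to make authenticated HfApi calls from here on
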
@@ -27,6 +44,9 @@ def escape(s: str) -> str:
     s = s.replace("\n", "<br/>")
     return s
 
+def get_model_name(model_id: str):
+    return model_id.split('/')[-1]
+
 def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
     if not os.path.isfile(model_path):
         raise Exception(f"Model file not found: {model_path}")
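
get_model_name simply takes the last path segment of a repo id, so plain and namespaced ids behave the same:

get_model_name("mistralai/Mistral-7B-v0.1")  # -> "Mistral-7B-v0.1"
get_model_name("gpt2")                       # -> "gpt2"
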
@@ -59,8 +79,8 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
     print(f"Model path: {model_path}")
     print(f"Output dir: {outdir}")
 
-    if oauth_token is None or oauth_token.token is None:
-        raise …
+    if is_valid_token(oauth_token) is False:
+        raise gr.Error("You have to be logged in.")
 
     split_cmd = [
         "llama-gguf-split",
@@ -101,8 +121,8 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
     api = HfApi(token=oauth_token.token)
     for file in sharded_model_files:
         file_path = os.path.join(outdir, file)
-        print(f"Uploading file: {file_path}")
         try:
+            print(f"Uploading file: {file_path}")
             api.upload_file(
                 path_or_fileobj=file_path,
                 path_in_repo=file,
@@ -115,23 +135,16 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
 
     print("Sharded model has been uploaded successfully!")
 
-def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, repo_name, gguf_name, oauth_token: gr.OAuthToken | None):
-    if oauth_token is None or oauth_token.token is None:
-        raise gr.Error("You must be logged in to use GGUF-my-repo")
-
-    # validate the oauth token
-    try:
-        whoami(oauth_token.token)
-    except Exception as e:
-        raise gr.Error("You must be logged in to use GGUF-my-repo")
-
-    model_name = model_id.split('/')[-1]
+def download_base_model(token: str, model_id: str, outdir: tempfile.TemporaryDirectory):
+    model_name = get_model_name(model_id)
+
+    with tempfile.TemporaryDirectory(dir=DOWNLOAD_FOLDER) as tmpdir:
+        # Download model
+        print(f"Downloading model {model_name}")
+        local_dir = Path(tmpdir)/model_name  # Keep the model name as the dirname so the model name metadata is populated correctly
+        print(f"Local directory: {os.path.abspath(local_dir)}")
 
-        …
+        api = HfApi(token=token)
         pattern = (
             "*.safetensors"
             if any(
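
download_base_model now owns the whole download-and-convert step: it pulls the repo into a throwaway directory under DOWNLOAD_FOLDER, converts it, and returns the path of the fp16 GGUF written into outdir (the download directory is cleaned up when its with block exits). A minimal caller sketch, mirroring how process_model uses it later in this diff (the token and repo id are placeholders):

create_folder(DOWNLOAD_FOLDER)
create_folder(OUTPUT_FOLDER)
with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
    fp16 = download_base_model("hf_xxx", "mistralai/Mistral-7B-v0.1", outdir)
    # fp16 -> "<outdir>/Mistral-7B-v0.1_fp16.gguf"
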
@@ -144,40 +157,50 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
             else "*.bin"
         )
 
+        dl_pattern = ["*.md", "*.json", "*.model"]
         dl_pattern += [pattern]
 
-        …
+        api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
+        print("Model downloaded successfully!")
+
+        print(f"Model directory contents: {os.listdir(local_dir)}")
+        config_dir = local_dir/"config.json"
+        adapter_config_dir = local_dir/"adapter_config.json"
+        if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
+            raise Exception('adapter_config.json is present.<br/><br/>If you are converting a LoRA adapter to GGUF, please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-lora" target="_blank" style="text-decoration:underline">GGUF-my-lora</a>.')
+
+        # Convert HF to GGUF
+        fp16_model = str(Path(outdir)/f"{model_name}_fp16.gguf")
+        print(f"Converting to GGUF FP16: {os.path.abspath(fp16_model)}")
+        result = subprocess.run(
+            [
+                "python3", "/app/convert_hf_to_gguf.py", local_dir, "--outtype", "f16", "--outfile", fp16_model
+            ],
+            shell=False,
+            capture_output=True
+        )
+        print(f"Model directory contents: {result}")
+        if result.returncode != 0:
+            stderr_str = result.stderr.decode("utf-8")
+            raise Exception(f"Error converting to fp16: {stderr_str}")
+
+        print("Model converted to fp16 successfully!")
+        print(f"Converted model path: {os.path.abspath(fp16_model)}")
+
+        return fp16_model
+
+def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, repo_name, gguf_name, oauth_token: gr.OAuthToken | None):
+    # validate the oauth token
+    if is_valid_token(oauth_token) is False:
+        raise gr.Error("You must be logged in to use GGUF-my-repo")
+
+    print(f"Current working directory: {os.path.abspath(os.getcwd())}")
+    create_folder(DOWNLOAD_FOLDER)
+    create_folder(OUTPUT_FOLDER)
+
+    try:
+        with tempfile.TemporaryDirectory(dir=OUTPUT_FOLDER) as outdir:
+            fp16 = download_base_model(oauth_token.token, model_id, outdir)
 
             imatrix_path = Path(outdir)/"imatrix.dat"
 
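
The allow_patterns list keeps the download minimal: repo metadata (*.md, *.json, *.model) plus either safetensors or pickle weights, depending on what the repo contains. The any(...) predicate that picks the weight format is truncated in this view; a sketch of its likely shape, offered as an assumption rather than the commit's exact code:

api = HfApi(token=token)
pattern = (
    "*.safetensors"
    if any(
        # prefer safetensors whenever the repo ships any
        f.path.endswith(".safetensors")
        for f in api.list_repo_tree(repo_id=model_id, recursive=True)
    )
    else "*.bin"
)
dl_pattern = ["*.md", "*.json", "*.model"] + [pattern]
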
@@ -197,6 +220,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
                 print("Not using imatrix quantization.")
 
             # Quantize the model
+            model_name = get_model_name(model_id)
             quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
             quantized_gguf_path = str(Path(outdir)/quantized_gguf_name)
             if use_imatrix:
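
With model_name now derived inside the quantize step, the output filename is fully determined by the quantization choice. Worked examples of the two f-strings above:

model_name = "Mistral-7B-v0.1"
f"{model_name.lower()}-{'Q4_K_M'.lower()}.gguf"          # -> "mistral-7b-v0.1-q4_k_m.gguf"
f"{model_name.lower()}-{'IQ3_M'.lower()}-imat.gguf"      # -> "mistral-7b-v0.1-iq3_m-imat.gguf"
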
@@ -214,12 +238,13 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
                 stderr_str = result.stderr.decode("utf-8")
                 raise Exception(f"Error quantizing: {stderr_str}")
             print(f"Quantized successfully with {imatrix_q_method if use_imatrix else q_method} option!")
-            print(f"Quantized model path: {quantized_gguf_path}")
+            print(f"Quantized model path: {os.path.abspath(quantized_gguf_path)}")
 
             # Create empty repo
             username = whoami(oauth_token.token)["name"]
 
             repo_name = f"{username}/{model_name}-GGUF"
+            api = HfApi(token=oauth_token.token)
             new_repo_url = api.create_repo(repo_id=repo_name, exist_ok=True, private=private_repo)
             new_repo_id = new_repo_url.repo_id
             print("Repo created successfully!", new_repo_url)
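
Constructing api right before create_repo means the same authenticated client serves every later Hub call. A minimal sketch of the create-then-upload sequence, reusing the upload_file shape from the sharded-upload hunk above (the upload itself sits outside this hunk):

api = HfApi(token=oauth_token.token)
new_repo_url = api.create_repo(repo_id=repo_name, exist_ok=True, private=private_repo)
api.upload_file(
    path_or_fileobj=quantized_gguf_path,
    path_in_repo=quantized_gguf_name,
    repo_id=new_repo_url.repo_id,
)
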
@@ -329,6 +354,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
                 "llama.png",
             )
     except Exception as e:
+        print((f"Error processing model: {e}"))
         return (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{escape(str(e))}</pre>', "error.png")
 
 
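
The added print surfaces the raw exception in the Space logs, while the returned HTML still passes the message through escape(). Only the newline rule of escape() is visible in this diff; assuming it also neutralizes angle brackets (its "escape HTML for logging" comment suggests as much), the error page renders multi-line messages like so:

msg = "Error converting to fp16:\nunknown architecture"
html = f'<pre style="white-space:pre-wrap;">{escape(msg)}</pre>'
# the "\n" becomes "<br/>", so the message shows as two lines
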