Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,15 +21,14 @@ if not HF_TOKEN:
|
|
| 21 |
logging.warning("HF_TOKEN environment variable not set. Packaging and uploading will fail.")
|
| 22 |
|
| 23 |
api = HfApi()
|
| 24 |
-
|
|
|
|
| 25 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 26 |
|
| 27 |
-
# The Dockerfile guarantees these files exist, so we just define the paths.
|
| 28 |
LLAMA_CPP_DIR = Path("llama.cpp")
|
| 29 |
LLAMA_CPP_CONVERT_SCRIPT = LLAMA_CPP_DIR / "convert.py"
|
| 30 |
LLAMA_CPP_QUANTIZE_SCRIPT = LLAMA_CPP_DIR / "quantize"
|
| 31 |
|
| 32 |
-
# Verify that the build was successful during startup
|
| 33 |
if not LLAMA_CPP_QUANTIZE_SCRIPT.exists():
|
| 34 |
error_msg = "FATAL ERROR: llama.cpp binaries not found. The Docker build may have failed."
|
| 35 |
logging.error(error_msg)
|
|
@@ -94,12 +93,10 @@ def stage_3_4_gguf_quantize(model_path_or_id: str, original_model_id: str, quant
|
|
| 94 |
log_stream = "[STAGE 3 & 4] Converting to GGUF using llama.cpp...\n"
|
| 95 |
run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
|
| 96 |
model_name = original_model_id.replace('/', '_')
|
| 97 |
-
# Use absolute paths for outputs to avoid issues with changing working directories
|
| 98 |
gguf_path = os.path.abspath(os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-gguf"))
|
| 99 |
os.makedirs(gguf_path, exist_ok=True)
|
| 100 |
f16_gguf_path = os.path.join(gguf_path, "model-f16.gguf")
|
| 101 |
quantized_gguf_path = os.path.join(gguf_path, "model.gguf")
|
| 102 |
-
# Use absolute path for model input if it's a local directory
|
| 103 |
absolute_model_path = os.path.abspath(model_path_or_id) if os.path.exists(model_path_or_id) else model_path_or_id
|
| 104 |
try:
|
| 105 |
convert_command = ["python3", "convert.py", absolute_model_path, "--outfile", f16_gguf_path, "--outtype", "f16"]
|
|
@@ -182,7 +179,7 @@ def run_amop_pipeline(model_id: str, pipeline_type: str, do_prune: bool, prune_p
|
|
| 182 |
raise ValueError("Invalid pipeline type selected.")
|
| 183 |
full_log += log
|
| 184 |
yield {final_output: "Packaging & Uploading (4/5)", log_output: full_log}
|
| 185 |
-
final_message, log = stage_5_package_and_upload(model_id,
|
| 186 |
full_log += log
|
| 187 |
yield {final_output: gr.update(value="SUCCESS", label="Status"), log_output: full_log, success_box: gr.Markdown(f"✅ **Success!** Model available: [{repo_id_for_link}](https://huggingface.co/{repo_id_for_link})", visible=True), run_button: gr.Button(interactive=True, value="Run Optimization Pipeline", variant="primary"), analyze_button: gr.Button(interactive=True, value="Analyze Model")}
|
| 188 |
except Exception as e:
|
|
|
|
| 21 |
logging.warning("HF_TOKEN environment variable not set. Packaging and uploading will fail.")
|
| 22 |
|
| 23 |
api = HfApi()
|
| 24 |
+
# Use the /tmp directory which is always writable in a container environment
|
| 25 |
+
OUTPUT_DIR = "/tmp/optimized_models"
|
| 26 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 27 |
|
|
|
|
| 28 |
LLAMA_CPP_DIR = Path("llama.cpp")
|
| 29 |
LLAMA_CPP_CONVERT_SCRIPT = LLAMA_CPP_DIR / "convert.py"
|
| 30 |
LLAMA_CPP_QUANTIZE_SCRIPT = LLAMA_CPP_DIR / "quantize"
|
| 31 |
|
|
|
|
| 32 |
if not LLAMA_CPP_QUANTIZE_SCRIPT.exists():
|
| 33 |
error_msg = "FATAL ERROR: llama.cpp binaries not found. The Docker build may have failed."
|
| 34 |
logging.error(error_msg)
|
|
|
|
| 93 |
log_stream = "[STAGE 3 & 4] Converting to GGUF using llama.cpp...\n"
|
| 94 |
run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
|
| 95 |
model_name = original_model_id.replace('/', '_')
|
|
|
|
| 96 |
gguf_path = os.path.abspath(os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-gguf"))
|
| 97 |
os.makedirs(gguf_path, exist_ok=True)
|
| 98 |
f16_gguf_path = os.path.join(gguf_path, "model-f16.gguf")
|
| 99 |
quantized_gguf_path = os.path.join(gguf_path, "model.gguf")
|
|
|
|
| 100 |
absolute_model_path = os.path.abspath(model_path_or_id) if os.path.exists(model_path_or_id) else model_path_or_id
|
| 101 |
try:
|
| 102 |
convert_command = ["python3", "convert.py", absolute_model_path, "--outfile", f16_gguf_path, "--outtype", "f16"]
|
|
|
|
| 179 |
raise ValueError("Invalid pipeline type selected.")
|
| 180 |
full_log += log
|
| 181 |
yield {final_output: "Packaging & Uploading (4/5)", log_output: full_log}
|
| 182 |
+
final_message, log = stage_5_package_and_upload(model_id, optimized_path, full_log, options)
|
| 183 |
full_log += log
|
| 184 |
yield {final_output: gr.update(value="SUCCESS", label="Status"), log_output: full_log, success_box: gr.Markdown(f"✅ **Success!** Model available: [{repo_id_for_link}](https://huggingface.co/{repo_id_for_link})", visible=True), run_button: gr.Button(interactive=True, value="Run Optimization Pipeline", variant="primary"), analyze_button: gr.Button(interactive=True, value="Analyze Model")}
|
| 185 |
except Exception as e:
|