broadfield-dev committed on
Commit
916de11
·
verified ·
1 Parent(s): a3cec94

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -6
app.py CHANGED
@@ -21,15 +21,14 @@ if not HF_TOKEN:
21
  logging.warning("HF_TOKEN environment variable not set. Packaging and uploading will fail.")
22
 
23
  api = HfApi()
24
- OUTPUT_DIR = "optimized_models"
 
25
  os.makedirs(OUTPUT_DIR, exist_ok=True)
26
 
27
- # The Dockerfile guarantees these files exist, so we just define the paths.
28
  LLAMA_CPP_DIR = Path("llama.cpp")
29
  LLAMA_CPP_CONVERT_SCRIPT = LLAMA_CPP_DIR / "convert.py"
30
  LLAMA_CPP_QUANTIZE_SCRIPT = LLAMA_CPP_DIR / "quantize"
31
 
32
- # Verify that the build was successful during startup
33
  if not LLAMA_CPP_QUANTIZE_SCRIPT.exists():
34
  error_msg = "FATAL ERROR: llama.cpp binaries not found. The Docker build may have failed."
35
  logging.error(error_msg)
@@ -94,12 +93,10 @@ def stage_3_4_gguf_quantize(model_path_or_id: str, original_model_id: str, quant
94
  log_stream = "[STAGE 3 & 4] Converting to GGUF using llama.cpp...\n"
95
  run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
96
  model_name = original_model_id.replace('/', '_')
97
- # Use absolute paths for outputs to avoid issues with changing working directories
98
  gguf_path = os.path.abspath(os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-gguf"))
99
  os.makedirs(gguf_path, exist_ok=True)
100
  f16_gguf_path = os.path.join(gguf_path, "model-f16.gguf")
101
  quantized_gguf_path = os.path.join(gguf_path, "model.gguf")
102
- # Use absolute path for model input if it's a local directory
103
  absolute_model_path = os.path.abspath(model_path_or_id) if os.path.exists(model_path_or_id) else model_path_or_id
104
  try:
105
  convert_command = ["python3", "convert.py", absolute_model_path, "--outfile", f16_gguf_path, "--outtype", "f16"]
@@ -182,7 +179,7 @@ def run_amop_pipeline(model_id: str, pipeline_type: str, do_prune: bool, prune_p
182
  raise ValueError("Invalid pipeline type selected.")
183
  full_log += log
184
  yield {final_output: "Packaging & Uploading (4/5)", log_output: full_log}
185
- final_message, log = stage_5_package_and_upload(model_id, optimized_model_path, full_log, options)
186
  full_log += log
187
  yield {final_output: gr.update(value="SUCCESS", label="Status"), log_output: full_log, success_box: gr.Markdown(f"✅ **Success!** Model available: [{repo_id_for_link}](https://huggingface.co/{repo_id_for_link})", visible=True), run_button: gr.Button(interactive=True, value="Run Optimization Pipeline", variant="primary"), analyze_button: gr.Button(interactive=True, value="Analyze Model")}
188
  except Exception as e:
 
21
  logging.warning("HF_TOKEN environment variable not set. Packaging and uploading will fail.")
22
 
23
  api = HfApi()
24
+ # Use the /tmp directory which is always writable in a container environment
25
+ OUTPUT_DIR = "/tmp/optimized_models"
26
  os.makedirs(OUTPUT_DIR, exist_ok=True)
27
 
 
28
  LLAMA_CPP_DIR = Path("llama.cpp")
29
  LLAMA_CPP_CONVERT_SCRIPT = LLAMA_CPP_DIR / "convert.py"
30
  LLAMA_CPP_QUANTIZE_SCRIPT = LLAMA_CPP_DIR / "quantize"
31
 
 
32
  if not LLAMA_CPP_QUANTIZE_SCRIPT.exists():
33
  error_msg = "FATAL ERROR: llama.cpp binaries not found. The Docker build may have failed."
34
  logging.error(error_msg)
 
93
  log_stream = "[STAGE 3 & 4] Converting to GGUF using llama.cpp...\n"
94
  run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
95
  model_name = original_model_id.replace('/', '_')
 
96
  gguf_path = os.path.abspath(os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-gguf"))
97
  os.makedirs(gguf_path, exist_ok=True)
98
  f16_gguf_path = os.path.join(gguf_path, "model-f16.gguf")
99
  quantized_gguf_path = os.path.join(gguf_path, "model.gguf")
 
100
  absolute_model_path = os.path.abspath(model_path_or_id) if os.path.exists(model_path_or_id) else model_path_or_id
101
  try:
102
  convert_command = ["python3", "convert.py", absolute_model_path, "--outfile", f16_gguf_path, "--outtype", "f16"]
 
179
  raise ValueError("Invalid pipeline type selected.")
180
  full_log += log
181
  yield {final_output: "Packaging & Uploading (4/5)", log_output: full_log}
182
+ final_message, log = stage_5_package_and_upload(model_id, optimized_path, full_log, options)
183
  full_log += log
184
  yield {final_output: gr.update(value="SUCCESS", label="Status"), log_output: full_log, success_box: gr.Markdown(f"✅ **Success!** Model available: [{repo_id_for_link}](https://huggingface.co/{repo_id_for_link})", visible=True), run_button: gr.Button(interactive=True, value="Run Optimization Pipeline", variant="primary"), analyze_button: gr.Button(interactive=True, value="Analyze Model")}
185
  except Exception as e: