broadfield-dev committed on
Commit
97b9b15
·
verified ·
1 Parent(s): fd28273

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -34
app.py CHANGED
@@ -24,41 +24,16 @@ api = HfApi()
24
  OUTPUT_DIR = "optimized_models"
25
  os.makedirs(OUTPUT_DIR, exist_ok=True)
26
 
 
27
  LLAMA_CPP_DIR = Path("llama.cpp")
28
  LLAMA_CPP_CONVERT_SCRIPT = LLAMA_CPP_DIR / "convert.py"
29
  LLAMA_CPP_QUANTIZE_SCRIPT = LLAMA_CPP_DIR / "quantize"
30
 
31
def setup_llama_cpp():
    """Ensure a working llama.cpp checkout: clone it, install its Python
    dependencies, and build the native binaries if they are missing.

    Reads the module-level path constants LLAMA_CPP_DIR and
    LLAMA_CPP_QUANTIZE_SCRIPT. Each step is skipped when its artifact
    already exists, so the function is safe to call on every startup.

    Raises:
        RuntimeError: if the git clone, pip install, or CMake build fails;
            the subprocess stderr is included in the message and the
            original CalledProcessError is chained as the cause.
    """
    import sys  # local import: needed only to locate the current interpreter for pip

    if not LLAMA_CPP_DIR.exists():
        logging.info("Cloning llama.cpp repository...")
        try:
            subprocess.run(
                ["git", "clone", "https://github.com/ggerganov/llama.cpp.git"],
                check=True, capture_output=True, text=True,
            )
            logging.info("llama.cpp cloned successfully.")
        except subprocess.CalledProcessError as e:
            # Chain the original error so the full subprocess context is preserved.
            raise RuntimeError(f"Failed to clone llama.cpp. Error: {e.stderr}") from e

    requirements_path = LLAMA_CPP_DIR / "requirements.txt"
    if requirements_path.exists():
        logging.info("Installing llama.cpp Python dependencies...")
        try:
            # Use the current interpreter's pip (python -m pip) so packages land
            # in the environment this app is actually running under; a bare
            # "pip" executable may belong to a different interpreter.
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "-r", str(requirements_path)],
                check=True, capture_output=True, text=True,
            )
            logging.info("llama.cpp Python dependencies installed successfully.")
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"Failed to install llama.cpp requirements. Error: {e.stderr}") from e

    if not LLAMA_CPP_QUANTIZE_SCRIPT.exists():
        logging.info("llama.cpp binaries not found. Building with CMake...")
        try:
            subprocess.run(["cmake", "."], cwd=str(LLAMA_CPP_DIR), check=True, capture_output=True, text=True)
            subprocess.run(["cmake", "--build", "."], cwd=str(LLAMA_CPP_DIR), check=True, capture_output=True, text=True)
            logging.info("llama.cpp binaries built successfully with CMake.")
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"Failed to build llama.cpp with CMake. Error: {e.stderr}") from e


try:
    setup_llama_cpp()
except Exception as e:
    # A missing llama.cpp toolchain is unrecoverable for this app: log the
    # failure, then re-raise instead of continuing with a broken setup
    # (the original swallowed the error and let the app limp along).
    logging.error(f"FATAL ERROR during llama.cpp setup: {e}", exc_info=True)
    raise
62
 
63
  def stage_1_analyze_model(model_id: str):
64
  log_stream = "[STAGE 1] Analyzing model...\n"
@@ -119,10 +94,12 @@ def stage_3_4_gguf_quantize(model_path_or_id: str, original_model_id: str, quant
119
  log_stream = "[STAGE 3 & 4] Converting to GGUF using llama.cpp...\n"
120
  run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
121
  model_name = original_model_id.replace('/', '_')
122
- gguf_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-gguf")
 
123
  os.makedirs(gguf_path, exist_ok=True)
124
- f16_gguf_path = os.path.abspath(os.path.join(gguf_path, "model-f16.gguf"))
125
- quantized_gguf_path = os.path.abspath(os.path.join(gguf_path, "model.gguf"))
 
126
  absolute_model_path = os.path.abspath(model_path_or_id) if os.path.exists(model_path_or_id) else model_path_or_id
127
  try:
128
  convert_command = ["python3", "convert.py", absolute_model_path, "--outfile", f16_gguf_path, "--outtype", "f16"]
 
24
  OUTPUT_DIR = "optimized_models"
25
  os.makedirs(OUTPUT_DIR, exist_ok=True)
26
 
27
# Paths into the llama.cpp checkout. The Dockerfile guarantees these files
# exist, so no cloning or building happens here — we only define the paths.
LLAMA_CPP_DIR = Path("llama.cpp")
LLAMA_CPP_CONVERT_SCRIPT = LLAMA_CPP_DIR / "convert.py"
LLAMA_CPP_QUANTIZE_SCRIPT = LLAMA_CPP_DIR / "quantize"

# Startup sanity check: fail fast at import time if the Docker build did not
# produce the quantize binary, rather than erroring later mid-pipeline.
if not LLAMA_CPP_QUANTIZE_SCRIPT.exists():
    _startup_error = "FATAL ERROR: llama.cpp binaries not found. The Docker build may have failed."
    logging.error(_startup_error)
    raise RuntimeError(_startup_error)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  def stage_1_analyze_model(model_id: str):
39
  log_stream = "[STAGE 1] Analyzing model...\n"
 
94
  log_stream = "[STAGE 3 & 4] Converting to GGUF using llama.cpp...\n"
95
  run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
96
  model_name = original_model_id.replace('/', '_')
97
+ # Use absolute paths for outputs to avoid issues with changing working directories
98
+ gguf_path = os.path.abspath(os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-gguf"))
99
  os.makedirs(gguf_path, exist_ok=True)
100
+ f16_gguf_path = os.path.join(gguf_path, "model-f16.gguf")
101
+ quantized_gguf_path = os.path.join(gguf_path, "model.gguf")
102
+ # Use absolute path for model input if it's a local directory
103
  absolute_model_path = os.path.abspath(model_path_or_id) if os.path.exists(model_path_or_id) else model_path_or_id
104
  try:
105
  convert_command = ["python3", "convert.py", absolute_model_path, "--outfile", f16_gguf_path, "--outtype", "f16"]