Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -24,41 +24,16 @@ api = HfApi()
|
|
| 24 |
OUTPUT_DIR = "optimized_models"
|
| 25 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 26 |
|
|
|
|
| 27 |
LLAMA_CPP_DIR = Path("llama.cpp")
|
| 28 |
LLAMA_CPP_CONVERT_SCRIPT = LLAMA_CPP_DIR / "convert.py"
|
| 29 |
LLAMA_CPP_QUANTIZE_SCRIPT = LLAMA_CPP_DIR / "quantize"
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
logging.info("llama.cpp cloned successfully.")
|
| 37 |
-
except subprocess.CalledProcessError as e:
|
| 38 |
-
raise RuntimeError(f"Failed to clone llama.cpp. Error: {e.stderr}")
|
| 39 |
-
|
| 40 |
-
requirements_path = LLAMA_CPP_DIR / "requirements.txt"
|
| 41 |
-
if requirements_path.exists():
|
| 42 |
-
logging.info("Installing llama.cpp Python dependencies...")
|
| 43 |
-
try:
|
| 44 |
-
subprocess.run(["pip", "install", "-r", str(requirements_path)], check=True, capture_output=True, text=True)
|
| 45 |
-
logging.info("llama.cpp Python dependencies installed successfully.")
|
| 46 |
-
except subprocess.CalledProcessError as e:
|
| 47 |
-
raise RuntimeError(f"Failed to install llama.cpp requirements. Error: {e.stderr}")
|
| 48 |
-
|
| 49 |
-
if not LLAMA_CPP_QUANTIZE_SCRIPT.exists():
|
| 50 |
-
logging.info("llama.cpp binaries not found. Building with CMake...")
|
| 51 |
-
try:
|
| 52 |
-
subprocess.run(["cmake", "."], cwd=str(LLAMA_CPP_DIR), check=True, capture_output=True, text=True)
|
| 53 |
-
subprocess.run(["cmake", "--build", "."], cwd=str(LLAMA_CPP_DIR), check=True, capture_output=True, text=True)
|
| 54 |
-
logging.info("llama.cpp binaries built successfully with CMake.")
|
| 55 |
-
except subprocess.CalledProcessError as e:
|
| 56 |
-
raise RuntimeError(f"Failed to build llama.cpp with CMake. Error: {e.stderr}")
|
| 57 |
-
|
| 58 |
-
try:
|
| 59 |
-
setup_llama_cpp()
|
| 60 |
-
except Exception as e:
|
| 61 |
-
logging.error(f"FATAL ERROR during llama.cpp setup: {e}", exc_info=True)
|
| 62 |
|
| 63 |
def stage_1_analyze_model(model_id: str):
|
| 64 |
log_stream = "[STAGE 1] Analyzing model...\n"
|
|
@@ -119,10 +94,12 @@ def stage_3_4_gguf_quantize(model_path_or_id: str, original_model_id: str, quant
|
|
| 119 |
log_stream = "[STAGE 3 & 4] Converting to GGUF using llama.cpp...\n"
|
| 120 |
run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
|
| 121 |
model_name = original_model_id.replace('/', '_')
|
| 122 |
-
|
|
|
|
| 123 |
os.makedirs(gguf_path, exist_ok=True)
|
| 124 |
-
f16_gguf_path = os.path.
|
| 125 |
-
quantized_gguf_path = os.path.
|
|
|
|
| 126 |
absolute_model_path = os.path.abspath(model_path_or_id) if os.path.exists(model_path_or_id) else model_path_or_id
|
| 127 |
try:
|
| 128 |
convert_command = ["python3", "convert.py", absolute_model_path, "--outfile", f16_gguf_path, "--outtype", "f16"]
|
|
|
|
| 24 |
OUTPUT_DIR = "optimized_models"
|
| 25 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 26 |
|
| 27 |
+
# The Docker build is expected to provide these files, so we only define the paths here.
|
| 28 |
LLAMA_CPP_DIR = Path("llama.cpp")
|
| 29 |
LLAMA_CPP_CONVERT_SCRIPT = LLAMA_CPP_DIR / "convert.py"
|
| 30 |
LLAMA_CPP_QUANTIZE_SCRIPT = LLAMA_CPP_DIR / "quantize"
|
| 31 |
|
| 32 |
+
# Verify at startup that the llama.cpp quantize binary was built.
|
| 33 |
+
if not LLAMA_CPP_QUANTIZE_SCRIPT.exists():
|
| 34 |
+
error_msg = "FATAL ERROR: llama.cpp binaries not found. The Docker build may have failed."
|
| 35 |
+
logging.error(error_msg)
|
| 36 |
+
raise RuntimeError(error_msg)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
def stage_1_analyze_model(model_id: str):
|
| 39 |
log_stream = "[STAGE 1] Analyzing model...\n"
|
|
|
|
| 94 |
log_stream = "[STAGE 3 & 4] Converting to GGUF using llama.cpp...\n"
|
| 95 |
run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
|
| 96 |
model_name = original_model_id.replace('/', '_')
|
| 97 |
+
# Use absolute paths for outputs to avoid issues with changing working directories
|
| 98 |
+
gguf_path = os.path.abspath(os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-gguf"))
|
| 99 |
os.makedirs(gguf_path, exist_ok=True)
|
| 100 |
+
f16_gguf_path = os.path.join(gguf_path, "model-f16.gguf")
|
| 101 |
+
quantized_gguf_path = os.path.join(gguf_path, "model.gguf")
|
| 102 |
+
# Use absolute path for model input if it's a local directory
|
| 103 |
absolute_model_path = os.path.abspath(model_path_or_id) if os.path.exists(model_path_or_id) else model_path_or_id
|
| 104 |
try:
|
| 105 |
convert_command = ["python3", "convert.py", absolute_model_path, "--outfile", f16_gguf_path, "--outtype", "f16"]
|