Improve the Python-based benchmarking script
bench-TriLMs.py (+11 -8)
@@ -44,12 +44,12 @@ def build_llama_cpp(options: Sequence[str]):
     os.chdir(LLAMA_CPP_PATH)
     builddir = LLAMA_CPP_PATH / "build"
     if builddir.exists():
-
-        os.system("rm -
+        logger.info("Removing %s", builddir)
+        os.system("rm -rf build")
     builddir.mkdir()
     os.chdir(builddir)
     os.system(shlex.join(("cmake", "..", *options)))
-    os.system("make -j llama-bench llama-quantize test-backend-ops")
+    os.system(f"make -j{os.cpu_count()} llama-bench llama-quantize test-backend-ops")
 
 
 def quantize(types: Sequence[str] = ALL_TYPES, sizes: Sequence[str] = MODEL_SIZES):
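As an aside, the same clean-rebuild step can be written without shelling out for the directory removal. This is only a sketch using the script's existing globals as function arguments; rebuild_llama_cpp itself is hypothetical and not part of the change:

# Sketch only: equivalent clean rebuild using shutil/subprocess instead of os.system.
import os
import shutil
import subprocess
from pathlib import Path

def rebuild_llama_cpp(llama_cpp_path: Path, options: list[str]) -> None:
    builddir = llama_cpp_path / "build"
    if builddir.exists():
        shutil.rmtree(builddir)  # same effect as `rm -rf build`, without a shell
    builddir.mkdir()
    subprocess.run(["cmake", "..", *options], cwd=builddir, check=True)
    subprocess.run(
        ["make", f"-j{os.cpu_count()}", "llama-bench", "llama-quantize", "test-backend-ops"],
        cwd=builddir,
        check=True,
    )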
@@ -103,11 +103,10 @@ def llama_bench(
         "-o",
         "json",
     ]
-
-
-
-    )
-    logger.debug(result.stderr)
+    command = [str(LLAMA_CPP_PATH / "build" / "bin" / "llama-bench")] + args
+    logger.info("Running: %s", " ".join(command))
+    result = subprocess.run(command, capture_output=True)
+    logger.debug(result.stderr.decode())
 
     new_output = json.loads(result.stdout)
     logger.info(json.dumps(new_output, indent=4))
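For reference, the call pattern introduced here (run llama-bench with "-o json", then json.loads its stdout) also works standalone. The helper below is a sketch, not part of the script, and bench_bin / model_path are placeholders:

# Standalone sketch of the pattern above: llama-bench with JSON output, parsed from stdout.
import json
import logging
import subprocess

logger = logging.getLogger(__name__)

def run_llama_bench(bench_bin: str, model_path: str) -> list:
    # bench_bin: path to the llama-bench binary; model_path: a GGUF model file (placeholders).
    command = [bench_bin, "-m", model_path, "-o", "json"]
    logger.info("Running: %s", " ".join(command))
    result = subprocess.run(command, capture_output=True)
    logger.debug(result.stderr.decode())
    return json.loads(result.stdout)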
@@ -116,6 +115,7 @@ def llama_bench(
 
 
 def test_backend_perf() -> str:
+    logger.info("Test MUL_MAT performance")
     result = subprocess.run(
         [
             str(LLAMA_CPP_PATH / "build" / "bin" / "test-backend-ops"),
@@ -125,6 +125,7 @@ def test_backend_perf() -> str:
         ],
         capture_output=True,
     )
+    logger.debug(result.stdout.decode())
     return result.stdout.decode(encoding="utf-8")
 
 
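The two hunks above elide the arguments actually passed to test-backend-ops (lines 122-124 are not shown). As a rough illustration of how the new logging wraps such a call, here is a sketch; the "perf -o MUL_MAT" arguments are an assumption suggested by the log message, not taken from the diff:

# Sketch: test-backend-ops in perf mode, filtered to MUL_MAT (flags assumed, see note above).
import logging
import subprocess
from pathlib import Path

logger = logging.getLogger(__name__)

def mul_mat_perf(llama_cpp_path: Path) -> str:
    logger.info("Test MUL_MAT performance")
    result = subprocess.run(
        [str(llama_cpp_path / "build" / "bin" / "test-backend-ops"), "perf", "-o", "MUL_MAT"],
        capture_output=True,
    )
    logger.debug(result.stdout.decode())
    return result.stdout.decode(encoding="utf-8")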
@@ -165,6 +166,8 @@ def parse_args(args: Sequence[str]):
 if __name__ == "__main__":
     args = parse_args(sys.argv)
 
+    logging.basicConfig(level=logging.DEBUG)
+
     LLAMA_CPP_PATH = args.llama_cpp_path
     MODEL_DIR = args.model_dir
 
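The logging.basicConfig(level=logging.DEBUG) call is what makes the new logger.debug() output from the subprocess runs visible. A possible refinement, not part of this change, would be to derive the level from a verbosity flag instead of hard-coding DEBUG:

# Hypothetical refinement: pick the log level from a --verbose flag rather than always DEBUG.
import logging

def setup_logging(verbose: bool) -> None:
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(levelname)s:%(name)s: %(message)s",
    )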