Improve the Python-based benchmarking script
bench-TriLMs.py (+11 -8)
@@ -44,12 +44,12 @@ def build_llama_cpp(options: Sequence[str]):
     os.chdir(LLAMA_CPP_PATH)
     builddir = LLAMA_CPP_PATH / "build"
     if builddir.exists():
-
-        os.system("rm -
+        logger.info("Removing %s", builddir)
+        os.system("rm -rf build")
     builddir.mkdir()
     os.chdir(builddir)
     os.system(shlex.join(("cmake", "..", *options)))
-    os.system("make -j llama-bench llama-quantize test-backend-ops")
+    os.system(f"make -j{os.cpu_count()} llama-bench llama-quantize test-backend-ops")
 
 
 def quantize(types: Sequence[str] = ALL_TYPES, sizes: Sequence[str] = MODEL_SIZES):
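As an aside, the same clean-rebuild step can be written without shelling out for the directory removal. This is only a sketch using the script's existing globals as function arguments; rebuild_llama_cpp itself is hypothetical and not part of the change:

# Sketch only: equivalent clean rebuild using shutil/subprocess instead of os.system.
import os
import shutil
import subprocess
from pathlib import Path

def rebuild_llama_cpp(llama_cpp_path: Path, options: list[str]) -> None:
    builddir = llama_cpp_path / "build"
    if builddir.exists():
        shutil.rmtree(builddir)  # same effect as `rm -rf build`, without a shell
    builddir.mkdir()
    subprocess.run(["cmake", "..", *options], cwd=builddir, check=True)
    subprocess.run(
        ["make", f"-j{os.cpu_count()}", "llama-bench", "llama-quantize", "test-backend-ops"],
        cwd=builddir,
        check=True,
    )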
@@ -103,11 +103,10 @@ def llama_bench(
         "-o",
         "json",
     ]
-
-
-
-    )
-    logger.debug(result.stderr)
+    command = [str(LLAMA_CPP_PATH / "build" / "bin" / "llama-bench")] + args
+    logger.info("Running: %s", " ".join(command))
+    result = subprocess.run(command, capture_output=True)
+    logger.debug(result.stderr.decode())
 
     new_output = json.loads(result.stdout)
     logger.info(json.dumps(new_output, indent=4))
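For reference, the call pattern introduced here (run llama-bench with "-o json", then json.loads its stdout) also works standalone. The helper below is a sketch, not part of the script, and bench_bin / model_path are placeholders:

# Standalone sketch of the pattern above: llama-bench with JSON output, parsed from stdout.
import json
import logging
import subprocess

logger = logging.getLogger(__name__)

def run_llama_bench(bench_bin: str, model_path: str) -> list:
    # bench_bin: path to the llama-bench binary; model_path: a GGUF model file (placeholders).
    command = [bench_bin, "-m", model_path, "-o", "json"]
    logger.info("Running: %s", " ".join(command))
    result = subprocess.run(command, capture_output=True)
    logger.debug(result.stderr.decode())
    return json.loads(result.stdout)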
@@ -116,6 +115,7 @@ def llama_bench(
 
 
 def test_backend_perf() -> str:
+    logger.info("Test MUL_MAT performance")
     result = subprocess.run(
         [
             str(LLAMA_CPP_PATH / "build" / "bin" / "test-backend-ops"),
@@ -125,6 +125,7 @@ def test_backend_perf() -> str:
         ],
         capture_output=True,
     )
+    logger.debug(result.stdout.decode())
     return result.stdout.decode(encoding="utf-8")
 
 
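The two hunks above elide the arguments actually passed to test-backend-ops (lines 122-124 are not shown). As a rough illustration of how the new logging wraps such a call, here is a sketch; the "perf -o MUL_MAT" arguments are an assumption suggested by the log message, not taken from the diff:

# Sketch: test-backend-ops in perf mode, filtered to MUL_MAT (flags assumed, see note above).
import logging
import subprocess
from pathlib import Path

logger = logging.getLogger(__name__)

def mul_mat_perf(llama_cpp_path: Path) -> str:
    logger.info("Test MUL_MAT performance")
    result = subprocess.run(
        [str(llama_cpp_path / "build" / "bin" / "test-backend-ops"), "perf", "-o", "MUL_MAT"],
        capture_output=True,
    )
    logger.debug(result.stdout.decode())
    return result.stdout.decode(encoding="utf-8")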
@@ -165,6 +166,8 @@ def parse_args(args: Sequence[str]):
 if __name__ == "__main__":
     args = parse_args(sys.argv)
 
+    logging.basicConfig(level=logging.DEBUG)
+
     LLAMA_CPP_PATH = args.llama_cpp_path
     MODEL_DIR = args.model_dir
 
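The logging.basicConfig(level=logging.DEBUG) call is what makes the new logger.debug() output from the subprocess runs visible. A possible refinement, not part of this change, would be to derive the level from a verbosity flag instead of hard-coding DEBUG:

# Hypothetical refinement: pick the log level from a --verbose flag rather than always DEBUG.
import logging

def setup_logging(verbose: bool) -> None:
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(levelname)s:%(name)s: %(message)s",
    )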