Spaces:
Runtime error
Runtime error
Create optimization/benchmark.py
Browse files- optimization/benchmark.py +35 -0
optimization/benchmark.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ctranslate2
|
| 2 |
+
import transformers
|
| 3 |
+
import time
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
def convert_model():
    """Convert the model in ``./final_model`` to CTranslate2 format.

    Runs the ``ct2-transformers-converter`` command-line tool with int8
    quantization and writes the converted model to ``./ct2_model``.

    Raises:
        FileNotFoundError: If ``ct2-transformers-converter`` is not on PATH.
        subprocess.CalledProcessError: If the converter exits with a
            non-zero status.
    """
    # Imported locally so the function carries its own dependency.
    import subprocess

    print("Converting model to CTranslate2...")
    command = [
        "ct2-transformers-converter",
        "--model", "./final_model",
        "--output_dir", "./ct2_model",
        "--quantization", "int8",
        # Overwrite a previous conversion instead of failing because the
        # output directory already exists.
        "--force",
    ]
    # An argument list avoids shell parsing, and check=True surfaces a failed
    # conversion instead of letting later steps use a stale or missing model.
    subprocess.run(command, check=True)
|
| 10 |
+
|
| 11 |
+
def _dir_size(path):
    """Return the total size in bytes of the regular files directly in *path*.

    Subdirectories are not descended into.
    """
    with os.scandir(path) as entries:
        return sum(entry.stat().st_size for entry in entries if entry.is_file())


def run_benchmark():
    """Benchmark the CTranslate2 model on CPU and report its size reduction.

    Loads the converted model from ``./ct2_model`` and the tokenizer from
    ``./final_model``, translates a fixed batch of three inputs ten times,
    and prints the average latency per input. Then prints the ratio of the
    total file size of ``./final_model`` to that of ``./ct2_model`` (top-level
    files only).
    """
    model_dir = "./final_model"
    ct2_dir = "./ct2_model"
    num_runs = 10

    translator = ctranslate2.Translator(ct2_dir, device="cpu")
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_dir)

    test_inputs = ["<hin> shanti", "<ben> namaskar", "<tam> vanakkam"]
    # translate_batch is given token strings, so map text -> ids -> tokens.
    tokens = [
        tokenizer.convert_ids_to_tokens(tokenizer.encode(text))
        for text in test_inputs
    ]

    # Benchmark CTranslate2. perf_counter is a monotonic, high-resolution
    # clock intended for measuring short intervals.
    start = time.perf_counter()
    for _ in range(num_runs):
        translator.translate_batch(tokens)
    elapsed = time.perf_counter() - start

    # Average over every translated input (runs * batch size) rather than a
    # hard-coded count, so the figure stays right if either changes.
    ct2_time = elapsed / (num_runs * len(test_inputs))
    print(f"CTranslate2 Latency: {ct2_time:.4f}s per word")

    # Size comparison
    orig_size = _dir_size(model_dir)
    opt_size = _dir_size(ct2_dir)
    if opt_size == 0:
        # Avoid ZeroDivisionError when the converted directory has no files.
        print(f"Compression Ratio: n/a (no files found in {ct2_dir})")
    else:
        print(f"Compression Ratio: {orig_size / opt_size:.2f}x")
|
| 32 |
+
|
| 33 |
+
if __name__ == "__main__":
    # Script entry point: conversion runs first so that the benchmark has a
    # CTranslate2 model directory to load.
    for step in (convert_model, run_benchmark):
        step()
|