| import os |
| import subprocess |
| import sys |
| import threading |
| import time |
| import shutil |
|
|
# CUDA_VISIBLE_DEVICES is emptied before any ML library is imported further
# down; presumably this keeps the whole run CPU-only (the torch install below
# also pulls from the CPU wheel index) -- confirm if GPU use is ever wanted.
os.environ.update({"CUDA_VISIBLE_DEVICES": ""})
|
|
def log(msg):
    """Print *msg* to stdout prefixed with the local time as ``[HH:MM:SS]``.

    The output is flushed on every call so lines appear immediately.
    """
    stamp = time.strftime("%H:%M:%S")
    print(f"[{stamp}] {msg}", flush=True)
|
|
def run_cmd(cmd):
    """Log *cmd* and run it, raising if it exits with a non-zero status.

    Args:
        cmd: Either a shell command line (``str``), which is executed through
            the shell exactly as before, or a sequence of arguments, which is
            executed directly without a shell so no quoting is needed.

    Raises:
        subprocess.CalledProcessError: if the command returns non-zero.
    """
    if isinstance(cmd, str):
        # Existing callers pass a full shell string (with shell quoting).
        log(f"Running: {cmd}")
        subprocess.check_call(cmd, shell=True)
        return

    # Function-scope import keeps this block self-contained.
    import shlex

    argv = [str(part) for part in cmd]
    # shlex.join renders the argv the way it would be typed in a shell,
    # purely for the log line; the command itself bypasses the shell.
    log(f"Running: {shlex.join(argv)}")
    subprocess.check_call(argv)
|
|
| |
def heartbeat():
    """Log a liveness line every 60 seconds; never returns.

    Elapsed time is taken from the monotonic clock, so adjustments to the
    system wall clock cannot make the reported value jump or go negative.
    """
    start = time.monotonic()
    while True:
        time.sleep(60)
        elapsed = int(time.monotonic() - start)
        log(f"HEARTBEAT: still alive after {elapsed}s")
|
|
# Run the heartbeat on a background thread. It is flagged as a daemon so the
# endless loop never keeps the interpreter alive once the main script ends.
t = threading.Thread(target=heartbeat)
t.daemon = True
t.start()
|
|
| |
| |
| |
log("Looking for model.tflite from previous kernel output...")

# Walk the whole input tree, logging every file and collecting each readable
# file whose name ends in ".tflite".
input_base = "/kaggle/input"
tflite_path = None
tflite_candidates = []

for root, dirs, files in os.walk(input_base):
    # Sorting in place fixes the order os.walk descends in, so the choice
    # below does not depend on the filesystem's directory ordering.
    dirs.sort()
    for f in sorted(files):
        fpath = os.path.join(root, f)
        try:
            size_mb = os.path.getsize(fpath) / (1024*1024)
        except OSError as err:
            # e.g. a dangling symlink: the listing is informational, and an
            # unreadable entry cannot be the model, so skip it.
            log(f" Skipping unreadable entry: {fpath} ({err})")
            continue
        log(f" Found: {fpath} ({size_mb:.1f} MB)")
        if f.endswith(".tflite"):
            tflite_candidates.append(fpath)

if tflite_candidates:
    # Same rule as before: the last match in walk order wins.
    tflite_path = tflite_candidates[-1]
    if len(tflite_candidates) > 1:
        log(f"WARNING: {len(tflite_candidates)} .tflite files found; using {tflite_path}")

if not tflite_path:
    log("ERROR: No .tflite file found in input!")
    log("Listing all input directories:")
    for root, _, files in os.walk(input_base):
        log(f" DIR: {root} ({len(files)} files)")
    sys.exit(1)

size_gb = os.path.getsize(tflite_path) / (1024*1024*1024)
log(f"Found model: {tflite_path} ({size_gb:.2f} GB)")
|
|
| |
| |
| |
log("Installing dependencies...")

import shlex

# run_cmd passes these strings to a shell, so the interpreter path is quoted
# to survive spaces or shell metacharacters. A plain path is left unchanged.
pip_install = f"{shlex.quote(sys.executable)} -m pip install -U"

# torch and friends come from the CPU-only wheel index.
run_cmd(f"{pip_install} torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu")
run_cmd(f"{pip_install} litert-torch torchao transformers huggingface-hub mediapipe accelerate sentencepiece 'protobuf>=6.0'")

log("All dependencies installed.")
|
|
| |
| |
| |
log("=== QUANTIZING MODEL (dynamic_wi8_afp32) ===")

# Imported here, not at the top of the file, because packages are installed
# by pip earlier in this script (presumably including this one -- confirm).
# Importing before the copy means a missing package fails fast, before any
# multi-GB file is duplicated.
from ai_edge_quantizer import quantizer as quant_lib
from ai_edge_quantizer import recipe as recipe_lib

# The quantizer is pointed at a copy of the model under /kaggle/working.
work_tflite = "/kaggle/working/model.tflite"
log(f"Copying {tflite_path} -> {work_tflite}...")
shutil.copy2(tflite_path, work_tflite)
log("Copy done.")

quantized_path = "/kaggle/working/model_quantized.tflite"

try:
    # Bring along every other regular file that sits next to the model.
    input_dir = os.path.dirname(tflite_path)
    for f in os.listdir(input_dir):
        src = os.path.join(input_dir, f)
        dst = os.path.join("/kaggle/working", f)
        if os.path.isfile(src) and src != tflite_path:
            log(f"Copying {f}...")
            shutil.copy2(src, dst)

    log("Starting quantization...")
    qt = quant_lib.Quantizer(work_tflite)
    recipe = recipe_lib.dynamic_wi8_afp32()
    qt.load_quantization_recipe(recipe)
    log("Running quantization (this will take a while)...")
    qt.quantize().export_model(quantized_path, overwrite=True)

    size_gb = os.path.getsize(quantized_path) / (1024*1024*1024)
    log(f"Quantized model saved: {quantized_path} ({size_gb:.2f} GB)")
finally:
    # Always drop the unquantized copy, including when quantization raised,
    # so a failed run does not leave it behind in the working directory.
    os.remove(work_tflite)
    log("Removed unquantized copy.")
|
|
| |
| |
| |
log("=== BUNDLING INTO .litertlm ===")

# Bundling is best-effort: any failure is logged and the script carries on
# with the quantized .tflite as its output. Both imports sit inside the try
# so a missing or renamed module is handled like any other bundling failure
# instead of aborting the script.
try:
    # NOTE(review): export_lib is not referenced in the visible code; the
    # import is kept in case it is needed for side effects -- confirm.
    import litert_torch.generative.export_hf.export as export_lib
    from litert_torch.generative.export_hf.core import bundle_utils

    output_bundle = "/kaggle/working/gemma-4-E2B-it-uncensored.litertlm"
    bundle_utils.bundle_litert_lm(
        model_path=quantized_path,
        output_path=output_bundle,
        model_type="gemma4",
    )
    size_gb = os.path.getsize(output_bundle) / (1024*1024*1024)
    log(f"Bundle saved: {output_bundle} ({size_gb:.2f} GB)")
except Exception as e:
    log(f"Bundling failed (not critical): {e}")
    log("The quantized .tflite is still available as output.")
|
|
| |
| |
| |
log("=== OUTPUT FILES ===")

# Report every regular file sitting directly in the working directory with
# its size; sub-directories are not listed.
out_dir = "/kaggle/working"
for name in os.listdir(out_dir):
    path = os.path.join(out_dir, name)
    if not os.path.isfile(path):
        continue
    mib = os.path.getsize(path) / (1024*1024)
    log(f" {name} ({mib:.1f} MB)")

log("SUCCESS! Quantization complete.")
|
|