| """ |
Convert cisco-ai/SecureBERT2.0-cross_encoder to Core ML via torch.export.

torch.export is PyTorch's newer export mechanism (available since PyTorch 2.1)
that handles dynamic shapes and integer ops better than torch.jit.trace.

Output:
    output/SecureBERT2_CrossEncoder_FP32.mlpackage
    output/SecureBERT2_CrossEncoder_FP16.mlpackage
| """ |
|
|
| import os |
| import sys |
| import time |
| import numpy as np |
| import torch |
| import coremltools as ct |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
| |
| |
| |
|
|
# Hugging Face model id of the cross-encoder that gets converted.
MODEL_ID = "cisco-ai/SecureBERT2.0-cross_encoder"

# Directory that receives the generated .mlpackage bundles.
OUTPUT_DIR = "./output"

# Fixed token length: used for tokenizer padding/truncation and as the
# static sequence dimension of the Core ML inputs.
MAX_SEQUENCE_LENGTH = 512
|
|
| |
| |
| |
|
|
# --- Banner -----------------------------------------------------------------
print("=" * 70)
print("Fas 1 (via torch.export): SecureBERT2.0-cross_encoder → Core ML")
print("=" * 70)
print()

# The output directory must exist before any package is saved into it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# --- Step 1: load tokenizer and model ----------------------------------------
print("[1/6] Laddar tokenizer och modell...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Eager attention and float32 weights are requested explicitly.
# NOTE(review): presumably chosen so the graph contains only plain PyTorch
# ops that export cleanly -- confirm against the converter requirements.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_ID,
    attn_implementation="eager",
    torch_dtype=torch.float32,
)
# Switch to inference mode before tracing or scoring.
model.eval()
print(" ✓ Modell laddad")
print(f" Architecture: {model.config.architectures}")
print()
|
|
| |
| |
| |
|
|
# --- Step 2: build the example input used for export -------------------------
print("[2/6] Förbereder example input...")

example_query = "How do I configure vPC peer-link on Nexus 9000?"
example_document = (
    "vPC peer-link must be configured as a port-channel with all VLANs "
    "allowed. Use LACP active mode and spanning-tree port type network "
    "for fast convergence between peer switches."
)

# Tokenize the (query, document) pair, padded/truncated to the fixed length
# so the tensors match the static shape declared for the Core ML inputs.
example_inputs = tokenizer(
    example_query,
    example_document,
    padding="max_length",
    truncation=True,
    max_length=MAX_SEQUENCE_LENGTH,
    return_tensors="pt",
)

# Only these two tensors are fed to the model in the rest of the script.
input_ids = example_inputs["input_ids"]
attention_mask = example_inputs["attention_mask"]

print(f" ✓ Input shape: {input_ids.shape}, dtype: {input_ids.dtype}")
print()
|
|
| |
| |
| |
|
|
# --- Step 3: reference score from the unmodified PyTorch model ---------------
print("[3/6] Verifierar PyTorch-modellen...")

# No gradients are needed for a single inference pass.
with torch.no_grad():
    pytorch_output = model(input_ids=input_ids, attention_mask=attention_mask)
    # .item() requires the logits to hold exactly one element.
    pytorch_score = torch.sigmoid(pytorch_output.logits).item()

print(f" PyTorch score: {pytorch_score:.4f}")
print()

# --- Step 4: wrap the model so the exported graph ends in a sigmoid ----------
print("[4/6] Wrappar modell med sigmoid...")
|
|
class CrossEncoderWrapper(torch.nn.Module):
    """Wrap a model so that its forward pass returns ``sigmoid(logits)``.

    The wrapped model is called with ``input_ids`` and ``attention_mask`` as
    keyword arguments and must return an object exposing a ``logits``
    attribute. Folding the sigmoid into ``forward`` makes it part of whatever
    graph is exported from this module.
    """

    def __init__(self, model):
        """Store *model*, the callable whose logits are squashed."""
        super().__init__()
        self.model = model

    def forward(
        self, input_ids: torch.Tensor, attention_mask: torch.Tensor
    ) -> torch.Tensor:
        """Return the element-wise sigmoid of the wrapped model's logits."""
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )
        return torch.sigmoid(outputs.logits)
|
|
wrapped_model = CrossEncoderWrapper(model)
wrapped_model.eval()
print(" ✓ Wrapper skapad")
print()

# --- Step 5: export with torch.export ----------------------------------------
print("[5/6] Exporterar med torch.export.export...")
print(" (Detta tar 30-60 sekunder)")

# perf_counter is monotonic, so the measured duration cannot be skewed by
# wall-clock adjustments.
start = time.perf_counter()

# Each stage has its own handler so the error message names the stage that
# actually failed instead of always blaming torch.export.export.
try:
    exported_program = torch.export.export(
        wrapped_model,
        args=(input_ids, attention_mask),
        strict=False,
    )
except Exception as e:
    print(f" ✗ torch.export.export misslyckades: {e}")
    print()
    print("Detta är oväntat eftersom ONNX-export fungerade.")
    print("Möjlig orsak: torch.export är striktare än ONNX-export.")
    print()
    print("Fallback: prova med dynamic_shapes-spec eller andra args.")
    sys.exit(1)

export_time = time.perf_counter() - start
print(f" ✓ Export klar på {export_time:.1f}s")
print(f" Type: {type(exported_program).__name__}")

# Lower the program to the ATEN dialect before handing it to coremltools.
print(" Dekomponerar till ATEN-dialekt för coremltools...")
decomp_start = time.perf_counter()
try:
    exported_program = exported_program.run_decompositions({})
except Exception as e:
    print(f" ✗ run_decompositions misslyckades: {e}")
    sys.exit(1)
print(f" ✓ Dekomposition klar på {time.perf_counter() - decomp_start:.1f}s")

# Sanity check: the exported program should reproduce the PyTorch score.
try:
    with torch.no_grad():
        exported_output = exported_program.module()(input_ids, attention_mask)
    exported_score = exported_output.item()
except Exception as e:
    print(f" ✗ Verifiering av exporterat program misslyckades: {e}")
    sys.exit(1)

print(f" PyTorch: {pytorch_score:.6f}")
print(f" Exported: {exported_score:.6f}")
print(f" Diff: {abs(pytorch_score - exported_score):.6f}")
print()
|
|
| |
| |
| |
|
|
# --- Step 6: convert the ExportedProgram to Core ML --------------------------
def _package_size_mb(package_path):
    """Return the combined size, in MiB, of every file under *package_path*."""
    total_bytes = sum(
        os.path.getsize(os.path.join(dirpath, filename))
        for dirpath, _, filenames in os.walk(package_path)
        for filename in filenames
    )
    return total_bytes / (1024 * 1024)


def _convert_and_save(program, precision, label, author, description):
    """Convert *program* to an ML Program, save it and report progress.

    Args:
        program: The exported program handed to ``ct.convert``.
        precision: ``ct.precision`` value used as ``compute_precision``.
        label: Precision tag ("FP32"/"FP16") used in messages and file name.
        author: Text stored in the model's ``author`` metadata field.
        description: Text stored in ``short_description``.

    Returns:
        A ``(mlmodel, path)`` tuple: the converted model and the location of
        the saved ``.mlpackage``.

    Raises:
        Whatever ``ct.convert`` or ``mlmodel.save`` raise; callers decide
        whether a failure is fatal.
    """
    started = time.perf_counter()
    mlmodel = ct.convert(
        program,
        # Both inputs share the same static (1, MAX_SEQUENCE_LENGTH) int32 spec.
        inputs=[
            ct.TensorType(
                name=input_name,
                shape=(1, MAX_SEQUENCE_LENGTH),
                dtype=np.int32,
            )
            for input_name in ("input_ids", "attention_mask")
        ],
        outputs=[ct.TensorType(name="score")],
        convert_to="mlprogram",
        compute_precision=precision,
        compute_units=ct.ComputeUnit.ALL,
        minimum_deployment_target=ct.target.macOS14,
    )
    elapsed = time.perf_counter() - started
    print(f" ✓ {label}-konvertering klar på {elapsed:.1f}s")

    mlmodel.author = author
    mlmodel.short_description = description
    mlmodel.version = "1.0"

    package_path = os.path.join(
        OUTPUT_DIR, f"SecureBERT2_CrossEncoder_{label}.mlpackage"
    )
    mlmodel.save(package_path)

    print(f" ✓ Sparad: {package_path}")
    print(f" Storlek: {_package_size_mb(package_path):.1f} MB")
    print()
    return mlmodel, package_path


print("[6/6] Konverterar ExportedProgram → Core ML...")
print()

# FP32 is the reference package; without it there is nothing to verify, so a
# failure here aborts the script.
print(" a) FP32-version...")
try:
    mlmodel_fp32, fp32_path = _convert_and_save(
        exported_program,
        ct.precision.FLOAT32,
        "FP32",
        "Cisco AI (konverterad för Nomad via torch.export)",
        "SecureBERT 2.0 Cross-Encoder for text reranking. "
        "Input: query + document pair. Output: similarity score (0-1).",
    )
except Exception as e:
    print(f" ✗ FP32-konvertering misslyckades: {e}")
    print()
    print("Vid detta läge har vi uttömt de vanligaste vägarna:")
    print(" ✗ torch.jit.trace + coremltools")
    print(" ✗ ONNX som input till coremltools 9.0")
    print(" ✗ torch.export + coremltools")
    print()
    print("Nästa steg vore att försöka optimum-cli från HuggingFace.")
    sys.exit(1)

# FP16 is best-effort: on failure the script carries on with FP32 only.
# mlmodel_fp16 / fp16_path are bound only when conversion AND save succeeded,
# so a half-finished FP16 run never leaves a path behind.
print(" b) FP16-version...")
try:
    mlmodel_fp16, fp16_path = _convert_and_save(
        exported_program,
        ct.precision.FLOAT16,
        "FP16",
        "Cisco AI (konverterad för Nomad via torch.export, FP16)",
        "SecureBERT 2.0 Cross-Encoder for text reranking (FP16). "
        "Input: query + document pair. Output: similarity score (0-1).",
    )
except Exception as e:
    print(f" ✗ FP16-konvertering misslyckades: {e}")
    print(" FP32 är OK, vi fortsätter med den")
    print()
|
|
| |
| |
| |
|
|
# --- Verification: compare Core ML FP32 scores against PyTorch ---------------
print("=" * 70)
print("VERIFIERING: Core ML vs PyTorch")
print("=" * 70)
print()

# (query, document, expected relevance label) triples; the label is only
# printed, never compared against the scores.
test_cases = [
    ("How do I configure vPC peer-link on Nexus 9000?",
     "vPC peer-link must be configured as a port-channel with all VLANs allowed. Use LACP active mode and spanning-tree port type network for fast convergence.",
     "Hög (relaterat)"),

    ("How do I configure vPC peer-link on Nexus 9000?",
     "BGP neighbor configuration requires remote-as statement and update-source for stable peering between routers.",
     "Mellan (samma domän)"),

    ("How do I configure vPC peer-link on Nexus 9000?",
     "How to bake a chocolate cake: mix flour, sugar, cocoa powder and butter.",
     "Låg (orelaterat)"),
]

print(f"{'Förväntan':<30} {'PyTorch':<12} {'CoreML FP32':<14} {'Diff':<10}")
print("-" * 66)

max_diff = 0.0
for query, doc, expected in test_cases:
    inputs = tokenizer(
        query, doc,
        padding="max_length",
        truncation=True,
        max_length=MAX_SEQUENCE_LENGTH,
        return_tensors="pt",
    )

    # Feed the reference model exactly the two tensors the Core ML model
    # receives, so both sides are scored on identical inputs.
    with torch.no_grad():
        pt_logits = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
        ).logits
        pt_score = torch.sigmoid(pt_logits).item()

    # The Core ML inputs were declared as int32, so cast before predicting.
    cm_inputs = {
        "input_ids": inputs["input_ids"].numpy().astype(np.int32),
        "attention_mask": inputs["attention_mask"].numpy().astype(np.int32),
    }
    cm_output = mlmodel_fp32.predict(cm_inputs)
    cm_score = float(cm_output["score"].flatten()[0])

    diff = abs(pt_score - cm_score)
    max_diff = max(max_diff, diff)

    print(f"{expected:<30} {pt_score:<12.4f} {cm_score:<14.4f} {diff:<10.6f}")

print()
print(f"Max diff: {max_diff:.6f}")

# Grade the worst-case absolute difference between the two score columns.
if max_diff < 0.001:
    print("✓ EXCELLENT: Core ML är numeriskt identisk med PyTorch")
elif max_diff < 0.01:
    print("✓ OK: Mindre numerisk drift, acceptabel")
else:
    print("⚠️ Märkbar drift, verifiera rangordning")
|
|
# --- Summary ------------------------------------------------------------------
print()
print("=" * 70)
print("KLART")
print("=" * 70)
print()
print("Output:")
print(f" {fp32_path}")

# The FP16 step is best-effort: fp16_path may be unbound (conversion failed)
# or may point at a package that was never written (save failed). Look the
# name up explicitly and only list the package when it exists on disk.
fp16_package = globals().get("fp16_path")
if fp16_package is not None and os.path.exists(fp16_package):
    print(f" {fp16_package}")
|
|