anubhavg97
/

constbert-onnx

Feature Extraction

text-embeddings-inference

Model card | Files and versions

ag-nexla committed on Jul 5, 2025

Commit

5d58ea2

·

1 Parent(s): 2336bf5

updated handler

Files changed (2) hide show

colbert_configuration.py +1 -1
handler.py +9 -5

colbert_configuration.py CHANGED Viewed

@@ -164,7 +164,7 @@ class ResourceSettings:
 @dataclass
 class DocSettings:
     dim: int = DefaultVal(128)
-    doc_maxlen: int = DefaultVal(220)
     mask_punctuation: bool = DefaultVal(True)

 @dataclass
 class DocSettings:
     dim: int = DefaultVal(128)
+    doc_maxlen: int = DefaultVal(250) # Changed from 220 to 250
     mask_punctuation: bool = DefaultVal(True)

handler.py CHANGED Viewed

@@ -22,11 +22,15 @@ class EndpointHandler:
         self.tokenizer = AutoTokenizer.from_pretrained(path)
         print(f"Tokenizer loaded from: {path}")
-        # Load ColBERTConfig to get doc_maxlen for consistent padding
-        # IMPORTANT: Use load_from_checkpoint to get the *exact* config used for model export.
-        self.colbert_config = ColBERTConfig.load_from_checkpoint(path)
-        self.doc_max_length = self.colbert_config.doc_maxlen
-        print(f"ColBERTConfig doc_maxlen loaded as: {self.doc_max_length}")
         # Load the ONNX model
         onnx_model_path = os.path.join(path, "model.onnx")

         self.tokenizer = AutoTokenizer.from_pretrained(path)
         print(f"Tokenizer loaded from: {path}")
+        # Use the doc_maxlen that the ONNX model was *actually exported with* (250).
+        # This ensures consistency between the handler's tokenizer and the ONNX model's expectation.
+        self.doc_max_length = 250
+        print(f"Hardcoded doc_maxlen for tokenizer as: {self.doc_max_length}")
+        # NOTE: If you need other colbert_config parameters, you'd load it here,
+        # but for doc_max_length, we are explicitly setting it to avoid mismatches.
+        # self.colbert_config = ColBERTConfig.load_from_checkpoint(path)
+        # self.doc_max_length = self.colbert_config.doc_maxlen
         # Load the ONNX model
         onnx_model_path = os.path.join(path, "model.onnx")