Upload PyTorch model

Browse files

Files changed (10) hide show

1755067493/config.json +35 -0
1755067493/model.py +107 -0
1755067493/special_tokens_map.json +37 -0
1755067493/tokenizer.json +0 -0
1755067493/tokenizer_config.json +62 -0
1755067493/training_args.bin +3 -0
1755067493/vocab.txt +0 -0
config_cpu.pbtxt +48 -0
config_gpu.pbtxt +28 -0
model_info.json +33 -0

1755067493/config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "_name_or_path": "scl_familyhistory_de",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 312,
+  "id2label": {
+    "0": "NOT_FAMILY",
+    "1": "FAMILY"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 312,
+  "label2id": {
+    "FAMILY": 1,
+    "NOT_FAMILY": 0
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 4,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "pre_trained": "",
+  "problem_type": "single_label_classification",
+  "training": "",
+  "transformers_version": "4.45.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 31102
+}

1755067493/model.py ADDED Viewed

	@@ -0,0 +1,107 @@

+##############################################################
+## C O P Y R I G H T (c) 2024                               ##
+## DH Healthcare GmbH and/or its affiliates                 ##
+## All Rights Reserved                                      ##
+##############################################################
+##                                                          ##
+## THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF           ##
+## DH Healthcare GmbH and/or its affiliates.                ##
+## The copyright notice above does not evidence any         ##
+## actual or intended publication of such source code.      ##
+##                                                          ##
+##############################################################
+import os
+import pathlib
+import numpy as np
+import triton_python_backend_utils as pb_utils
+from nlpserving.family_history.serving.models.family_history_model import FamilyHistoryClassificationModel
+class TritonPythonModel:
+    def initialize(self, args):
+        """Initialize the model with performance optimizations."""
+        PATH = os.path.join(pathlib.Path(__file__).parent.resolve(), '../')
+        self.model = FamilyHistoryClassificationModel(model_dir=PATH)
+        # Performance configuration
+        self.batch_size = int(os.environ.get('INFERENCE_BATCH_SIZE', 64))
+        self.max_sequence_length = int(
+            os.environ.get('MAX_SEQUENCE_LENGTH', 512)
+        )
+        # Pre-allocate common objects to reduce GC pressure
+        self._empty_response_cache = None
+        # Warmup the model with a dummy inference
+        try:
+            dummy_input = ['warmup text']
+            self.model(dummy_input, batch_size=1, top_k=1)
+        except Exception:
+            pass  # Ignore warmup errors
+    def execute(self, requests):
+        """Perform optimized inference with adaptive batching."""
+        if not requests:
+            return []
+        # Collect all texts from all requests for better batching
+        all_texts = []
+        request_boundaries = []
+        current_idx = 0
+        for request in requests:
+            input_tensors = pb_utils.get_input_tensor_by_name(request, "text")
+            # Direct conversion avoiding intermediate list
+            texts = [
+                tensor.decode('utf-8') for tensor in input_tensors.as_numpy()
+            ]
+            all_texts.extend(texts)
+            request_boundaries.append((current_idx, current_idx + len(texts)))
+            current_idx += len(texts)
+        if not all_texts:
+            return []
+        # Use adaptive batch size based on text characteristics
+        total_chars = sum(len(text) for text in all_texts)
+        avg_chars = total_chars / len(all_texts) if all_texts else 0
+        # Adjust batch size based on text length
+        if avg_chars > 1000:
+            effective_batch_size = min(len(all_texts), self.batch_size // 2)
+        elif avg_chars < 200:
+            effective_batch_size = min(len(all_texts), self.batch_size * 2)
+        else:
+            effective_batch_size = min(len(all_texts), self.batch_size)
+        # Process all texts together for better efficiency
+        all_outputs = self.model(
+            all_texts,
+            batch_size=effective_batch_size,
+            top_k=1
+        )
+        # Split outputs back to individual responses
+        responses = []
+        for start_idx, end_idx in request_boundaries:
+            request_outputs = all_outputs[start_idx:end_idx]
+            # Pre-allocate array for better performance
+            output = np.array([
+                str(output_dict).encode('utf-8')
+                for output_dict in request_outputs
+            ], dtype=object)
+            response = pb_utils.InferenceResponse(
+                output_tensors=[pb_utils.Tensor("output", output)]
+            )
+            responses.append(response)
+        return responses
+    def finalize(self):
+        """Clean up model resources."""
+        if hasattr(self, 'model'):
+            del self.model

1755067493/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

1755067493/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

1755067493/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "104": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_length": 256,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

1755067493/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:78d7bde3be46debb8c44f9a2e0c0fcf939497216bea33667edf23e380c47ff31
+size 5240

1755067493/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

config_cpu.pbtxt ADDED Viewed

	@@ -0,0 +1,48 @@

+name: "tinybert-familyhistory-de"
+backend: "python"
+max_batch_size: 0
+input [
+    {
+        name: "text"
+        data_type: TYPE_STRING
+        dims: [-1]
+    }
+]
+output [
+    {
+        name: "output"
+        data_type: TYPE_STRING
+        dims: [-1]
+    }
+]
+instance_group [
+    {
+        count: 1
+        kind: KIND_CPU
+    }
+]
+# CPU-specific optimizations
+optimization {
+    execution_accelerators {
+        cpu_execution_accelerator: [{
+            name: "openvino"
+        }]
+    }
+}
+# Performance tuning parameters
+parameters: {
+    key: "INFERENCE_BATCH_SIZE"
+    value: {
+        string_value: "96"
+    }
+}
+parameters: {
+    key: "MAX_SEQUENCE_LENGTH"
+    value: {
+        string_value: "512"
+    }
+}

config_gpu.pbtxt ADDED Viewed

	@@ -0,0 +1,28 @@

+name: "tinybert-familyhistory-de"
+backend: "python"
+input [
+    {
+        name: "text"
+        data_type: TYPE_STRING
+        dims: [-1]
+    }
+]
+output [
+    {
+        name: "output"
+        data_type: TYPE_STRING
+        dims: [-1]
+    }
+]
+instance_group [
+    {
+        kind: KIND_GPU
+    }
+]
+dynamic_batching {
+    default_queue_policy: {
+        default_timeout_microseconds: 60000000
+    }
+}

model_info.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+    "model_version": 1768930002,
+    "model_name": "bert-demo-de",
+    "model_type": "bert",
+    "model_platform": "pytorch",
+    "model_architecture": "BERT",
+    "model_description": "Retrieve named entities from text.",
+    "model_date": "2026-01-20T18:26:42.445432+01:00",
+    "clinalytix_version": "unknown",
+    "model_objective": "RECOGNITION",
+    "use_case": "demo",
+    "build_number": null,
+    "revision_number": null,
+    "language_code": "de",
+    "language_codes_multilingual": null,
+    "target": null,
+    "ner_config": {
+        "max_length": 256,
+        "stride": 16
+    },
+    "nen_config": null,
+    "negation_config": null,
+    "temporality_config": null,
+    "familyhistory_config": null,
+    "rer_config": null,
+    "git_info": {
+        "commit_hash": null,
+        "branch": null,
+        "tag": null,
+        "description": null,
+        "remote_url": null
+    }
+}