Upload folder using huggingface_hub

Browse files

Files changed (11) hide show

README.md +10 -3
chinese_best_model_q8.onnx +3 -0
inference_onnx.py +168 -0
multilingual_best_model_q8.onnx +3 -0
run_chinese.sh +1 -0
run_multilingual.sh +1 -0
tokenizer/config.json +31 -0
tokenizer/special_tokens_map.json +7 -0
tokenizer/tokenizer.json +0 -0
tokenizer/tokenizer_config.json +55 -0
tokenizer/vocab.txt +0 -0

README.md CHANGED Viewed

@@ -1,3 +1,10 @@
----
-license: apache-2.0
----

+---
+license: "apache-2.0"
+---
+### fireredchat-turn-detector
+- `chinese_best_model_q8.onnx`: FireRedChat turn-detector model (Chinese only)
+- `multilingual_best_model_q8.onnx`: FireRedChat turn-detector model (Chinese and English)
+### Acknowledgment
+Base model: google-bert/bert-base-multilingual-cased (license: Apache-2.0)

chinese_best_model_q8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e0738b2d2f8cf17ee75fc8ac8a36f2b0a9dcb29f288387df4ab7554f0c3f6317
+size 178152957

inference_onnx.py ADDED Viewed

	@@ -0,0 +1,168 @@

+import sys
+import os
+import json
+import logging
+from typing import List, Dict, Tuple, Optional
+import time
+import numpy as np
+from tqdm import tqdm
+import onnxruntime as ort
+from transformers import AutoTokenizer
+class StopJudgmentONNXInference:
+    def __init__(self, onnx_model_path: str, tokenizer_path: str, device: str = 'auto'):
+        """
+        判停模型ONNX推理类
+        Args:
+            onnx_model_path: ONNX模型路径
+            tokenizer_path: tokenizer路径
+            device: 设备类型 ('auto', 'cuda', 'cpu')
+        """
+        self.onnx_model_path = onnx_model_path
+        self.tokenizer_path = tokenizer_path
+        self.setup_logging()
+        self.load_model_and_tokenizer()
+    def setup_logging(self):
+        """设置日志"""
+        logging.basicConfig(
+            level=logging.INFO,
+            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+        )
+        self.logger = logging.getLogger(__name__)
+    def load_model_and_tokenizer(self):
+        """加载ONNX模型和tokenizer"""
+        # 加载tokenizer
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_path, local_files_only=True)
+            self.logger.info("Tokenizer loaded successfully")
+        except Exception as e:
+            self.logger.error(f"Failed to load tokenizer: {e}")
+            raise
+        # 修复providers配置
+        providers = []
+        # 检查CUDA是否可用
+        available_providers = ort.get_available_providers()
+        if 'CUDAExecutionProvider' in available_providers:
+            providers.append('CUDAExecutionProvider')
+            self.logger.info("CUDA provider is available and will be used")
+        providers.append('CPUExecutionProvider')  # 始终添加CPU作为备选
+        try:
+            self.ort_session = ort.InferenceSession(self.onnx_model_path, providers=providers)
+            self.logger.info(f"ONNX model loaded successfully with providers: {self.ort_session.get_providers()}")
+        except Exception as e:
+            self.logger.error(f"Failed to load ONNX model: {e}")
+            raise
+        # 获取输入输出信息
+        self.input_names = [input.name for input in self.ort_session.get_inputs()]
+        self.output_names = [output.name for output in self.ort_session.get_outputs()]
+        self.logger.info(f"Input names: {self.input_names}")
+        self.logger.info(f"Output names: {self.output_names}")
+    def preprocess_text(self, texts: List[str], max_length: int = 128) -> Dict[str, np.ndarray]:
+        """
+        预处理文本数据
+        Args:
+            texts: 文本列表
+            max_length: 最大长度
+        Returns:
+            包含input_ids和attention_mask的字典
+        """
+        encoding = self.tokenizer(
+            texts,
+            truncation=True,
+            padding='max_length',
+            max_length=max_length,
+            return_tensors='np'  # 返回numpy数组
+        )
+        return {
+            'input_ids': encoding['input_ids'].astype(np.int64),
+            'attention_mask': encoding['attention_mask'].astype(np.int64)
+        }
+    def predict_single(self, text: str, max_length: int = 128) -> Tuple[int, float]:
+        """单个文本预测"""
+        inputs = self.preprocess_text([text], max_length)
+        # ONNX推理
+        ort_inputs = {
+            self.input_names[0]: inputs['input_ids'],
+            self.input_names[1]: inputs['attention_mask']
+        }
+        ort_outputs = self.ort_session.run(self.output_names, ort_inputs)
+        logits = ort_outputs[0]
+        # 计算概率和预测
+        probabilities = self.softmax(logits)
+        prediction = np.argmax(probabilities[0])
+        confidence = probabilities[0][prediction]
+        return int(prediction), float(confidence)
+    def predict_batch(self, texts: List[str], max_length: int = 128,
+                     batch_size: int = 32) -> Tuple[List[int], List[float]]:
+        """批量预测"""
+        all_predictions = []
+        all_confidences = []
+        for i in tqdm(range(0, len(texts), batch_size), desc="ONNX Predicting"):
+            batch_texts = texts[i:i + batch_size]
+            inputs = self.preprocess_text(batch_texts, max_length)
+            # ONNX推理
+            ort_inputs = {
+                self.input_names[0]: inputs['input_ids'],
+                self.input_names[1]: inputs['attention_mask']
+            }
+            ort_outputs = self.ort_session.run(self.output_names, ort_inputs)
+            logits = ort_outputs[0]
+            # 计算概率和预测
+            probabilities = self.softmax(logits)
+            predictions = np.argmax(probabilities, axis=1)
+            confidences = [probabilities[j][pred] for j, pred in enumerate(predictions)]
+            all_predictions.extend(predictions.tolist())
+            all_confidences.extend(confidences)
+        return all_predictions, all_confidences
+    @staticmethod
+    def softmax(x):
+        """Softmax函数"""
+        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
+        return exp_x / np.sum(exp_x, axis=1, keepdims=True)
+def main():
+    """主函数"""
+    if len(sys.argv) < 3:
+        print("Usage: python validate_onnx.py <tokenizer_path> <onnx_model_path> [test_sentence]")
+        sys.exit(1)
+    tokenizer_path = sys.argv[1]
+    onnx_model_path = sys.argv[2]
+    test_sentence = sys.argv[3] if len(sys.argv) > 3 else "欢迎测试本判停模型有修正建议请随时提出"
+    print("\n ONNX Model Inference...")
+    onnx_inferencer = StopJudgmentONNXInference(onnx_model_path, tokenizer_path)
+    prediction, confidence = onnx_inferencer.predict_single(
+        test_sentence, max_length=128
+    )
+    print(prediction, confidence)
+# Run the command-line entry point only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()

multilingual_best_model_q8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:285cafedb0263b065d83964ecc728b795bb281688b0d6525f6c7ca6cb1f756df
+size 178152959

run_chinese.sh ADDED Viewed

	@@ -0,0 +1 @@


1	+ python inference_onnx.py tokenizer chinese_best_model_q8.onnx "$@"  # "$@" keeps a sentence containing spaces as one argument

run_multilingual.sh ADDED Viewed

	@@ -0,0 +1 @@


1	+ python inference_onnx.py tokenizer multilingual_best_model_q8.onnx "$@"  # "$@" keeps a sentence containing spaces as one argument

tokenizer/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "pretrained_models/bert-base-multilingual-cased",
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 119547
+}

tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

tokenizer/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff