Spaces:

chidamnat2002
/

intent_classifier

Sleeping

Chidam Gopal committed on Oct 9, 2024

Commit

2cba4b1

unverified ·

1 Parent(s): 7538db6

directly use the onnx quantized file

Files changed (2) hide show

infer_intent.py CHANGED Viewed

@@ -1,5 +1,9 @@
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import torch
 class IntentClassifier:
@@ -15,10 +19,20 @@ class IntentClassifier:
         self.label2id = {label:id for id,label in self.id2label.items()}
         self.tokenizer = AutoTokenizer.from_pretrained("Mozilla/mobilebert-uncased-finetuned-LoRA-intent-classifier")
-        self.intent_model = AutoModelForSequenceClassification.from_pretrained('Mozilla/mobilebert-uncased-finetuned-LoRA-intent-classifier',
-                                                                        num_labels=8,
-                                                                        id2label=self.id2label,
-                                                                        label2id=self.label2id)
     def find_intent(self, sequence, verbose=False):
         inputs = self.tokenizer(sequence,

+from transformers import AutoTokenizer
 import torch
+import onnxruntime as ort
+import numpy as np
+import requests
+import os
 class IntentClassifier:
         self.label2id = {label:id for id,label in self.id2label.items()}
         self.tokenizer = AutoTokenizer.from_pretrained("Mozilla/mobilebert-uncased-finetuned-LoRA-intent-classifier")
+        model_url = "https://huggingface.co/Mozilla/mobilebert-uncased-finetuned-LoRA-intent-classifier/resolve/main/onnx/model_quantized.onnx"
+        model_dir_path = "models"
+        model_path = f"{model_dir_path}/mobilebert-uncased-finetuned-LoRA-intent-classifier_model_quantized.onnx"
+        if not os.path.exists(model_dir_path):
+            os.makedirs(model_dir_path)
+        if not os.path.exists(model_path):
+            print("Downloading ONNX model...")
+            response = requests.get(model_url)
+            with open(model_path, "wb") as f:
+                f.write(response.content)
+            print("ONNX model downloaded.")
+        # Load the ONNX model
+        self.ort_session = ort.InferenceSession(model_path)
     def find_intent(self, sequence, verbose=False):
         inputs = self.tokenizer(sequence,

requirements.txt CHANGED Viewed

@@ -1,4 +1,6 @@
 transformers==4.45.1
 torch==2.4.1
 streamlit==1.38.0
-matplotlib==3.9.2

 transformers==4.45.1
 torch==2.4.1
 streamlit==1.38.0
+matplotlib==3.9.2
+## onnx
+onnxruntime==1.19.2