Abineshkumar77 committed on
Commit
06f2360
·
1 Parent(s): 3a3cb2d

Add application file

Browse files
Files changed (2) hide show
  1. app.py +28 -7
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,13 +1,33 @@
1
  from fastapi import FastAPI
2
- from optimum.onnxruntime import ORTModelForSequenceClassification
3
  from transformers import AutoTokenizer
 
 
 
4
  import time
5
 
6
  # Load the tokenizer
7
  tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
8
 
9
- # Load the quantized ONNX model from Hugging Face
10
- model = ORTModelForSequenceClassification.from_pretrained("minhdang/model_onnx", file_name="quantized_model.onnx")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  app = FastAPI()
13
 
@@ -36,17 +56,18 @@ def analyze_sentiment(tweet: str):
36
  # Tokenize the input tweet
37
  inputs = tokenizer(tweet_proc, return_tensors="pt")
38
 
39
- # Perform the inference with the ONNX model
40
- outputs = model(**inputs)
 
41
 
42
  # Calculate the inference time
43
  inference_time = time.time() - start_time
44
 
45
  # Get the probabilities from the logits
46
- probabilities = outputs.logits.softmax(dim=1)
47
 
48
  # Get the label with the highest probability
49
- max_prob, max_index = probabilities.max(dim=1)
50
 
51
  # Map the labels to desired names
52
  label_map = {
 
1
  from fastapi import FastAPI
 
2
  from transformers import AutoTokenizer
3
+ from optimum.onnxruntime import ORTModelForSequenceClassification, ORTOptimizer, ORTQuantizer
4
+ from optimum.onnxruntime.configuration import OptimizationConfig, AutoQuantizationConfig
5
+ import torch
6
  import time
7
 
8
  # Load the tokenizer
9
  tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
10
 
11
+ # Convert the model to ONNX and optimize it
12
+ model_id = "cardiffnlp/twitter-roberta-base-sentiment"
13
+
14
+ # Load and convert the model to ONNX
15
+ onnx_model = ORTModelForSequenceClassification.from_pretrained(model_id, from_transformers=True)
16
+ onnx_model.save_pretrained("./model_onnx")
17
+
18
+ # Optimize the ONNX model
19
+ optimizer = ORTOptimizer.from_pretrained(onnx_model)
20
+ optimizer.optimize(
21
+ OptimizationConfig(optimization_level=99), # Adjust optimization level as needed
22
+ save_dir="./model_onnx_optimized"
23
+ )
24
+ optimized_model = ORTModelForSequenceClassification.from_pretrained("./model_onnx_optimized", file_name="model_optimized.onnx")
25
+
26
+ # Quantize the optimized ONNX model
27
+ quantizer = ORTQuantizer.from_pretrained(optimized_model)
28
+ quantization_config = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=True)
29
+ quantizer.quantize(save_dir="./model_onnx_quantized", quantization_config=quantization_config)
30
+ quantized_model = ORTModelForSequenceClassification.from_pretrained("./model_onnx_quantized", file_name="model_quantized.onnx")
31
 
32
  app = FastAPI()
33
 
 
56
  # Tokenize the input tweet
57
  inputs = tokenizer(tweet_proc, return_tensors="pt")
58
 
59
+ # Perform the inference with the quantized ONNX model
60
+ with torch.no_grad():
61
+ outputs = quantized_model(**inputs)
62
 
63
  # Calculate the inference time
64
  inference_time = time.time() - start_time
65
 
66
  # Get the probabilities from the logits
67
+ probabilities = torch.softmax(outputs.logits, dim=1)
68
 
69
  # Get the label with the highest probability
70
+ max_prob, max_index = torch.max(probabilities, dim=1)
71
 
72
  # Map the labels to desired names
73
  label_map = {
requirements.txt CHANGED
@@ -3,6 +3,6 @@ uvicorn
3
  transformers
4
  torch
5
  scipy
6
-
7
 
8
 
 
3
  transformers
4
  torch
5
  scipy
6
+ optimum
7
 
8