Abineshkumar77 committed
Commit 4fcd4f9 · Parent: 06f2360

Add application file
Files changed (1):
  1. app.py  +7 -26
app.py CHANGED
@@ -1,33 +1,12 @@
 from fastapi import FastAPI
 from transformers import AutoTokenizer
-from optimum.onnxruntime import ORTModelForSequenceClassification, ORTOptimizer, ORTQuantizer
-from optimum.onnxruntime.configuration import OptimizationConfig, AutoQuantizationConfig
+from optimum.onnxruntime import ORTModelForSequenceClassification
 import torch
 import time
 
-# Load the tokenizer
+# Load the tokenizer and optimized model
 tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
-
-# Convert the model to ONNX and optimize it
-model_id = "cardiffnlp/twitter-roberta-base-sentiment"
-
-# Load and convert the model to ONNX
-onnx_model = ORTModelForSequenceClassification.from_pretrained(model_id, from_transformers=True)
-onnx_model.save_pretrained("./model_onnx")
-
-# Optimize the ONNX model
-optimizer = ORTOptimizer.from_pretrained(onnx_model)
-optimizer.optimize(
-    OptimizationConfig(optimization_level=99),  # Adjust optimization level as needed
-    save_dir="./model_onnx_optimized"
-)
-optimized_model = ORTModelForSequenceClassification.from_pretrained("./model_onnx_optimized", file_name="model_optimized.onnx")
-
-# Quantize the optimized ONNX model
-quantizer = ORTQuantizer.from_pretrained(optimized_model)
-quantization_config = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=True)
-quantizer.quantize(save_dir="./model_onnx_quantized", quantization_config=quantization_config)
-quantized_model = ORTModelForSequenceClassification.from_pretrained("./model_onnx_quantized", file_name="model_quantized.onnx")
+model = ORTModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment", from_transformers=True)
 
 app = FastAPI()
 
@@ -56,9 +35,9 @@ def analyze_sentiment(tweet: str):
     # Tokenize the input tweet
     inputs = tokenizer(tweet_proc, return_tensors="pt")
 
-    # Perform the inference with the quantized ONNX model
+    # Perform the inference
     with torch.no_grad():
-        outputs = quantized_model(**inputs)
+        outputs = model(**inputs)
 
     # Calculate the inference time
     inference_time = time.time() - start_time
@@ -87,3 +66,5 @@ def analyze_sentiment(tweet: str):
         "score": highest_score,
         "inference_time": round(inference_time, 4)  # In seconds
     }
+
+
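For context, the lines removed above ran the ONNX export, graph optimization, and dynamic quantization on every app start. If that pipeline is still wanted, it could be kept as a one-off offline script rather than startup code; the sketch below is assembled from the removed lines and is not part of this commit (directory names are the original ones, and from_transformers=True is the older Optimum argument, replaced by export=True in newer releases).

# offline_quantize.py -- hypothetical standalone script, not included in this commit
from optimum.onnxruntime import ORTModelForSequenceClassification, ORTOptimizer, ORTQuantizer
from optimum.onnxruntime.configuration import OptimizationConfig, AutoQuantizationConfig

model_id = "cardiffnlp/twitter-roberta-base-sentiment"

# Export the PyTorch checkpoint to ONNX
onnx_model = ORTModelForSequenceClassification.from_pretrained(model_id, from_transformers=True)
onnx_model.save_pretrained("./model_onnx")

# Graph-optimize the exported model
optimizer = ORTOptimizer.from_pretrained(onnx_model)
optimizer.optimize(OptimizationConfig(optimization_level=99), save_dir="./model_onnx_optimized")

# Dynamically quantize the optimized model for AVX512-VNNI CPUs
optimized_model = ORTModelForSequenceClassification.from_pretrained(
    "./model_onnx_optimized", file_name="model_optimized.onnx"
)
quantizer = ORTQuantizer.from_pretrained(optimized_model)
quantizer.quantize(
    save_dir="./model_onnx_quantized",
    quantization_config=AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=True),
)

The app could then point ORTModelForSequenceClassification.from_pretrained at ./model_onnx_quantized (with file_name="model_quantized.onnx") instead of re-exporting from the Hub at startup.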
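A quick way to exercise the endpoint after this change is FastAPI's test client. The route path and parameter style below are assumptions, since the decorator on analyze_sentiment sits outside the hunks shown here.

# test_app.py -- hypothetical smoke test, not part of this commit
from fastapi.testclient import TestClient

from app import app  # the file added in this commit

client = TestClient(app)

# "/analyze" and the query-parameter style are guesses; adjust to the real route decorator.
response = client.get("/analyze", params={"tweet": "I love this update!"})
print(response.json())  # per the diff, the payload includes "score" and "inference_time"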