Abineshkumar77 committed
Commit d555bd3 · 1 Parent(s): 9bae10e

Add application file

Files changed (1)
  1. app.py +60 -8
app.py CHANGED
@@ -1,13 +1,43 @@
 from fastapi import FastAPI
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
+import onnx
+import onnxruntime as ort
+from onnxruntime.quantization import quantize_dynamic, QuantType
 import time
+import os
 
-# Load the tokenizer and model directly
+app = FastAPI()
+
+# Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
-model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
 
-app = FastAPI()
+# Define paths
+onnx_model_path = "sentiment_model.onnx"
+
+def export_model_to_onnx(model, tokenizer, onnx_model_path):
+    # Create a dummy input so the exporter can trace the model
+    dummy_input = tokenizer("This is a test input", return_tensors="pt")
+
+    # Export the model to ONNX; mark batch and sequence dimensions as dynamic
+    # so the exported graph accepts inputs of any length
+    torch.onnx.export(
+        model,
+        (dummy_input["input_ids"], dummy_input["attention_mask"]),
+        onnx_model_path,
+        input_names=["input_ids", "attention_mask"],
+        output_names=["logits"],
+        opset_version=11,
+        dynamic_axes={"input_ids": {0: "batch_size", 1: "sequence_length"}, "attention_mask": {0: "batch_size", 1: "sequence_length"}, "logits": {0: "batch_size"}}
+    )
+
+    print(f"Model exported to {onnx_model_path}")
+
+def optimize_onnx_model(onnx_model_path):
+    # Quantize the model (dynamic weight-only quantization for faster CPU inference)
+    quantized_model_path = onnx_model_path.replace(".onnx", "_quantized.onnx")
+    quantize_dynamic(onnx_model_path, quantized_model_path, weight_type=QuantType.QInt8)
+
+    print(f"Model quantized to {quantized_model_path}")
+    return quantized_model_path
 
 def preprocess_tweet(tweet: str) -> str:
     tweet_words = []
@@ -19,6 +49,22 @@ def preprocess_tweet(tweet: str) -> str:
         tweet_words.append(word)
     return " ".join(tweet_words)
 
+# Export and quantize the model on first start-up; reuse the quantized file afterwards
+quantized_model_path = onnx_model_path.replace(".onnx", "_quantized.onnx")
+if not os.path.exists(quantized_model_path):
+    # Load the original model
+    model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
+
+    # Export the model to ONNX
+    export_model_to_onnx(model, tokenizer, onnx_model_path)
+
+    # Quantize the model
+    quantized_model_path = optimize_onnx_model(onnx_model_path)
+else:
+    print("Quantized ONNX model already exists. Skipping export.")
+
+# Load the quantized ONNX model
+ort_session = ort.InferenceSession(quantized_model_path)
+
 @app.get("/")
 def home():
     return {"message": "Welcome to the sentiment analysis API"}
@@ -33,16 +79,22 @@ def analyze_sentiment(tweet: str):
 
     # Tokenize the input tweet
    inputs = tokenizer(tweet_proc, return_tensors="pt")
+
+    # Prepare input for ONNX Runtime (NumPy arrays keyed by input name)
+    ort_inputs = {
+        "input_ids": inputs["input_ids"].numpy(),
+        "attention_mask": inputs["attention_mask"].numpy(),
+    }
 
-    # Perform the inference
-    with torch.no_grad():
-        outputs = model(**inputs)
+    # Perform the inference with ONNX Runtime
+    ort_outs = ort_session.run(None, ort_inputs)
 
     # Calculate the inference time
     inference_time = time.time() - start_time
 
     # Get the probabilities from the logits
-    probabilities = torch.softmax(outputs.logits, dim=1)
+    logits = torch.tensor(ort_outs[0])
+    probabilities = torch.softmax(logits, dim=1)
 
     # Get the label with the highest probability
     max_prob, max_index = torch.max(probabilities, dim=1)
@@ -64,4 +116,4 @@ def analyze_sentiment(tweet: str):
         "label": highest_label,
         "score": highest_score,
         "inference_time": round(inference_time, 4)  # In seconds
-    }
+    }
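
For reviewers who want to sanity-check the exported model outside FastAPI, a small standalone script like the one below works. This is only a sketch, not part of the commit: it assumes app.py has already been started once so that sentiment_model_quantized.onnx exists on disk, and it assumes the label order documented for cardiffnlp/twitter-roberta-base-sentiment (index 0 = negative, 1 = neutral, 2 = positive).

# check_onnx.py - hypothetical sanity check, not part of this commit
import numpy as np
import onnxruntime as ort
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")

# Assumes app.py has already exported and quantized the model
session = ort.InferenceSession("sentiment_model_quantized.onnx")

# Tokenize a sample tweet into NumPy arrays matching the exported input names
inputs = tokenizer("covid cases are going up fast!", return_tensors="np")
ort_outs = session.run(None, {
    "input_ids": inputs["input_ids"],
    "attention_mask": inputs["attention_mask"],
})

# Softmax over the logits of the single example; label order assumed from the model card
logits = ort_outs[0][0]
probs = np.exp(logits - logits.max())
probs = probs / probs.sum()
labels = ["negative", "neutral", "positive"]
print({label: round(float(p), 4) for label, p in zip(labels, probs)})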
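
The diff shows the body of analyze_sentiment but not its route decorator, so only the home route can be exercised end to end from what is visible here. A rough sketch using FastAPI's TestClient follows; importing app triggers the module-level export and quantization, so the first import is slow, and the "/analyze" path in the commented lines is a placeholder, not taken from this commit.

# hypothetical smoke test for the running app, not part of this commit
from fastapi.testclient import TestClient
from app import app  # first import exports and quantizes the model

client = TestClient(app)

# The home route is visible in this diff
resp = client.get("/")
print(resp.status_code, resp.json())  # expect 200 and the welcome message

# The sentiment route's path is outside the diff context; replace "/analyze"
# with the real path before running this part.
# resp = client.get("/analyze", params={"tweet": "covid cases are going up fast!"})
# print(resp.json())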