Spaces:

Abineshkumar
/

demodeploy

Runtime error

App Files Community

Abineshkumar77 committed on Aug 25, 2024

Commit

86f4524

1 Parent(s): d555bd3

Add application file

Browse files

Files changed (1) hide show

app.py +8 -60

app.py CHANGED Viewed

@@ -1,43 +1,13 @@
 from fastapi import FastAPI
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
-import onnx
-import onnxruntime as ort
 import time
-import os
-app = FastAPI()
-# Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
-# Define paths
-onnx_model_path = "sentiment_model.onnx"
-def export_model_to_onnx(model, tokenizer, onnx_model_path):
-    # Create dummy input for model export
-    dummy_input = tokenizer("This is a test input", return_tensors="pt")
-    # Export the model to ONNX
-    torch.onnx.export(
-        model,
-        (dummy_input["input_ids"], dummy_input["attention_mask"]),
-        onnx_model_path,
-        input_names=["input_ids", "attention_mask"],
-        output_names=["logits"],
-        opset_version=11,
-        dynamic_axes={"input_ids": {0: "batch_size"}, "attention_mask": {0: "batch_size"}}
-    )
-    print(f"Model exported to {onnx_model_path}")
-def optimize_onnx_model(onnx_model_path):
-    # Quantize the model
-    quantized_model_path = onnx_model_path.replace(".onnx", "_quantized.onnx")
-    os.system(f"python -m onnxruntime.tools.optimizer_cli --input {onnx_model_path} --output {quantized_model_path} --optimize --quantize")
-    print(f"Model quantized to {quantized_model_path}")
-    return quantized_model_path
 def preprocess_tweet(tweet: str) -> str:
     tweet_words = []
@@ -49,22 +19,6 @@ def preprocess_tweet(tweet: str) -> str:
         tweet_words.append(word)
     return " ".join(tweet_words)
-# Load or export and quantize the model
-if not os.path.exists(onnx_model_path):
-    # Load the original model
-    model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
-    # Export the model to ONNX
-    export_model_to_onnx(model, tokenizer, onnx_model_path)
-    # Quantize the model
-    onnx_model_path = optimize_onnx_model(onnx_model_path)
-else:
-    print("ONNX model already exists. Skipping export.")
-# Load the quantized ONNX model
-ort_session = ort.InferenceSession(onnx_model_path)
 @app.get("/")
 def home():
     """Handle GET requests to the root path with a fixed welcome payload."""
     # Static greeting; no request data is read.
     welcome = {"message": "Welcome to the sentiment analysis API"}
     return welcome
@@ -79,22 +33,16 @@ def analyze_sentiment(tweet: str):
     # Tokenize the input tweet
     inputs = tokenizer(tweet_proc, return_tensors="pt")
-    # Prepare input for ONNX runtime
-    ort_inputs = {
-        "input_ids": inputs["input_ids"].numpy(),
-        "attention_mask": inputs["attention_mask"].numpy(),
-    }
-    # Perform the inference with ONNX runtime
-    ort_outs = ort_session.run(None, ort_inputs)
     # Calculate the inference time
     inference_time = time.time() - start_time
     # Get the probabilities from the logits
-    logits = torch.tensor(ort_outs[0])
-    probabilities = torch.softmax(logits, dim=1)
     # Get the label with the highest probability
     max_prob, max_index = torch.max(probabilities, dim=1)
@@ -116,4 +64,4 @@ def analyze_sentiment(tweet: str):
         "label": highest_label,
         "score": highest_score,
         "inference_time": round(inference_time, 4)  # In seconds
-    }

 from fastapi import FastAPI
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import time
+# Load the tokenizer and model directly
 tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
+model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
+app = FastAPI()
 def preprocess_tweet(tweet: str) -> str:
     tweet_words = []
         tweet_words.append(word)
     return " ".join(tweet_words)
 @app.get("/")
 def home():
     """Handle GET requests to the root path with a fixed welcome payload."""
     # Static greeting; no request data is read.
     welcome = {"message": "Welcome to the sentiment analysis API"}
     return welcome
     # Tokenize the input tweet
     inputs = tokenizer(tweet_proc, return_tensors="pt")
+    # Perform the inference
+    with torch.no_grad():
+        outputs = model(**inputs)
     # Calculate the inference time
     inference_time = time.time() - start_time
     # Get the probabilities from the logits
+    probabilities = torch.softmax(outputs.logits, dim=1)
     # Get the label with the highest probability
     max_prob, max_index = torch.max(probabilities, dim=1)
         "label": highest_label,
         "score": highest_score,
         "inference_time": round(inference_time, 4)  # In seconds
+    }