import time

from fastapi import FastAPI
import onnxruntime as ort
import torch
from transformers import AutoTokenizer

# Load the tokenizer and the exported ONNX model
tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
onnx_model_path = "D:/demodeploy/sentiment_model.onnx"
onnx_session = ort.InferenceSession(onnx_model_path)
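
# A minimal, hypothetical sketch of how the ONNX file loaded above could be
# produced from the same Hugging Face checkpoint. The function name, dummy
# input, and opset version are assumptions for illustration only; the app
# never calls this function.
def export_model_to_onnx(output_path: str = "sentiment_model.onnx") -> None:
    from transformers import AutoModelForSequenceClassification

    model = AutoModelForSequenceClassification.from_pretrained(
        "cardiffnlp/twitter-roberta-base-sentiment"
    )
    model.config.return_dict = False  # export a plain tuple output, (logits,)
    model.eval()
    # A dummy input traces the graph; dynamic axes keep batch/sequence sizes flexible
    dummy = tokenizer("example tweet", return_tensors="pt")
    torch.onnx.export(
        model,
        (dummy["input_ids"], dummy["attention_mask"]),
        output_path,
        input_names=["input_ids", "attention_mask"],
        output_names=["logits"],
        dynamic_axes={
            "input_ids": {0: "batch", 1: "sequence"},
            "attention_mask": {0: "batch", 1: "sequence"},
            "logits": {0: "batch"},
        },
        opset_version=14,
    )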

app = FastAPI()

def preprocess_tweet(tweet: str) -> str:
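    # Mask user handles and URLs the way the cardiffnlp Twitter models expect,
    # e.g. "@alice check https://t.co/abc" becomes "@user check http".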
    tweet_words = []
    for word in tweet.split(' '):
        if word.startswith('@') and len(word) > 1:
            word = '@user'
        elif word.startswith('http'):
            word = "http"
        tweet_words.append(word)
    return " ".join(tweet_words)

@app.get("/")
def home():
    return {"message": "Welcome to the sentiment analysis API"}

@app.get("/analyze")
def analyze_sentiment(tweet: str):
    # Preprocess the tweet
    tweet_proc = preprocess_tweet(tweet)

    # Measure the time taken for the inference
    start_time = time.time()

    # Tokenize the input tweet
    inputs = tokenizer(tweet_proc, return_tensors="pt")
    input_ids = inputs["input_ids"].numpy()
    attention_mask = inputs["attention_mask"].numpy()

    # Perform the inference using ONNX
    onnx_inputs = {
        'input_ids': input_ids,
        'attention_mask': attention_mask
    }
    outputs = onnx_session.run(None, onnx_inputs)

    # Calculate the inference time
    inference_time = time.time() - start_time

    # Get the probabilities from the logits
    logits = outputs[0]
    probabilities = torch.softmax(torch.tensor(logits), dim=1)

    # Get the label with the highest probability
    max_prob, max_index = torch.max(probabilities, dim=1)

    # Map the labels to desired names
    label_map = {
        0: "Negative",
        1: "Neutral",
        2: "Positive"
    }

    # Get the highest label and its corresponding score
    highest_label = label_map[max_index.item()]
    highest_score = round(max_prob.item(), 4)

    # Return the original tweet, the label with the highest score, and the inference time
    return {
        "text": tweet,
        "label": highest_label,
        "score": highest_score,
        "inference_time": round(inference_time, 4)  # In seconds
    }
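
# Example usage (assuming this file is saved as main.py and uvicorn is installed):
#   uvicorn main:app --reload
#   curl "http://localhost:8000/analyze?tweet=I%20love%20this%20show"
# The response contains the original text, the predicted label, its probability,
# and the measured inference time in seconds.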