|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import T5Tokenizer |
|
|
import torch.nn as nn |
|
|
from transformers import T5EncoderModel |
|
|
import re |
|
|
from nltk.tokenize import word_tokenize |
|
|
from nltk.corpus import stopwords |
|
|
from nltk.stem import WordNetLemmatizer |
|
|
import nltk |
|
|
|
|
|
|
|
|
# Fetch the NLTK assets needed by preprocess_text: tokenizer models,
# the English stopword list, and WordNet data for lemmatization.
for _resource in ('punkt_tab', 'stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(_resource)

# Module-level preprocessing helpers, built once and reused per request.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
|
|
|
|
|
|
|
|
def preprocess_text(text):
    """Normalize a raw review string for the rating model.

    Pipeline: lowercase, strip URLs, drop non-letter characters, collapse
    whitespace, tokenize, remove English stopwords, and lemmatize.

    Args:
        text: Raw review text (any casing, may contain URLs/punctuation).

    Returns:
        A single space-joined string of cleaned, lemmatized tokens
        (empty string if nothing survives cleaning).
    """
    # Lowercase first so the case-sensitive URL pattern below also catches
    # 'HTTP://...' / 'WWW....' variants, which the model would otherwise
    # see as junk tokens.
    text = text.lower()

    # Strip URLs *before* removing punctuation: once ':' '/' '.' are gone,
    # a URL collapses into an ordinary-looking token that is harder to
    # match. 'http\S+' already covers 'https' addresses.
    text = re.sub(r'http\S+|www\S+', '', text)

    # Keep only letters and whitespace; digits and punctuation carry
    # little signal here. NOTE(review): this also mangles contractions
    # ("don't" -> "dont"), so a few stopwords slip past the filter below.
    text = re.sub(r'[^a-z\s]', '', text)

    # Collapse runs of whitespace and trim the ends.
    text = re.sub(r'\s+', ' ', text).strip()

    # Tokenize, drop stopwords, and reduce each word to its lemma.
    tokens = word_tokenize(text)
    tokens = [word for word in tokens if word not in stop_words]
    tokens = [lemmatizer.lemmatize(word) for word in tokens]

    return ' '.join(tokens)
|
|
|
|
|
|
|
|
class T5_regression(nn.Module):
    """T5 encoder with a scalar regression head for review-rating prediction.

    The pretrained t5-base encoder produces per-token hidden states; the
    hidden state at the first position is projected to a single value.
    (An unused ReLU module was dropped; nn.ReLU holds no parameters, so
    existing checkpoints still load unchanged.)
    """

    def __init__(self):
        super().__init__()
        # Encoder-only T5: no decoder weights are loaded.
        self.t5 = T5EncoderModel.from_pretrained("t5-base")
        # Projects the pooled d_model-dim vector to one regression output.
        self.fc = nn.Linear(self.t5.config.d_model, 1)

    def forward(self, input_ids, attention_mask):
        """Return a 1-D tensor of predicted ratings, one per batch item.

        Args:
            input_ids: (batch, seq) token id tensor.
            attention_mask: (batch, seq) padding mask.
        """
        output = self.t5(input_ids=input_ids, attention_mask=attention_mask)
        # NOTE(review): T5 has no dedicated [CLS] token, so pooling the
        # first position is an arbitrary choice; kept as-is because the
        # saved checkpoint was presumably trained this way — confirm
        # before changing to mean pooling.
        pooled_output = output.last_hidden_state[:, 0, :]
        rating = self.fc(pooled_output)
        # Squeeze the trailing feature dim: (batch, 1) -> (batch,)
        return rating.squeeze(-1)
|
|
|
|
|
|
|
|
# Tokenizer matching the t5-base encoder inside T5_regression.
tokenizer = T5Tokenizer.from_pretrained("t5-base")
# Prefer GPU when available; all inference tensors are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = T5_regression().to(device)

# Restore the fine-tuned weights. weights_only=True restricts the pickle
# loader to tensors and primitive containers, preventing arbitrary code
# execution from a tampered checkpoint; a plain state_dict loads fine
# under this restricted mode.
model.load_state_dict(
    torch.load("best_model.pth", map_location=device, weights_only=True)
)
# Inference mode: disables dropout and other train-time behavior.
model.eval()
|
|
|
|
|
|
|
|
def predict_rating(review_text):
    """Predict a rating for a single review string.

    The text is cleaned, encoded to a fixed 128-token window, scored by
    the regression model, and rounded to one decimal place.
    """
    cleaned = preprocess_text(review_text)

    # Fixed-length encoding so the model always sees a (1, 128) batch.
    batch = tokenizer(
        cleaned,
        truncation=True,
        padding='max_length',
        max_length=128,
        return_tensors='pt',
    )
    ids = batch['input_ids'].to(device)
    mask = batch['attention_mask'].to(device)

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        prediction = model(ids, mask)

    return round(prediction.item(), 1)
|
|
|
|
|
|
|
|
# Gradio front-end: one free-text box in, one numeric rating out.
review_box = gr.Textbox(lines=4, placeholder="Enter your review here...")
rating_output = gr.Number(label="Predicted Rating")

iface = gr.Interface(
    fn=predict_rating,
    inputs=review_box,
    outputs=rating_output,
    title="Review Rating Predictor",
    description="Predicts the rating of a mobile app review using a fine-tuned T5 regression model.",
)

iface.launch()
|
|
|