"""Gradio app: predict a mobile-app review's rating with a fine-tuned T5 regressor."""

import re

import gradio as gr
import nltk
import torch
import torch.nn as nn
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from transformers import T5EncoderModel, T5Tokenizer

# Download NLTK resources (no-op after the first run).
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Preprocessing tools shared by every request.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# Compiled once at import time; reused on every call to preprocess_text.
_URL_RE = re.compile(r'http\S+|www\S+|https\S+')
_NON_ALPHA_RE = re.compile(r'[^A-Za-z\s]')
_WS_RE = re.compile(r'\s+')


def preprocess_text(text):
    """Normalize a raw review into lowercase, lemmatized content words.

    Pipeline: strip URLs, drop non-alphabetic characters, collapse
    whitespace, lowercase, tokenize, remove English stopwords, lemmatize.

    Args:
        text: Raw review string.

    Returns:
        Single space-joined string of cleaned tokens (may be empty).
    """
    # BUGFIX: remove URLs *before* stripping punctuation. The original
    # order deleted '://' and '.' first, which fused URLs with adjacent
    # text (e.g. "see(https://x.com)" -> "seehttpsxcom"), letting them
    # escape the URL pattern entirely.
    text = _URL_RE.sub('', text)
    # Remove non-alphabet characters.
    text = _NON_ALPHA_RE.sub('', text)
    # Normalize whitespace, then lowercase.
    text = _WS_RE.sub(' ', text).strip().lower()
    # Tokenize, drop stopwords, lemmatize.
    tokens = word_tokenize(text)
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return ' '.join(tokens)


class T5_regression(nn.Module):
    """T5 encoder with a linear head producing a single scalar rating."""

    def __init__(self):
        super(T5_regression, self).__init__()
        self.t5 = T5EncoderModel.from_pretrained("t5-base")
        self.fc = nn.Linear(self.t5.config.d_model, 1)
        # NOTE(review): unused in forward(); kept so checkpoints saved
        # with this attribute still load without key mismatches.
        self.relu = nn.ReLU()

    def forward(self, input_ids, attention_mask):
        """Return a (batch,) tensor of predicted ratings."""
        output = self.t5(input_ids=input_ids, attention_mask=attention_mask)
        # First-token hidden state used as the pooled representation.
        # T5 has no CLS token, so this is unconventional, but the trained
        # weights in best_model.pth depend on it — do not change here.
        pooled_output = output.last_hidden_state[:, 0, :]
        rating = self.fc(pooled_output)
        return rating.squeeze(-1)


# Load tokenizer, model, and trained weights once at startup.
tokenizer = T5Tokenizer.from_pretrained("t5-base")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = T5_regression().to(device)
# NOTE(review): torch.load unpickles arbitrary objects — only load
# checkpoints from a trusted source (or pass weights_only=True on
# torch >= 2.0 once the deployment's torch version allows it).
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()


def predict_rating(review_text):
    """Predict the rating for one review.

    Args:
        review_text: Raw review string from the UI.

    Returns:
        Predicted rating as a float rounded to one decimal place.
    """
    clean_text = preprocess_text(review_text)
    encoding = tokenizer(
        clean_text,
        truncation=True,
        padding='max_length',
        max_length=128,
        return_tensors='pt',
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    with torch.no_grad():
        output = model(input_ids, attention_mask)
    rating = output.item()
    return round(rating, 1)


# Gradio UI
iface = gr.Interface(
    fn=predict_rating,
    inputs=gr.Textbox(lines=4, placeholder="Enter your review here..."),
    outputs=gr.Number(label="Predicted Rating"),
    title="Review Rating Predictor",
    description="Predicts the rating of a mobile app review using a fine-tuned T5 regression model.",
)

# Launch only when run as a script, so the module stays importable
# (e.g. for tests) without starting a web server.
if __name__ == "__main__":
    iface.launch()