# app.py — Gradio review-rating predictor (Hugging Face Space by ma4389, commit e4f73ac)
import gradio as gr
import torch
from transformers import T5Tokenizer
import torch.nn as nn
from transformers import T5EncoderModel
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import nltk
# One-time download of every NLTK resource the preprocessor relies on
# (tokenizer data, stopword list, and the WordNet lemmatizer corpora).
for _resource in ('punkt_tab', 'stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(_resource)

# Shared preprocessing helpers used by preprocess_text below.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
# Preprocessing function
def preprocess_text(text):
    """Normalize a raw review string for the model.

    Steps: strip URLs, drop non-alphabetic characters, collapse
    whitespace, lowercase, tokenize, remove English stopwords,
    lemmatize, and re-join into a single space-separated string.
    """
    # Remove URLs FIRST. The original ran the punctuation strip before
    # this step, which deleted ':', '/', '.' from URLs so the URL
    # pattern could no longer match bare domains (e.g.
    # "example.com/page" -> "examplecompage" leaked into the output).
    text = re.sub(r'http\S+|www\S+|https\S+', '', text)
    # Remove non-alphabet characters (whitespace is kept for tokenizing)
    text = re.sub(r'[^A-Za-z\s]', '', text)
    # Normalize whitespace
    text = re.sub(r'\s+', ' ', text).strip()
    # Lowercase
    text = text.lower()
    # Tokenize into words
    tokens = word_tokenize(text)
    # Drop stopwords (checked on the raw token, as before) and lemmatize
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    # Re-join
    return ' '.join(tokens)
# Model class
class T5_regression(nn.Module):
    """T5 encoder topped with a linear head emitting one rating score."""

    def __init__(self):
        super().__init__()
        # Pretrained encoder; the head width follows its hidden size.
        self.t5 = T5EncoderModel.from_pretrained("t5-base")
        self.fc = nn.Linear(self.t5.config.d_model, 1)
        self.relu = nn.ReLU()

    def forward(self, input_ids, attention_mask):
        """Encode the batch and regress a scalar rating per example."""
        encoded = self.t5(input_ids=input_ids, attention_mask=attention_mask)
        # First token's hidden state serves as the sequence representation.
        first_token = encoded.last_hidden_state[:, 0, :]
        scores = self.fc(first_token)
        # (batch, 1) -> (batch,)
        return scores.squeeze(-1)
# Load tokenizer and model, placing the model on the best available device.
tokenizer = T5Tokenizer.from_pretrained("t5-base")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = T5_regression().to(device)
# Load trained weights. weights_only=True restricts torch.load's pickle
# machinery to tensors and plain containers, preventing arbitrary code
# execution if the checkpoint file were tampered with — a plain
# state_dict checkpoint loads unchanged under this restriction.
model.load_state_dict(torch.load("best_model.pth", map_location=device, weights_only=True))
# Inference only: disables dropout and other training-mode behavior.
model.eval()
# Prediction function
def predict_rating(review_text):
    """Predict a numeric rating for one review string.

    The text is cleaned with preprocess_text, encoded to a fixed
    128-token window, scored by the regression model, and the result
    is rounded to one decimal place.
    """
    cleaned = preprocess_text(review_text)
    # Fixed-length encoding so the model always sees 128 positions.
    batch = tokenizer(
        cleaned,
        truncation=True,
        padding='max_length',
        max_length=128,
        return_tensors='pt',
    )
    with torch.no_grad():
        score = model(
            batch['input_ids'].to(device),
            batch['attention_mask'].to(device),
        )
    return round(score.item(), 1)
# Build and launch the Gradio front end around the prediction function.
demo = gr.Interface(
    fn=predict_rating,
    inputs=gr.Textbox(lines=4, placeholder="Enter your review here..."),
    outputs=gr.Number(label="Predicted Rating"),
    title="Review Rating Predictor",
    description="Predicts the rating of a mobile app review using a fine-tuned T5 regression model.",
)
demo.launch()