Spaces:

ma4389
/

YouTube_Sentiment_

Sleeping

App Files Files Community

YouTube_Sentiment_ / app.py

ma4389

Update app.py

e8d47b0 verified 7 months ago

raw

history blame contribute delete

2.23 kB

	import torch
	from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
	import gradio as gr
	import re
	import nltk
	from nltk.tokenize import word_tokenize
	from nltk.corpus import stopwords
	from nltk.stem import WordNetLemmatizer

	# Fetch the NLTK data packages used below; this runs every time the
	# module is imported.
	for _resource in ('punkt_tab', 'stopwords', 'wordnet'):
	    nltk.download(_resource)

	# Module-level preprocessing state shared by preprocess_text:
	# a WordNet lemmatizer and the set of English stop words.
	lemmatizer = WordNetLemmatizer()
	stop_words = set()
	stop_words.update(stopwords.words('english'))

	def preprocess_text(text):
	    """Normalize a raw comment into a space-joined string of lemmas.

	    Steps, in order: remove URL-like tokens, drop every character that
	    is not an ASCII letter or whitespace, collapse whitespace, lowercase,
	    tokenize, remove English stop words, and lemmatize what is left.

	    Args:
	        text: Raw comment string.

	    Returns:
	        The surviving lemmas joined by single spaces; an empty string
	        when no token survives the filtering.
	    """
	    # URLs are removed first: once punctuation is stripped, "://" and "."
	    # are gone and a link is no longer recognisable as one. The pattern
	    # uses a bare "|" for alternation; an escaped "\|" would only match
	    # a literal pipe character and therefore never fire.
	    text = re.sub(r'http\S+|www\S+', ' ', text)
	    text = re.sub(r'[^A-Za-z\s]', '', text)
	    text = re.sub(r'\s+', ' ', text).strip().lower()
	    tokens = word_tokenize(text)
	    return ' '.join(
	        lemmatizer.lemmatize(word)
	        for word in tokens
	        if word not in stop_words
	    )

	# Build the tokenizer and a 3-label DistilBERT classifier from the base
	# checkpoint, then replace the classifier's weights with the state dict
	# stored in sent_model.pth (presumably the fine-tuned weights).
	_BASE_CHECKPOINT = "distilbert-base-uncased"
	tokenizer = DistilBertTokenizer.from_pretrained(_BASE_CHECKPOINT)
	model = DistilBertForSequenceClassification.from_pretrained(
	    _BASE_CHECKPOINT,
	    num_labels=3,
	)
	# NOTE(review): torch.load unpickles the file, so only load checkpoints
	# that come from a trusted source.
	model.load_state_dict(
	    torch.load("sent_model.pth", map_location=torch.device("cpu"))
	)
	model.eval()  # switch the module to evaluation mode before serving

	# Class index -> sentiment label. The order is stated to follow the
	# LabelEncoder used at training time -- TODO confirm against the
	# training code if the classes ever change.
	idx2label = dict(enumerate(("negative", "neutral", "positive")))

	# Prediction function
	def predict(text):
	    """Return the model's class probabilities for one comment.

	    Args:
	        text: Raw comment string; it is cleaned with preprocess_text
	            before being tokenized.

	    Returns:
	        Dict mapping every label in idx2label to its softmax probability,
	        as a built-in float rounded to 4 decimal places.
	    """
	    clean_text = preprocess_text(text)
	    inputs = tokenizer(clean_text, return_tensors="pt", truncation=True, padding=True, max_length=128)
	    with torch.no_grad():
	        outputs = model(**inputs)
	    # tolist() converts the scores to built-in floats. Rounding those,
	    # rather than rounding float32 scalars and widening them afterwards,
	    # yields values that really have at most 4 decimals.
	    probs = torch.nn.functional.softmax(outputs.logits, dim=1)[0].tolist()

	    # Iterate the label table instead of a hard-coded range(3) so the
	    # output always stays in step with idx2label.
	    return {label: round(probs[idx], 4) for idx, label in idx2label.items()}

	# Gradio UI: a 4-line textbox feeds predict(), and the returned
	# label -> probability dict is rendered by a Label component.
	_comment_box = gr.Textbox(lines=4, placeholder="Enter a YouTube comment...")
	_sentiment_view = gr.Label(num_top_classes=3)

	# Sample comments offered in the UI, one single-field row each.
	_sample_comments = [
	    ["This video is absolutely amazing!"],
	    ["Not bad, but could be better."],
	    ["I hated this. Waste of time."],
	]

	interface = gr.Interface(
	    fn=predict,
	    inputs=_comment_box,
	    outputs=_sentiment_view,
	    title="🎯 YouTube Comment Sentiment Classifier",
	    description="Predicts sentiment (positive, neutral, or negative) from YouTube comments using a custom DistilBERT model.",
	    examples=_sample_comments,
	)

	# Start the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    interface.launch()