"""Gradio demo: classify the sentiment of a YouTube comment with DistilBERT.

At import time the module downloads the NLTK resources it needs, loads the
DistilBERT tokenizer and classification model, and restores fine-tuned weights
from ``sent_model.pth``. Running it as a script launches the Gradio UI.
"""

import re

import gradio as gr
import nltk
import torch
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer

# Download NLTK resources (if not already available)
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

# Preprocessing setup
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# Compiled once at import instead of being looked up on every call.
_NON_LETTER_RE = re.compile(r'[^A-Za-z\s]')
_URL_RE = re.compile(r'http\S+|www\S+|https\S+')
_WHITESPACE_RE = re.compile(r'\s+')


def preprocess_text(text):
    """Normalize a raw comment into a space-joined string of lemmatized tokens.

    Steps, in order: drop every character that is not an ASCII letter or
    whitespace, drop tokens starting at ``http``/``www``, collapse whitespace,
    lowercase, tokenize, remove English stop words, lemmatize.

    Args:
        text: The raw comment. ``None`` or an empty string yields ``''``.

    Returns:
        The cleaned text as a single space-separated string.
    """
    if not text:
        # An empty string already produced '' before; this also keeps a None
        # value from the UI from raising TypeError inside re.sub.
        return ''

    # NOTE(review): the order and patterns below are kept exactly as they were;
    # presumably they mirror the preprocessing used when sent_model.pth was
    # trained, so changing them could skew predictions -- confirm before editing.
    # The URL pattern runs after punctuation stripping and before lowercasing,
    # so it matches the already-squashed form (e.g. "httpexamplecom") and is
    # case-sensitive.
    text = _NON_LETTER_RE.sub('', text)
    text = _URL_RE.sub('', text)
    text = _WHITESPACE_RE.sub(' ', text).strip()
    text = text.lower()

    tokens = word_tokenize(text)
    return ' '.join(
        lemmatizer.lemmatize(word) for word in tokens if word not in stop_words
    )


# Label mapping based on LabelEncoder (class index -> sentiment name).
idx2label = {0: "negative", 1: "neutral", 2: "positive"}

# Load tokenizer and model; the classifier head size follows the label mapping
# so the two cannot drift apart.
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=len(idx2label),
)
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute arbitrary code while being loaded.
model.load_state_dict(
    torch.load(
        "sent_model.pth",
        map_location=torch.device("cpu"),
        weights_only=True,
    )
)
model.eval()


def predict(text):
    """Return the sentiment probabilities for one comment.

    Args:
        text: The raw comment entered in the UI.

    Returns:
        A dict mapping each label in ``idx2label`` to its softmax probability,
        rounded to 4 decimal places.
    """
    clean_text = preprocess_text(text)
    inputs = tokenizer(
        clean_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )
    with torch.no_grad():
        outputs = model(**inputs)

    # tolist() yields plain Python floats; rounding those (rather than rounding
    # a float32 and widening it afterwards) keeps the result at 4 decimals.
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)[0].tolist()
    return {label: round(probs[idx], 4) for idx, label in idx2label.items()}


# Gradio UI
interface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=4, placeholder="Enter a YouTube comment..."),
    outputs=gr.Label(num_top_classes=3),
    title="🎯 YouTube Comment Sentiment Classifier",
    description="Predicts sentiment (positive, neutral, or negative) from YouTube comments using a custom DistilBERT model.",
    examples=[
        ["This video is absolutely amazing!"],
        ["Not bad, but could be better."],
        ["I hated this. Waste of time."],
    ],
)

if __name__ == "__main__":
    interface.launch()