Spaces:
Sleeping
Sleeping
| import torch | |
| from transformers import DistilBertTokenizer, DistilBertForSequenceClassification | |
| import gradio as gr | |
| import re | |
| import nltk | |
| from nltk.tokenize import word_tokenize | |
| from nltk.corpus import stopwords | |
| from nltk.stem import WordNetLemmatizer | |
# Fetch the NLTK data this script relies on: the tokenizer tables, the
# stop-word lists and the WordNet corpus used by the lemmatizer.
for _resource in ('punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(_resource)

# Shared preprocessing objects, built once at import time and reused by
# preprocess_text() on every request.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
def preprocess_text(text):
    """Normalize a raw comment into a space-joined string of lemmatized tokens.

    Steps, in order:
      1. Remove URLs (case-insensitively) while their punctuation is intact.
      2. Drop every character that is not an ASCII letter or whitespace.
      3. Collapse whitespace runs, trim, and lowercase.
      4. Tokenize, discard English stop words, and lemmatize what remains.

    Args:
        text: Raw comment text. ``None`` or an empty string yields ``""``.

    Returns:
        The cleaned tokens joined by single spaces; ``""`` when nothing is
        left after cleaning.
    """
    if not text:
        return ""

    # URLs are removed BEFORE punctuation is stripped: once "://" and "." are
    # gone a URL can no longer be told apart from an ordinary word. Anchoring
    # on the scheme / "www." also keeps words such as "awwww" from being
    # eaten, and IGNORECASE catches "HTTP://..." (lowercasing happens later).
    # NOTE(review): if the checkpoint was fine-tuned on text cleaned with a
    # different step order, confirm this does not skew inference inputs.
    text = re.sub(r'\b(?:https?://|www\.)\S*', '', text, flags=re.IGNORECASE)
    text = re.sub(r'[^A-Za-z\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    text = text.lower()

    tokens = word_tokenize(text)
    # Stop words are filtered on the surface form, then survivors are lemmatized.
    return ' '.join(
        lemmatizer.lemmatize(word) for word in tokens if word not in stop_words
    )
# Load the base DistilBERT tokenizer and a 3-way classification head, then
# overwrite the weights with the fine-tuned checkpoint stored next to this file.
_BASE_CHECKPOINT = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(_BASE_CHECKPOINT)
model = DistilBertForSequenceClassification.from_pretrained(_BASE_CHECKPOINT, num_labels=3)

# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (or pass weights_only=True where the installed PyTorch
# supports it).
_state_dict = torch.load("sent_model.pth", map_location=torch.device("cpu"))
model.load_state_dict(_state_dict)
model.eval()  # inference mode: disables dropout

# Class index -> label name. Per the original author's note this follows the
# LabelEncoder ordering used at training time.
idx2label = dict(enumerate(("negative", "neutral", "positive")))
| # Prediction function | |
def predict(text):
    """Classify a comment and return a probability for every sentiment label.

    Args:
        text: Raw comment text as entered by the user.

    Returns:
        dict mapping each label in ``idx2label`` to its softmax probability,
        rounded to four decimal places.
    """
    clean_text = preprocess_text(text)
    inputs = tokenizer(clean_text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
        # tolist() converts to Python floats first; rounding a float32 scalar
        # and then widening it produces values like 0.9876000285 rather than
        # 0.9876.
        probs = torch.nn.functional.softmax(outputs.logits, dim=1)[0].tolist()
    # Drive the loop from idx2label so the label count lives in one place.
    return {label: round(probs[idx], 4) for idx, label in idx2label.items()}
# Gradio UI: a multi-line text box in, a ranked label widget out.
_comment_box = gr.Textbox(lines=4, placeholder="Enter a YouTube comment...")
_sentiment_view = gr.Label(num_top_classes=3)

# One sample comment per row; each row supplies the single text input.
_sample_comments = [
    ["This video is absolutely amazing!"],
    ["Not bad, but could be better."],
    ["I hated this. Waste of time."],
]

interface = gr.Interface(
    fn=predict,
    inputs=_comment_box,
    outputs=_sentiment_view,
    title="🎯 YouTube Comment Sentiment Classifier",
    description="Predicts sentiment (positive, neutral, or negative) from YouTube comments using a custom DistilBERT model.",
    examples=_sample_comments,
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()