Spaces:

vm24
/

comment_classifier

Runtime error

File size: 2,632 Bytes

e953d9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ccdf7ef
e953d9a

import gradio as gr
import pandas as pd
import numpy as np
import re
import nltk
import string
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from transformers import pipeline

# Fetch the NLTK stop-word corpus, then build the two text-processing helpers
# that clean() reads as module-level globals.
nltk.download("stopwords")
stopword = set(stopwords.words("english"))
stemmer = SnowballStemmer("english")

# Read the raw comment dataset from the CSV that ships next to this script.
data = pd.read_csv("commentdatset (1) (1).csv")

# Translate the numeric "class" column into human-readable label names and
# keep only the comment text together with its label.
class_to_label = {
    0: "Offensive Language",
    1: "Abusive comments",
    2: "No Abusive and Offensive",
}
data = data.assign(labels=data["class"].map(class_to_label))[["comments", "labels"]]

# Clean data function
# Ordered (pattern, replacement) rules that clean() applies to lower-cased
# text. Order matters: "won't"/"can't" must run before the generic "n't" rule,
# "n't" must run before the "n'" (dropped-g) rule, and every contraction rule
# must run before punctuation (including the apostrophe) is stripped.
_CLEAN_RULES = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        # --- contraction expansion ---
        (r"she's", "she is"),
        (r"it's", "it is"),
        (r"that's", "that is"),
        (r"what's", "what is"),
        (r"where's", "where is"),
        (r"how's", "how is"),
        (r"'ll", " will"),
        (r"'ve", " have"),
        (r"'re", " are"),
        (r"i'm", "i am"),
        # Drop carriage returns. The pattern must be the escape \r; a bare
        # "r" would delete every letter r from the text.
        (r"\r", ""),
        (r"he's", "he is"),
        (r"'d", " would"),
        (r"won't", "will not"),
        (r"can't", "cannot"),
        (r"n't", " not"),
        (r"n'", "ng"),
        (r"'bout", "about"),
        (r"'til", "until"),
        # --- noise removal ---
        (r"\[.*?\]", ""),  # square-bracketed spans
        (r"https?://\S+|www\.\S+", ""),  # URLs
        (r"<.*?>+", ""),  # angle-bracketed (HTML-like) tags
        ("[%s]" % re.escape(string.punctuation), ""),  # ASCII punctuation
        # A newline becomes a space so the words on either side of a line
        # break are not glued together.
        (r"\n", " "),
        (r"\w*\d\w*", ""),  # any token that contains a digit
    )
)


def clean(text):
    """Normalise a comment for classification.

    The input is coerced with ``str()`` and lower-cased (so ``None`` becomes
    the text ``"none"``), contractions are expanded, bracketed spans, URLs,
    tags, punctuation and digit-bearing tokens are removed, English stop
    words are dropped, and every remaining word is stemmed.

    Args:
        text: The raw comment; any object accepted by ``str()``.

    Returns:
        The cleaned words joined by single spaces, or an empty string when
        no word survives.
    """
    text = str(text).lower()
    for pattern, replacement in _CLEAN_RULES:
        text = pattern.sub(replacement, text)
    # split() with no argument discards the empty tokens left behind by the
    # removals above, so the result never contains runs of spaces.
    return " ".join(
        stemmer.stem(word) for word in text.split() if word not in stopword
    )

# Normalise every dataset comment in place with clean().
# NOTE(review): the visible code never reads `data` again after this line;
# confirm the cleaned dataset is still needed, since this runs over every row
# each time the script starts.
data["comments"] = data["comments"].apply(clean)

# Build the transformers sentiment-analysis pipeline once at module level so
# classify_comment() can reuse it on every call. Only the task name is passed,
# so model selection is left to the library.
# NOTE(review): consider pinning an explicit model for reproducibility.
sentiment_pipeline = pipeline("sentiment-analysis")

# Function to classify comments
def classify_comment(comment):
    """Return the sentiment pipeline's label for a single comment.

    The comment is first normalised with ``clean()`` and then passed to the
    module-level ``sentiment_pipeline``.

    NOTE(review): the returned label is whatever the sentiment model emits,
    not one of the class names mapped onto the dataset. Confirm that this is
    the intended output.

    Args:
        comment: The raw comment text entered by the user.

    Returns:
        The ``"label"`` value of the first prediction returned by the
        pipeline.
    """
    cleaned_comment = clean(comment)
    # truncation=True makes the tokenizer cut the input at the model's maximum
    # sequence length, so an over-long comment is classified on its leading
    # part rather than raising inside the model.
    prediction = sentiment_pipeline(cleaned_comment, truncation=True)
    return prediction[0]["label"]

# UI widgets: a free-text box for the comment and a label widget for the
# value returned by classify_comment().
comment_input = gr.Textbox(label="Enter a comment")
classification_output = gr.Label()

# Wire the classifier to the widgets and start serving the app.
interface = gr.Interface(
    fn=classify_comment,
    inputs=comment_input,
    outputs=classification_output,
    title="Comment Classifier",
)
interface.launch()