"""Gradio demo app for a three-class hate speech / offensive language classifier.

Loads a fitted model and TF-IDF vectorizer from ``hate_speech_model.joblib``,
applies the same text clean-up that was used at training time, and serves a
small web UI that shows the predicted class for the entered text.
"""

import re
import string

import gradio as gr
import joblib

# 1. Load the model and vectorizer
# Ensure 'hate_speech_model.joblib' is in the same directory.
# NOTE(security): joblib.load is pickle-based and can execute arbitrary code
# while loading; only ship a checkpoint file that comes from a trusted source.
checkpoint = joblib.load('hate_speech_model.joblib')
model = checkpoint['model']
tfidf = checkpoint['tfidf']

# Pre-processing constants, built once instead of on every request.
# The patterns and flags are kept exactly as in the original clean-up code,
# because the pre-processing must match the one used during training.
_URL_RE = re.compile(r'http\S+|www\S+|https\S+', flags=re.MULTILINE)
_MENTION_HASH_RE = re.compile(r'\@\w+|\#')
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)

# Map numerical class to label
_LABELS = {0: "Hate Speech", 1: "Offensive Language", 2: "Neither"}


# 2. Pre-processing function (must match the one used during training)
def clean_text(text) -> str:
    """Normalize raw text before it is vectorized.

    Steps, in order: convert to ``str`` and lowercase, remove URL-like tokens
    (anything starting with ``http`` or ``www`` up to the next whitespace),
    remove ``@mentions`` and ``#`` characters (the hashtag word itself is
    kept), strip all ``string.punctuation`` characters, and collapse runs of
    whitespace into single spaces.

    Args:
        text: The raw input; any object is accepted and converted with ``str``.

    Returns:
        The cleaned, lowercased, single-spaced text (possibly empty).
    """
    text = str(text).lower()
    text = _URL_RE.sub('', text)
    text = _MENTION_HASH_RE.sub('', text)
    text = text.translate(_PUNCT_TABLE)
    return ' '.join(text.split())


# 3. Prediction function
def predict(text) -> str:
    """Classify ``text`` and return the human-readable class label.

    Args:
        text: The text entered by the user.

    Returns:
        ``"Please enter some text."`` when the input is empty or contains
        only whitespace; otherwise one of ``"Hate Speech"``,
        ``"Offensive Language"`` or ``"Neither"``, or ``"Unknown"`` when the
        model returns a class that is not in the label map.
    """
    # Whitespace-only input would be cleaned to "" and classified from an
    # all-zero feature vector, so treat it like empty input.
    if not text or not str(text).strip():
        return "Please enter some text."

    cleaned_text = clean_text(text)
    vectorized_text = tfidf.transform([cleaned_text])
    prediction = model.predict(vectorized_text)[0]

    return _LABELS.get(prediction, "Unknown")


# 4. Build Gradio Interface
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here...", label="Input Text"),
    outputs=gr.Label(label="Classification Result"),
    title="Hate Speech Detector",
    description="This model classifies text into Hate Speech, Offensive Language, or Neither.",
    examples=[
        ["I hope you have a wonderful day!"],
        ["You are so stupid and I hate you."],
        ["That person is a complete idiot."],
    ],
)

if __name__ == "__main__":
    demo.launch()