Upload folder using huggingface_hub
Browse files- README.md +42 -5
- app.py +129 -0
- requirements.txt +4 -0
README.md
CHANGED
|
@@ -1,12 +1,49 @@
|
|
| 1 |
---
|
| 2 |
title: Toxic Speech Classifier
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Toxic Speech Classifier
|
| 3 |
+
emoji: 🤖
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: yellow
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.0.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# Toxic Speech Classifier 🤖
|
| 13 |
+
|
| 14 |
+
A fine-tuned Gemma3-1B model for detecting and classifying toxic, insulting, or harmful language in text.
|
| 15 |
+
|
| 16 |
+
## Description
|
| 17 |
+
|
| 18 |
+
This demo uses a fine-tuned language model to analyze text and determine whether it contains toxic or harmful content. The model returns structured information including toxicity label, tags, severity, and reasoning.
|
| 19 |
+
|
| 20 |
+
## Usage
|
| 21 |
+
|
| 22 |
+
Simply enter any text, and the model will classify whether it is toxic or non-toxic, along with detailed structured output.
|
| 23 |
+
|
| 24 |
+
## Examples
|
| 25 |
+
|
| 26 |
+
- "You are absolutely worthless and no one will ever love you."
|
| 27 |
+
- "Shut up you brainless moron, nobody asked for your stupid opinion."
|
| 28 |
+
- "The weather today is really nice, I enjoyed my walk in the park."
|
| 29 |
+
- "Thank you for your help, I really appreciate everything you did."
|
| 30 |
+
|
| 31 |
+
## Model
|
| 32 |
+
|
| 33 |
+
- **Base Model**: Gemma3-1B
|
| 34 |
+
- **Fine-tuned on**: Insult and toxic speech classification data
|
| 35 |
+
- **Model ID**: berkeruveyik/toxic-speech-finetune-with-gemma-3-1b-v1
|
| 36 |
+
|
| 37 |
+
## Output Fields
|
| 38 |
+
|
| 39 |
+
| Field | Description |
|
| 40 |
+
|-------|-------------|
|
| 41 |
+
| `is_toxic` | Whether the text is toxic (true/false) |
|
| 42 |
+
| `label` | Classification label (e.g., insult, threat, neutral) |
|
| 43 |
+
| `tags` | Relevant tags describing the type of toxicity |
|
| 44 |
+
| `reason` | Explanation for the classification |
|
| 45 |
+
| `severity` | Severity level of the toxic content |
|
| 46 |
+
|
| 47 |
+
## License
|
| 48 |
+
|
| 49 |
+
Please check the model license on the Hugging Face model page.
|
app.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr
import json
import time
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Hugging Face Hub ID of the fine-tuned Gemma3-1B toxicity classifier.
MODEL_PATH = 'berkeruveyik/toxic-speech-finetune-with-gemma-3-1b-v1'

# Load model and tokenizer once at import time so every request reuses them.
# 'eager' attention is forced here — presumably for Gemma3 compatibility on
# the Spaces runtime; TODO confirm against the model card.
loaded_model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype='auto',
    device_map='auto',
    attn_implementation='eager'
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Chat-style text-generation pipeline wrapping the loaded model/tokenizer.
loaded_model_pipeline = pipeline(
    'text-generation',
    model=loaded_model,
    tokenizer=tokenizer
)
| 25 |
+
@spaces.GPU  # request a GPU slot (Hugging Face ZeroGPU) for this call
def pred_on_text(input_text):
    """Generate a model prediction for *input_text*.

    Returns a 3-tuple:
        generated_text (str): the assistant's reply text,
        raw_output: the unmodified pipeline output,
        total_time (float): wall-clock inference time in seconds.
    """
    start_time = time.time()

    # Single-turn chat input; cap generation at 256 new tokens.
    raw_output = loaded_model_pipeline(
        text_inputs=[{'role': 'user', 'content': input_text}],
        max_new_tokens=256
    )

    end_time = time.time()
    total_time = round(end_time - start_time, 4)

    # The pipeline returns the full chat transcript; index [1] is assumed to
    # be the assistant turn appended after the single user message — TODO
    # confirm this holds for the pinned transformers version.
    generated_text = raw_output[0]['generated_text'][1]['content']

    return generated_text, raw_output, total_time
+
|
| 42 |
+
def parse_generated_text(text):
    """Parse the model's generated text into a structured dict.

    Tries strict JSON first, then a safe Python-literal parse for outputs
    using single quotes / True / False. Falls back to wrapping the raw
    (stripped) text under the "raw_output" key.

    Args:
        text: the raw string produced by the model.

    Returns:
        The parsed object (normally a dict with keys like is_toxic, label,
        tags, reason, severity), or {"raw_output": <text>} when unparseable.
    """
    import ast  # local import: only needed on the fallback path

    try:
        return json.loads(text)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError subclass; TypeError covers
        # non-string input. Fall through to the literal-parse attempt.
        pass

    text = text.strip()
    if text.startswith('{') and text.endswith('}'):
        try:
            # SECURITY FIX: the original used eval() here, which would
            # execute arbitrary code embedded in (untrusted) model output.
            # ast.literal_eval parses Python literals without execution.
            return ast.literal_eval(text)
        except (ValueError, SyntaxError):
            pass

    return {"raw_output": text}
+
|
| 57 |
+
def format_output(input_text, parsed_output, total_time):
    """Render the parsed classification as a readable multi-line string.

    Each known field is emitted on its own prefixed line, in a fixed order,
    framed by divider rules, with the processing time appended at the end.

    NOTE(review): the emoji/prefix characters below appear mojibake-garbled
    in this source view; they are reproduced verbatim.
    """
    divider = "β" * 50
    lines = [f"π Input: {input_text}", "", divider, ""]

    # is_toxic gets a conditional marker depending on its truthiness.
    if "is_toxic" in parsed_output:
        flag = parsed_output["is_toxic"]
        marker = "π¨" if flag else "β…"
        lines.append(f"{marker} is_toxic: {flag}")

    # Remaining fields share one shape: "<prefix> <name>: <value>".
    for prefix, field in (
        ("π·οΈ", "label"),
        ("π", "tags"),
        ("π¬", "reason"),
        ("β‘", "severity"),
        ("π", "raw_output"),
    ):
        if field in parsed_output:
            lines.append(f"{prefix} {field}: {parsed_output[field]}")

    lines.extend(["", divider, f"β±οΈ processing_time: {total_time} seconds"])
    return "\n".join(lines)
|
| 90 |
+
def gradio_predict(input_text):
    """Gradio entry point: classify *input_text* and return formatted output.

    Guards against blank input, then runs inference, parses the generated
    text, and renders the final human-readable report.
    """
    if not input_text.strip():
        return "Please enter some text."

    generated_text, raw_output, total_time = pred_on_text(input_text)
    parsed = parse_generated_text(generated_text)
    return format_output(input_text, parsed, total_time)
+
|
| 102 |
+
# Gradio interface: single textbox in, single textbox out, with canned
# example inputs (mix of toxic and benign sentences).
demo = gr.Interface(
    fn=gradio_predict,
    inputs=gr.Textbox(
        label="Input Text",
        placeholder="Enter your text here...",
        lines=3
    ),
    outputs=gr.Textbox(
        label="Model Output",
        lines=12
    ),
    title="π€ Toxic Speech Classifier",
    description="Analyze whether a given text contains toxic, insulting, or harmful language using a fine-tuned Gemma3 model.",
    examples=[
        ["You are absolutely worthless and no one will ever love you."],
        ["I hope you get hit by a bus, you disgusting excuse for a person."],
        ["The weather today is really nice, I enjoyed my walk in the park."],
        ["Shut up you brainless moron, nobody asked for your stupid opinion."],
        ["Thank you for your help, I really appreciate everything you did."],
        ["You are such a pathetic loser, get out of my sight."],
        ["I just finished reading a great book, it was very inspiring."],
    ],
    theme=gr.themes.Soft()
)

# Launch only when run as a script; Spaces also imports app.py directly.
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers
|
| 2 |
+
torch
|
| 3 |
+
gradio
|
| 4 |
+
accelerate
|