Spaces:

Lahari2005
/

comment_classification

No application file

App Files Files Community

Lahari2005 committed on Sep 20, 2025

Commit

561afe3

verified ·

1 Parent(s): f08a743

Update README.md

Browse files

Files changed (1) hide show

README.md +446 -0

README.md CHANGED Viewed

@@ -11,3 +11,449 @@ short_description: comment classification
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+import gradio as gr
+import pandas as pd
+import numpy as np
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+import random
+import re
+# Create synthetic dataset for toxic and non-toxic comments
+def create_synthetic_dataset():
+    """Build a shuffled, labelled DataFrame of template-generated comments.
+
+    Returns:
+        A pandas DataFrame with two columns: 'comment' (the generated text)
+        and 'toxic' (1 for the 500 comments built from the toxic templates,
+        0 for the 500 built from the non-toxic templates).
+
+    Both the NumPy global RNG and the stdlib ``random`` global RNG are
+    re-seeded with 42 on every call. All draws inside this function use the
+    stdlib ``random`` module, so repeated calls produce the same data.
+    """
+    np.random.seed(42)
+    random.seed(42)
+    # Toxic comments patterns
+    toxic_patterns = [
+        "You're such a {insult} who knows nothing about {topic}.",
+        "Only an {insult} would think that about {topic}.",
+        "This is the dumbest take on {topic} I've ever seen.",
+        "Go back to {place}, you {insult}.",
+        "Why are you so {negative_adj} about everything?",
+        "Everyone like you should be {threat}.",
+        "Your opinion is worthless because you're a {insult}.",
+        "I hope you {threat} for saying that.",
+        "People like you are the reason why {bad_thing} happens.",
+        "Shut up, you don't know what you're talking about.",
+        "You're just a {insult} with no life.",
+        "How can anyone be this {negative_adj}?",
+        "I wouldn't expect anything better from a {insult}.",
+        "Your existence is an insult to {group}.",
+        "Do everyone a favor and {threat}."
+    ]
+    # Non-toxic comments patterns
+    non_toxic_patterns = [
+        "I appreciate your perspective on {topic}.",
+        "That's an interesting point about {topic}.",
+        "I see what you mean, but have you considered {alternative_view}?",
+        "Thanks for sharing your thoughts on {topic}.",
+        "I respectfully disagree because of {reason}.",
+        "That's a good question about {topic}.",
+        "I learned something new about {topic} today.",
+        "Could you elaborate more on your view about {topic}?",
+        "I never thought about it that way before.",
+        "You make a valid point regarding {topic}.",
+        "I understand where you're coming from.",
+        "Let's agree to disagree on this one.",
+        "I value different opinions on {topic}.",
+        "That's a fair assessment of the situation.",
+        "I think we have common ground on {shared_view}."
+    ]
+    # Fillers for the patterns
+    insults = ["idiot", "moron", "fool", "jerk", "imbecile", "buffoon", "dimwit", "simpleton", "dunce", "nitwit"]
+    topics = ["politics", "sports", "technology", "music", "movies", "science", "education", "health", "environment", "economy"]
+    negative_adjs = ["stupid", "ignorant", "pathetic", "ridiculous", "awful", "terrible", "horrible", "disgusting", "vile", "repulsive"]
+    places = ["your country", "where you came from", "your mom's basement", "the cave you live in", "under your rock"]
+    threats = ["die", "disappear", "stop talking", "leave", "get banned", "be quiet", "go away", "never return", "get lost", "vanish"]
+    bad_things = ["war", "famine", "disease", "poverty", "conflict", "hate", "violence", "discrimination", "suffering", "chaos"]
+    groups = ["humanity", "society", "this community", "intelligent people", "decent folks"]
+    alternative_views = ["this other aspect", "the historical context", "the data", "recent developments", "expert opinions"]
+    reasons = ["my experiences", "the evidence", "what I've read", "statistics", "expert analysis"]
+    shared_views = ["this issue", "the importance of dialogue", "seeking truth", "finding solutions", "moving forward"]
+    # Generate toxic comments
+    toxic_comments = []
+    for _ in range(500):
+        pattern = random.choice(toxic_patterns)
+        # str.format ignores keyword arguments the template does not reference,
+        # so every filler list is sampled for every comment. Each draw advances
+        # the RNG, so the order of these arguments determines the dataset.
+        comment = pattern.format(
+            insult=random.choice(insults),
+            topic=random.choice(topics),
+            negative_adj=random.choice(negative_adjs),
+            place=random.choice(places),
+            threat=random.choice(threats),
+            bad_thing=random.choice(bad_things),
+            group=random.choice(groups)
+        )
+        # Label 1 marks a toxic comment
+        toxic_comments.append((comment, 1))
+    # Generate non-toxic comments
+    non_toxic_comments = []
+    for _ in range(500):
+        pattern = random.choice(non_toxic_patterns)
+        # Same as above: all four fillers are drawn whether or not they are used.
+        comment = pattern.format(
+            topic=random.choice(topics),
+            alternative_view=random.choice(alternative_views),
+            reason=random.choice(reasons),
+            shared_view=random.choice(shared_views)
+        )
+        # Label 0 marks a non-toxic comment
+        non_toxic_comments.append((comment, 0))
+    # Combine and shuffle
+    all_comments = toxic_comments + non_toxic_comments
+    random.shuffle(all_comments)
+    # Create DataFrame
+    df = pd.DataFrame(all_comments, columns=['comment', 'toxic'])
+    return df
+# Create and train the model
+def create_and_train_model(df):
+    # Split the data
+    X_train, X_test, y_train, y_test = train_test_split(
+        df['comment'], df['toxic'], test_size=0.2, random_state=42
+    )
+    # Vectorize the text
+    vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
+    X_train_vec = vectorizer.fit_transform(X_train)
+    X_test_vec = vectorizer.transform(X_test)
+    # Train the model
+    model = LogisticRegression(max_iter=1000, random_state=42)
+    model.fit(X_train_vec, y_train)
+    return model, vectorizer
+# Create the synthetic dataset and train the model.
+# NOTE: these statements run at module import time, so the data is generated
+# and the classifier is fitted before the UI below is built. `model` and
+# `vectorizer` are the module-level globals read by predict_toxicity().
+df = create_synthetic_dataset()
+model, vectorizer = create_and_train_model(df)
+# Function to predict toxicity
+def predict_toxicity(comment):
+    if not comment.strip():
+        return {"toxic": False, "toxicity_score": 0.0, "display_text": "No text provided"}
+    # Vectorize the comment
+    comment_vec = vectorizer.transform([comment])
+    # Predict
+    prediction = model.predict_proba(comment_vec)[0]
+    toxic_prob = prediction[1]  # Probability of being toxic
+    # Determine if toxic
+    is_toxic = toxic_prob > 0.7
+    return {
+        "toxic": is_toxic,
+        "toxicity_score": float(toxic_prob),
+        "display_text": comment
+    }
+# Function to simulate browser extension highlighting
+def highlight_toxic_comments(text):
+    if not text.strip():
+        return "<div style='font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; color: #666;'>No comments to analyze</div>"
+    # Split into comments (assuming each line is a comment)
+    comments = text.split('\n')
+    highlighted_html = "<div style='font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto;'>"
+    for comment in comments:
+        if not comment.strip():
+            continue
+        result = predict_toxicity(comment)
+        if result['toxic']:
+            # Highlight toxic comments in red
+            highlighted_html += f"""
+            <div style='
+                background-color: #ffebee;
+                border-left: 5px solid #f44336;
+                padding: 12px;
+                margin: 10px 0;
+                border-radius: 4px;
+                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+            '>
+                <div style='display: flex; justify-content: space-between; align-items: center;'>
+                    <span style='color: #d32f2f; font-weight: bold;'>⚠️ Toxic Comment</span>
+                    <span style='color: #888; font-size: 0.9em;'>Toxicity: {result['toxicity_score']*100:.1f}%</span>
+                </div>
+                <p style='margin: 8px 0; color: #333;'>{comment}</p>
+            </div>
+            """
+        else:
+            # Keep non-toxic comments normal
+            highlighted_html += f"""
+            <div style='
+                background-color: #f5f5f5;
+                border-left: 5px solid #4caf50;
+                padding: 12px;
+                margin: 10px 0;
+                border-radius: 4px;
+                box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+            '>
+                <div style='display: flex; justify-content: space-between; align-items: center;'>
+                    <span style='color: #388e3c; font-weight: bold;'>✓ Civil Comment</span>
+                    <span style='color: #888; font-size: 0.9em;'>Toxicity: {result['toxicity_score']*100:.1f}%</span>
+                </div>
+                <p style='margin: 8px 0; color: #333;'>{comment}</p>
+            </div>
+            """
+    highlighted_html += "</div>"
+    return highlighted_html
+# Function to analyze single comment
+def analyze_single_comment(comment):
+    if not comment.strip():
+        return "Please enter a comment to analyze", "white", "0%"
+    result = predict_toxicity(comment)
+    if result['toxic']:
+        return (
+            f"⚠️ This comment is classified as TOXIC with a {result['toxicity_score']*100:.1f}% probability.",
+            "red",
+            f"{result['toxicity_score']*100:.1f}%"
+        )
+    else:
+        return (
+            f"✓ This comment is CIVIL with a {result['toxicity_score']*100:.1f}% toxicity probability.",
+            "green",
+            f"{result['toxicity_score']*100:.1f}%"
+        )
+# Custom CSS passed to gr.Blocks(css=...) below.
+# - .gr-box, .example-container and .example-comment are attached to
+#   components in the layout via elem_classes.
+# - .toxicity-meter and .toxicity-value are not attached to any component in
+#   the layout below.
+# - .gr-button and .gr-tab presumably target Gradio's own class names; whether
+#   they match depends on the installed Gradio version (TODO confirm).
+custom_css = """
+.gr-button {
+    background: linear-gradient(45deg, #ff6b6b, #ff8e8e) !important;
+    color: white !important;
+    border: none !important;
+    border-radius: 8px !important;
+    padding: 12px 24px !important;
+    font-weight: bold !important;
+    transition: all 0.3s ease !important;
+}
+.gr-button:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
+}
+.gr-button:active {
+    transform: translateY(0);
+}
+.toxicity-meter {
+    background: linear-gradient(90deg, #4caf50 0%, #ffeb3b 50%, #f44336 100%);
+    height: 20px;
+    border-radius: 10px;
+    margin: 10px 0;
+    position: relative;
+}
+.toxicity-value {
+    position: absolute;
+    top: -25px;
+    font-weight: bold;
+    color: #333;
+}
+h1 {
+    background: linear-gradient(45deg, #ff6b6b, #ff8e8e);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    text-align: center;
+    margin-bottom: 20px !important;
+}
+.gr-box {
+    border-radius: 12px !important;
+    border: 2px solid #e0e0e0 !important;
+    padding: 16px !important;
+}
+.gr-tab {
+    border-radius: 12px 12px 0 0 !important;
+}
+.example-container {
+    background: #f9f9f9;
+    padding: 15px;
+    border-radius: 12px;
+    margin: 10px 0;
+}
+.example-comment {
+    padding: 10px;
+    margin: 5px 0;
+    border-radius: 8px;
+    background: white;
+    cursor: pointer;
+    transition: all 0.2s ease;
+}
+.example-comment:hover {
+    transform: translateX(5px);
+    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
+}
+"""
+# Create Gradio interface
+with gr.Blocks(title="Toxic Comment Classifier", theme=gr.themes.Soft(), css=custom_css) as demo:
+    gr.Markdown(
+        """
+        # 🚨 Toxic Comment Classifier
+        This tool identifies abusive, hateful, or toxic comments using machine learning.
+        It simulates how a browser extension would highlight toxic content in red.
+        """
+    )
+    with gr.Tab("🔍 Single Comment Analysis"):
+        gr.Markdown("## Analyze a Single Comment")
+        with gr.Row():
+            with gr.Column(scale=1):
+                input_text = gr.Textbox(
+                    label="Enter a comment to analyze",
+                    placeholder="Type your comment here...",
+                    lines=3,
+                    elem_classes="gr-box"
+                )
+                analyze_btn = gr.Button("Analyze Comment", variant="primary")
+                # Toxicity meter
+                gr.Markdown("### Toxicity Meter")
+                toxicity_display = gr.Label(label="Toxicity Score", value="0%")
+                # Visual indicator
+                gr.Markdown("### Visual Indicator")
+                color_box = gr.Textbox(
+                    value="Enter a comment to see analysis",
+                    interactive=False,
+                    label="Analysis Result"
+                )
+            with gr.Column(scale=1):
+                # Examples for single comment
+                gr.Markdown("### Try These Examples")
+                with gr.Column(elem_classes="example-container"):
+                    examples = [
+                        "You're such an idiot who knows nothing about politics.",
+                        "I appreciate your perspective on this topic.",
+                        "People like you are the reason why we have so many problems in society.",
+                        "That's an interesting point about the economy."
+                    ]
+                    for example in examples:
+                        example_btn = gr.Button(
+                            example,
+                            size="sm",
+                            variant="secondary",
+                            elem_classes="example-comment"
+                        )
+                        example_btn.click(
+                            fn=lambda e=example: e,
+                            inputs=None,
+                            outputs=input_text
+                        )
+    with gr.Tab("🌐 Browser Extension Simulator"):
+        gr.Markdown("""
+        ## Browser Extension Simulator
+        Paste multiple comments (one per line) to simulate how a browser extension would highlight toxic content:
+        """)
+        with gr.Row():
+            with gr.Column():
+                multi_comments = gr.Textbox(
+                    label="Comments (one per line)",
+                    placeholder="Enter multiple comments here, one per line...",
+                    lines=10,
+                    elem_classes="gr-box"
+                )
+                analyze_multi_btn = gr.Button("Analyze Comments", variant="primary")
+            with gr.Column():
+                highlighted_output = gr.HTML(label="Highlighted Comments")
+        # Examples for multiple comments
+        gr.Markdown("### Example Comment Threads")
+        with gr.Row():
+            with gr.Column():
+                example1 = gr.Examples(
+                    examples=[
+                        """You're such an idiot who knows nothing about politics.
+I appreciate your perspective on this topic.
+People like you are the reason why we have so many problems in society.
+That's an interesting point about the economy.
+Everyone like you should be banned from this platform."""
+                    ],
+                    inputs=multi_comments,
+                    label="Example 1"
+                )
+            with gr.Column():
+                example2 = gr.Examples(
+                    examples=[
+                        """This is the dumbest take on sports I've ever seen.
+Thanks for sharing your thoughts on the environment.
+I hope you disappear for saying that.
+I see what you mean, but have you considered the historical context?"""
+                    ],
+                    inputs=multi_comments,
+                    label="Example 2"
+                )
+    with gr.Tab("📘 About This Project"):
+        gr.Markdown("""
+        ## About the Toxic Comment Classifier
+        This project demonstrates a machine learning approach to identifying toxic comments online.
+        **How it works:**
+        - Uses TF-IDF for text vectorization
+        - Employs Logistic Regression for classification
+        - Trained on a synthetic dataset of toxic and non-toxic comments
+        **Browser Extension Simulation:**
+        The tool simulates how a browser extension would highlight toxic comments in red
+        and civil comments in green, creating a visual content moderation aid.
+        **Potential Applications:**
+        - Social media moderation
+        - Forum content filtering
+        - Online community management
+        **Note:** This is a demonstration using synthetic data. Real-world applications would require
+        training on larger, more diverse datasets for improved accuracy.
+        """)
+    # Setup event handlers
+    analyze_btn.click(
+        fn=analyze_single_comment,
+        inputs=input_text,
+        outputs=[color_box, color_box, toxicity_display]
+    )
+    analyze_multi_btn.click(
+        fn=highlight_toxic_comments,
+        inputs=multi_comments,
+        outputs=highlighted_output
+    )
+    # Update toxicity display when text changes
+    input_text.change(
+        fn=lambda x: "0%" if not x.strip() else f"{predict_toxicity(x)['toxicity_score']*100:.1f}%",
+        inputs=input_text,
+        outputs=toxicity_display
+    )
+# Launch the application only when this code is run as a script; importing it
+# builds `demo` but does not start the server.
+if __name__ == "__main__":
+    demo.launch()