mindBERTevaluation

Runtime error

App Files Files Community

DrSyedFaizan committed on Mar 2, 2025

Commit

72cd162

verified ·

1 Parent(s): f155b04

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -14

app.py CHANGED Viewed

@@ -4,11 +4,12 @@ import numpy as np
 import pandas as pd
 import evaluate
 import gradio as gr
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
-from sklearn.metrics import accuracy_score, classification_report
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
-from dataclasses import dataclass, field
-from typing import List, Optional
 # Load Accuracy and F1-Score Metrics
 accuracy_metric = evaluate.load("accuracy")
@@ -22,23 +23,37 @@ MODEL_PATHS = {
     "DistilBERT": "distilbert-base-uncased"
 }
-# Load Reddit Mental Health Dataset
-def load_reddit_data(file_path):
-    df = pd.read_csv(file_path)
-    df = df.dropna(subset=["text", "label"])  # Ensure no missing values in relevant columns
-    return df
-# Preprocess Dataset
-def preprocess_data(df, sample_size=100):
     df_sample = df.sample(n=sample_size, random_state=42)  # Sample a subset
     test_texts = df_sample["text"].tolist()
-    test_labels = df_sample["label"].tolist()
     return test_texts, test_labels
 # Function to evaluate models
 def evaluate_models(dataset_path):
-    df = load_reddit_data(dataset_path)
-    test_texts, test_labels = preprocess_data(df)
     results = []
     model_metadata = {
@@ -82,12 +97,13 @@ def evaluate_models(dataset_path):
     return pd.DataFrame(results)
 # Load and evaluate
-DATASET_PATH = "path/to/reddit_mental_health.csv"
 df_results = evaluate_models(DATASET_PATH)
 # Display results
 df_results
 # Initialize leaderboard with custom columns
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:

 import pandas as pd
 import evaluate
 import gradio as gr
+import re
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from sklearn.metrics import accuracy_score
 from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+from dataclasses import dataclass
+from typing import List
 # Load Accuracy and F1-Score Metrics
 accuracy_metric = evaluate.load("accuracy")
     "DistilBERT": "distilbert-base-uncased"
 }
+# Label Mapping
+LABEL_MAPPING = {
+    0: "Stress",
+    1: "Depression",
+    2: "Bipolar disorder",
+    3: "Personality disorder",
+    4: "Anxiety"
+}
+# Function to clean text using regular expressions
def clean_text(text):
    """Normalize a raw post into lowercase alphanumeric words.

    The text is lowercased, URLs are stripped, every character that is not
    an ASCII letter, digit, or whitespace is removed, and runs of whitespace
    are collapsed into single spaces.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string, with no leading or trailing whitespace and no
        repeated spaces.
    """
    text = text.lower()
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    # Whitespace is kept here (\s) so that word boundaries survive this step.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove special characters
    # Collapse whitespace last: deleting a URL or a punctuation token that sat
    # between two spaces leaves a double space that must still be squeezed.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
+# Load and preprocess Reddit Mental Health Dataset
def load_reddit_data(file_path, sample_size=100):
    """Load the Reddit Mental Health CSV and return a cleaned test sample.

    Args:
        file_path: Path or URL of a CSV file that has "text" and "target"
            columns.
        sample_size: Maximum number of rows to sample. If fewer rows with
            both "text" and "target" present are available, all of them are
            used.

    Returns:
        A ``(test_texts, test_labels)`` tuple of equal-length lists: the
        cleaned "text" values and the matching "target" values of the
        sampled rows.
    """
    df = pd.read_csv(file_path)
    df = df.dropna(subset=["text", "target"])  # Ensure no missing values in relevant columns
    # Only "text" and "target" are read below, so no other columns need to be
    # dropped before sampling.
    # Cap the request: DataFrame.sample raises ValueError when n exceeds the
    # number of available rows.
    n_rows = min(sample_size, len(df))
    df_sample = df.sample(n=n_rows, random_state=42)  # Sample a subset
    # Clean only the sampled rows; the seeded row selection does not depend on
    # the text content, so the result matches cleaning the full column first.
    test_texts = [clean_text(raw) for raw in df_sample["text"]]
    test_labels = df_sample["target"].tolist()
    return test_texts, test_labels
 # Function to evaluate models
 def evaluate_models(dataset_path):
+    test_texts, test_labels = load_reddit_data(dataset_path)
     results = []
     model_metadata = {
     return pd.DataFrame(results)
 # Load and evaluate
+DATASET_PATH = "https://huggingface.co/spaces/DrSyedFaizan/mindBERTevaluation/resolve/main/rmhd.csv"
 df_results = evaluate_models(DATASET_PATH)
 # Display results
 df_results
 # Initialize leaderboard with custom columns
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty: