Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,11 +4,12 @@ import numpy as np
|
|
| 4 |
import pandas as pd
|
| 5 |
import evaluate
|
| 6 |
import gradio as gr
|
|
|
|
| 7 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 8 |
-
from sklearn.metrics import accuracy_score
|
| 9 |
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
|
| 10 |
-
from dataclasses import dataclass
|
| 11 |
-
from typing import List
|
| 12 |
|
| 13 |
# Load Accuracy and F1-Score Metrics
|
| 14 |
accuracy_metric = evaluate.load("accuracy")
|
|
@@ -22,23 +23,37 @@ MODEL_PATHS = {
|
|
| 22 |
"DistilBERT": "distilbert-base-uncased"
|
| 23 |
}
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
#
|
| 32 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
df_sample = df.sample(n=sample_size, random_state=42) # Sample a subset
|
| 34 |
test_texts = df_sample["text"].tolist()
|
| 35 |
-
test_labels = df_sample["
|
| 36 |
return test_texts, test_labels
|
| 37 |
|
| 38 |
# Function to evaluate models
|
| 39 |
def evaluate_models(dataset_path):
|
| 40 |
-
|
| 41 |
-
test_texts, test_labels = preprocess_data(df)
|
| 42 |
results = []
|
| 43 |
|
| 44 |
model_metadata = {
|
|
@@ -82,12 +97,13 @@ def evaluate_models(dataset_path):
|
|
| 82 |
return pd.DataFrame(results)
|
| 83 |
|
| 84 |
# Load and evaluate
|
| 85 |
-
DATASET_PATH = "
|
| 86 |
df_results = evaluate_models(DATASET_PATH)
|
| 87 |
|
| 88 |
# Display results
|
| 89 |
df_results
|
| 90 |
|
|
|
|
| 91 |
# Initialize leaderboard with custom columns
|
| 92 |
def init_leaderboard(dataframe):
|
| 93 |
if dataframe is None or dataframe.empty:
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import evaluate
|
| 6 |
import gradio as gr
|
| 7 |
+
import re
|
| 8 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 9 |
+
from sklearn.metrics import accuracy_score
|
| 10 |
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
|
| 11 |
+
from dataclasses import dataclass
|
| 12 |
+
from typing import List
|
| 13 |
|
| 14 |
# Load Accuracy and F1-Score Metrics
|
| 15 |
accuracy_metric = evaluate.load("accuracy")
|
|
|
|
| 23 |
"DistilBERT": "distilbert-base-uncased"
|
| 24 |
}
|
| 25 |
|
| 26 |
+
# Label Mapping
|
| 27 |
+
LABEL_MAPPING = {
|
| 28 |
+
0: "Stress",
|
| 29 |
+
1: "Depression",
|
| 30 |
+
2: "Bipolar disorder",
|
| 31 |
+
3: "Personality disorder",
|
| 32 |
+
4: "Anxiety"
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
# Function to clean text using regular expressions
|
| 36 |
+
def clean_text(text):
    """Normalize a raw text string with regular expressions.

    The text is lowercased, URLs are removed, every character that is not an
    ASCII letter, digit or whitespace is dropped, and runs of whitespace are
    collapsed to a single space.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string with leading and trailing whitespace stripped.
    """
    text = text.lower()
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    # Remove special characters but keep whitespace for now so that words
    # separated only by newlines/tabs are not glued together.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    # Collapse whitespace last: removing URLs or punctuation that sat between
    # two spaces would otherwise leave double spaces in the output.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
|
| 42 |
|
| 43 |
+
# Load and preprocess Reddit Mental Health Dataset
|
| 44 |
+
def load_reddit_data(file_path, sample_size=100):
    """Load the dataset CSV and return a cleaned random sample.

    Args:
        file_path: Path or URL passed straight to ``pd.read_csv``.
        sample_size: Maximum number of rows to sample. If fewer rows remain
            after dropping missing values, all remaining rows are used.

    Returns:
        A ``(test_texts, test_labels)`` tuple of two parallel lists taken
        from the ``text`` and ``target`` columns of the sampled rows.
    """
    df = pd.read_csv(file_path)
    df = df.dropna(subset=["text", "target"])  # Ensure no missing values in relevant columns
    df = df.drop(columns=[df.columns[0], "title"])  # Drop index and title columns
    df["text"] = df["text"].apply(clean_text)  # Clean text column
    # DataFrame.sample raises ValueError when n exceeds the number of rows
    # (sampling without replacement), so cap n at what is available.
    n_rows = min(sample_size, len(df))
    df_sample = df.sample(n=n_rows, random_state=42)  # Sample a subset
    test_texts = df_sample["text"].tolist()
    test_labels = df_sample["target"].tolist()
    return test_texts, test_labels
|
| 53 |
|
| 54 |
# Function to evaluate models
|
| 55 |
def evaluate_models(dataset_path):
|
| 56 |
+
test_texts, test_labels = load_reddit_data(dataset_path)
|
|
|
|
| 57 |
results = []
|
| 58 |
|
| 59 |
model_metadata = {
|
|
|
|
| 97 |
return pd.DataFrame(results)
|
| 98 |
|
| 99 |
# Load and evaluate
|
| 100 |
+
DATASET_PATH = "https://huggingface.co/spaces/DrSyedFaizan/mindBERTevaluation/resolve/main/rmhd.csv"
|
| 101 |
df_results = evaluate_models(DATASET_PATH)
|
| 102 |
|
| 103 |
# Display results
|
| 104 |
df_results
|
| 105 |
|
| 106 |
+
|
| 107 |
# Initialize leaderboard with custom columns
|
| 108 |
def init_leaderboard(dataframe):
|
| 109 |
if dataframe is None or dataframe.empty:
|