Spaces:

ma4389
/

Sentiment_Health_Classification

Sleeping

App Files Files Community

ma4389 committed on Aug 15, 2025

Commit

2083483

verified ·

1 Parent(s): ed8ba86

Upload 3 files

Browse files

Files changed (3) hide show

app.py +79 -0
best_model.pth +3 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import torch
+from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
+import gradio as gr
+import re
+import nltk
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import word_tokenize
+# ======== Download NLTK Resources ========
+nltk.download('stopwords')
+nltk.download('punkt_tab')
+nltk.download('wordnet')
+nltk.download('omw-1.4')
+# ======== Preprocessing Setup ========
+stop_words = set(stopwords.words('english'))
+lemmatizer = WordNetLemmatizer()
+def preprocess_text(text):
+    # Remove non-alphabetic characters
+    text = re.sub(r'[^A-Za-z\s]', '', text)
+    # Remove URLs
+    text = re.sub(r'http\S+|www\S+|https\S+', '', text)
+    # Normalize spaces
+    text = re.sub(r'\s+', ' ', text).strip()
+    # Lowercase
+    text = text.lower()
+    # Tokenize
+    tokens = word_tokenize(text)
+    # Remove stopwords
+    tokens = [word for word in tokens if word not in stop_words]
+    # Lemmatize
+    tokens = [lemmatizer.lemmatize(word) for word in tokens]
+    return ' '.join(tokens)
+# ======== Class Names ========
+class_names = [
+    "Normal",
+    "Depression",
+    "Suicidal",
+    "Anxiety",
+    "Bipolar",
+    "Personality disorder"
+]
+# ======== Load Tokenizer & Model ========
+tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
+model = DistilBertForSequenceClassification.from_pretrained(
+    "distilbert-base-uncased",
+    num_labels=len(class_names)
+)
+model.load_state_dict(torch.load("best_model.pth", map_location=torch.device("cpu")))
+model.eval()
+# ======== Prediction Function ========
+def predict_text(text):
+    cleaned_text = preprocess_text(text)
+    if not cleaned_text.strip():
+        return {cls: 0.0 for cls in class_names}
+    inputs = tokenizer(cleaned_text, truncation=True, padding=True, max_length=128, return_tensors='pt')
+    with torch.no_grad():
+        outputs = model(**inputs)
+        probs = torch.softmax(outputs.logits, dim=1).flatten().tolist()
+    return {cls: float(prob) for cls, prob in zip(class_names, probs)}
+# ======== Gradio Interface ========
+demo = gr.Interface(
+    fn=predict_text,
+    inputs=gr.Textbox(lines=4, placeholder="Enter your statement here..."),
+    outputs=gr.Label(num_top_classes=len(class_names)),
+    title="Mental Health Sentiment Classifier",
+    description="Classifies text into mental health categories."
+)
+if __name__ == "__main__":
+    demo.launch()

best_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df2a8c6fa062a80c85368b5709e49beab9db5827726500c016637cd3f0abb583
+size 267877222

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+torch
+transformers
+gradio
+nltk