Agnist's picture
Update app.py
8a99441 verified
import gradio as gr
import pandas as pd
import numpy as np
import torch
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE
import plotly.express as px
import plotly.graph_objects as go
import warnings
from sklearn.metrics import precision_score, recall_score, f1_score
warnings.filterwarnings("ignore")
# --- Data: fetch the tone corpus from the Hugging Face Hub -------------------
print("Loading dataset...")
ds = load_dataset("uhoui/text-tone-classifier")
# To keep a local CSV copy (e.g. when working in Colab), export the train split:
#   ds['train'].to_pandas().to_csv("text_tone_classifier.csv", index=False)
df = pd.DataFrame(ds["train"])

# Console summary: row count, column index and how many rows each label has.
print(f"Dataset size: {len(df)} entries")
print(f"Columns: {df.columns}")
label_counts = df['label'].value_counts()
print("\nClass distribution:", label_counts, sep="\n")

# Encode the string labels as integer ids; the fitted encoder is kept so that
# predictions can be mapped back to label names later.
label_encoder = LabelEncoder()
df['label_encoded'] = label_encoder.fit_transform(df['label'])
print(label_encoder)
num_classes = len(label_encoder.classes_)
print(num_classes)

# Hold out 20% of the rows for evaluation. The split is seeded for
# reproducibility and is not stratified (train_test_split's default).
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label_encoded'],
    test_size=0.2,
    random_state=42,
)
# TF-IDF feature extraction. The vocabulary (capped at 5000 terms) is fitted on
# the training texts only and then applied unchanged to the held-out texts.
tfidf = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# SMOTE oversampling of the training set only; the test set is left untouched.
print("Handling class imbalance (via SMOTE)...")
# Start from the original training data; it is replaced only if SMOTE succeeds,
# so every failure path below falls back to it without repeating the assignment.
X_train_resampled, y_train_resampled = X_train_tfidf, y_train
try:
    # Count the samples per encoded label once and ignore ids that do not occur
    # in the training split.
    class_counts = np.bincount(y_train)
    smallest_class_size = int(class_counts[class_counts > 0].min())
    # SMOTE interpolates between a sample and its neighbours inside the same
    # class, so k_neighbors must stay below the size of the smallest class.
    k_neighbors = min(5, smallest_class_size - 1)
    if k_neighbors > 0:
        smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
        X_train_resampled, y_train_resampled = smote.fit_resample(X_train_tfidf, y_train)
        print(f"After SMOTE: {X_train_resampled.shape}")
    else:
        print("Classes too small for SMOTE, using original data.")
except ValueError as e:
    # Best effort: training proceeds on the un-resampled data.
    print(f"SMOTE error: {e}. Using original data.")
# Logistic Regression model.
# Tuning notes from the original author:
#   - raising max_iter above 200 did not improve results;
#   - keep C high (100 or more) -- 200 gave the best results.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument; confirm against the pinned version before upgrading.
model = LogisticRegression(C=200, max_iter=200, n_jobs=-1, solver='lbfgs', multi_class='multinomial')
model.fit(X_train_resampled, y_train_resampled)

# Evaluate on the held-out split. Precision, recall and F1 are averaged over
# classes weighted by class support.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
# Report the scores themselves as percentages. The previous version printed
# (1 - score), i.e. the error rate, under these labels.
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1 Score: {f1 * 100:.2f}%")
def predict_tone(text):
    """Classify ``text`` and assemble everything the UI displays for it.

    Returns a 4-tuple:
        * the predicted tone label,
        * a dict mapping every label the model was trained on to its
          probability, ordered from most to least likely,
        * a Plotly horizontal bar chart of the five most likely labels,
        * a list of up to three randomly sampled dataset texts that carry
          the predicted label.
    """
    features = tfidf.transform([text])
    probs = model.predict_proba(features)[0]

    # Probabilities are paired with model.classes_ rather than with the
    # encoder's full class list, so only labels seen during training appear.
    class_names = label_encoder.inverse_transform(model.classes_)
    pred_class = class_names[np.argmax(probs)]

    # Rank (label, probability) pairs from most to least likely.
    ranked = sorted(
        ((name, float(prob)) for name, prob in zip(class_names, probs)),
        key=lambda pair: pair[1],
        reverse=True,
    )
    sorted_results = dict(ranked)

    top_n = 5  # Top 5, adjust later if needed
    leaders = ranked[:top_n]
    top_labels = [name for name, _ in leaders]
    top_probs = [prob for _, prob in leaders]

    # Bar opacity grows with probability (floor of 0.3, capped at fully opaque).
    colors = [f"rgba(64, 128, 255, {min(1.0, prob + 0.3)})" for prob in top_probs]
    bar = go.Bar(
        x=top_probs,
        y=top_labels,
        orientation='h',
        marker_color=colors,
        text=[f"{prob:.1%}" for prob in top_probs],
        textposition='auto',
    )
    fig = go.Figure(data=[bar])
    fig.update_layout(
        title="Emotion Probability",
        xaxis_title="Probability",
        yaxis_title="Emotion",
        height=400,
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis=dict(range=[0, 1]),
    )

    # Random dataset texts sharing the predicted label (at most three).
    same_tone_texts = df.loc[df['label'] == pred_class, 'text']
    example_texts = same_tone_texts.sample(min(3, len(same_tone_texts))).tolist()
    return pred_class, sorted_results, fig, example_texts
def get_tone_examples(tone):
    """Return up to five randomly sampled dataset texts labelled ``tone``.

    The result is a plain list of strings; it is empty when no row of the
    dataset carries that label.
    """
    tone_texts = df.loc[df['label'] == tone, 'text']
    sample_size = min(5, len(tone_texts))
    return tone_texts.sample(sample_size).tolist()
# Gradio click handler
def analyze_tone(text, selected_tone=None):
    """Handle the "Analyze Tone" button.

    Args:
        text: Contents of the input textbox (may be empty or ``None``).
        selected_tone: Label currently chosen in the tone dropdown, if any.

    Returns:
        A 4-tuple ``(message, probabilities, figure, examples)`` matching the
        four output components wired to the button:

        * blank text, tone selected  -> example texts for that tone;
        * blank text, no tone        -> a prompt asking for input;
        * otherwise                  -> the prediction for ``text``.
    """
    # Whitespace-only input carries nothing to classify, so treat it as empty.
    if not text or not text.strip():
        # This check must sit inside the empty-text branch: previously the
        # plain "no text" return ran first and made it unreachable.
        if selected_tone:
            examples = get_tone_examples(selected_tone)
            return f"Examples of '{selected_tone}' tone:", {}, None, examples
        return "Enter the text to analyze:", {}, None, []

    predicted_tone, all_probs, fig, examples = predict_tone(text)
    message = f"The tone is: **{predicted_tone}**"
    return message, all_probs, fig, examples
# Gradio interface: page layout first, event wiring at the end.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("# Text Tone Sentimental Analyzer")
    gr.Markdown("Be mindful of punctuation as it affects results. Slang is unaccounted for due to dataset constraints.")

    # Input row: free-text box with its button (left), tone picker (right).
    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                label="Enter your text here",
                placeholder="Example: The satisfaction of completing a difficult puzzle is indescribable.",
                lines=5,
            )
            analyze_button = gr.Button("Analyze Tone", variant="primary")
        with gr.Column(scale=2):
            # The dropdown offers every label present in the dataset, A-Z.
            tone_dropdown = gr.Dropdown(
                choices=sorted(df['label'].unique().tolist()),
                label="Select a tone to view an example below.",
            )

    # Vertical spacer between the inputs and the results.
    gr.Markdown("<br>", elem_id="line-break-1")

    # Headline result ("The tone is: ...").
    with gr.Row():
        with gr.Column(scale=1):
            result_message = gr.Markdown()

    # Probability chart next to the full probability table.
    with gr.Row():
        with gr.Column(scale=2):
            plot_output = gr.Plot(label="Tone Probabilities")
        with gr.Column(scale=1):
            all_probs_output = gr.JSON(label="All Probabilities")

    # NOTE(review): both handlers feed this component a flat list of strings;
    # confirm Gradio renders that as one text per row rather than requiring
    # row lists such as [[text], ...].
    with gr.Row():
        examples_output = gr.Dataframe(
            headers=["Examples of similar texts"],
            datatype=["str"],
            label="Example texts with similar tone",
        )

    # Button click: classify the textbox contents and refresh all four outputs.
    analyze_button.click(
        analyze_tone,
        inputs=[text_input, tone_dropdown],
        outputs=[result_message, all_probs_output, plot_output, examples_output],
    )
    # Dropdown change: show sample texts for the chosen tone in the table.
    tone_dropdown.change(
        get_tone_examples,
        inputs=tone_dropdown,
        outputs=examples_output,
    )
# Main: launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()