Agnist committed on
Commit
680ff8e
·
verified ·
1 Parent(s): 3e27c40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +210 -22
app.py CHANGED
@@ -1,25 +1,213 @@
1
- model = keras.Sequential([
2
- vectorizer,
3
- Embedding(input_dim=vocab_size, output_dim=64),
4
- GlobalAveragePooling1D(),
5
- Dense(64, activation="relu"),
6
- Dense(len(df['label'].unique()), activation="softmax") # Adjust for number of classes
7
- ])
8
-
9
- model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
10
- return model
11
- # Train the model
12
- model = build_model()
13
- model.fit(train_ds.batch(32), epochs=10, validation_data=test_ds.batch(32))
14
- # Function to make predictions
15
- def predict(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Vectorize the input text
17
- vectorized_text = vectorizer([text]) # Use the vectorizer to transform the input
18
- prediction = model.predict(vectorized_text) # Pass the vectorized input to the model
19
- predicted_label = tf.argmax(prediction, axis=1).numpy()[0]
20
- return df['label'].cat.categories[predicted_label]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # Gradio interface
22
- iface = gr.Interface(fn=predict, inputs="text", outputs="text", title="Text Tone Sentiment Analysis",
23
- description="Enter a text to analyze its tone (e.g., joy, depression, contentment).")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  if __name__ == "__main__":
25
- iface.launch()
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import torch
5
+ from datasets import load_dataset
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.feature_extraction.text import TfidfVectorizer
8
+ from sklearn.linear_model import LogisticRegression
9
+ from sklearn.metrics import accuracy_score, classification_report
10
+ from sklearn.preprocessing import LabelEncoder
11
+ import matplotlib.pyplot as plt
12
+ from imblearn.over_sampling import SMOTE
13
+ import plotly.express as px
14
+ import plotly.graph_objects as go
15
+ import warnings
16
+
17
# Suppress warnings
warnings.filterwarnings("ignore")

# Load dataset
print("Loading dataset...")
ds = load_dataset("uhoui/text-tone-classifier")

# Convert to pandas DataFrame
df = pd.DataFrame(ds["train"])

# Print dataset statistics
print(f"Dataset size: {len(df)} entries")
print(f"Columns: {df.columns}")

# Check class distribution
label_counts = df['label'].value_counts()
print("\nClass distribution:")
print(label_counts)

# Encode labels
label_encoder = LabelEncoder()
df['label_encoded'] = label_encoder.fit_transform(df['label'])
num_classes = len(label_encoder.classes_)

# Split the data.
# A stratified split needs at least two samples of every class; checking only
# the total row count lets a singleton class crash train_test_split, so the
# per-class minimum is checked as well before asking for stratification.
can_stratify = len(df) > 10 and label_counts.min() >= 2
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label_encoded'],
    test_size=0.2,
    random_state=42,
    stratify=df['label_encoded'] if can_stratify else None,
)

# Feature extraction using TF-IDF (vocabulary is fitted on the training split only)
print("Creating TF-IDF features...")
tfidf = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Handle class imbalance using SMOTE; fall back to the unbalanced training
# data when SMOTE rejects the input with a ValueError.
print("Applying SMOTE to handle class imbalance...")
try:
    smote = SMOTE(random_state=42)
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train_tfidf, y_train)
    print(f"After SMOTE: {X_train_resampled.shape}")
except ValueError as e:
    print(f"SMOTE error: {e}. Using original data.")
    X_train_resampled, y_train_resampled = X_train_tfidf, y_train

# Train a logistic regression model
# NOTE(review): `multi_class` appears to be deprecated in recent scikit-learn
# releases -- confirm against the installed version before upgrading.
print("Training model...")
model = LogisticRegression(C=10, max_iter=1000, n_jobs=-1, solver='lbfgs', multi_class='multinomial')
model.fit(X_train_resampled, y_train_resampled)

# Evaluate model on the held-out split
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {accuracy:.4f}")
75
+
76
+ # Function to predict tone with probabilities
77
def predict_tone(text):
    """Predict the tone of *text* and assemble everything the UI displays.

    Uses the module-level ``tfidf``, ``model``, ``label_encoder`` and ``df``.

    Args:
        text: Raw input string to classify.

    Returns:
        A 4-tuple ``(pred_class, sorted_results, fig, example_texts)``:
        the most probable label, a dict of label -> probability ordered from
        most to least likely, a Plotly bar chart of the top five labels, and
        up to three randomly sampled dataset texts carrying the predicted label.
    """
    # Vectorize the input text
    text_tfidf = tfidf.transform([text])

    # Get prediction probabilities
    probs = model.predict_proba(text_tfidf)[0]

    # predict_proba columns follow model.classes_ (the encoded labels seen
    # during training), so decode through it rather than assuming column i is
    # label_encoder.classes_[i]; the two differ if a class never reached the
    # training split.
    labels = label_encoder.inverse_transform(model.classes_)
    pred_class = labels[np.argmax(probs)]

    # All probabilities, sorted from most to least likely
    results = {label: float(p) for label, p in zip(labels, probs)}
    sorted_results = dict(
        sorted(results.items(), key=lambda item: item[1], reverse=True)
    )

    # Create visualization
    top_n = 5  # Show top 5 emotions
    top_labels = list(sorted_results)[:top_n]
    top_probs = list(sorted_results.values())[:top_n]

    # Higher probability = more opaque bar (alpha is capped at 1.0)
    colors = [f"rgba(64, 128, 255, {min(1.0, p + 0.3)})" for p in top_probs]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=top_probs,
        y=top_labels,
        orientation='h',
        marker_color=colors,
        text=[f"{p:.1%}" for p in top_probs],
        textposition='auto',
    ))

    fig.update_layout(
        title="Emotion Probability",
        xaxis_title="Probability",
        yaxis_title="Emotion",
        height=400,
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis=dict(range=[0, 1]),
        # Horizontal bars are laid out bottom-to-top; reverse the axis so the
        # most likely tone is shown first, at the top.
        yaxis=dict(autorange="reversed"),
    )

    # Get example texts for the predicted emotion (filter the frame only once)
    matching_texts = df.loc[df['label'] == pred_class, 'text']
    example_texts = matching_texts.sample(min(3, len(matching_texts))).tolist()

    return pred_class, sorted_results, fig, example_texts
127
+
128
+ # Function to handle the example display
129
def get_tone_examples(tone):
    """Return up to five randomly sampled dataset texts labelled *tone*.

    Reads the module-level ``df``; the result is a plain list of strings and
    is empty when no row carries that label.
    """
    tone_texts = df[df['label'] == tone]['text']
    sample_size = min(5, len(tone_texts))
    return tone_texts.sample(sample_size).tolist()
132
+
133
  # Gradio interface
134
def analyze_tone(text, selected_tone=None):
    """Gradio callback: analyze *text*, or show examples for *selected_tone*.

    Args:
        text: Text typed by the user; may be ``None`` or empty.
        selected_tone: Optional tone label; only consulted when no text was
            entered.

    Returns:
        A 4-tuple ``(message, probabilities, figure, examples)`` matching the
        four output components of the interface.
    """
    # None, "" and whitespace-only input all count as "no text".
    if not text or not text.strip():
        # With no text to analyze, a selected tone means "show me examples".
        # This check must live inside the empty-text branch: testing it after
        # an unconditional early return made it unreachable.
        if selected_tone:
            examples = get_tone_examples(selected_tone)
            return f"Examples of '{selected_tone}' tone:", {}, None, examples
        return "Please enter some text to analyze.", {}, None, []

    # Otherwise, analyze the text
    predicted_tone, all_probs, fig, examples = predict_tone(text)

    # Format the result message (Markdown bold around the label)
    message = f"The predicted tone is: **{predicted_tone}**"

    return message, all_probs, fig, examples
150
+
151
+ # Create the Gradio interface
152
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("# Text Tone Analyzer")
    gr.Markdown("Enter text to analyze its emotional tone.")

    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                label="Enter your text here",
                placeholder="Type something to analyze its emotional tone...",
                lines=5
            )
            analyze_button = gr.Button("Analyze Tone", variant="primary")

        with gr.Column(scale=2):
            # Dropdown to select example tones
            tone_dropdown = gr.Dropdown(
                choices=sorted(df['label'].unique().tolist()),
                label="Or select a tone to see examples"
            )

    with gr.Row():
        with gr.Column(scale=1):
            result_message = gr.Markdown()

    with gr.Row():
        with gr.Column(scale=2):
            plot_output = gr.Plot(label="Tone Probabilities")
        with gr.Column(scale=1):
            all_probs_output = gr.JSON(label="All Probabilities")

    with gr.Row():
        # NOTE(review): the callbacks return a flat list of strings for this
        # single-column table -- confirm the installed Gradio version accepts
        # that shape rather than requiring a list of rows.
        examples_output = gr.Dataframe(
            headers=["Examples of similar texts"],
            datatype=["str"],
            label="Example texts with similar tone"
        )

    # Set up event handlers.
    # Event inputs must all be components; a literal None in the list is not
    # one. Passing only the textbox leaves analyze_tone's `selected_tone`
    # parameter at its default of None, which is what the placeholder intended.
    analyze_button.click(
        fn=analyze_tone,
        inputs=[text_input],
        outputs=[result_message, all_probs_output, plot_output, examples_output]
    )

    tone_dropdown.change(
        fn=get_tone_examples,
        inputs=[tone_dropdown],
        outputs=[examples_output]
    )

    # Add example inputs
    examples = [
        ["I'm so excited about this new project!"],
        ["I'm feeling quite down today and nothing seems to work."],
        ["The movie was interesting, but I'm not sure if I liked it."],
        ["I can't believe what just happened! This is outrageous!"]
    ]
    gr.Examples(examples=examples, inputs=text_input)

# Launch the app
if __name__ == "__main__":
    demo.launch()