Spaces:

Agnist
/

Text-Tone-Sentimental-Analysis

Sleeping

App Files Files Community

Agnist committed on May 17, 2025

Commit

bb3f86e

verified ·

1 Parent(s): 93debb2

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -43

app.py CHANGED Viewed

@@ -14,51 +14,45 @@ import plotly.express as px
 import plotly.graph_objects as go
 import warnings
-# Suppress warnings
 warnings.filterwarnings("ignore")
-# Load dataset
 print("Loading dataset...")
 ds = load_dataset("uhoui/text-tone-classifier")
-# Convert to pandas DataFrame
 df = pd.DataFrame(ds["train"])
-# Print dataset statistics
 print(f"Dataset size: {len(df)} entries")
 print(f"Columns: {df.columns}")
-# Check class distribution
 label_counts = df['label'].value_counts()
 print("\nClass distribution:")
 print(label_counts)
-# Encode labels
 label_encoder = LabelEncoder()
 df['label_encoded'] = label_encoder.fit_transform(df['label'])
 num_classes = len(label_encoder.classes_)
-# Split the data - fix: remove stratify for classes with few samples
 X_train, X_test, y_train, y_test = train_test_split(
     df['text'],
     df['label_encoded'],
     test_size=0.2,
     random_state=42,
-    # Only use stratify if we have enough samples
-    stratify=None  # Removed stratification to fix the error
 )
-# Feature extraction using TF-IDF
 print("Creating TF-IDF features...")
 tfidf = TfidfVectorizer(max_features=5000)
 X_train_tfidf = tfidf.fit_transform(X_train)
 X_test_tfidf = tfidf.transform(X_test)
-# Handle class imbalance using SMOTE - Fix for SMOTE error
-print("Applying SMOTE to handle class imbalance...")
 try:
-    # Modify the SMOTE parameters to handle small sample sizes
-    # Use k_neighbors=min(5, n_samples-1) for classes with few samples
     smallest_class_size = min(np.bincount(y_train)[np.bincount(y_train) > 0])
     k_neighbors = min(5, smallest_class_size - 1)
@@ -73,46 +67,44 @@ except ValueError as e:
     print(f"SMOTE error: {e}. Using original data.")
     X_train_resampled, y_train_resampled = X_train_tfidf, y_train
-# Train a logistic regression model
 print("Training model...")
 model = LogisticRegression(C=10, max_iter=1000, n_jobs=-1, solver='lbfgs', multi_class='multinomial')
 model.fit(X_train_resampled, y_train_resampled)
-# Evaluate model
 y_pred = model.predict(X_test_tfidf)
 accuracy = accuracy_score(y_test, y_pred)
 print(f"Model accuracy: {accuracy:.4f}")
-# Function to predict tone with probabilities
 def predict_tone(text):
-    # Vectorize the input text
     text_tfidf = tfidf.transform([text])
     # Get prediction probabilities
     probs = model.predict_proba(text_tfidf)[0]
-    # Get the predicted class and its probability
     pred_class_idx = np.argmax(probs)
     pred_class = label_encoder.inverse_transform([pred_class_idx])[0]
-    # Get the labels used in training
-    trained_labels = model.classes_  # These are encoded label indices
-    # Convert encoded labels back to original string labels
     trained_label_names = label_encoder.inverse_transform(trained_labels)
-    # Create results dictionary with only trained labels
     results = {label: float(prob) for label, prob in zip(trained_label_names, probs)}
     # Sort results by probability (descending)
     sorted_results = {k: v for k, v in sorted(results.items(), key=lambda item: item[1], reverse=True)}
     # Create visualization
-    top_n = 5  # Show top 5 emotions
     top_labels = list(sorted_results.keys())[:top_n]
     top_probs = list(sorted_results.values())[:top_n]
-    # Generate colors based on probability (higher probability = more intense color)
     colors = ["rgba(64, 128, 255, " + str(min(1.0, p + 0.3)) + ")" for p in top_probs]
     fig = go.Figure()
@@ -134,12 +126,11 @@ def predict_tone(text):
         xaxis=dict(range=[0, 1])
     )
-    # Get example texts for the predicted emotion
     example_texts = df[df['label'] == pred_class]['text'].sample(min(3, len(df[df['label'] == pred_class]))).tolist()
     return pred_class, sorted_results, fig, example_texts
-# Function to handle the example display
 def get_tone_examples(tone):
     examples = df[df['label'] == tone]['text'].sample(min(5, len(df[df['label'] == tone]))).tolist()
     return examples
@@ -147,40 +138,37 @@ def get_tone_examples(tone):
 # Gradio interface
 def analyze_tone(text, selected_tone=None):
     if not text:
-        return "Please enter some text to analyze.", {}, None, []
-    # If a tone is selected from the dropdown, show examples
     if selected_tone and not text:
         examples = get_tone_examples(selected_tone)
         return f"Examples of '{selected_tone}' tone:", {}, None, examples
-    # Otherwise, analyze the text
     predicted_tone, all_probs, fig, examples = predict_tone(text)
-    # Format the result message
-    message = f"The predicted tone is: **{predicted_tone}**"
     return message, all_probs, fig, examples
-# Create the Gradio interface
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     gr.Markdown("# Text Tone Analyzer")
-    gr.Markdown("Enter text to analyze its emotional tone.")
     with gr.Row():
         with gr.Column(scale=3):
             text_input = gr.Textbox(
                 label="Enter your text here",
-                placeholder="Type something to analyze its emotional tone...",
                 lines=5
             )
             analyze_button = gr.Button("Analyze Tone", variant="primary")
         with gr.Column(scale=2):
-            # Dropdown to select example tones
             tone_dropdown = gr.Dropdown(
                 choices=sorted(df['label'].unique().tolist()),
-                label="Or select a tone to see examples"
             )
     with gr.Row():
@@ -200,21 +188,19 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
             label="Example texts with similar tone"
         )
-    # Fix for the click event handler - properly list the inputs
     analyze_button.click(
         fn=analyze_tone,
-        inputs=[text_input, tone_dropdown],  # Fixed: explicitly list both inputs
         outputs=[result_message, all_probs_output, plot_output, examples_output]
     )
-    # Fix for tone_dropdown event handler
     tone_dropdown.change(
         fn=get_tone_examples,
-        inputs=tone_dropdown,  # This also needs to be fixed to be a list
         outputs=examples_output
     )
-    # Add example inputs
     examples = [
         ["I'm so excited about this new project!"],
         ["I'm feeling quite down today and nothing seems to work."],
@@ -223,6 +209,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     ]
     gr.Examples(examples=examples, inputs=text_input)
-# Launch the app
 if __name__ == "__main__":
     demo.launch()

 import plotly.graph_objects as go
 import warnings
 warnings.filterwarnings("ignore")
+# Hugging face dataset import
 print("Loading dataset...")
 ds = load_dataset("uhoui/text-tone-classifier")
 df = pd.DataFrame(ds["train"])
+# Console Log dataset and class
 print(f"Dataset size: {len(df)} entries")
 print(f"Columns: {df.columns}")
 label_counts = df['label'].value_counts()
 print("\nClass distribution:")
 print(label_counts)
+# Labels
 label_encoder = LabelEncoder()
 df['label_encoded'] = label_encoder.fit_transform(df['label'])
 num_classes = len(label_encoder.classes_)
+# Train/test split
 X_train, X_test, y_train, y_test = train_test_split(
     df['text'],
     df['label_encoded'],
     test_size=0.2,
     random_state=42,
+    stratify=None
 )
+# TFIDF Feature extraction
 print("Creating TF-IDF features...")
 # Fit the TF-IDF vocabulary (max_features=5000) on the training split only,
 # then reuse the fitted vectorizer to transform the test split.
 tfidf = TfidfVectorizer(max_features=5000)
 X_train_tfidf = tfidf.fit_transform(X_train)
 X_test_tfidf = tfidf.transform(X_test)
 # SMOTE
 print("Handling class imbalance (via SMOTE)...")
 try:
     smallest_class_size = min(np.bincount(y_train)[np.bincount(y_train) > 0])
     k_neighbors = min(5, smallest_class_size - 1)
     print(f"SMOTE error: {e}. Using original data.")
     X_train_resampled, y_train_resampled = X_train_tfidf, y_train
+# Logistic Regression Model
 print("Training model...")
 model = LogisticRegression(C=10, max_iter=1000, n_jobs=-1, solver='lbfgs', multi_class='multinomial')
 model.fit(X_train_resampled, y_train_resampled)
+# Evaluate Model
 y_pred = model.predict(X_test_tfidf)
 accuracy = accuracy_score(y_test, y_pred)
 print(f"Model accuracy: {accuracy:.4f}")
 def predict_tone(text):
+    # Vectorize
     text_tfidf = tfidf.transform([text])
     # Get prediction probabilities
     probs = model.predict_proba(text_tfidf)[0]
+    # Get predicted class and its probability
     pred_class_idx = np.argmax(probs)
     pred_class = label_encoder.inverse_transform([pred_class_idx])[0]
+    # Get the labels used during training
+    trained_labels = model.classes_
+    # Decode to string (Labels)
     trained_label_names = label_encoder.inverse_transform(trained_labels)
     results = {label: float(prob) for label, prob in zip(trained_label_names, probs)}
     # Sort results by probability (descending)
     sorted_results = {k: v for k, v in sorted(results.items(), key=lambda item: item[1], reverse=True)}
     # Create visualization
+    top_n = 5  # Top 5, adjust later if needed
     top_labels = list(sorted_results.keys())[:top_n]
     top_probs = list(sorted_results.values())[:top_n]
+    # OPTIONAL: color-code probabilities
     colors = ["rgba(64, 128, 255, " + str(min(1.0, p + 0.3)) + ")" for p in top_probs]
     fig = go.Figure()
         xaxis=dict(range=[0, 1])
     )
+    # Fetch examples
     example_texts = df[df['label'] == pred_class]['text'].sample(min(3, len(df[df['label'] == pred_class]))).tolist()
     return pred_class, sorted_results, fig, example_texts
 def get_tone_examples(tone):
     """Return up to five randomly sampled texts from ``df`` whose label equals ``tone``."""
     matching_texts = df.loc[df['label'] == tone, 'text']
     sample_size = min(5, len(matching_texts))
     return matching_texts.sample(sample_size).tolist()
 # Gradio interface
 def analyze_tone(text, selected_tone=None):
     """Handle a click on the analyze button.

     Args:
         text: Contents of the text box; may be empty or None.
         selected_tone: Optional tone chosen in the dropdown.

     Returns:
         A 4-tuple ``(message, probabilities, figure, examples)``, one value
         per output component wired to the button.
     """
     if not text:
         # Check the dropdown before falling back to the generic prompt, so a
         # selected tone with an empty text box still yields example texts.
         if selected_tone:
             examples = get_tone_examples(selected_tone)
             return f"Examples of '{selected_tone}' tone:", {}, None, examples
         return "Enter the text to analyze:", {}, None, []
     predicted_tone, all_probs, fig, examples = predict_tone(text)
     message = f"The tone is: **{predicted_tone}**"
     return message, all_probs, fig, examples
+# Gradio interface Creation
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     gr.Markdown("# Text Tone Analyzer")
+    gr.Markdown("Enter the text to analyze:")
     with gr.Row():
         with gr.Column(scale=3):
             text_input = gr.Textbox(
                 label="Enter your text here",
+                placeholder="Example: The satisfaction of completing a difficult puzzle is indescribable.",
                 lines=5
             )
             analyze_button = gr.Button("Analyze Tone", variant="primary")
         with gr.Column(scale=2):
+            # Example Tones Dropdown
             tone_dropdown = gr.Dropdown(
                 choices=sorted(df['label'].unique().tolist()),
+                label="Select a tone to view an example below."
             )
     with gr.Row():
             label="Example texts with similar tone"
         )
     analyze_button.click(
         fn=analyze_tone,
+        inputs=[text_input, tone_dropdown],
         outputs=[result_message, all_probs_output, plot_output, examples_output]
     )
     tone_dropdown.change(
         fn=get_tone_examples,
+        inputs=tone_dropdown,
         outputs=examples_output
     )
+    # Example inputs
     examples = [
         ["I'm so excited about this new project!"],
         ["I'm feeling quite down today and nothing seems to work."],
     ]
     gr.Examples(examples=examples, inputs=text_input)
+# Main
 if __name__ == "__main__":
     demo.launch()