Update app.py
Browse files
app.py
CHANGED
|
@@ -13,6 +13,7 @@ from imblearn.over_sampling import SMOTE
|
|
| 13 |
import plotly.express as px
|
| 14 |
import plotly.graph_objects as go
|
| 15 |
import warnings
|
|
|
|
| 16 |
|
| 17 |
warnings.filterwarnings("ignore")
|
| 18 |
|
|
@@ -20,6 +21,10 @@ warnings.filterwarnings("ignore")
|
|
| 20 |
print("Loading dataset...")
|
| 21 |
ds = load_dataset("uhoui/text-tone-classifier")
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
df = pd.DataFrame(ds["train"])
|
| 24 |
|
| 25 |
# Console Log dataset and class
|
|
@@ -45,7 +50,6 @@ X_train, X_test, y_train, y_test = train_test_split(
|
|
| 45 |
)
|
| 46 |
|
| 47 |
# TFIDF Feature extraction
|
| 48 |
-
print("Creating TF-IDF features...")
|
| 49 |
tfidf = TfidfVectorizer(max_features=5000)
|
| 50 |
X_train_tfidf = tfidf.fit_transform(X_train)
|
| 51 |
X_test_tfidf = tfidf.transform(X_test)
|
|
@@ -68,14 +72,20 @@ except ValueError as e:
|
|
| 68 |
X_train_resampled, y_train_resampled = X_train_tfidf, y_train
|
| 69 |
|
| 70 |
# Logistic Regression Model
|
| 71 |
-
print("Training model...")
|
| 72 |
model = LogisticRegression(C=10, max_iter=1000, n_jobs=-1, solver='lbfgs', multi_class='multinomial')
|
| 73 |
model.fit(X_train_resampled, y_train_resampled)
|
| 74 |
|
| 75 |
# Evaluate Model
|
| 76 |
y_pred = model.predict(X_test_tfidf)
|
| 77 |
accuracy = accuracy_score(y_test, y_pred)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
print(f"Model accuracy: {accuracy:.4f}")
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
def predict_tone(text):
|
| 81 |
# Vectorize
|
|
|
|
| 13 |
import plotly.express as px
|
| 14 |
import plotly.graph_objects as go
|
| 15 |
import warnings
|
| 16 |
+
from sklearn.metrics import precision_score, recall_score, f1_score
|
| 17 |
|
| 18 |
warnings.filterwarnings("ignore")
|
| 19 |
|
|
|
|
| 21 |
print("Loading dataset...")
|
| 22 |
ds = load_dataset("uhoui/text-tone-classifier")
|
| 23 |
|
| 24 |
+
# Optional: export the train split to a CSV file (e.g. to download it from Colab)
|
| 25 |
+
# df = ds['train'].to_pandas()
|
| 26 |
+
# df.to_csv("text_tone_classifier.csv", index=False)
|
| 27 |
+
|
| 28 |
df = pd.DataFrame(ds["train"])
|
| 29 |
|
| 30 |
# Console Log dataset and class
|
|
|
|
| 50 |
)
|
| 51 |
|
| 52 |
# TFIDF Feature extraction
|
|
|
|
| 53 |
tfidf = TfidfVectorizer(max_features=5000)
|
| 54 |
X_train_tfidf = tfidf.fit_transform(X_train)
|
| 55 |
X_test_tfidf = tfidf.transform(X_test)
|
|
|
|
| 72 |
X_train_resampled, y_train_resampled = X_train_tfidf, y_train
|
| 73 |
|
| 74 |
# Logistic Regression Model
|
|
|
|
| 75 |
model = LogisticRegression(C=10, max_iter=1000, n_jobs=-1, solver='lbfgs', multi_class='multinomial')
|
| 76 |
model.fit(X_train_resampled, y_train_resampled)
|
| 77 |
|
| 78 |
# Evaluate Model
|
| 79 |
y_pred = model.predict(X_test_tfidf)
|
| 80 |
accuracy = accuracy_score(y_test, y_pred)
|
| 81 |
+
precision = precision_score(y_test, y_pred, average='weighted')
|
| 82 |
+
recall = recall_score(y_test, y_pred, average='weighted')
|
| 83 |
+
f1 = f1_score(y_test, y_pred, average='weighted')
|
| 84 |
+
|
| 85 |
print(f"Model accuracy: {accuracy:.4f}")
|
| 86 |
+
print(f"Precision: {precision:.4f}")
|
| 87 |
+
print(f"Recall: {recall:.4f}")
|
| 88 |
+
print(f"F1 Score: {f1:.4f}")
|
| 89 |
|
| 90 |
def predict_tone(text):
|
| 91 |
# Vectorize
|