|
|
import csv
import os

import colorama
import pandas as pd
from colorama import Fore
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
|
|
|
|
|
# autoreset=True makes colorama restore the default colour after every print,
# so a coloured line never bleeds into the next one.
colorama.init(autoreset=True)

# Emotion columns of the dataset; one binary classifier is trained per entry.
emotions = ["joy", "sadness", "anger", "neutral"]

# Load the dataset. Rows whose text is missing are dropped: TfidfVectorizer
# rejects NaN documents, so a single blank text cell would abort start-up.
df = pd.read_csv("dataset.csv").dropna(subset=["text"])
texts = df["text"]
labels = df[emotions]

# Fixed random_state keeps the split reproducible between runs.
# NOTE: X_test / y_test are produced here but not used in this part of the
# script.
X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

# TF-IDF features, vocabulary capped at the 5000 most frequent terms.
vectorizer = TfidfVectorizer(lowercase=True, max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)

# One logistic-regression-style SGD model per emotion (one-vs-rest by hand).
models = {}
for emotion in emotions:
    clf = SGDClassifier(loss="log_loss", max_iter=1000)
    targets = y_train[emotion]
    if len(set(targets)) > 1:
        # fit() honours max_iter/tol and iterates until convergence, whereas
        # partial_fit() performs exactly one epoch and ignores max_iter.
        # The fitted model can still be updated later with partial_fit().
        clf.fit(X_train_vec, targets)
    else:
        # fit() refuses a single-class target; partial_fit() accepts it as
        # long as the full class list is declared up front.
        clf.partial_fit(X_train_vec, targets, classes=[0, 1])
    models[emotion] = clf

print("Modelo treinado com sucesso.\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def predict_emotions(text):
    """Return the positive-class probability of every emotion for ``text``.

    The text is vectorised with the module-level ``vectorizer`` and scored by
    each classifier stored in the module-level ``models`` mapping.

    Args:
        text: A single sentence to classify.

    Returns:
        A dict mapping each emotion name in ``models`` to the second column
        of that model's ``predict_proba`` output for this text.
    """
    features = vectorizer.transform([text])
    return {
        name: model.predict_proba(features)[0][1]
        for name, model in models.items()
    }
|
|
|
|
|
def show_bar(prob, length=20):
    """Render ``prob`` as a fixed-width text progress bar.

    Args:
        prob: Fraction of the bar to fill. Values outside [0, 1] are clamped
            so the bar is always exactly ``length`` characters wide.
        length: Total number of characters in the bar.

    Returns:
        A string of ``length`` characters: filled cells ('█') followed by
        empty cells ('░').
    """
    # Without clamping, prob > 1 or prob < 0 yields a bar longer than `length`.
    clamped = min(max(prob, 0.0), 1.0)
    filled_len = int(round(length * clamped))
    return '█' * filled_len + '░' * (length - filled_len)
|
|
|
|
|
def intensity_label(prob):
    """Translate a probability into a five-level intensity description.

    Args:
        prob: Probability to describe.

    Returns:
        The label of the first band whose upper bound is strictly greater
        than ``prob``; "Muito forte" when ``prob`` is below none of them.
    """
    # (exclusive upper bound, label), in ascending order.
    bands = (
        (0.2, "Muito fraca"),
        (0.4, "Fraca"),
        (0.6, "Média"),
        (0.8, "Forte"),
    )
    for upper_bound, description in bands:
        if prob < upper_bound:
            return description
    return "Muito forte"
|
|
|
|
|
def save_to_csv(text, label, path="dataset.csv", emotion_names=None):
    """Append one labelled sentence to the dataset CSV.

    The written row is ``text`` followed by one 0/1 flag per emotion, with a
    1 only in the position whose name equals ``label``.

    Args:
        text: Sentence stored in the first column.
        label: Name of the dominant emotion. A value matching no entry of
            ``emotion_names`` produces a row of all zeros.
        path: CSV file to append to; it is created if it does not exist.
        emotion_names: Ordered emotion names that define the flag columns.
            Defaults to the module-level ``emotions`` list.

    Note:
        No header is written. The row layout (text first, then the flags in
        ``emotion_names`` order) is assumed to match the existing file's
        column order -- confirm against the dataset header.
    """
    if emotion_names is None:
        emotion_names = emotions
    row = [text] + [1 if name == label else 0 for name in emotion_names]

    # If the existing file does not end with a line break, the appended row
    # would be glued onto the last record and corrupt both rows.
    needs_line_break = False
    try:
        with open(path, "rb") as existing:
            if existing.seek(0, os.SEEK_END) > 0:
                existing.seek(-1, os.SEEK_END)
                needs_line_break = existing.read(1) not in (b"\n", b"\r")
    except FileNotFoundError:
        # Nothing to repair: the append below creates the file.
        pass

    # newline="" lets the csv module control line endings itself.
    with open(path, "a", newline="", encoding="utf-8") as f:
        if needs_line_break:
            f.write("\n")
        csv.writer(f).writerow(row)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Interactive command loop: classify a sentence, add a labelled sentence to
# the models and the dataset, or quit.

# Display colour per emotion; built once instead of on every printed row.
_EMOTION_COLORS = {
    "joy": Fore.GREEN,
    "sadness": Fore.BLUE,
    "anger": Fore.RED,
    "neutral": Fore.LIGHTBLACK_EX,
}

print("Classificador de Emoções (digite 'sair' para encerrar)")
print("-" * 50)

try:
    while True:
        choice = input("Digite 'classificar', 'treinar' ou 'sair': ").strip().lower()

        if choice in ["sair", "quit"]:
            print("Encerrando classificador.")
            break

        elif choice == "classificar":
            user_input = input("Digite um texto: ").strip()
            if not user_input:
                # Nothing to classify; go back to the menu.
                continue

            result = predict_emotions(user_input)
            dominant = max(result, key=result.get)

            print("\nResultado:")
            for emotion, prob in result.items():
                bar = show_bar(prob)
                label = intensity_label(prob)
                color = _EMOTION_COLORS.get(emotion, Fore.WHITE)
                print(f"{color}{emotion:8}: {bar} {prob:.2f} ({label})")

            print(f"\nEmoção dominante: {dominant} ({intensity_label(result[dominant])})")
            print("-" * 50)

        elif choice == "treinar":
            new_text = input("Digite a nova frase: ").strip()
            if not new_text:
                # An empty phrase carries no features and would be stored as
                # a blank text cell in the dataset, so reject it up front.
                print("Frase vazia. Nada foi adicionado.")
                continue

            new_label = input("Digite a emoção dominante (joy/sadness/anger/neutral): ").strip().lower()

            if new_label in emotions:
                # The vectorizer is only transformed, not refitted, so the
                # new phrase is encoded with the vocabulary learned at start-up.
                X_new = vectorizer.transform([new_text])
                for emotion in emotions:
                    # Positive example for the chosen emotion, negative for
                    # every other one.
                    y_new = [1] if emotion == new_label else [0]
                    models[emotion].partial_fit(X_new, y_new)

                save_to_csv(new_text, new_label)
                print("Nova frase adicionada, modelo atualizado e dataset salvo!")
            else:
                print("Rótulo inválido. Use: joy, sadness, anger ou neutral.")

        else:
            print("Opção inválida. Use 'classificar', 'treinar' ou 'sair'.")
except (EOFError, KeyboardInterrupt):
    # Ctrl-D / Ctrl-C or exhausted piped input: leave without a traceback.
    print("\nEncerrando classificador.")
|
|
|