File size: 3,947 Bytes
9df5177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
import colorama
from colorama import Fore
import csv

# Reset terminal colours automatically after every print.
colorama.init(autoreset=True)

# =========================
# Load dataset
# =========================
# Label columns, one binary classifier is trained per entry.
emotions = ["joy", "sadness", "anger", "neutral"]

df = pd.read_csv("dataset.csv")
texts = df["text"]
labels = df[emotions]

# =========================
# Training
# =========================
# NOTE: X_test / y_test come out of the split but are not used in this section.
X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer(lowercase=True, max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)

# Build one SGDClassifier per emotion.
models = {}
for emotion in emotions:
    clf = SGDClassifier(loss="log_loss", max_iter=1000)
    targets = y_train[emotion]
    if targets.nunique() > 1:
        # fit() iterates up to max_iter epochs until convergence, whereas
        # partial_fit() performs a single epoch and ignores max_iter.
        # partial_fit() can still be called afterwards for incremental updates.
        clf.fit(X_train_vec, targets)
    else:
        # fit() rejects a single-class target; partial_fit() accepts it when
        # the full class list is given explicitly.
        clf.partial_fit(X_train_vec, targets, classes=[0, 1])
    models[emotion] = clf

print("Modelo treinado com sucesso.\n")

# =========================
# Funções auxiliares
# =========================
def predict_emotions(text):
    """Score *text* with every per-emotion classifier.

    The text is vectorized once with the module-level ``vectorizer``; the
    result maps each key of ``models`` to the second column of that
    classifier's ``predict_proba`` output for the single input row.
    """
    features = vectorizer.transform([text])
    return {
        name: model.predict_proba(features)[0][1]
        for name, model in models.items()
    }

def show_bar(prob, length=20):
    """Render *prob* as a text bar that is *length* cells wide.

    The number of filled cells is ``length * prob`` rounded with the built-in
    ``round``; the remaining cells are drawn with the light-shade glyph.
    """
    filled = int(round(prob * length))
    empty = length - filled
    return "".join(("█" * filled, "░" * empty))

def intensity_label(prob):
    """Return the intensity description for the probability *prob*.

    Each threshold is an exclusive upper bound; values that fall below none
    of them get the strongest label.
    """
    thresholds = (
        (0.2, "Muito fraca"),
        (0.4, "Fraca"),
        (0.6, "Média"),
        (0.8, "Forte"),
    )
    for upper_bound, description in thresholds:
        if prob < upper_bound:
            return description
    return "Muito forte"

def save_to_csv(text, label):
    """Append a new labelled phrase to dataset.csv.

    The row is ``[text]`` followed by one 0/1 flag per entry of the
    module-level ``emotions`` list (1 for the entry equal to *label*).

    If the existing file does not end with a line break, one is written
    first so the new row is not glued onto the last existing record.
    """
    row = [text] + [1 if e == label else 0 for e in emotions]

    needs_line_break = False
    try:
        with open("dataset.csv", "rb") as existing:
            existing.seek(0, 2)  # 2 == SEEK_END
            if existing.tell() > 0:
                existing.seek(-1, 2)
                needs_line_break = existing.read(1) not in (b"\n", b"\r")
    except FileNotFoundError:
        # Nothing to inspect; append mode below creates the file.
        pass

    with open("dataset.csv", "a", newline="", encoding="utf-8") as f:
        if needs_line_break:
            f.write("\n")
        writer = csv.writer(f)
        writer.writerow(row)

# =========================
# Loop interativo
# =========================
print("Classificador de Emoções (digite 'sair' para encerrar)")
print("-" * 50)

# Terminal colour per emotion; built once instead of for every printed row.
EMOTION_COLORS = {
    "joy": Fore.GREEN,
    "sadness": Fore.BLUE,
    "anger": Fore.RED,
    "neutral": Fore.LIGHTBLACK_EX,
}

try:
    while True:
        choice = input("Digite 'classificar', 'treinar' ou 'sair': ").strip().lower()

        if choice in ("sair", "quit"):
            print("Encerrando classificador.")
            break

        if choice == "classificar":
            user_input = input("Digite um texto: ").strip()
            if not user_input:
                continue

            result = predict_emotions(user_input)
            dominant = max(result, key=result.get)

            print("\nResultado:")
            for emotion, prob in result.items():
                bar = show_bar(prob)
                label = intensity_label(prob)
                color = EMOTION_COLORS.get(emotion, Fore.WHITE)
                print(f"{color}{emotion:8}: {bar} {prob:.2f} ({label})")

            print(f"\nEmoção dominante: {dominant} ({intensity_label(result[dominant])})")
            print("-" * 50)

        elif choice == "treinar":
            new_text = input("Digite a nova frase: ").strip()
            if not new_text:
                # An empty phrase carries no features and would be stored as
                # an empty text field in the dataset, so it is rejected the
                # same way the classify branch ignores empty input.
                print("Frase vazia. Nada foi adicionado.")
                continue

            new_label = input("Digite a emoção dominante (joy/sadness/anger/neutral): ").strip().lower()

            if new_label in emotions:
                # Incremental training: one positive example for the chosen
                # emotion and one negative example for every other model.
                X_new = vectorizer.transform([new_text])
                for emotion in emotions:
                    y_new = [1] if emotion == new_label else [0]
                    models[emotion].partial_fit(X_new, y_new)
                # Persist the example to the CSV.
                save_to_csv(new_text, new_label)
                print("Nova frase adicionada, modelo atualizado e dataset salvo!")
            else:
                print("Rótulo inválido. Use: joy, sadness, anger ou neutral.")

        else:
            print("Opção inválida. Use 'classificar', 'treinar' ou 'sair'.")
except (EOFError, KeyboardInterrupt):
    # Closed stdin or Ctrl-C: leave the loop without a traceback.
    print("\nEncerrando classificador.")