Arthur Samuel Galego Panucci Figueiredo
Update class.py
9df5177 verified
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
import colorama
from colorama import Fore
import csv
# autoreset=True makes colorama reset the colour after every print call.
colorama.init(autoreset=True)

# =========================
# Load dataset
# =========================
df = pd.read_csv("dataset.csv")
# A row whose text cell is empty is read back by pandas as NaN, and
# TfidfVectorizer refuses NaN documents. Drop such rows and coerce the rest to
# str so purely numeric phrases do not break the vectorizer either.
df = df.dropna(subset=["text"])
texts = df["text"].astype(str)
labels = df[["joy", "sadness", "anger", "neutral"]]

# =========================
# Training
# =========================
# NOTE: the held-out split (X_test / y_test) is created but not evaluated in
# this script; it is kept so the training subset stays the same as before.
X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)
vectorizer = TfidfVectorizer(lowercase=True, max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)

# Build one binary SGDClassifier per emotion (one-vs-rest).
emotions = ["joy", "sadness", "anger", "neutral"]
models = {}
for emotion in emotions:
    # class_weight was removed from this constructor call.
    clf = SGDClassifier(loss="log_loss", max_iter=1000)
    y_emotion = y_train[emotion]
    if set(y_emotion.unique()) == {0, 1}:
        # fit() iterates up to max_iter epochs until convergence, whereas
        # partial_fit() performs a single epoch and ignores max_iter. The
        # fitted model still accepts later partial_fit() calls, so the
        # interactive incremental updates keep working.
        clf.fit(X_train_vec, y_emotion)
    else:
        # Fallback for a column that does not contain exactly the two
        # classes: fit() would reject a single-class target, so keep the
        # original call that declares both classes up front.
        clf.partial_fit(X_train_vec, y_emotion, classes=[0, 1])
    models[emotion] = clf
print("Modelo treinado com sucesso.\n")
# =========================
# Funções auxiliares
# =========================
def predict_emotions(text):
    """Score *text* with every per-emotion classifier.

    The text is transformed with the module-level ``vectorizer`` and passed to
    each classifier stored in ``models``.

    Returns:
        A dict mapping each emotion name to the value at column index 1 of
        ``predict_proba`` for the single input row (the probability of class
        ``1`` when the models are trained with ``classes=[0, 1]``).
    """
    features = vectorizer.transform([text])
    return {
        name: model.predict_proba(features)[0][1]
        for name, model in models.items()
    }
def show_bar(prob, length=20):
    """Render *prob* as a text bar of filled and empty cells.

    Args:
        prob: Fraction of the bar to fill.
        length: Nominal number of cells in the bar.

    Returns:
        A string with ``round(length * prob)`` filled cells ('█') followed by
        the remaining ``length - filled`` empty cells ('░').
    """
    n_filled = int(round(length * prob))
    n_empty = length - n_filled
    return "".join(("█" * n_filled, "░" * n_empty))
def intensity_label(prob):
    """Map a probability to its intensity label.

    Each band is half-open: a value equal to a threshold belongs to the band
    above it. Anything that is not below 0.8 is labelled "Muito forte".
    """
    # (exclusive upper bound, label) pairs, checked in ascending order.
    bands = (
        (0.2, "Muito fraca"),
        (0.4, "Fraca"),
        (0.6, "Média"),
        (0.8, "Forte"),
    )
    for upper_bound, name in bands:
        if prob < upper_bound:
            return name
    return "Muito forte"
def save_to_csv(text, label):
    """Append a newly labelled phrase to dataset.csv.

    The row written is ``text`` followed by one 0/1 flag per entry of the
    module-level ``emotions`` list, with 1 on the entry equal to ``label``.

    Args:
        text: The phrase to store.
        label: Name of the dominant emotion for the phrase.
    """
    import os

    path = "dataset.csv"
    row = [text] + [1 if e == label else 0 for e in emotions]

    # If the existing file does not end with a line break, appending directly
    # would glue the new row onto the last record and corrupt both.
    needs_newline = False
    try:
        with open(path, "rb") as existing:
            existing.seek(0, os.SEEK_END)
            if existing.tell() > 0:
                existing.seek(-1, os.SEEK_END)
                needs_newline = existing.read(1) not in (b"\n", b"\r")
    except FileNotFoundError:
        # Append mode below creates the file; nothing to terminate first.
        pass

    with open(path, "a", newline="", encoding="utf-8") as f:
        if needs_newline:
            f.write("\n")
        csv.writer(f).writerow(row)
# =========================
# Loop interativo
# =========================
# Colour used for each emotion's result row; built once instead of on every
# printed row. Emotions missing from the map fall back to white.
EMOTION_COLORS = {
    "joy": Fore.GREEN,
    "sadness": Fore.BLUE,
    "anger": Fore.RED,
    "neutral": Fore.LIGHTBLACK_EX,
}

print("Classificador de Emoções (digite 'sair' para encerrar)")
print("-" * 50)
try:
    while True:
        choice = input("Digite 'classificar', 'treinar' ou 'sair': ").strip().lower()
        if choice in ["sair", "quit"]:
            print("Encerrando classificador.")
            break
        elif choice == "classificar":
            user_input = input("Digite um texto: ").strip()
            if not user_input:
                continue
            result = predict_emotions(user_input)
            dominant = max(result, key=result.get)
            print("\nResultado:")
            for emotion, prob in result.items():
                bar = show_bar(prob)
                label = intensity_label(prob)
                color = EMOTION_COLORS.get(emotion, Fore.WHITE)
                print(f"{color}{emotion:8}: {bar} {prob:.2f} ({label})")
            print(f"\nEmoção dominante: {dominant} ({intensity_label(result[dominant])})")
            print("-" * 50)
        elif choice == "treinar":
            new_text = input("Digite a nova frase: ").strip()
            if not new_text:
                # An empty phrase carries no features to learn from and would
                # be stored as an empty text cell in dataset.csv.
                print("Frase vazia. Nada foi adicionado.")
                continue
            new_label = input("Digite a emoção dominante (joy/sadness/anger/neutral): ").strip().lower()
            if new_label in emotions:
                # Incremental training: one positive example for the chosen
                # emotion and one negative example for every other emotion.
                X_new = vectorizer.transform([new_text])
                for emotion in emotions:
                    y_new = [1] if emotion == new_label else [0]
                    models[emotion].partial_fit(X_new, y_new)
                # Persist the example to the CSV dataset.
                save_to_csv(new_text, new_label)
                print("Nova frase adicionada, modelo atualizado e dataset salvo!")
            else:
                print("Rótulo inválido. Use: joy, sadness, anger ou neutral.")
        else:
            print("Opção inválida. Use 'classificar', 'treinar' ou 'sair'.")
except (EOFError, KeyboardInterrupt):
    # Closed stdin or Ctrl+C at a prompt: leave without a traceback.
    print("\nEncerrando classificador.")