import csv
import os

import colorama
import pandas as pd
from colorama import Fore
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split

colorama.init(autoreset=True)

# =========================
# Load dataset
# =========================
# Single source of truth for the dataset location (read here, appended to by
# save_to_csv below).
DATASET_PATH = "dataset.csv"

# One binary label column (and one binary classifier) per emotion.
emotions = ["joy", "sadness", "anger", "neutral"]

df = pd.read_csv(DATASET_PATH)
# A row whose text field is empty is read back as NaN, which TfidfVectorizer
# rejects as an invalid document; drop such rows so one bad record cannot
# prevent the script from starting.
df = df.dropna(subset=["text"])

texts = df["text"]
labels = df[emotions]

# =========================
# Training
# =========================
# NOTE: the held-out split (X_test / y_test) is created but not used anywhere
# in the visible script.
X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer(lowercase=True, max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)

# Train one SGDClassifier per emotion. partial_fit is used so the same models
# can later be updated one sample at a time from the interactive loop.
# class_weight was removed by the original author (presumably because
# partial_fit does not accept class_weight="balanced" -- confirm).
# NOTE(review): per the scikit-learn documentation, partial_fit performs a
# single epoch, so max_iter=1000 has no effect on this initial training.
models = {}
for emotion in emotions:
    clf = SGDClassifier(loss="log_loss", max_iter=1000)
    clf.partial_fit(X_train_vec, y_train[emotion], classes=[0, 1])
    models[emotion] = clf

print("Modelo treinado com sucesso.\n")


# =========================
# Helper functions
# =========================
def predict_emotions(text):
    """Return a dict mapping each emotion name to its predicted probability.

    The text is vectorized with the fitted TF-IDF vectorizer and scored by
    every per-emotion classifier; the value stored is the probability of the
    positive class (label 1).
    """
    vec = vectorizer.transform([text])
    return {
        emotion: clf.predict_proba(vec)[0][1]
        for emotion, clf in models.items()
    }


def show_bar(prob, length=20):
    """Render *prob* as a text progress bar that is *length* characters wide.

    *prob* is clamped to [0, 1] so the bar never overflows or underflows the
    requested width.
    """
    clamped = min(max(prob, 0.0), 1.0)
    filled_len = int(round(length * clamped))
    return '█' * filled_len + '░' * (length - filled_len)


def intensity_label(prob):
    """Map a probability to a five-level intensity label (in Portuguese)."""
    if prob < 0.2:
        return "Muito fraca"
    if prob < 0.4:
        return "Fraca"
    if prob < 0.6:
        return "Média"
    if prob < 0.8:
        return "Forte"
    return "Muito forte"


def _needs_leading_newline(path):
    """Return True when *path* has content that does not end in a line break."""
    try:
        with open(path, "rb") as f:
            f.seek(0, os.SEEK_END)
            if f.tell() == 0:
                return False
            f.seek(-1, os.SEEK_END)
            return f.read(1) not in (b"\n", b"\r")
    except FileNotFoundError:
        # Append mode will create the file; nothing to terminate.
        return False


def save_to_csv(text, label):
    """Append a new labelled phrase to the dataset CSV.

    The row is the text followed by one 0/1 flag per entry of ``emotions``,
    with 1 only for *label*.
    NOTE(review): assumes the CSV columns are exactly text, joy, sadness,
    anger, neutral in that order -- confirm against the dataset header.
    """
    row = [text] + [1 if e == label else 0 for e in emotions]
    # If the file's last record has no trailing line break, appending directly
    # would glue the new row onto it and corrupt both records.
    needs_newline = _needs_leading_newline(DATASET_PATH)
    with open(DATASET_PATH, "a", newline="", encoding="utf-8") as f:
        if needs_newline:
            f.write("\n")
        csv.writer(f).writerow(row)


# =========================
# Interactive loop
# =========================
print("Classificador de Emoções (digite 'sair' para encerrar)")
print("-" * 50)

# Terminal color used for each emotion's result line; built once instead of
# on every printed row.
EMOTION_COLORS = {
    "joy": Fore.GREEN,
    "sadness": Fore.BLUE,
    "anger": Fore.RED,
    "neutral": Fore.LIGHTBLACK_EX,
}

# Simple REPL: classify a text, teach the models a new labelled phrase, or quit.
while True:
    choice = input("Digite 'classificar', 'treinar' ou 'sair': ").strip().lower()

    if choice in ["sair", "quit"]:
        print("Encerrando classificador.")
        break

    elif choice == "classificar":
        user_input = input("Digite um texto: ").strip()
        if not user_input:
            # Nothing to classify; go back to the menu.
            continue

        result = predict_emotions(user_input)
        dominant = max(result, key=result.get)

        print("\nResultado:")
        for emotion, prob in result.items():
            bar = show_bar(prob)
            label = intensity_label(prob)
            color = EMOTION_COLORS.get(emotion, Fore.WHITE)
            print(f"{color}{emotion:8}: {bar} {prob:.2f} ({label})")

        print(f"\nEmoção dominante: {dominant} ({intensity_label(result[dominant])})")
        print("-" * 50)

    elif choice == "treinar":
        new_text = input("Digite a nova frase: ").strip()
        if not new_text:
            # An empty phrase carries no features and, once written to the
            # CSV, is read back as a missing value on the next start, which
            # breaks vectorization. Reject it before training or saving.
            print("Frase vazia. Nada foi adicionado.")
            continue

        new_label = input(
            "Digite a emoção dominante (joy/sadness/anger/neutral): "
        ).strip().lower()

        if new_label in emotions:
            # Incremental training: the chosen emotion's model sees a positive
            # example, every other model sees a negative one.
            X_new = vectorizer.transform([new_text])
            for emotion in emotions:
                y_new = [1] if emotion == new_label else [0]
                models[emotion].partial_fit(X_new, y_new)

            # Persist the example so it is part of the dataset on the next run.
            save_to_csv(new_text, new_label)
            print("Nova frase adicionada, modelo atualizado e dataset salvo!")
        else:
            print("Rótulo inválido. Use: joy, sadness, anger ou neutral.")

    else:
        print("Opção inválida. Use 'classificar', 'treinar' ou 'sair'.")