Spaces:

ProfRod100
/

Teste_Modelo_Amazon

Runtime error

App Files Files Community

Teste_Modelo_Amazon / app.py

ProfRod100

Update app.py

5135aaf verified 5 months ago

raw

history blame

9.17 kB

	import os
	import numpy as np
	import gradio as gr
	import joblib

	from sklearn.pipeline import Pipeline
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.linear_model import LogisticRegression

	from transformers import pipeline as hf_pipeline


	# ======================================================================
	# 1. Sentiment baseline (TF-IDF + Logistic Regression)
	# ======================================================================

	# File holding the pickled baseline pipeline; the MODEL_PATH environment
	# variable overrides the default file name.
	BASELINE_PATH = os.environ.get("MODEL_PATH", "baseline_pipe.pkl")


	def train_small_baseline(save_path: str = BASELINE_PATH,
	                         max_samples: int = 10000):
	    """Train a small sentiment baseline on a sample of ``amazon_polarity``.

	    Intended as a bootstrap when no pre-trained baseline file is available.
	    The sample is drawn with a fixed shuffle seed (42), so the same rows are
	    selected on every run for a given dataset version.

	    Args:
	        save_path: Destination of the fitted pipeline (``joblib.dump``).
	        max_samples: Upper bound on the number of training reviews; must be
	            positive.

	    Returns:
	        The fitted TF-IDF + logistic-regression ``Pipeline``.

	    Raises:
	        ValueError: If ``max_samples`` is not positive.
	    """
	    if max_samples <= 0:
	        raise ValueError("max_samples must be a positive integer")

	    # Imported lazily: only needed when the baseline has to be trained.
	    import logging

	    from datasets import load_dataset

	    ds = load_dataset("amazon_polarity", split="train")
	    sample = ds.shuffle(seed=42).select(range(min(max_samples, len(ds))))

	    # Plain lists are enough for scikit-learn; no DataFrame round-trip needed.
	    texts = list(sample["content"])
	    labels = list(sample["label"])

	    pipe = Pipeline(
	        [
	            ("tfidf", TfidfVectorizer(max_features=30000, ngram_range=(1, 2))),
	            ("clf", LogisticRegression(max_iter=1000)),
	        ]
	    )
	    pipe.fit(texts, labels)

	    # Persisting is best-effort: a failed write must not throw away the
	    # model that was just trained.
	    try:
	        joblib.dump(pipe, save_path)
	    except OSError:
	        logging.getLogger(__name__).warning(
	            "Could not save the baseline to %s; using the in-memory model.",
	            save_path,
	            exc_info=True,
	        )
	    return pipe


	def load_or_bootstrap_baseline():
	    """Return the sentiment baseline, loading it from disk or training it.

	    If ``BASELINE_PATH`` exists it is loaded with ``joblib``. Otherwise a
	    small baseline is trained, unless the ``DISABLE_AUTOTRAIN`` environment
	    variable equals ``"1"``, in which case ``None`` is returned.
	    """
	    if not os.path.exists(BASELINE_PATH):
	        autotrain_disabled = os.getenv("DISABLE_AUTOTRAIN", "0") == "1"
	        return None if autotrain_disabled else train_small_baseline()

	    # NOTE(review): joblib.load unpickles the file, which can execute
	    # arbitrary code; only load baseline files from a trusted source.
	    return joblib.load(BASELINE_PATH)


	# Resolved once at import time; None means no baseline is available.
	baseline_model = load_or_bootstrap_baseline()


	def classify_only(text: str):
	    """Classify the sentiment of ``text`` and return a JSON-friendly dict.

	    Returns ``{"sentimento": ..., "confianca": ...}`` on success, where the
	    label is "positivo" when the highest-probability column is index 1 and
	    "negativo" otherwise. Returns ``{"erro": ...}`` when the text is blank
	    or no baseline model is loaded.
	    """
	    if not (text and text.strip()):
	        return {"erro": "Digite um texto."}

	    if baseline_model is None:
	        message = (
	            "Modelo baseline nao encontrado. "
	            "Envie baseline_pipe.pkl na aba Files ou remova DISABLE_AUTOTRAIN."
	        )
	        return {"erro": message}

	    probabilities = baseline_model.predict_proba([text])[0]
	    best_index = int(np.argmax(probabilities))
	    confidence = float(probabilities[best_index])

	    sentiment = "negativo"
	    if best_index == 1:
	        sentiment = "positivo"

	    return {"sentimento": sentiment, "confianca": round(confidence, 3)}


	# ======================================================================
	# 2. Generative AI (LLaMA 3) for the reply sent to the customer
	# ======================================================================

	# Swap in another model id if you want something lighter.
	GEN_MODEL_ID = os.getenv(
	    "GEN_MODEL_ID",
	    "meta-llama/Meta-Llama-3-8B-Instruct",
	)


	def _load_generator(model_id):
	    """Build the text-generation pipeline, degrading gracefully on failure.

	    Returns the ``transformers`` pipeline for ``model_id``. If building it
	    raises, the error is logged and a stub callable is returned instead, so
	    the rest of the app (for example the sentiment-only tab) still starts.
	    The stub mimics the pipeline output shape: a list holding one dict with
	    a ``"generated_text"`` entry.
	    """
	    import logging

	    try:
	        return hf_pipeline(
	            "text-generation",
	            model=model_id,
	            tokenizer=model_id,
	        )
	    except Exception:  # start-up boundary: log and keep the app usable
	        logging.getLogger(__name__).exception(
	            "Could not load generation model %r; replies are disabled.",
	            model_id,
	        )

	    def _unavailable(prompt, **generation_kwargs):
	        # Same call signature and result shape as the real pipeline.
	        return [
	            {
	                "generated_text": (
	                    "Desculpe, o modelo de geração de respostas não está "
	                    "disponível no momento."
	                )
	            }
	        ]

	    return _unavailable


	generator = _load_generator(GEN_MODEL_ID)


	def build_prompt(history, user_text, sentimento_json):
	    """Build the plain-text prompt sent to the generation model.

	    The prompt holds an instruction header (seen by the model, not by the
	    customer), the previous turns and the new customer message, and ends
	    with an open "Atendente:" line for the model to complete.

	    Args:
	        history: Iterable of ``(customer_text, attendant_text)`` pairs, or a
	            falsy value when there are no previous turns.
	        user_text: The new customer message.
	        sentimento_json: Classifier result; the "sentimento" and "confianca"
	            keys are read when it is a dict. Anything else, or missing keys,
	            falls back to neutral placeholders.

	    Returns:
	        The complete prompt string.
	    """
	    sentimento = None
	    confianca = None
	    if isinstance(sentimento_json, dict):
	        sentimento = sentimento_json.get("sentimento")
	        confianca = sentimento_json.get("confianca")

	    if sentimento is None:
	        sentimento = "nao identificado"
	    if confianca is None:
	        # Avoid leaking the literal "None" into the model instructions.
	        confianca = "desconhecida"

	    # Instruction header (the model sees it, the customer does not).
	    header = (
	        "Você é um atendente virtual educado, empático e profissional "
	        "de uma loja online. Responda SEMPRE em português do Brasil, "
	        "usando entre 2 e 4 frases curtas, claras e naturais.\n\n"
	        "Informação de contexto (não revele isso na resposta): "
	        f"a última mensagem do cliente foi classificada com sentimento "
	        f"'{sentimento}' (confiança {confianca}). "
	        "Use isso apenas para ajustar o tom (mais empático se negativo, "
	        "mais entusiasmado se positivo), mas não mencione a palavra "
	        "'sentimento', 'classificação' ou 'modelo'.\n\n"
	        "Histórico da conversa:\n"
	    )

	    # Previous turns, then the new message and the open attendant line.
	    parts = [
	        f"Cliente: {user}\nAtendente: {bot}\n"
	        for user, bot in (history or [])
	    ]
	    parts.append(f"Cliente: {user_text}\n")
	    parts.append("Atendente:")

	    return header + "".join(parts)


	def generate_reply_with_history(history, user_text, sentimento_json):
	    """Generate the attendant reply for ``user_text``.

	    Builds the prompt from the conversation history and the sentiment
	    result, samples a completion from ``generator`` and returns only the
	    first attendant turn of that completion.

	    Args:
	        history: Previous ``(customer_text, attendant_text)`` pairs.
	        user_text: The new customer message.
	        sentimento_json: Classifier result passed on to ``build_prompt``.

	    Returns:
	        The reply text, or "Digite uma mensagem." when ``user_text`` is
	        empty or blank.
	    """
	    if not user_text or not user_text.strip():
	        return "Digite uma mensagem."

	    prompt = build_prompt(history, user_text, sentimento_json)

	    outputs = generator(
	        prompt,
	        max_new_tokens=160,
	        temperature=0.7,
	        top_p=0.9,
	        do_sample=True,
	        return_full_text=False,
	    )
	    reply = outputs[0]["generated_text"]

	    # The prompt is a plain "Cliente:/Atendente:" transcript, so the model
	    # may go on writing further turns. Cut at the next speaker tag so those
	    # invented turns never reach the customer.
	    for speaker_tag in ("\nCliente:", "\nAtendente:"):
	        reply = reply.split(speaker_tag, 1)[0]

	    return reply.strip()


	# ======================================================================
	# 3. Chatbot step function (wired to Gradio)
	# ======================================================================

	def chatbot_step(history, user_text):
	    """Run one chat turn: classify the message, reply, extend the history.

	    Returns a 4-tuple ``(chat_display, textbox_value, sentiment, state)``.
	    For a blank message the history is passed through unchanged, the
	    textbox is cleared and the sentiment is an empty dict.
	    """
	    has_message = bool(user_text) and user_text.strip() != ""
	    if not has_message:
	        return history, "", {}, history

	    sentiment = classify_only(user_text)
	    reply = generate_reply_with_history(history, user_text, sentiment)

	    # A missing history counts as an empty conversation.
	    previous_turns = [] if history is None else history
	    updated = previous_turns + [(user_text, reply)]

	    return updated, "", sentiment, updated


	# ======================================================================
	# 4. Gradio interface - tabs, styling and history
	# ======================================================================

	# Markdown and CSS are assembled from line lists so that source-code
	# indentation never becomes part of the text (indented Markdown lines
	# would render as a code block).
	_CUSTOM_CSS = "\n".join(
	    [
	        "#header-markdown h1 { font-size: 1.8rem; }",
	        "#header-markdown p { font-size: 0.95rem; }",
	    ]
	)

	_HEADER_MD = "\n".join(
	    [
	        '<div id="header-markdown">',
	        "",
	        "# Chatbot de Sentimentos (ML + IA Generativa)",
	        "",
	        "Professor Rodrigo — Projeto Final de Machine Learning & Deep Learning",
	        "",
	        "- Classificação: TF-IDF + Regressão Logística (baseline).",
	        "- Geração: modelo `LLaMA 3` (Instruct) para respostas em PT-BR.",
	        "",
	        "> Dica didática: envie `baseline_pipe.pkl` na aba Files do Space",
	        "> para usar um modelo de sentimentos treinado pelo seu grupo.",
	        "",
	        "</div>",
	    ]
	)

	_HOWTO_MD = "\n".join(
	    [
	        "Como funciona esta aba?",
	        "",
	        "1. O cliente envia uma mensagem.",
	        "2. O baseline classifica o sentimento (positivo/negativo).",
	        "3. O modelo LLaMA 3 gera uma resposta empática, usando o sentimento apenas como contexto.",
	        "4. O histórico da conversa é mantido e influencia as respostas seguintes.",
	    ]
	)

	# `font` is an argument of the theme constructor, and `.set()` only accepts
	# the theme's own CSS variable names. The original `.set(border_radius=...,
	# font=...)` call therefore fails with a TypeError. The 8px radius is
	# applied through the radius variables instead, and `shadow_drop` is given
	# a valid CSS box-shadow value.
	_THEME = gr.themes.Default(
	    font=["Inter", "system-ui", "sans-serif"],
	).set(
	    block_radius="8px",
	    container_radius="8px",
	    input_radius="8px",
	    button_large_radius="8px",
	    shadow_drop="rgba(0, 0, 0, 0.05) 0px 1px 2px 0px",
	)

	with gr.Blocks(
	    title="Chatbot de Sentimentos - Professor Rodrigo",
	    theme=_THEME,
	    css=_CUSTOM_CSS,
	) as demo:
	    gr.Markdown(_HEADER_MD, elem_id="header-markdown")

	    # Tab 1: sentiment classification on its own.
	    with gr.Tab("Análise de Sentimento (isolada)"):
	        with gr.Row():
	            with gr.Column(scale=3):
	                input_text = gr.Textbox(
	                    label="Digite uma avaliação de produto",
	                    lines=5,
	                    placeholder=(
	                        "Ex.: O produto chegou rápido e superou minhas expectativas "
	                        "ou: O produto chegou quebrado, estou muito chateado."
	                    ),
	                )
	                btn_analisar = gr.Button("Analisar sentimento", variant="primary")
	            with gr.Column(scale=2):
	                output_json = gr.JSON(
	                    label="Resultado da classificação (baseline)",
	                )

	        btn_analisar.click(classify_only, inputs=input_text, outputs=output_json)

	    # Tab 2: chatbot combining classification, generation and history.
	    with gr.Tab("Chatbot (Análise + Resposta com histórico)"):
	        with gr.Row():
	            with gr.Column(scale=3):
	                chat_history = gr.Chatbot(
	                    label="Conversa com o atendente virtual",
	                    height=400,
	                )
	                user_input = gr.Textbox(
	                    label="Mensagem do cliente",
	                    lines=4,
	                    placeholder="Ex.: Estou chateado, o produto é ruim.",
	                )
	                with gr.Row():
	                    send_btn = gr.Button("Enviar", variant="primary")
	                    clear_btn = gr.Button("Limpar conversa")

	            with gr.Column(scale=2):
	                last_sentiment = gr.JSON(
	                    label="Sentimento da última mensagem",
	                )
	                gr.Markdown(_HOWTO_MD)

	        # Conversation history as a list of (customer, attendant) pairs.
	        state_history = gr.State([])

	        send_btn.click(
	            chatbot_step,
	            inputs=[state_history, user_input],
	            outputs=[chat_history, user_input, last_sentiment, state_history],
	        )

	        def clear_chat():
	            """Reset the chat display, the last sentiment and the history."""
	            return [], {}, []

	        clear_btn.click(
	            clear_chat,
	            inputs=None,
	            outputs=[chat_history, last_sentiment, state_history],
	        )


	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()