Spaces:

thang3092004
/

project2

Runtime error

App Files Files Community

project2 / app.py

thang3092004

app.py

750801a verified about 1 year ago

Raw

History Blame Contribute Delete

3.16 kB

	# NOTE: the notebook shell escape "!pip install gradio" is a SyntaxError in a
	# plain .py module; install gradio (and the other third-party packages
	# imported below) through the environment, e.g. a requirements.txt file.
	import pandas as pd
	import gradio as gr
	from transformers import pipeline, MarianMTModel, MarianTokenizer
	from sentence_transformers import SentenceTransformer, util
	import matplotlib.pyplot as plt
	import torch

	# Load the review data, keeping only rows that have both a text and a score.
	df = pd.read_csv("/kaggle/input/amazon-fine-food-reviews/Reviews.csv")
	df = df.dropna(subset=['Text', 'Score'])
	# Cap the working set at the first 1000 remaining rows so the app runs quickly.
	df = df.head(1000)

	# Translation model: tokenizer and weights are loaded from the same checkpoint.
	_VI_EN_CHECKPOINT = "Helsinki-NLP/opus-mt-vi-en"
	vi_en_tokenizer = MarianTokenizer.from_pretrained(_VI_EN_CHECKPOINT)
	vi_en_model = MarianMTModel.from_pretrained(_VI_EN_CHECKPOINT)

	# Sentiment model
	sentiment = pipeline(
	    task="sentiment-analysis",
	    model="cardiffnlp/twitter-roberta-base-sentiment",
	)

	# Map the classifier's raw label ids to readable sentiment names.
	label_map = dict(
	    LABEL_0="Negative",
	    LABEL_1="Neutral",
	    LABEL_2="Positive",
	)

	# Sentence-embedding model; every review text is encoded once, up front,
	# and the resulting tensor is stored alongside the row.
	embedder = SentenceTransformer('all-MiniLM-L6-v2')
	df['embedding'] = df['Text'].apply(
	    lambda review_text: embedder.encode(review_text, convert_to_tensor=True)
	)

	def translate_vi_to_en(text):
	    """Translate Vietnamese text to English with the MarianMT model.

	    Args:
	        text: The Vietnamese input string.

	    Returns:
	        The decoded English translation. An empty string is returned for
	        empty or whitespace-only input, without invoking the model.
	    """
	    if not text or not text.strip():
	        return ""
	    # truncation=True shortens over-long input to the tokenizer's maximum
	    # length instead of feeding the model more tokens than it supports.
	    inputs = vi_en_tokenizer(
	        text, return_tensors="pt", padding=True, truncation=True
	    )
	    translated = vi_en_model.generate(**inputs)
	    # A single input string produces a batch of one; decode that sequence.
	    return vi_en_tokenizer.decode(translated[0], skip_special_tokens=True)

	def analyze_sentiment(text):
	    """Translate a Vietnamese comment to English and classify its sentiment.

	    Args:
	        text: The Vietnamese comment.

	    Returns:
	        A tuple ``(english_text, label, confidence)`` where ``label`` is the
	        classifier label passed through ``label_map`` (the raw label is kept
	        when it has no mapping) and ``confidence`` is the score formatted
	        with two decimals. Three empty strings are returned for empty or
	        whitespace-only input, without invoking any model.
	    """
	    if not text or not text.strip():
	        return "", "", ""
	    en_text = translate_vi_to_en(text)
	    # truncation=True keeps the classifier input within its maximum length.
	    result = sentiment(en_text, truncation=True)[0]
	    raw_label = result['label']
	    label = label_map.get(raw_label, raw_label)
	    return en_text, label, f"{result['score']:.2f}"

	def keyword_search(keyword):
	    """Find the reviews most similar to ``keyword`` and summarise their sentiment.

	    Args:
	        keyword: Free-text query, embedded with the same model as the reviews.

	    Returns:
	        A tuple ``(table, avg_score, fig)``:
	        ``table`` is a DataFrame with the ``Text``, ``Score`` and
	        ``sentiment`` columns of the closest reviews (at most 10);
	        ``avg_score`` is the mean ``Score`` of those reviews rounded to two
	        decimals; ``fig`` is a matplotlib bar chart of the
	        Negative/Neutral/Positive counts.
	    """
	    query_emb = embedder.encode(keyword, convert_to_tensor=True)
	    corpus_emb = torch.stack(df['embedding'].tolist())
	    cos_scores = util.pytorch_cos_sim(query_emb, corpus_emb)[0]
	    # torch.topk raises when k exceeds the number of candidates, so clamp it.
	    top_k = min(10, cos_scores.numel())
	    top_k_idx = torch.topk(cos_scores, k=top_k).indices

	    # Work on an explicit copy so adding a column never targets a selection of df.
	    subset = df.iloc[top_k_idx.cpu().numpy()].copy()
	    # Classify all selected texts in one call (each text is scored once);
	    # truncation=True keeps long reviews within the model's maximum length.
	    predictions = sentiment(subset['Text'].tolist(), truncation=True)
	    subset['sentiment'] = [
	        label_map.get(pred['label'], pred['label']) for pred in predictions
	    ]
	    avg_score = subset['Score'].mean()

	    # Fixed category order so the bar colours always line up with the labels.
	    counts = subset['sentiment'].value_counts()
	    counts = counts.reindex(['Negative', 'Neutral', 'Positive'], fill_value=0)

	    # Chart
	    fig, ax = plt.subplots()
	    counts.plot(kind='bar', ax=ax, color=['red', 'gray', 'green'])
	    ax.set_ylabel('Số lượng')
	    ax.set_title(f'Cảm xúc cho từ khóa: {keyword}')
	    # Lay out this figure explicitly rather than pyplot's global "current" one.
	    fig.tight_layout()
	    # Drop the figure from pyplot's registry so figures do not accumulate
	    # across calls; the Figure object itself stays usable by the caller.
	    plt.close(fig)

	    return subset[['Text', 'Score', 'sentiment']], round(avg_score, 2), fig

	# Gradio UI: two tabs, each wired to one of the functions above and
	# triggered when the user submits the tab's input textbox.
	with gr.Blocks() as demo:
	    gr.Markdown("# 💬 Phân tích cảm xúc & 🔍 Tìm kiếm comment liên quan")

	    # Tab 1: translate a Vietnamese comment and show its sentiment.
	    with gr.Tab("Phân tích cảm xúc"):
	        vi_input = gr.Textbox(label="Nhập bình luận tiếng Việt")
	        trans = gr.Textbox(label="Dịch sang tiếng Anh")
	        label = gr.Textbox(label="Cảm xúc")
	        score = gr.Textbox(label="Độ tin cậy")
	        vi_input.submit(
	            fn=analyze_sentiment,
	            inputs=vi_input,
	            outputs=[trans, label, score],
	        )

	    # Tab 2: keyword search over the reviews with a sentiment summary.
	    with gr.Tab("Tìm kiếm theo từ khóa"):
	        keyword = gr.Textbox(label="Từ khóa")
	        out_df = gr.Dataframe(label="Các comment liên quan")
	        avg = gr.Textbox(label="Điểm trung bình đánh giá")
	        fig = gr.Plot(label="Biểu đồ cảm xúc")
	        keyword.submit(
	            fn=keyword_search,
	            inputs=keyword,
	            outputs=[out_df, avg, fig],
	        )

	demo.launch()