!pip install gradio
import pandas as pd
import gradio as gr
from transformers import pipeline, MarianMTModel, MarianTokenizer
from sentence_transformers import SentenceTransformer, util
import matplotlib.pyplot as plt
import torch

# Load the review data: drop rows that lack a review text or a score, then
# keep only the first 1000 remaining rows so the demo runs quickly.
df = (
    pd.read_csv("/kaggle/input/amazon-fine-food-reviews/Reviews.csv")
    .dropna(subset=['Text', 'Score'])
    .head(1000)
)

# Load the translation model and its tokenizer from the same checkpoint.
VI_EN_CHECKPOINT = "Helsinki-NLP/opus-mt-vi-en"
vi_en_tokenizer = MarianTokenizer.from_pretrained(VI_EN_CHECKPOINT)
vi_en_model = MarianMTModel.from_pretrained(VI_EN_CHECKPOINT)

# Sentiment classification pipeline shared by both tabs of the app.
sentiment = pipeline(
    task="sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment",
)

# Map the classifier's raw label ids (LABEL_0..LABEL_2) to readable sentiment
# names; the position in the tuple is the numeric suffix of the raw label.
label_map = {
    f"LABEL_{position}": readable_name
    for position, readable_name in enumerate(("Negative", "Neutral", "Positive"))
}

# Sentence-embedding model used for semantic search over the reviews.
embedder = SentenceTransformer('all-MiniLM-L6-v2')


def _embed_review(review_text):
    """Return the embedding of a single review text as a tensor."""
    return embedder.encode(review_text, convert_to_tensor=True)


# Pre-compute one embedding per review so each search only has to encode
# the query.
df['embedding'] = df['Text'].apply(_embed_review)

def translate_vi_to_en(text):
    """Translate a Vietnamese string to English with the MarianMT model.

    Args:
        text: The text to translate (a single string).

    Returns:
        The decoded translation of the first generated sequence, with
        special tokens removed.
    """
    # truncation=True caps the input at the tokenizer's configured maximum
    # length so an over-long comment cannot overflow the model's position
    # limit and fail inside generate().
    inputs = vi_en_tokenizer(
        text, return_tensors="pt", padding=True, truncation=True
    )
    translated = vi_en_model.generate(**inputs)
    return vi_en_tokenizer.decode(translated[0], skip_special_tokens=True)

def analyze_sentiment(text):
    """Translate a comment to English and classify its sentiment.

    Args:
        text: The comment entered by the user.

    Returns:
        A tuple ``(english_text, label, confidence)`` where ``label`` is the
        readable sentiment name (or the raw model label if it is not in
        ``label_map``) and ``confidence`` is the model score formatted with
        two decimals. All three are empty strings when ``text`` is empty or
        contains only whitespace.
    """
    # Nothing to analyse: skip both models instead of classifying whatever
    # the translator produces for an empty prompt.
    if not text or not text.strip():
        return "", "", ""

    en_text = translate_vi_to_en(text)
    # Truncate so a long translation cannot exceed the classifier's input
    # limit.
    result = sentiment(en_text, truncation=True, max_length=512)[0]
    label = label_map.get(result['label'], result['label'])  # readable label
    return en_text, label, f"{result['score']:.2f}"

def keyword_search(keyword):
    """Find the reviews most similar to ``keyword`` and summarise them.

    Args:
        keyword: Free-text query; it is embedded and compared against the
            pre-computed review embeddings by cosine similarity.

    Returns:
        A tuple ``(table, avg_score, fig)``:
        ``table`` is a DataFrame with the ``Text``, ``Score`` and predicted
        ``sentiment`` of the (at most 10) closest reviews, ``avg_score`` is
        their mean ``Score`` rounded to two decimals, and ``fig`` is a
        matplotlib bar chart of the sentiment counts.
    """
    query_emb = embedder.encode(keyword, convert_to_tensor=True)
    cos_scores = util.pytorch_cos_sim(query_emb, torch.stack(df['embedding'].tolist()))[0]
    # torch.topk raises if k exceeds the number of candidates.
    top_k_idx = torch.topk(cos_scores, k=min(10, len(df))).indices

    # Work on an explicit copy: adding a column to a selection of the global
    # frame triggers SettingWithCopyWarning on pandas without copy-on-write.
    subset = df.iloc[top_k_idx.cpu().numpy()].copy()

    # Classify every hit exactly once, in a single batched call. Truncation
    # keeps long reviews within the classifier's input limit.
    predictions = sentiment(subset['Text'].tolist(), truncation=True, max_length=512)
    subset['sentiment'] = [
        label_map.get(pred['label'], pred['label']) for pred in predictions
    ]
    avg_score = subset['Score'].mean()

    counts = subset['sentiment'].value_counts()
    # Fixed category order (with zeros) so bar colours always line up.
    counts = counts.reindex(['Negative', 'Neutral', 'Positive'], fill_value=0)

    # Bar chart of the sentiment distribution.
    fig, ax = plt.subplots()
    counts.plot(kind='bar', ax=ax, color=['red', 'gray', 'green'])
    ax.set_ylabel('Số lượng')
    ax.set_title(f'Cảm xúc cho từ khóa: {keyword}')
    fig.tight_layout()
    # Unregister the figure from pyplot so figures do not pile up across
    # requests; the Figure object itself remains usable by the caller.
    plt.close(fig)

    return subset[['Text', 'Score', 'sentiment']], round(avg_score, 2), fig

# Gradio UI: two tabs, one per feature. Components are rendered in the order
# they are created inside each context manager, so statement order here is
# the on-screen layout.
with gr.Blocks() as demo:
    gr.Markdown("# 💬 Phân tích cảm xúc & 🔍 Tìm kiếm comment liên quan")

    # Tab 1: translate a single comment and show its sentiment.
    with gr.Tab("Phân tích cảm xúc"):
        vi_input = gr.Textbox(label="Nhập bình luận tiếng Việt")
        trans = gr.Textbox(label="Dịch sang tiếng Anh")
        label = gr.Textbox(label="Cảm xúc")
        score = gr.Textbox(label="Độ tin cậy")
        # Pressing Enter in the input runs analyze_sentiment; its three
        # return values fill the three output boxes in this order.
        vi_input.submit(analyze_sentiment, vi_input, [trans, label, score])

    # Tab 2: semantic search over the reviews plus a sentiment summary.
    with gr.Tab("Tìm kiếm theo từ khóa"):
        keyword = gr.Textbox(label="Từ khóa")
        out_df = gr.Dataframe(label="Các comment liên quan")
        avg = gr.Textbox(label="Điểm trung bình đánh giá")
        fig = gr.Plot(label="Biểu đồ cảm xúc")
        # keyword_search returns (table, average score, figure), matching
        # the output components listed here.
        keyword.submit(keyword_search, keyword, [out_df, avg, fig])

# Start the web app.
demo.launch()