import random
import gradio as gr
from datasets import load_dataset

# Identifier and split handed to `load_dataset` below.
DATASET_NAME = "sotalab/en-vi-ja-300k-triplets"
SPLIT = "train"

# Load once at import time so every sampling call reuses the same object.
print("Loading dataset...")
dataset = load_dataset(path=DATASET_NAME, split=SPLIT)
print(f"Loaded {len(dataset)} samples")

def sample_example():
    """Return the en/vi/ja fields of one randomly chosen dataset row.

    Returns:
        A 3-tuple ``(en, vi, ja)`` read from the "en", "vi" and "ja" keys
        of a row picked with ``random.choice`` from the module-level
        ``dataset``.
    """
    row = random.choice(dataset)
    # Order matters: callers map the tuple positionally onto their outputs.
    return tuple(row[lang] for lang in ("en", "vi", "ja"))

# Assemble the page: intro text, a sample button, and one text box per language.
with gr.Blocks() as demo:
    gr.Markdown(
        value="""
        # EN–VI–JA Triplet Dataset Viewer

        This Space provides a random sample viewer for the **300K English–Vietnamese–Japanese parallel triplet dataset**.

        - Languages: English / Vietnamese / Japanese  
        - Format: sentence-level aligned triplets  
        - Source: curated from OPUS parallel corpora
        """
    )

    btn = gr.Button(value="🔄 Sample random triplet")

    # The three text boxes are created inside a single row container.
    with gr.Row():
        en, vi, ja = [
            gr.Textbox(label=language, lines=4)
            for language in ("English", "Vietnamese", "Japanese")
        ]

    # sample_example returns (en, vi, ja); the outputs list uses the same order.
    btn.click(fn=sample_example, outputs=[en, vi, ja])

# Launch the app defined above.
demo.launch()