"""Gradio viewer that shows random rows of an EN-VI-JA parallel triplet dataset."""

import random

import gradio as gr
from datasets import load_dataset

DATASET_NAME = "sotalab/en-vi-ja-300k-triplets"
SPLIT = "train"

# Kept at column 0 so the Markdown structure (heading, paragraph, bullet list)
# does not depend on how the surrounding code is indented.
DESCRIPTION = """
# EN–VI–JA Triplet Dataset Viewer

This Space provides a random sample viewer for the
**300K English–Vietnamese–Japanese parallel triplet dataset**.

- Languages: English / Vietnamese / Japanese
- Format: sentence-level aligned triplets
- Source: curated from OPUS parallel corpora
"""

# Load once at startup so every button click only performs a row lookup.
print("Loading dataset...")
dataset = load_dataset(DATASET_NAME, split=SPLIT)
print(f"Loaded {len(dataset)} samples")


def sample_example() -> tuple[str, str, str]:
    """Pick one random row and return its texts.

    Returns:
        The ``"en"``, ``"vi"`` and ``"ja"`` fields of the chosen row, in that
        order, matching the order of the output textboxes wired up below.

    Raises:
        IndexError: If the loaded dataset is empty.
    """
    item = random.choice(dataset)
    return item["en"], item["vi"], item["ja"]


with gr.Blocks() as demo:
    gr.Markdown(DESCRIPTION)

    btn = gr.Button("🔄 Sample random triplet")

    # One textbox per language, laid out side by side.
    with gr.Row():
        en = gr.Textbox(label="English", lines=4)
        vi = gr.Textbox(label="Vietnamese", lines=4)
        ja = gr.Textbox(label="Japanese", lines=4)

    # The handler takes no inputs; its 3-tuple maps positionally to outputs.
    btn.click(fn=sample_example, outputs=[en, vi, ja])

# Start the server only when executed as a script, so importing this module
# (e.g. from tooling) does not launch it.
if __name__ == "__main__":
    demo.launch()