import random

import gradio as gr
from datasets import load_dataset
# Identifier and split handed to `load_dataset` below.
DATASET_NAME = "sotalab/en-vi-ja-300k-triplets"
SPLIT = "train"

# The split is loaded once, at module level; `sample_example` reads rows
# from the resulting `dataset` object on every button click.
print("Loading dataset...")
dataset = load_dataset(path=DATASET_NAME, split=SPLIT)
print(f"Loaded {len(dataset)} samples")
def sample_example():
    """Return one randomly chosen triplet from the module-level ``dataset``.

    Returns:
        A 3-tuple holding the chosen row's ``"en"``, ``"vi"`` and ``"ja"``
        values, in that order.
    """
    row = random.choice(dataset)
    return tuple(row[lang] for lang in ("en", "vi", "ja"))
# Page layout: intro text, a sampling button, then one textbox per language.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # EN–VI–JA Triplet Dataset Viewer
        This Space provides a random sample viewer for the **300K English–Vietnamese–Japanese parallel triplet dataset**.
        - Languages: English / Vietnamese / Japanese
        - Format: sentence-level aligned triplets
        - Source: curated from OPUS parallel corpora
        """
    )
    btn = gr.Button("🔄 Sample random triplet")

    # The three textboxes are created inside one Row, in en / vi / ja order.
    with gr.Row():
        en, vi, ja = [
            gr.Textbox(label=language, lines=4)
            for language in ("English", "Vietnamese", "Japanese")
        ]

    # A click takes no inputs; the tuple returned by `sample_example` is
    # mapped onto the three textboxes in the same order.
    btn.click(sample_example, inputs=None, outputs=[en, vi, ja])
# Start the web server only when this file is executed as a script, so that
# importing the module (e.g. to reuse `demo` or `sample_example`) does not
# launch a server as a side effect.
if __name__ == "__main__":
    demo.launch()