File size: 1,114 Bytes
5ee8af0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import random
import gradio as gr
from datasets import load_dataset
# Hub identifier of the parallel corpus and the split served by this viewer.
DATASET_NAME = "sotalab/en-vi-ja-300k-triplets"
SPLIT = "train"

# Load the split once, at import time, so that button clicks only have to
# index into the already-loaded dataset. The prints bracket the load so the
# startup log shows when it began and how many rows were obtained.
print("Loading dataset...")
dataset = load_dataset(path=DATASET_NAME, split=SPLIT)
print(f"Loaded {len(dataset)} samples")
def sample_example():
    """Return the texts of one uniformly chosen row of ``dataset``.

    Returns:
        A 3-tuple ``(en, vi, ja)`` holding the row's values under the
        ``"en"``, ``"vi"`` and ``"ja"`` keys, in that order.
    """
    row = random.choice(dataset)
    english, vietnamese, japanese = row["en"], row["vi"], row["ja"]
    return english, vietnamese, japanese
# Components created inside the Blocks context are attached to `demo`.
with gr.Blocks() as demo:
    # Static header describing the dataset. Written as adjacent literals so
    # the markdown text does not depend on how this block is indented.
    gr.Markdown(
        "\n"
        "# EN–VI–JA Triplet Dataset Viewer\n"
        "This Space provides a random sample viewer for the **300K English–Vietnamese–Japanese parallel triplet dataset**.\n"
        "- Languages: English / Vietnamese / Japanese\n"
        "- Format: sentence-level aligned triplets\n"
        "- Source: curated from OPUS parallel corpora\n"
    )

    btn = gr.Button("🔄 Sample random triplet")

    # One text box per language, grouped in a single row.
    with gr.Row():
        en = gr.Textbox(label="English", lines=4)
        vi = gr.Textbox(label="Vietnamese", lines=4)
        ja = gr.Textbox(label="Japanese", lines=4)

    # The handler takes no inputs; the three values it returns are written
    # to the boxes in the order listed in `outputs`.
    btn.click(fn=sample_example, outputs=[en, vi, ja])

# Start serving the app.
demo.launch()
|