# README / app.py
# transybao1393
# Add dataset viewer app
# 5ee8af0
import random
import gradio as gr
from datasets import load_dataset
# Identifier and split handed to `load_dataset` below.
DATASET_NAME = "sotalab/en-vi-ja-300k-triplets"
SPLIT = "train"

# Load once at module import so the click handler can reuse the same
# `dataset` object on every call; progress is reported on stdout.
print("Loading dataset...")
dataset = load_dataset(path=DATASET_NAME, split=SPLIT)
print(f"Loaded {len(dataset)} samples")
def sample_example():
    """Pick one random row from the module-level ``dataset``.

    Returns:
        A 3-tuple holding the row's values under the ``"en"``, ``"vi"``
        and ``"ja"`` keys, in that order.
    """
    row = random.choice(dataset)
    # Key order fixes the order of the returned tuple.
    return tuple(row[lang] for lang in ("en", "vi", "ja"))
# Page layout: intro text, a sample button, and three text boxes in one row.
with gr.Blocks() as demo:
    # The Markdown text is kept flush-left exactly as it appears in the source.
    gr.Markdown(
"""
# EN–VI–JA Triplet Dataset Viewer
This Space provides a random sample viewer for the **300K English–Vietnamese–Japanese parallel triplet dataset**.
- Languages: English / Vietnamese / Japanese
- Format: sentence-level aligned triplets
- Source: curated from OPUS parallel corpora
"""
    )

    btn = gr.Button("🔄 Sample random triplet")

    with gr.Row():
        en = gr.Textbox(label="English", lines=4)
        vi = gr.Textbox(label="Vietnamese", lines=4)
        ja = gr.Textbox(label="Japanese", lines=4)

    # The handler takes no inputs; its 3-tuple result is written to the
    # three text boxes in the order listed in `outputs`.
    btn.click(fn=sample_example, inputs=None, outputs=[en, vi, ja])

demo.launch()