Spaces:
Sleeping
Sleeping
| from datasets import load_dataset | |
| import pandas as pd | |
def prepare_dataset(output_path: str = "eng_spa.parquet", seed: int = 42) -> None:
    """Build a shuffled parquet file from the Tatoeba eng-spa dataset.

    Loads the ``"eng-spa"`` configuration of ``ageron/tatoeba_mt_train``,
    stacks its ``validation`` and ``test`` splits into one DataFrame,
    shuffles every row with a fixed seed, and writes the ``source_text``
    and ``target_text`` columns to ``output_path``. Only those two splits
    are read here.

    Args:
        output_path: Destination of the parquet file. Defaults to
            ``"eng_spa.parquet"``, the path the script always used.
        seed: ``random_state`` for the shuffle. The default of 42 keeps
            the row order reproducible across runs.
    """
    dataset = load_dataset("ageron/tatoeba_mt_train", "eng-spa")
    frames = [dataset[split].to_pandas() for split in ("validation", "test")]
    df = (
        pd.concat(frames, axis=0)
        # frac=1 samples every row, i.e. a full shuffle of the combined data.
        .sample(frac=1, random_state=seed)
        # Drop the shuffled (and duplicated, post-concat) index labels.
        .reset_index(drop=True)
    )
    # NOTE(review): presumably source_text/target_text hold the English and
    # Spanish sentences respectively -- confirm against the dataset card.
    df[["source_text", "target_text"]].to_parquet(output_path)
    print(f"Data saved to {output_path}")
# Script entry point: build the parquet file only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    prepare_dataset()