# Spaces: Sleeping
| import datasets | |
| from datasets import load_dataset | |
| import config | |
def download(mode):
    """Load one split of the configured dataset.

    Args:
        mode: Split name, passed to ``load_dataset`` as its ``split`` argument.

    Returns:
        Whatever ``load_dataset`` returns for ``config.DATASET`` and that split.
    """
    print("Downloading Dataset - ", config.DATASET, "...")
    return load_dataset(config.DATASET, split=mode)
def prepare_prompts_responses(dataset, text_field=None):
    """Pair every rank-0 assistant message with the prompt it replies to.

    The dataset is converted to a pandas DataFrame and is expected to carry
    the columns ``role``, ``rank``, ``message_id``, ``parent_id`` and
    ``text``. For each row whose ``role`` is ``"assistant"`` and whose
    ``rank`` equals ``0.0``, the ``text`` of the ``"prompter"`` row whose
    ``message_id`` matches the assistant's ``parent_id`` is looked up and
    combined into ``"### Human: <prompt> ### Assistant: <reply>"``.

    Args:
        dataset: Object exposing ``to_pandas()`` that returns the DataFrame
            described above.
        text_field: Name of the column that receives the combined string.
            Defaults to ``config.DATASET_TEXT_FIELD`` when omitted.

    Returns:
        A new DataFrame holding the selected assistant rows (original index
        preserved) with the extra ``text_field`` column.

    Raises:
        KeyError: If an assistant's ``parent_id`` does not match the
            ``message_id`` of any prompter row.
    """
    print("Preparing Prompt and Assistant....")
    if text_field is None:
        text_field = config.DATASET_TEXT_FIELD

    dataset_df = dataset.to_pandas()

    prompters = dataset_df[dataset_df["role"] == "prompter"]
    # Build the id -> prompt text lookup once; a dict lookup per reply is much
    # cheaper than a DataFrame ``.loc`` call per row and still raises KeyError
    # for an unknown parent.
    prompt_text_by_id = prompters.set_index("message_id")["text"].to_dict()

    is_top_assistant = (dataset_df["role"] == "assistant") & (dataset_df["rank"] == 0.0)
    # Take an explicit copy: adding a column to a boolean-mask slice triggers
    # pandas' SettingWithCopyWarning and has ambiguous copy/view semantics.
    assistants = dataset_df[is_top_assistant].copy()

    assistants[text_field] = [
        "### Human: " + prompt_text_by_id[parent_id] + " ### Assistant: " + reply
        for parent_id, reply in zip(assistants["parent_id"], assistants["text"])
    ]
    return assistants
def preparedata(mode):
    """Download a split and turn it into a dataset of prompt/response rows.

    Args:
        mode: Split name, forwarded to ``download``.

    Returns:
        The result of ``datasets.Dataset.from_pandas`` applied to the frame
        produced by ``prepare_prompts_responses``.
    """
    print("Preparing data for - ", mode, "...")
    paired_frame = prepare_prompts_responses(download(mode=mode))
    return datasets.Dataset.from_pandas(paired_frame)