Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| from datasets import load_dataset, Dataset, Audio | |
| from huggingface_hub import login | |
# Module-level state shared by every callback below:
#   editable_df  - working copy of the dataset shown in the table
#   dataset_name - repository name used when pushing back to the Hub
#   hub_token    - token supplied at load time, reused for the push
editable_df: pd.DataFrame = pd.DataFrame()
dataset_name: str = ""
hub_token: str = ""
def load_hf_dataset(dataset_url, token):
    """Load the ``train`` split of a Hub dataset into the editable DataFrame.

    Args:
        dataset_url: Dataset identifier handed to ``load_dataset`` (the UI
            placeholder suggests the ``username/dataset_name`` form).
        token: Hugging Face Hub token; used to log in here and stored for
            the later push.

    Returns:
        The DataFrame built from the ``train`` split.

    Raises:
        ValueError: If ``dataset_url`` or ``token`` is empty or blank.
    """
    global editable_df, dataset_name, hub_token

    # Pasted values frequently carry stray whitespace/newlines.
    repo_id = (dataset_url or "").strip()
    auth_token = (token or "").strip()
    if not repo_id:
        raise ValueError("Dataset URL must not be empty.")
    if not auth_token:
        raise ValueError("Hub token must not be empty.")

    # Authenticate and load dataset
    login(auth_token)
    dataset = load_dataset(repo_id)
    loaded_df = pd.DataFrame(dataset["train"])

    # Commit the shared state only after everything succeeded, so a failed
    # load can never leave the previous DataFrame paired with a new
    # repository name or token (which a later save would push wrongly).
    editable_df = loaded_df
    # NOTE(review): only the last path segment is kept, so the push targets
    # the token owner's namespace rather than the original one - confirm
    # this is intended.
    dataset_name = repo_id.split("/")[-1]
    hub_token = auth_token
    return editable_df
def update_row(row_index, column_name, new_value):
    """Update a specific cell in the DataFrame.

    Double quotes are stripped from ``new_value`` before it is stored.

    Args:
        row_index: Zero-based position of the row to edit.
        column_name: Name of the column to edit.
        new_value: Replacement text for the cell.

    Returns:
        The shared DataFrame. It is returned unchanged when the row is out
        of range, the column does not exist, or an input is ``None``.
    """
    if row_index is None or new_value is None:
        return editable_df

    row = int(row_index)
    # The lower bound matters: a negative label passed to ``.at`` would
    # silently append a brand-new row instead of editing an existing one.
    if 0 <= row < len(editable_df) and column_name in editable_df.columns:
        # Translate the position into its label so the write always hits
        # an existing row, whatever the index looks like.
        row_label = editable_df.index[row]
        editable_df.at[row_label, column_name] = str(new_value).replace('"', "")
    return editable_df
def save_and_upload():
    """Save the updated DataFrame back to the Hugging Face Hub.

    Converts the shared DataFrame to a ``Dataset``, casts the ``audio``
    column (when there is one) to ``Audio`` at 16 kHz, and pushes the
    result to ``dataset_name`` using the stored token.

    Returns:
        A status message for the UI.
    """
    # Nothing has been loaded yet (or the load failed): do not push an
    # empty dataset to an empty repository name.
    if not dataset_name or editable_df.empty:
        return "No dataset loaded; nothing to upload."

    # Convert DataFrame to Dataset; never persist the pandas index as an
    # extra column.
    updated_dataset = Dataset.from_pandas(editable_df, preserve_index=False)

    # Only datasets that actually carry an "audio" column need the cast;
    # casting a missing column would raise.
    if "audio" in updated_dataset.column_names:
        updated_dataset = updated_dataset.cast_column("audio", Audio(sampling_rate=16000))

    # Push updated dataset to Hugging Face
    updated_dataset.push_to_hub(dataset_name, token=hub_token)
    return f"Updated dataset successfully pushed to: {dataset_name}"
def handle_row_selection(selected_row, evt: gr.SelectData):
    """Translate a click on the table into values for the edit controls.

    Args:
        selected_row: Current contents of the table as a DataFrame.
        evt: Selection event; the first element of ``evt.index`` is used
            as the row.

    Returns:
        A ``(row, transcription)`` pair: the clicked row and the value of
        the ``transcription`` column in that row.
    """
    row = evt.index[0]
    transcriptions = selected_row["transcription"]
    return row, transcriptions[row]
# Gradio interface
# NOTE(review): the nesting below is reconstructed from a paste that lost
# its indentation; the buttons are assumed to sit inside their rows -
# confirm against the deployed layout.
with gr.Blocks() as app:
    gr.Markdown("### Hugging Face Dataset Editor")

    # Dataset source and credentials.
    with gr.Row():
        dataset_url_input = gr.Textbox(
            label="Dataset URL",
            placeholder="username/dataset_name",
        )
        token_input = gr.Textbox(
            label="Hub Token",
            placeholder="Enter your Hugging Face Hub token",
            type="password",
        )
        load_btn = gr.Button("Load Dataset")

    # Table showing the working copy of the dataset.
    data_table = gr.DataFrame(value=editable_df)

    # Edit controls: row and column are read-only, only the value is typed.
    with gr.Row():
        row_input = gr.Number(
            label="Row Index",
            value=0,
            precision=0,
            interactive=False,
        )
        col_input = gr.Text(
            label="Column Name",
            value="transcription",
            interactive=False,
        )
        new_value_input = gr.Text(
            label="New Value",
            value="new_value",
            interactive=True,
        )
        update_btn = gr.Button("Update Row")

    # Clicking a cell fills in the row index and the current transcription.
    data_table.select(
        fn=handle_row_selection,
        inputs=data_table,
        outputs=[row_input, new_value_input],
    )

    save_btn = gr.Button("Save and Upload")
    status_output = gr.Textbox(label="Status", interactive=False)

    # Button actions
    load_btn.click(
        fn=load_hf_dataset,
        inputs=[dataset_url_input, token_input],
        outputs=data_table,
    )
    update_btn.click(
        fn=update_row,
        inputs=[row_input, col_input, new_value_input],
        outputs=data_table,
    )
    save_btn.click(fn=save_and_upload, outputs=status_output)

app.launch(share=True)