| import pandas_profiling as pp | |
| from huggingface_hub.hf_api import create_repo | |
| from huggingface_hub.repository import Repository | |
| import gradio as gr | |
| import pandas as pd | |
| import tempfile | |
# --- Gradio input components -------------------------------------------------
# Credentials and naming information supplied by the user.
token = gr.Textbox(label="Your Hugging Face Token")
username = gr.Textbox(label="Your Hugging Face User name")
dataset_name = gr.Textbox(label="Dataset Name")

# The uploaded file that will be profiled.
dataset = gr.File(label="Dataset")

# --- Gradio output component -------------------------------------------------
output_text = gr.Textbox(label="Status")

# --- Static text shown in the UI ---------------------------------------------
# NOTE(review): the trailing characters in both strings look like mis-decoded
# emoji; confirm against the original file before changing them.
title = "Dataset Profiler πͺβ¨"
description = (
    "Drag and drop any dataset you want to get a detailed profile on, "
    "and this Space will profile and push it to your Hub profile as a new Space. πβ¨"
)
def profile_dataset(dataset, username, token, dataset_name):
    """Profile an uploaded CSV and publish the report as a static Space.

    The file is read with pandas, a pandas-profiling report is built from it,
    a new Space repository ``{username}/{dataset_name}`` is created, and the
    rendered report is committed to it as ``index.html``.

    Args:
        dataset: Uploaded file object; its ``name`` attribute is used as the
            path passed to ``pd.read_csv``.
        username: Hub user name that will own the new Space.
        token: Hub access token used to create, clone, and push the Space.
        dataset_name: Name of the new Space; also used in the report title.

    Returns:
        A status message containing the URL returned by ``create_repo``.

    Raises:
        Nothing is caught here: errors from reading the CSV, creating the
        repository, or any git operation propagate to the caller.
    """
    df = pd.read_csv(dataset.name)
    profile = pp.ProfileReport(df, title=f"{dataset_name} Report")

    url = create_repo(
        f"{username}/{dataset_name}",
        repo_type="space",
        token=token,
        space_sdk="static",
    )

    # Clone into a throwaway directory rather than a relative
    # "username/dataset_name" path under the working directory. That way
    # repeated or concurrent requests cannot collide on the same folder,
    # user-supplied text is never used as a filesystem path, and the clone
    # (which was set up with the caller's token) is removed once the push
    # has finished.
    with tempfile.TemporaryDirectory() as workdir:
        repo = Repository(
            local_dir=workdir,
            clone_from=url,
            use_auth_token=token,
            repo_type="space",
        )
        repo.git_pull(rebase=True)

        # A static Space serves index.html from the repository root.
        profile.to_file(f"{workdir}/index.html")

        repo.git_add()
        repo.git_commit(commit_message="Dataset report")
        repo.git_push()

    return f"Your dataset report will be ready at {url}"
# Build the web UI around profile_dataset and start serving it.
# The order of `inputs` must match the positional parameters of
# profile_dataset: (dataset, username, token, dataset_name).
gr.Interface(
    fn=profile_dataset,
    inputs=[dataset, username, token, dataset_name],
    outputs=[output_text],
    title=title,
    description=description,
    enable_queue=True,
).launch(debug=True)