Spaces:
Sleeping
Sleeping
Commit
·
9d04ba5
1
Parent(s):
1cb8519
Refactor code to show files and directories in the
Browse files
app.py
CHANGED
|
@@ -106,10 +106,11 @@ def list_git_repo_files_and_directories(repo_url: str, branch: str = "main"):
|
|
| 106 |
return get_files_and_directories(response)
|
| 107 |
|
| 108 |
|
| 109 |
-
def
|
| 110 |
with contextlib.suppress(Exception):
|
| 111 |
files_and_directories = list_git_repo_files_and_directories(url)
|
| 112 |
directories = files_and_directories.get("directories", [])
|
|
|
|
| 113 |
print(directories)
|
| 114 |
return gr.Dropdown(
|
| 115 |
label="Directories",
|
|
@@ -118,34 +119,76 @@ def show_directories(url: str):
|
|
| 118 |
visible=True,
|
| 119 |
interactive=True,
|
| 120 |
multiselect=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
)
|
| 122 |
|
| 123 |
|
| 124 |
-
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
gr.Markdown("URL for the GitHub repository where the dataset is currently hosted")
|
| 127 |
source_github_repository = gr.Textbox(lines=1, label="Source GitHub Repository URL")
|
| 128 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
| 129 |
folder_in_github_repo = gr.Dropdown(
|
| 130 |
None,
|
| 131 |
-
label="Folder in GitHub Repository to
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
allow_custom_value=True,
|
| 133 |
visible=True,
|
| 134 |
)
|
| 135 |
source_github_repository.change(
|
| 136 |
-
|
|
|
|
|
|
|
| 137 |
)
|
| 138 |
-
gr.Markdown("Destination
|
|
|
|
| 139 |
destination_hf_hub_repository = gr.Textbox(
|
| 140 |
label="Destination Hugging Face Repository",
|
| 141 |
-
placeholder="username
|
| 142 |
)
|
|
|
|
| 143 |
gr.Markdown(
|
| 144 |
"""You need to provide a token with write access to the namespace you want to upload to.
|
| 145 |
-
You can generate
|
| 146 |
)
|
| 147 |
hf_token = gr.Textbox(label="Hugging Face Token", type="password")
|
| 148 |
-
summit_btn = gr.Button()
|
| 149 |
result = gr.Markdown(label="Summary", visible=True)
|
| 150 |
summit_btn.click(
|
| 151 |
push_to_hf,
|
|
|
|
| 106 |
return get_files_and_directories(response)
|
| 107 |
|
| 108 |
|
| 109 |
+
def show_files_and_directories(url: str):
|
| 110 |
with contextlib.suppress(Exception):
|
| 111 |
files_and_directories = list_git_repo_files_and_directories(url)
|
| 112 |
directories = files_and_directories.get("directories", [])
|
| 113 |
+
files = files_and_directories.get("files", [])
|
| 114 |
print(directories)
|
| 115 |
return gr.Dropdown(
|
| 116 |
label="Directories",
|
|
|
|
| 119 |
visible=True,
|
| 120 |
interactive=True,
|
| 121 |
multiselect=True,
|
| 122 |
+
), gr.Dropdown(
|
| 123 |
+
label="Files",
|
| 124 |
+
choices=files,
|
| 125 |
+
max_choices=1,
|
| 126 |
+
visible=True,
|
| 127 |
+
interactive=True,
|
| 128 |
+
multiselect=True,
|
| 129 |
)
|
| 130 |
|
| 131 |
|
| 132 |
+
html_text_app_description = """
|
| 133 |
+
Whilst GitHub is great for hosting code the Hugging Face Datasets Hub is a better place to host datasets.
|
| 134 |
+
Some of the benefits of hosting datasets on the Hugging Face Datasets Hub are:
|
| 135 |
+
<br>
|
| 136 |
+
<ul>
|
| 137 |
+
<li>Hosting for large datasets</li>
|
| 138 |
+
<li>An interactive preview of your dataset</li>
|
| 139 |
+
<li>Access to the dataset via many tools and libraries including; datasets, pandas, polars, dask and DuckDB</li>
|
| 140 |
+
</ul>
|
| 141 |
+
|
| 142 |
+
<br>
|
| 143 |
+
This app will help you migrate a dataset currently hosted on GitHub to the Hugging Face Datasets Hub.
|
| 144 |
+
"""
|
| 145 |
+
|
| 146 |
+
with gr.Blocks(theme=gr.themes.Base()) as demo:
|
| 147 |
+
gr.HTML(
|
| 148 |
+
"""<h1 style='text-align: center;'> GitHub to Hugging Face Hub Dataset Migration Tool</h1>
|
| 149 |
+
<center><i> ✨ Migrate a dataset in a few steps ✨</i></center>"""
|
| 150 |
+
)
|
| 151 |
+
gr.HTML(
|
| 152 |
+
"""<center> GitHub is a great place for sharing code but the Hugging Face Hub has many advantages for sharing datasets.
|
| 153 |
+
<br> This Space will guide you through the process of migrating a dataset from GitHub to the Hugging Face Hub. </center>"""
|
| 154 |
+
)
|
| 155 |
+
gr.Markdown("### Location of existing dataset")
|
| 156 |
gr.Markdown("URL for the GitHub repository where the dataset is currently hosted")
|
| 157 |
source_github_repository = gr.Textbox(lines=1, label="Source GitHub Repository URL")
|
| 158 |
+
gr.Markdown("### Select files and folder to migrate")
|
| 159 |
+
gr.Markdown(
|
| 160 |
+
"(Optional): select a specific folder and/or files to migrate from the GitHub repository."
|
| 161 |
+
)
|
| 162 |
folder_in_github_repo = gr.Dropdown(
|
| 163 |
None,
|
| 164 |
+
label="Folder in the GitHub Repository to migrate",
|
| 165 |
+
allow_custom_value=True,
|
| 166 |
+
visible=True,
|
| 167 |
+
)
|
| 168 |
+
files_in_github_repo = gr.Dropdown(
|
| 169 |
+
None,
|
| 170 |
+
label="Files in GitHub Repository to migrate",
|
| 171 |
allow_custom_value=True,
|
| 172 |
visible=True,
|
| 173 |
)
|
| 174 |
source_github_repository.change(
|
| 175 |
+
show_files_and_directories,
|
| 176 |
+
[source_github_repository],
|
| 177 |
+
[folder_in_github_repo, files_in_github_repo],
|
| 178 |
)
|
| 179 |
+
gr.Markdown("### Destination for your migrated dataset")
|
| 180 |
+
gr.Markdown("Destination repository for your dataset on the Hugging Face Hub")
|
| 181 |
destination_hf_hub_repository = gr.Textbox(
|
| 182 |
label="Destination Hugging Face Repository",
|
| 183 |
+
placeholder="i.e. <hugging face username>/<repository_name>",
|
| 184 |
)
|
| 185 |
+
gr.Markdown("## Authentication")
|
| 186 |
gr.Markdown(
|
| 187 |
"""You need to provide a token with write access to the namespace you want to upload to.
|
| 188 |
+
You can generate/access your Hugging FAce token from [here](https://huggingface.co/settings/token)."""
|
| 189 |
)
|
| 190 |
hf_token = gr.Textbox(label="Hugging Face Token", type="password")
|
| 191 |
+
summit_btn = gr.Button("Migrate Dataset")
|
| 192 |
result = gr.Markdown(label="Summary", visible=True)
|
| 193 |
summit_btn.click(
|
| 194 |
push_to_hf,
|