burtenshaw
committed on
Commit
·
42d4927
1
Parent(s):
723b8a3
add hub search to app
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import gradio as gr
|
|
| 2 |
from datasets import load_dataset, Dataset
|
| 3 |
from difflib import ndiff
|
| 4 |
import pandas as pd
|
|
|
|
| 5 |
|
| 6 |
from semhash import SemHash
|
| 7 |
from semhash.datamodels import DeduplicationResult
|
|
@@ -323,7 +324,12 @@ with gr.Blocks(
|
|
| 323 |
)
|
| 324 |
|
| 325 |
with gr.Row():
|
| 326 |
-
dataset1_name =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
dataset1_split = gr.Textbox(
|
| 328 |
value=default_dataset1_split, label="Dataset 1 Split"
|
| 329 |
)
|
|
@@ -334,8 +340,11 @@ with gr.Blocks(
|
|
| 334 |
dataset2_inputs = gr.Column(visible=True)
|
| 335 |
with dataset2_inputs:
|
| 336 |
with gr.Row():
|
| 337 |
-
dataset2_name =
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
| 339 |
)
|
| 340 |
dataset2_split = gr.Textbox(
|
| 341 |
value=default_dataset2_split, label="Dataset 2 Split"
|
|
|
|
| 2 |
from datasets import load_dataset, Dataset
|
| 3 |
from difflib import ndiff
|
| 4 |
import pandas as pd
|
| 5 |
+
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
| 6 |
|
| 7 |
from semhash import SemHash
|
| 8 |
from semhash.datamodels import DeduplicationResult
|
|
|
|
| 324 |
)
|
| 325 |
|
| 326 |
with gr.Row():
|
| 327 |
+
dataset1_name = HuggingfaceHubSearch(
|
| 328 |
+
label="Dataset 1 Name",
|
| 329 |
+
placeholder="Search for datasets on HuggingFace Hub",
|
| 330 |
+
search_type="dataset",
|
| 331 |
+
value=default_dataset_name,
|
| 332 |
+
)
|
| 333 |
dataset1_split = gr.Textbox(
|
| 334 |
value=default_dataset1_split, label="Dataset 1 Split"
|
| 335 |
)
|
|
|
|
| 340 |
dataset2_inputs = gr.Column(visible=True)
|
| 341 |
with dataset2_inputs:
|
| 342 |
with gr.Row():
|
| 343 |
+
dataset2_name = HuggingfaceHubSearch(
|
| 344 |
+
label="Dataset 2 Name",
|
| 345 |
+
placeholder="Search for datasets on HuggingFace Hub",
|
| 346 |
+
search_type="dataset",
|
| 347 |
+
value=default_dataset_name,
|
| 348 |
)
|
| 349 |
dataset2_split = gr.Textbox(
|
| 350 |
value=default_dataset2_split, label="Dataset 2 Split"
|