Spaces:
Runtime error
Runtime error
polinaeterna
committed on
Commit
·
4fe2860
1
Parent(s):
fd7a758
fix
Browse files
app.py
CHANGED
|
@@ -90,7 +90,7 @@ def get_first_parquet_filename(dataset, config, split):
|
|
| 90 |
|
| 91 |
|
| 92 |
@spaces.GPU
|
| 93 |
-
def run_quality_check(dataset, config, split, column, batch_size, num_examples):
|
| 94 |
logging.info(f"Fetching data for {dataset=} {config=} {split=} {column=}")
|
| 95 |
try:
|
| 96 |
filename = get_first_parquet_filename(dataset, config, split)
|
|
@@ -250,15 +250,15 @@ with gr.Blocks() as demo:
|
|
| 250 |
return gr.HTML(value=html_code)
|
| 251 |
|
| 252 |
with gr.Row():
|
| 253 |
-
text_column_dropdown = gr.Dropdown(label="Text column name"
|
| 254 |
-
nested_text_column_dropdown = gr.Dropdown(
|
| 255 |
|
| 256 |
def _resolve_dataset_selection(dataset: str, default_subset: str, default_split: str, text_feature):
|
| 257 |
if "/" not in dataset.strip().strip("/"):
|
| 258 |
return {
|
| 259 |
subset_dropdown: gr.Dropdown(visible=False),
|
| 260 |
split_dropdown: gr.Dropdown(visible=False),
|
| 261 |
-
text_column_dropdown: gr.Dropdown(
|
| 262 |
nested_text_column_dropdown: gr.Dropdown(visible=False)
|
| 263 |
}
|
| 264 |
info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=3).json()
|
|
@@ -266,7 +266,7 @@ with gr.Blocks() as demo:
|
|
| 266 |
return {
|
| 267 |
subset_dropdown: gr.Dropdown(visible=False),
|
| 268 |
split_dropdown: gr.Dropdown(visible=False),
|
| 269 |
-
text_column_dropdown: gr.Dropdown(label="Text column name"
|
| 270 |
nested_text_column_dropdown: gr.Dropdown(visible=False)
|
| 271 |
}
|
| 272 |
subsets: list[str] = list(info_resp["dataset_info"])
|
|
@@ -285,8 +285,7 @@ with gr.Blocks() as demo:
|
|
| 285 |
return {
|
| 286 |
subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
|
| 287 |
split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
|
| 288 |
-
text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features, label="Text column name",
|
| 289 |
-
info="Text colum name to check"),
|
| 290 |
nested_text_column_dropdown: gr.Dropdown(visible=False),
|
| 291 |
}
|
| 292 |
logging.info(nested_text_features)
|
|
@@ -296,8 +295,7 @@ with gr.Blocks() as demo:
|
|
| 296 |
subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
|
| 297 |
split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
|
| 298 |
text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features,
|
| 299 |
-
label="Text column name",
|
| 300 |
-
info="Text colum name to check (only non-nested texts are supported)"),
|
| 301 |
nested_text_column_dropdown: gr.Dropdown(value=nested_keys[0], choices=nested_keys,
|
| 302 |
label="Nested text column name", visible=True)
|
| 303 |
}
|
|
|
|
| 90 |
|
| 91 |
|
| 92 |
@spaces.GPU
|
| 93 |
+
def run_quality_check(dataset, config, split, column, nested_column, batch_size, num_examples):
|
| 94 |
logging.info(f"Fetching data for {dataset=} {config=} {split=} {column=}")
|
| 95 |
try:
|
| 96 |
filename = get_first_parquet_filename(dataset, config, split)
|
|
|
|
| 250 |
return gr.HTML(value=html_code)
|
| 251 |
|
| 252 |
with gr.Row():
|
| 253 |
+
text_column_dropdown = gr.Dropdown(label="Text column name")
|
| 254 |
+
nested_text_column_dropdown = gr.Dropdown(visible=False)
|
| 255 |
|
| 256 |
def _resolve_dataset_selection(dataset: str, default_subset: str, default_split: str, text_feature):
|
| 257 |
if "/" not in dataset.strip().strip("/"):
|
| 258 |
return {
|
| 259 |
subset_dropdown: gr.Dropdown(visible=False),
|
| 260 |
split_dropdown: gr.Dropdown(visible=False),
|
| 261 |
+
text_column_dropdown: gr.Dropdown(label="Text column name"),
|
| 262 |
nested_text_column_dropdown: gr.Dropdown(visible=False)
|
| 263 |
}
|
| 264 |
info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=3).json()
|
|
|
|
| 266 |
return {
|
| 267 |
subset_dropdown: gr.Dropdown(visible=False),
|
| 268 |
split_dropdown: gr.Dropdown(visible=False),
|
| 269 |
+
text_column_dropdown: gr.Dropdown(label="Text column name"),
|
| 270 |
nested_text_column_dropdown: gr.Dropdown(visible=False)
|
| 271 |
}
|
| 272 |
subsets: list[str] = list(info_resp["dataset_info"])
|
|
|
|
| 285 |
return {
|
| 286 |
subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
|
| 287 |
split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
|
| 288 |
+
text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features, label="Text column name",),
|
|
|
|
| 289 |
nested_text_column_dropdown: gr.Dropdown(visible=False),
|
| 290 |
}
|
| 291 |
logging.info(nested_text_features)
|
|
|
|
| 295 |
subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
|
| 296 |
split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
|
| 297 |
text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features,
|
| 298 |
+
label="Text column name"),
|
|
|
|
| 299 |
nested_text_column_dropdown: gr.Dropdown(value=nested_keys[0], choices=nested_keys,
|
| 300 |
label="Nested text column name", visible=True)
|
| 301 |
}
|