Spaces:
Runtime error
Runtime error
polinaeterna
committed on
Commit
·
7d66c17
1
Parent(s):
b3d9c4b
update text
Browse files
app.py
CHANGED
|
@@ -82,7 +82,7 @@ def plot_and_df(texts, preds):
|
|
| 82 |
|
| 83 |
|
| 84 |
def get_first_parquet_filename(dataset, config, split):
|
| 85 |
-
parquet_resp = session.get(f"https://datasets-server.huggingface.co/parquet?dataset={dataset}&config={config}", timeout=
|
| 86 |
if "error" in parquet_resp:
|
| 87 |
raise ValueError(parquet_resp["error"])
|
| 88 |
first_parquet_file_url = [file for file in parquet_resp["parquet_files"] if file["split"] == split][0]["url"]
|
|
@@ -217,11 +217,13 @@ def call_perspective_api(texts_df, column_name, nested_column_name, dataset, con
|
|
| 217 |
with gr.Blocks() as demo:
|
| 218 |
gr.Markdown(
|
| 219 |
"""
|
| 220 |
-
#
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
| 225 |
## Select dataset and text column
|
| 226 |
"""
|
| 227 |
)
|
|
@@ -235,7 +237,6 @@ with gr.Blocks() as demo:
|
|
| 235 |
subset_dropdown = gr.Dropdown(label="Subset", visible=False)
|
| 236 |
split_dropdown = gr.Dropdown(label="Split", visible=False)
|
| 237 |
|
| 238 |
-
# config_name = "default" # TODO: user input
|
| 239 |
with gr.Accordion("Dataset preview", open=False):
|
| 240 |
@gr.render(inputs=[dataset_name, subset_dropdown, split_dropdown])
|
| 241 |
def embed(name, subset, split):
|
|
@@ -261,7 +262,7 @@ with gr.Blocks() as demo:
|
|
| 261 |
text_column_dropdown: gr.Dropdown(label="Text column name"),
|
| 262 |
nested_text_column_dropdown: gr.Dropdown(visible=False)
|
| 263 |
}
|
| 264 |
-
info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=
|
| 265 |
if "error" in info_resp:
|
| 266 |
return {
|
| 267 |
subset_dropdown: gr.Dropdown(visible=False),
|
|
@@ -285,7 +286,7 @@ with gr.Blocks() as demo:
|
|
| 285 |
return {
|
| 286 |
subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
|
| 287 |
split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
|
| 288 |
-
text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features, label="Text column name"
|
| 289 |
nested_text_column_dropdown: gr.Dropdown(visible=False),
|
| 290 |
}
|
| 291 |
logging.info(nested_text_features)
|
|
@@ -364,8 +365,7 @@ with gr.Blocks() as demo:
|
|
| 364 |
gr.Markdown("""## Explore toxicity
|
| 365 |
Run [Perspective](https://perspectiveapi.com/how-it-works/) on 100 random samples to check toxicity
|
| 366 |
""")
|
| 367 |
-
|
| 368 |
-
gr_toxicity_btn = gr.Button("Run Perpspective API")
|
| 369 |
toxicity_progress_bar = gr.Label(show_label=False)
|
| 370 |
toxicity_hist = gr.Plot()
|
| 371 |
with gr.Accordion("Explore examples with toxicity scores:", open=False):
|
|
|
|
| 82 |
|
| 83 |
|
| 84 |
def get_first_parquet_filename(dataset, config, split):
|
| 85 |
+
parquet_resp = session.get(f"https://datasets-server.huggingface.co/parquet?dataset={dataset}&config={config}", timeout=20).json()
|
| 86 |
if "error" in parquet_resp:
|
| 87 |
raise ValueError(parquet_resp["error"])
|
| 88 |
first_parquet_file_url = [file for file in parquet_resp["parquet_files"] if file["split"] == split][0]["url"]
|
|
|
|
| 217 |
with gr.Blocks() as demo:
|
| 218 |
gr.Markdown(
|
| 219 |
"""
|
| 220 |
+
# Data Quality Checker
|
| 221 |
+
|
| 222 |
+
This space gives some instruments to have a quick glance at the quality of a text dataset.
|
| 223 |
+
* It uses [NVIDIA's quality classifier model](https://huggingface.co/nvidia/quality-classifier-deberta)
|
| 224 |
+
on a small subset of texts.
|
| 225 |
+
* It uses [Perspective](https://perspectiveapi.com/how-it-works/) API to check toxicity of 100 random dataset texts
|
| 226 |
+
|
| 227 |
## Select dataset and text column
|
| 228 |
"""
|
| 229 |
)
|
|
|
|
| 237 |
subset_dropdown = gr.Dropdown(label="Subset", visible=False)
|
| 238 |
split_dropdown = gr.Dropdown(label="Split", visible=False)
|
| 239 |
|
|
|
|
| 240 |
with gr.Accordion("Dataset preview", open=False):
|
| 241 |
@gr.render(inputs=[dataset_name, subset_dropdown, split_dropdown])
|
| 242 |
def embed(name, subset, split):
|
|
|
|
| 262 |
text_column_dropdown: gr.Dropdown(label="Text column name"),
|
| 263 |
nested_text_column_dropdown: gr.Dropdown(visible=False)
|
| 264 |
}
|
| 265 |
+
info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=20).json()
|
| 266 |
if "error" in info_resp:
|
| 267 |
return {
|
| 268 |
subset_dropdown: gr.Dropdown(visible=False),
|
|
|
|
| 286 |
return {
|
| 287 |
subset_dropdown: gr.Dropdown(value=subset, choices=subsets, visible=len(subsets) > 1),
|
| 288 |
split_dropdown: gr.Dropdown(value=split, choices=splits, visible=len(splits) > 1),
|
| 289 |
+
text_column_dropdown: gr.Dropdown(choices=text_features + nested_text_features, label="Text column name"),
|
| 290 |
nested_text_column_dropdown: gr.Dropdown(visible=False),
|
| 291 |
}
|
| 292 |
logging.info(nested_text_features)
|
|
|
|
| 365 |
gr.Markdown("""## Explore toxicity
|
| 366 |
Run [Perspective](https://perspectiveapi.com/how-it-works/) on 100 random samples to check toxicity
|
| 367 |
""")
|
| 368 |
+
gr_toxicity_btn = gr.Button("Run Perpspective")
|
|
|
|
| 369 |
toxicity_progress_bar = gr.Label(show_label=False)
|
| 370 |
toxicity_hist = gr.Plot()
|
| 371 |
with gr.Accordion("Explore examples with toxicity scores:", open=False):
|