Spaces:
Runtime error
Runtime error
polinaeterna
committed on
Commit
·
57aaee5
1
Parent(s):
4e6b23a
fix parquet filename
Browse files
app.py
CHANGED
|
@@ -90,9 +90,13 @@ def run_quality_check(dataset, column, batch_size, num_examples):
|
|
| 90 |
except pl.exceptions.ComputeError:
|
| 91 |
try:
|
| 92 |
data = pl.read_parquet(f"hf://datasets/{dataset}@~parquet/{config}/partial-{split}/0000.parquet", columns=[column])
|
| 93 |
-
except
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
texts = [text[:10000] for text in data[column].to_list()]
|
| 97 |
# texts_sample = data.sample(100, shuffle=True, seed=16).to_pandas()
|
| 98 |
# batch_size = 100
|
|
|
|
| 90 |
except pl.exceptions.ComputeError:
|
| 91 |
try:
|
| 92 |
data = pl.read_parquet(f"hf://datasets/{dataset}@~parquet/{config}/partial-{split}/0000.parquet", columns=[column])
|
| 93 |
+
except pl.exceptions.ComputeError:
|
| 94 |
+
try:
|
| 95 |
+
data = pl.read_parquet(f"hf://datasets/{dataset}@~parquet/{config}/{split}-part0/0000.parquet", columns=[column])
|
| 96 |
+
except Exception as error:
|
| 97 |
+
yield f"❌ {error}", gr.BarPlot(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),
|
| 98 |
+
return
|
| 99 |
+
|
| 100 |
texts = [text[:10000] for text in data[column].to_list()]
|
| 101 |
# texts_sample = data.sample(100, shuffle=True, seed=16).to_pandas()
|
| 102 |
# batch_size = 100
|