Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,7 +19,7 @@ from transformers import AutoModel, AutoTokenizer, AutoConfig
|
|
| 19 |
from tqdm import tqdm
|
| 20 |
|
| 21 |
|
| 22 |
-
logging.basicConfig(level=logging.
|
| 23 |
|
| 24 |
|
| 25 |
session = requests.Session()
|
|
@@ -90,6 +90,7 @@ def run_quality_check(dataset, column, batch_size, num_examples):
|
|
| 90 |
config = "default" if "default" in info_resp["dataset_info"] else next(iter(info_resp["dataset_info"]))
|
| 91 |
split = "train" if "train" in info_resp["dataset_info"][config]["splits"] else next(
|
| 92 |
iter(info_resp["dataset_info"][config]["splits"]))
|
|
|
|
| 93 |
try:
|
| 94 |
data = pl.read_parquet(f"hf://datasets/{dataset}@~parquet/{config}/{split}/0000.parquet", columns=[column])
|
| 95 |
except pl.exceptions.ComputeError:
|
|
@@ -101,6 +102,7 @@ def run_quality_check(dataset, column, batch_size, num_examples):
|
|
| 101 |
except Exception as error:
|
| 102 |
yield f"❌ {error}", gr.BarPlot(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),
|
| 103 |
return
|
|
|
|
| 104 |
|
| 105 |
texts = [text[:10000] for text in data[column].to_list()]
|
| 106 |
# texts_sample = data.sample(100, shuffle=True, seed=16).to_pandas()
|
|
|
|
| 19 |
from tqdm import tqdm
|
| 20 |
|
| 21 |
|
| 22 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 23 |
|
| 24 |
|
| 25 |
session = requests.Session()
|
|
|
|
| 90 |
config = "default" if "default" in info_resp["dataset_info"] else next(iter(info_resp["dataset_info"]))
|
| 91 |
split = "train" if "train" in info_resp["dataset_info"][config]["splits"] else next(
|
| 92 |
iter(info_resp["dataset_info"][config]["splits"]))
|
| 93 |
+
logging.info(f"Fetching data for {dataset} {config} {split}")
|
| 94 |
try:
|
| 95 |
data = pl.read_parquet(f"hf://datasets/{dataset}@~parquet/{config}/{split}/0000.parquet", columns=[column])
|
| 96 |
except pl.exceptions.ComputeError:
|
|
|
|
| 102 |
except Exception as error:
|
| 103 |
yield f"❌ {error}", gr.BarPlot(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(),
|
| 104 |
return
|
| 105 |
+
logging.info("Data fetched.")
|
| 106 |
|
| 107 |
texts = [text[:10000] for text in data[column].to_list()]
|
| 108 |
# texts_sample = data.sample(100, shuffle=True, seed=16).to_pandas()
|