Spaces:
Build error
improve errors
Browse files
src/synthetic_dataset_generator/apps/rag.py
CHANGED
|
@@ -76,7 +76,7 @@ def _load_dataset_from_hub(
|
|
| 76 |
progress=gr.Progress(track_tqdm=True),
|
| 77 |
):
|
| 78 |
if not repo_id:
|
| 79 |
-
raise gr.Error("Hub repo
|
| 80 |
subsets = get_dataset_config_names(repo_id, token=token)
|
| 81 |
splits = get_dataset_split_names(repo_id, subsets[0], token=token)
|
| 82 |
ds = load_dataset(repo_id, subsets[0], split=splits[0], token=token, streaming=True)
|
|
@@ -102,6 +102,9 @@ def _load_dataset_from_hub(
|
|
| 102 |
|
| 103 |
|
| 104 |
def _preprocess_input_data(file_paths: list[str], num_rows: int, progress=gr.Progress(track_tqdm=True)):
|
|
|
|
|
|
|
|
|
|
| 105 |
data = {}
|
| 106 |
total_chunks = 0
|
| 107 |
|
|
|
|
| 76 |
progress=gr.Progress(track_tqdm=True),
|
| 77 |
):
|
| 78 |
if not repo_id:
|
| 79 |
+
raise gr.Error("Please provide a Hub repo ID")
|
| 80 |
subsets = get_dataset_config_names(repo_id, token=token)
|
| 81 |
splits = get_dataset_split_names(repo_id, subsets[0], token=token)
|
| 82 |
ds = load_dataset(repo_id, subsets[0], split=splits[0], token=token, streaming=True)
|
|
|
|
| 102 |
|
| 103 |
|
| 104 |
def _preprocess_input_data(file_paths: list[str], num_rows: int, progress=gr.Progress(track_tqdm=True)):
|
| 105 |
+
if not file_paths:
|
| 106 |
+
raise gr.Error("Please provide an input file")
|
| 107 |
+
|
| 108 |
data = {}
|
| 109 |
total_chunks = 0
|
| 110 |
|