Update app.py
Browse files
app.py
CHANGED
|
@@ -88,11 +88,13 @@ def read_gen_logs():
|
|
| 88 |
|
| 89 |
def list_shards(folder):
|
| 90 |
if not folder or not os.path.isdir(folder): return "β Provide a valid folder path."
|
|
|
|
| 91 |
jsonl = sorted(glob.glob(os.path.join(folder,"*.jsonl")))
|
| 92 |
gz = sorted(glob.glob(os.path.join(folder,"*.jsonl.gz")))
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
|
|
|
| 96 |
lines=[f"Found {total} shard(s). Showing first {len(preview)}:"]+[f"- {os.path.basename(p)}" for p in preview]
|
| 97 |
return "\n".join(lines)
|
| 98 |
|
|
@@ -117,12 +119,12 @@ def _train_worker(dataset_path: str, shards_folder: str):
|
|
| 117 |
with open(LOG_FILE,"a") as log:
|
| 118 |
if shards_folder:
|
| 119 |
log.write(f"π Folder mode: {shards_folder}\n")
|
|
|
|
| 120 |
paths = sorted(glob.glob(os.path.join(shards_folder,"*.jsonl"))) + \
|
| 121 |
-
sorted(glob.glob(os.path.join(shards_folder,"*.
|
| 122 |
-
|
| 123 |
-
sorted(glob.glob(os.path.join(shards_folder,"*.json.gz")))
|
| 124 |
if not paths:
|
| 125 |
-
log.write("β No shards found. Aborting.\n"); ok=False
|
| 126 |
else:
|
| 127 |
tmp="tmp_train.jsonl"
|
| 128 |
for i,pth in enumerate(paths,1):
|
|
@@ -232,7 +234,7 @@ with gr.Blocks(title="JSON AI Trainer (with Dataset Generator)") as app:
|
|
| 232 |
list_btn.click(fn=list_shards, inputs=list_folder, outputs=list_out)
|
| 233 |
|
| 234 |
with gr.Tab("π§ Train"):
|
| 235 |
-
gr.Markdown("Upload a single JSON/JSONL file *or* train on a folder of shards (.
|
| 236 |
with gr.Row():
|
| 237 |
file_input = gr.File(label="Upload single dataset file", file_types=[".json",".jsonl"])
|
| 238 |
upload_btn = gr.Button("π€ Upload (single file)")
|
|
|
|
| 88 |
|
| 89 |
def list_shards(folder):
    """Summarise the JSONL shard files found directly inside *folder*.

    Collects ``*.jsonl`` files followed by ``*.jsonl.gz`` files (each group
    sorted by path), drops every file whose base name contains "manifest"
    (case-insensitive), and reports the total together with the first ten
    file names.

    Args:
        folder: Path of the directory to scan.

    Returns:
        A human-readable message: a validation error when *folder* is empty
        or not an existing directory, a "no shards" notice when nothing
        matches, otherwise a header line followed by one ``- name`` line per
        previewed shard, joined with newlines.
    """
    if not folder or not os.path.isdir(folder):
        # NOTE(review): the leading character of this message looks
        # mis-encoded in the reviewed text; confirm against the real file.
        return "β Provide a valid folder path."

    # Escape the directory part so glob metacharacters in a folder name
    # ("[", "]", "*", "?") are matched literally instead of as a pattern.
    base = glob.escape(folder)
    jsonl = sorted(glob.glob(os.path.join(base, "*.jsonl")))
    gz = sorted(glob.glob(os.path.join(base, "*.jsonl.gz")))

    # Only JSONL shards count; manifest files are ignored.
    files = [p for p in jsonl + gz if "manifest" not in os.path.basename(p).lower()]
    total = len(files)
    if total == 0:
        return "No shards found (*.jsonl / *.jsonl.gz)."

    preview = files[:10]
    lines = [f"Found {total} shard(s). Showing first {len(preview)}:"]
    lines += [f"- {os.path.basename(p)}" for p in preview]
    return "\n".join(lines)
|
| 100 |
|
|
|
|
| 119 |
with open(LOG_FILE,"a") as log:
|
| 120 |
if shards_folder:
|
| 121 |
log.write(f"π Folder mode: {shards_folder}\n")
|
| 122 |
+
# Only JSONL shards; ignore manifest files
|
| 123 |
paths = sorted(glob.glob(os.path.join(shards_folder,"*.jsonl"))) + \
|
| 124 |
+
sorted(glob.glob(os.path.join(shards_folder,"*.jsonl.gz")))
|
| 125 |
+
paths = [p for p in paths if "manifest" not in os.path.basename(p).lower()]
|
|
|
|
| 126 |
if not paths:
|
| 127 |
+
log.write("β No shards found (*.jsonl / *.jsonl.gz). Aborting.\n"); ok=False
|
| 128 |
else:
|
| 129 |
tmp="tmp_train.jsonl"
|
| 130 |
for i,pth in enumerate(paths,1):
|
|
|
|
| 234 |
list_btn.click(fn=list_shards, inputs=list_folder, outputs=list_out)
|
| 235 |
|
| 236 |
with gr.Tab("π§ Train"):
|
| 237 |
+
gr.Markdown("Upload a single JSON/JSONL file *or* train on a folder of shards (.jsonl, .jsonl.gz). Manifests are ignored.")
|
| 238 |
with gr.Row():
|
| 239 |
file_input = gr.File(label="Upload single dataset file", file_types=[".json",".jsonl"])
|
| 240 |
upload_btn = gr.Button("π€ Upload (single file)")
|