Percy3822 committed on
Commit
bf43903
·
verified ·
1 Parent(s): 8d820c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -8
app.py CHANGED
@@ -88,11 +88,13 @@ def read_gen_logs():
88
 
89
  def list_shards(folder):
90
  if not folder or not os.path.isdir(folder): return "❌ Provide a valid folder path."
 
91
  jsonl = sorted(glob.glob(os.path.join(folder,"*.jsonl")))
92
  gz = sorted(glob.glob(os.path.join(folder,"*.jsonl.gz")))
93
- total = len(jsonl)+len(gz)
94
- if total==0: return "No shards found."
95
- preview=(jsonl+gz)[:10]
 
96
  lines=[f"Found {total} shard(s). Showing first {len(preview)}:"]+[f"- {os.path.basename(p)}" for p in preview]
97
  return "\n".join(lines)
98
 
@@ -117,12 +119,12 @@ def _train_worker(dataset_path: str, shards_folder: str):
117
  with open(LOG_FILE,"a") as log:
118
  if shards_folder:
119
  log.write(f"📂 Folder mode: {shards_folder}\n")
 
120
  paths = sorted(glob.glob(os.path.join(shards_folder,"*.jsonl"))) + \
121
- sorted(glob.glob(os.path.join(shards_folder,"*.json"))) + \
122
- sorted(glob.glob(os.path.join(shards_folder,"*.jsonl.gz"))) + \
123
- sorted(glob.glob(os.path.join(shards_folder,"*.json.gz")))
124
  if not paths:
125
- log.write("❌ No shards found. Aborting.\n"); ok=False
126
  else:
127
  tmp="tmp_train.jsonl"
128
  for i,pth in enumerate(paths,1):
@@ -232,7 +234,7 @@ with gr.Blocks(title="JSON AI Trainer (with Dataset Generator)") as app:
232
  list_btn.click(fn=list_shards, inputs=list_folder, outputs=list_out)
233
 
234
  with gr.Tab("🧠 Train"):
235
- gr.Markdown("Upload a single JSON/JSONL file *or* train on a folder of shards (.json, .jsonl, .jsonl.gz, .json.gz).")
236
  with gr.Row():
237
  file_input = gr.File(label="Upload single dataset file", file_types=[".json",".jsonl"])
238
  upload_btn = gr.Button("📀 Upload (single file)")
 
88
 
89
  def list_shards(folder):
90
  if not folder or not os.path.isdir(folder): return "❌ Provide a valid folder path."
91
+ # ⬇ Only JSONL shards; ignore manifest files
92
  jsonl = sorted(glob.glob(os.path.join(folder,"*.jsonl")))
93
  gz = sorted(glob.glob(os.path.join(folder,"*.jsonl.gz")))
94
+ files = [p for p in (jsonl+gz) if "manifest" not in os.path.basename(p).lower()]
95
+ total = len(files)
96
+ if total==0: return "No shards found (*.jsonl / *.jsonl.gz)."
97
+ preview=files[:10]
98
  lines=[f"Found {total} shard(s). Showing first {len(preview)}:"]+[f"- {os.path.basename(p)}" for p in preview]
99
  return "\n".join(lines)
100
 
 
119
  with open(LOG_FILE,"a") as log:
120
  if shards_folder:
121
  log.write(f"📂 Folder mode: {shards_folder}\n")
122
+ # ⬇ Only JSONL shards; ignore manifest files
123
  paths = sorted(glob.glob(os.path.join(shards_folder,"*.jsonl"))) + \
124
+ sorted(glob.glob(os.path.join(shards_folder,"*.jsonl.gz")))
125
+ paths = [p for p in paths if "manifest" not in os.path.basename(p).lower()]
 
126
  if not paths:
127
+ log.write("❌ No shards found (*.jsonl / *.jsonl.gz). Aborting.\n"); ok=False
128
  else:
129
  tmp="tmp_train.jsonl"
130
  for i,pth in enumerate(paths,1):
 
234
  list_btn.click(fn=list_shards, inputs=list_folder, outputs=list_out)
235
 
236
  with gr.Tab("🧠 Train"):
237
+ gr.Markdown("Upload a single JSON/JSONL file *or* train on a folder of shards (.jsonl, .jsonl.gz). Manifests are ignored.")
238
  with gr.Row():
239
  file_input = gr.File(label="Upload single dataset file", file_types=[".json",".jsonl"])
240
  upload_btn = gr.Button("📀 Upload (single file)")