Spaces:
Running
Running
redirect training subprocess stderr to log file for debugging
Browse files
app.py
CHANGED
|
@@ -410,10 +410,11 @@ finally:
|
|
| 410 |
with open(script_path, "w") as f:
|
| 411 |
f.write(train_script)
|
| 412 |
|
|
|
|
| 413 |
subprocess.Popen(
|
| 414 |
-
["python3", script_path],
|
| 415 |
-
stdout=open(
|
| 416 |
-
stderr=open(
|
| 417 |
start_new_session=True,
|
| 418 |
)
|
| 419 |
|
|
@@ -423,10 +424,14 @@ finally:
|
|
| 423 |
f"Inference will be unavailable until training completes (ace-server stopped).")
|
| 424 |
|
| 425 |
def check_train_log():
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
|
| 431 |
# -- Build UI --
|
| 432 |
CSS = """
|
|
|
|
| 410 |
with open(script_path, "w") as f:
|
| 411 |
f.write(train_script)
|
| 412 |
|
| 413 |
+
train_stderr = os.path.join(output_dir, "train_stderr.log")
|
| 414 |
subprocess.Popen(
|
| 415 |
+
["python3", "-u", script_path],
|
| 416 |
+
stdout=open(TRAIN_LOG, "a"),
|
| 417 |
+
stderr=open(train_stderr, "w"),
|
| 418 |
start_new_session=True,
|
| 419 |
)
|
| 420 |
|
|
|
|
| 424 |
f"Inference will be unavailable until training completes (ace-server stopped).")
|
| 425 |
|
| 426 |
def check_train_log():
    """Return the training log followed by the tail of the trainer's stderr.

    Reads ``TRAIN_LOG`` in full (if it exists) and, if the stderr log exists
    and is non-empty, appends its last 2000 characters under a separator
    header.

    Returns:
        str: The collected log text joined with newlines, or
        ``"No training log found."`` when neither file yields any content.
    """
    parts = []
    if os.path.exists(TRAIN_LOG):
        # errors="replace": the log may be read while it is still being
        # written, so a partially flushed multi-byte character must not
        # raise UnicodeDecodeError.
        with open(TRAIN_LOG, errors="replace") as log_file:
            parts.append(log_file.read())
    # NOTE(review): this path is hard-coded to the "test-lora" adapter, while
    # the launcher writes stderr to os.path.join(output_dir, "train_stderr.log")
    # -- confirm output_dir is always ADAPTER_DIR/test-lora.
    stderr_log = os.path.join(ADAPTER_DIR, "test-lora", "train_stderr.log")
    if os.path.exists(stderr_log) and os.path.getsize(stderr_log) > 0:
        with open(stderr_log, errors="replace") as err_file:
            # Only the tail is shown; the most recent output is the
            # relevant part when debugging a failure.
            stderr = err_file.read()[-2000:]
        parts.append(f"\n--- stderr (last 2000 chars) ---\n{stderr}")
    return "\n".join(parts) if parts else "No training log found."
|
| 435 |
|
| 436 |
# -- Build UI --
|
| 437 |
CSS = """
|