Fix: keep newline tokens during decode, then convert to actual newlines
Browse files
app.py
CHANGED
|
@@ -352,7 +352,7 @@ def get_model():
|
|
| 352 |
def clean_gcode(gcode: str) -> str:
|
| 353 |
"""Clean up generated gcode - fix formatting, remove garbage."""
|
| 354 |
|
| 355 |
-
# Replace <newline> tokens
|
| 356 |
gcode = gcode.replace("<newline>", "\n")
|
| 357 |
|
| 358 |
# If no/few newlines, split on command boundaries
|
|
@@ -360,6 +360,10 @@ def clean_gcode(gcode: str) -> str:
|
|
| 360 |
# Split before each gcode command
|
| 361 |
gcode = re.sub(r'([GM]\d+)', r'\n\1', gcode)
|
| 362 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
# Clean up each line
|
| 364 |
cleaned_lines = []
|
| 365 |
seen_coords = set() # Track to detect stuck coordinates
|
|
@@ -692,8 +696,15 @@ def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, g
|
|
| 692 |
|
| 693 |
print(f"Generated {input_ids.shape[1]} total tokens")
|
| 694 |
|
| 695 |
-
# Decode
|
| 696 |
-
gcode = gcode_tokenizer.decode(input_ids[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
print(f"Raw decoded (first 300 chars): {repr(gcode[:300])}")
|
| 698 |
|
| 699 |
# Clean up the gcode
|
|
|
|
| 352 |
def clean_gcode(gcode: str) -> str:
|
| 353 |
"""Clean up generated gcode - fix formatting, remove garbage."""
|
| 354 |
|
| 355 |
+
# Replace any remaining <newline> tokens
|
| 356 |
gcode = gcode.replace("<newline>", "\n")
|
| 357 |
|
| 358 |
# If no/few newlines, split on command boundaries
|
|
|
|
| 360 |
# Split before each gcode command
|
| 361 |
gcode = re.sub(r'([GM]\d+)', r'\n\1', gcode)
|
| 362 |
|
| 363 |
+
# Add spaces after G0/G1 if missing: G0X -> G0 X
|
| 364 |
+
gcode = re.sub(r'(G[01])([XYZ])', r'\1 \2', gcode)
|
| 365 |
+
gcode = re.sub(r'(G[01])F', r'\1 F', gcode)
|
| 366 |
+
|
| 367 |
# Clean up each line
|
| 368 |
cleaned_lines = []
|
| 369 |
seen_coords = set() # Track to detect stuck coordinates
|
|
|
|
| 696 |
|
| 697 |
print(f"Generated {input_ids.shape[1]} total tokens")
|
| 698 |
|
| 699 |
+
# Decode WITHOUT skipping special tokens (so we keep <newline>)
|
| 700 |
+
gcode = gcode_tokenizer.decode(input_ids[0], skip_special_tokens=False)
|
| 701 |
+
|
| 702 |
+
# Manually remove the special tokens we don't want, but keep <newline>
|
| 703 |
+
gcode = gcode.replace("<pad>", "").replace("<s>", "").replace("</s>", "").replace("<unk>", "")
|
| 704 |
+
|
| 705 |
+
# Now convert <newline> to actual newlines
|
| 706 |
+
gcode = gcode.replace("<newline>", "\n")
|
| 707 |
+
|
| 708 |
print(f"Raw decoded (first 300 chars): {repr(gcode[:300])}")
|
| 709 |
|
| 710 |
# Clean up the gcode
|