twarner committed on
Commit
05ae1b3
·
1 Parent(s): 1b8fb6e

Fix: keep newline tokens during decode, then convert to actual newlines

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
@@ -352,7 +352,7 @@ def get_model():
352
  def clean_gcode(gcode: str) -> str:
353
  """Clean up generated gcode - fix formatting, remove garbage."""
354
 
355
- # Replace <newline> tokens with actual newlines
356
  gcode = gcode.replace("<newline>", "\n")
357
 
358
  # If no/few newlines, split on command boundaries
@@ -360,6 +360,10 @@ def clean_gcode(gcode: str) -> str:
360
  # Split before each gcode command
361
  gcode = re.sub(r'([GM]\d+)', r'\n\1', gcode)
362
 
 
 
 
 
363
  # Clean up each line
364
  cleaned_lines = []
365
  seen_coords = set() # Track to detect stuck coordinates
@@ -692,8 +696,15 @@ def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, g
692
 
693
  print(f"Generated {input_ids.shape[1]} total tokens")
694
 
695
- # Decode
696
- gcode = gcode_tokenizer.decode(input_ids[0], skip_special_tokens=True)
 
 
 
 
 
 
 
697
  print(f"Raw decoded (first 300 chars): {repr(gcode[:300])}")
698
 
699
  # Clean up the gcode
 
352
  def clean_gcode(gcode: str) -> str:
353
  """Clean up generated gcode - fix formatting, remove garbage."""
354
 
355
+ # Replace any remaining <newline> tokens
356
  gcode = gcode.replace("<newline>", "\n")
357
 
358
  # If no/few newlines, split on command boundaries
 
360
  # Split before each gcode command
361
  gcode = re.sub(r'([GM]\d+)', r'\n\1', gcode)
362
 
363
+ # Add spaces after G0/G1 if missing: G0X -> G0 X
364
+ gcode = re.sub(r'(G[01])([XYZ])', r'\1 \2', gcode)
365
+ gcode = re.sub(r'(G[01])F', r'\1 F', gcode)
366
+
367
  # Clean up each line
368
  cleaned_lines = []
369
  seen_coords = set() # Track to detect stuck coordinates
 
696
 
697
  print(f"Generated {input_ids.shape[1]} total tokens")
698
 
699
+ # Decode WITHOUT skipping special tokens (so we keep <newline>)
700
+ gcode = gcode_tokenizer.decode(input_ids[0], skip_special_tokens=False)
701
+
702
+ # Manually remove the special tokens we don't want, but keep <newline>
703
+ gcode = gcode.replace("<pad>", "").replace("<s>", "").replace("</s>", "").replace("<unk>", "")
704
+
705
+ # Now convert <newline> to actual newlines
706
+ gcode = gcode.replace("<newline>", "\n")
707
+
708
  print(f"Raw decoded (first 300 chars): {repr(gcode[:300])}")
709
 
710
  # Clean up the gcode