twarner committed on
Commit
6dbfa53
·
1 Parent(s): 9f5b647

Match BLIP caption style, add seed, improve defaults

Browse files
Files changed (1) hide show
  1. app.py +38 -19
app.py CHANGED
@@ -457,22 +457,33 @@ def gcode_to_svg(gcode: str) -> str:
457
  # ============================================================================
458
 
459
  def enhance_prompt(prompt: str) -> str:
460
- """Enhance prompt for better SD line drawing generation."""
 
 
 
 
 
 
 
461
  prompt = prompt.strip().lower()
462
 
463
- # Skip if already detailed
464
- if any(x in prompt for x in ["drawing", "sketch", "line", "illustration"]):
465
  enhanced = prompt
 
 
 
 
466
  else:
467
- enhanced = f"a simple line drawing of {prompt}"
468
 
469
- # Add style suffixes for better SD output
470
- enhanced += ", black ink on white paper, single continuous line, minimalist sketch, vector art style"
471
  return enhanced
472
 
473
 
474
  @spaces.GPU
475
- def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, guidance: float):
476
  """Generate gcode from text prompt."""
477
  if not prompt or not prompt.strip():
478
  return "Enter a prompt to generate gcode", gcode_to_svg("")
@@ -490,6 +501,12 @@ def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, g
490
  enhanced = enhance_prompt(prompt)
491
  print(f"Enhanced prompt: {enhanced}")
492
 
 
 
 
 
 
 
493
  # Text -> Latent via SD diffusion
494
  with torch.no_grad():
495
  # Use negative prompt to avoid unwanted styles
@@ -499,6 +516,7 @@ def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, g
499
  num_inference_steps=num_steps,
500
  guidance_scale=guidance,
501
  output_type="latent",
 
502
  )
503
  latent = result.images.to(dtype)
504
  print(f"Latent shape: {latent.shape}, dtype: {latent.dtype}")
@@ -705,21 +723,22 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
705
  )
706
 
707
  with gr.Accordion("settings", open=False):
708
- temperature = gr.Slider(0.3, 1.2, value=0.6, label="temperature", step=0.1)
709
- max_tokens = gr.Slider(256, 2048, value=1536, step=256, label="max tokens")
710
- num_steps = gr.Slider(20, 50, value=30, step=5, label="diffusion steps")
711
- guidance = gr.Slider(5.0, 20.0, value=12.0, step=0.5, label="guidance")
 
712
 
713
  generate_btn = gr.Button("generate", variant="secondary")
714
 
715
  gr.Examples(
716
  examples=[
717
- ["horse"],
718
- ["cat face"],
719
- ["spiral"],
720
- ["star"],
721
- ["tree"],
722
- ["flower"],
723
  ],
724
  inputs=prompt,
725
  label=None,
@@ -735,8 +754,8 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
735
  gr.Markdown("---")
736
  gr.Markdown("machine: 841×1189mm / pen servo 40-90° / [github](https://github.com/Twarner491/dcode) / [model](https://huggingface.co/twarner/dcode-sd-gcode-v3) / mit")
737
 
738
- generate_btn.click(generate, [prompt, temperature, max_tokens, num_steps, guidance], [gcode_output, preview])
739
- prompt.submit(generate, [prompt, temperature, max_tokens, num_steps, guidance], [gcode_output, preview])
740
 
741
  if __name__ == "__main__":
742
  demo.launch()
 
457
  # ============================================================================
458
 
459
  def enhance_prompt(prompt: str) -> str:
460
+ """Enhance prompt to match BLIP caption style from training data.
461
+
462
+ BLIP generates captions like:
463
+ - "a drawing of a horse"
464
+ - "a sketch of a cat"
465
+ - "a black and white drawing"
466
+ - "an illustration of a flower"
467
+ """
468
  prompt = prompt.strip().lower()
469
 
470
+ # Already in BLIP style
471
+ if prompt.startswith(("a ", "an ", "the ")):
472
  enhanced = prompt
473
+ # Has style keyword
474
+ elif any(x in prompt for x in ["drawing", "sketch", "illustration", "image"]):
475
+ enhanced = f"a {prompt}"
476
+ # Simple noun - wrap in BLIP style
477
  else:
478
+ enhanced = f"a drawing of a {prompt}"
479
 
480
+ # Add subtle style hints (BLIP often includes these)
481
+ enhanced += ", black and white, simple lines, sketch style"
482
  return enhanced
483
 
484
 
485
  @spaces.GPU
486
+ def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, guidance: float, seed: int = -1):
487
  """Generate gcode from text prompt."""
488
  if not prompt or not prompt.strip():
489
  return "Enter a prompt to generate gcode", gcode_to_svg("")
 
501
  enhanced = enhance_prompt(prompt)
502
  print(f"Enhanced prompt: {enhanced}")
503
 
504
+ # Set seed for reproducibility
505
+ generator = None
506
+ if seed >= 0:
507
+ generator = torch.Generator(device=device).manual_seed(int(seed))
508
+ print(f"Using seed: {seed}")
509
+
510
  # Text -> Latent via SD diffusion
511
  with torch.no_grad():
512
  # Use negative prompt to avoid unwanted styles
 
516
  num_inference_steps=num_steps,
517
  guidance_scale=guidance,
518
  output_type="latent",
519
+ generator=generator,
520
  )
521
  latent = result.images.to(dtype)
522
  print(f"Latent shape: {latent.shape}, dtype: {latent.dtype}")
 
723
  )
724
 
725
  with gr.Accordion("settings", open=False):
726
+ temperature = gr.Slider(0.3, 1.2, value=0.5, label="temperature", step=0.1)
727
+ max_tokens = gr.Slider(256, 2048, value=2048, step=256, label="max tokens")
728
+ num_steps = gr.Slider(20, 50, value=35, step=5, label="diffusion steps")
729
+ guidance = gr.Slider(5.0, 20.0, value=10.0, step=0.5, label="guidance")
730
+ seed = gr.Number(value=-1, label="seed (-1 = random)", precision=0)
731
 
732
  generate_btn = gr.Button("generate", variant="secondary")
733
 
734
  gr.Examples(
735
  examples=[
736
+ ["a drawing of a horse"],
737
+ ["a sketch of a cat"],
738
+ ["a simple flower drawing"],
739
+ ["a drawing of a tree"],
740
+ ["abstract lines"],
741
+ ["a portrait sketch"],
742
  ],
743
  inputs=prompt,
744
  label=None,
 
754
  gr.Markdown("---")
755
  gr.Markdown("machine: 841×1189mm / pen servo 40-90° / [github](https://github.com/Twarner491/dcode) / [model](https://huggingface.co/twarner/dcode-sd-gcode-v3) / mit")
756
 
757
+ generate_btn.click(generate, [prompt, temperature, max_tokens, num_steps, guidance, seed], [gcode_output, preview])
758
+ prompt.submit(generate, [prompt, temperature, max_tokens, num_steps, guidance, seed], [gcode_output, preview])
759
 
760
  if __name__ == "__main__":
761
  demo.launch()