Match BLIP caption style, add seed, improve defaults
Browse files
app.py
CHANGED
|
@@ -457,22 +457,33 @@ def gcode_to_svg(gcode: str) -> str:
|
|
| 457 |
# ============================================================================
|
| 458 |
|
| 459 |
def enhance_prompt(prompt: str) -> str:
|
| 460 |
-
"""Enhance prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 461 |
prompt = prompt.strip().lower()
|
| 462 |
|
| 463 |
-
#
|
| 464 |
-
if
|
| 465 |
enhanced = prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
else:
|
| 467 |
-
enhanced = f"a
|
| 468 |
|
| 469 |
-
# Add style
|
| 470 |
-
enhanced += ", black
|
| 471 |
return enhanced
|
| 472 |
|
| 473 |
|
| 474 |
@spaces.GPU
|
| 475 |
-
def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, guidance: float):
|
| 476 |
"""Generate gcode from text prompt."""
|
| 477 |
if not prompt or not prompt.strip():
|
| 478 |
return "Enter a prompt to generate gcode", gcode_to_svg("")
|
|
@@ -490,6 +501,12 @@ def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, g
|
|
| 490 |
enhanced = enhance_prompt(prompt)
|
| 491 |
print(f"Enhanced prompt: {enhanced}")
|
| 492 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
# Text -> Latent via SD diffusion
|
| 494 |
with torch.no_grad():
|
| 495 |
# Use negative prompt to avoid unwanted styles
|
|
@@ -499,6 +516,7 @@ def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, g
|
|
| 499 |
num_inference_steps=num_steps,
|
| 500 |
guidance_scale=guidance,
|
| 501 |
output_type="latent",
|
|
|
|
| 502 |
)
|
| 503 |
latent = result.images.to(dtype)
|
| 504 |
print(f"Latent shape: {latent.shape}, dtype: {latent.dtype}")
|
|
@@ -705,21 +723,22 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
|
|
| 705 |
)
|
| 706 |
|
| 707 |
with gr.Accordion("settings", open=False):
|
| 708 |
-
temperature = gr.Slider(0.3, 1.2, value=0.
|
| 709 |
-
max_tokens = gr.Slider(256, 2048, value=
|
| 710 |
-
num_steps = gr.Slider(20, 50, value=
|
| 711 |
-
guidance = gr.Slider(5.0, 20.0, value=
|
|
|
|
| 712 |
|
| 713 |
generate_btn = gr.Button("generate", variant="secondary")
|
| 714 |
|
| 715 |
gr.Examples(
|
| 716 |
examples=[
|
| 717 |
-
["horse"],
|
| 718 |
-
["cat
|
| 719 |
-
["
|
| 720 |
-
["
|
| 721 |
-
["
|
| 722 |
-
["
|
| 723 |
],
|
| 724 |
inputs=prompt,
|
| 725 |
label=None,
|
|
@@ -735,8 +754,8 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
|
|
| 735 |
gr.Markdown("---")
|
| 736 |
gr.Markdown("machine: 841×1189mm / pen servo 40-90° / [github](https://github.com/Twarner491/dcode) / [model](https://huggingface.co/twarner/dcode-sd-gcode-v3) / mit")
|
| 737 |
|
| 738 |
-
generate_btn.click(generate, [prompt, temperature, max_tokens, num_steps, guidance], [gcode_output, preview])
|
| 739 |
-
prompt.submit(generate, [prompt, temperature, max_tokens, num_steps, guidance], [gcode_output, preview])
|
| 740 |
|
| 741 |
if __name__ == "__main__":
|
| 742 |
demo.launch()
|
|
|
|
| 457 |
# ============================================================================
|
| 458 |
|
| 459 |
def _indefinite_article(phrase: str) -> str:
    """Return "a" or "an" for the lowercase *phrase* the article will precede.

    This is a spelling-based heuristic, not a pronunciation lookup: a leading
    vowel letter selects "an", except for a few common prefixes that are
    usually pronounced with a consonant sound ("unicorn", "european", "one").
    Words such as "hour" or "unimportant" are still mis-classified.
    """
    if phrase.startswith(("eu", "one", "uni", "use", "usu")):
        return "a"
    if phrase.startswith(("a", "e", "i", "o", "u")):
        return "an"
    return "a"


def enhance_prompt(prompt: str) -> str:
    """Enhance prompt to match BLIP caption style from training data.

    BLIP generates captions like:
    - "a drawing of a horse"
    - "a sketch of a cat"
    - "a black and white drawing"
    - "an illustration of a flower"

    The prompt is stripped and lower-cased, then:
    - left unchanged if it already starts with "a ", "an " or "the ";
    - prefixed with an indefinite article if it contains a style keyword
      ("drawing", "sketch", "illustration", "image");
    - otherwise wrapped as "a drawing of a/an <prompt>".

    The article is chosen to agree with the word that follows it, so
    "elephant" becomes "a drawing of an elephant" rather than
    "a drawing of a elephant".

    Args:
        prompt: Free-form user text.

    Returns:
        The caption-style prompt with the fixed style hints appended. An
        empty or whitespace-only prompt yields a generic "a drawing" caption
        instead of a caption with a dangling article.
    """
    style_hints = ", black and white, simple lines, sketch style"
    prompt = prompt.strip().lower()

    if not prompt:
        # Nothing to wrap: avoid emitting "a drawing of a , ...".
        return "a drawing" + style_hints

    # Already in BLIP style
    if prompt.startswith(("a ", "an ", "the ")):
        enhanced = prompt
    # Has style keyword
    elif any(word in prompt for word in ("drawing", "sketch", "illustration", "image")):
        enhanced = f"{_indefinite_article(prompt)} {prompt}"
    # Simple noun - wrap in BLIP style
    else:
        enhanced = f"a drawing of {_indefinite_article(prompt)} {prompt}"

    # Add subtle style hints
    return enhanced + style_hints
|
| 483 |
|
| 484 |
|
| 485 |
@spaces.GPU
|
| 486 |
+
def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, guidance: float, seed: int = -1):
|
| 487 |
"""Generate gcode from text prompt."""
|
| 488 |
if not prompt or not prompt.strip():
|
| 489 |
return "Enter a prompt to generate gcode", gcode_to_svg("")
|
|
|
|
| 501 |
enhanced = enhance_prompt(prompt)
|
| 502 |
print(f"Enhanced prompt: {enhanced}")
|
| 503 |
|
| 504 |
+
# Set seed for reproducibility
|
| 505 |
+
generator = None
|
| 506 |
+
if seed >= 0:
|
| 507 |
+
generator = torch.Generator(device=device).manual_seed(int(seed))
|
| 508 |
+
print(f"Using seed: {seed}")
|
| 509 |
+
|
| 510 |
# Text -> Latent via SD diffusion
|
| 511 |
with torch.no_grad():
|
| 512 |
# Use negative prompt to avoid unwanted styles
|
|
|
|
| 516 |
num_inference_steps=num_steps,
|
| 517 |
guidance_scale=guidance,
|
| 518 |
output_type="latent",
|
| 519 |
+
generator=generator,
|
| 520 |
)
|
| 521 |
latent = result.images.to(dtype)
|
| 522 |
print(f"Latent shape: {latent.shape}, dtype: {latent.dtype}")
|
|
|
|
| 723 |
)
|
| 724 |
|
| 725 |
with gr.Accordion("settings", open=False):
|
| 726 |
+
temperature = gr.Slider(0.3, 1.2, value=0.5, label="temperature", step=0.1)
|
| 727 |
+
max_tokens = gr.Slider(256, 2048, value=2048, step=256, label="max tokens")
|
| 728 |
+
num_steps = gr.Slider(20, 50, value=35, step=5, label="diffusion steps")
|
| 729 |
+
guidance = gr.Slider(5.0, 20.0, value=10.0, step=0.5, label="guidance")
|
| 730 |
+
seed = gr.Number(value=-1, label="seed (-1 = random)", precision=0)
|
| 731 |
|
| 732 |
generate_btn = gr.Button("generate", variant="secondary")
|
| 733 |
|
| 734 |
gr.Examples(
|
| 735 |
examples=[
|
| 736 |
+
["a drawing of a horse"],
|
| 737 |
+
["a sketch of a cat"],
|
| 738 |
+
["a simple flower drawing"],
|
| 739 |
+
["a drawing of a tree"],
|
| 740 |
+
["abstract lines"],
|
| 741 |
+
["a portrait sketch"],
|
| 742 |
],
|
| 743 |
inputs=prompt,
|
| 744 |
label=None,
|
|
|
|
| 754 |
gr.Markdown("---")
|
| 755 |
gr.Markdown("machine: 841×1189mm / pen servo 40-90° / [github](https://github.com/Twarner491/dcode) / [model](https://huggingface.co/twarner/dcode-sd-gcode-v3) / mit")
|
| 756 |
|
| 757 |
+
generate_btn.click(generate, [prompt, temperature, max_tokens, num_steps, guidance, seed], [gcode_output, preview])
|
| 758 |
+
prompt.submit(generate, [prompt, temperature, max_tokens, num_steps, guidance, seed], [gcode_output, preview])
|
| 759 |
|
| 760 |
if __name__ == "__main__":
|
| 761 |
demo.launch()
|