twarner committed on
Commit
9a32c26
·
1 Parent(s): 89d775f

Fix model loading, minimal monochrome design

Browse files
Files changed (1) hide show
  1. app.py +100 -71
app.py CHANGED
@@ -1,4 +1,4 @@
1
- """dcode Gradio Space - Text to Gcode via SD-Gcode Diffusion."""
2
 
3
  import re
4
  import os
@@ -17,7 +17,6 @@ _model = None
17
 
18
 
19
  class GcodeDecoderConfig:
20
- """Configuration for gcode decoder."""
21
  def __init__(
22
  self,
23
  latent_channels: int = 4,
@@ -41,8 +40,6 @@ class GcodeDecoderConfig:
41
 
42
 
43
  class GcodeDecoder(nn.Module):
44
- """Transformer decoder: SD latent -> gcode tokens."""
45
-
46
  def __init__(self, config: GcodeDecoderConfig):
47
  super().__init__()
48
  self.config = config
@@ -120,7 +117,7 @@ class GcodeDecoder(nn.Module):
120
 
121
 
122
  def get_model():
123
- """Load and cache the SD-Gcode model."""
124
  global _model
125
  if _model is None:
126
  from diffusers import StableDiffusionPipeline
@@ -139,7 +136,7 @@ def get_model():
139
  with open(config_path) as f:
140
  config = json.load(f)
141
 
142
- # Load SD pipeline
143
  sd_model_id = config.get("sd_model_id", "runwayml/stable-diffusion-v1-5")
144
  print(f"Loading SD from {sd_model_id}...")
145
  pipe = StableDiffusionPipeline.from_pretrained(
@@ -161,13 +158,31 @@ def get_model():
161
  )
162
  gcode_decoder = GcodeDecoder(decoder_config).to(device, dtype)
163
 
164
- # Load weights
165
- state_dict = torch.load(weights_path, map_location=device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
- # Extract decoder weights
168
  decoder_state = {k.replace("gcode_decoder.", ""): v for k, v in state_dict.items()
169
  if k.startswith("gcode_decoder.")}
170
- gcode_decoder.load_state_dict(decoder_state)
 
 
 
171
  gcode_decoder.eval()
172
 
173
  # Gcode tokenizer
@@ -268,25 +283,24 @@ def gcode_to_svg(gcode: str) -> str:
268
  h = BOUNDS["top"] - BOUNDS["bottom"]
269
  padding = 20
270
 
 
271
  svg = f'''<svg xmlns="http://www.w3.org/2000/svg"
272
  viewBox="{BOUNDS["left"] - padding} {-BOUNDS["top"] - padding} {w + 2*padding} {h + 2*padding}"
273
- style="background: #fafafa; width: 100%; height: 500px; border-radius: 8px; border: 1px solid #e5e5e5;">
274
  <rect x="{BOUNDS["left"]}" y="{-BOUNDS["top"]}" width="{w}" height="{h}"
275
- fill="#fff" stroke="#ccc" stroke-width="2"/>
276
- <line x1="0" y1="{-BOUNDS["top"]}" x2="0" y2="{-BOUNDS["bottom"]}" stroke="#ddd" stroke-width="1"/>
277
- <line x1="{BOUNDS["left"]}" y1="0" x2="{BOUNDS["right"]}" y2="0" stroke="#ddd" stroke-width="1"/>
278
  '''
279
 
280
  for path in paths:
281
  if len(path) < 2:
282
  continue
283
  d = " ".join(f"{'M' if i == 0 else 'L'}{p[0]:.1f},{-p[1]:.1f}" for i, p in enumerate(path))
284
- svg += f'<path d="{d}" fill="none" stroke="#1a1a1a" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>'
285
 
286
  total_points = sum(len(p) for p in paths)
287
  svg += f'''
288
- <text x="{BOUNDS["left"] + 10}" y="{-BOUNDS["top"] + 25}" fill="#666" font-family="monospace" font-size="14">
289
- Paths: {len(paths)} | Points: {total_points}
290
  </text>
291
  '''
292
  svg += "</svg>"
@@ -295,7 +309,7 @@ def gcode_to_svg(gcode: str) -> str:
295
 
296
  @spaces.GPU
297
  def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, guidance: float):
298
- """Generate gcode from text prompt via SD-Gcode diffusion."""
299
  if not prompt or not prompt.strip():
300
  return "Enter a prompt to generate gcode", gcode_to_svg("")
301
 
@@ -307,7 +321,7 @@ def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, g
307
  device = m["device"]
308
  dtype = m["dtype"]
309
 
310
- # 1. Text -> Latent via full SD diffusion
311
  with torch.no_grad():
312
  result = pipe(
313
  prompt,
@@ -315,9 +329,9 @@ def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, g
315
  guidance_scale=guidance,
316
  output_type="latent",
317
  )
318
- latent = result.images.to(dtype) # [1, 4, 64, 64]
319
 
320
- # 2. Latent -> Gcode via trained decoder
321
  with torch.no_grad():
322
  gcode = gcode_decoder.generate(
323
  latent,
@@ -327,11 +341,11 @@ def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, g
327
  )
328
 
329
  gcode = validate_gcode(gcode)
330
- line_count = len(gcode.split("\n"))
331
  svg = gcode_to_svg(gcode)
332
 
333
- gcode_with_header = f"; dcode SD-Gcode output - {line_count} lines\n; Prompt: {prompt}\n; Machine validated\n\n{gcode}"
334
- return gcode_with_header, svg
335
 
336
  except Exception as e:
337
  import traceback
@@ -339,76 +353,91 @@ def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, g
339
  return f"; Error: {e}", gcode_to_svg("")
340
 
341
 
342
- # Custom CSS
343
- custom_css = """
 
 
 
 
 
 
344
  .gradio-container {
345
- max-width: 1200px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  }
347
  """
348
 
349
- with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="emerald")) as demo:
350
- gr.Markdown("""
351
- # dcode
352
- **Text -> Polargraph Gcode via Stable Diffusion**
353
-
354
- Single end-to-end diffusion model: text -> CLIP -> UNet -> latent -> gcode decoder -> gcode
355
-
356
- [GitHub](https://github.com/Twarner491/dcode) | [Model](https://huggingface.co/twarner/dcode-sd-gcode) | [Dataset](https://huggingface.co/datasets/twarner/dcode-polargraph-gcode)
357
- """)
358
 
359
  with gr.Row():
360
  with gr.Column(scale=1):
361
  prompt = gr.Textbox(
362
- label="Prompt",
363
- placeholder="drawing of a cat, abstract spiral, portrait...",
364
- lines=2
 
365
  )
366
 
367
- with gr.Row():
368
- temperature = gr.Slider(0.5, 1.5, value=0.8, label="Temperature")
369
- max_tokens = gr.Slider(256, 1024, value=512, step=128, label="Max Tokens")
 
 
370
 
371
- with gr.Row():
372
- num_steps = gr.Slider(10, 50, value=20, step=5, label="Diffusion Steps")
373
- guidance = gr.Slider(1.0, 15.0, value=7.5, step=0.5, label="Guidance Scale")
374
-
375
- generate_btn = gr.Button("Generate", variant="primary", size="lg")
376
 
377
  gr.Examples(
378
  examples=[
379
- ["line drawing of a cat"],
380
- ["abstract spiral pattern"],
381
- ["simple house with chimney"],
382
  ["portrait sketch"],
383
- ["geometric shapes and lines"],
384
  ],
385
  inputs=prompt,
386
  )
387
 
388
  with gr.Column(scale=2):
389
- preview = gr.HTML(
390
- value=gcode_to_svg(""),
391
- label="Preview",
392
- )
393
 
394
- with gr.Accordion("Gcode Output", open=False):
395
- gcode_output = gr.Code(label="Gcode", language=None, lines=15)
396
 
397
- gr.Markdown("""
398
- ---
399
- **Machine Bounds**: X: +/-420.5mm, Y: +/-594.5mm | Pen servo: 40 deg (down) / 90 deg (up) | **License**: MIT
400
- """)
401
 
402
- generate_btn.click(
403
- generate,
404
- [prompt, temperature, max_tokens, num_steps, guidance],
405
- [gcode_output, preview]
406
- )
407
- prompt.submit(
408
- generate,
409
- [prompt, temperature, max_tokens, num_steps, guidance],
410
- [gcode_output, preview]
411
- )
412
 
413
  if __name__ == "__main__":
414
  demo.launch()
 
1
+ """dcode - Text to Polargraph Gcode via Stable Diffusion"""
2
 
3
  import re
4
  import os
 
17
 
18
 
19
  class GcodeDecoderConfig:
 
20
  def __init__(
21
  self,
22
  latent_channels: int = 4,
 
40
 
41
 
42
  class GcodeDecoder(nn.Module):
 
 
43
  def __init__(self, config: GcodeDecoderConfig):
44
  super().__init__()
45
  self.config = config
 
117
 
118
 
119
  def get_model():
120
+ """Load and cache the SD-Gcode model with full finetuned weights."""
121
  global _model
122
  if _model is None:
123
  from diffusers import StableDiffusionPipeline
 
136
  with open(config_path) as f:
137
  config = json.load(f)
138
 
139
+ # Load SD pipeline (we'll replace weights with finetuned ones)
140
  sd_model_id = config.get("sd_model_id", "runwayml/stable-diffusion-v1-5")
141
  print(f"Loading SD from {sd_model_id}...")
142
  pipe = StableDiffusionPipeline.from_pretrained(
 
158
  )
159
  gcode_decoder = GcodeDecoder(decoder_config).to(device, dtype)
160
 
161
+ # Load ALL finetuned weights
162
+ print("Loading finetuned weights...")
163
+ state_dict = torch.load(weights_path, map_location=device, weights_only=False)
164
+
165
+ # Load text encoder weights
166
+ text_encoder_state = {k.replace("text_encoder.", ""): v for k, v in state_dict.items()
167
+ if k.startswith("text_encoder.")}
168
+ if text_encoder_state:
169
+ pipe.text_encoder.load_state_dict(text_encoder_state, strict=False)
170
+ print(f"Loaded {len(text_encoder_state)} text encoder weights")
171
+
172
+ # Load UNet weights
173
+ unet_state = {k.replace("unet.", ""): v for k, v in state_dict.items()
174
+ if k.startswith("unet.")}
175
+ if unet_state:
176
+ pipe.unet.load_state_dict(unet_state, strict=False)
177
+ print(f"Loaded {len(unet_state)} UNet weights")
178
 
179
+ # Load gcode decoder weights
180
  decoder_state = {k.replace("gcode_decoder.", ""): v for k, v in state_dict.items()
181
  if k.startswith("gcode_decoder.")}
182
+ if decoder_state:
183
+ gcode_decoder.load_state_dict(decoder_state, strict=False)
184
+ print(f"Loaded {len(decoder_state)} decoder weights")
185
+
186
  gcode_decoder.eval()
187
 
188
  # Gcode tokenizer
 
283
  h = BOUNDS["top"] - BOUNDS["bottom"]
284
  padding = 20
285
 
286
+ # Minimal monochrome styling
287
  svg = f'''<svg xmlns="http://www.w3.org/2000/svg"
288
  viewBox="{BOUNDS["left"] - padding} {-BOUNDS["top"] - padding} {w + 2*padding} {h + 2*padding}"
289
+ style="background: #fff; width: 100%; height: 480px; border: 1px solid #e0e0e0;">
290
  <rect x="{BOUNDS["left"]}" y="{-BOUNDS["top"]}" width="{w}" height="{h}"
291
+ fill="#fafafa" stroke="#ccc" stroke-width="1"/>
 
 
292
  '''
293
 
294
  for path in paths:
295
  if len(path) < 2:
296
  continue
297
  d = " ".join(f"{'M' if i == 0 else 'L'}{p[0]:.1f},{-p[1]:.1f}" for i, p in enumerate(path))
298
+ svg += f'<path d="{d}" fill="none" stroke="#000" stroke-width="1" stroke-linecap="round" stroke-linejoin="round"/>'
299
 
300
  total_points = sum(len(p) for p in paths)
301
  svg += f'''
302
+ <text x="{BOUNDS["left"] + 8}" y="{-BOUNDS["top"] + 20}" fill="#999" font-family="monospace" font-size="12">
303
+ {len(paths)} paths / {total_points} points
304
  </text>
305
  '''
306
  svg += "</svg>"
 
309
 
310
  @spaces.GPU
311
  def generate(prompt: str, temperature: float, max_tokens: int, num_steps: int, guidance: float):
312
+ """Generate gcode from text prompt."""
313
  if not prompt or not prompt.strip():
314
  return "Enter a prompt to generate gcode", gcode_to_svg("")
315
 
 
321
  device = m["device"]
322
  dtype = m["dtype"]
323
 
324
+ # Text -> Latent via SD diffusion
325
  with torch.no_grad():
326
  result = pipe(
327
  prompt,
 
329
  guidance_scale=guidance,
330
  output_type="latent",
331
  )
332
+ latent = result.images.to(dtype)
333
 
334
+ # Latent -> Gcode via trained decoder
335
  with torch.no_grad():
336
  gcode = gcode_decoder.generate(
337
  latent,
 
341
  )
342
 
343
  gcode = validate_gcode(gcode)
344
+ line_count = len([l for l in gcode.split("\n") if l.strip()])
345
  svg = gcode_to_svg(gcode)
346
 
347
+ header = f"; dcode output\n; prompt: {prompt}\n; {line_count} commands\n\n"
348
+ return header + gcode, svg
349
 
350
  except Exception as e:
351
  import traceback
 
353
  return f"; Error: {e}", gcode_to_svg("")
354
 
355
 
356
+ # Minimal monochrome CSS
357
+ css = """
358
+ @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500&display=swap');
359
+
360
+ * {
361
+ font-family: 'IBM Plex Mono', monospace !important;
362
+ }
363
+
364
  .gradio-container {
365
+ max-width: 900px !important;
366
+ margin: auto;
367
+ background: #fff !important;
368
+ }
369
+
370
+ .gr-button-primary {
371
+ background: #000 !important;
372
+ border: none !important;
373
+ color: #fff !important;
374
+ font-weight: 500 !important;
375
+ }
376
+
377
+ .gr-button-primary:hover {
378
+ background: #333 !important;
379
+ }
380
+
381
+ footer {
382
+ display: none !important;
383
+ }
384
+
385
+ h1 {
386
+ font-weight: 500 !important;
387
+ letter-spacing: -0.02em !important;
388
+ }
389
+
390
+ .gr-box {
391
+ border-radius: 0 !important;
392
+ border: 1px solid #e0e0e0 !important;
393
+ }
394
+
395
+ input, textarea {
396
+ border-radius: 0 !important;
397
  }
398
  """
399
 
400
+ with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
401
+ gr.Markdown("# dcode")
402
+ gr.Markdown("text → polargraph gcode via stable diffusion")
 
 
 
 
 
 
403
 
404
  with gr.Row():
405
  with gr.Column(scale=1):
406
  prompt = gr.Textbox(
407
+ label="prompt",
408
+ placeholder="describe what to draw...",
409
+ lines=2,
410
+ show_label=True,
411
  )
412
 
413
+ with gr.Accordion("settings", open=False):
414
+ temperature = gr.Slider(0.5, 1.5, value=0.8, label="temperature", step=0.1)
415
+ max_tokens = gr.Slider(256, 1024, value=512, step=128, label="max tokens")
416
+ num_steps = gr.Slider(10, 50, value=20, step=5, label="diffusion steps")
417
+ guidance = gr.Slider(1.0, 15.0, value=7.5, step=0.5, label="guidance")
418
 
419
+ generate_btn = gr.Button("generate", variant="primary")
 
 
 
 
420
 
421
  gr.Examples(
422
  examples=[
423
+ ["a line drawing of a horse"],
 
 
424
  ["portrait sketch"],
425
+ ["geometric shapes"],
426
  ],
427
  inputs=prompt,
428
  )
429
 
430
  with gr.Column(scale=2):
431
+ preview = gr.HTML(value=gcode_to_svg(""))
 
 
 
432
 
433
+ with gr.Accordion("gcode", open=False):
434
+ gcode_output = gr.Code(label=None, language=None, lines=12)
435
 
436
+ gr.Markdown("---")
437
+ gr.Markdown("machine: 841×1189mm / pen servo 40-90° / [github](https://github.com/Twarner491/dcode) / [model](https://huggingface.co/twarner/dcode-sd-gcode) / mit")
 
 
438
 
439
+ generate_btn.click(generate, [prompt, temperature, max_tokens, num_steps, guidance], [gcode_output, preview])
440
+ prompt.submit(generate, [prompt, temperature, max_tokens, num_steps, guidance], [gcode_output, preview])
 
 
 
 
 
 
 
 
441
 
442
  if __name__ == "__main__":
443
  demo.launch()