Pasipid791 committed on
Commit
4e1acb7
·
verified ·
1 Parent(s): d8d9c3f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -103
app.py CHANGED
@@ -21,32 +21,29 @@ try:
21
  LlamaTokenizer,
22
  LlamaForCausalLM
23
  )
24
- from huggingface_hub import snapshot_download
25
  print("✅ Successfully imported transformers and huggingface_hub")
26
  except ImportError as e:
27
  print(f"❌ Import error: {e}")
28
  print("Installing required packages...")
29
  subprocess.run([sys.executable, "-m", "pip", "install", "transformers", "huggingface_hub", "torch", "accelerate"])
30
  from transformers import AutoTokenizer, AutoModelForCausalLM
31
- from huggingface_hub import snapshot_download
32
 
33
  class CADFusionModel:
34
- def __init__(self, model_path: str = "microsoft/CADFusion", version: str = "v1_1"):
35
  """
36
  Initialize the CADFusion model
37
 
38
  Args:
39
  model_path: Path to the model on Hugging Face Hub
40
- version: Model version (v1_0 or v1_1)
41
  """
42
  self.model_path = model_path
43
- self.version = version
44
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
45
 
46
- print(f"🚀 Initializing CADFusion {version} on {self.device}")
47
-
48
- # Download model if not already present
49
- self.model_dir = self._download_model()
50
 
51
  # Initialize tokenizer and model
52
  self.tokenizer = None
@@ -56,63 +53,52 @@ class CADFusionModel:
56
  # CAD sequence processing utilities
57
  self.max_sequence_length = 512
58
 
59
- def _download_model(self) -> str:
60
- """Download the model from Hugging Face Hub"""
61
  try:
62
- cache_dir = "./model_cache"
63
- model_dir = snapshot_download(
64
- repo_id=self.model_path,
65
- revision=self.version,
66
- cache_dir=cache_dir,
 
 
 
67
  token=os.getenv("HF_TOKEN") # Use HF token if available
68
  )
69
- print(f"✅ Model downloaded to: {model_dir}")
70
- return model_dir
71
- except Exception as e:
72
- print(f"❌ Error downloading model: {e}")
73
- # Fallback to local directory structure
74
- return f"./{self.version}"
75
-
76
- def _load_model(self):
77
- """Load the tokenizer and model"""
78
- try:
79
- # Try loading as LLaMA model first (CADFusion is based on LLaMA)
80
- model_files = list(Path(self.model_dir).glob("*.bin")) + list(Path(self.model_dir).glob("*.safetensors"))
81
 
82
- if model_files:
83
- print(f"📦 Loading model from {self.model_dir}")
84
-
85
- # Load tokenizer
86
- self.tokenizer = AutoTokenizer.from_pretrained(
87
- self.model_dir,
88
- trust_remote_code=True,
89
- padding_side="left"
90
- )
91
-
92
- # Ensure pad token exists
93
- if self.tokenizer.pad_token is None:
94
- self.tokenizer.pad_token = self.tokenizer.eos_token
95
-
96
- # Load model
97
- self.model = AutoModelForCausalLM.from_pretrained(
98
- self.model_dir,
99
- torch_dtype=torch.float16 if self.device.type == "cuda" else torch.float32,
100
- device_map="auto" if self.device.type == "cuda" else None,
101
- trust_remote_code=True
102
- )
103
-
104
- if self.device.type != "cuda":
105
- self.model = self.model.to(self.device)
106
-
107
- self.model.eval()
108
- print("✅ Model loaded successfully")
109
-
110
- else:
111
- raise FileNotFoundError("No model files found")
112
-
113
  except Exception as e:
114
  print(f"❌ Error loading model: {e}")
115
- print("📝 Using placeholder model for demo purposes")
116
  self._setup_placeholder_model()
117
 
118
  def _setup_placeholder_model(self):
@@ -164,9 +150,13 @@ class CADFusionModel:
164
  # Preprocess input text
165
  processed_text = self.preprocess_text(text)
166
 
 
 
 
 
167
  # Tokenize input
168
  inputs = self.tokenizer(
169
- processed_text,
170
  return_tensors="pt",
171
  padding=True,
172
  truncation=True,
@@ -184,7 +174,8 @@ class CADFusionModel:
184
  top_p=0.9,
185
  top_k=50,
186
  pad_token_id=self.tokenizer.pad_token_id,
187
- eos_token_id=self.tokenizer.eos_token_id
 
188
  )
189
 
190
  # Decode output
@@ -194,8 +185,10 @@ class CADFusionModel:
194
  )
195
 
196
  # Extract the generated part (remove input prompt)
197
- if processed_text in generated_sequence:
198
- generated_part = generated_sequence.replace(processed_text, "").strip()
 
 
199
  else:
200
  generated_part = generated_sequence
201
 
@@ -223,10 +216,14 @@ class CADFusionModel:
223
  """Generate a demo CAD sequence for demonstration purposes"""
224
  # This is a simplified demo sequence based on the input text
225
  demo_sequences = {
226
- "cube": "Sketch('xy') -> Rectangle(0, 0, 10, 10) -> Extrude(10)",
227
- "cylinder": "Sketch('xy') -> Circle(0, 0, 5) -> Extrude(15)",
228
- "sphere": "Sketch('xy') -> Circle(0, 0, 5) -> Revolve(360)",
229
- "bracket": "Sketch('xy') -> Rectangle(0, 0, 20, 10) -> Extrude(5) -> Sketch('top') -> Circle(15, 5, 2) -> Cut(5)"
 
 
 
 
230
  }
231
 
232
  text_lower = text.lower()
@@ -234,8 +231,8 @@ class CADFusionModel:
234
  if key in text_lower:
235
  return sequence
236
 
237
- # Default sequence
238
- return "Sketch('xy') -> Rectangle(0, 0, 10, 10) -> Extrude(5)"
239
 
240
  # Global model instance
241
  model = None
@@ -310,18 +307,31 @@ def create_gradio_interface():
310
  border-radius: 8px;
311
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
312
  }
 
 
 
 
 
 
 
 
313
  """
314
 
315
  with gr.Blocks(css=css, title="CADFusion - Text to CAD Generation") as interface:
316
 
317
  # Header
318
- gr.Markdown("""
319
- # 🔧 CADFusion - Text to CAD Generation
320
-
321
- Convert natural language descriptions into CAD parametric sequences using Microsoft's CADFusion model.
 
 
 
322
 
323
- **Model**: microsoft/CADFusion v1.1
324
- **Paper**: [Text-to-CAD Generation Through Infusing Visual Feedback in Large Language Models](https://arxiv.org/abs/2501.19054)
 
 
325
  """)
326
 
327
  with gr.Row():
@@ -331,8 +341,8 @@ def create_gradio_interface():
331
  text_input = gr.Textbox(
332
  label="CAD Description",
333
  placeholder="Describe the CAD object you want to create (e.g., 'Create a cylindrical bracket with mounting holes')",
334
- lines=3,
335
- value="Create a simple rectangular bracket with two circular holes"
336
  )
337
 
338
  # Parameters section
@@ -367,7 +377,7 @@ def create_gradio_interface():
367
  gr.Markdown("### 🎯 Generated CAD Sequence")
368
  sequence_output = gr.Textbox(
369
  label="Parametric Sequence",
370
- lines=8,
371
  interactive=False,
372
  placeholder="Generated CAD sequence will appear here..."
373
  )
@@ -395,33 +405,47 @@ def create_gradio_interface():
395
  ["Design a gear wheel with 12 teeth"],
396
  ["Make a pipe elbow joint at 90 degrees"],
397
  ["Create a hexagonal bolt head"],
398
- ["Design a simple housing enclosure"]
 
 
399
  ],
400
  inputs=[text_input],
401
  label="Click on any example to try it out"
402
  )
403
 
404
  # Information section
405
- gr.Markdown("""
406
- ### ℹ️ About CADFusion
407
-
408
- CADFusion is a state-of-the-art text-to-CAD generation model that:
409
- - Uses visual feedback to enhance LLM performance
410
- - Generates parametric sequences for CAD modeling
411
- - Supports complex 3D object descriptions
412
- - Based on alternating sequential and visual learning stages
413
-
414
- **Usage Tips**:
415
- - Be specific about shapes, dimensions, and features
416
- - Use technical CAD terminology when possible
417
- - Mention materials or constraints if relevant
418
- - Start with simple descriptions and add complexity gradually
419
-
420
- **Model Info**:
421
- - Version: v1.1 (9 rounds of alternate training)
422
- - Base Model: LLaMA architecture
423
- - Training Data: SkexGen dataset with human annotations
424
- """)
 
 
 
 
 
 
 
 
 
 
 
 
425
 
426
  # Connect the generate button to the function
427
  generate_btn.click(
@@ -430,11 +454,22 @@ def create_gradio_interface():
430
  outputs=[sequence_output, status_output, params_output],
431
  show_progress=True
432
  )
 
 
 
 
 
 
 
 
433
 
434
  return interface
435
 
436
  def main():
437
  """Main function to run the Gradio app"""
 
 
 
438
  print("🌟 Starting CADFusion Gradio App")
439
 
440
  # Initialize model
@@ -449,11 +484,10 @@ def main():
449
  server_name="0.0.0.0", # Allow external access
450
  server_port=7860, # Standard Gradio port
451
  share=False, # Set to True for public sharing
452
- debug=True, # Enable debug mode
453
  show_error=True, # Show errors in interface
454
  quiet=False # Show startup logs
455
  )
456
 
457
  if __name__ == "__main__":
458
- main()
459
-
 
21
  LlamaTokenizer,
22
  LlamaForCausalLM
23
  )
24
+ from huggingface_hub import snapshot_download, hf_hub_download
25
  print("✅ Successfully imported transformers and huggingface_hub")
26
  except ImportError as e:
27
  print(f"❌ Import error: {e}")
28
  print("Installing required packages...")
29
  subprocess.run([sys.executable, "-m", "pip", "install", "transformers", "huggingface_hub", "torch", "accelerate"])
30
  from transformers import AutoTokenizer, AutoModelForCausalLM
31
+ from huggingface_hub import snapshot_download, hf_hub_download
32
 
33
  class CADFusionModel:
34
+ def __init__(self, model_path: str = "microsoft/CADFusion", revision: str = "main"):
35
  """
36
  Initialize the CADFusion model
37
 
38
  Args:
39
  model_path: Path to the model on Hugging Face Hub
40
+ revision: Model revision/branch (use 'main' instead of version numbers)
41
  """
42
  self.model_path = model_path
43
+ self.revision = revision
44
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
45
 
46
+ print(f"🚀 Initializing CADFusion from {model_path}@{revision} on {self.device}")
 
 
 
47
 
48
  # Initialize tokenizer and model
49
  self.tokenizer = None
 
53
  # CAD sequence processing utilities
54
  self.max_sequence_length = 512
55
 
56
+ def _load_model(self):
57
+ """Load the tokenizer and model directly from Hugging Face Hub"""
58
  try:
59
+ print(f"📦 Loading model from {self.model_path}")
60
+
61
+ # Load tokenizer
62
+ self.tokenizer = AutoTokenizer.from_pretrained(
63
+ self.model_path,
64
+ revision=self.revision,
65
+ trust_remote_code=True,
66
+ padding_side="left",
67
  token=os.getenv("HF_TOKEN") # Use HF token if available
68
  )
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
+ # Ensure pad token exists
71
+ if self.tokenizer.pad_token is None:
72
+ self.tokenizer.pad_token = self.tokenizer.eos_token
73
+
74
+ # Load model with appropriate dtype based on device
75
+ model_kwargs = {
76
+ "revision": self.revision,
77
+ "trust_remote_code": True,
78
+ "torch_dtype": torch.float16 if self.device.type == "cuda" else torch.float32,
79
+ "token": os.getenv("HF_TOKEN")
80
+ }
81
+
82
+ # Add device mapping for CUDA
83
+ if self.device.type == "cuda":
84
+ model_kwargs["device_map"] = "auto"
85
+ model_kwargs["low_cpu_mem_usage"] = True
86
+
87
+ self.model = AutoModelForCausalLM.from_pretrained(
88
+ self.model_path,
89
+ **model_kwargs
90
+ )
91
+
92
+ # Move to device if not using device_map
93
+ if self.device.type != "cuda":
94
+ self.model = self.model.to(self.device)
95
+
96
+ self.model.eval()
97
+ print("✅ Model loaded successfully")
98
+
 
 
99
  except Exception as e:
100
  print(f"❌ Error loading model: {e}")
101
+ print("📝 Setting up placeholder model for demo purposes")
102
  self._setup_placeholder_model()
103
 
104
  def _setup_placeholder_model(self):
 
150
  # Preprocess input text
151
  processed_text = self.preprocess_text(text)
152
 
153
+ # Add special formatting for CADFusion if needed
154
+ # CADFusion may expect specific prompt formatting
155
+ prompt = f"Design a CAD model: {processed_text}\nCAD sequence:"
156
+
157
  # Tokenize input
158
  inputs = self.tokenizer(
159
+ prompt,
160
  return_tensors="pt",
161
  padding=True,
162
  truncation=True,
 
174
  top_p=0.9,
175
  top_k=50,
176
  pad_token_id=self.tokenizer.pad_token_id,
177
+ eos_token_id=self.tokenizer.eos_token_id,
178
+ repetition_penalty=1.1
179
  )
180
 
181
  # Decode output
 
185
  )
186
 
187
  # Extract the generated part (remove input prompt)
188
+ if "CAD sequence:" in generated_sequence:
189
+ generated_part = generated_sequence.split("CAD sequence:")[-1].strip()
190
+ elif prompt in generated_sequence:
191
+ generated_part = generated_sequence.replace(prompt, "").strip()
192
  else:
193
  generated_part = generated_sequence
194
 
 
216
  """Generate a demo CAD sequence for demonstration purposes"""
217
  # This is a simplified demo sequence based on the input text
218
  demo_sequences = {
219
+ "cube": "NewSketch().Rectangle(0, 0, 10, 10).Extrude(10)",
220
+ "cylinder": "NewSketch().Circle(0, 0, 5).Extrude(15)",
221
+ "sphere": "NewSketch().Circle(0, 0, 5).Revolve(360, [0, 0, 1])",
222
+ "bracket": "NewSketch().Rectangle(0, 0, 20, 10).Extrude(5).NewSketch('top').Circle(15, 5, 2).Cut(5)",
223
+ "hole": "NewSketch().Rectangle(0, 0, 15, 8).Extrude(4).NewSketch('top').Circle(7.5, 4, 1.5).Cut(4)",
224
+ "gear": "NewSketch().Circle(0, 0, 10).Extrude(3).NewSketch('top').Circle(0, 0, 2).Cut(3)",
225
+ "pipe": "NewSketch().Circle(0, 0, 8).Extrude(20).NewSketch('top').Circle(0, 0, 6).Cut(20)",
226
+ "bolt": "NewSketch().Circle(0, 0, 4).Extrude(15).NewSketch('top').RegularPolygon(6, 0, 0, 6).Extrude(3)"
227
  }
228
 
229
  text_lower = text.lower()
 
231
  if key in text_lower:
232
  return sequence
233
 
234
+ # Default sequence for rectangular objects
235
+ return "NewSketch().Rectangle(0, 0, 10, 10).Extrude(5)"
236
 
237
  # Global model instance
238
  model = None
 
307
  border-radius: 8px;
308
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
309
  }
310
+ .title-container {
311
+ text-align: center;
312
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
313
+ padding: 2rem;
314
+ border-radius: 10px;
315
+ margin-bottom: 2rem;
316
+ color: white;
317
+ }
318
  """
319
 
320
  with gr.Blocks(css=css, title="CADFusion - Text to CAD Generation") as interface:
321
 
322
  # Header
323
+ with gr.HTML():
324
+ gr.HTML("""
325
+ <div class="title-container">
326
+ <h1>🔧 CADFusion - Text to CAD Generation</h1>
327
+ <p>Convert natural language descriptions into CAD parametric sequences using Microsoft's CADFusion model.</p>
328
+ </div>
329
+ """)
330
 
331
+ gr.Markdown("""
332
+ **Model**: microsoft/CADFusion (based on LLaMA-3-8B)
333
+ **Paper**: [Text-to-CAD Generation Through Infusing Visual Feedback in Large Language Models](https://arxiv.org/abs/2501.19054)
334
+ **Repository**: [GitHub](https://github.com/microsoft/CADFusion)
335
  """)
336
 
337
  with gr.Row():
 
341
  text_input = gr.Textbox(
342
  label="CAD Description",
343
  placeholder="Describe the CAD object you want to create (e.g., 'Create a cylindrical bracket with mounting holes')",
344
+ lines=4,
345
+ value="Create a rectangular bracket with two circular mounting holes"
346
  )
347
 
348
  # Parameters section
 
377
  gr.Markdown("### 🎯 Generated CAD Sequence")
378
  sequence_output = gr.Textbox(
379
  label="Parametric Sequence",
380
+ lines=10,
381
  interactive=False,
382
  placeholder="Generated CAD sequence will appear here..."
383
  )
 
405
  ["Design a gear wheel with 12 teeth"],
406
  ["Make a pipe elbow joint at 90 degrees"],
407
  ["Create a hexagonal bolt head"],
408
+ ["Design a simple housing enclosure"],
409
+ ["Create a rectangular plate with center hole"],
410
+ ["Design a cylindrical bearing housing"]
411
  ],
412
  inputs=[text_input],
413
  label="Click on any example to try it out"
414
  )
415
 
416
  # Information section
417
+ with gr.Accordion("ℹ️ About CADFusion", open=False):
418
+ gr.Markdown("""
419
+ ### Model Overview
420
+
421
+ CADFusion is a state-of-the-art text-to-CAD generation model that:
422
+ - Uses visual feedback to enhance LLM performance
423
+ - Generates parametric sequences for CAD modeling
424
+ - Supports complex 3D object descriptions
425
+ - Based on alternating sequential and visual learning stages
426
+
427
+ ### Training Approach
428
+ - **Sequential Learning**: Fine-tuning LLM with paired text-CAD data
429
+ - **Visual Feedback**: Using vision-language models to improve generation quality
430
+ - **Alternating Training**: 9 rounds of SL and VF stages for optimal performance
431
+
432
+ ### Usage Tips
433
+ - Be specific about shapes, dimensions, and features
434
+ - Use technical CAD terminology when possible
435
+ - Mention materials or constraints if relevant
436
+ - Start with simple descriptions and add complexity gradually
437
+
438
+ ### Model Specifications
439
+ - **Base Model**: LLaMA-3-8B
440
+ - **Training Data**: SkexGen dataset with human annotations
441
+ - **License**: MIT License
442
+ - **Intended Use**: Research and educational purposes
443
+
444
+ ### Performance
445
+ CADFusion significantly outperforms baselines like GPT-4o and Text2CAD:
446
+ - **VLM Score**: 8.96 (vs 5.13 for GPT-4o, 2.01 for Text2CAD)
447
+ - **Better**: Generation diversity, visual quality, and technical accuracy
448
+ """)
449
 
450
  # Connect the generate button to the function
451
  generate_btn.click(
 
454
  outputs=[sequence_output, status_output, params_output],
455
  show_progress=True
456
  )
457
+
458
+ # Auto-generate on example selection
459
+ examples.click(
460
+ fn=generate_cad,
461
+ inputs=[text_input, max_length, temperature],
462
+ outputs=[sequence_output, status_output, params_output],
463
+ show_progress=True
464
+ )
465
 
466
  return interface
467
 
468
  def main():
469
  """Main function to run the Gradio app"""
470
+ print("===== Application Startup at {} =====".format(
471
+ __import__('datetime').datetime.now().strftime('%Y-%m-%d %H:%M:%S')
472
+ ))
473
  print("🌟 Starting CADFusion Gradio App")
474
 
475
  # Initialize model
 
484
  server_name="0.0.0.0", # Allow external access
485
  server_port=7860, # Standard Gradio port
486
  share=False, # Set to True for public sharing
487
+ debug=False, # Disable debug mode in production
488
  show_error=True, # Show errors in interface
489
  quiet=False # Show startup logs
490
  )
491
 
492
  if __name__ == "__main__":
493
+ main()