Abhiroopvanaone committed on
Commit
10db99e
·
verified ·
1 Parent(s): f461213

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +245 -141
app.py CHANGED
@@ -1,161 +1,265 @@
1
- import os
2
- os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 
 
3
 
4
- import gradio as gr
5
  import torch
6
- from transformers import pipeline
 
 
7
  from PIL import Image
8
- import spaces
 
 
 
9
 
10
# Global model storage
# NOTE(review): not referenced by any function in the visible code; the
# pipeline below is rebuilt on every call.
models = {}


def _generated_text_to_str(generated):
    """Reduce a pipeline ``generated_text`` value to a plain string.

    A plain string is returned unchanged.  When the value is a non-empty
    list whose last entry is a dict with a string ``"content"`` (the shape a
    chat-style conversation takes), that content is returned.  Anything else
    falls back to ``str(generated)``.
    """
    if isinstance(generated, str):
        return generated
    if isinstance(generated, list) and generated:
        last_message = generated[-1]
        content = last_message.get("content") if isinstance(last_message, dict) else None
        if isinstance(content, str):
            return content
    return str(generated)


# Exceptions raised here are not handled locally; generate_code() wraps the
# call in try/except so Zero GPU failures surface as a message in the UI.
@spaces.GPU(duration=120)
def generate_on_gpu(image_array, model_name, prompt_text):
    """Run one image+text generation and return the generated text.

    Args:
        image_array: Array accepted by ``PIL.Image.fromarray``.
        model_name: Model identifier passed to ``transformers.pipeline``.
        prompt_text: Text part of the single user message.

    Returns:
        The generated text as a ``str``.  Chat-style (list of messages)
        results are reduced to the last message's content so callers can
        always treat the value as text.
    """
    pipe = pipeline(
        "image-text-to-text",
        model=model_name,
        device_map="auto",
        torch_dtype=torch.float16,
        trust_remote_code=True,
    )

    # Convert array back to PIL
    image = Image.fromarray(image_array)

    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": prompt_text},
        ],
    }]

    result = pipe(messages, max_new_tokens=512, temperature=0.7)

    if isinstance(result, list) and result:
        # presumably a list of message dicts for chat input — confirm against
        # the installed transformers version; plain strings pass through.
        return _generated_text_to_str(result[0].get("generated_text", str(result)))
    return str(result)
42
-
43
def generate_code(image, model_choice, prompt_style):
    """Build the Markdown shown in the UI for one generation request.

    Args:
        image: Uploaded image, or ``None`` when nothing was uploaded.
        model_choice: Key into the model map (e.g. ``"GLM-4.5V-AWQ"``).
        prompt_style: Key into the prompt table (``"Simple"``,
            ``"Detailed"`` or ``"Chain-of-Thought"``).

    Returns:
        A Markdown string: the generated CADQuery code plus run info, a
        prompt to upload an image, or a "Generation Failed" message.  Any
        exception raised after the image check (unknown key, GPU failure)
        is reported in the returned text instead of being raised.
    """
    if image is None:
        return "❌ Please upload an image first."

    # Prompts
    prompts = {
        "Simple": "Generate CADQuery Python code for this 3D model:",
        "Detailed": "Analyze this 3D CAD model and generate Python CADQuery code.\n\nRequirements:\n- Import cadquery as cq\n- Store result in 'result' variable\n- Use proper CADQuery syntax\n\nCode:",
        "Chain-of-Thought": "Analyze this 3D CAD model step by step:\n\nStep 1: Identify the basic geometry\nStep 2: Note any features\nStep 3: Generate clean CADQuery Python code\n\n```python\nimport cadquery as cq\n\n# Generated code:"
    }

    try:
        # Model mapping
        model_map = {
            "GLM-4.5V-AWQ": "QuantTrio/GLM-4.5V-AWQ",
            "GLM-4.5V-FP8": "zai-org/GLM-4.5V-FP8",
            "GLM-4.5V": "zai-org/GLM-4.5V"
        }

        model_name = model_map[model_choice]
        prompt_text = prompts[prompt_style]

        # Convert PIL to array for passing
        import numpy as np
        image_array = np.array(image)

        # Call GPU function
        generated_text = generate_on_gpu(image_array, model_name, prompt_text)

        # Extract code
        code = generated_text.strip()
        fence = "```python"
        if fence in code:
            start = code.find(fence) + len(fence)
            end = code.find("```", start)
            if end == -1:
                # Output stopped before the closing fence (e.g. token limit):
                # keep everything after the opening fence so the marker is
                # not embedded inside the fence added below.
                code = code[start:].strip()
            elif end > start:
                code = code[start:end].strip()

        if "import cadquery" not in code:
            code = "import cadquery as cq\n\n" + code

        return f"""## 🎯 Generated CADQuery Code

```python
{code}
```

## πŸ“Š Info
- **Model**: {model_choice}
- **Prompt**: {prompt_style}
- **Device**: GPU

## πŸ”§ Usage
```bash
pip install cadquery
python your_script.py
```
"""

    except Exception as e:
        # Top-level UI boundary: show the (truncated) error instead of raising.
        return f"❌ **Generation Failed**: {str(e)[:500]}"
104
-
105
def system_info():
    """Return a Markdown summary of CUDA availability and the PyTorch version."""
    has_cuda = torch.cuda.is_available()
    device_total = torch.cuda.device_count() if has_cuda else 0
    device_label = "GPU" if has_cuda else "CPU"

    # One entry per output line; the trailing empty entry keeps the final newline.
    rows = [
        "## πŸ–₯️ System Information",
        "",
        f"- **CUDA Available**: {has_cuda}",
        f"- **CUDA Devices**: {device_total}",
        f"- **PyTorch Version**: {torch.__version__}",
        f"- **Device**: {device_label}",
        "",
    ]
    return "\n".join(rows)
115
 
116
# Create interface WITHOUT ssr_mode
with gr.Blocks(title="GLM-4.5V CAD Generator", theme=gr.themes.Soft()) as demo:
    # Page header
    gr.Markdown("""
    # πŸ”§ GLM-4.5V CAD Generator

    Generate CADQuery Python code from 3D CAD model images using GLM-4.5V models!

    **Models**: GLM-4.5V-AWQ (fastest) | GLM-4.5V-FP8 (balanced) | GLM-4.5V (best quality)
    """)

    # Tab 1: inputs in the left column, generated Markdown in the right column
    with gr.Tab("πŸš€ Generate"):
        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Upload CAD Model Image")
                model_choice = gr.Dropdown(
                    label="Select Model",
                    choices=["GLM-4.5V-AWQ", "GLM-4.5V-FP8", "GLM-4.5V"],
                    value="GLM-4.5V-AWQ",
                )
                prompt_style = gr.Dropdown(
                    label="Prompt Style",
                    choices=["Simple", "Detailed", "Chain-of-Thought"],
                    value="Chain-of-Thought",
                )
                generate_btn = gr.Button("πŸš€ Generate CADQuery Code", variant="primary")

            with gr.Column():
                output = gr.Markdown("Upload an image and click Generate!")

        # generate_code receives (image, model, prompt style) and returns Markdown
        generate_btn.click(
            fn=generate_code,
            inputs=[image_input, model_choice, prompt_style],
            outputs=output,
        )

    # Tab 2: system information, filled on page load and on demand
    with gr.Tab("βš™οΈ System"):
        info_display = gr.Markdown()
        refresh_btn = gr.Button("πŸ”„ Refresh")

    demo.load(fn=system_info, outputs=info_display)
    refresh_btn.click(fn=system_info, outputs=info_display)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
if __name__ == "__main__":
    # Startup banner, then serve the queued app with SSR switched off.
    print("πŸš€ Starting GLM-4.5V CAD Generator...")
    print("CUDA available: " + str(torch.cuda.is_available()))
    demo.queue().launch(ssr_mode=False)  # Disable SSR mode
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CPU-Friendly Training Script for GLM-4.5V CAD Generation
3
+ Simplified version for testing and development
4
+ """
5
 
 
6
import json
import os
from dataclasses import dataclass
from typing import Any, Dict, List

import torch
from datasets import load_dataset
from peft import LoraConfig, TaskType, get_peft_model
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
15
 
16
# Simple configuration for CPU testing
CONFIG = dict(
    base_model="microsoft/DialoGPT-small",  # Small model for CPU testing
    dataset_name="CADCODER/GenCAD-Code",
    output_dir="./test-cad-model",
    max_samples=50,  # Very small for CPU
    batch_size=1,
    gradient_accumulation=4,
    epochs=1,
    learning_rate=5e-5,
    max_length=512,
)
28
 
29
@dataclass
class SimpleDataCollator:
    """Simple data collator for text-only causal-LM training.

    Calling the collator tokenizes the ``"text"`` field of each feature into
    one padded batch and adds ``labels`` in which padding positions are set
    to -100.
    """

    # Callable tokenizer; must accept a list of strings plus the
    # return_tensors/padding/truncation/max_length keywords used below.
    tokenizer: Any
    # Upper bound on tokens per example, forwarded to the tokenizer.
    max_length: int = 512

    def __call__(self, features: List[Dict]) -> Dict[str, torch.Tensor]:
        """Tokenize ``features`` and attach causal-LM labels.

        Args:
            features: Dicts that each carry a ``"text"`` string.

        Returns:
            The tokenizer's batch with an added ``"labels"`` tensor: a copy
            of ``input_ids`` with padding positions replaced by -100.
        """
        # Extract texts
        texts = [feature["text"] for feature in features]

        # Tokenize
        batch = self.tokenizer(
            texts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=self.max_length,
        )

        # Create labels for causal LM
        labels = batch["input_ids"].clone()
        if "attention_mask" in batch:
            # Mask by attention mask rather than by token id: when the pad
            # token is the EOS token, comparing ids would also mask the real
            # end-of-text token and the model would never be trained to stop.
            labels[batch["attention_mask"] == 0] = -100
        else:
            # No mask available: fall back to matching the pad token id.
            labels[labels == self.tokenizer.pad_token_id] = -100
        batch["labels"] = labels

        return batch
53
+
54
def prepare_simple_dataset(dataset_name: str, max_samples: int = 50):
    """Prepare a simplified text-only dataset for CPU training.

    Loads the first ``max_samples`` rows of the ``train`` split and replaces
    all columns with a single ``"text"`` column built from the ``"code"``
    column.  If anything in that path raises, the error is printed and a
    dataset of ``max_samples`` repeated dummy CADQuery snippets is returned
    instead.
    """
    print(f"πŸ“Š Loading dataset: {dataset_name}")

    def as_training_text(code):
        # Simple prompt-response format shared by real and dummy samples.
        return f"Generate CADQuery code:\n{code}<|endoftext|>"

    def to_text_batch(examples):
        """Convert one batch to the text-only format."""
        return {"text": [as_training_text(code) for code in examples["code"]]}

    try:
        # Load small subset
        subset = load_dataset(dataset_name, split=f"train[:{max_samples}]")

        # Process dataset
        prepared = subset.map(
            to_text_batch,
            batched=True,
            remove_columns=subset.column_names,
        )

        print(f"βœ… Dataset prepared: {len(prepared)} samples")
        return prepared

    except Exception as e:
        print(f"❌ Dataset loading failed: {e}")

        # Create dummy dataset for testing
        print("πŸ”„ Creating dummy dataset for testing...")
        dummy_codes = [
            "import cadquery as cq\nresult = cq.Workplane('XY').box(10, 10, 5)",
            "import cadquery as cq\nresult = cq.Workplane('XY').cylinder(5, 10)",
            "import cadquery as cq\nresult = cq.Workplane('XY').box(20, 15, 8).fillet(2)",
        ]
        texts = [as_training_text(code) for code in dummy_codes]

        from datasets import Dataset

        # Repeat the three snippets past max_samples, then trim to size.
        repeats = max_samples // 3 + 1
        fallback = Dataset.from_dict({"text": texts * repeats})
        fallback = fallback.select(range(max_samples))

        print(f"βœ… Dummy dataset created: {len(fallback)} samples")
        return fallback
102
+
103
def setup_simple_model(model_name: str):
    """Load a tokenizer and a float32 CPU model, then wrap the model with LoRA.

    Returns:
        ``(model, tokenizer)`` where ``model`` is the PEFT-wrapped model and
        the tokenizer has a pad token (EOS is reused when none is defined).
    """
    print(f"πŸ”§ Loading model: {model_name}")

    # Tokenizer first; reuse EOS for padding when no pad token exists
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Base model on CPU in float32
    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32,  # Use float32 for CPU
        device_map="cpu",
    )

    # Simple LoRA config for CPU
    lora_settings = dict(
        r=8,  # Small rank for CPU
        lora_alpha=16,
        lora_dropout=0.1,
        bias="none",
        task_type=TaskType.CAUSAL_LM,
        target_modules=["c_attn", "c_proj"],  # DialoGPT modules
    )
    model = get_peft_model(base_model, LoraConfig(**lora_settings))

    # Count trainable vs. total parameters in one pass
    trainable_params = 0
    total_params = 0
    for param in model.parameters():
        count = param.numel()
        total_params += count
        if param.requires_grad:
            trainable_params += count
    print(f"πŸ’‘ Trainable: {trainable_params:,} ({100 * trainable_params / total_params:.2f}%)")

    return model, tokenizer
 
 
 
 
 
 
 
 
140
 
141
def train_simple_model(model, tokenizer, dataset, config):
    """Train the model with simple CPU settings and save it.

    Args:
        model: Model to train (as returned by ``setup_simple_model``).
        tokenizer: Tokenizer used for collation and saved next to the model.
        dataset: Training dataset with a ``"text"`` column.
        config: Mapping providing ``output_dir``, ``batch_size``,
            ``gradient_accumulation``, ``epochs``, ``learning_rate`` and
            ``max_length``.

    Returns:
        The ``Trainer`` instance after training and saving.
    """
    print("πŸ‹οΈ Starting CPU training...")

    # Training arguments for CPU.
    # ``evaluation_strategy="no"`` is intentionally not passed: "no" is the
    # default, and the keyword was renamed to ``eval_strategy`` in newer
    # transformers releases, so spelling it out only risks a TypeError.
    training_args = TrainingArguments(
        output_dir=config["output_dir"],
        per_device_train_batch_size=config["batch_size"],
        gradient_accumulation_steps=config["gradient_accumulation"],
        num_train_epochs=config["epochs"],
        learning_rate=config["learning_rate"],
        warmup_steps=10,
        logging_steps=5,
        save_steps=100,
        save_total_limit=1,
        remove_unused_columns=False,  # keep the "text" column for the collator
        report_to="none",
        fp16=False,  # No FP16 on CPU
        dataloader_pin_memory=False,
        use_cpu=True,
    )

    # Data collator
    data_collator = SimpleDataCollator(
        tokenizer=tokenizer,
        max_length=config["max_length"],
    )

    # Trainer.
    # The tokenizer is not handed to Trainer: the ``tokenizer=`` keyword is
    # deprecated in favour of ``processing_class`` in newer transformers, the
    # custom collator already owns tokenization, and the tokenizer is saved
    # explicitly below.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        data_collator=data_collator,
    )

    # Train
    print("⏳ Training will take a few minutes on CPU...")
    trainer.train()

    # Save model weights and tokenizer into the same output directory
    trainer.save_model()
    tokenizer.save_pretrained(config["output_dir"])

    print(f"βœ… Training complete! Model saved to {config['output_dir']}")
    return trainer
189
 
190
def test_simple_model(model_path: str):
    """Load the saved model from ``model_path`` and sample one completion.

    Returns:
        The decoded generation, or the error message as a string when
        loading or generation raises (the error is also printed).
    """
    print(f"πŸ§ͺ Testing model: {model_path}")

    try:
        # Load tokenizer and model from the training output directory
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForCausalLM.from_pretrained(model_path)

        # Encode the fixed test prompt
        prompt_ids = tokenizer.encode("Generate CADQuery code:", return_tensors="pt")

        # Sampling settings for the smoke test
        sampling = dict(
            max_new_tokens=100,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
        with torch.no_grad():
            sequences = model.generate(prompt_ids, **sampling)

        generated = tokenizer.decode(sequences[0], skip_special_tokens=True)

        print("🎯 Generated:")
        print(generated)
        return generated

    except Exception as e:
        print(f"❌ Testing failed: {e}")
        return str(e)
 
 
 
 
 
 
 
 
 
 
221
 
222
def main():
    """Run the CPU pipeline: dataset, model setup, training, smoke test.

    Returns:
        ``True`` when every step completes; ``False`` when any step raises
        (the error message is printed, not re-raised).
    """
    print("πŸš€ Starting CPU Training Pipeline")
    print("=" * 50)

    try:
        # 1. Prepare dataset
        print("\nπŸ“Š Step 1: Preparing dataset...")
        train_data = prepare_simple_dataset(CONFIG["dataset_name"], CONFIG["max_samples"])

        # 2. Setup model
        print("\nπŸ”§ Step 2: Setting up model...")
        peft_model, tok = setup_simple_model(CONFIG["base_model"])

        # 3. Train (the returned trainer is not needed afterwards)
        print("\nπŸ‹οΈ Step 3: Training...")
        train_simple_model(peft_model, tok, train_data, CONFIG)

        # 4. Test
        print("\nπŸ§ͺ Step 4: Testing...")
        test_simple_model(CONFIG["output_dir"])

        print("\nπŸŽ‰ Pipeline complete!")
        print(f"Model saved to: {CONFIG['output_dir']}")

    except Exception as e:
        print(f"\n❌ Pipeline failed: {e}")
        return False

    return True
252
 
253
if __name__ == "__main__":
    success = main()

    # Pick the follow-up hints that match the pipeline outcome.
    if success:
        hints = (
            "\nπŸ“ Next steps:",
            "1. Check the generated model in ./test-cad-model/",
            "2. Run test_simple_model() to generate more examples",
            "3. Once working, move to GPU version",
        )
    else:
        hints = (
            "\nπŸ”§ Troubleshooting:",
            "1. Check internet connection for dataset download",
            "2. Ensure you have enough disk space",
            "3. Try reducing max_samples to 10",
        )

    for hint in hints:
        print(hint)