Commit 262acca · Parent(s): 0372169
Commit message: what is life

handler.py CHANGED (+24 -80)
@@ -98,7 +98,7 @@ class EndpointHandler:
 
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
-        Process inference request
+        Process inference request EXACTLY like local BrickGPT does
        """
         inputs = data.pop("inputs", data)
         parameters = data.pop("parameters", {})
@@ -119,70 +119,39 @@ class EndpointHandler:
         # Check if this is a continuation (has assistant message)
         has_assistant = any(msg.get("role") == "assistant" for msg in messages)
 
-        #
-        user_content = ""
-        for msg in messages:
-            if msg.get("role") == "user":
-                content = msg["content"]
-                if "### Input:" in content:
-                    user_content = content.split("### Input:")[-1].strip()
-                else:
-                    user_content = content
-                break
-
-        # Create the proper instruction format (use few_shot for better results)
-        if not has_assistant:
-            instruction = self.create_instruction_few_shot(user_content)
-            messages = [
-                {"role": "system", "content": "You are a helpful assistant."},
-                {"role": "user", "content": instruction}
-            ]
-
-        # Format input using chat template exactly like BrickGPT
+        # Format prompt EXACTLY like local BrickGPT does
         if has_assistant:
-            # For continuation, use continue_final_message=True
-            formatted_input = self.tokenizer.apply_chat_template(
+            # For continuation, use continue_final_message=True and return tensors
+            prompt = self.tokenizer.apply_chat_template(
                 messages,
-                tokenize=False,
-                continue_final_message=True
+                continue_final_message=True,
+                return_tensors='pt'
             )
         else:
-            # For new generation, add generation prompt
-            formatted_input = self.tokenizer.apply_chat_template(
+            # For new generation, add generation prompt and return tensors
+            prompt = self.tokenizer.apply_chat_template(
                 messages,
-                tokenize=False,
-                add_generation_prompt=True
+                add_generation_prompt=True,
+                return_tensors='pt'
            )
 
-        #
-
-
-        # For continuation (single brick), use minimal tokens for just one brick
-        if has_assistant:
-            # A complete brick like "1x4 (16,14,1)\n" needs ~15-20 tokens
-            # Use exactly enough for one brick to avoid generating multiple bricks
-            actual_tokens = 15  # Reduced from 20 to prevent multiple bricks
-        else:
-            # For initial generation, use the requested amount or reasonable default
-            actual_tokens = max(50, requested_tokens)
+        # Move to device
+        input_ids = prompt.to(self.model.device)
+        attention_mask = torch.ones_like(input_ids)
 
-        #
+        # Generate EXACTLY like local BrickGPT's generate_brick method
+        # Local BrickGPT uses max_new_tokens=10 for single brick generation
         generation_params = {
-            "max_new_tokens": actual_tokens,
+            "max_new_tokens": 10,  # EXACTLY like local BrickGPT
             "temperature": parameters.get("temperature", 0.6),
             "top_k": parameters.get("top_k", 20),
             "top_p": parameters.get("top_p", 1.0),
-            "do_sample": True,  # Always True like local LLM
-            "num_return_sequences": 1,  # Match local LLM
             "pad_token_id": self.tokenizer.pad_token_id,
+            "do_sample": True,  # EXACTLY like local LLM
+            "num_return_sequences": 1,  # EXACTLY like local LLM
             "return_dict_in_generate": True,
-            # Remove stop_strings - local LLM doesn't use them
        }
 
-        # Tokenize input
-        input_ids = self.tokenizer(formatted_input, return_tensors="pt").input_ids.to(self.model.device)
-        attention_mask = torch.ones_like(input_ids)
-
         # Generate exactly like the local LLM class
         with torch.no_grad():
             output_dict = self.model.generate(
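
Note: the key change in this hunk is that `apply_chat_template` now does the tokenization itself; with `return_tensors='pt'` it hands back token IDs directly, so the old render-to-string step plus the separate `self.tokenizer(formatted_input, ...)` call collapse into one. Below is a minimal sketch of what the two template modes produce, using `tokenize=False` purely so the rendered strings can be inspected; the checkpoint name is a placeholder, not necessarily this model's base, and `continue_final_message` needs a reasonably recent transformers release.

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")  # placeholder checkpoint

msgs = [
    {"role": "user", "content": "Build a chair."},
    {"role": "assistant", "content": "2x4 (0,0,0)\n"},  # partial answer to extend
]

# continue_final_message=True leaves the final assistant turn open (no
# end-of-turn token), so generation resumes mid-message:
print(tok.apply_chat_template(msgs, continue_final_message=True, tokenize=False))

# add_generation_prompt=True, used when no assistant turn exists yet, instead
# appends an empty assistant header for the model to start filling in:
print(tok.apply_chat_template(msgs[:1], add_generation_prompt=True, tokenize=False))
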
@@ -191,39 +160,14 @@ class EndpointHandler:
             **generation_params
         )
 
-        # Decode
+        # Decode EXACTLY like local BrickGPT does
         input_length = input_ids.shape[1]
         result_ids = output_dict['sequences'][0][input_length:]
-
-
-
-
-
-        # Remove any trailing continuation artifacts
-        if generated_text.endswith("### Output:"):
-            generated_text = generated_text[:-11].strip()
-
-        # CRITICAL FIX: Ensure single brick output for continuation
-        if has_assistant and generated_text:
-            # Split by lines and take only the first valid brick
-            lines = [line.strip() for line in generated_text.split('\n') if line.strip()]
-
-            if lines:
-                first_line = lines[0]
-                # Verify it's a complete brick format
-                if re.match(r'\d+x\d+\s*\(\d+,\d+,\d+\)$', first_line):
-                    generated_text = first_line  # NO trailing newline!
-                else:
-                    # If first line isn't complete, try to find any complete brick
-                    for line in lines:
-                        if re.match(r'\d+x\d+\s*\(\d+,\d+,\d+\)$', line):
-                            generated_text = line  # NO trailing newline!
-                            break
-                    else:
-                        # No complete brick found
-                        generated_text = ""
-
-        # Extract LEGO instructions
+
+        # Local BrickGPT uses skip_special_tokens=True in generate_brick methods
+        generated_text = self.tokenizer.decode(result_ids, skip_special_tokens=True)
+
+        # Extract LEGO instructions (same as before)
         lego_instructions = self.extract_lego_instructions(generated_text)
 
         return [{
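
Note: the deleted block enforced single-brick output for continuations by regex-validating each decoded line; the new code drops that and relies on the small token budget plus `extract_lego_instructions`. For reference, a standalone sketch of the removed filter follows (the names `BRICK_RE` and `first_complete_brick` are mine, not from handler.py).

import re

# "WxH (x,y,z)" brick lines, e.g. "1x4 (16,14,1)"; the trailing $ mirrors the
# removed code's re.match(...), which required the line to end after the tuple.
BRICK_RE = re.compile(r'\d+x\d+\s*\(\d+,\d+,\d+\)$')

def first_complete_brick(text: str) -> str:
    """Return the first complete brick line, or '' if none is found."""
    for line in (l.strip() for l in text.split('\n')):
        if line and BRICK_RE.match(line):
            return line  # no trailing newline, matching the old behavior
    return ""

assert first_complete_brick("1x4 (16,14,1)\n2x2 (0,0,0)") == "1x4 (16,14,1)"
assert first_complete_brick("1x4 (16,14,") == ""
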
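
Note: the lines that parse `messages` out of the request (lines 105-118) are untouched by this commit and not shown, so the exact payload shape is an assumption here. Below is a hedged sketch of exercising the handler locally, following the Hugging Face Inference Endpoints convention that `EndpointHandler` loads its model in `__init__` and `__call__` takes the request dict; the message contents are made up.

from handler import EndpointHandler

handler = EndpointHandler(path=".")  # constructor signature assumed from the HF convention

# Assumed payload shape: "inputs" carrying chat messages, "parameters" carrying
# sampling overrides. Including an assistant turn flips the handler into
# continuation (single-brick) mode via the has_assistant check.
payload = {
    "inputs": {
        "messages": [
            {"role": "user", "content": "Build a small chair."},
            {"role": "assistant", "content": "2x4 (0,0,0)\n"},
        ]
    },
    "parameters": {"temperature": 0.6, "top_k": 20, "top_p": 1.0},
}

result = handler(payload)  # -> [{..., "lego_instructions": ...}]
print(result)
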