# text-to-3d / nlp_processor.py
# Page-header residue kept for reference (not code):
#   "jainarham's picture" / "Update nlp_processor.py" / commit 5c3adf5 (verified)
"""
Advanced NLP Processor - Professional Grade
Understands complex prompts, spatial relationships, and compositions
"""
import re
import logging
from typing import Dict, Any, Optional, List, Tuple
from dataclasses import dataclass
from enum import Enum
logger = logging.getLogger(__name__)
class Relation(Enum):
    """Spatial relation between one parsed object and a target object.

    Each member's value is the snake_case string form of the relation.
    """

    # No relation to any other object.
    NONE = "none"

    # Vertical relations.
    ON_TOP = "on_top"
    BELOW = "below"

    # Horizontal relations.
    NEXT_TO = "next_to"
    LEFT_OF = "left_of"
    RIGHT_OF = "right_of"

    # Depth relations.
    IN_FRONT = "in_front"
    BEHIND = "behind"

    # Containment and surrounding relations.
    INSIDE = "inside"
    AROUND = "around"
    BETWEEN = "between"
class Arrangement(Enum):
    """Layout applied to the copies of one parsed object.

    Each member's value is the lowercase string form of the layout.
    """

    # No explicit layout requested.
    NONE = "none"

    # Positional layouts.
    ROW = "row"
    COLUMN = "column"
    CIRCLE = "circle"
    GRID = "grid"
    STACK = "stack"
    RANDOM = "random"

    # Layouts that also vary the scale from copy to copy.
    INCREASING = "increasing"
    DECREASING = "decreasing"
@dataclass
class ParsedObject:
    """Attributes extracted from one object description in a prompt."""

    # Canonical shape name, e.g. "cube" or "sphere".
    shape: str
    # Colour as a hex string, e.g. "#e74c3c".
    color: str
    # Uniform scale factor; 1.0 is the default size.
    size: float
    # Number of copies of the object to generate.
    quantity: int
    # Material name, or "default" when none was recognised.
    material: str
    # Spatial relation to another parsed object (Relation.NONE if there is none).
    relation: Relation
    # Index of the related object in the parsed-item list, or None.
    # The parser stores an integer index here, so the type is Optional[int].
    relation_target: Optional[int]
    # Layout applied to the copies of this object.
    arrangement: Arrangement
    # Surface modifier keywords found in the description (e.g. "smooth").
    modifiers: List[str]
class AdvancedNLPProcessor:
    """Rule-based parser that turns a text prompt into 3D scene parameters.

    The prompt is matched against fixed vocabularies (shapes, colours, sizes,
    number words, spatial relations, arrangements and materials). The result
    is a list of object dictionaries, each with a type, colour, scale,
    position, rotation, material and modifiers.
    """

    # Upper bound on the copies generated for one parsed object, so that a
    # prompt such as "999999999 cubes" cannot exhaust memory.
    MAX_QUANTITY = 1000

    # Lowest scale factor used by the DECREASING arrangement. Without it the
    # factor (1.5 - 0.2 * q) reaches zero or below from the ninth copy on.
    MIN_DECREASING_FACTOR = 0.1

    # Phrases that mark a prompt as an edit of an existing scene.
    MODIFICATION_KEYWORDS = (
        "make it", "change", "modify", "update", "make them",
        "bigger", "smaller", "larger", "taller", "shorter",
        "rotate", "spin", "flip", "move", "shift",
        "different color", "change color", "new color",
        "add more", "remove", "delete", "more", "less",
    )

    def __init__(self):
        """Build the vocabularies and compile the regex patterns."""
        self._init_vocabularies()
        self._compile_patterns()
        logger.info("Advanced NLP Processor initialized")

    def _init_vocabularies(self):
        """Initialize all vocabularies and mappings."""
        # Shapes with synonyms
        self.shapes = {
            "cube": ["cube", "cubes", "box", "boxes", "block", "blocks", "square", "squares"],
            "sphere": ["sphere", "spheres", "ball", "balls", "orb", "orbs", "globe", "globes"],
            "cylinder": ["cylinder", "cylinders", "tube", "tubes", "pipe", "pipes", "pillar", "pillars", "column", "columns"],
            "cone": ["cone", "cones", "spike", "spikes"],
            "torus": ["torus", "toruses", "tori", "donut", "donuts", "ring", "rings", "hoop", "hoops"],
            "pyramid": ["pyramid", "pyramids", "tetrahedron", "tetrahedrons"],
            "capsule": ["capsule", "capsules", "pill", "pills", "lozenge"],
            "plane": ["plane", "planes", "floor", "ground", "platform", "base", "surface"],
        }
        # Flatten for quick lookup: synonym -> canonical shape name
        self.shape_lookup = {
            synonym: canonical
            for canonical, synonyms in self.shapes.items()
            for synonym in synonyms
        }
        # Colors with hex values
        self.colors = {
            # Basic
            "red": "#e74c3c", "green": "#2ecc71", "blue": "#3498db",
            "yellow": "#f1c40f", "orange": "#e67e22", "purple": "#9b59b6",
            "pink": "#e91e63", "white": "#ecf0f1", "black": "#2c3e50",
            "gray": "#95a5a6", "grey": "#95a5a6", "brown": "#8b4513",
            # Extended
            # "magenta" previously reused pink's hex, so magenta rendered
            # pink and was reported back as "pink" in the interpretation.
            "cyan": "#00bcd4", "magenta": "#ff00ff", "lime": "#8bc34a",
            "teal": "#009688", "navy": "#1a237e", "maroon": "#800000",
            "olive": "#808000", "coral": "#ff7f50", "salmon": "#fa8072",
            "gold": "#ffd700", "golden": "#ffd700", "silver": "#c0c0c0",
            "bronze": "#cd7f32", "copper": "#b87333", "platinum": "#e5e4e2",
            "crimson": "#dc143c", "scarlet": "#ff2400", "ruby": "#e0115f",
            "emerald": "#50c878", "jade": "#00a86b", "mint": "#98fb98",
            "sapphire": "#0f52ba", "azure": "#007fff", "indigo": "#4b0082",
            "violet": "#8f00ff", "lavender": "#e6e6fa", "plum": "#dda0dd",
            "turquoise": "#40e0d0", "aqua": "#00ffff", "sky": "#87ceeb",
            "peach": "#ffdab9", "beige": "#f5f5dc", "ivory": "#fffff0",
            "cream": "#fffdd0", "tan": "#d2b48c", "chocolate": "#7b3f00",
            "charcoal": "#36454f", "slate": "#708090", "steel": "#71797e",
            # Metallic descriptors
            "metallic": "#a8a9ad", "shiny": "#d4d4d4", "chrome": "#dbe4eb",
        }
        # Sizes with scale factors
        self.sizes = {
            "tiny": 0.25, "very small": 0.35, "small": 0.5, "little": 0.5,
            "medium": 1.0, "normal": 1.0, "regular": 1.0, "average": 1.0,
            "large": 1.5, "big": 1.5, "huge": 2.0, "giant": 2.5,
            "massive": 3.0, "enormous": 3.5, "colossal": 4.0,
            "mini": 0.3, "micro": 0.2, "nano": 0.15,
        }
        # Number words
        self.numbers = {
            "a": 1, "an": 1, "one": 1, "single": 1,
            "two": 2, "pair": 2, "couple": 2, "double": 2,
            "three": 3, "triple": 3, "trio": 3,
            "four": 4, "quad": 4, "quadruple": 4,
            "five": 5, "six": 6, "seven": 7, "eight": 8,
            "nine": 9, "ten": 10, "eleven": 11, "twelve": 12,
            "dozen": 12, "fifteen": 15, "twenty": 20,
            "few": 3, "several": 4, "many": 6, "lots": 8,
        }
        # Spatial relations
        self.relations = {
            # On top
            "on": Relation.ON_TOP, "on top of": Relation.ON_TOP,
            "above": Relation.ON_TOP, "over": Relation.ON_TOP,
            "atop": Relation.ON_TOP, "upon": Relation.ON_TOP,
            # Below
            "under": Relation.BELOW, "below": Relation.BELOW,
            "beneath": Relation.BELOW, "underneath": Relation.BELOW,
            # Next to
            "next to": Relation.NEXT_TO, "beside": Relation.NEXT_TO,
            "by": Relation.NEXT_TO, "near": Relation.NEXT_TO,
            "adjacent": Relation.NEXT_TO, "alongside": Relation.NEXT_TO,
            # Left/Right
            "left of": Relation.LEFT_OF, "to the left": Relation.LEFT_OF,
            "right of": Relation.RIGHT_OF, "to the right": Relation.RIGHT_OF,
            # Front/Behind
            "in front of": Relation.IN_FRONT, "before": Relation.IN_FRONT,
            "behind": Relation.BEHIND, "back of": Relation.BEHIND,
            # Inside
            "inside": Relation.INSIDE, "within": Relation.INSIDE,
            "in": Relation.INSIDE,
            # Around
            "around": Relation.AROUND, "surrounding": Relation.AROUND,
            # Between
            "between": Relation.BETWEEN,
        }
        # Arrangements
        self.arrangements = {
            "row": Arrangement.ROW, "line": Arrangement.ROW, "horizontal": Arrangement.ROW,
            "column": Arrangement.COLUMN, "vertical": Arrangement.COLUMN, "tower": Arrangement.STACK,
            "circle": Arrangement.CIRCLE, "ring": Arrangement.CIRCLE, "circular": Arrangement.CIRCLE,
            "grid": Arrangement.GRID, "matrix": Arrangement.GRID, "array": Arrangement.GRID,
            "stack": Arrangement.STACK, "stacked": Arrangement.STACK, "pile": Arrangement.STACK,
            "random": Arrangement.RANDOM, "scattered": Arrangement.RANDOM,
            "increasing": Arrangement.INCREASING, "ascending": Arrangement.INCREASING,
            "decreasing": Arrangement.DECREASING, "descending": Arrangement.DECREASING,
        }
        # Materials/textures
        self.materials = {
            "metallic": "metallic", "metal": "metallic", "shiny": "shiny",
            "matte": "matte", "flat": "matte", "glossy": "glossy",
            "glass": "glass", "transparent": "glass", "crystal": "glass",
            "wood": "wood", "wooden": "wood", "stone": "stone",
            "plastic": "plastic", "rubber": "rubber", "chrome": "chrome",
            "brushed": "brushed", "polished": "polished", "rough": "rough",
            "smooth": "smooth", "textured": "textured",
        }
        # Modification keywords for refinement: word -> (action, value)
        self.modifications = {
            "bigger": ("scale", 1.5), "larger": ("scale", 1.5),
            "smaller": ("scale", 0.7), "tinier": ("scale", 0.5),
            "taller": ("scale_y", 1.5), "shorter": ("scale_y", 0.7),
            "wider": ("scale_x", 1.5), "thinner": ("scale_x", 0.7),
            "longer": ("scale_z", 1.5), "deeper": ("scale_z", 1.5),
            "double": ("scale", 2.0), "half": ("scale", 0.5),
            "rotate": ("rotate_y", 45), "spin": ("rotate_y", 90),
            "flip": ("rotate_x", 180), "tilt": ("rotate_z", 30),
        }

    @staticmethod
    def _alternation(words) -> str:
        """Return a regex alternation of *words*, escaped and longest first.

        Longest-first ordering lets multi-word entries such as "very small"
        win over their shorter suffixes.
        """
        return '|'.join(re.escape(word) for word in sorted(words, key=len, reverse=True))

    def _keyword_pattern(self, words):
        """Compile a case-insensitive whole-word pattern matching any of *words*."""
        return re.compile(rf'\b({self._alternation(words)})\b', re.IGNORECASE)

    def _compile_patterns(self):
        """Compile the regex patterns once so parsing does not rebuild them."""
        # Number pattern: "5", "five", "few"
        self._number_words = self._alternation(self.numbers)
        self.num_pattern = re.compile(
            rf'\b(\d+|{self._number_words})\b',
            re.IGNORECASE
        )
        # One whole-word pattern per vocabulary
        all_shapes = [syn for synonyms in self.shapes.values() for syn in synonyms]
        self.shape_pattern = self._keyword_pattern(all_shapes)
        self.color_pattern = self._keyword_pattern(self.colors)
        self.size_pattern = self._keyword_pattern(self.sizes)
        self.relation_pattern = self._keyword_pattern(self.relations)
        self.arrangement_pattern = self._keyword_pattern(self.arrangements)
        self.material_pattern = self._keyword_pattern(self.materials)
        # Compound object pattern: "X on Y", "X with Y"
        self.compound_pattern = re.compile(
            r'(.+?)\s+(on|with|and|next to|beside|above|below|under|behind|in front of)\s+(.+)',
            re.IGNORECASE
        )
        # "<subject> <relation phrase> <target>" splitters, longest phrase
        # first so "on top of" is tried before "on".
        self._relation_splitters = [
            (
                phrase,
                relation,
                re.compile(rf'(.+?)\s+{re.escape(phrase)}\s+(.+)', re.IGNORECASE),
            )
            for phrase, relation in sorted(
                self.relations.items(), key=lambda item: len(item[0]), reverse=True
            )
        ]
        # Modification keywords must start at a word boundary, so "less" no
        # longer fires inside "colorless". Inflected forms such as "rotated"
        # still match because there is no trailing boundary.
        self._modification_pattern = re.compile(
            r'\b(?:' + self._alternation(self.MODIFICATION_KEYWORDS) + r')'
        )

    def _extract_number(self, text: str, shape_word: str) -> int:
        """Extract the quantity stated for *shape_word* in *text*.

        The quantity is the digit string or number word closest before the
        shape word, with at most three descriptor words in between
        ("3 small red cubes"). A descriptor may not itself be a number, so
        "a few cubes" yields 3 rather than the 1 of the leading article.

        Returns 1 when no quantity is found. The result is capped at
        MAX_QUANTITY.
        """
        text_lower = text.lower()
        number = rf'(?:\d+|{self._number_words})'
        pattern = (
            rf'\b({number})\b'
            rf'(?:\s+(?!{number}\b)[\w-]+){{0,3}}'
            rf'\s+{re.escape(shape_word.lower())}'
        )
        match = re.search(pattern, text_lower)
        if match is None:
            return 1
        token = match.group(1)
        quantity = int(token) if token.isdigit() else self.numbers.get(token, 1)
        return min(quantity, self.MAX_QUANTITY)

    def _find_shape_match(self, text_lower: str):
        """Return the regex match of the segment's shape word, or None.

        "column" and "ring" are both shape synonyms and arrangement keywords.
        When such a word is followed by "of" ("a ring of spheres") it names
        the layout, so the next shape word is preferred. If no other shape
        word exists, the first match is used.
        """
        first = None
        for match in self.shape_pattern.finditer(text_lower):
            if first is None:
                first = match
            is_layout_word = match.group(1).lower() in self.arrangements
            followed_by_of = text_lower[match.end():].lstrip().startswith("of ")
            if is_layout_word and followed_by_of:
                continue
            return match
        return first

    @staticmethod
    def _first_keyword(pattern, table, text_lower, default):
        """Look up the first match of *pattern* in *table*, or return *default*."""
        match = pattern.search(text_lower)
        if match is None:
            return default
        return table.get(match.group(1).lower(), default)

    def _parse_segment(self, text: str) -> "ParsedObject":
        """Parse a single object description into a ParsedObject.

        Unrecognised attributes fall back to defaults: shape "cube", colour
        "#808080", size 1.0, quantity 1, material "default", no arrangement.
        The relation fields are always left unset here.
        """
        text_lower = text.lower().strip()

        # Shape
        shape_match = self._find_shape_match(text_lower)
        shape_word = shape_match.group(1) if shape_match else "cube"
        shape = self.shape_lookup.get(shape_word, "cube")

        # Colour, size and material
        color = self._first_keyword(self.color_pattern, self.colors, text_lower, "#808080")
        size = self._first_keyword(self.size_pattern, self.sizes, text_lower, 1.0)
        material = self._first_keyword(self.material_pattern, self.materials, text_lower, "default")

        # Quantity
        quantity = self._extract_number(text_lower, shape_word)

        # Arrangement: ignore the keyword that was consumed as the shape
        # itself, so "a ring" is a torus and not a torus laid out in a circle.
        arrangement = Arrangement.NONE
        shape_span = shape_match.span(1) if shape_match else None
        for arr_match in self.arrangement_pattern.finditer(text_lower):
            if arr_match.span(1) == shape_span:
                continue
            arrangement = self.arrangements.get(arr_match.group(1).lower(), Arrangement.NONE)
            break

        # Surface modifiers (plain substring check)
        modifiers = [
            mod for mod in ("smooth", "rough", "shiny", "matte", "glossy")
            if mod in text_lower
        ]

        return ParsedObject(
            shape=shape,
            color=color,
            size=size,
            quantity=quantity,
            material=material,
            relation=Relation.NONE,
            relation_target=None,
            arrangement=arrangement,
            modifiers=modifiers
        )

    def _is_arrangement_phrase(self, text: str) -> bool:
        """Return True when *text* names only a layout, e.g. "a row"."""
        text_lower = text.lower()
        if not self.arrangement_pattern.search(text_lower):
            return False
        # Any shape word present must itself be a layout keyword ("ring").
        return all(
            match.group(1).lower() in self.arrangements
            for match in self.shape_pattern.finditer(text_lower)
        )

    def _split_relation(self, part: str):
        """Split *part* into (subject_text, relation, target_text), or None.

        Relation phrases are tried longest first. The phrase "in" is skipped
        when what follows is only a layout ("five cubes in a row"), because
        that states an arrangement rather than containment.
        """
        for phrase, relation, splitter in self._relation_splitters:
            match = splitter.match(part)
            if match is None:
                continue
            subject_text = match.group(1).strip()
            target_text = match.group(2).strip()
            if phrase == "in" and self._is_arrangement_phrase(target_text):
                continue
            return subject_text, relation, target_text
        return None

    def _parse_compound(self, text: str) -> List[Tuple["ParsedObject", "Relation", int]]:
        """Parse a prompt that may contain several objects and relations.

        The prompt is split on "and". Each part becomes either one object, or
        a target object followed by a subject object that refers to it.

        Returns a list of (object, relation, target_index) tuples. The index
        is -1 when the object has no relation.
        """
        results = []
        for part in re.split(r'\s+and\s+', text, flags=re.IGNORECASE):
            part = part.strip()
            if not part:
                continue
            split = self._split_relation(part)
            if split is None:
                # Single object or simple description
                results.append((self._parse_segment(part), Relation.NONE, -1))
                continue
            subject_text, relation, target_text = split
            # The target is appended first so it is positioned first.
            results.append((self._parse_segment(target_text), Relation.NONE, -1))
            target_idx = len(results) - 1
            subject = self._parse_segment(subject_text)
            subject.relation = relation
            subject.relation_target = target_idx
            results.append((subject, relation, target_idx))
        return results

    @staticmethod
    def _relation_offset(relation, size):
        """Return the (dx, dy, dz) offset from the target for *relation*.

        INSIDE, AROUND and BETWEEN have no offset: the object is placed at
        the target's position.
        """
        if relation == Relation.ON_TOP:
            return 0, 1.2 * size, 0
        if relation == Relation.BELOW:
            return 0, -1.2 * size, 0
        if relation in (Relation.NEXT_TO, Relation.RIGHT_OF):
            return 2.0 * size, 0, 0
        if relation == Relation.LEFT_OF:
            return -2.0 * size, 0, 0
        if relation == Relation.IN_FRONT:
            return 0, 0, 2.0 * size
        if relation == Relation.BEHIND:
            return 0, 0, -2.0 * size
        return 0, 0, 0

    def _arrangement_offset(self, parsed, q):
        """Return (dx, dy, dz, scale) for copy number *q* of *parsed*."""
        import math
        import random

        size = parsed.size
        count = parsed.quantity
        arrangement = parsed.arrangement
        dx = dy = dz = 0
        scale = size

        if arrangement == Arrangement.ROW:
            dx = q * 2.2 * size
        elif arrangement == Arrangement.COLUMN:
            dz = q * 2.2 * size
        elif arrangement == Arrangement.STACK:
            dy = q * 1.2 * size
        elif arrangement == Arrangement.CIRCLE:
            angle = (2 * math.pi * q) / count
            radius = max(2.0, count * 0.5) * size
            dx = math.cos(angle) * radius
            dz = math.sin(angle) * radius
        elif arrangement == Arrangement.GRID:
            grid_size = int(math.ceil(math.sqrt(count)))
            row, col = divmod(q, grid_size)
            dx = col * 2.2 * size
            dz = row * 2.2 * size
        elif arrangement == Arrangement.INCREASING:
            scale = size * (0.5 + q * 0.3)
            dx = q * 2.5 * size
        elif arrangement == Arrangement.DECREASING:
            # Clamp so long sequences never reach zero or negative scale.
            scale = size * max(self.MIN_DECREASING_FACTOR, 1.5 - q * 0.2)
            dx = q * 2.5 * size
        elif arrangement == Arrangement.RANDOM:
            dx = random.uniform(-3, 3)
            dz = random.uniform(-3, 3)
        elif arrangement == Arrangement.NONE and count > 1:
            # Default: spread in a row
            dx = q * 2.2 * size
        return dx, dy, dz, scale

    def _build_objects(self, parsed_items: List[Tuple["ParsedObject", "Relation", int]]) -> List[Dict]:
        """Convert parsed objects to 3D parameter dictionaries with positions.

        An item with a relation is placed relative to the first copy of its
        target. Every other item is placed to the right of everything built
        so far, so separate items never start on top of each other. Finally
        the scene is centred on the x and z axes.
        """
        objects = []
        object_positions = {}  # parsed-item index -> position of its first copy
        current_x = 0  # x where the next unrelated item starts

        for idx, (parsed, relation, target_idx) in enumerate(parsed_items):
            count = parsed.quantity
            if count <= 0:
                continue  # nothing to place, e.g. "0 cubes"
            size = parsed.size

            anchor = None
            if relation != Relation.NONE and target_idx >= 0:
                anchor = object_positions.get(target_idx)

            placements = [self._arrangement_offset(parsed, q) for q in range(count)]

            base_x, base_y, base_z = 0, 0, 0
            if anchor is not None:
                off_x, off_y, off_z = self._relation_offset(relation, size)
                base_x = anchor[0] + off_x
                base_y = anchor[1] + off_y
                base_z = anchor[2] + off_z
            elif objects:
                # Shift the whole item so its leftmost copy starts at current_x.
                base_x = current_x - min(p[0] for p in placements)

            item_start = len(objects)
            for dx, dy, dz, scale in placements:
                objects.append({
                    "type": parsed.shape,
                    "color": parsed.color,
                    "scale": scale,
                    "position": {"x": base_x + dx, "y": base_y + dy, "z": base_z + dz},
                    "rotation": {"x": 0, "y": 0, "z": 0},
                    "material": parsed.material,
                    # Copy so the generated objects do not share one list.
                    "modifiers": list(parsed.modifiers)
                })

            # Later items that refer to this one attach to its first copy.
            first = objects[item_start]["position"]
            object_positions[idx] = (first["x"], first["y"], first["z"])
            item_max_x = max(o["position"]["x"] for o in objects[item_start:])
            current_x = max(current_x, item_max_x + 2.5 * size)

        # Center all objects
        if objects:
            xs = [o["position"]["x"] for o in objects]
            zs = [o["position"]["z"] for o in objects]
            center_x = (min(xs) + max(xs)) / 2
            center_z = (min(zs) + max(zs)) / 2
            for obj in objects:
                obj["position"]["x"] -= center_x
                obj["position"]["z"] -= center_z
        return objects

    def process_prompt(self, prompt: str, existing_context: Optional[Dict] = None) -> Dict[str, Any]:
        """Turn *prompt* into scene parameters.

        When *existing_context* is given and the prompt looks like an edit
        request, the objects in the context are modified. Otherwise a new
        scene is built from the prompt.

        Returns a dictionary with "success" True plus "original_prompt",
        "interpretation", "model_params" and "is_modification". On failure it
        returns "success" False plus "error" and "interpretation". Exceptions
        are logged and reported in that dictionary rather than raised.
        """
        try:
            logger.info("Processing: %s", prompt)
            # Check for modification request
            if existing_context and self._is_modification(prompt):
                return self._process_modification(prompt, existing_context)
            # Parse the prompt
            parsed_items = self._parse_compound(prompt)
            if not parsed_items:
                parsed_items = [(self._parse_segment(prompt), Relation.NONE, -1)]
            # Build objects and the human-readable summary
            objects = self._build_objects(parsed_items)
            interpretation = self._build_interpretation(parsed_items, objects)
            model_params = {
                "objects": objects,
                "scene_settings": {
                    "background_color": "#1a1a2e",
                    "ambient_light": 0.5,
                    "directional_light": 0.8
                }
            }
            logger.info("Generated %d objects", len(objects))
            return {
                "success": True,
                "original_prompt": prompt,
                "interpretation": interpretation,
                "model_params": model_params,
                "is_modification": False
            }
        except Exception as e:
            # Top-level boundary: record the traceback and report the failure.
            logger.exception("Processing error: %s", e)
            return {
                "success": False,
                "error": str(e),
                "interpretation": f"Error: {str(e)}"
            }

    def _is_modification(self, text: str) -> bool:
        """Return True when *text* contains a modification keyword.

        A keyword must start at a word boundary. "rotated" still matches,
        but "colorless" no longer triggers on "less".
        """
        return self._modification_pattern.search(text.lower()) is not None

    def _process_modification(self, prompt: str, context: Dict) -> Dict[str, Any]:
        """Apply an edit request to the objects stored in *context*.

        The objects are copied before being changed, so *context* is left
        untouched. With no existing objects the prompt is processed as a new
        scene instead.
        """
        text_lower = prompt.lower()
        # Get existing objects
        existing_objects = context.get("model_params", {}).get("objects", [])
        if not existing_objects:
            return self.process_prompt(prompt, None)

        # Work out once which edits the prompt asks for.
        active_mods = [
            (action, value)
            for mod_word, (action, value) in self.modifications.items()
            if mod_word in text_lower
        ]
        new_color = None
        color_match = self.color_pattern.search(text_lower)
        if color_match and ("color" in text_lower or "change" in text_lower):
            new_color = self.colors.get(color_match.group(1).lower())

        modified_objects = []
        for obj in existing_objects:
            # Copy the nested dictionaries too, so the caller's context is
            # never mutated.
            new_obj = dict(obj)
            new_obj["position"] = dict(obj.get("position", {"x": 0, "y": 0, "z": 0}))
            new_obj["rotation"] = dict(obj.get("rotation", {"x": 0, "y": 0, "z": 0}))
            if "scale_axes" in obj:
                new_obj["scale_axes"] = dict(obj["scale_axes"])

            for action, value in active_mods:
                if action == "scale":
                    new_obj["scale"] = obj.get("scale", 1.0) * value
                elif action in ("scale_x", "scale_y", "scale_z"):
                    axis = action.split("_")[1]
                    axes = new_obj.setdefault("scale_axes", {"x": 1, "y": 1, "z": 1})
                    axes[axis] = axes.get(axis, 1) * value
                elif action.startswith("rotate"):
                    axis = action.split("_")[1]
                    new_obj["rotation"][axis] = new_obj["rotation"].get(axis, 0) + value

            if new_color is not None:
                new_obj["color"] = new_color
            modified_objects.append(new_obj)

        # Check for "add more"
        if re.search(r'\b(?:add|more)', text_lower):
            add_parsed = self._parse_segment(prompt)
            if add_parsed.quantity > 0:
                # New objects go to the right of the rightmost existing one.
                last_x = max(
                    (o["position"].get("x", 0) for o in modified_objects),
                    default=0,
                )
                for i in range(add_parsed.quantity):
                    modified_objects.append({
                        "type": add_parsed.shape,
                        "color": add_parsed.color,
                        "scale": add_parsed.size,
                        "position": {"x": last_x + 2.5 * (i + 1), "y": 0, "z": 0},
                        "rotation": {"x": 0, "y": 0, "z": 0},
                        "material": add_parsed.material,
                        "modifiers": list(add_parsed.modifiers)
                    })

        model_params = {
            "objects": modified_objects,
            "scene_settings": context.get("model_params", {}).get("scene_settings", {
                "background_color": "#1a1a2e",
                "ambient_light": 0.5,
                "directional_light": 0.8
            })
        }
        return {
            "success": True,
            "original_prompt": prompt,
            "interpretation": f"Modified {len(modified_objects)} object(s)",
            "model_params": model_params,
            "is_modification": True
        }

    def _build_interpretation(self, parsed_items: List[Tuple["ParsedObject", "Relation", int]], objects: List[Dict]) -> str:
        """Build a human-readable summary of the parsed scene."""
        parts = []
        for parsed, relation, _target in parsed_items:
            desc = []
            if parsed.quantity > 1:
                desc.append(f"{parsed.quantity}x")
            if parsed.size < 1:
                desc.append("small")
            elif parsed.size > 1:
                desc.append("large")
            # Reverse lookup: the first colour name that maps to this hex
            color_name = next(
                (name for name, hex_val in self.colors.items() if hex_val == parsed.color),
                None,
            )
            if color_name:
                desc.append(color_name)
            desc.append(parsed.shape)
            if parsed.arrangement != Arrangement.NONE:
                desc.append(f"in {parsed.arrangement.value}")
            if relation != Relation.NONE:
                desc.append(relation.value.replace("_", " "))
            parts.append(" ".join(desc))
        interpretation = "Creating: " + ", ".join(parts)
        interpretation += f" | Total: {len(objects)} objects"
        return interpretation
# Module-wide processor instance; stays None until first requested.
_processor = None


def get_processor() -> AdvancedNLPProcessor:
    """Return the shared AdvancedNLPProcessor, creating it on first use."""
    global _processor
    if _processor is not None:
        return _processor
    _processor = AdvancedNLPProcessor()
    return _processor
class NLPProcessor:
    """Wrapper kept for backward compatibility.

    Every call is forwarded to the processor returned by get_processor().
    """

    def __init__(self):
        # Use the module-wide instance instead of building a new one.
        self._processor = get_processor()

    def process_prompt(self, prompt: str, existing_context: Optional[Dict] = None) -> Dict[str, Any]:
        """Forward the prompt and optional context to the wrapped processor."""
        delegate = self._processor
        result = delegate.process_prompt(prompt, existing_context)
        return result