Spaces:

jainarham
/

text-to-3d

Sleeping

App Files Files Community

text-to-3d / nlp_processor.py

jainarham

Update nlp_processor.py

5c3adf5 verified about 2 months ago

raw

history blame contribute delete

27.5 kB

	"""
	Advanced NLP Processor - Professional Grade
	Understands complex prompts, spatial relationships, and compositions
	"""

	import re
	import logging
	from typing import Dict, Any, Optional, List, Tuple
	from dataclasses import dataclass
	from enum import Enum

	logger = logging.getLogger(__name__)


	class Relation(Enum):
	    """Spatial relation between a parsed object and the object it refers to.

	    The offsets mentioned below are the ones applied by
	    ``AdvancedNLPProcessor._build_objects``.
	    """

	    NONE = "none"          # object is placed independently of any other
	    ON_TOP = "on_top"      # raised along +y relative to the target
	    BELOW = "below"        # lowered along -y relative to the target
	    NEXT_TO = "next_to"    # shifted along +x (handled the same as RIGHT_OF)
	    LEFT_OF = "left_of"    # shifted along -x
	    RIGHT_OF = "right_of"  # shifted along +x
	    IN_FRONT = "in_front"  # shifted along +z
	    BEHIND = "behind"      # shifted along -z
	    # The three members below get no positional offset in _build_objects:
	    # the object simply starts at the target's position.
	    INSIDE = "inside"
	    AROUND = "around"
	    BETWEEN = "between"


	class Arrangement(Enum):
	    """Layout applied to the copies of one object when its quantity is > 1.

	    The axis notes below describe how
	    ``AdvancedNLPProcessor._build_objects`` interprets each member.
	    """

	    NONE = "none"              # no explicit layout (copies default to a row)
	    ROW = "row"                # copies spread along x
	    COLUMN = "column"          # copies spread along z
	    CIRCLE = "circle"          # copies placed on a circle in the x/z plane
	    GRID = "grid"              # copies fill a square-ish grid in x/z
	    STACK = "stack"            # copies piled along y
	    RANDOM = "random"          # copies jittered randomly in x and z
	    INCREASING = "increasing"  # copies along x, each one scaled up
	    DECREASING = "decreasing"  # copies along x, each one scaled down


	@dataclass
	class ParsedObject:
	    """One object description extracted from a prompt segment."""

	    shape: str                     # canonical shape name, e.g. "cube"
	    color: str                     # hex colour string, e.g. "#e74c3c"
	    size: float                    # uniform scale factor (1.0 = normal)
	    quantity: int                  # number of copies to create
	    material: str                  # canonical material name or "default"
	    relation: Relation             # spatial relation to another object
	    # Index of the related item in the parsed-item list. The parser stores
	    # an integer here and _build_objects looks positions up by that index,
	    # so the annotation is Optional[int] (it was mistakenly Optional[str]).
	    relation_target: Optional[int]
	    arrangement: Arrangement       # layout used when quantity > 1
	    modifiers: List[str]           # surface keywords found in the text


	class AdvancedNLPProcessor:
	def __init__(self):
	    """Prepare the keyword vocabularies and the regexes built from them."""
	    # The vocabularies must exist first: the patterns are compiled from
	    # their keys.
	    for setup_step in (self._init_vocabularies, self._compile_patterns):
	        setup_step()
	    logger.info("Advanced NLP Processor initialized")

	def _init_vocabularies(self):
	    """Populate every keyword table the parser relies on.

	    Sets ``shapes``, ``shape_lookup``, ``colors``, ``sizes``, ``numbers``,
	    ``relations``, ``arrangements``, ``materials`` and ``modifications``
	    on the instance. Entry order is kept stable on purpose: several
	    consumers iterate these dicts and take the first hit.
	    """
	    rel = Relation
	    arr = Arrangement

	    # Canonical shape -> every word (singular and plural) that names it.
	    self.shapes = {
	        "cube": ["cube", "cubes", "box", "boxes", "block", "blocks", "square", "squares"],
	        "sphere": ["sphere", "spheres", "ball", "balls", "orb", "orbs", "globe", "globes"],
	        "cylinder": ["cylinder", "cylinders", "tube", "tubes", "pipe", "pipes", "pillar", "pillars", "column", "columns"],
	        "cone": ["cone", "cones", "spike", "spikes"],
	        "torus": ["torus", "toruses", "tori", "donut", "donuts", "ring", "rings", "hoop", "hoops"],
	        "pyramid": ["pyramid", "pyramids", "tetrahedron", "tetrahedrons"],
	        "capsule": ["capsule", "capsules", "pill", "pills", "lozenge"],
	        "plane": ["plane", "planes", "floor", "ground", "platform", "base", "surface"],
	    }

	    # Reverse index: any synonym -> its canonical shape.
	    self.shape_lookup = {
	        synonym: canonical
	        for canonical, synonyms in self.shapes.items()
	        for synonym in synonyms
	    }

	    # Colour word -> hex value.
	    self.colors = {
	        # Primary palette
	        "red": "#e74c3c", "green": "#2ecc71", "blue": "#3498db",
	        "yellow": "#f1c40f", "orange": "#e67e22", "purple": "#9b59b6",
	        "pink": "#e91e63", "white": "#ecf0f1", "black": "#2c3e50",
	        "gray": "#95a5a6", "grey": "#95a5a6", "brown": "#8b4513",
	        # Extended palette
	        "cyan": "#00bcd4", "magenta": "#e91e63", "lime": "#8bc34a",
	        "teal": "#009688", "navy": "#1a237e", "maroon": "#800000",
	        "olive": "#808000", "coral": "#ff7f50", "salmon": "#fa8072",
	        "gold": "#ffd700", "golden": "#ffd700", "silver": "#c0c0c0",
	        "bronze": "#cd7f32", "copper": "#b87333", "platinum": "#e5e4e2",
	        "crimson": "#dc143c", "scarlet": "#ff2400", "ruby": "#e0115f",
	        "emerald": "#50c878", "jade": "#00a86b", "mint": "#98fb98",
	        "sapphire": "#0f52ba", "azure": "#007fff", "indigo": "#4b0082",
	        "violet": "#8f00ff", "lavender": "#e6e6fa", "plum": "#dda0dd",
	        "turquoise": "#40e0d0", "aqua": "#00ffff", "sky": "#87ceeb",
	        "peach": "#ffdab9", "beige": "#f5f5dc", "ivory": "#fffff0",
	        "cream": "#fffdd0", "tan": "#d2b48c", "chocolate": "#7b3f00",
	        "charcoal": "#36454f", "slate": "#708090", "steel": "#71797e",
	        # Finish words that double as colours
	        "metallic": "#a8a9ad", "shiny": "#d4d4d4", "chrome": "#dbe4eb",
	    }

	    # Size word -> uniform scale factor.
	    self.sizes = {
	        "tiny": 0.25, "very small": 0.35, "small": 0.5, "little": 0.5,
	        "medium": 1.0, "normal": 1.0, "regular": 1.0, "average": 1.0,
	        "large": 1.5, "big": 1.5, "huge": 2.0, "giant": 2.5,
	        "massive": 3.0, "enormous": 3.5, "colossal": 4.0,
	        "mini": 0.3, "micro": 0.2, "nano": 0.15,
	    }

	    # Quantity word -> count.
	    self.numbers = {
	        "a": 1, "an": 1, "one": 1, "single": 1,
	        "two": 2, "pair": 2, "couple": 2, "double": 2,
	        "three": 3, "triple": 3, "trio": 3,
	        "four": 4, "quad": 4, "quadruple": 4,
	        "five": 5, "six": 6, "seven": 7, "eight": 8,
	        "nine": 9, "ten": 10, "eleven": 11, "twelve": 12,
	        "dozen": 12, "fifteen": 15, "twenty": 20,
	        "few": 3, "several": 4, "many": 6, "lots": 8,
	    }

	    # Relational phrase -> Relation member.
	    self.relations = {
	        # vertical: above the target
	        "on": rel.ON_TOP, "on top of": rel.ON_TOP,
	        "above": rel.ON_TOP, "over": rel.ON_TOP,
	        "atop": rel.ON_TOP, "upon": rel.ON_TOP,
	        # vertical: under the target
	        "under": rel.BELOW, "below": rel.BELOW,
	        "beneath": rel.BELOW, "underneath": rel.BELOW,
	        # adjacency
	        "next to": rel.NEXT_TO, "beside": rel.NEXT_TO,
	        "by": rel.NEXT_TO, "near": rel.NEXT_TO,
	        "adjacent": rel.NEXT_TO, "alongside": rel.NEXT_TO,
	        # sideways
	        "left of": rel.LEFT_OF, "to the left": rel.LEFT_OF,
	        "right of": rel.RIGHT_OF, "to the right": rel.RIGHT_OF,
	        # depth
	        "in front of": rel.IN_FRONT, "before": rel.IN_FRONT,
	        "behind": rel.BEHIND, "back of": rel.BEHIND,
	        # containment
	        "inside": rel.INSIDE, "within": rel.INSIDE,
	        "in": rel.INSIDE,
	        # encirclement
	        "around": rel.AROUND, "surrounding": rel.AROUND,
	        # in the middle of two things
	        "between": rel.BETWEEN,
	    }

	    # Layout word -> Arrangement member.
	    self.arrangements = {
	        "row": arr.ROW, "line": arr.ROW, "horizontal": arr.ROW,
	        "column": arr.COLUMN, "vertical": arr.COLUMN, "tower": arr.STACK,
	        "circle": arr.CIRCLE, "ring": arr.CIRCLE, "circular": arr.CIRCLE,
	        "grid": arr.GRID, "matrix": arr.GRID, "array": arr.GRID,
	        "stack": arr.STACK, "stacked": arr.STACK, "pile": arr.STACK,
	        "random": arr.RANDOM, "scattered": arr.RANDOM,
	        "increasing": arr.INCREASING, "ascending": arr.INCREASING,
	        "decreasing": arr.DECREASING, "descending": arr.DECREASING,
	    }

	    # Material / texture word -> canonical material name.
	    self.materials = {
	        "metallic": "metallic", "metal": "metallic", "shiny": "shiny",
	        "matte": "matte", "flat": "matte", "glossy": "glossy",
	        "glass": "glass", "transparent": "glass", "crystal": "glass",
	        "wood": "wood", "wooden": "wood", "stone": "stone",
	        "plastic": "plastic", "rubber": "rubber", "chrome": "chrome",
	        "brushed": "brushed", "polished": "polished", "rough": "rough",
	        "smooth": "smooth", "textured": "textured",
	    }

	    # Refinement word -> (action, amount) applied to existing objects.
	    self.modifications = {
	        "bigger": ("scale", 1.5), "larger": ("scale", 1.5),
	        "smaller": ("scale", 0.7), "tinier": ("scale", 0.5),
	        "taller": ("scale_y", 1.5), "shorter": ("scale_y", 0.7),
	        "wider": ("scale_x", 1.5), "thinner": ("scale_x", 0.7),
	        "longer": ("scale_z", 1.5), "deeper": ("scale_z", 1.5),
	        "double": ("scale", 2.0), "half": ("scale", 0.5),
	        "rotate": ("rotate_y", 45), "spin": ("rotate_y", 90),
	        "flip": ("rotate_x", 180), "tilt": ("rotate_z", 30),
	    }

	def _compile_patterns(self):
	"""Compile regex patterns for efficiency"""

	# Number pattern: "5", "five", "a few"
	number_words = '\|'.join(self.numbers.keys())
	self.num_pattern = re.compile(
	rf'\b(\d+\|{number_words})\b',
	re.IGNORECASE
	)

	# Shape pattern
	all_shapes = []
	for synonyms in self.shapes.values():
	all_shapes.extend(synonyms)
	shape_words = '\|'.join(sorted(all_shapes, key=len, reverse=True))
	self.shape_pattern = re.compile(
	rf'\b({shape_words})\b',
	re.IGNORECASE
	)

	# Color pattern
	color_words = '\|'.join(sorted(self.colors.keys(), key=len, reverse=True))
	self.color_pattern = re.compile(
	rf'\b({color_words})\b',
	re.IGNORECASE
	)

	# Size pattern
	size_words = '\|'.join(sorted(self.sizes.keys(), key=len, reverse=True))
	self.size_pattern = re.compile(
	rf'\b({size_words})\b',
	re.IGNORECASE
	)

	# Relation pattern
	relation_words = '\|'.join(sorted(self.relations.keys(), key=len, reverse=True))
	self.relation_pattern = re.compile(
	rf'\b({relation_words})\b',
	re.IGNORECASE
	)

	# Arrangement pattern
	arr_words = '\|'.join(sorted(self.arrangements.keys(), key=len, reverse=True))
	self.arrangement_pattern = re.compile(
	rf'\b({arr_words})\b',
	re.IGNORECASE
	)

	# Material pattern
	mat_words = '\|'.join(sorted(self.materials.keys(), key=len, reverse=True))
	self.material_pattern = re.compile(
	rf'\b({mat_words})\b',
	re.IGNORECASE
	)

	# Compound object pattern: "X on Y", "X with Y"
	self.compound_pattern = re.compile(
	r'(.+?)\s+(on\|with\|and\|next to\|beside\|above\|below\|under\|behind\|in front of)\s+(.+)',
	re.IGNORECASE
	)

	def _extract_number(self, text: str, shape_word: str) -> int:
	"""Extract quantity for a specific shape"""
	text_lower = text.lower()

	# Pattern: "3 red cubes", "three cubes", "a cube"
	patterns = [
	rf'(\d+)\s+\w\s{shape_word}', # "3 red cubes"
	rf'(\d+)\s+{shape_word}', # "3 cubes"
	]

	for pattern in patterns:
	match = re.search(pattern, text_lower)
	if match:
	return int(match.group(1))

	# Check number words
	for word, num in self.numbers.items():
	patterns = [
	rf'\b{word}\s+\w\s{shape_word}',
	rf'\b{word}\s+{shape_word}',
	]
	for pattern in patterns:
	if re.search(pattern, text_lower):
	return num

	return 1

	def _parse_segment(self, text: str) -> ParsedObject:
	    """Parse the description of a single object.

	    Each attribute is taken from the first matching keyword in the
	    lower-cased text; anything not mentioned falls back to a default
	    (cube, "#808080", size 1.0, quantity 1, "default" material, no
	    arrangement). The relation fields are always left empty here.

	    Args:
	        text: Prompt segment describing one object.

	    Returns:
	        A ParsedObject with ``relation`` set to ``Relation.NONE``.
	    """
	    text_lower = text.lower().strip()

	    # Shape (default: cube)
	    shape_match = self.shape_pattern.search(text_lower)
	    if shape_match:
	        shape_word = shape_match.group(1)
	        shape = self.shape_lookup.get(shape_word, "cube")
	    else:
	        shape_word = "cube"
	        shape = "cube"

	    # Colour (default: mid gray)
	    color = "#808080"
	    color_match = self.color_pattern.search(text_lower)
	    if color_match:
	        color = self.colors.get(color_match.group(1).lower(), "#808080")

	    # Size
	    size = 1.0
	    size_match = self.size_pattern.search(text_lower)
	    if size_match:
	        size = self.sizes.get(size_match.group(1).lower(), 1.0)

	    # Quantity, counted for the shape word actually used in the text
	    quantity = self._extract_number(text_lower, shape_word)

	    # Material
	    material = "default"
	    mat_match = self.material_pattern.search(text_lower)
	    if mat_match:
	        material = self.materials.get(mat_match.group(1).lower(), "default")

	    # Arrangement. Words such as "ring" and "column" are both a shape
	    # synonym and an arrangement keyword; the token that already named
	    # the shape must not also be read as a layout, otherwise "a ring"
	    # becomes a torus laid out in a circle.
	    arrangement = Arrangement.NONE
	    shape_span = shape_match.span(1) if shape_match else None
	    for arr_match in self.arrangement_pattern.finditer(text_lower):
	        if arr_match.span(1) == shape_span:
	            continue
	        arrangement = self.arrangements.get(arr_match.group(1).lower(), Arrangement.NONE)
	        break

	    # Surface modifiers (plain substring test)
	    modifiers = [
	        mod
	        for mod in ["smooth", "rough", "shiny", "matte", "glossy"]
	        if mod in text_lower
	    ]

	    return ParsedObject(
	        shape=shape,
	        color=color,
	        size=size,
	        quantity=quantity,
	        material=material,
	        relation=Relation.NONE,
	        relation_target=None,
	        arrangement=arrangement,
	        modifiers=modifiers
	    )

	def _parse_compound(self, text: str) -> List[Tuple[ParsedObject, Relation, int]]:
	"""Parse compound prompts with relationships"""
	results = []
	text_lower = text.lower()

	# Split by "and" first for multiple independent objects
	and_parts = re.split(r'\s+and\s+', text, flags=re.IGNORECASE)

	for part in and_parts:
	part = part.strip()
	if not part:
	continue

	# Check for relational phrases
	relation_found = False
	for rel_phrase, relation in sorted(self.relations.items(), key=lambda x: len(x[0]), reverse=True):
	pattern = rf'(.+?)\s+{re.escape(rel_phrase)}\s+(.+)'
	match = re.match(pattern, part, re.IGNORECASE)
	if match:
	# Object A relation Object B
	obj_a_text = match.group(1).strip()
	obj_b_text = match.group(2).strip()

	obj_b = self._parse_segment(obj_b_text)
	results.append((obj_b, Relation.NONE, -1))

	obj_a = self._parse_segment(obj_a_text)
	obj_a.relation = relation
	obj_a.relation_target = len(results) - 1
	results.append((obj_a, relation, len(results) - 1))

	relation_found = True
	break

	if not relation_found:
	# Single object or simple description
	obj = self._parse_segment(part)
	results.append((obj, Relation.NONE, -1))

	return results

	def _build_objects(self, parsed_items: List[Tuple[ParsedObject, Relation, int]]) -> List[Dict]:
	"""Convert parsed objects to 3D parameters with positions"""
	objects = []
	object_positions = {} # Track positions by index

	current_x = 0
	current_z = 0

	for idx, (parsed, relation, target_idx) in enumerate(parsed_items):
	base_x, base_y, base_z = 0, 0, 0

	# Calculate position based on relation
	if relation != Relation.NONE and target_idx >= 0 and target_idx in object_positions:
	target_pos = object_positions[target_idx]
	base_x, base_y, base_z = target_pos

	if relation == Relation.ON_TOP:
	base_y += 1.2 * parsed.size
	elif relation == Relation.BELOW:
	base_y -= 1.2 * parsed.size
	elif relation == Relation.NEXT_TO or relation == Relation.RIGHT_OF:
	base_x += 2.0 * parsed.size
	elif relation == Relation.LEFT_OF:
	base_x -= 2.0 * parsed.size
	elif relation == Relation.IN_FRONT:
	base_z += 2.0 * parsed.size
	elif relation == Relation.BEHIND:
	base_z -= 2.0 * parsed.size

	# Generate objects based on quantity and arrangement
	for q in range(parsed.quantity):
	obj_x, obj_y, obj_z = base_x, base_y, base_z
	obj_scale = parsed.size

	# Apply arrangement
	if parsed.arrangement == Arrangement.ROW:
	obj_x += q * 2.2 * parsed.size
	elif parsed.arrangement == Arrangement.COLUMN:
	obj_z += q * 2.2 * parsed.size
	elif parsed.arrangement == Arrangement.STACK:
	obj_y += q * 1.2 * parsed.size
	elif parsed.arrangement == Arrangement.CIRCLE:
	import math
	angle = (2 * math.pi * q) / parsed.quantity
	radius = max(2.0, parsed.quantity * 0.5) * parsed.size
	obj_x += math.cos(angle) * radius
	obj_z += math.sin(angle) * radius
	elif parsed.arrangement == Arrangement.GRID:
	import math
	grid_size = int(math.ceil(math.sqrt(parsed.quantity)))
	row = q // grid_size
	col = q % grid_size
	obj_x += col * 2.2 * parsed.size
	obj_z += row * 2.2 * parsed.size
	elif parsed.arrangement == Arrangement.INCREASING:
	obj_scale = parsed.size * (0.5 + q * 0.3)
	obj_x += q * 2.5 * parsed.size
	elif parsed.arrangement == Arrangement.DECREASING:
	obj_scale = parsed.size * (1.5 - q * 0.2)
	obj_x += q * 2.5 * parsed.size
	elif parsed.arrangement == Arrangement.RANDOM:
	import random
	obj_x += random.uniform(-3, 3)
	obj_z += random.uniform(-3, 3)
	elif parsed.arrangement == Arrangement.NONE and parsed.quantity > 1:
	# Default: spread in a row
	obj_x += q * 2.2 * parsed.size

	# No relation and no arrangement - use sequential positioning
	if relation == Relation.NONE and parsed.arrangement == Arrangement.NONE and q == 0:
	if len(objects) > 0:
	obj_x = current_x
	current_x += 2.5 * parsed.size

	obj = {
	"type": parsed.shape,
	"color": parsed.color,
	"scale": obj_scale,
	"position": {"x": obj_x, "y": obj_y, "z": obj_z},
	"rotation": {"x": 0, "y": 0, "z": 0},
	"material": parsed.material,
	"modifiers": parsed.modifiers
	}

	objects.append(obj)

	# Store first object position for this parsed item
	if objects:
	last_obj = objects[-parsed.quantity] if parsed.quantity <= len(objects) else objects[-1]
	object_positions[idx] = (
	last_obj["position"]["x"],
	last_obj["position"]["y"],
	last_obj["position"]["z"]
	)
	current_x = last_obj["position"]["x"] + 2.5 * parsed.size

	# Center all objects
	if objects:
	min_x = min(o["position"]["x"] for o in objects)
	max_x = max(o["position"]["x"] for o in objects)
	min_z = min(o["position"]["z"] for o in objects)
	max_z = max(o["position"]["z"] for o in objects)

	center_x = (min_x + max_x) / 2
	center_z = (min_z + max_z) / 2

	for obj in objects:
	obj["position"]["x"] -= center_x
	obj["position"]["z"] -= center_z

	return objects

	def process_prompt(self, prompt: str, existing_context: Optional[Dict] = None) -> Dict[str, Any]:
	    """Turn a text prompt into scene parameters.

	    When ``existing_context`` is given and the prompt looks like a
	    modification request, the existing scene is modified instead of
	    building a new one.

	    Args:
	        prompt: The user's description or modification request.
	        existing_context: A previous result of this method, if any.

	    Returns:
	        On success a dict with ``success`` (True), ``original_prompt``,
	        ``interpretation``, ``model_params`` and ``is_modification``.
	        On failure a dict with ``success`` (False), ``error`` and
	        ``interpretation``; exceptions are not propagated.
	    """
	    try:
	        logger.info("Processing: %s", prompt)

	        # Check for modification request
	        if existing_context and self._is_modification(prompt):
	            return self._process_modification(prompt, existing_context)

	        # Parse the prompt, falling back to a single default segment
	        parsed_items = self._parse_compound(prompt)
	        if not parsed_items:
	            parsed_items = [(self._parse_segment(prompt), Relation.NONE, -1)]

	        objects = self._build_objects(parsed_items)
	        interpretation = self._build_interpretation(parsed_items, objects)

	        model_params = {
	            "objects": objects,
	            "scene_settings": {
	                "background_color": "#1a1a2e",
	                "ambient_light": 0.5,
	                "directional_light": 0.8
	            }
	        }

	        logger.info("Generated %d objects", len(objects))

	        return {
	            "success": True,
	            "original_prompt": prompt,
	            "interpretation": interpretation,
	            "model_params": model_params,
	            "is_modification": False
	        }

	    except Exception as e:
	        # Top-level boundary: report the failure to the caller, but keep
	        # the traceback in the log so the cause can be diagnosed.
	        logger.exception("Processing error: %s", e)
	        return {
	            "success": False,
	            "error": str(e),
	            "interpretation": f"Error: {str(e)}"
	        }

	def _is_modification(self, text: str) -> bool:
	"""Check if prompt is a modification request"""
	mod_keywords = [
	"make it", "change", "modify", "update", "make them",
	"bigger", "smaller", "larger", "taller", "shorter",
	"rotate", "spin", "flip", "move", "shift",
	"different color", "change color", "new color",
	"add more", "remove", "delete", "more", "less"
	]
	text_lower = text.lower()
	return any(kw in text_lower for kw in mod_keywords)

	def _process_modification(self, prompt: str, context: Dict) -> Dict[str, Any]:
	"""Process modification request"""
	text_lower = prompt.lower()

	# Get existing objects
	existing_objects = context.get("model_params", {}).get("objects", [])
	if not existing_objects:
	return self.process_prompt(prompt, None)

	modified_objects = []

	for obj in existing_objects:
	new_obj = obj.copy()
	new_obj["position"] = obj["position"].copy()
	new_obj["rotation"] = obj.get("rotation", {"x": 0, "y": 0, "z": 0}).copy()

	# Apply modifications
	for mod_word, (action, value) in self.modifications.items():
	if mod_word in text_lower:
	if action == "scale":
	new_obj["scale"] = obj.get("scale", 1.0) * value
	elif action == "scale_x":
	if "scale_axes" not in new_obj:
	new_obj["scale_axes"] = {"x": 1, "y": 1, "z": 1}
	new_obj["scale_axes"]["x"] *= value
	elif action == "scale_y":
	if "scale_axes" not in new_obj:
	new_obj["scale_axes"] = {"x": 1, "y": 1, "z": 1}
	new_obj["scale_axes"]["y"] *= value
	elif action == "scale_z":
	if "scale_axes" not in new_obj:
	new_obj["scale_axes"] = {"x": 1, "y": 1, "z": 1}
	new_obj["scale_axes"]["z"] *= value
	elif action.startswith("rotate"):
	axis = action.split("_")[1]
	new_obj["rotation"][axis] = new_obj["rotation"].get(axis, 0) + value

	# Check for color change
	color_match = self.color_pattern.search(text_lower)
	if color_match and ("color" in text_lower or "change" in text_lower):
	new_obj["color"] = self.colors.get(color_match.group(1).lower(), new_obj["color"])

	modified_objects.append(new_obj)

	# Check for "add more"
	if "add" in text_lower or "more" in text_lower:
	# Parse what to add
	add_parsed = self._parse_segment(prompt)
	if add_parsed.quantity > 0:
	# Add new objects
	last_x = max(o["position"]["x"] for o in modified_objects) if modified_objects else 0
	for i in range(add_parsed.quantity):
	modified_objects.append({
	"type": add_parsed.shape,
	"color": add_parsed.color,
	"scale": add_parsed.size,
	"position": {"x": last_x + 2.5 * (i + 1), "y": 0, "z": 0},
	"rotation": {"x": 0, "y": 0, "z": 0},
	"material": add_parsed.material,
	"modifiers": add_parsed.modifiers
	})

	model_params = {
	"objects": modified_objects,
	"scene_settings": context.get("model_params", {}).get("scene_settings", {
	"background_color": "#1a1a2e",
	"ambient_light": 0.5,
	"directional_light": 0.8
	})
	}

	return {
	"success": True,
	"original_prompt": prompt,
	"interpretation": f"Modified {len(modified_objects)} object(s)",
	"model_params": model_params,
	"is_modification": True
	}

	def _build_interpretation(self, parsed_items: List[Tuple[ParsedObject, Relation, int]], objects: List[Dict]) -> str:
	"""Build human-readable interpretation"""
	parts = []

	for parsed, relation, target in parsed_items:
	desc = []
	if parsed.quantity > 1:
	desc.append(f"{parsed.quantity}x")
	if parsed.size != 1.0:
	size_name = "small" if parsed.size < 1 else "large" if parsed.size > 1 else ""
	if size_name:
	desc.append(size_name)

	# Get color name
	color_name = next((name for name, hex_val in self.colors.items() if hex_val == parsed.color), None)
	if color_name:
	desc.append(color_name)

	desc.append(parsed.shape)

	if parsed.arrangement != Arrangement.NONE:
	desc.append(f"in {parsed.arrangement.value}")

	if relation != Relation.NONE:
	desc.append(relation.value.replace("_", " "))

	parts.append(" ".join(desc))

	interpretation = "Creating: " + ", ".join(parts)
	interpretation += f" \| Total: {len(objects)} objects"

	return interpretation


	# Module-wide processor, created lazily on first request
	_processor = None


	def get_processor() -> AdvancedNLPProcessor:
	    """Return the shared AdvancedNLPProcessor, building it on first use."""
	    global _processor
	    if _processor is not None:
	        return _processor
	    _processor = AdvancedNLPProcessor()
	    return _processor


	class NLPProcessor:
	    """Backward-compatible facade over the shared AdvancedNLPProcessor."""

	    def __init__(self):
	        # Reuse the module-wide instance rather than building a new one.
	        self._processor = get_processor()

	    def process_prompt(self, prompt: str, existing_context: Optional[Dict] = None) -> Dict[str, Any]:
	        """Forward the call unchanged to the wrapped processor."""
	        delegate = self._processor
	        return delegate.process_prompt(prompt, existing_context)