Spaces:
Running
on
Zero
Running
on
Zero
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
Utility functions for Phramer AI
|
| 3 |
By Pariente AI, for MIA TV Series
|
| 4 |
|
| 5 |
-
Enhanced with professional cinematography knowledge and
|
| 6 |
"""
|
| 7 |
|
| 8 |
import re
|
|
@@ -130,14 +130,14 @@ def detect_scene_type_from_analysis(analysis_metadata: Dict[str, Any]) -> str:
|
|
| 130 |
|
| 131 |
def apply_flux_rules(prompt: str, analysis_metadata: Optional[Dict[str, Any]] = None) -> str:
|
| 132 |
"""
|
| 133 |
-
Apply enhanced prompt optimization
|
| 134 |
|
| 135 |
Args:
|
| 136 |
prompt: Raw prompt text from BAGEL analysis
|
| 137 |
analysis_metadata: Enhanced metadata with cinematography suggestions
|
| 138 |
|
| 139 |
Returns:
|
| 140 |
-
Optimized
|
| 141 |
"""
|
| 142 |
if not prompt or not isinstance(prompt, str):
|
| 143 |
return ""
|
|
@@ -150,10 +150,10 @@ def apply_flux_rules(prompt: str, analysis_metadata: Optional[Dict[str, Any]] =
|
|
| 150 |
# Extract description part only (remove CAMERA_SETUP section if present)
|
| 151 |
description_part = _extract_description_only(cleaned_prompt)
|
| 152 |
|
| 153 |
-
# NEW: Convert
|
| 154 |
if PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("prompt_condensation", True):
|
| 155 |
-
description_part =
|
| 156 |
-
logger.info("
|
| 157 |
|
| 158 |
# Check if BAGEL provided intelligent camera setup with cinematography context
|
| 159 |
camera_config = ""
|
|
@@ -177,15 +177,15 @@ def apply_flux_rules(prompt: str, analysis_metadata: Optional[Dict[str, Any]] =
|
|
| 177 |
# Add style enhancement for multi-engine compatibility
|
| 178 |
style_enhancement = _get_style_enhancement(scene_type, description_part.lower())
|
| 179 |
|
| 180 |
-
# NEW:
|
| 181 |
-
|
| 182 |
|
| 183 |
-
# Build final prompt: Description + Camera + Lighting + Style + Keywords
|
| 184 |
-
final_prompt = description_part + camera_config + lighting_enhancement + style_enhancement +
|
| 185 |
|
| 186 |
-
# NEW: Final length optimization
|
| 187 |
if PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("prompt_optimization", {}).get("max_length"):
|
| 188 |
-
final_prompt =
|
| 189 |
|
| 190 |
# Clean up formatting
|
| 191 |
final_prompt = _clean_prompt_formatting(final_prompt)
|
|
@@ -224,9 +224,63 @@ def _extract_description_only(prompt: str) -> str:
|
|
| 224 |
return description.strip()
|
| 225 |
|
| 226 |
|
| 227 |
-
def
|
| 228 |
-
"""
|
| 229 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
generative = description
|
| 231 |
|
| 232 |
# Remove descriptive introduction phrases
|
|
@@ -238,17 +292,13 @@ def _convert_to_generative_language(description: str) -> str:
|
|
| 238 |
r'This is (?:a|an) (?:image|photograph|picture) (?:of|showing)',
|
| 239 |
r'The setting (?:appears to be|is)',
|
| 240 |
r'The scene (?:appears to be|is|shows)',
|
| 241 |
-
r'(?:In the background|In the foreground), (?:there are|there is)',
|
| 242 |
-
r'(?:The background|The foreground) (?:features|shows|contains)',
|
| 243 |
-
r'(?:There are|There is) [^,]+ (?:in the background|in the foreground)',
|
| 244 |
-
r'The overall (?:setting|atmosphere|mood) (?:suggests|indicates)',
|
| 245 |
]
|
| 246 |
|
| 247 |
for pattern in descriptive_intros:
|
| 248 |
generative = re.sub(pattern, '', generative, flags=re.IGNORECASE)
|
| 249 |
|
| 250 |
-
# Remove uncertainty phrases
|
| 251 |
-
|
| 252 |
r'possibly (?:a|an) ',
|
| 253 |
r'appears to be (?:a|an) ',
|
| 254 |
r'seems to be (?:a|an) ',
|
|
@@ -257,18 +307,26 @@ def _convert_to_generative_language(description: str) -> str:
|
|
| 257 |
r'suggests (?:a|an) ',
|
| 258 |
r'indicating (?:a|an) ',
|
| 259 |
r'(?:possibly|apparently|seemingly|likely)',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
]
|
| 261 |
|
| 262 |
-
for pattern in
|
| 263 |
generative = re.sub(pattern, '', generative, flags=re.IGNORECASE)
|
| 264 |
|
| 265 |
-
# Convert
|
| 266 |
-
|
| 267 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
(r'(?:close-up|medium shot|wide shot) of (?:a|an|the) ', r'close-up '),
|
| 269 |
-
# "blurred figures of people" -> "blurred people"
|
| 270 |
-
(r'(?:blurred )?(?:figures|silhouettes) of (\w+)', r'blurred \1'),
|
| 271 |
-
# "people walking on a sidewalk" -> "people walking on sidewalk"
|
| 272 |
(r'(?:a|an|the) (\w+)', r'\1'),
|
| 273 |
# Remove excessive connecting words
|
| 274 |
(r'(?:, and|, with|, featuring)', ','),
|
|
@@ -277,7 +335,7 @@ def _convert_to_generative_language(description: str) -> str:
|
|
| 277 |
(r'in (?:a|an|the) ', r'in '),
|
| 278 |
]
|
| 279 |
|
| 280 |
-
for pattern, replacement in
|
| 281 |
generative = re.sub(pattern, replacement, generative, flags=re.IGNORECASE)
|
| 282 |
|
| 283 |
# Convert action descriptions to present participles
|
|
@@ -291,6 +349,11 @@ def _convert_to_generative_language(description: str) -> str:
|
|
| 291 |
for pattern, replacement in action_conversions:
|
| 292 |
generative = re.sub(pattern, replacement, generative, flags=re.IGNORECASE)
|
| 293 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
# Clean up extra spaces and punctuation
|
| 295 |
generative = re.sub(r'\s+', ' ', generative)
|
| 296 |
generative = re.sub(r'^\s*,\s*', '', generative) # Remove leading commas
|
|
@@ -302,47 +365,73 @@ def _convert_to_generative_language(description: str) -> str:
|
|
| 302 |
if generative:
|
| 303 |
generative = generative[0].upper() + generative[1:] if len(generative) > 1 else generative.upper()
|
| 304 |
|
| 305 |
-
logger.info(f"
|
| 306 |
return generative
|
| 307 |
|
| 308 |
except Exception as e:
|
| 309 |
-
logger.warning(f"
|
| 310 |
return description
|
| 311 |
|
| 312 |
|
| 313 |
-
def
|
| 314 |
-
"""
|
| 315 |
try:
|
| 316 |
-
mandatory = FLUX_RULES.get("mandatory_keywords", {})
|
| 317 |
-
|
| 318 |
keywords = []
|
| 319 |
|
| 320 |
-
#
|
| 321 |
-
|
| 322 |
-
|
| 323 |
|
| 324 |
-
#
|
| 325 |
-
|
| 326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
|
| 328 |
-
#
|
| 329 |
-
style_by_scene =
|
| 330 |
if scene_type in style_by_scene:
|
| 331 |
scene_keywords = style_by_scene[scene_type]
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
else:
|
|
|
|
| 337 |
return ""
|
| 338 |
|
| 339 |
except Exception as e:
|
| 340 |
-
logger.warning(f"
|
| 341 |
return ""
|
| 342 |
|
| 343 |
|
| 344 |
-
def
|
| 345 |
-
"""Optimize prompt length
|
| 346 |
try:
|
| 347 |
max_words = PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("prompt_optimization", {}).get("max_length", 150)
|
| 348 |
|
|
@@ -350,34 +439,53 @@ def _optimize_prompt_length(prompt: str) -> str:
|
|
| 350 |
if len(words) <= max_words:
|
| 351 |
return prompt
|
| 352 |
|
| 353 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
essential_parts = []
|
| 355 |
-
|
| 356 |
|
| 357 |
-
for
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
|
| 377 |
return optimized
|
| 378 |
|
| 379 |
except Exception as e:
|
| 380 |
-
logger.warning(f"
|
| 381 |
return prompt
|
| 382 |
|
| 383 |
|
|
@@ -461,15 +569,15 @@ def _format_professional_camera_suggestion(bagel_camera: str, scene_type: str) -
|
|
| 461 |
if composition_match:
|
| 462 |
camera_setup += f", {composition_match.group()}"
|
| 463 |
|
| 464 |
-
#
|
| 465 |
if scene_type == "cinematic":
|
| 466 |
-
result = f", Shot on {camera_setup}
|
| 467 |
elif scene_type == "portrait":
|
| 468 |
-
result = f", Shot on {camera_setup}
|
| 469 |
else:
|
| 470 |
-
result = f", Shot on {camera_setup}
|
| 471 |
|
| 472 |
-
logger.info(f"Formatted camera setup: {result}")
|
| 473 |
return result
|
| 474 |
else:
|
| 475 |
# Fallback to enhanced config if parsing fails
|
|
@@ -484,12 +592,12 @@ def _get_enhanced_camera_config(scene_type: str, description_lower: str) -> str:
|
|
| 484 |
"""Get enhanced camera configuration with cinematography knowledge"""
|
| 485 |
# Enhanced camera configurations with cinema equipment
|
| 486 |
enhanced_configs = {
|
| 487 |
-
"cinematic": ", Shot on ARRI Alexa LF, 35mm anamorphic lens at f/2.8, ISO 400
|
| 488 |
-
"portrait": ", Shot on Canon EOS R5, 85mm f/1.4 lens at f/2.8, ISO 200, rule of thirds
|
| 489 |
-
"landscape": ", Shot on Phase One XT, 24-70mm f/4 lens at f/8, ISO 100, hyperfocal distance
|
| 490 |
-
"street": ", Shot on Leica M11, 35mm f/1.4 lens at f/2.8, ISO 800
|
| 491 |
-
"architectural": ", Shot on Canon EOS R5, 24-70mm f/2.8 lens at f/8, ISO 100, symmetrical composition
|
| 492 |
-
"commercial": ", Shot on Hasselblad X2D 100C, 90mm f/2.5 lens at f/4, ISO 100
|
| 493 |
}
|
| 494 |
|
| 495 |
# Use enhanced config if available, otherwise fall back to FLUX_RULES
|
|
@@ -510,30 +618,27 @@ def _get_cinematography_lighting_enhancement(description_lower: str, camera_conf
|
|
| 510 |
# Enhanced lighting based on scene type and cinematography knowledge
|
| 511 |
if scene_type == "cinematic":
|
| 512 |
if any(term in description_lower for term in ["dramatic", "moody", "dark"]):
|
| 513 |
-
return ", dramatic
|
| 514 |
else:
|
| 515 |
-
return ",
|
| 516 |
elif scene_type == "portrait":
|
| 517 |
-
return ",
|
| 518 |
elif "dramatic" in description_lower or "chaos" in description_lower:
|
| 519 |
-
return
|
| 520 |
else:
|
| 521 |
-
return
|
| 522 |
|
| 523 |
|
| 524 |
def _get_style_enhancement(scene_type: str, description_lower: str) -> str:
|
| 525 |
-
"""Get style enhancement for multi-engine compatibility"""
|
| 526 |
-
|
| 527 |
-
|
| 528 |
if scene_type == "cinematic":
|
| 529 |
if "film grain" not in description_lower:
|
| 530 |
-
return ",
|
| 531 |
-
elif scene_type
|
| 532 |
-
return ",
|
| 533 |
-
elif "editorial" in description_lower:
|
| 534 |
-
return ", " + style_enhancements.get("editorial", "editorial photography style")
|
| 535 |
|
| 536 |
-
return ""
|
| 537 |
|
| 538 |
|
| 539 |
def _clean_prompt_formatting(prompt: str) -> str:
|
|
@@ -593,12 +698,16 @@ def calculate_prompt_score(prompt: str, analysis_data: Optional[Dict[str, Any]]
|
|
| 593 |
if re.search(r'\d+mm.*f/[\d.]+', prompt):
|
| 594 |
tech_score += 5
|
| 595 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 596 |
# Anamorphic and specialized lenses
|
| 597 |
if 'anamorphic' in prompt.lower():
|
| 598 |
tech_score += 4
|
| 599 |
|
| 600 |
# Professional terminology
|
| 601 |
-
tech_keywords = ['shot on', 'lens', '
|
| 602 |
for keyword in tech_keywords:
|
| 603 |
if keyword in prompt.lower():
|
| 604 |
tech_score += 2
|
|
@@ -609,19 +718,23 @@ def calculate_prompt_score(prompt: str, analysis_data: Optional[Dict[str, Any]]
|
|
| 609 |
|
| 610 |
breakdown["technical_details"] = min(25, tech_score)
|
| 611 |
|
| 612 |
-
# Professional Cinematography (0-25 points) -
|
| 613 |
cinema_score = 0
|
| 614 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 615 |
# Professional lighting techniques
|
| 616 |
lighting_terms = ['cinematic lighting', 'dramatic lighting', 'studio lighting', 'rim light', 'practical lights']
|
| 617 |
cinema_score += sum(3 for term in lighting_terms if term in prompt.lower())
|
| 618 |
|
| 619 |
# Composition techniques
|
| 620 |
-
composition_terms = ['composition', 'framing', 'depth of field', 'bokeh', 'rule of thirds']
|
| 621 |
cinema_score += sum(2 for term in composition_terms if term in prompt.lower())
|
| 622 |
|
| 623 |
# Cinematography style elements
|
| 624 |
-
style_terms = ['film grain', 'anamorphic', 'telephoto compression', 'wide-angle']
|
| 625 |
cinema_score += sum(3 for term in style_terms if term in prompt.lower())
|
| 626 |
|
| 627 |
# Professional context bonus
|
|
@@ -630,24 +743,37 @@ def calculate_prompt_score(prompt: str, analysis_data: Optional[Dict[str, Any]]
|
|
| 630 |
|
| 631 |
breakdown["professional_cinematography"] = min(25, cinema_score)
|
| 632 |
|
| 633 |
-
# Multi-Engine Optimization (0-25 points)
|
| 634 |
optimization_score = 0
|
| 635 |
|
| 636 |
-
# Check for
|
| 637 |
-
|
| 638 |
-
|
| 639 |
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 649 |
if any(style in prompt for style in FLUX_RULES.get("style_enhancements", {}).values()):
|
| 650 |
optimization_score += 3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 651 |
|
| 652 |
breakdown["multi_engine_optimization"] = min(25, optimization_score)
|
| 653 |
|
|
@@ -716,11 +842,12 @@ def format_analysis_report(analysis_data: Dict[str, Any], processing_time: float
|
|
| 716 |
**Professional Context:** {'✅ Applied' if has_cinema_context else '❌ Not Applied'}
|
| 717 |
|
| 718 |
**🎯 OPTIMIZATIONS APPLIED:**
|
|
|
|
| 719 |
✅ Professional camera configuration
|
| 720 |
✅ Cinematography lighting setup
|
| 721 |
-
✅
|
| 722 |
✅ Multi-engine compatibility
|
| 723 |
-
✅
|
| 724 |
|
| 725 |
**⚡ Powered by Pariente AI for MIA TV Series**"""
|
| 726 |
|
|
|
|
| 2 |
Utility functions for Phramer AI
|
| 3 |
By Pariente AI, for MIA TV Series
|
| 4 |
|
| 5 |
+
Enhanced with professional cinematography knowledge and intelligent token economy
|
| 6 |
"""
|
| 7 |
|
| 8 |
import re
|
|
|
|
| 130 |
|
| 131 |
def apply_flux_rules(prompt: str, analysis_metadata: Optional[Dict[str, Any]] = None) -> str:
|
| 132 |
"""
|
| 133 |
+
Apply enhanced prompt optimization with cinematography knowledge and intelligent token economy
|
| 134 |
|
| 135 |
Args:
|
| 136 |
prompt: Raw prompt text from BAGEL analysis
|
| 137 |
analysis_metadata: Enhanced metadata with cinematography suggestions
|
| 138 |
|
| 139 |
Returns:
|
| 140 |
+
Optimized prompt with professional cinematography terms and efficient token usage
|
| 141 |
"""
|
| 142 |
if not prompt or not isinstance(prompt, str):
|
| 143 |
return ""
|
|
|
|
| 150 |
# Extract description part only (remove CAMERA_SETUP section if present)
|
| 151 |
description_part = _extract_description_only(cleaned_prompt)
|
| 152 |
|
| 153 |
+
# NEW: Convert to generative language with cinematography angle detection
|
| 154 |
if PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("prompt_condensation", True):
|
| 155 |
+
description_part = _convert_to_cinematographic_language(description_part)
|
| 156 |
+
logger.info("Applied cinematographic language conversion")
|
| 157 |
|
| 158 |
# Check if BAGEL provided intelligent camera setup with cinematography context
|
| 159 |
camera_config = ""
|
|
|
|
| 177 |
# Add style enhancement for multi-engine compatibility
|
| 178 |
style_enhancement = _get_style_enhancement(scene_type, description_part.lower())
|
| 179 |
|
| 180 |
+
# NEW: Smart keyword insertion with token economy
|
| 181 |
+
smart_keywords = _apply_smart_keyword_insertion(description_part, camera_config, scene_type)
|
| 182 |
|
| 183 |
+
# Build final prompt: Description + Camera + Lighting + Style + Smart Keywords
|
| 184 |
+
final_prompt = description_part + camera_config + lighting_enhancement + style_enhancement + smart_keywords
|
| 185 |
|
| 186 |
+
# NEW: Final length optimization with token economy
|
| 187 |
if PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("prompt_optimization", {}).get("max_length"):
|
| 188 |
+
final_prompt = _optimize_prompt_with_token_economy(final_prompt)
|
| 189 |
|
| 190 |
# Clean up formatting
|
| 191 |
final_prompt = _clean_prompt_formatting(final_prompt)
|
|
|
|
| 224 |
return description.strip()
|
| 225 |
|
| 226 |
|
| 227 |
+
def _detect_camera_angles(description: str) -> List[str]:
|
| 228 |
+
"""Detect camera angles and perspectives using professional cinematography knowledge"""
|
| 229 |
try:
|
| 230 |
+
angles_detected = []
|
| 231 |
+
description_lower = description.lower()
|
| 232 |
+
|
| 233 |
+
# Low angle (contrapicado) detection
|
| 234 |
+
low_angle_indicators = [
|
| 235 |
+
"looking up at", "from below", "upward angle", "towering", "looming",
|
| 236 |
+
"shot from ground level", "worm's eye", "low angle"
|
| 237 |
+
]
|
| 238 |
+
if any(indicator in description_lower for indicator in low_angle_indicators):
|
| 239 |
+
angles_detected.append("low-angle shot")
|
| 240 |
+
|
| 241 |
+
# High angle (picado) detection
|
| 242 |
+
high_angle_indicators = [
|
| 243 |
+
"looking down", "from above", "overhead", "bird's eye", "aerial view",
|
| 244 |
+
"downward angle", "top-down", "high angle"
|
| 245 |
+
]
|
| 246 |
+
if any(indicator in description_lower for indicator in high_angle_indicators):
|
| 247 |
+
angles_detected.append("high-angle shot")
|
| 248 |
+
|
| 249 |
+
# Eye level detection
|
| 250 |
+
eye_level_indicators = [
|
| 251 |
+
"eye level", "straight on", "direct view", "level with"
|
| 252 |
+
]
|
| 253 |
+
if any(indicator in description_lower for indicator in eye_level_indicators):
|
| 254 |
+
angles_detected.append("eye-level shot")
|
| 255 |
+
|
| 256 |
+
# Dutch angle detection
|
| 257 |
+
dutch_indicators = [
|
| 258 |
+
"tilted", "angled", "diagonal", "off-kilter", "dutch angle"
|
| 259 |
+
]
|
| 260 |
+
if any(indicator in description_lower for indicator in dutch_indicators):
|
| 261 |
+
angles_detected.append("dutch angle")
|
| 262 |
+
|
| 263 |
+
# Perspective analysis for mixed angles
|
| 264 |
+
if ("foreground" in description_lower and "background" in description_lower):
|
| 265 |
+
if ("close" in description_lower or "prominent" in description_lower) and "blurred" in description_lower:
|
| 266 |
+
# Suggests foreground element shot from specific angle with background perspective
|
| 267 |
+
if not angles_detected: # Only add if no specific angle detected
|
| 268 |
+
angles_detected.append("shallow depth perspective")
|
| 269 |
+
|
| 270 |
+
logger.info(f"Camera angles detected: {angles_detected}")
|
| 271 |
+
return angles_detected
|
| 272 |
+
|
| 273 |
+
except Exception as e:
|
| 274 |
+
logger.warning(f"Camera angle detection failed: {e}")
|
| 275 |
+
return []
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def _convert_to_cinematographic_language(description: str) -> str:
|
| 279 |
+
"""Convert descriptive analysis to cinematographic prompt language with angle detection"""
|
| 280 |
+
try:
|
| 281 |
+
# First detect camera angles
|
| 282 |
+
camera_angles = _detect_camera_angles(description)
|
| 283 |
+
|
| 284 |
generative = description
|
| 285 |
|
| 286 |
# Remove descriptive introduction phrases
|
|
|
|
| 292 |
r'This is (?:a|an) (?:image|photograph|picture) (?:of|showing)',
|
| 293 |
r'The setting (?:appears to be|is)',
|
| 294 |
r'The scene (?:appears to be|is|shows)',
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
]
|
| 296 |
|
| 297 |
for pattern in descriptive_intros:
|
| 298 |
generative = re.sub(pattern, '', generative, flags=re.IGNORECASE)
|
| 299 |
|
| 300 |
+
# Remove uncertainty and verbose connector phrases
|
| 301 |
+
verbose_phrases = [
|
| 302 |
r'possibly (?:a|an) ',
|
| 303 |
r'appears to be (?:a|an) ',
|
| 304 |
r'seems to be (?:a|an) ',
|
|
|
|
| 307 |
r'suggests (?:a|an) ',
|
| 308 |
r'indicating (?:a|an) ',
|
| 309 |
r'(?:possibly|apparently|seemingly|likely)',
|
| 310 |
+
r'which (?:is|are|creates|adds)',
|
| 311 |
+
r'(?:In the background|In the foreground), (?:there are|there is)',
|
| 312 |
+
r'(?:The background|The foreground) (?:features|shows|contains)',
|
| 313 |
+
r'(?:There are|There is) [^,]+ (?:in the background|in the foreground)',
|
| 314 |
+
r'The overall (?:setting|atmosphere|mood) (?:suggests|indicates)',
|
| 315 |
]
|
| 316 |
|
| 317 |
+
for pattern in verbose_phrases:
|
| 318 |
generative = re.sub(pattern, '', generative, flags=re.IGNORECASE)
|
| 319 |
|
| 320 |
+
# Convert spatial relationships to cinematographic terms
|
| 321 |
+
spatial_conversions = [
|
| 322 |
+
# Background/foreground to cinematographic terms
|
| 323 |
+
(r'prominently displayed in (?:the )?foreground', 'foreground focus'),
|
| 324 |
+
(r'in (?:the )?foreground', 'foreground'),
|
| 325 |
+
(r'in (?:the )?background', 'background'),
|
| 326 |
+
(r'blurred (?:figures|people|objects)', 'bokeh blur'),
|
| 327 |
+
(r'out of focus', 'soft focus'),
|
| 328 |
+
# Convert descriptive structure to noun phrases
|
| 329 |
(r'(?:close-up|medium shot|wide shot) of (?:a|an|the) ', r'close-up '),
|
|
|
|
|
|
|
|
|
|
| 330 |
(r'(?:a|an|the) (\w+)', r'\1'),
|
| 331 |
# Remove excessive connecting words
|
| 332 |
(r'(?:, and|, with|, featuring)', ','),
|
|
|
|
| 335 |
(r'in (?:a|an|the) ', r'in '),
|
| 336 |
]
|
| 337 |
|
| 338 |
+
for pattern, replacement in spatial_conversions:
|
| 339 |
generative = re.sub(pattern, replacement, generative, flags=re.IGNORECASE)
|
| 340 |
|
| 341 |
# Convert action descriptions to present participles
|
|
|
|
| 349 |
for pattern, replacement in action_conversions:
|
| 350 |
generative = re.sub(pattern, replacement, generative, flags=re.IGNORECASE)
|
| 351 |
|
| 352 |
+
# Add detected camera angles at the beginning
|
| 353 |
+
if camera_angles:
|
| 354 |
+
angle_prefix = ", ".join(camera_angles)
|
| 355 |
+
generative = f"{angle_prefix}, {generative}"
|
| 356 |
+
|
| 357 |
# Clean up extra spaces and punctuation
|
| 358 |
generative = re.sub(r'\s+', ' ', generative)
|
| 359 |
generative = re.sub(r'^\s*,\s*', '', generative) # Remove leading commas
|
|
|
|
| 365 |
if generative:
|
| 366 |
generative = generative[0].upper() + generative[1:] if len(generative) > 1 else generative.upper()
|
| 367 |
|
| 368 |
+
logger.info(f"Cinematographic conversion: angles={len(camera_angles)}, {len(description)} → {len(generative)} chars")
|
| 369 |
return generative
|
| 370 |
|
| 371 |
except Exception as e:
|
| 372 |
+
logger.warning(f"Cinematographic language conversion failed: {e}")
|
| 373 |
return description
|
| 374 |
|
| 375 |
|
| 376 |
+
def _apply_smart_keyword_insertion(description: str, camera_config: str, scene_type: str) -> str:
    """Build the trailing keyword suffix for a prompt, skipping redundant terms.

    Keywords are read from ``FLUX_RULES["mandatory_keywords"]``:

    * up to two "quality" keywords, only when ``camera_config`` does not
      contain both a recognised camera brand and a lens spec (``NNmm ... f/N``);
    * the "style_by_scene" keywords for ``scene_type`` that do not already
      appear (case-insensitively) in ``camera_config`` or ``description``;
    * the "technical" keywords, except "professional photography" and
      "high resolution" when a camera brand is present.

    Returns:
        ``", kw1, kw2, ..."`` with duplicates removed in first-seen order, or
        an empty string when no keyword is needed or any step fails.
    """
    def _mandatory(section, fallback):
        # Look the section up on every call, exactly as each rule needs it.
        return FLUX_RULES.get("mandatory_keywords", {}).get(section, fallback)

    try:
        collected = []

        # Rule 1: real camera + lens specs make generic quality keywords redundant.
        brand_found = re.search(r'(?:Canon|Sony|Leica|ARRI|RED|Hasselblad|Phase One)', camera_config)
        has_camera_specs = bool(brand_found)
        lens_found = re.search(r'\d+mm.*f/[\d.]+', camera_config)
        has_lens_specs = bool(lens_found)

        if has_camera_specs and has_lens_specs:
            logger.info("Skipped redundant quality keywords (camera specs present)")
        else:
            collected.extend(_mandatory("quality", [])[:2])  # at most two quality keywords
            logger.info("Added fallback quality keywords (no camera specs detected)")

        # Rule 2: scene keywords only when neither the camera config nor the
        # description already mentions them.
        style_by_scene = _mandatory("style_by_scene", {})
        if scene_type in style_by_scene:
            for candidate in style_by_scene[scene_type]:
                if candidate.lower() in camera_config.lower() or candidate.lower() in description.lower():
                    continue
                collected.append(candidate)

        # Rule 3: technical keywords, minus the ones a camera brand already implies.
        for candidate in _mandatory("technical", []):
            implied_by_camera = candidate in ("professional photography", "high resolution")
            if has_camera_specs and implied_by_camera:
                continue
            collected.append(candidate)

        # Order-preserving de-duplication.
        unique_keywords = []
        for candidate in collected:
            if candidate in unique_keywords:
                continue
            unique_keywords.append(candidate)

        if not unique_keywords:
            logger.info("No additional keywords needed (all redundant)")
            return ""

        result = ", " + ", ".join(unique_keywords)
        logger.info(f"Smart keywords applied: {unique_keywords}")
        return result

    except Exception as e:
        logger.warning(f"Smart keyword insertion failed: {e}")
        return ""
|
| 431 |
|
| 432 |
|
| 433 |
+
def _optimize_prompt_with_token_economy(prompt: str) -> str:
|
| 434 |
+
"""Optimize prompt length with intelligent token economy"""
|
| 435 |
try:
|
| 436 |
max_words = PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("prompt_optimization", {}).get("max_length", 150)
|
| 437 |
|
|
|
|
| 439 |
if len(words) <= max_words:
|
| 440 |
return prompt
|
| 441 |
|
| 442 |
+
# Priority preservation order for token economy
|
| 443 |
+
essential_patterns = [
|
| 444 |
+
# 1. Camera angles (highest priority)
|
| 445 |
+
r'(?:low-angle|high-angle|eye-level|dutch angle|bird\'s eye|worm\'s eye) shot',
|
| 446 |
+
# 2. Camera and lens specs
|
| 447 |
+
r'(?:Canon|Sony|Leica|ARRI|RED|Hasselblad|Phase One) [^,]+',
|
| 448 |
+
r'\d+mm[^,]*f/[\d.]+[^,]*',
|
| 449 |
+
r'ISO \d+',
|
| 450 |
+
# 3. Core subject and composition
|
| 451 |
+
r'(?:close-up|medium shot|wide shot|shallow depth)',
|
| 452 |
+
r'(?:foreground|background|bokeh)',
|
| 453 |
+
# 4. Scene-specific technical terms
|
| 454 |
+
r'(?:cinematic|anamorphic|telephoto|wide-angle)',
|
| 455 |
+
]
|
| 456 |
+
|
| 457 |
+
# Extract essential parts first
|
| 458 |
essential_parts = []
|
| 459 |
+
remaining_text = prompt
|
| 460 |
|
| 461 |
+
for pattern in essential_patterns:
|
| 462 |
+
matches = re.findall(pattern, remaining_text, re.IGNORECASE)
|
| 463 |
+
for match in matches:
|
| 464 |
+
if match not in essential_parts:
|
| 465 |
+
essential_parts.append(match)
|
| 466 |
+
# Remove from remaining text to avoid duplication
|
| 467 |
+
remaining_text = re.sub(re.escape(match), '', remaining_text, count=1, flags=re.IGNORECASE)
|
| 468 |
+
|
| 469 |
+
# Add essential parts to start
|
| 470 |
+
optimized_words = []
|
| 471 |
+
for part in essential_parts:
|
| 472 |
+
optimized_words.extend(part.split())
|
| 473 |
+
|
| 474 |
+
# Fill remaining space with most important remaining words
|
| 475 |
+
remaining_words = [w for w in remaining_text.split() if w.strip() and w not in optimized_words]
|
| 476 |
+
remaining_space = max_words - len(optimized_words)
|
| 477 |
+
|
| 478 |
+
if remaining_space > 0:
|
| 479 |
+
optimized_words.extend(remaining_words[:remaining_space])
|
| 480 |
+
|
| 481 |
+
optimized = " ".join(optimized_words[:max_words])
|
| 482 |
+
|
| 483 |
+
logger.info(f"Token economy optimization: {len(words)} → {len(optimized_words)} words, preserved {len(essential_parts)} essential elements")
|
| 484 |
|
| 485 |
return optimized
|
| 486 |
|
| 487 |
except Exception as e:
|
| 488 |
+
logger.warning(f"Token economy optimization failed: {e}")
|
| 489 |
return prompt
|
| 490 |
|
| 491 |
|
|
|
|
| 569 |
if composition_match:
|
| 570 |
camera_setup += f", {composition_match.group()}"
|
| 571 |
|
| 572 |
+
# Scene-specific enhancement with token economy
|
| 573 |
if scene_type == "cinematic":
|
| 574 |
+
result = f", Shot on {camera_setup}" # Skip redundant "cinematic photography"
|
| 575 |
elif scene_type == "portrait":
|
| 576 |
+
result = f", Shot on {camera_setup}" # Skip redundant "professional portrait photography"
|
| 577 |
else:
|
| 578 |
+
result = f", Shot on {camera_setup}"
|
| 579 |
|
| 580 |
+
logger.info(f"Formatted camera setup with token economy: {result}")
|
| 581 |
return result
|
| 582 |
else:
|
| 583 |
# Fallback to enhanced config if parsing fails
|
|
|
|
| 592 |
"""Get enhanced camera configuration with cinematography knowledge"""
|
| 593 |
# Enhanced camera configurations with cinema equipment
|
| 594 |
enhanced_configs = {
|
| 595 |
+
"cinematic": ", Shot on ARRI Alexa LF, 35mm anamorphic lens at f/2.8, ISO 400",
|
| 596 |
+
"portrait": ", Shot on Canon EOS R5, 85mm f/1.4 lens at f/2.8, ISO 200, rule of thirds",
|
| 597 |
+
"landscape": ", Shot on Phase One XT, 24-70mm f/4 lens at f/8, ISO 100, hyperfocal distance",
|
| 598 |
+
"street": ", Shot on Leica M11, 35mm f/1.4 lens at f/2.8, ISO 800",
|
| 599 |
+
"architectural": ", Shot on Canon EOS R5, 24-70mm f/2.8 lens at f/8, ISO 100, symmetrical composition",
|
| 600 |
+
"commercial": ", Shot on Hasselblad X2D 100C, 90mm f/2.5 lens at f/4, ISO 100"
|
| 601 |
}
|
| 602 |
|
| 603 |
# Use enhanced config if available, otherwise fall back to FLUX_RULES
|
|
|
|
| 618 |
# Enhanced lighting based on scene type and cinematography knowledge
|
| 619 |
if scene_type == "cinematic":
|
| 620 |
if any(term in description_lower for term in ["dramatic", "moody", "dark"]):
|
| 621 |
+
return ", dramatic lighting"
|
| 622 |
else:
|
| 623 |
+
return ", cinematic lighting"
|
| 624 |
elif scene_type == "portrait":
|
| 625 |
+
return ", studio lighting"
|
| 626 |
elif "dramatic" in description_lower or "chaos" in description_lower:
|
| 627 |
+
return ", dramatic lighting"
|
| 628 |
else:
|
| 629 |
+
return "" # Skip redundant lighting terms
|
| 630 |
|
| 631 |
|
| 632 |
def _get_style_enhancement(scene_type: str, description_lower: str) -> str:
|
| 633 |
+
"""Get style enhancement for multi-engine compatibility with token economy"""
|
| 634 |
+
# Token economy: only add style if it adds unique value
|
|
|
|
| 635 |
if scene_type == "cinematic":
|
| 636 |
if "film grain" not in description_lower:
|
| 637 |
+
return ", film grain"
|
| 638 |
+
elif scene_type == "architectural":
|
| 639 |
+
return ", clean lines"
|
|
|
|
|
|
|
| 640 |
|
| 641 |
+
return "" # Skip redundant style terms
|
| 642 |
|
| 643 |
|
| 644 |
def _clean_prompt_formatting(prompt: str) -> str:
|
|
|
|
| 698 |
if re.search(r'\d+mm.*f/[\d.]+', prompt):
|
| 699 |
tech_score += 5
|
| 700 |
|
| 701 |
+
# Camera angles (NEW - high value)
|
| 702 |
+
angle_terms = ['low-angle shot', 'high-angle shot', 'eye-level shot', 'dutch angle', 'bird\'s eye', 'worm\'s eye']
|
| 703 |
+
tech_score += sum(4 for term in angle_terms if term in prompt.lower())
|
| 704 |
+
|
| 705 |
# Anamorphic and specialized lenses
|
| 706 |
if 'anamorphic' in prompt.lower():
|
| 707 |
tech_score += 4
|
| 708 |
|
| 709 |
# Professional terminology
|
| 710 |
+
tech_keywords = ['shot on', 'lens', 'cinematography', 'lighting']
|
| 711 |
for keyword in tech_keywords:
|
| 712 |
if keyword in prompt.lower():
|
| 713 |
tech_score += 2
|
|
|
|
| 718 |
|
| 719 |
breakdown["technical_details"] = min(25, tech_score)
|
| 720 |
|
| 721 |
+
# Professional Cinematography (0-25 points) - Enhanced with angle detection
|
| 722 |
cinema_score = 0
|
| 723 |
|
| 724 |
+
# Camera angles (high value for professional cinematography)
|
| 725 |
+
angle_terms = ['low-angle', 'high-angle', 'eye-level', 'dutch angle', 'bird\'s eye', 'worm\'s eye']
|
| 726 |
+
cinema_score += sum(5 for term in angle_terms if term in prompt.lower())
|
| 727 |
+
|
| 728 |
# Professional lighting techniques
|
| 729 |
lighting_terms = ['cinematic lighting', 'dramatic lighting', 'studio lighting', 'rim light', 'practical lights']
|
| 730 |
cinema_score += sum(3 for term in lighting_terms if term in prompt.lower())
|
| 731 |
|
| 732 |
# Composition techniques
|
| 733 |
+
composition_terms = ['composition', 'framing', 'depth of field', 'bokeh', 'rule of thirds', 'foreground', 'background']
|
| 734 |
cinema_score += sum(2 for term in composition_terms if term in prompt.lower())
|
| 735 |
|
| 736 |
# Cinematography style elements
|
| 737 |
+
style_terms = ['film grain', 'anamorphic', 'telephoto compression', 'wide-angle', 'shallow depth']
|
| 738 |
cinema_score += sum(3 for term in style_terms if term in prompt.lower())
|
| 739 |
|
| 740 |
# Professional context bonus
|
|
|
|
| 743 |
|
| 744 |
breakdown["professional_cinematography"] = min(25, cinema_score)
|
| 745 |
|
| 746 |
+
# Multi-Engine Optimization (0-25 points) - Token economy aware
|
| 747 |
optimization_score = 0
|
| 748 |
|
| 749 |
+
# Check for technical specifications (more valuable than generic keywords)
|
| 750 |
+
if re.search(r'(?:Canon|Sony|Leica|ARRI|RED|Hasselblad|Phase One)', prompt):
|
| 751 |
+
optimization_score += 8 # Higher score for actual camera specs
|
| 752 |
|
| 753 |
+
if re.search(r'\d+mm.*f/[\d.]+.*ISO \d+', prompt):
|
| 754 |
+
optimization_score += 7 # Complete technical specs
|
| 755 |
+
|
| 756 |
+
# Token economy bonus: penalize redundant keywords
|
| 757 |
+
redundant_keywords = ['photorealistic', 'ultra-detailed', 'professional photography']
|
| 758 |
+
has_camera_specs = bool(re.search(r'(?:Canon|Sony|Leica|ARRI|RED)', prompt))
|
| 759 |
+
|
| 760 |
+
if has_camera_specs:
|
| 761 |
+
# Bonus for NOT having redundant keywords when camera specs present
|
| 762 |
+
redundant_count = sum(1 for keyword in redundant_keywords if keyword in prompt.lower())
|
| 763 |
+
optimization_score += max(0, 5 - redundant_count * 2) # Penalty for redundancy
|
| 764 |
+
else:
|
| 765 |
+
# If no camera specs, quality keywords are valuable
|
| 766 |
+
quality_keywords = sum(1 for keyword in redundant_keywords if keyword in prompt.lower())
|
| 767 |
+
optimization_score += min(5, quality_keywords * 2)
|
| 768 |
+
|
| 769 |
+
# Scene-specific optimization
|
| 770 |
if any(style in prompt for style in FLUX_RULES.get("style_enhancements", {}).values()):
|
| 771 |
optimization_score += 3
|
| 772 |
+
|
| 773 |
+
# Length efficiency bonus
|
| 774 |
+
word_count = len(prompt.split())
|
| 775 |
+
if word_count <= 120: # Reward conciseness
|
| 776 |
+
optimization_score += 2
|
| 777 |
|
| 778 |
breakdown["multi_engine_optimization"] = min(25, optimization_score)
|
| 779 |
|
|
|
|
| 842 |
**Professional Context:** {'✅ Applied' if has_cinema_context else '❌ Not Applied'}
|
| 843 |
|
| 844 |
**🎯 OPTIMIZATIONS APPLIED:**
|
| 845 |
+
✅ Camera angle detection
|
| 846 |
✅ Professional camera configuration
|
| 847 |
✅ Cinematography lighting setup
|
| 848 |
+
✅ Token economy optimization
|
| 849 |
✅ Multi-engine compatibility
|
| 850 |
+
✅ Redundancy elimination
|
| 851 |
|
| 852 |
**⚡ Powered by Pariente AI for MIA TV Series**"""
|
| 853 |
|