Spaces:
Running
on
Zero
Running
on
Zero
Update models.py
Browse files
models.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
"""
|
| 2 |
-
Model management for
|
| 3 |
-
|
|
|
|
|
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
import spaces
|
|
@@ -12,8 +14,13 @@ from typing import Optional, Dict, Any, Tuple
|
|
| 12 |
from PIL import Image
|
| 13 |
from gradio_client import Client, handle_file
|
| 14 |
|
| 15 |
-
from config import get_device_config
|
| 16 |
from utils import clean_memory, safe_execute
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
logger = logging.getLogger(__name__)
|
| 19 |
|
|
@@ -39,14 +46,15 @@ class BaseImageAnalyzer:
|
|
| 39 |
|
| 40 |
|
| 41 |
class BagelAPIAnalyzer(BaseImageAnalyzer):
|
| 42 |
-
"""BAGEL 7B model
|
| 43 |
|
| 44 |
def __init__(self):
|
| 45 |
super().__init__()
|
| 46 |
self.client = None
|
| 47 |
self.space_url = "Malaji71/Bagel-7B-Demo"
|
| 48 |
self.api_endpoint = "/image_understanding"
|
| 49 |
-
self.hf_token = os.getenv("HF_TOKEN")
|
|
|
|
| 50 |
|
| 51 |
def initialize(self) -> bool:
|
| 52 |
"""Initialize BAGEL API client with authentication"""
|
|
@@ -54,14 +62,14 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
|
|
| 54 |
return True
|
| 55 |
|
| 56 |
try:
|
| 57 |
-
logger.info("Initializing BAGEL API client...")
|
| 58 |
|
| 59 |
-
# Initialize client with token if available
|
| 60 |
if self.hf_token:
|
| 61 |
-
logger.info("Using HF token for
|
| 62 |
self.client = Client(self.space_url, hf_token=self.hf_token)
|
| 63 |
else:
|
| 64 |
-
logger.info("
|
| 65 |
self.client = Client(self.space_url)
|
| 66 |
|
| 67 |
self.is_initialized = True
|
|
@@ -70,66 +78,130 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
|
|
| 70 |
|
| 71 |
except Exception as e:
|
| 72 |
logger.error(f"BAGEL API client initialization failed: {e}")
|
| 73 |
-
# If private space fails, try without token as fallback
|
| 74 |
if self.hf_token:
|
| 75 |
logger.info("Retrying without token...")
|
| 76 |
try:
|
| 77 |
self.client = Client(self.space_url)
|
| 78 |
self.is_initialized = True
|
| 79 |
-
logger.info("BAGEL API client initialized
|
| 80 |
return True
|
| 81 |
except Exception as e2:
|
| 82 |
-
logger.error(f"Fallback initialization
|
| 83 |
return False
|
| 84 |
|
| 85 |
-
def
|
| 86 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
try:
|
| 88 |
-
|
|
|
|
|
|
|
| 89 |
if "CAMERA_SETUP:" in description:
|
| 90 |
parts = description.split("CAMERA_SETUP:")
|
| 91 |
if len(parts) > 1:
|
| 92 |
camera_section = parts[1].strip()
|
| 93 |
-
# Take the first meaningful sentence from camera setup
|
| 94 |
camera_text = camera_section.split('\n')[0].strip()
|
| 95 |
-
if len(camera_text) > 20:
|
| 96 |
-
|
| 97 |
|
| 98 |
-
|
| 99 |
-
if "2. CAMERA_SETUP" in description:
|
| 100 |
parts = description.split("2. CAMERA_SETUP")
|
| 101 |
if len(parts) > 1:
|
| 102 |
camera_section = parts[1].strip()
|
| 103 |
camera_text = camera_section.split('\n')[0].strip()
|
| 104 |
if len(camera_text) > 20:
|
| 105 |
-
|
| 106 |
|
| 107 |
-
#
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
return camera_recommendation
|
| 111 |
|
| 112 |
-
return
|
| 113 |
|
| 114 |
except Exception as e:
|
| 115 |
-
logger.warning(f"Failed to extract camera setup: {e}")
|
| 116 |
return None
|
| 117 |
|
| 118 |
-
def
|
| 119 |
-
"""Parse
|
| 120 |
try:
|
| 121 |
-
#
|
| 122 |
camera_text = re.sub(r'^(Based on.*?recommend|I would recommend|For this.*?recommend)\s*', '', camera_text, flags=re.IGNORECASE)
|
| 123 |
-
camera_text = re.sub(r'^(using a|use a|cameras? like)\s*', '', camera_text, flags=re.IGNORECASE)
|
| 124 |
|
| 125 |
-
#
|
| 126 |
camera_patterns = [
|
| 127 |
-
r'(Canon EOS
|
| 128 |
-
r'(Sony A[^\s,]
|
| 129 |
r'(Leica [^\s,]+)',
|
| 130 |
r'(Hasselblad [^\s,]+)',
|
| 131 |
r'(Phase One [^\s,]+)',
|
| 132 |
-
r'(Fujifilm [^\s,]+)'
|
|
|
|
|
|
|
|
|
|
| 133 |
]
|
| 134 |
|
| 135 |
camera_model = None
|
|
@@ -139,12 +211,14 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
|
|
| 139 |
camera_model = match.group(1).strip()
|
| 140 |
break
|
| 141 |
|
| 142 |
-
#
|
| 143 |
lens_patterns = [
|
| 144 |
-
r'(\d+mm\s*f/[\d.]+(?:\s*lens)?)',
|
| 145 |
r'(\d+-\d+mm\s*f/[\d.]+(?:\s*lens)?)',
|
| 146 |
r'(with\s+(?:a\s+)?(\d+mm[^,.]*))',
|
| 147 |
-
r'(paired with.*?(\d+mm[^,.]*))'
|
|
|
|
|
|
|
| 148 |
]
|
| 149 |
|
| 150 |
lens_info = None
|
|
@@ -155,14 +229,7 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
|
|
| 155 |
lens_info = re.sub(r'^(with\s+(?:a\s+)?|paired with\s+)', '', lens_info, flags=re.IGNORECASE)
|
| 156 |
break
|
| 157 |
|
| 158 |
-
#
|
| 159 |
-
if not lens_info or 'f/' not in lens_info:
|
| 160 |
-
aperture_match = re.search(r'(f/[\d.]+)', camera_text)
|
| 161 |
-
aperture = aperture_match.group(1) if aperture_match else None
|
| 162 |
-
if aperture and lens_info:
|
| 163 |
-
lens_info = f"{lens_info} {aperture}"
|
| 164 |
-
|
| 165 |
-
# Build clean recommendation
|
| 166 |
parts = []
|
| 167 |
if camera_model:
|
| 168 |
parts.append(camera_model)
|
|
@@ -171,44 +238,71 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
|
|
| 171 |
|
| 172 |
if parts:
|
| 173 |
result = ', '.join(parts)
|
| 174 |
-
logger.info(f"
|
| 175 |
return result
|
| 176 |
|
| 177 |
return None
|
| 178 |
|
| 179 |
except Exception as e:
|
| 180 |
-
logger.warning(f"Failed to parse camera recommendation: {e}")
|
| 181 |
return None
|
| 182 |
|
| 183 |
-
def
|
| 184 |
-
"""Find camera recommendations
|
| 185 |
try:
|
| 186 |
-
# Look for sentences containing camera info
|
| 187 |
sentences = re.split(r'[.!?]', text)
|
| 188 |
|
| 189 |
for sentence in sentences:
|
| 190 |
-
#
|
| 191 |
-
if any(brand in sentence.lower() for brand in ['canon', 'sony', 'leica', 'hasselblad', 'phase one', 'fujifilm']):
|
| 192 |
-
if any(term in sentence.lower() for term in ['recommend', 'suggest', 'would use', 'camera', 'lens']):
|
| 193 |
-
parsed = self.
|
| 194 |
if parsed:
|
| 195 |
return parsed
|
| 196 |
|
| 197 |
return None
|
| 198 |
|
| 199 |
except Exception as e:
|
| 200 |
-
logger.warning(f"Failed to find camera recommendation: {e}")
|
| 201 |
return None
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
def _save_temp_image(self, image: Image.Image) -> str:
|
| 204 |
"""Save image to temporary file for API call"""
|
| 205 |
try:
|
| 206 |
-
# Create temporary file
|
| 207 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
|
| 208 |
temp_path = temp_file.name
|
| 209 |
temp_file.close()
|
| 210 |
|
| 211 |
-
# Save image
|
| 212 |
if image.mode != 'RGB':
|
| 213 |
image = image.convert('RGB')
|
| 214 |
image.save(temp_path, 'PNG')
|
|
@@ -229,53 +323,37 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
|
|
| 229 |
|
| 230 |
@spaces.GPU(duration=60)
|
| 231 |
def analyze_image(self, image: Image.Image, prompt: str = None) -> Tuple[str, Dict[str, Any]]:
|
| 232 |
-
"""Analyze image using BAGEL API"""
|
| 233 |
if not self.is_initialized:
|
| 234 |
success = self.initialize()
|
| 235 |
if not success:
|
| 236 |
return "BAGEL API not available", {"error": "API initialization failed"}
|
| 237 |
|
| 238 |
temp_path = None
|
| 239 |
-
|
| 240 |
-
# Initialize metadata early
|
| 241 |
metadata = {
|
| 242 |
-
"model": "BAGEL-7B-
|
| 243 |
"device": "api",
|
| 244 |
"confidence": 0.9,
|
| 245 |
"api_endpoint": self.api_endpoint,
|
| 246 |
"space_url": self.space_url,
|
| 247 |
"prompt_used": prompt,
|
| 248 |
-
"has_camera_suggestion": False
|
|
|
|
| 249 |
}
|
| 250 |
|
| 251 |
try:
|
| 252 |
-
#
|
| 253 |
if prompt is None:
|
| 254 |
-
prompt = ""
|
| 255 |
-
|
| 256 |
-
1. DESCRIPTION: Write a single flowing paragraph describing what you see. Start directly with the subject (e.g., "A color photograph showing..." or "A black and white image depicting..."). Include:
|
| 257 |
-
- Image type (photograph, illustration, artwork)
|
| 258 |
-
- Subject and composition
|
| 259 |
-
- Color palette and lighting conditions
|
| 260 |
-
- Mood and atmosphere
|
| 261 |
-
- Photographic style and format
|
| 262 |
-
|
| 263 |
-
2. CAMERA_SETUP: Based on the scene type you observe, recommend ONE specific professional camera and lens combination:
|
| 264 |
-
- For street/documentary scenes: Canon EOS R6 with 35mm f/1.4 lens
|
| 265 |
-
- For portrait photography: Canon EOS R5 with 85mm f/1.4 lens
|
| 266 |
-
- For landscape photography: Phase One XT with 24-70mm f/4 lens
|
| 267 |
-
- For action/sports: Sony A1 with 70-200mm f/2.8 lens
|
| 268 |
-
|
| 269 |
-
Give only the camera model and lens specification, nothing else."""
|
| 270 |
|
| 271 |
# Save image to temporary file
|
| 272 |
temp_path = self._save_temp_image(image)
|
| 273 |
if not temp_path:
|
| 274 |
return "Image processing failed", {"error": "Could not save image"}
|
| 275 |
|
| 276 |
-
logger.info("Calling BAGEL API
|
| 277 |
|
| 278 |
-
# Call BAGEL API
|
| 279 |
result = self.client.predict(
|
| 280 |
image=handle_file(temp_path),
|
| 281 |
prompt=prompt,
|
|
@@ -286,132 +364,151 @@ Give only the camera model and lens specification, nothing else."""
|
|
| 286 |
api_name=self.api_endpoint
|
| 287 |
)
|
| 288 |
|
| 289 |
-
# Extract
|
| 290 |
if isinstance(result, tuple) and len(result) >= 2:
|
| 291 |
description = result[1] if result[1] else result[0]
|
| 292 |
else:
|
| 293 |
description = str(result)
|
| 294 |
|
| 295 |
-
# Process the description and extract camera setup
|
| 296 |
if isinstance(description, str) and description.strip():
|
| 297 |
description = description.strip()
|
| 298 |
|
| 299 |
-
# Extract camera setup
|
| 300 |
-
camera_setup = self.
|
| 301 |
if camera_setup:
|
| 302 |
metadata["camera_setup"] = camera_setup
|
| 303 |
metadata["has_camera_suggestion"] = True
|
| 304 |
-
logger.info(f"
|
| 305 |
else:
|
| 306 |
metadata["has_camera_suggestion"] = False
|
| 307 |
-
logger.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
else:
|
| 309 |
-
description = "
|
| 310 |
metadata["has_camera_suggestion"] = False
|
| 311 |
|
| 312 |
-
# Update
|
| 313 |
metadata.update({
|
| 314 |
-
"response_length": len(description)
|
|
|
|
| 315 |
})
|
| 316 |
|
| 317 |
-
logger.info(f"BAGEL
|
| 318 |
return description, metadata
|
| 319 |
|
| 320 |
except Exception as e:
|
| 321 |
-
logger.error(f"BAGEL
|
| 322 |
-
return "
|
| 323 |
|
| 324 |
finally:
|
| 325 |
-
# Always cleanup temporary file
|
| 326 |
if temp_path:
|
| 327 |
self._cleanup_temp_file(temp_path)
|
| 328 |
|
| 329 |
-
def
|
| 330 |
-
"""Analyze image specifically for
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
1. DESCRIPTION: Create a single flowing paragraph starting directly with the subject. Be precise about:
|
| 334 |
-
- Image type (photograph, illustration, artwork)
|
| 335 |
-
- Subject matter and composition
|
| 336 |
-
- Color palette (specific colors, warm/cool tones, monochrome)
|
| 337 |
-
- Lighting conditions and photographic style
|
| 338 |
-
- Mood, atmosphere, and artistic elements
|
| 339 |
-
|
| 340 |
-
2. CAMERA_SETUP: Recommend ONE specific professional camera and lens for this scene type:
|
| 341 |
-
- Street/urban/documentary: Canon EOS R6 with 35mm f/1.4 lens
|
| 342 |
-
- Portrait photography: Canon EOS R5 with 85mm f/1.4 lens
|
| 343 |
-
- Landscape photography: Phase One XT with 24-70mm f/4 lens
|
| 344 |
-
- Action/sports: Sony A1 with 70-200mm f/2.8 lens
|
| 345 |
|
| 346 |
-
|
| 347 |
-
|
|
|
|
| 348 |
return self.analyze_image(image, flux_prompt)
|
| 349 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
def cleanup(self) -> None:
|
| 351 |
"""Clean up API client resources"""
|
| 352 |
try:
|
| 353 |
if hasattr(self, 'client'):
|
| 354 |
self.client = None
|
| 355 |
super().cleanup()
|
| 356 |
-
logger.info("BAGEL API resources cleaned up")
|
| 357 |
except Exception as e:
|
| 358 |
-
logger.warning(f"BAGEL API cleanup warning: {e}")
|
| 359 |
|
| 360 |
|
| 361 |
class FallbackAnalyzer(BaseImageAnalyzer):
|
| 362 |
-
"""
|
| 363 |
|
| 364 |
def __init__(self):
|
| 365 |
super().__init__()
|
|
|
|
| 366 |
|
| 367 |
def initialize(self) -> bool:
|
| 368 |
-
"""Fallback is always ready"""
|
| 369 |
self.is_initialized = True
|
| 370 |
return True
|
| 371 |
|
| 372 |
def analyze_image(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
|
| 373 |
-
"""Provide
|
| 374 |
try:
|
| 375 |
-
# Basic image analysis
|
| 376 |
width, height = image.size
|
| 377 |
mode = image.mode
|
| 378 |
-
|
| 379 |
-
# Simple descriptive text based on image properties
|
| 380 |
aspect_ratio = width / height
|
| 381 |
|
|
|
|
| 382 |
if aspect_ratio > 1.5:
|
| 383 |
orientation = "landscape"
|
| 384 |
-
|
|
|
|
| 385 |
elif aspect_ratio < 0.75:
|
| 386 |
orientation = "portrait"
|
| 387 |
-
|
|
|
|
| 388 |
else:
|
| 389 |
orientation = "square"
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
|
| 394 |
metadata = {
|
| 395 |
-
"model": "Fallback",
|
| 396 |
"device": "cpu",
|
| 397 |
-
"confidence": 0.
|
| 398 |
"image_size": f"{width}x{height}",
|
| 399 |
"color_mode": mode,
|
| 400 |
"orientation": orientation,
|
| 401 |
-
"aspect_ratio": round(aspect_ratio, 2)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
}
|
| 403 |
|
| 404 |
return description, metadata
|
| 405 |
|
| 406 |
except Exception as e:
|
| 407 |
-
logger.error(f"
|
| 408 |
-
return "Professional image suitable for detailed analysis and prompt generation", {
|
|
|
|
|
|
|
|
|
|
| 409 |
|
| 410 |
|
| 411 |
class ModelManager:
|
| 412 |
-
"""
|
| 413 |
|
| 414 |
-
def __init__(self, preferred_model: str = "bagel-
|
| 415 |
self.preferred_model = preferred_model
|
| 416 |
self.analyzers = {}
|
| 417 |
self.current_analyzer = None
|
|
@@ -421,42 +518,45 @@ class ModelManager:
|
|
| 421 |
model_name = model_name or self.preferred_model
|
| 422 |
|
| 423 |
if model_name not in self.analyzers:
|
| 424 |
-
if model_name
|
| 425 |
self.analyzers[model_name] = BagelAPIAnalyzer()
|
| 426 |
elif model_name == "fallback":
|
| 427 |
self.analyzers[model_name] = FallbackAnalyzer()
|
| 428 |
else:
|
| 429 |
-
logger.warning(f"Unknown model: {model_name}, using fallback")
|
| 430 |
model_name = "fallback"
|
| 431 |
self.analyzers[model_name] = FallbackAnalyzer()
|
| 432 |
|
| 433 |
return self.analyzers[model_name]
|
| 434 |
|
| 435 |
-
def analyze_image(self, image: Image.Image, model_name: str = None, analysis_type: str = "
|
| 436 |
-
"""Analyze image with
|
| 437 |
-
# Try preferred model first
|
| 438 |
analyzer = self.get_analyzer(model_name)
|
| 439 |
if analyzer is None:
|
| 440 |
return "No analyzer available", {"error": "Model not found"}
|
| 441 |
|
| 442 |
-
# Choose analysis method based on type
|
| 443 |
-
if analysis_type == "
|
| 444 |
-
success, result = safe_execute(analyzer.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 445 |
else:
|
| 446 |
success, result = safe_execute(analyzer.analyze_image, image)
|
| 447 |
|
| 448 |
if success and result[1].get("error") is None:
|
| 449 |
return result
|
| 450 |
else:
|
| 451 |
-
#
|
| 452 |
-
logger.warning(f"Primary model failed, using fallback: {result}")
|
| 453 |
fallback_analyzer = self.get_analyzer("fallback")
|
| 454 |
fallback_success, fallback_result = safe_execute(fallback_analyzer.analyze_image, image)
|
| 455 |
|
| 456 |
if fallback_success:
|
| 457 |
return fallback_result
|
| 458 |
else:
|
| 459 |
-
return "All analyzers failed", {"error": "Complete analysis failure"}
|
| 460 |
|
| 461 |
def cleanup_all(self) -> None:
|
| 462 |
"""Clean up all model resources"""
|
|
@@ -464,24 +564,24 @@ class ModelManager:
|
|
| 464 |
analyzer.cleanup()
|
| 465 |
self.analyzers.clear()
|
| 466 |
clean_memory()
|
| 467 |
-
logger.info("All analyzers cleaned up")
|
| 468 |
|
| 469 |
|
| 470 |
-
# Global model manager instance
|
| 471 |
-
model_manager = ModelManager(preferred_model="bagel-
|
| 472 |
|
| 473 |
|
| 474 |
-
def analyze_image(image: Image.Image, model_name: str = None, analysis_type: str = "
|
| 475 |
"""
|
| 476 |
-
|
| 477 |
|
| 478 |
Args:
|
| 479 |
image: PIL Image to analyze
|
| 480 |
-
model_name: Optional model name ("bagel-
|
| 481 |
-
analysis_type: Type of analysis ("
|
| 482 |
|
| 483 |
Returns:
|
| 484 |
-
Tuple of (description, metadata)
|
| 485 |
"""
|
| 486 |
return model_manager.analyze_image(image, model_name, analysis_type)
|
| 487 |
|
|
|
|
| 1 |
"""
|
| 2 |
+
Model management for Phramer AI
|
| 3 |
+
By Pariente AI, for MIA TV Series
|
| 4 |
+
|
| 5 |
+
BAGEL 7B integration with professional photography knowledge enhancement
|
| 6 |
"""
|
| 7 |
|
| 8 |
import spaces
|
|
|
|
| 14 |
from PIL import Image
|
| 15 |
from gradio_client import Client, handle_file
|
| 16 |
|
| 17 |
+
from config import get_device_config, PROFESSIONAL_PHOTOGRAPHY_CONFIG
|
| 18 |
from utils import clean_memory, safe_execute
|
| 19 |
+
from professional_photography import (
|
| 20 |
+
ProfessionalPhotoAnalyzer,
|
| 21 |
+
enhance_flux_prompt_with_professional_knowledge,
|
| 22 |
+
professional_analyzer
|
| 23 |
+
)
|
| 24 |
|
| 25 |
logger = logging.getLogger(__name__)
|
| 26 |
|
|
|
|
| 46 |
|
| 47 |
|
| 48 |
class BagelAPIAnalyzer(BaseImageAnalyzer):
|
| 49 |
+
"""BAGEL 7B model with professional photography knowledge integration"""
|
| 50 |
|
| 51 |
def __init__(self):
|
| 52 |
super().__init__()
|
| 53 |
self.client = None
|
| 54 |
self.space_url = "Malaji71/Bagel-7B-Demo"
|
| 55 |
self.api_endpoint = "/image_understanding"
|
| 56 |
+
self.hf_token = os.getenv("HF_TOKEN")
|
| 57 |
+
self.professional_analyzer = professional_analyzer
|
| 58 |
|
| 59 |
def initialize(self) -> bool:
|
| 60 |
"""Initialize BAGEL API client with authentication"""
|
|
|
|
| 62 |
return True
|
| 63 |
|
| 64 |
try:
|
| 65 |
+
logger.info("Initializing BAGEL API client for Phramer AI...")
|
| 66 |
|
| 67 |
+
# Initialize client with token if available
|
| 68 |
if self.hf_token:
|
| 69 |
+
logger.info("Using HF token for enhanced API access")
|
| 70 |
self.client = Client(self.space_url, hf_token=self.hf_token)
|
| 71 |
else:
|
| 72 |
+
logger.info("Using public API access")
|
| 73 |
self.client = Client(self.space_url)
|
| 74 |
|
| 75 |
self.is_initialized = True
|
|
|
|
| 78 |
|
| 79 |
except Exception as e:
|
| 80 |
logger.error(f"BAGEL API client initialization failed: {e}")
|
|
|
|
| 81 |
if self.hf_token:
|
| 82 |
logger.info("Retrying without token...")
|
| 83 |
try:
|
| 84 |
self.client = Client(self.space_url)
|
| 85 |
self.is_initialized = True
|
| 86 |
+
logger.info("BAGEL API client initialized (fallback mode)")
|
| 87 |
return True
|
| 88 |
except Exception as e2:
|
| 89 |
+
logger.error(f"Fallback initialization failed: {e2}")
|
| 90 |
return False
|
| 91 |
|
| 92 |
+
def _create_professional_enhanced_prompt(self, analysis_type: str = "multimodal") -> str:
|
| 93 |
+
"""Create professionally enhanced prompt for BAGEL analysis"""
|
| 94 |
+
|
| 95 |
+
if analysis_type == "cinematic":
|
| 96 |
+
return """Analyze this image for professional cinematic prompt generation. You are an expert cinematographer with 30+ years of cinema experience. Provide exactly two sections:
|
| 97 |
+
|
| 98 |
+
1. DESCRIPTION: Create a detailed, flowing paragraph describing the image for cinematic reproduction:
|
| 99 |
+
- Scene composition and visual storytelling elements
|
| 100 |
+
- Lighting quality, direction, and dramatic mood
|
| 101 |
+
- Color palette, tonal relationships, and atmospheric elements
|
| 102 |
+
- Subject positioning, environmental context, and framing
|
| 103 |
+
- Cinematic qualities: film grain, depth of field, visual style
|
| 104 |
+
- Technical photographic elements that enhance realism
|
| 105 |
+
|
| 106 |
+
2. CAMERA_SETUP: Recommend professional cinema/photography equipment based on scene analysis:
|
| 107 |
+
- Camera body: Choose from Canon EOS R5/R6, Sony A7R/A1, Leica M11, ARRI Alexa, RED cameras
|
| 108 |
+
- Lens: Specific focal length and aperture (e.g., "85mm f/1.4", "35mm anamorphic f/2.8")
|
| 109 |
+
- Technical settings: Aperture consideration for depth of field and story mood
|
| 110 |
+
- Lighting setup: Professional lighting rationale (key, fill, rim, practical lights)
|
| 111 |
+
- Shooting style: Documentary, portrait, landscape, architectural, or cinematic approach
|
| 112 |
+
|
| 113 |
+
Apply professional cinematography principles: rule of thirds, leading lines, depth layering, lighting direction for mood, and technical excellence. Focus on creating prompts optimized for photorealistic, cinema-quality generation."""
|
| 114 |
+
|
| 115 |
+
elif analysis_type == "flux_optimized":
|
| 116 |
+
return """Analyze this image for FLUX prompt generation with professional cinematography expertise. You have 30+ years of cinema experience. Provide exactly two sections:
|
| 117 |
+
|
| 118 |
+
1. DESCRIPTION: Professional analysis for photorealistic reproduction:
|
| 119 |
+
- Image type and photographic classification
|
| 120 |
+
- Subject matter with precise visual details
|
| 121 |
+
- Lighting analysis: quality, direction, color temperature, shadows
|
| 122 |
+
- Composition elements: framing, balance, visual flow
|
| 123 |
+
- Color relationships and tonal values
|
| 124 |
+
- Artistic style and photographic technique employed
|
| 125 |
+
- Technical qualities that contribute to image impact
|
| 126 |
+
|
| 127 |
+
2. CAMERA_SETUP: Expert equipment recommendation:
|
| 128 |
+
- Professional camera body suited for scene type
|
| 129 |
+
- Specific lens with focal length and maximum aperture
|
| 130 |
+
- Recommended shooting aperture for optimal depth of field
|
| 131 |
+
- Technical considerations: ISO, lighting setup, focus technique
|
| 132 |
+
- Professional shooting approach and methodology
|
| 133 |
+
|
| 134 |
+
Integrate advanced cinematography principles: exposure triangle mastery, lighting ratios, compositional rules, focus techniques, and professional equipment knowledge. Output should be optimized for FLUX's photorealistic capabilities."""
|
| 135 |
+
|
| 136 |
+
else: # multimodal analysis
|
| 137 |
+
return """Analyze this image with professional cinematography expertise for multi-platform prompt generation. You are a master cinematographer with extensive technical and artistic knowledge from 30+ years in cinema. Provide exactly two sections:
|
| 138 |
+
|
| 139 |
+
1. DESCRIPTION: Expert visual analysis for prompt generation:
|
| 140 |
+
- Comprehensive scene description with photographic insight
|
| 141 |
+
- Subject matter, composition, and visual hierarchy
|
| 142 |
+
- Lighting analysis: quality, direction, mood, technical setup
|
| 143 |
+
- Color palette, contrast, and tonal relationships
|
| 144 |
+
- Artistic elements: style, mood, atmosphere, visual impact
|
| 145 |
+
- Technical photographic qualities and execution
|
| 146 |
+
|
| 147 |
+
2. CAMERA_SETUP: Professional equipment and technique recommendation:
|
| 148 |
+
- Camera system recommendation based on scene requirements
|
| 149 |
+
- Lens selection with specific focal length and aperture range
|
| 150 |
+
- Technical shooting parameters and considerations
|
| 151 |
+
- Lighting setup and methodology for scene recreation
|
| 152 |
+
- Professional approach: shooting style and technical execution
|
| 153 |
+
|
| 154 |
+
Apply master-level cinematography knowledge: advanced composition techniques, professional lighting principles, camera system expertise, lens characteristics, and technical excellence. Create content suitable for multiple generative engines (Flux, Midjourney, etc.) with emphasis on photorealistic quality."""
|
| 155 |
+
|
| 156 |
+
def _extract_professional_camera_setup(self, description: str) -> Optional[str]:
|
| 157 |
+
"""Extract and enhance camera setup with professional photography knowledge"""
|
| 158 |
try:
|
| 159 |
+
camera_setup = None
|
| 160 |
+
|
| 161 |
+
# Extract BAGEL's camera recommendation
|
| 162 |
if "CAMERA_SETUP:" in description:
|
| 163 |
parts = description.split("CAMERA_SETUP:")
|
| 164 |
if len(parts) > 1:
|
| 165 |
camera_section = parts[1].strip()
|
|
|
|
| 166 |
camera_text = camera_section.split('\n')[0].strip()
|
| 167 |
+
if len(camera_text) > 20:
|
| 168 |
+
camera_setup = self._parse_professional_camera_recommendation(camera_text)
|
| 169 |
|
| 170 |
+
elif "2. CAMERA_SETUP" in description:
|
|
|
|
| 171 |
parts = description.split("2. CAMERA_SETUP")
|
| 172 |
if len(parts) > 1:
|
| 173 |
camera_section = parts[1].strip()
|
| 174 |
camera_text = camera_section.split('\n')[0].strip()
|
| 175 |
if len(camera_text) > 20:
|
| 176 |
+
camera_setup = self._parse_professional_camera_recommendation(camera_text)
|
| 177 |
|
| 178 |
+
# Fallback: look for camera recommendations in text
|
| 179 |
+
if not camera_setup:
|
| 180 |
+
camera_setup = self._find_professional_camera_recommendation(description)
|
|
|
|
| 181 |
|
| 182 |
+
return camera_setup
|
| 183 |
|
| 184 |
except Exception as e:
|
| 185 |
+
logger.warning(f"Failed to extract professional camera setup: {e}")
|
| 186 |
return None
|
| 187 |
|
| 188 |
+
def _parse_professional_camera_recommendation(self, camera_text: str) -> Optional[str]:
|
| 189 |
+
"""Parse camera recommendation with professional photography enhancement"""
|
| 190 |
try:
|
| 191 |
+
# Clean and extract with professional patterns
|
| 192 |
camera_text = re.sub(r'^(Based on.*?recommend|I would recommend|For this.*?recommend)\s*', '', camera_text, flags=re.IGNORECASE)
|
|
|
|
| 193 |
|
| 194 |
+
# Professional camera patterns (more comprehensive)
|
| 195 |
camera_patterns = [
|
| 196 |
+
r'(Canon EOS R[^\s,]*(?:\s+[^\s,]*)?)',
|
| 197 |
+
r'(Sony A[^\s,]*(?:\s+[^\s,]*)?)',
|
| 198 |
r'(Leica [^\s,]+)',
|
| 199 |
r'(Hasselblad [^\s,]+)',
|
| 200 |
r'(Phase One [^\s,]+)',
|
| 201 |
+
r'(Fujifilm [^\s,]+)',
|
| 202 |
+
r'(ARRI [^\s,]+)',
|
| 203 |
+
r'(RED [^\s,]+)',
|
| 204 |
+
r'(Nikon [^\s,]+)'
|
| 205 |
]
|
| 206 |
|
| 207 |
camera_model = None
|
|
|
|
| 211 |
camera_model = match.group(1).strip()
|
| 212 |
break
|
| 213 |
|
| 214 |
+
# Professional lens patterns (enhanced)
|
| 215 |
lens_patterns = [
|
| 216 |
+
r'(\d+mm\s*f/[\d.]+(?:\s*(?:lens|anamorphic|telephoto|wide))?)',
|
| 217 |
r'(\d+-\d+mm\s*f/[\d.]+(?:\s*lens)?)',
|
| 218 |
r'(with\s+(?:a\s+)?(\d+mm[^,.]*))',
|
| 219 |
+
r'(paired with.*?(\d+mm[^,.]*))',
|
| 220 |
+
r'(\d+mm[^,]*anamorphic[^,]*)',
|
| 221 |
+
r'(\d+mm[^,]*telephoto[^,]*)'
|
| 222 |
]
|
| 223 |
|
| 224 |
lens_info = None
|
|
|
|
| 229 |
lens_info = re.sub(r'^(with\s+(?:a\s+)?|paired with\s+)', '', lens_info, flags=re.IGNORECASE)
|
| 230 |
break
|
| 231 |
|
| 232 |
+
# Build professional recommendation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
parts = []
|
| 234 |
if camera_model:
|
| 235 |
parts.append(camera_model)
|
|
|
|
| 238 |
|
| 239 |
if parts:
|
| 240 |
result = ', '.join(parts)
|
| 241 |
+
logger.info(f"Professional camera setup extracted: {result}")
|
| 242 |
return result
|
| 243 |
|
| 244 |
return None
|
| 245 |
|
| 246 |
except Exception as e:
|
| 247 |
+
logger.warning(f"Failed to parse professional camera recommendation: {e}")
|
| 248 |
return None
|
| 249 |
|
| 250 |
+
def _find_professional_camera_recommendation(self, text: str) -> Optional[str]:
|
| 251 |
+
"""Find professional camera recommendations with enhanced detection"""
|
| 252 |
try:
|
|
|
|
| 253 |
sentences = re.split(r'[.!?]', text)
|
| 254 |
|
| 255 |
for sentence in sentences:
|
| 256 |
+
# Professional camera brands and technical terms
|
| 257 |
+
if any(brand in sentence.lower() for brand in ['canon', 'sony', 'leica', 'hasselblad', 'phase one', 'fujifilm', 'arri', 'red']):
|
| 258 |
+
if any(term in sentence.lower() for term in ['recommend', 'suggest', 'would use', 'camera', 'lens', 'shot on']):
|
| 259 |
+
parsed = self._parse_professional_camera_recommendation(sentence.strip())
|
| 260 |
if parsed:
|
| 261 |
return parsed
|
| 262 |
|
| 263 |
return None
|
| 264 |
|
| 265 |
except Exception as e:
|
| 266 |
+
logger.warning(f"Failed to find professional camera recommendation: {e}")
|
| 267 |
return None
|
| 268 |
|
| 269 |
+
def _enhance_description_with_professional_context(self, description: str, image: Image.Image) -> str:
|
| 270 |
+
"""Enhance BAGEL description with professional cinematography context"""
|
| 271 |
+
try:
|
| 272 |
+
if not PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("enable_expert_analysis", True):
|
| 273 |
+
return description
|
| 274 |
+
|
| 275 |
+
# Get professional cinematography context without being invasive
|
| 276 |
+
enhanced_context = self.professional_analyzer.generate_enhanced_context(description)
|
| 277 |
+
|
| 278 |
+
# Extract key professional insights
|
| 279 |
+
scene_type = enhanced_context.get("scene_type", "general")
|
| 280 |
+
technical_context = enhanced_context.get("technical_context", "")
|
| 281 |
+
professional_insight = enhanced_context.get("professional_insight", "")
|
| 282 |
+
|
| 283 |
+
# Enhance description subtly with professional terminology
|
| 284 |
+
enhanced_description = description
|
| 285 |
+
|
| 286 |
+
# Add professional context if not already present
|
| 287 |
+
if technical_context and len(technical_context) > 20:
|
| 288 |
+
# Only add if it doesn't duplicate existing information
|
| 289 |
+
if not any(term in description.lower() for term in ["shot on", "professional", "camera"]):
|
| 290 |
+
enhanced_description += f"\n\nProfessional Context: {technical_context}"
|
| 291 |
+
|
| 292 |
+
logger.info(f"Enhanced description with cinematography context for {scene_type} scene")
|
| 293 |
+
return enhanced_description
|
| 294 |
+
|
| 295 |
+
except Exception as e:
|
| 296 |
+
logger.warning(f"Cinematography context enhancement failed: {e}")
|
| 297 |
+
return description
|
| 298 |
+
|
| 299 |
def _save_temp_image(self, image: Image.Image) -> str:
|
| 300 |
"""Save image to temporary file for API call"""
|
| 301 |
try:
|
|
|
|
| 302 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
|
| 303 |
temp_path = temp_file.name
|
| 304 |
temp_file.close()
|
| 305 |
|
|
|
|
| 306 |
if image.mode != 'RGB':
|
| 307 |
image = image.convert('RGB')
|
| 308 |
image.save(temp_path, 'PNG')
|
|
|
|
| 323 |
|
| 324 |
@spaces.GPU(duration=60)
|
| 325 |
def analyze_image(self, image: Image.Image, prompt: str = None) -> Tuple[str, Dict[str, Any]]:
|
| 326 |
+
"""Analyze image using BAGEL API with professional cinematography enhancement"""
|
| 327 |
if not self.is_initialized:
|
| 328 |
success = self.initialize()
|
| 329 |
if not success:
|
| 330 |
return "BAGEL API not available", {"error": "API initialization failed"}
|
| 331 |
|
| 332 |
temp_path = None
|
|
|
|
|
|
|
| 333 |
metadata = {
|
| 334 |
+
"model": "BAGEL-7B-Professional",
|
| 335 |
"device": "api",
|
| 336 |
"confidence": 0.9,
|
| 337 |
"api_endpoint": self.api_endpoint,
|
| 338 |
"space_url": self.space_url,
|
| 339 |
"prompt_used": prompt,
|
| 340 |
+
"has_camera_suggestion": False,
|
| 341 |
+
"professional_enhancement": True
|
| 342 |
}
|
| 343 |
|
| 344 |
try:
|
| 345 |
+
# Use professional enhanced prompt if none provided
|
| 346 |
if prompt is None:
|
| 347 |
+
prompt = self._create_professional_enhanced_prompt("multimodal")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
|
| 349 |
# Save image to temporary file
|
| 350 |
temp_path = self._save_temp_image(image)
|
| 351 |
if not temp_path:
|
| 352 |
return "Image processing failed", {"error": "Could not save image"}
|
| 353 |
|
| 354 |
+
logger.info("Calling BAGEL API with professional cinematography context...")
|
| 355 |
|
| 356 |
+
# Call BAGEL API with enhanced prompt
|
| 357 |
result = self.client.predict(
|
| 358 |
image=handle_file(temp_path),
|
| 359 |
prompt=prompt,
|
|
|
|
| 364 |
api_name=self.api_endpoint
|
| 365 |
)
|
| 366 |
|
| 367 |
+
# Extract and process response
|
| 368 |
if isinstance(result, tuple) and len(result) >= 2:
|
| 369 |
description = result[1] if result[1] else result[0]
|
| 370 |
else:
|
| 371 |
description = str(result)
|
| 372 |
|
|
|
|
| 373 |
if isinstance(description, str) and description.strip():
|
| 374 |
description = description.strip()
|
| 375 |
|
| 376 |
+
# Extract professional camera setup
|
| 377 |
+
camera_setup = self._extract_professional_camera_setup(description)
|
| 378 |
if camera_setup:
|
| 379 |
metadata["camera_setup"] = camera_setup
|
| 380 |
metadata["has_camera_suggestion"] = True
|
| 381 |
+
logger.info(f"Professional camera setup extracted: {camera_setup}")
|
| 382 |
else:
|
| 383 |
metadata["has_camera_suggestion"] = False
|
| 384 |
+
logger.info("No camera setup found, will use professional fallback")
|
| 385 |
+
|
| 386 |
+
# Enhance description with cinematography context
|
| 387 |
+
if PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("knowledge_base_integration", True):
|
| 388 |
+
description = self._enhance_description_with_professional_context(description, image)
|
| 389 |
+
metadata["cinematography_context_applied"] = True
|
| 390 |
+
|
| 391 |
else:
|
| 392 |
+
description = "Professional image analysis completed successfully"
|
| 393 |
metadata["has_camera_suggestion"] = False
|
| 394 |
|
| 395 |
+
# Update metadata
|
| 396 |
metadata.update({
|
| 397 |
+
"response_length": len(description),
|
| 398 |
+
"analysis_type": "professional_enhanced"
|
| 399 |
})
|
| 400 |
|
| 401 |
+
logger.info(f"BAGEL Professional analysis complete: {len(description)} chars, Camera: {metadata.get('has_camera_suggestion', False)}")
|
| 402 |
return description, metadata
|
| 403 |
|
| 404 |
except Exception as e:
|
| 405 |
+
logger.error(f"BAGEL Professional analysis failed: {e}")
|
| 406 |
+
return "Professional analysis failed", {"error": str(e), "model": "BAGEL-7B-Professional"}
|
| 407 |
|
| 408 |
finally:
|
|
|
|
| 409 |
if temp_path:
|
| 410 |
self._cleanup_temp_file(temp_path)
|
| 411 |
|
| 412 |
+
def analyze_for_cinematic_prompt(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
    """Analyze ``image`` using the "cinematic" professional prompt variant.

    The prompt is produced by ``_create_professional_enhanced_prompt`` and the
    work is delegated to ``analyze_image``, whose (description, metadata)
    result is returned unchanged.
    """
    return self.analyze_image(
        image,
        self._create_professional_enhanced_prompt("cinematic"),
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
|
| 417 |
+
def analyze_for_flux_with_professional_context(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
    """Analyze ``image`` using the "flux_optimized" professional prompt variant.

    Delegates to ``analyze_image`` and returns its (description, metadata)
    result unchanged.
    """
    return self.analyze_image(
        image,
        self._create_professional_enhanced_prompt("flux_optimized"),
    )
|
| 421 |
|
| 422 |
+
def analyze_for_multiengine_prompt(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
    """Analyze ``image`` using the "multimodal" professional prompt variant.

    Delegates to ``analyze_image`` and returns its (description, metadata)
    result unchanged.
    """
    return self.analyze_image(
        image,
        self._create_professional_enhanced_prompt("multimodal"),
    )
|
| 426 |
+
|
| 427 |
def cleanup(self) -> None:
    """Drop the API client reference and run the base-class cleanup.

    Best-effort: any exception raised along the way is logged as a warning
    instead of being propagated to the caller.
    """
    try:
        client_attr_present = hasattr(self, 'client')
        if client_attr_present:
            # Releasing the reference is all this class does for the client.
            self.client = None
        super().cleanup()
        logger.info("BAGEL Professional API resources cleaned up")
    except Exception as cleanup_error:
        logger.warning(f"BAGEL Professional API cleanup warning: {cleanup_error}")
|
| 436 |
|
| 437 |
|
| 438 |
class FallbackAnalyzer(BaseImageAnalyzer):
    """Analyzer of last resort that describes an image from its geometry only.

    Only ``image.size`` and ``image.mode`` are read; the description text and
    the camera suggestion are chosen from the width/height ratio.
    """

    def __init__(self):
        super().__init__()
        # Module-level analyzer used to append cinematography context.
        self.professional_analyzer = professional_analyzer

    def initialize(self) -> bool:
        """Mark the analyzer as initialized; there is nothing to load."""
        self.is_initialized = True
        return True

    def analyze_image(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
        """Build a generic professional description for ``image``.

        Returns:
            A (description, metadata) tuple. If anything raises, a fixed
            description is returned together with metadata holding only the
            ``error`` text and the ``model`` name.
        """
        try:
            width, height = image.size
            mode = image.mode
            aspect_ratio = width / height

            # (orientation, scene_type, camera_suggestion) chosen by ratio:
            # wider than 1.5 -> landscape, narrower than 0.75 -> portrait,
            # anything in between is reported as "square".
            if aspect_ratio > 1.5:
                profile = (
                    "landscape",
                    "landscape",
                    "Phase One XT with 24-70mm f/4 lens, landscape photography",
                )
            elif aspect_ratio < 0.75:
                profile = (
                    "portrait",
                    "portrait_studio",
                    "Canon EOS R5 with 85mm f/1.4 lens, portrait photography",
                )
            else:
                profile = (
                    "square",
                    "general",
                    "Canon EOS R6 with 50mm f/1.8 lens, standard photography",
                )
            orientation, scene_type, camera_suggestion = profile

            description = (
                f"A {orientation} format professional photograph with balanced composition and technical excellence. "
                "The image demonstrates clear visual hierarchy and professional execution, suitable for high-quality reproduction across multiple generative platforms. "
                f"Recommended professional setup: {camera_suggestion}, with careful attention to exposure, lighting, and artistic composition."
            )

            # Optional enrichment; a failure here must not discard the
            # description that has already been built.
            try:
                expert_enabled = PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("enable_expert_analysis", True)
                if expert_enabled:
                    context = self.professional_analyzer.generate_enhanced_context(description)
                    technical_context = context.get("technical_context", "")
                    if technical_context:
                        description = f"{description} Cinematography context: {technical_context}"
            except Exception as context_error:
                logger.warning(f"Cinematography context enhancement failed in fallback: {context_error}")

            return description, {
                "model": "Professional-Fallback",
                "device": "cpu",
                "confidence": 0.7,
                "image_size": f"{width}x{height}",
                "color_mode": mode,
                "orientation": orientation,
                "aspect_ratio": round(aspect_ratio, 2),
                "scene_type": scene_type,
                "has_camera_suggestion": True,
                "camera_setup": camera_suggestion,
                "professional_enhancement": True,
            }

        except Exception as analysis_error:
            logger.error(f"Professional fallback analysis failed: {analysis_error}")
            failure_metadata = {
                "error": str(analysis_error),
                "model": "Professional-Fallback",
            }
            return (
                "Professional image suitable for detailed analysis and multi-engine prompt generation",
                failure_metadata,
            )
|
| 506 |
|
| 507 |
|
| 508 |
class ModelManager:
|
| 509 |
+
"""Enhanced manager for handling image analysis models with professional cinematography integration"""
|
| 510 |
|
| 511 |
+
def __init__(self, preferred_model: str = "bagel-professional"):
    """Remember the preferred model name and start with no analyzers."""
    self.preferred_model = preferred_model
    # Maps a model name to the analyzer instance created for it.
    self.analyzers = dict()
    self.current_analyzer = None
|
|
|
|
| 518 |
model_name = model_name or self.preferred_model
|
| 519 |
|
| 520 |
if model_name not in self.analyzers:
|
| 521 |
+
if model_name in ["bagel-api", "bagel-professional"]:
|
| 522 |
self.analyzers[model_name] = BagelAPIAnalyzer()
|
| 523 |
elif model_name == "fallback":
|
| 524 |
self.analyzers[model_name] = FallbackAnalyzer()
|
| 525 |
else:
|
| 526 |
+
logger.warning(f"Unknown model: {model_name}, using professional fallback")
|
| 527 |
model_name = "fallback"
|
| 528 |
self.analyzers[model_name] = FallbackAnalyzer()
|
| 529 |
|
| 530 |
return self.analyzers[model_name]
|
| 531 |
|
| 532 |
+
def analyze_image(self, image: Image.Image, model_name: str = None, analysis_type: str = "multiengine") -> Tuple[str, Dict[str, Any]]:
    """Analyze ``image`` with the selected analyzer, falling back on failure.

    Args:
        image: Image to analyze.
        model_name: Analyzer to use; passed straight to ``get_analyzer``.
        analysis_type: "cinematic", "flux" or "multiengine" selects the
            matching specialised method when the analyzer provides it;
            otherwise the analyzer's plain ``analyze_image`` is used.

    Returns:
        A (description, metadata) tuple from the primary analyzer, or from
        the "fallback" analyzer when the primary call fails or reports an
        ``error`` entry in its metadata.
    """
    analyzer = self.get_analyzer(model_name)
    if analyzer is None:
        return "No analyzer available", {"error": "Model not found"}

    # Ordered (analysis_type, method name) pairs; the first pair that matches
    # the requested type AND exists on the analyzer wins.
    specialised_methods = (
        ("cinematic", "analyze_for_cinematic_prompt"),
        ("flux", "analyze_for_flux_with_professional_context"),
        ("multiengine", "analyze_for_multiengine_prompt"),
    )
    method_name = next(
        (
            attr
            for kind, attr in specialised_methods
            if analysis_type == kind and hasattr(analyzer, attr)
        ),
        "analyze_image",
    )
    success, result = safe_execute(getattr(analyzer, method_name), image)

    if success and result[1].get("error") is None:
        return result

    # Primary path failed or reported an error: retry with the fallback.
    logger.warning(f"Primary model failed, using cinematography-enhanced fallback: {result}")
    fallback_analyzer = self.get_analyzer("fallback")
    fallback_success, fallback_result = safe_execute(fallback_analyzer.analyze_image, image)

    if not fallback_success:
        return "All cinematography analyzers failed", {"error": "Complete analysis failure"}
    return fallback_result
|
| 560 |
|
| 561 |
def cleanup_all(self) -> None:
|
| 562 |
"""Clean up all model resources"""
|
|
|
|
| 564 |
analyzer.cleanup()
|
| 565 |
self.analyzers.clear()
|
| 566 |
clean_memory()
|
| 567 |
+
logger.info("All cinematography analyzers cleaned up")
|
| 568 |
|
| 569 |
|
| 570 |
+
# Shared module-level manager; the convenience function below delegates to it.
model_manager = ModelManager("bagel-professional")
|
| 572 |
|
| 573 |
|
| 574 |
+
def analyze_image(image: Image.Image, model_name: str = None, analysis_type: str = "multiengine") -> Tuple[str, Dict[str, Any]]:
    """
    Analyze an image through the module-level ``model_manager``.

    Args:
        image: PIL Image to analyze
        model_name: Optional model name ("bagel-professional", "fallback")
        analysis_type: Type of analysis ("multiengine", "cinematic", "flux")

    Returns:
        The (description, metadata) tuple produced by
        ``model_manager.analyze_image``
    """
    return model_manager.analyze_image(
        image,
        model_name=model_name,
        analysis_type=analysis_type,
    )
|
| 587 |
|