# Lector_de_Fotogramas_0 / sara_v3_direct_simple.py
# Malaji71's picture
# Upload sara_v3_direct_simple.py
# fbd9405 verified
# PARTE COMPLETA: SARA V3 DIRECT SIMPLE - SOLO SISTEMA TÉCNICO GEN-4
# Modificación temporal para priorizar Gen-4 y liberar memoria
# DESACTIVA sistema poético para usar toda la GPU en Gen-4
import time
import torch
from typing import Dict, Any
from PIL import Image
from sara_v3_parte_1 import sara_v3_logger
from sara_v3_parte_3 import get_blip_models
# Only the technical (Gen-4) system is imported; the poetic system is
# force-disabled in this module.
SARA_POETIC_AVAILABLE = False
try:
    from sara_v3_parte_4_gen4_carga import (
        load_sara_gen4_complete,
        generate_technical_prompts_gen4,
        is_gen4_ready,
        get_gen4_info,
    )
except ImportError as import_error:
    # The Gen-4 helpers stay undefined in this case; code that uses them
    # checks SARA_TECHNICAL_AVAILABLE first.
    sara_v3_logger.warning(f"❌ Sistema técnico no disponible: {import_error}")
    SARA_TECHNICAL_AVAILABLE = False
else:
    SARA_TECHNICAL_AVAILABLE = True
    sara_v3_logger.info("✅ Sistema técnico Gen-4 disponible - MODO SOLO TÉCNICO")
class DirectSARAGen4Only:
    """Technical-only SARA engine that always generates with Gen-4.

    Pipeline: caption the image with BLIP, hand the caption and the user's
    idea to the Gen-4 technical generator, and fall back to template-based
    prompts if any step raises.
    """

    # Leading boilerplate removed from raw BLIP captions (matched
    # case-insensitively; only the first match is removed).
    _CAPTION_PREFIXES = (
        "arafed ",
        "there is a ",
        "there are ",
        "this is a ",
        "this image shows ",
    )

    # (caption word, prompt subject) pairs, checked in this order.
    _SUBJECT_WORDS = (
        ("woman", "Woman"),
        ("man", "Man"),
        ("person", "Person"),
    )

    # Caption used when BLIP is unavailable or yields nothing usable.
    _DEFAULT_CAPTION = "Scene with subject in natural environment"

    def __init__(self):
        """Attach the shared SARA logger and announce technical-only mode."""
        self.logger = sara_v3_logger
        self.logger.info("🎯 SARA inicializado en MODO SOLO TÉCNICO Gen-4")

    def analyze_and_generate_direct(
        self,
        image: Image.Image,
        user_idea: str = "",
        system_preference: str = "technical",
    ) -> Dict[str, Any]:
        """Caption *image* and generate prompts with the Gen-4 technical system.

        Args:
            image: Image to analyse.
            user_idea: Optional free-text idea to weave into the prompts.
                ``None`` is treated as an empty string.
            system_preference: Accepted for interface compatibility only; it is
                not read, the technical system is always used.

        Returns:
            A dict with ``success``, ``prompts``, ``caption``,
            ``analysis_time``, ``method`` and ``system_used``. If any step
            raises, template-based fallback prompts are returned instead;
            ``success`` is still ``True`` in that case (unchanged behaviour),
            ``method`` is ``'technical_fallback'`` and an extra ``error`` key
            carries the exception text.
        """
        start_time = time.time()
        user_idea = user_idea or ""
        # Pre-initialised so the error handler can tell whether captioning
        # finished before the failure.
        caption = None
        self.logger.info("🎬 ANÁLISIS DIRECTO SARA Gen-4 SOLO iniciado...")
        try:
            # Step 1: caption the image with BLIP.
            caption = self._blip_comprehensive_analysis(image)
            self.logger.info(f"📝 BLIP Caption: {caption}")

            # Step 2: generate with Gen-4 (the only system in this mode).
            self.logger.info("🎯 MODO FORZADO: Solo sistema técnico Gen-4")
            prompts, method = self._generate_with_technical_system_only(caption, user_idea, image)

            total_time = time.time() - start_time
            self.logger.info(f"✅ Análisis Gen-4 solo completado en {total_time:.2f}s con {method}")
            return {
                'success': True,
                'prompts': prompts,
                'caption': caption,
                'analysis_time': total_time,
                'method': method,
                'system_used': 'technical_only',
            }
        except Exception as e:
            # Boundary handler: any failure degrades to template prompts
            # rather than propagating to the caller.
            total_time = time.time() - start_time
            self.logger.error(f"💥 Error en análisis Gen-4 solo: {e}")
            have_caption = caption is not None
            fallback_prompts = self._generate_technical_fallback(
                caption if have_caption else "Scene analysis",
                user_idea,
            )
            return {
                'success': True,
                'prompts': fallback_prompts,
                'caption': caption if have_caption else 'Fallback analysis',
                'analysis_time': total_time,
                'method': 'technical_fallback',
                'system_used': 'fallback_technical',
                'error': str(e),
            }

    def _generate_with_technical_system_only(self, caption: str, user_idea: str, image: Image.Image) -> tuple:
        """Generate prompts with Gen-4, loading it first if it is not ready.

        Args:
            caption: Cleaned image caption.
            user_idea: User idea forwarded to the generator.
            image: Not used by this method; kept for interface compatibility.

        Returns:
            ``(prompts, method_label)``.

        Raises:
            RuntimeError: If the technical system is not importable, cannot be
                loaded, or reports a failed generation.
        """
        self.logger.info("⚡ Generando SOLO con sistema técnico Gen-4...")
        self.logger.info("🎯 Usando toda la memoria GPU disponible para Gen-4")

        if not SARA_TECHNICAL_AVAILABLE:
            raise RuntimeError("Sistema técnico no disponible")

        if not is_gen4_ready():
            self.logger.info("🔄 Cargando Gen-4 con memoria completa...")
            if not load_sara_gen4_complete():
                raise RuntimeError("No se pudo cargar sistema técnico Gen-4")

        # NOTE(review): assumes the generator returns a dict with 'success',
        # 'prompts' and optionally 'method' / 'error' — confirm against
        # sara_v3_parte_4_gen4_carga.
        result = generate_technical_prompts_gen4(caption, user_idea)
        if not result.get('success'):
            raise RuntimeError(f"Error en Gen-4: {result.get('error', 'unknown')}")

        self.logger.info("✅ Gen-4 generó prompts exitosamente")
        return result['prompts'], f"sara_gen4_only ({result.get('method', 'unknown')})"

    @staticmethod
    def _model_device(model):
        """Return the device holding *model*'s parameters.

        Falls back to CUDA-if-available (the previous hard-coded choice) when
        the device cannot be read from the model.
        """
        try:
            return next(model.parameters()).device
        except (AttributeError, StopIteration, TypeError):
            return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def _blip_comprehensive_analysis(self, image: Image.Image) -> str:
        """Caption *image* with BLIP and return the cleaned description.

        Returns a generic description when the BLIP model/processor are
        missing, when the cleaned caption is empty, or when captioning raises.
        """
        blip_model, blip_processor = get_blip_models()
        if not blip_model or not blip_processor:
            return self._DEFAULT_CAPTION

        try:
            inputs = blip_processor(image, return_tensors="pt")
            # Follow the model's device instead of assuming CUDA: a model kept
            # on CPU while CUDA is available would otherwise hit a device
            # mismatch inside generate().
            inputs = inputs.to(self._model_device(blip_model))

            with torch.no_grad():
                outputs = blip_model.generate(
                    **inputs,
                    max_length=60,
                    num_beams=5,
                    do_sample=False,
                    early_stopping=True,
                )

            raw_caption = blip_processor.decode(outputs[0], skip_special_tokens=True)
            clean_description = self._clean_blip_caption(raw_caption)
            return clean_description or self._DEFAULT_CAPTION
        except Exception as e:
            self.logger.error(f"Error en análisis BLIP: {e}")
            return "Person in natural environment"

    def _clean_blip_caption(self, caption: str) -> str:
        """Normalise a raw BLIP caption.

        Removes one known boilerplate prefix, trims whitespace, applies
        ``str.capitalize`` and guarantees a trailing period. Returns ``""`` for
        empty or whitespace-only input rather than a lone ``"."``.
        """
        caption = (caption or "").strip()
        lowered = caption.lower()
        for prefix in self._CAPTION_PREFIXES:
            if lowered.startswith(prefix):
                caption = caption[len(prefix):]
                break

        caption = caption.strip().capitalize()
        if not caption:
            return ""
        if not caption.endswith('.'):
            caption += '.'
        return caption

    def _generate_technical_fallback(self, caption: str, user_idea: str = "") -> Dict[str, str]:
        """Build template prompts for when Gen-4 generation fails.

        Args:
            caption: Image caption; lower-cased and embedded in every prompt.
            user_idea: Optional action text; a default action is used when it
                is empty or ``None``.

        Returns:
            Prompts keyed by ``basic``, ``intermediate``, ``advanced`` and
            ``experimental``.
        """
        self.logger.info("🔄 Generando fallback técnico especializado...")

        # The caption is embedded mid-sentence, so its trailing period is
        # dropped to avoid "…beach., professional…".
        scene = (caption or "").lower().rstrip(". ")

        # Whole-word matching: a substring test would turn "german", "human"
        # or "many" into "Man".
        words = {word.strip(".,;:!?\"'()") for word in scene.split()}
        subject = next(
            (label for word, label in self._SUBJECT_WORDS if word in words),
            "Subject",
        )

        idea = (user_idea or "").strip()
        if idea:
            action_base = idea
            self.logger.info(f"💡 Integrando idea técnica: '{action_base}'")
        else:
            action_base = "moves with technical precision"

        subject_lower = subject.lower()
        return {
            'basic': f"{subject} {action_base} while camera tracks steadily, {scene}, professional lighting setup.",
            'intermediate': f"Camera employs smooth tracking techniques as {subject_lower} {action_base} with controlled movement, {scene}, cinematic lighting enhances technical precision.",
            'advanced': f"Sophisticated cinematographic approach: {subject_lower} {action_base} utilizing advanced staging techniques, {scene}, dramatic lighting creates compelling visual narrative with professional camera work including dolly movements and focus pulls.",
            'experimental': f"Innovative filming techniques merge as {subject_lower} {action_base} transcending conventional cinematographic boundaries, {scene}, reality fragments through experimental camera approaches including time-lapse sequences and impossible perspective shifts.",
        }

    def get_system_status(self) -> Dict[str, Any]:
        """Report the state of the technical system (the poetic one is always off).

        ``technical_system['loaded']`` reflects ``is_gen4_ready()`` and
        ``technical_system['info']`` is only present when Gen-4 is loaded.
        """
        technical = {
            'available': SARA_TECHNICAL_AVAILABLE,
            'loaded': False,
        }
        if SARA_TECHNICAL_AVAILABLE:
            technical['loaded'] = is_gen4_ready()
            if technical['loaded']:
                technical['info'] = get_gen4_info()

        return {
            'mode': 'technical_only',
            'poetic_system': {
                'available': False,
                'loaded': False,
                'note': 'Desactivado para liberar memoria GPU',
            },
            'technical_system': technical,
        }
# Shared module-level engine instance (technical-only); the wrapper functions
# in this module delegate to it. Constructing it logs a message at import time.
direct_sara_gen4_only = DirectSARAGen4Only()
def analyze_and_generate_BRUTAL(image: Image.Image, user_idea: str = "", system_preference: str = "technical") -> Dict[str, Any]:
    """Run the Gen-4-only analysis on *image* through the shared engine.

    ``system_preference`` is ignored: "technical" is always forwarded.
    """
    engine = direct_sara_gen4_only
    return engine.analyze_and_generate_direct(
        image=image,
        user_idea=user_idea,
        system_preference="technical",
    )
def _blip_analysis_brutal(image: Image.Image) -> str:
    """Return the BLIP caption for *image*, computed by the shared engine."""
    engine = direct_sara_gen4_only
    caption = engine._blip_comprehensive_analysis(image)
    return caption
def analyze_with_direct_sara(image: Image.Image, user_idea: str = "", system_preference: str = "technical") -> Dict[str, Any]:
    """Main entry point: analyse *image* and generate Gen-4 prompts.

    ``system_preference`` is ignored: "technical" is always forwarded to the
    shared engine.
    """
    engine = direct_sara_gen4_only
    return engine.analyze_and_generate_direct(
        image=image,
        user_idea=user_idea,
        system_preference="technical",
    )
def get_dual_system_status() -> Dict[str, Any]:
    """Return the shared engine's status report (technical system only)."""
    engine = direct_sara_gen4_only
    status_report = engine.get_system_status()
    return status_report
if __name__ == "__main__":
    # Manual smoke test: run the pipeline once on random noise and print the
    # result plus the system status.
    print("🧪 Probando SARA DIRECTO SOLO GEN-4...")

    import numpy as np

    # Random 512x512 RGB pixels stand in for a real frame.
    noise_pixels = np.random.randint(0, 255, (512, 512, 3), dtype=np.uint8)
    noise_image = Image.fromarray(noise_pixels)

    # Run with a technical idea supplied by the "user".
    result = analyze_with_direct_sara(noise_image, "camera tracks smoothly")

    summary_lines = (
        f"✅ Éxito: {result['success']}",
        f"📝 Caption: {result['caption']}",
        f"⏱️ Tiempo: {result['analysis_time']:.2f}s",
        f"🎯 Sistema usado: {result['system_used']}",
        f"🔧 Método: {result['method']}",
        "🎬 Prompts generados:",
    )
    for summary_line in summary_lines:
        print(summary_line)

    for level, prompt in result['prompts'].items():
        print(f" {level.upper()}: {prompt}")

    # Report the current system status.
    system_state = get_dual_system_status()
    print(f"\n📊 Estado sistema: {system_state}")
# FINAL SARA V3 DIRECT SIMPLE - SOLO SISTEMA TÉCNICO GEN-4
# Versión optimizada que usa toda la memoria GPU para Gen-4
# Desactiva sistema poético temporalmente para liberar recursos