# Image_agent / app.py
# Hugging Face Space by Mustafa-albakkar (commit a8c54b2, verified) - "Update app.py"
"""
SPACE 2: Image Agent with Enhanced Prompting & English Text
===================================================
โœ… Smart prompt engineering for beautiful, consistent images
โœ… English text in thumbnails
โœ… Automatic video reception from Space 3
โœ… FIX: Triple-layer EmergencyTranslator - never returns Arabic to TTS
โœ… NEW: Uses character type (human/animal/fantasy) to generate accurate images
"""
import os
import io
import json
import base64
import logging
import shutil
import gradio as gr
from typing import List, Dict, Any, Optional
from PIL import Image
import torch
from gradio_client import Client
from datetime import datetime
import pickle
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("image_agent_space")
# ==================== Configuration ====================
# Every setting is overridable via environment variables for Space deployment.
HF_MODEL = os.getenv("HF_MODEL", "stabilityai/stable-diffusion-2-1")  # diffusers fallback model
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
IMAGE_SIZE = (1024, 576)  # (width, height) - 16:9, matches downstream video frames
TEXT_AGENT_URL = os.getenv("TEXT_AGENT_URL", "https://mustafa-albakkar-text_agent.hf.space")  # Space 1
VIDEO_AGENT_URL = os.getenv("VIDEO_AGENT_URL", "https://mustafa-albakkar-video_agent.hf.space")  # Space 3
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")  # optional: enables Groq-backed emergency translation
MEMORY_FILE = "/tmp/video_memory.pkl"  # pickled VideoMemory state (ephemeral on Spaces restarts)
VIDEO_STORAGE = "/tmp/videos"  # received videos and thumbnails are copied here
os.makedirs(VIDEO_STORAGE, exist_ok=True)
# ==================== Local Fallback Translator ====================
class LocalFallbackTranslator:
    """Guaranteed translation - no API key. Never raises. Never returns Arabic."""

    def __init__(self):
        # Ordered (name, fn) pairs; translate() tries them in registration order.
        self.backends = []
        self._init_backends()

    def _init_backends(self):
        """Probe each optional translation library with a tiny live call and register the working ones."""
        # NOTE(review): each probe performs a network request at construction time -
        # confirm this startup cost is acceptable for the Space.
        try:
            from deep_translator import GoogleTranslator
            test = GoogleTranslator(source='ar', target='en').translate("ู…ุฑุญุจุง")
            if test:
                self.backends.append(('deep_translator', self._translate_deep))
                log.info("โœ… LocalFallback: deep_translator available")
        except Exception as e:
            log.warning(f"deep_translator unavailable: {e}")
        try:
            from googletrans import Translator as GT
            test = GT().translate("ู…ุฑุญุจุง", dest='en')
            if test and test.text:
                self.backends.append(('googletrans', self._translate_googletrans))
                log.info("โœ… LocalFallback: googletrans available")
        except Exception as e:
            log.warning(f"googletrans unavailable: {e}")
        try:
            import translators as ts
            test = ts.translate_text("ู…ุฑุญุจุง", translator='bing', to_language='en')
            if test:
                self.backends.append(('translators', self._translate_translators))
                log.info("โœ… LocalFallback: translators available")
        except Exception as e:
            log.warning(f"translators unavailable: {e}")

    def _translate_deep(self, text: str) -> str:
        """Translate via deep_translator, chunking long inputs to stay under the service limit."""
        from deep_translator import GoogleTranslator
        if len(text) <= 4500:
            return GoogleTranslator(source='ar', target='en').translate(text)
        # Split into 4500-char chunks and rejoin the per-chunk translations.
        chunks = [text[i:i+4500] for i in range(0, len(text), 4500)]
        return ' '.join(GoogleTranslator(source='ar', target='en').translate(c) for c in chunks)

    def _translate_googletrans(self, text: str) -> str:
        """Translate via googletrans (unofficial Google Translate client)."""
        from googletrans import Translator as GT
        return GT().translate(text, dest='en').text

    def _translate_translators(self, text: str) -> str:
        """Translate via the `translators` package using the Bing engine."""
        import translators as ts
        return ts.translate_text(text, translator='bing', to_language='en')

    def _keyword_fallback(self, text: str) -> str:
        """Last resort when every backend fails: salvage any Latin-script text, else a generic placeholder."""
        log.error("๐Ÿšจ All translation backends failed - keyword extraction")
        import re
        latin = re.findall(r'[A-Za-z0-9\s,.\-]+', text)
        clean = ' '.join(latin).strip()
        # Only trust the salvage if it yields more than a token amount of text.
        if clean and len(clean) > 10:
            return clean
        return f"narrative scene with {len(text.split())} words describing characters and events"

    def translate(self, text: str) -> str:
        """Translate Arabic text to English; always returns a string, never raises."""
        if not text or not text.strip():
            return ""
        for name, fn in self.backends:
            try:
                result = fn(text)
                # Require more than a trivial result before trusting a backend.
                if result and len(result.strip()) > 5:
                    return result.strip()
            except Exception as e:
                log.warning(f"LocalFallback [{name}] failed: {e}")
        return self._keyword_fallback(text)

    @property
    def available(self) -> bool:
        # Always True: the keyword fallback guarantees some output.
        return True
# ==================== Emergency Translator ====================
class EmergencyTranslator:
    """
    Backup translator: Groq โ†’ LocalFallback โ†’ keyword extraction.
    Always returns English. Never raises.
    """

    def __init__(self):
        self.groq_client = None
        self.groq_available = False
        self.local_fallback = LocalFallbackTranslator()
        if GROQ_API_KEY:
            try:
                from groq import Groq
                self.groq_client = Groq(api_key=GROQ_API_KEY)
                # NOTE(review): availability is inferred from construction alone;
                # no test call is made, so a bad key only surfaces at first use.
                self.groq_available = True
                log.info("โœ… EmergencyTranslator: Groq available")
            except Exception as e:
                log.warning(f"EmergencyTranslator Groq unavailable: {e}")
        fb = [n for n, _ in self.local_fallback.backends]
        log.info(f"EmergencyTranslator: Groq={'โœ…' if self.groq_available else 'โŒ'} | LocalFallback={fb or 'keyword-only'}")

    @property
    def available(self) -> bool:
        # Always True: keyword extraction is the unconditional last resort.
        return True

    def is_arabic(self, text: str) -> bool:
        """True when more than 10% of characters fall in the basic Arabic Unicode block (U+0600-U+06FF)."""
        return sum(1 for c in text if '\u0600' <= c <= '\u06FF') > len(text) * 0.1

    def translate_to_english(self, text: str) -> str:
        """Return English text: pass-through when already non-Arabic, else Groq then local fallback."""
        if not text or not text.strip():
            return ""
        if not self.is_arabic(text):
            return text
        log.warning(f"๐Ÿšจ EmergencyTranslator: {text[:50]}...")
        if self.groq_available:
            result = self._translate_groq(text)
            # Accept Groq output only if non-empty AND no longer Arabic.
            if result and not self.is_arabic(result):
                return result
        return self.local_fallback.translate(text)

    def _translate_groq(self, text: str) -> str:
        """Single Groq chat-completion call; returns '' on any failure so callers fall through."""
        try:
            # NOTE(review): confirm "qwen-2.5-72b-instruct" is a model Groq actually serves.
            resp = self.groq_client.chat.completions.create(
                model="qwen-2.5-72b-instruct",
                messages=[
                    {"role": "system", "content": "Arabic to English translator. Provide ONLY the translation."},
                    {"role": "user", "content": f"Translate to English:\n{text}"}
                ],
                temperature=0.3, max_tokens=500
            )
            return resp.choices[0].message.content.strip()
        except Exception as e:
            log.error(f"Groq translation failed: {e}")
            return ""
# ==================== โœ… NEW: Character Type Prompt Builder ====================
class CharacterPromptBuilder:
    """
    Turns character metadata produced by Space 1 into prompt fragments so that
    each generated image contains the right kind of characters for its scene.
    """
    # Per-type quality hints added to every prompt
    TYPE_HINTS = {
        'human': 'realistic human beings, photorealistic people, detailed faces and clothing',
        'animal': 'realistic animals, detailed fur and feathers, wildlife photography style, natural behavior',
        'fantasy': 'fantasy creatures, magical and ethereal beings, intricate details, fantasy art style',
        'object': 'detailed object, studio lighting, high detail product shot',
        'none': 'wide establishing shot, no characters, landscape focus'
    }
    # Negative additions to AVOID per type (prevents wrong character type from appearing)
    TYPE_NEGATIVES = {
        'human': 'animals, creatures, monsters',
        'animal': 'people, humans, persons',
        'fantasy': '',
        'object': 'people, animals',
        'none': 'people, animals, characters'
    }

    @staticmethod
    def get_dominant_type(characters: List[Dict]) -> str:
        """Return the highest-priority type present among the scene's characters."""
        present = {entry.get('type', 'none') for entry in characters}
        ranking = ('human', 'animal', 'fantasy', 'object', 'none')
        return next((t for t in ranking if t in present), 'none')

    @staticmethod
    def build_character_block(characters: List[Dict]) -> str:
        """Join the non-empty character descriptions from Space 1 into one compact block."""
        stripped = (entry.get('description', '').strip() for entry in characters)
        return ", ".join(desc for desc in stripped if desc)

    @staticmethod
    def get_type_hint(dominant_type: str) -> str:
        """Positive prompt fragment for the given dominant type ('' if unknown)."""
        return CharacterPromptBuilder.TYPE_HINTS.get(dominant_type, '')

    @staticmethod
    def get_type_negative(characters: List[Dict]) -> str:
        """Negative prompt fragment derived from the scene's dominant character type."""
        dominant = CharacterPromptBuilder.get_dominant_type(characters)
        return CharacterPromptBuilder.TYPE_NEGATIVES.get(dominant, '')
# ==================== Smart Prompt Engineering ====================
class PromptEnhancer:
    """Enhances prompts using visual_prompt + character type data."""

    QUALITY_BOOSTERS = [
        "high quality", "detailed", "professional",
        "sharp focus", "4k resolution", "masterpiece"
    ]
    LIGHTING_STYLES = {
        "cinematic": "cinematic lighting, dramatic shadows, golden hour",
        "soft": "soft diffused lighting, gentle shadows, natural light",
        "dramatic": "dramatic lighting, high contrast, chiaroscuro",
        "bright": "bright even lighting, well lit, studio lighting",
        "mystical": "ethereal lighting, magical glow, ambient light"
    }
    CAMERA_MOVEMENTS = [
        "slow zoom in", "slow pan right", "slow pan left",
        "subtle tilt up", "gentle dolly forward",
        "smooth tracking shot", "slow zoom out"
    ]
    BASE_NEGATIVE = (
        "ugly, blurry, low quality, distorted, deformed, "
        "bad anatomy, worst quality, low res, jpeg artifacts, "
        "watermark, text, signature, logo, username"
    )

    def __init__(self, emergency_translator=None):
        # FIX: emergency_translator may legitimately be None (the default), but the
        # previous code dereferenced it unconditionally on Arabic input, raising
        # AttributeError. enhance_prompt() now guards every use.
        self.emergency_translator = emergency_translator
        self.char_builder = CharacterPromptBuilder()

    def enhance_prompt(
        self,
        base_prompt: str,
        visual_style: str,
        scene_number: int,
        total_scenes: int,
        context_text: str = "",
        characters: List[Dict] = None
    ) -> tuple:
        """
        Build (enhanced_prompt, negative_prompt) using:
        - base_prompt (visual description from Space 1)
        - character type hints (human/animal/fantasy)
        - character descriptions from Space 1 registry

        Never raises; with no translator configured, Arabic text is passed through
        as a best effort instead of crashing.
        """
        characters = characters or []
        prompt = base_prompt.strip()
        translator = self.emergency_translator
        # Fix Arabic in prompt (only possible when a translator was supplied)
        has_arabic = any('\u0600' <= c <= '\u06FF' for c in prompt)
        if has_arabic and translator is not None:
            log.warning(f"โš ๏ธ Scene {scene_number}: visual_prompt is Arabic โ€” translating")
            prompt = translator.translate_to_english(prompt)
        # Enrich short prompts with scene context
        elif context_text and len(prompt.split()) < 10:
            if translator is None or not translator.is_arabic(context_text):
                prompt = f"{prompt}, depicting: {context_text[:80]}"
            else:
                translated_ctx = translator.translate_to_english(context_text[:80])
                prompt = f"{prompt}, depicting: {translated_ctx}"
        # Character type hint + type-specific negative
        dominant_type = self.char_builder.get_dominant_type(characters)
        type_hint = self.char_builder.get_type_hint(dominant_type)
        type_negative = self.char_builder.get_type_negative(characters)
        log.info(f" ๐ŸŽญ Scene {scene_number}: dominant_type={dominant_type} | hint={type_hint[:40]}")
        # If characters have descriptions from Space 1, inject them
        char_block = self.char_builder.build_character_block(characters)
        if char_block and char_block not in prompt:
            # Prepend character block to ensure it's weighted highest by SD
            prompt = f"{char_block}, {prompt}"
            log.info(f" ๐Ÿ’‰ Injected character block: {char_block[:60]}...")
        # Lighting keyed off the style string; cinematic is the default
        style_lower = visual_style.lower()
        lighting = next(
            (v for k, v in self.LIGHTING_STYLES.items() if k in style_lower),
            self.LIGHTING_STYLES["cinematic"]
        )
        # Rotate camera movement per scene so consecutive scenes vary
        camera = self.CAMERA_MOVEMENTS[(scene_number - 1) % len(self.CAMERA_MOVEMENTS)]
        # Framing depends on position within the story
        if scene_number == 1:
            framing = "establishing shot, wide angle"
        elif scene_number == total_scenes:
            framing = "closing shot, thoughtful composition"
        else:
            framing = "medium shot, balanced composition"
        # Assemble the final prompt
        components = [prompt, type_hint, lighting, visual_style]
        components.extend(self.QUALITY_BOOSTERS[:3])
        components.extend([framing, camera])
        components = [c for c in components if c]  # remove empty strings
        enhanced = ", ".join(components)
        # Negative prompt = base negatives + type-specific negatives
        negative = self.BASE_NEGATIVE
        if type_negative:
            negative = f"{negative}, {type_negative}"
        log.info(f"๐Ÿ“ธ Scene {scene_number} [{dominant_type}]: {enhanced[:90]}...")
        return enhanced, negative
# ==================== Memory Manager ====================
class VideoMemory:
    """Tracks the most recent received video/thumbnail plus a 10-entry history, persisted via pickle."""

    def __init__(self):
        self.current_video = None       # path of the most recently stored video, or None
        self.current_thumbnail = None   # path of its thumbnail, or None
        self.history = []               # oldest-first list of entry dicts
        self.load_memory()

    def load_memory(self):
        """Restore state from MEMORY_FILE if present; missing/corrupt files are logged and ignored."""
        # NOTE(review): pickle.load on a /tmp file is only safe while save_memory()
        # is the sole writer - confirm no other process writes this path.
        try:
            if os.path.exists(MEMORY_FILE):
                with open(MEMORY_FILE, 'rb') as f:
                    data = pickle.load(f)
                self.current_video = data.get('current_video')
                self.current_thumbnail = data.get('current_thumbnail')
                self.history = data.get('history', [])
                log.info("โœ… Memory loaded")
        except Exception as e:
            log.error(f"Memory load failed: {e}")

    def save_memory(self):
        """Write current state to MEMORY_FILE; failures are logged, never raised."""
        try:
            with open(MEMORY_FILE, 'wb') as f:
                pickle.dump({
                    'current_video': self.current_video,
                    'current_thumbnail': self.current_thumbnail,
                    'history': self.history
                }, f)
        except Exception as e:
            log.error(f"Memory save failed: {e}")

    def add_video(self, video_path: str, thumbnail_path: str = None, metadata: dict = None):
        """Copy a new video (and optional thumbnail) into VIDEO_STORAGE and record it.

        Evicts and deletes the oldest entry once history exceeds 10 items, then
        persists the updated state. Never raises.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        stored_video = os.path.join(VIDEO_STORAGE, f"video_{timestamp}.mp4")
        stored_thumb = None
        try:
            shutil.copy2(video_path, stored_video)
            self.current_video = stored_video
            if thumbnail_path and os.path.exists(thumbnail_path):
                stored_thumb = os.path.join(VIDEO_STORAGE, f"thumb_{timestamp}.png")
                shutil.copy2(thumbnail_path, stored_thumb)
                self.current_thumbnail = stored_thumb
            entry = {
                'video_path': stored_video,
                'thumbnail_path': stored_thumb,
                'timestamp': datetime.now().isoformat(),
                'metadata': metadata or {}
            }
            self.history.append(entry)
            if len(self.history) > 10:
                old = self.history.pop(0)
                # Best-effort cleanup of the evicted entry's files.
                for k in ['video_path', 'thumbnail_path']:
                    p = old.get(k)
                    if p and os.path.exists(p):
                        try:
                            os.remove(p)
                        except OSError:
                            # FIX: was a bare `except:` that also swallowed
                            # SystemExit/KeyboardInterrupt; only filesystem errors are expected here.
                            pass
            # Persist after every successful add, not just on eviction.
            self.save_memory()
        except Exception as e:
            log.error(f"Failed to save video: {e}")

    def get_current(self):
        """Return (current_video_path, current_thumbnail_path); either may be None."""
        return self.current_video, self.current_thumbnail
# ==================== Image Generator ====================
class ImageGenerator:
    """Loads a Stable Diffusion pipeline (OpenVINO first, diffusers fallback) and renders scene images."""

    def __init__(self, emergency_translator=None):
        self.pipeline = None
        self.device = DEVICE
        self.prompt_enhancer = PromptEnhancer(emergency_translator)
        log.info(f"Initializing on device: {self.device}")
        try:
            self._load_pipeline()
        except Exception as e:
            # Leave pipeline as None; generate() then reports "Pipeline not initialized"
            # instead of crashing the whole app at import time.
            log.error(f"Failed to load pipeline: {e}")

    def _load_pipeline(self):
        """Try the int8 OpenVINO models first (CPU-friendly), then fall back to diffusers."""
        try:
            from optimum.intel.openvino import OVDiffusionPipeline
            for model in [
                "OpenVINO/stable-diffusion-2-1-int8-ov",
                "OpenVINO/stable-diffusion-xl-base-1.0-int8-ov"
            ]:
                try:
                    self.pipeline = OVDiffusionPipeline.from_pretrained(model)
                    log.info(f"โœ… OpenVINO model: {model}")
                    return
                except Exception:
                    # FIX: was a bare `except:` which also caught KeyboardInterrupt/SystemExit.
                    continue
            raise RuntimeError("No OpenVINO model available")
        except Exception as e:
            log.warning(f"OpenVINO failed: {e}")
            from diffusers import StableDiffusionPipeline
            args = {'torch_dtype': torch.float16} if self.device == "cuda" else {}
            self.pipeline = StableDiffusionPipeline.from_pretrained(HF_MODEL, **args)
            self.pipeline = self.pipeline.to(self.device)
            log.info(f"โœ… Diffusers model: {HF_MODEL}")

    def _supports_negative_prompt(self) -> bool:
        """True if the pipeline's __call__ accepts a `negative_prompt` keyword.

        FIX: the previous check `hasattr(self.pipeline, 'negative_prompt')` looked for
        an *attribute*, which diffusers/OpenVINO pipelines do not expose - so the
        negative prompt was silently dropped on every generation. Inspect the call
        signature instead; on introspection failure, fall back to not passing it.
        """
        import inspect
        try:
            params = inspect.signature(self.pipeline.__call__).parameters
        except (TypeError, ValueError):
            return False
        return 'negative_prompt' in params

    def generate(
        self,
        prompt: str,
        scene_id: int,
        visual_style: str = "",
        scene_number: int = 1,
        total_scenes: int = 1,
        context_text: str = "",
        characters: List[Dict] = None
    ) -> Dict[str, Any]:
        """Render one scene image.

        Returns a dict with "success" plus either ("image_base64", "image") or "error".
        Never raises.
        """
        if self.pipeline is None:
            return {"success": False, "scene_id": scene_id, "error": "Pipeline not initialized"}
        try:
            enhanced_prompt, negative_prompt = self.prompt_enhancer.enhance_prompt(
                prompt, visual_style, scene_number, total_scenes,
                context_text, characters
            )
            params = {
                "prompt": enhanced_prompt,
                "num_inference_steps": 50,
                "guidance_scale": 7.5,
                "height": IMAGE_SIZE[1],
                "width": IMAGE_SIZE[0]
            }
            if self._supports_negative_prompt():
                params["negative_prompt"] = negative_prompt
            result = self.pipeline(**params)
            # FIX: nsfw_content_detected may be None; guard before any() to avoid TypeError.
            nsfw_flags = getattr(result, 'nsfw_content_detected', None)
            if nsfw_flags and any(nsfw_flags):
                return {"success": False, "scene_id": scene_id, "error": "NSFW detected"}
            if not hasattr(result, 'images') or not result.images:
                return {"success": False, "scene_id": scene_id, "error": "No image generated"}
            image = result.images[0].convert('RGB')
            # Encode as base64 PNG for JSON transport to the video agent.
            buf = io.BytesIO()
            image.save(buf, format="PNG")
            img_b64 = base64.b64encode(buf.getvalue()).decode('utf-8')
            log.info(f"โœ… Scene {scene_id} image generated")
            return {"success": True, "scene_id": scene_id, "image_base64": img_b64, "image": image}
        except Exception as e:
            log.error(f"Generation failed scene {scene_id}: {e}")
            import traceback
            traceback.print_exc()
            return {"success": False, "scene_id": scene_id, "error": str(e)}
# ==================== Space Connector ====================
class SpaceConnector:
    """Holds gradio_client connections to the companion Spaces (Text Agent / Video Agent)."""

    def __init__(self):
        self.text_agent = self._connect(TEXT_AGENT_URL, "Text Agent")
        self.video_agent = self._connect(VIDEO_AGENT_URL, "Video Agent")

    @staticmethod
    def _connect(url, label):
        """Open a Client for `url`; returns None (logging the reason) when unset or unreachable."""
        if not url:
            return None
        try:
            client = Client(url)
            log.info(f"โœ… {label} connected")
            return client
        except Exception as exc:
            log.error(f"{label} connection failed: {exc}")
            return None

    def get_scenes_from_text_agent(self, text, language="ar", visual_style="", target_duration=15):
        """Ask Space 1 to split `text` into scenes; returns its payload or None on failure."""
        if not self.text_agent:
            return None
        try:
            return self.text_agent.predict(
                text=text, language=language,
                visual_style=visual_style,
                target_scene_duration=target_duration,
                api_name="/process_text"
            )
        except Exception as exc:
            log.error(f"Text Agent call failed: {exc}")
            return None

    def send_to_video_agent(self, scenes_data):
        """Forward the generated scenes (as JSON) to Space 3; returns its response or None on failure."""
        if not self.video_agent:
            return None
        try:
            payload = json.dumps(scenes_data)
            return self.video_agent.predict(
                scenes_json=payload,
                api_name="/create_video_api"
            )
        except Exception as exc:
            log.error(f"Video Agent call failed: {exc}")
            return None
# ==================== Global Instances ====================
# Module-level singletons shared by all Gradio callbacks (constructed at app startup).
emergency_translator = EmergencyTranslator()
image_generator = ImageGenerator(emergency_translator)
space_connector = SpaceConnector()
video_memory = VideoMemory()
# ==================== Gradio Functions ====================
def receive_video_from_space3(video_path: str, thumbnail_path: str = None):
    """API endpoint: store a finished video (and optional thumbnail) pushed by Space 3."""
    try:
        if not (video_path and os.path.exists(video_path)):
            return {"success": False, "message": "Invalid video path"}
        video_memory.add_video(video_path, thumbnail_path)
        return {"success": True, "message": "Video received"}
    except Exception as exc:
        return {"success": False, "message": str(exc)}
def process_full_pipeline(text, language, visual_style, target_duration, auto_send_to_video):
    """End-to-end handler: text -> scenes (Space 1) -> images (here) -> optionally video (Space 3).

    Returns the five Gradio outputs wired below:
    (json_str, gallery_images, video_path, thumbnail_path, status_message).
    Never raises; every failure is reported via the status message.
    """
    if not text or len(text.strip()) < 100:
        cv, ct = video_memory.get_current()
        return None, None, cv, ct, "โŒ Text must be at least 100 characters"
    try:
        # Step 1: Get scenes
        scenes_data = space_connector.get_scenes_from_text_agent(text, language, visual_style, target_duration)
        if not scenes_data:
            cv, ct = video_memory.get_current()
            return None, None, cv, ct, "โŒ Failed to get scenes from Text Agent"
        scenes = scenes_data.get("scenes", [])
        # Space 1 may normalize/override the requested visual style.
        visual_style = scenes_data.get("visual_style", visual_style)
        if not scenes:
            cv, ct = video_memory.get_current()
            return None, None, cv, ct, "โŒ No scenes received"
        total_scenes = len(scenes)
        log.info(f"Processing {total_scenes} scenes with character-aware prompting...")
        # Step 2: Generate images
        results = []
        gallery_images = []
        for idx, scene in enumerate(scenes, 1):
            scene_id = scene.get("scene_id", idx)
            visual_prompt = scene.get("visual_prompt", "")
            # โœ… Extract characters from scene (produced by Space 1)
            characters = scene.get("characters", [])
            char_summary = scene.get("character_summary", "")
            dominant_type = CharacterPromptBuilder.get_dominant_type(characters)
            log.info(f"Scene {scene_id}: characters={char_summary} | dominant={dominant_type}")
            # Extract and validate English text (translate if Space 1 leaked Arabic)
            english_text = scene.get("text_english", "") or scene.get("text", "")
            if emergency_translator.is_arabic(english_text):
                log.warning(f"โš ๏ธ Scene {scene_id}: text_english is Arabic โ€” translating")
                english_text = emergency_translator.translate_to_english(english_text)
            if not visual_prompt:
                # Nothing to render for this scene.
                continue
            result = image_generator.generate(
                prompt=visual_prompt,
                scene_id=scene_id,
                visual_style=visual_style,
                scene_number=idx,
                total_scenes=total_scenes,
                context_text=english_text,
                characters=characters  # โœ… pass character data
            )
            if result["success"]:
                results.append({
                    "scene_id": scene_id,
                    "text": english_text,
                    "text_english": english_text,
                    "image_base64": result["image_base64"],
                    "prompt": visual_prompt,
                    "characters": characters,  # โœ… forward to video agent
                    "character_summary": char_summary,
                    "dominant_character_type": dominant_type
                })
                gallery_images.append((result["image"], f"Scene {scene_id} [{dominant_type}]"))
            else:
                log.error(f"Failed scene {scene_id}: {result.get('error')}")
        # Step 3: Final Arabic safety check (nothing Arabic may reach TTS downstream)
        for r in results:
            if emergency_translator.is_arabic(r.get("text", "")):
                log.error(f"โŒ Scene {r['scene_id']} still Arabic - force translating")
                r["text"] = emergency_translator.translate_to_english(r["text"])
                r["text_english"] = r["text"]
        output_json = {
            "scenes": results,
            "total_scenes": len(results),
            "visual_style": visual_style,
            "language": "en"
        }
        # Build status message for the UI
        type_icons = {'human': '๐Ÿ‘ค', 'animal': '๐Ÿพ', 'fantasy': 'โœจ', 'object': '๐Ÿ“ฆ', 'none': '๐ŸŒ„'}
        status_msg = f"""โœ… Image Generation Complete!
๐Ÿ“Š **Results:**
- Total Scenes: {total_scenes}
- Images Generated: {len(results)}
- Failed: {total_scenes - len(results)}
๐ŸŽญ **Character Types per Scene:**
"""
        for r in results:
            icon = type_icons.get(r.get('dominant_character_type', 'none'), 'โ“')
            status_msg += f"\n{icon} Scene {r['scene_id']}: {r.get('character_summary', 'none')}"
        # Step 4: Send to Video Agent (optional)
        if auto_send_to_video and results:
            status_msg += "\n\n๐ŸŽฌ Sending to Video Agent..."
            video_result = space_connector.send_to_video_agent(output_json)
            status_msg += "\nโœ… Video processing started!" if video_result else "\nโš ๏ธ Failed to start video"
        cv, ct = video_memory.get_current()
        return json.dumps(output_json, indent=2), gallery_images, cv, ct, status_msg
    except Exception as e:
        log.error(f"Pipeline failed: {e}")
        import traceback
        traceback.print_exc()
        cv, ct = video_memory.get_current()
        return None, None, cv, ct, f"โŒ Error: {str(e)}"
def refresh_video_display():
    """Reload the most recent video/thumbnail from memory for the UI refresh button."""
    video_path, thumb_path = video_memory.get_current()
    if not (video_path and os.path.exists(video_path)):
        return None, None, "โ„น๏ธ No video yet"
    return video_path, thumb_path, "โœ… Video loaded"
# ==================== Gradio Interface ====================
# Status strings rendered once in the UI header below (computed at startup).
text_agent_status = "โœ… Connected" if space_connector.text_agent else "โš ๏ธ Not Connected"
video_agent_status = "โœ… Connected" if space_connector.video_agent else "โš ๏ธ Not Connected"
groq_ok = emergency_translator.groq_available
fb = [n for n, _ in emergency_translator.local_fallback.backends]
em_status = f"Groq={'โœ…' if groq_ok else 'โŒ'} + LocalFallback={'โœ… (' + ', '.join(fb) + ')' if fb else 'โš ๏ธ keyword'}"
# Gradio UI: one "Pipeline" tab for interactive use, one "API" tab exposing the
# /receive_video endpoint that Space 3 calls back into.
with gr.Blocks(title="Image Agent - Character-Aware", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ๐ŸŽจ Image Agent - Character-Aware Image Generation")
    gr.Markdown("**Space 2/3** - Images match scene characters: ๐Ÿ‘ค human / ๐Ÿพ animal / โœจ fantasy")
    gr.Markdown(
        f"**Device:** {DEVICE.upper()} | "
        f"**Text Agent:** {text_agent_status} | "
        f"**Video Agent:** {video_agent_status} | "
        f"**Translation:** {em_status} | "
        f"**๐ŸŽญ Character-Aware: ON**"
    )
    gr.Markdown("---")
    with gr.Tab("๐Ÿš€ Pipeline"):
        with gr.Row():
            with gr.Column(scale=1):
                # Left column: inputs and status
                text_input = gr.Textbox(label="Input Text", placeholder="ุฃุฏุฎู„ ู†ุตูƒ ู‡ู†ุง...", lines=10)
                with gr.Row():
                    language_input = gr.Radio(choices=["ar", "en"], value="ar", label="Language")
                    duration_input = gr.Slider(minimum=10, maximum=30, value=15, step=1, label="Scene Duration (sec)")
                style_input = gr.Textbox(label="Visual Style", value="cinematic, high quality, 4k")
                auto_video = gr.Checkbox(label="Auto-send to Video Agent", value=True)
                process_btn = gr.Button("๐Ÿš€ Start Pipeline", variant="primary", size="lg")
                status_output = gr.Textbox(label="Status", lines=20)
            with gr.Column(scale=1):
                # Right column: generated images and the final video from Space 3
                gallery_output = gr.Gallery(label="Generated Images", columns=2, height=400)
                gr.Markdown("### ๐Ÿ“น Final Video")
                refresh_btn = gr.Button("๐Ÿ”„ Refresh Video", size="sm")
                video_display = gr.Video(label="Video", height=300)
                thumbnail_display = gr.Image(label="Thumbnail", type="filepath", height=200)
        json_output = gr.Code(label="JSON Output", language="json", lines=10)
        # Wire the main pipeline button; outputs match process_full_pipeline's 5-tuple.
        process_btn.click(
            fn=process_full_pipeline,
            inputs=[text_input, language_input, style_input, duration_input, auto_video],
            outputs=[json_output, gallery_output, video_display, thumbnail_display, status_output]
        )
        refresh_btn.click(fn=refresh_video_display, inputs=[], outputs=[video_display, thumbnail_display, status_output])
    with gr.Tab("๐Ÿ”Œ API"):
        # Programmatic endpoint Space 3 uses to push the finished video back here.
        api_video_path = gr.Textbox(label="video_path")
        api_thumb_path = gr.Textbox(label="thumbnail_path")
        api_receive_btn = gr.Button("Receive Video")
        api_result = gr.JSON(label="Result")
        api_receive_btn.click(
            fn=receive_video_from_space3,
            inputs=[api_video_path, api_thumb_path],
            outputs=api_result,
            api_name="receive_video"
        )
    gr.Markdown("---")
    gr.Markdown(f"""
### โœจ Character-Aware Image Generation (NEW)
**How it works:**
- Space 1 detects characters and classifies them as human/animal/fantasy/object/none
- Space 2 reads the `characters` array from each scene
- Adds the correct type hint to the image prompt:
| Type | Added to prompt | Negative |
|------|----------------|---------|
| ๐Ÿ‘ค human | "realistic human beings, photorealistic people" | "animals, creatures" |
| ๐Ÿพ animal | "realistic animals, detailed fur/feathers" | "people, humans" |
| โœจ fantasy | "fantasy creatures, magical beings" | โ€” |
| ๐ŸŒ„ none | "landscape focus, no characters" | "people, animals" |
- Character descriptions from Space 1 are **injected at the start** of the prompt
- This ensures the image model generates the **correct character types** for every scene
**Translation:** {em_status}
""")
if __name__ == "__main__":
    # Entrypoint; the PORT env var (set by the hosting platform) overrides the default 7860.
    PORT = int(os.getenv("PORT", "7860"))
    log.info("Starting Character-Aware Image Agent...")
    demo.launch(server_name="0.0.0.0", server_port=PORT)