Spaces:
Running
Running
| """ | |
| SPACE 1: Text Agent with Translation + Enhanced Visual Prompts + Character Detection | |
| ===================================== | |
| - Analyzes text and generates scenes | |
| - Translates Arabic to English using Qwen | |
| - Provides API endpoint for Space 2 | |
| - ✅ ENSURES visual_prompt is ALWAYS in English | |
| - ✅ Validates and fixes visual_prompts automatically | |
| - ✅ FIX: Triple-layer translation - never fails silently | |
| - ✅ NEW: Character detection - human / animal / fantasy / none | |
| """ | |
| import os | |
| import json | |
| import logging | |
| import gradio as gr | |
| from typing import List, Optional | |
| from pydantic import BaseModel, Field, validator | |
| import re | |
| logging.basicConfig(level=logging.INFO) | |
| log = logging.getLogger("text_agent_space") | |
| # ==================== Data Models ==================== | |
class Character(BaseModel):
    """Single character in a scene.

    ``type`` is restricted to ``human``, ``animal``, ``fantasy``, ``object``
    or ``none``; any other value is logged and replaced by ``'none'`` so a
    mislabelled character never rejects the whole scene.
    """
    name: str = Field(..., description="Character name or role")
    type: str = Field(..., description="human | animal | fantasy | object | none")
    description: str = Field(..., description="Full visual description in English")
    is_recurring: bool = Field(default=False, description="Appears in multiple scenes?")

    # Without this decorator pydantic never calls the method and unknown
    # types would pass through unchanged.
    @validator('type')
    def validate_type(cls, v):
        """Return a known character type, falling back to ``'none'``."""
        allowed = {'human', 'animal', 'fantasy', 'object', 'none'}
        # Model output is not always lower-case ("Human", " animal ").
        normalized = v.strip().lower()
        if normalized not in allowed:
            log.warning(f"Unknown character type '{v}', defaulting to 'none'")
            return 'none'
        return normalized
class Scene(BaseModel):
    """Single scene data model.

    Holds the original text, its English version, the image prompt, the
    estimated narration length and the characters present in the scene.
    """
    scene_id: int = Field(..., ge=1)
    text: str = Field(..., min_length=10)
    text_english: str = Field(..., min_length=10)
    visual_prompt: str = Field(..., min_length=20)
    estimated_duration_sec: int = Field(..., ge=5, le=60)
    # ✅ NEW
    characters: List[Character] = Field(default_factory=list)
    character_summary: str = Field(
        default="",
        description="e.g. 'elderly human man (human), wise owl (animal)'"
    )

    # The decorators below register the methods with pydantic; without them
    # the checks are never executed.
    @validator('text')
    def validate_text(cls, v):
        """Strip the scene text and reject it if under 10 characters."""
        if not v or len(v.strip()) < 10:
            raise ValueError("Scene text must be at least 10 characters")
        return v.strip()

    @validator('visual_prompt')
    def validate_visual_prompt(cls, v):
        """Strip the prompt and warn (without failing) on suspicious content."""
        v = v.strip()
        # Arabic letters in the prompt are only reported here, not repaired.
        if re.search(r'[\u0600-\u06FF]', v):
            log.warning(f"⚠️ visual_prompt contains Arabic: {v[:50]}")
        if len(v.split()) < 5:
            log.warning(f"⚠️ visual_prompt too short: {v}")
        return v
class NarrativeInput(BaseModel):
    """Request payload for narrative processing.

    ``text`` needs at least 100 characters, ``language`` must be ``'ar'`` or
    ``'en'`` and ``target_scene_duration`` is limited to 10-30 seconds.
    """
    text: str = Field(..., min_length=100)
    language: str = Field(default="ar")
    visual_style: Optional[str] = Field(default=None)
    target_scene_duration: int = Field(default=15, ge=10, le=30)

    # Registered explicitly; an undecorated method is never run by pydantic,
    # which would let any language code through.
    @validator('language')
    def validate_language(cls, v):
        """Accept only the two supported language codes."""
        if v not in ['ar', 'en']:
            raise ValueError("Language must be 'ar' or 'en'")
        return v
class NarrativeOutput(BaseModel):
    """Result of processing one narrative: the scenes plus aggregate fields."""
    scenes: List[Scene]
    # Number of entries in ``scenes`` as reported by the producer.
    total_scenes: int
    # Sum of the scenes' estimated durations, in seconds.
    estimated_total_duration: int
    language: str
    visual_style: str
    # ✅ NEW: global character registry
    all_characters: List[Character] = Field(default_factory=list)
| # ==================== Configuration ==================== | |
# Style suffix used whenever the caller does not supply a visual style.
DEFAULT_VISUAL_STYLE = "cinematic, high quality, 4k, detailed, professional"

# API keys are optional; each stays None when its variable is not set.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
| # ==================== ✅ NEW: Character Analyzer ==================== | |
class CharacterAnalyzer:
    """
    Normalizes character descriptions across all scenes.
    Ensures recurring characters carry the same name, type and description
    in every scene.

    All helpers are static: they take no instance state, and declaring them
    as such keeps calls through an instance from binding ``self`` to the
    first real argument.
    """
    # Image-prompt hints injected by image_agent based on dominant type
    TYPE_PROMPT_HINTS = {
        'human': 'realistic human beings, photorealistic people, detailed faces',
        'animal': 'realistic animals, detailed fur and feathers, wildlife photography style',
        'fantasy': 'fantasy creatures, magical beings, ethereal and detailed',
        'object': 'detailed object, studio lighting, high detail',
        'none': ''
    }

    @staticmethod
    def build_character_summary(characters: List[Character]) -> str:
        """Return ``"name (type), ..."`` or ``"no characters"`` when empty."""
        if not characters:
            return "no characters"
        return ", ".join(f"{c.name} ({c.type})" for c in characters)

    @staticmethod
    def extract_dominant_type(characters: List[Character]) -> str:
        """Return the highest-priority type present, ``'none'`` if there is none."""
        priority = ['human', 'animal', 'fantasy', 'object', 'none']
        types_present = {c.type for c in characters}
        for t in priority:
            if t in types_present:
                return t
        return 'none'

    @staticmethod
    def get_type_hint(characters: List[Character]) -> str:
        """Return the prompt hint for the dominant character type."""
        dominant = CharacterAnalyzer.extract_dominant_type(characters)
        return CharacterAnalyzer.TYPE_PROMPT_HINTS.get(dominant, '')

    @staticmethod
    def normalize_characters(scenes: List[Scene]) -> List[Scene]:
        """
        Build a master character registry from the first occurrence of each character,
        then apply the master description to ALL scenes so images stay consistent.

        Characters are matched by lower-cased, stripped name. The scenes are
        updated in place and the same list is returned.
        """
        master: dict = {}
        for scene in scenes:
            for char in scene.characters:
                key = char.name.lower().strip()
                if key not in master:
                    master[key] = char
                    log.info(f"📋 Character registered: '{char.name}' → {char.type}")
        for scene in scenes:
            updated = []
            for char in scene.characters:
                key = char.name.lower().strip()
                m = master.get(key, char)
                # Name, type and description come from the first occurrence;
                # the recurring flag stays whatever this scene reported.
                updated.append(Character(
                    name=m.name,
                    type=m.type,
                    description=m.description,
                    is_recurring=char.is_recurring
                ))
            scene.characters = updated
            scene.character_summary = CharacterAnalyzer.build_character_summary(updated)
        return scenes
| # ==================== Visual Prompt Validator ==================== | |
class VisualPromptValidator:
    """Checks that image prompts are English and long enough, repairing them if not.

    Both helpers are static so they behave the same whether called on the
    class or on an instance.
    """

    @staticmethod
    def is_english(text: str) -> bool:
        """Return True when fewer than 10% of the characters are Arabic letters.

        Only the U+0600-U+06FF block is counted. An empty string yields
        False because ``0 < 0`` does not hold.
        """
        arabic_chars = sum(1 for c in text if '\u0600' <= c <= '\u06FF')
        return arabic_chars < len(text) * 0.1

    @staticmethod
    def fix_visual_prompt(prompt: str, text_english: str, scene_id: int) -> str:
        """Return a usable prompt for the scene.

        A prompt that is not English is rebuilt from the first 15 words of
        ``text_english``; a prompt with fewer than 5 words is extended with
        the first 80 characters of ``text_english``.
        """
        prompt = prompt.strip()
        if not VisualPromptValidator.is_english(prompt):
            log.warning(f"⚠️ Scene {scene_id}: visual_prompt not English — fixing")
            words = text_english.split()[:15]
            prompt = f"scene depicting: {' '.join(words)}"
        if len(prompt.split()) < 5:
            prompt = f"{prompt}, detailed scene: {text_english[:80]}"
        return prompt
| # ==================== Local Fallback Translator ==================== | |
class LocalFallbackTranslator:
    """Last-resort Arabic-to-English translation that needs no API key.

    Backends are probed once at construction and tried in registration
    order. ``translate`` does not raise for backend failures; a backend
    result that is still mostly Arabic is rejected, and when nothing usable
    is produced a Latin-only keyword fallback is returned instead.
    """

    def __init__(self):
        self.backends = []
        self._init_backends()

    def _init_backends(self):
        """Register every optional backend that imports and passes a probe call."""
        try:
            from deep_translator import GoogleTranslator
            test = GoogleTranslator(source='ar', target='en').translate("مرحبا")
            if test:
                self.backends.append(('deep_translator', self._translate_deep))
                log.info("✅ LocalFallback: deep_translator available")
        except Exception as e:
            log.warning(f"deep_translator unavailable: {e}")
        try:
            from googletrans import Translator as GT
            test = GT().translate("مرحبا", dest='en')
            if test and test.text:
                self.backends.append(('googletrans', self._translate_googletrans))
                log.info("✅ LocalFallback: googletrans available")
        except Exception as e:
            log.warning(f"googletrans unavailable: {e}")
        try:
            import translators as ts
            test = ts.translate_text("مرحبا", translator='bing', to_language='en')
            if test:
                self.backends.append(('translators', self._translate_translators))
                log.info("✅ LocalFallback: translators available")
        except Exception as e:
            log.warning(f"translators unavailable: {e}")

    @staticmethod
    def _looks_english(text: str) -> bool:
        """Return True when fewer than 10% of the characters are Arabic letters."""
        arabic_chars = sum(1 for c in text if '\u0600' <= c <= '\u06FF')
        return arabic_chars < len(text) * 0.1

    def _translate_deep(self, text: str) -> str:
        """Translate with deep_translator, in 4500-character chunks for long input."""
        from deep_translator import GoogleTranslator
        translator = GoogleTranslator(source='ar', target='en')
        if len(text) <= 4500:
            return translator.translate(text)
        chunks = [text[i:i+4500] for i in range(0, len(text), 4500)]
        # Drop empty/None chunk results so one bad chunk cannot break the join.
        return ' '.join(filter(None, (translator.translate(c) for c in chunks)))

    def _translate_googletrans(self, text: str) -> str:
        """Translate with googletrans."""
        from googletrans import Translator as GT
        return GT().translate(text, dest='en').text

    def _translate_translators(self, text: str) -> str:
        """Translate with the ``translators`` package using Bing."""
        import translators as ts
        return ts.translate_text(text, translator='bing', to_language='en')

    def _keyword_fallback(self, text: str) -> str:
        """Return the Latin fragments of ``text`` or a generic English sentence."""
        log.error("🚨 All translation backends failed - keyword extraction")
        latin = re.findall(r'[A-Za-z0-9\s,.\-]+', text)
        clean = ' '.join(latin).strip()
        if clean and len(clean) > 10:
            return clean
        return f"narrative scene with {len(text.split())} words describing events and characters"

    def translate(self, text: str) -> str:
        """Translate ``text`` to English using the first backend that succeeds.

        Returns ``""`` for empty or whitespace-only input. A backend result
        is accepted only when it is a string longer than 5 characters after
        stripping and is not still Arabic.
        """
        if not text or not text.strip():
            return ""
        for name, fn in self.backends:
            try:
                result = fn(text)
            except Exception as e:
                log.warning(f"LocalFallback [{name}] failed: {e}")
                continue
            candidate = result.strip() if isinstance(result, str) else ""
            if len(candidate) > 5 and self._looks_english(candidate):
                return candidate
            # Some backends echo the input back on failure; try the next one.
            log.warning(f"LocalFallback [{name}] returned unusable output")
        return self._keyword_fallback(text)

    def available(self) -> bool:
        """Always True: the keyword fallback works without any backend."""
        return True
| # ==================== Translation Service ==================== | |
class TranslationService:
    """Layer 1: Groq → Layer 2: Local Qwen → Layer 3: LocalFallback"""

    def __init__(self):
        self.groq_available = False
        self.local_available = False
        self.groq_client = None
        self.local_llm = None
        self.local_fallback = LocalFallbackTranslator()
        self._setup_groq()
        # The local model is only loaded when Groq could not be set up.
        if not self.groq_available:
            self._setup_local_qwen()

    def _setup_groq(self):
        """Create the Groq client when an API key is configured."""
        if not GROQ_API_KEY:
            return
        try:
            from groq import Groq
            self.groq_client = Groq(api_key=GROQ_API_KEY)
            self.groq_available = True
            log.info("✅ Groq API initialized")
        except Exception as e:
            log.warning(f"Groq init failed: {e}")

    def _setup_local_qwen(self):
        """Load the GGUF Qwen model, downloading it first if the file is missing."""
        try:
            from llama_cpp import Llama
            model_path = "./models/Qwen2.5-14B-Instruct-Q6_K_L.gguf"
            if not os.path.exists(model_path):
                from huggingface_hub import hf_hub_download
                model_path = hf_hub_download(
                    repo_id="bartowski/Qwen2.5-14B-Instruct-GGUF",
                    filename="Qwen2.5-14B-Instruct-Q6_K_L.gguf",
                    local_dir="./models"
                )
            self.local_llm = Llama(model_path=model_path, n_ctx=4096, n_threads=4, n_gpu_layers=0)
            self.local_available = True
            log.info("✅ Local Qwen 14B initialized")
        except Exception as e:
            log.error(f"Local Qwen failed: {e}")

    def translate_to_english(self, arabic_text: str) -> str:
        """Return an English rendering of ``arabic_text``.

        Blank input gives ``""`` and text that already passes the English
        check is returned unchanged. Otherwise each enabled layer is tried
        in order and the local fallback handles whatever is left.
        """
        if not arabic_text or not arabic_text.strip():
            return ""
        if self._is_english(arabic_text):
            return arabic_text
        layers = (
            (self.groq_available, self._translate_with_groq),
            (self.local_available, self._translate_with_local),
        )
        for enabled, run_layer in layers:
            if not enabled:
                continue
            candidate = run_layer(arabic_text)
            if candidate and self._is_english(candidate):
                return candidate
        return self.local_fallback.translate(arabic_text)

    def _is_english(self, text: str) -> bool:
        """True when Arabic letters make up less than 10% of ``text``."""
        arabic_count = 0
        for ch in text:
            if '\u0600' <= ch <= '\u06FF':
                arabic_count += 1
        return arabic_count < len(text) * 0.1

    def _translate_with_groq(self, text: str) -> str:
        """Translate through the Groq chat API; returns ``""`` on any error."""
        messages = [
            {"role": "system", "content": "Professional Arabic to English translator. Provide ONLY the translation."},
            {"role": "user", "content": f"Translate to English:\n{text}"}
        ]
        try:
            resp = self.groq_client.chat.completions.create(
                model="llama-3.3-70b-versatile",
                messages=messages,
                temperature=0.3, max_tokens=2000
            )
            return resp.choices[0].message.content.strip()
        except Exception as e:
            log.error(f"Groq translation failed: {e}")
            return ""

    def _translate_with_local(self, text: str) -> str:
        """Translate with the local model; returns ``""`` on any error."""
        try:
            prompt = f"<|im_start|>system\nTranslate Arabic to English only.<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n"
            resp = self.local_llm(prompt, max_tokens=1500, temperature=0.3, stop=["<|im_end|>"])
            first_choice = resp['choices'][0]
            return first_choice['text'].strip()
        except Exception as e:
            log.error(f"Local translation failed: {e}")
            return ""
| # ==================== Text Agent ==================== | |
class TextAgent:
    """Turns narrative text into validated scenes with character metadata.

    Scenes come from the Claude API when a client could be created, and from
    a paragraph/sentence splitter otherwise. Every scene then receives an
    English ``text_english``, normalized characters and a checked
    ``visual_prompt``.
    """

    def __init__(self):
        self.client = None
        self.translator = TranslationService()
        self.validator = VisualPromptValidator()
        self.char_analyzer = CharacterAnalyzer()
        if ANTHROPIC_API_KEY:
            try:
                from anthropic import Anthropic
                self.client = Anthropic(api_key=ANTHROPIC_API_KEY)
                log.info("✅ Text Agent initialized with Claude API")
            except ImportError:
                log.warning("⚠️ Anthropic library not installed")
            except Exception as e:
                # Any other client failure also means "use the fallback".
                log.warning(f"⚠️ Anthropic client init failed: {e}")

    def process_narrative(self, input_data: NarrativeInput) -> NarrativeOutput:
        """Split, translate, normalize and validate a narrative.

        Args:
            input_data: Validated request (text, language, style, duration).

        Returns:
            The scenes together with totals and the unique character list.
        """
        log.info(f"Processing: {len(input_data.text)} chars, lang={input_data.language}")
        scenes = self._process_with_claude(input_data) if self.client else self._process_fallback(input_data)
        # Translate text fields
        if input_data.language == 'ar':
            for scene in scenes:
                translated = self.translator.translate_to_english(scene.text)
                if not self.translator._is_english(translated):
                    translated = self.translator.local_fallback.translate(scene.text)
                scene.text_english = translated
                # Translate character descriptions if Arabic
                for char in scene.characters:
                    if not VisualPromptValidator.is_english(char.description):
                        char.description = self.translator.translate_to_english(char.description)
        else:
            for scene in scenes:
                scene.text_english = scene.text
        # Normalize character descriptions across scenes
        scenes = CharacterAnalyzer.normalize_characters(scenes)
        # Validate visual prompts. The helper is called on the class, not on
        # self.validator, so no instance is ever bound as its first argument.
        for scene in scenes:
            fixed = VisualPromptValidator.fix_visual_prompt(
                scene.visual_prompt, scene.text_english, scene.scene_id
            )
            if fixed != scene.visual_prompt:
                scene.visual_prompt = fixed
        # Build global character registry (first occurrence wins)
        all_chars: dict = {}
        for scene in scenes:
            for char in scene.characters:
                key = char.name.lower().strip()
                if key not in all_chars:
                    all_chars[key] = char
        total_duration = sum(s.estimated_duration_sec for s in scenes)
        output = NarrativeOutput(
            scenes=scenes,
            total_scenes=len(scenes),
            estimated_total_duration=total_duration,
            language=input_data.language,
            visual_style=input_data.visual_style or DEFAULT_VISUAL_STYLE,
            all_characters=list(all_chars.values())
        )
        log.info(f"✅ {len(scenes)} scenes | {len(all_chars)} characters | {total_duration}s")
        return output

    def _process_with_claude(self, input_data: NarrativeInput) -> List[Scene]:
        """Ask Claude for scenes; any API or parse error falls back to splitting."""
        log.info("Using Claude API")
        system_prompt = f"""You are a professional narrative analyzer for video production.
Analyze the text, divide it into scenes, and identify every character.
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
CHARACTER CLASSIFICATION RULES
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Classify EVERY character as ONE of:
"human" → any person: man, woman, child, elderly, warrior, king, etc.
"animal" → any real animal: owl, rabbit, cat, dog, horse, bird, etc.
"fantasy" → dragons, fairies, giants, demons, magical creatures, etc.
"object" → significant non-living subject: sword, ship, tower, etc.
"none" → landscape / setting only, no main character
CRITICAL IMAGE ACCURACY RULE:
→ If characters are HUMAN: visual_prompt MUST show realistic human beings
→ If characters are ANIMAL: visual_prompt MUST show realistic animals
→ If characters are FANTASY: visual_prompt MUST describe fantasy traits explicitly
→ NEVER mix types: don't describe a human character as an animal or vice versa
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
CHARACTER CONSISTENCY RULE
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
For EACH recurring character:
1. Scene 1: Write FULL description (age, appearance, clothing/fur/color, features)
2. ALL other scenes: COPY the EXACT SAME description word-for-word
Example (animal story):
Scene 1 visual_prompt: "wise elderly owl with pure white feathers, large golden eyes, small curved beak, perched on gnarled oak branch in enchanted moonlit forest"
Scene 2 visual_prompt: "wise elderly owl with pure white feathers, large golden eyes, small curved beak, sitting beside small rabbit with soft grey fur and large brown eyes on mossy forest floor"
Example (human story):
Scene 1 visual_prompt: "brave young knight in silver armor with dark hair and blue eyes, riding white horse through dark forest at dusk"
Scene 2 visual_prompt: "brave young knight in silver armor with dark hair and blue eyes, standing before ancient stone castle gate, sword raised"
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
OUTPUT FORMAT (JSON array only)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
[
{{
"scene_id": 1,
"text": "Original text in {input_data.language}",
"text_english": "Accurate English translation for TTS",
"visual_prompt": "DETAILED ENGLISH visual description (15+ words) — repeat full character descriptions",
"estimated_duration_sec": {input_data.target_scene_duration},
"characters": [
{{
"name": "character name or role",
"type": "human | animal | fantasy | object | none",
"description": "Full English visual description of this character",
"is_recurring": true
}}
]
}}
]
ABSOLUTE RULES:
- visual_prompt = ENGLISH ONLY, 15+ words, no Arabic ever
- Character type must match the actual nature of the character
- Repeat exact character descriptions across all scenes
- text_english = natural English translation for voice narration
- target duration: {input_data.target_scene_duration}±5 seconds"""
        user_prompt = f"""Analyze this text. Detect all characters and classify them as human/animal/fantasy/object/none:
{input_data.text}
Requirements:
- Classify each character correctly (human stays human, animal stays animal)
- Repeat full character descriptions in every scene's visual_prompt
- visual_prompt in ENGLISH only"""
        try:
            response = self.client.messages.create(
                model="claude-sonnet-4-20250514",
                max_tokens=8000,
                temperature=0.3,
                system=system_prompt,
                messages=[{"role": "user", "content": user_prompt}]
            )
            return self._parse_claude_response(response.content[0].text)
        except Exception as e:
            log.error(f"Claude API error: {e}")
            return self._process_fallback(input_data)

    def _parse_claude_response(self, content: str) -> List[Scene]:
        """Parse the model reply into ``Scene`` objects.

        Accepts a bare JSON array, an array inside a Markdown fence (with or
        without a language tag) or an array surrounded by prose.

        Raises:
            ValueError: If the payload is not valid JSON or not a JSON array
                (``json.JSONDecodeError`` is a ``ValueError``).
        """
        payload = content.strip()
        # Remove an opening fence such as ``` or ```json and the closing fence.
        payload = re.sub(r'^```[A-Za-z]*\s*', '', payload)
        payload = re.sub(r'\s*```$', '', payload)
        try:
            scenes_data = json.loads(payload)
        except json.JSONDecodeError:
            # The model sometimes wraps the array in text; retry on the
            # outermost [...] span before giving up.
            start, end = payload.find('['), payload.rfind(']')
            if start == -1 or end <= start:
                raise
            scenes_data = json.loads(payload[start:end + 1])
        if not isinstance(scenes_data, list):
            raise ValueError("Response is not a list")
        scenes = []
        for sd in scenes_data:
            # "characters": null must behave like a missing key.
            raw_chars = sd.pop('characters', None) or []
            chars = []
            for rc in raw_chars:
                try:
                    chars.append(Character(**rc))
                except Exception as e:
                    # One bad character must not discard the whole scene.
                    log.warning(f"Character parse error: {e} — {rc}")
            scenes.append(Scene(**sd, characters=chars))
        return scenes

    def _process_fallback(self, input_data: NarrativeInput) -> List[Scene]:
        """Split the text into scenes without the Claude API.

        Paragraphs separated by blank lines are used when there are several;
        otherwise the text is split into sentences. Pieces shorter than 30
        characters are dropped, and scene ids are assigned after filtering
        so they stay contiguous.
        """
        log.info("Using fallback method")
        text = input_data.text.strip()
        paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
        if len(paragraphs) <= 1:
            # '؟' is the Arabic question mark.
            sentences = re.split(r'[.!?؟]+', text)
            paragraphs = [s.strip() for s in sentences if len(s.strip()) > 20]
        usable = [p for p in paragraphs if len(p) >= 30]
        if not usable and text:
            # Nothing survived the length filters: keep the text as one scene
            # rather than returning no scenes at all.
            usable = [text]
        style = input_data.visual_style or DEFAULT_VISUAL_STYLE
        scenes = []
        for idx, para in enumerate(usable, 1):
            word_count = len(para.split())
            duration = max(10, min(30, int(word_count / 2.5)))
            if input_data.language == 'ar':
                english_text = self.translator.translate_to_english(para[:100])
            else:
                english_text = para[:100]
            visual_prompt = f"scene depicting: {' '.join(english_text.split()[:15])}, {style}"
            scenes.append(Scene(
                scene_id=idx,
                text=para,
                # Placeholder: process_narrative overwrites text_english.
                text_english=para,
                visual_prompt=visual_prompt,
                estimated_duration_sec=duration,
                characters=[],
                character_summary="unknown"
            ))
        return scenes
| # ==================== Global Instance ==================== | |
# Single shared agent, created at import time; the Gradio handlers below use it.
text_agent = TextAgent()
| # ==================== Gradio Functions ==================== | |
def process_text_gradio(text: str, language: str, visual_style: str, target_duration: int) -> tuple:
    """Gradio handler: analyze ``text`` and return ``(json_text, status_text)``.

    Args:
        text: Narrative text; at least 100 characters after stripping.
        language: Language code passed to ``NarrativeInput``.
        visual_style: Style string; empty, blank or ``None`` means "unset".
        target_duration: Target scene duration in seconds.

    Returns:
        The JSON document as a string plus a Markdown status summary, or
        ``(None, error_message)`` when validation or processing fails.
    """
    if not text or len(text.strip()) < 100:
        return None, "❌ Text must be at least 100 characters"
    try:
        # Guard against None so a cleared field cannot raise AttributeError.
        has_style = bool((visual_style or "").strip())
        input_data = NarrativeInput(
            text=text.strip(), language=language,
            visual_style=visual_style if has_style else None,
            target_scene_duration=target_duration
        )
        output = text_agent.process_narrative(input_data)
        output_json = {
            "scenes": [scene.dict() for scene in output.scenes],
            "total_scenes": output.total_scenes,
            "estimated_total_duration": output.estimated_total_duration,
            "language": output.language,
            "visual_style": output.visual_style,
            "all_characters": [c.dict() for c in output.all_characters]
        }
        type_icons = {'human': '👤', 'animal': '🐾', 'fantasy': '✨', 'object': '📦', 'none': '🌄'}
        # Collect the pieces and join once instead of repeated string +=.
        parts = [f"""✅ Analysis Complete!
📊 **Summary:**
- Scenes: {output.total_scenes}
- Duration: {output.estimated_total_duration}s ({output.estimated_total_duration/60:.1f} min)
- Unique Characters: {len(output.all_characters)}
🎭 **Characters Detected:**
"""]
        for char in output.all_characters:
            icon = type_icons.get(char.type, '❓')
            recurring = " (recurring)" if char.is_recurring else ""
            parts.append(f"\n{icon} **{char.name}** [{char.type}]{recurring}")
            parts.append(f"\n └ {char.description[:70]}...")
        parts.append("\n\n📋 **Scenes:**")
        for scene in output.scenes:
            chars_str = scene.character_summary or "none"
            parts.append(f"\n\n**Scene {scene.scene_id}** ({scene.estimated_duration_sec}s)")
            parts.append(f"\n 🎭 {chars_str}")
            if output.language == 'ar':
                parts.append(f"\n 🔤 Arabic: {scene.text[:50]}...")
                parts.append(f"\n 🇬🇧 English: {scene.text_english[:50]}...")
            else:
                parts.append(f"\n 📝 Text: {scene.text[:50]}...")
            parts.append(f"\n 🎨 Visual: {scene.visual_prompt[:70]}...")
        return json.dumps(output_json, indent=2, ensure_ascii=False), "".join(parts)
    except Exception as e:
        # log.exception records the traceback through the logger.
        log.exception(f"Processing failed: {e}")
        return None, f"❌ Error: {str(e)}"
def api_endpoint(text: str, language: str = "ar", visual_style: str = "", target_scene_duration: int = 15):
    """API handler: analyze ``text`` and return the result as a plain dict.

    Inputs are normalized the same way as in the interactive handler: the
    text is stripped and an empty, blank or ``None`` style counts as unset.
    Validation errors raised by ``NarrativeInput`` propagate to the caller.

    Returns:
        Dict with ``scenes``, ``total_scenes``, ``estimated_total_duration``,
        ``language``, ``visual_style`` and ``all_characters``.
    """
    has_style = bool((visual_style or "").strip())
    input_data = NarrativeInput(
        text=(text or "").strip(), language=language,
        visual_style=visual_style if has_style else None,
        target_scene_duration=target_scene_duration
    )
    output = text_agent.process_narrative(input_data)
    return {
        "scenes": [scene.dict() for scene in output.scenes],
        "total_scenes": output.total_scenes,
        "estimated_total_duration": output.estimated_total_duration,
        "language": output.language,
        "visual_style": output.visual_style,
        "all_characters": [c.dict() for c in output.all_characters]
    }
| # ==================== Gradio Interface ==================== | |
# Summarize which translation layers are usable, for display in the UI.
_translator = text_agent.translator
groq_ok = _translator.groq_available
local_ok = _translator.local_available
fallback_backends = [backend_name for backend_name, _fn in _translator.local_fallback.backends]
if groq_ok:
    _primary_layer = "✅ Groq"
elif local_ok:
    _primary_layer = "✅ Local Qwen"
else:
    _primary_layer = "⚠️ API unavailable"
# With no registered backend only the keyword fallback remains.
translation_status = _primary_layer + f" + LocalFallback ({', '.join(fallback_backends) or 'keyword'})"
# Gradio UI: an interactive tab, an API test tab and an examples tab.
# NOTE(review): the captured source had lost its indentation, so the nesting
# of rows/columns below is reconstructed — confirm against the live layout.
with gr.Blocks(title="Text Agent - Character Detection", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📝 Text Agent - With Character Detection")
    gr.Markdown("**Space 1/3** - Scenes + Character type: 👤 human / 🐾 animal / ✨ fantasy")
    # Status banner built from values computed at import time.
    gr.Markdown(
        f"**Claude:** {'✅' if ANTHROPIC_API_KEY else '⚠️ Fallback'} | "
        f"**Translation:** {translation_status} | "
        f"**🎭 Character Detection: ON** | **✨ Visual Validation: ON**"
    )
    gr.Markdown("---")
    with gr.Tab("Interactive Interface"):
        with gr.Row():
            with gr.Column(scale=2):
                text_input = gr.Textbox(
                    label="Input Text (Arabic or English)",
                    placeholder="أدخل نصك هنا...",
                    lines=15
                )
                with gr.Row():
                    language_input = gr.Radio(choices=["ar", "en"], value="ar", label="Language")
                    duration_input = gr.Slider(minimum=10, maximum=30, value=15, step=1, label="Scene Duration (sec)")
                visual_style_input = gr.Textbox(label="Visual Style", value=DEFAULT_VISUAL_STYLE)
                process_btn = gr.Button("🔍 Analyze & Detect Characters", variant="primary", size="lg")
            with gr.Column(scale=1):
                status_output = gr.Textbox(label="Status + Characters", lines=30)
        json_output = gr.Code(label="JSON Output (ready for Space 2)", language="json", lines=15)
        # Input order matches process_text_gradio's parameters; outputs are
        # (json text, status text).
        process_btn.click(
            fn=process_text_gradio,
            inputs=[text_input, language_input, visual_style_input, duration_input],
            outputs=[json_output, status_output]
        )
    with gr.Tab("API Endpoint"):
        gr.Markdown("### API for Space 2")
        with gr.Row():
            api_text = gr.Textbox(label="text", lines=5)
            api_lang = gr.Dropdown(choices=["ar", "en"], value="ar", label="language")
        api_style = gr.Textbox(label="visual_style", value=DEFAULT_VISUAL_STYLE)
        api_duration = gr.Number(label="target_scene_duration", value=15)
        api_btn = gr.Button("Test API")
        api_output = gr.JSON(label="API Response")
        # api_name exposes this handler under the name "process_text".
        api_btn.click(
            fn=api_endpoint,
            inputs=[api_text, api_lang, api_style, api_duration],
            outputs=api_output,
            api_name="process_text"
        )
    with gr.Tab("Examples"):
        # Each example fills the interactive tab's four inputs.
        gr.Examples(
            examples=[
                [
                    """في غابة سحرية قديمة، عاشت بومة حكيمة اسمها أوفيليا. شهدت مواسم لا حصر لها تمر.
في مساء خريفي، عثر أرنب صغير يدعى أوليفر على شجرة أوفيليا. كان تائهاً وخائفاً منفصلاً عن عائلته.
طوال الليل، روت له قصصاً عن الشجاعة والمرونة وكيف تجد الغابة طريقها دائماً.""",
                    "ar", "magical realism, enchanted forest, mystical, cinematic", 15
                ],
                [
                    """A brave knight named Arthur rode through the dark forest on his white horse.
He encountered a wise old wizard who warned him of the dragon ahead.
Arthur pressed on and finally faced the enormous fire-breathing dragon at the castle gates.""",
                    "en", "epic fantasy, dramatic lighting, cinematic", 15
                ]
            ],
            inputs=[text_input, language_input, visual_style_input, duration_input]
        )
    gr.Markdown("---")
    # Static help text; doubled braces are literal braces inside the f-string.
    gr.Markdown(f"""
### ✨ New: Character Detection
Each scene now includes a `characters` array:
```json
"characters": [
{{"name": "Arthur", "type": "human", "description": "brave young knight in silver armor...", "is_recurring": true}},
{{"name": "Dragon", "type": "fantasy", "description": "massive fire-breathing dragon...", "is_recurring": false}}
],
"character_summary": "Arthur (human), Dragon (fantasy)"
```
**Type → Image Impact (used by Space 2):**
| Type | Image Prompt Addition |
|------|----------------------|
| 👤 human | realistic human beings, photorealistic people |
| 🐾 animal | realistic animals, detailed fur/feathers |
| ✨ fantasy | fantasy creatures, magical beings |
| 📦 object | detailed object, studio lighting |
| 🌄 none | landscape only |
**Translation:** {translation_status}
""")
# Script entry point: serve the UI on all interfaces.
if __name__ == "__main__":
    # PORT comes from the environment, defaulting to 7860.
    PORT = int(os.getenv("PORT", "7860"))
    log.info("Starting Text Agent with Character Detection...")
    demo.launch(server_name="0.0.0.0", server_port=PORT)