import random
import re

import gradio as gr
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
| |
|
| | |
# Load the BLIP captioning model once at module import time.
# NOTE(review): the log message says "lighter version" but this loads the
# *large* checkpoint -- confirm which variant is actually intended.
print("Loading BLIP model (lighter version)...")
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
# Half precision only when a GPU is available; fp16 on CPU is slow/unsupported.
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-large",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
)
| |
|
| | |
# Markdown help text rendered in the UI.
# NOTE(review): the emoji in this guide arrived mojibake-mangled (e.g. "π¬",
# "β") -- confirm the intended characters against the original document.
unified_instructions = """
# π¬ Universal Video Prompting Guide
*Compatible with Gen-4, Sora, Pika, Luma, Runway and all diffusion-based video models*

## Core Principles (Universal)
β **Focus on MOTION, not static description**
β **Use positive phrasing exclusively**
β **Start simple, iterate progressively**
β **Refer to subjects in general terms** ("the subject," "the woman")
β **Keep prompts direct and easily understood**

## Two Complementary Approaches

### π **Gen-4 Official Method** (Recommended for beginners)
**Structure**: Simple iterative building
1. Start with essential motion only
2. Add one element at a time: Subject Motion β Camera Motion β Scene Motion β Style Descriptors
3. Use general terms and avoid complex descriptions

**Example**:
- Basic: "The subject walks forward"
- + Camera: "The subject walks forward. Handheld camera follows"
- + Scene: "The subject walks forward. Handheld camera follows. Dust trails behind"
- + Style: "The subject walks forward. Handheld camera follows. Dust trails behind. Cinematic."

### π― **SARA Framework** (Advanced precision)
**Structure**: [Subject] + [Action] + [Reference] + [Atmosphere]
- **Subject (S)**: Main element to control
- **Action (A)**: Movement/transformation ([verb] + [adverb])
- **Reference (R)**: Spatial anchors ("while X remains steady")
- **Atmosphere (A)**: Context and style

**Template**: [Subject] [verb] [adverb] while [reference] [atmosphere]
**Example**: "The subject walks smoothly while background remains steady, cinematic atmosphere"

## Essential Vocabulary

### Effective Verbs (Action)
- **Movement**: walks, runs, moves, glides, flows, drifts
- **Rotation**: turns, spins, rotates, pivots, tilts
- **Transformation**: transforms, morphs, transitions, evolves
- **Expression**: speaks, gestures, looks, smiles, nods

### Effective Adverbs (Quality)
- **Speed**: slowly, quickly, gradually, suddenly, steadily
- **Style**: smoothly, naturally, elegantly, gracefully, dramatically
- **Intensity**: gently, softly, powerfully, intensely, subtly

### Camera Motion Terms
- **Basic**: locked camera, handheld, steady cam
- **Movement**: pan left/right, tilt up/down, zoom in/out, dolly forward/back
- **Advanced**: tracking shot, crane movement, orbital movement

### Style Descriptors
- **Aesthetic**: cinematic, live-action, smooth animation, stop motion
- **Mood**: dramatic, peaceful, energetic, mysterious, professional
- **Technical**: 4K, slow motion, time-lapse, documentary style

## Multi-Subject Guidelines
- **Positional**: "The subject on the left walks. The subject on the right remains still."
- **Descriptive**: "The woman nods. The man waves."
- **Sequential**: "The woman speaks then the man responds."

## Scene Motion Approaches
- **Insinuated**: "The subject runs across the dusty desert" (natural)
- **Explicit**: "The subject runs across the desert. Dust trails behind them" (emphasized)

## Proven Examples (from SARA Framework)
### Character Motion
- "The woman speaks enthusiastically to camera while camera remains still, online tutorial"
- "The subject transitions from walking to jumping while background stays constant"

### Camera Motion
- "The subject remains centered as camera smoothly moves left with steady background"
- "Handheld camera tracks the subject as they walk forward naturally"

### Environmental
- "Camera stays fixed while day cycles into night over the temple, stone structures remain still"
- "The red cup slides smoothly to the right on white table, maintaining background constant"

### Complex Scenes
- "The pile of rocks transforms into a humanoid made of rugged volcanic rocks. The rock humanoid walks around"
- "The woman inspects her reflection in mirror. Surface bubbles with translucent bubbles. Locked camera"

## Technical Notes
- **Gen-4/Runway**: Prefer SARA structure for precision
- **Sora/OpenAI**: Works well with both approaches
- **Pika/Stable**: Gen-4 method often more effective
- **All models**: Start simple, iterate based on results
"""
| |
|
| | |
# SARA-framework ([Subject][Action][Reference][Atmosphere]) prompt templates,
# grouped by scene category. `{placeholder}` fields are filled from VOCABULARY
# or from the get_contextual_* helpers below.
SARA_TEMPLATES = {
    "character_motion": [
        "{subject} speaks {adverb} to camera while camera remains still, {genre}",
        "{subject} {action} {adverb} while background stays constant, {style}",
        "{subject} transitions from {action1} to {action2} while frame remains fixed, {genre}"
    ],
    "camera_motion": [
        "{subject} remains centered as camera {movement} {adverb} with steady background",
        "{camera_type} camera {action} the {subject} as they {movement} {adverb}",
        "Camera {movement} {adverb} while {subject} maintains position, {style}"
    ],
    "environmental": [
        "Camera stays fixed while {environment} {transformation} over {subject}, {reference} remain still",
        "{subject} {action} while {environmental_effect} around them, {style}",
        "{environmental_element} {movement} {adverb} as {subject} {action}, maintaining {reference}"
    ],
    "transformations": [
        "{object} transforms into {new_form} made of {material}. The {new_subject} {action} around",
        "{subject} {action} in {location}. {environmental_reaction} {adverb}. {camera_style}",
        "The {subject} {action} while {environmental_change} occurs {adverb}, {atmosphere}"
    ]
}
| |
|
# Gen-4-style iterative templates, ordered from bare motion ("basic") to a
# fully dressed prompt ("complete") -- one extra layer added per tier.
GEN4_TEMPLATES = {
    "basic": [
        "The subject {action}",
        "The {subject} {movement} {direction}",
        "{subject} {expression} to camera"
    ],
    "with_camera": [
        "The subject {action}. {camera_movement}",
        "{subject} {movement} {direction}. Camera {camera_action}",
        "Handheld camera {camera_behavior} as {subject} {action}"
    ],
    "with_scene": [
        "The subject {action}. {camera_movement}. {scene_element} {scene_action}",
        "{subject} {movement} across {environment}. {environmental_reaction}",
        "Camera {camera_movement} while {subject} {action}, {scene_description}"
    ],
    "complete": [
        "The subject {action}. {camera_movement}. {scene_element} {scene_action}. {style}",
        "{subject} {movement} {adverb} across {environment}. {camera_type} camera {camera_action}. {style}",
        "Camera {camera_movement} as {subject} {action}, {environmental_reaction}, {atmosphere}"
    ]
}
| |
|
| | |
# Default word pools used to fill template placeholders when no
# image-specific contextual vocabulary is available.
VOCABULARY = {
    "subjects": ["the subject", "the woman", "the man", "the person", "the character"],
    "actions": ["walks", "runs", "moves", "glides", "flows", "turns", "speaks", "gestures"],
    "adverbs": ["smoothly", "slowly", "quickly", "naturally", "gracefully", "steadily", "gently"],
    "camera_movements": ["locked camera", "handheld", "dolly forward", "pan left", "pan right", "tracking shot"],
    "environments": ["dusty desert", "forest", "urban street", "open field", "indoor space"],
    "styles": ["cinematic", "documentary", "live-action", "dramatic", "peaceful", "energetic"]
}
| |
|
def analyze_image_simple(image):
    """Run BLIP captioning plus heuristic scene analysis on an uploaded image.

    Args:
        image: PIL Image or numpy array from the Gradio image input (or None).

    Returns:
        Tuple of (markdown analysis text, raw BLIP caption, scene-info dict).
        On failure returns (error message, "", {}).
    """
    if image is None:
        return "Please upload an image first.", "", {}

    try:
        # Gradio may hand us a numpy array -- normalize to PIL.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Classify the framing purely from the aspect ratio.
        width, height = image.size
        aspect_ratio = width / height

        if aspect_ratio > 1.5:
            composition = "Wide landscape shot"
        elif aspect_ratio < 0.7:
            composition = "Vertical portrait shot"
        else:
            composition = "Balanced composition"

        # Run BLIP captioning; move inputs (and the global model) to GPU
        # when available.
        inputs = processor(image, return_tensors="pt")
        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items()}
            # NOTE(review): re-invoked on every call; idempotent but could
            # be hoisted to module load.
            model.cuda()

        out = model.generate(**inputs, max_length=50, num_beams=3)
        basic_caption = processor.decode(out[0], skip_special_tokens=True)

        # Layer heuristic interpretation on top of the raw caption.
        enhanced_analysis = analyze_scene_with_ai(basic_caption, aspect_ratio, composition)

        # chr(10) is '\n' -- avoids a backslash inside the f-string expression.
        analysis = f"""π **Image Analysis:**
β’ **Dimensions**: {width} x {height}
β’ **Composition**: {composition}
β’ **Aspect Ratio**: {aspect_ratio:.2f}

π¨ **Basic Description**:
"{basic_caption}"

π§ **AI-Enhanced Analysis**:
{enhanced_analysis['scene_interpretation']}

π‘ **Motion & Cinematography Insights**:
{chr(10).join(f"β’ {insight}" for insight in enhanced_analysis['motion_insights'])}

π― **Recommended Approach**:
{enhanced_analysis['recommended_approach']}"""

        # Structured scene data consumed by the prompt-building helpers.
        scene_info = {
            'basic_description': basic_caption,
            'enhanced_description': enhanced_analysis['detailed_description'],
            'composition': composition,
            'aspect_ratio': aspect_ratio,
            'has_person': enhanced_analysis['has_person'],
            'emotional_tone': enhanced_analysis['emotional_tone'],
            'visual_style': enhanced_analysis['visual_style'],
            'setting': enhanced_analysis['setting'],
            'distinctive_elements': enhanced_analysis['distinctive_elements'],
            'motion_potential': enhanced_analysis['motion_potential'],
            'cinematic_qualities': enhanced_analysis['cinematic_qualities']
        }

        return analysis, basic_caption, scene_info

    except Exception as e:
        # Surface the failure to the UI instead of crashing the Gradio app.
        return f"Error analyzing image: {str(e)}", "", {}
| |
|
def analyze_scene_with_ai(basic_caption, aspect_ratio, composition):
    """Combine the heuristic analyzers into one enriched scene report dict."""
    caption_text = basic_caption.lower() if isinstance(basic_caption, str) else ""

    # Run each analyzer over the normalized caption.
    elements = extract_scene_elements(caption_text)
    tone = determine_emotional_tone(caption_text, elements)
    style = determine_visual_style(caption_text, elements, composition)
    distinctive = identify_distinctive_elements(caption_text)
    motion = assess_motion_potential(caption_text, elements)
    cinematic = analyze_cinematic_potential(caption_text, composition, aspect_ratio)

    # Synthesize the human-readable pieces from the analyzer outputs.
    description = create_enhanced_description(basic_caption, elements, tone)
    insights = generate_motion_insights(elements, tone, style, composition)
    approach = recommend_approach(elements, tone, style)

    interpretation = (
        f"Scene shows {elements['subject']} in {elements['setting']} "
        f"with {tone} mood. Key elements: {', '.join(distinctive)}"
    )

    return {
        'detailed_description': description,
        'scene_interpretation': interpretation,
        'motion_insights': insights,
        'recommended_approach': approach,
        'has_person': elements['has_person'],
        'emotional_tone': tone,
        'visual_style': style,
        'setting': elements['setting'],
        'distinctive_elements': distinctive,
        'motion_potential': motion,
        'cinematic_qualities': cinematic
    }
| |
|
def extract_scene_elements(text):
    """Extract subject, setting, colors, and objects from a lowercase caption.

    Fix: person keywords are matched as whole words, so "woman" is no longer
    mis-detected as "man" (and "female" as "male") via substring matching.
    """
    elements = {
        'subject': 'subject',
        'setting': 'neutral',
        'clothing': None,
        'colors': [],
        'objects': [],
        'has_person': False
    }

    # Whole-word token set for person detection.
    words = set(re.findall(r"[a-z]+", text))

    if words & {'man', 'male', 'gentleman'}:
        elements['subject'] = 'man'
        elements['has_person'] = True
        if 'costume' in text:
            elements['subject'] = 'man in costume'
            elements['clothing'] = 'costume'
        elif 'suit' in text:
            elements['subject'] = 'man in suit'
            elements['clothing'] = 'suit'
    elif words & {'woman', 'female', 'lady'}:
        elements['subject'] = 'woman'
        elements['has_person'] = True
        if 'dress' in text:
            elements['subject'] = 'woman in dress'
            elements['clothing'] = 'dress'

    # Setting: substring cues are fine here (no ambiguous overlaps).
    if any(word in text for word in ['outdoor', 'outside', 'street', 'nature', 'park']):
        elements['setting'] = 'outdoor'
    elif any(word in text for word in ['indoor', 'inside', 'room', 'office', 'studio']):
        elements['setting'] = 'indoor'
    elif any(word in text for word in ['stage', 'performance']):
        elements['setting'] = 'performance'

    color_words = ['red', 'blue', 'green', 'yellow', 'black', 'white', 'brown', 'pink', 'purple', 'orange', 'gold', 'silver']
    elements['colors'] = [color for color in color_words if color in text]

    objects = ['hat', 'cape', 'flag', 'chair', 'table', 'background', 'wall']
    elements['objects'] = [obj for obj in objects if obj in text]

    return elements
| |
|
def determine_emotional_tone(text, scene_elements):
    """Classify the emotional tone of a captioned scene."""
    lowered = text.lower() if isinstance(text, str) else ""

    # Direct textual cues, checked in priority order.
    keyword_tones = (
        ('serious', ('serious', 'formal', 'stern', 'professional')),
        ('cheerful', ('happy', 'smiling', 'cheerful', 'joyful')),
        ('dramatic', ('dramatic', 'intense', 'powerful', 'bold')),
        ('elegant', ('elegant', 'graceful', 'refined')),
    )
    for tone, keywords in keyword_tones:
        if any(word in lowered for word in keywords):
            return tone

    if 'costume' in lowered or 'performance' in lowered:
        return 'theatrical'

    # No direct cue in the text -- fall back to contextual hints.
    if scene_elements['setting'] == 'performance':
        return 'theatrical'
    if scene_elements['clothing'] in ['suit', 'formal']:
        return 'professional'
    return 'neutral'
| |
|
def determine_visual_style(text, scene_elements, composition):
    """Pick the visual style that best matches the analyzed scene."""
    lowered = text.lower() if isinstance(text, str) else ""

    if scene_elements['setting'] == 'performance' or 'costume' in lowered:
        return 'theatrical'
    if scene_elements['setting'] == 'indoor' and 'formal' in lowered:
        return 'professional'
    if composition == 'Wide landscape shot':
        return 'cinematic'
    # NOTE: 'dramatic' never appears in the colors list, so this branch
    # effectively triggers on red/gold only.
    if any(c in scene_elements['colors'] for c in ('red', 'gold', 'dramatic')):
        return 'dramatic'
    return 'cinematic'
| |
|
def identify_distinctive_elements(text):
    """List notable visual elements worth emphasizing in a video prompt."""
    lowered = text.lower() if isinstance(text, str) else ""
    found = []

    # Clothing / prop cues, in fixed priority order.
    feature_labels = (
        ('costume', 'elaborate costume'),
        ('cape', 'flowing cape'),
        ('hat', 'distinctive hat'),
        ('flag', 'flag detail'),
    )
    for keyword, label in feature_labels:
        if keyword in lowered:
            found.append(label)

    present_colors = [c for c in ('red', 'blue', 'green', 'gold') if c in lowered]
    if present_colors:
        found.append(f"{', '.join(present_colors)} coloring")

    if 'background' in lowered:
        found.append('detailed background')

    return found or ['natural elements']
| |
|
def assess_motion_potential(text, scene_elements):
    """Suggest motion types that suit the detected subject and setting."""
    # `text` is validated but otherwise unused; kept for interface compatibility.
    if not isinstance(text, str):
        text = ""

    suggestions = []

    if scene_elements['has_person']:
        suggestions += ['facial expressions', 'hand gestures', 'body movement']

    # Clothing values are mutually exclusive, so a lookup table is equivalent
    # to the original chain of ifs.
    clothing_motion = {
        'costume': 'costume dynamics',
        'cape': 'cape flow',
        'dress': 'fabric movement',
    }
    motion = clothing_motion.get(scene_elements['clothing'])
    if motion is not None:
        suggestions.append(motion)

    setting = scene_elements['setting']
    if setting == 'outdoor':
        suggestions += ['environmental effects', 'natural lighting changes']
    elif setting == 'indoor':
        suggestions += ['controlled lighting', 'subtle environment shifts']

    return suggestions
| |
|
def analyze_cinematic_potential(text, composition, aspect_ratio):
    """Describe cinematic opportunities implied by framing and content."""
    lowered = text.lower() if isinstance(text, str) else ""

    # Base qualities keyed by framing; list() guards the shared defaults.
    by_composition = {
        'Wide landscape shot': ['horizontal camera movements', 'panoramic reveals', 'environmental context'],
        'Vertical portrait shot': ['character focus', 'intimate framing', 'vertical movement'],
    }
    qualities = list(by_composition.get(
        composition,
        ['balanced framing', 'versatile movement', 'centered composition'],
    ))

    if 'costume' in lowered or 'dramatic' in lowered:
        qualities.append('dramatic lighting potential')

    if any(color in lowered for color in ('red', 'gold', 'rich')):
        qualities.append('color enhancement opportunities')

    # aspect_ratio currently unused; retained for interface compatibility.
    return qualities
| |
|
def create_enhanced_description(basic_caption, scene_elements, emotional_tone):
    """Compose a richer one-line description from extracted scene elements."""
    # basic_caption is accepted for interface compatibility but the text is
    # rebuilt entirely from the structured elements.
    parts = [f"A {emotional_tone} scene featuring {scene_elements['subject']}"]

    if scene_elements['clothing']:
        parts.append(f"wearing {scene_elements['clothing']}")

    parts.append(f"in a {scene_elements['setting']} setting")

    if scene_elements['colors']:
        parts.append(f"with prominent {', '.join(scene_elements['colors'])} elements")

    return " ".join(parts)
| |
|
def generate_motion_insights(scene_elements, emotional_tone, visual_style, composition):
    """Build up to six cinematography hints tailored to the scene analysis."""
    insights = []

    # Character-driven hint keyed on emotional tone.
    if scene_elements['has_person']:
        tone_hint = {
            'dramatic': 'Emphasize powerful gestures and dynamic poses',
            'elegant': 'Focus on graceful, refined movements',
            'theatrical': 'Capture performance-style expressions and gestures',
        }.get(emotional_tone)
        if tone_hint:
            insights.append(tone_hint)

    # Clothing-driven hint (only for the three known garments).
    if scene_elements['clothing']:
        clothing_hint = {
            'costume': 'Highlight costume details with movement',
            'cape': 'Showcase cape flow and dramatic movement',
            'dress': 'Capture fabric dynamics and elegant motion',
        }.get(scene_elements['clothing'])
        if clothing_hint:
            insights.append(clothing_hint)

    if composition == 'Wide landscape shot':
        insights.append('Utilize horizontal camera movements and wide reveals')
    elif composition == 'Vertical portrait shot':
        insights.append('Focus on vertical movement and character detail')

    style_hint = {
        'cinematic': 'Use cinematic camera techniques and dramatic lighting',
        'dramatic': 'Emphasize bold movements and high contrast lighting',
        'professional': 'Maintain clean, controlled camera work',
    }.get(visual_style)
    if style_hint:
        insights.append(style_hint)

    if scene_elements['colors']:
        insights.append(f"Enhance {', '.join(scene_elements['colors'])} tones through lighting")

    # Cap the list so the UI stays readable.
    return insights[:6]
| |
|
def recommend_approach(scene_elements, emotional_tone, visual_style):
    """Recommend SARA vs. Gen-4 prompting based on scene complexity."""
    person_with_outfit = (
        scene_elements['has_person']
        and scene_elements['clothing'] in ('costume', 'suit', 'dress')
    )
    if person_with_outfit:
        return "SARA Framework recommended for precise character and costume control"

    if emotional_tone in ('dramatic', 'theatrical'):
        return "SARA Framework ideal for complex dramatic scenes with multiple elements"

    if emotional_tone in ('neutral', 'peaceful') and visual_style != 'dramatic':
        return "Gen-4 method perfect for natural, iterative scene building"

    if emotional_tone == 'professional' or visual_style == 'professional':
        return "Either approach works - SARA for precision, Gen-4 for simplicity"

    return "Start with Gen-4 for base prompt, then refine with SARA for complexity"
| |
|
def generate_motion_suggestions(description, aspect_ratio):
    """Derive motion tips from the caption text and the frame's aspect ratio."""
    lowered = description.lower()
    tips = []

    def mentions(*keywords):
        return any(k in lowered for k in keywords)

    if mentions('person', 'woman', 'man', 'people'):
        tips += [
            'Focus on character expressions and gestures',
            'Use "the subject" or "the woman/man" for clarity',
            'Consider handheld camera for natural movement',
        ]
    if mentions('sitting', 'standing'):
        tips += [
            'Start with simple movements: speaking, gesturing',
            'Locked or steady camera works well for portraits',
        ]
    if mentions('outdoor', 'landscape', 'nature'):
        tips += [
            'Camera movement can explore the environment',
            'Consider environmental motion: wind, clouds',
            'Cinematic style complements outdoor scenes',
        ]
    if mentions('indoor', 'room'):
        tips += [
            'Controlled movements work best indoors',
            'Focus on subject motion within the space',
        ]

    # Format-specific tip from the aspect ratio.
    if aspect_ratio > 1.5:
        tips.append('Wide format perfect for horizontal camera movements')
    elif aspect_ratio < 0.8:
        tips.append('Portrait format ideal for character-focused content')

    if not tips:
        # Generic fallback when the caption gave us nothing to work with.
        return [
            'Start with simple motion: "The subject moves"',
            'Add camera movement: "Camera follows naturally"',
            'Include environment: "Background remains steady"',
        ]
    return tips[:6]
| |
|
def get_recommended_approach(description):
    """Suggest a prompting method based on what the caption depicts."""
    lowered = description.lower()

    if any(term in lowered for term in ('person', 'woman', 'man')):
        return "SARA Framework recommended for character precision"
    if any(term in lowered for term in ('landscape', 'building', 'nature')):
        return "Gen-4 method works well for environmental scenes"
    return "Try both approaches - start with Gen-4, refine with SARA"
| |
|
def detect_setting(description):
    """Classify a caption as outdoor, indoor, or neutral."""
    lowered = description.lower()

    outdoor_cues = ('outdoor', 'outside', 'street', 'nature')
    indoor_cues = ('indoor', 'inside', 'room', 'building')

    if any(cue in lowered for cue in outdoor_cues):
        return 'outdoor'
    if any(cue in lowered for cue in indoor_cues):
        return 'indoor'
    return 'neutral'
| |
|
def extract_specific_details(description):
    """Pull concrete visual details (colors, clothing, subject) from a caption.

    Fix: the subject check now matches whole words, so "woman" is no longer
    mis-classified as "man" via substring matching.
    """
    details = {
        'colors': [],
        'clothing': None,
        'distinctive_feature': None,
        'main_object': None,
        'setting_clues': []
    }

    text = description.lower()
    words = set(re.findall(r"[a-z]+", text))

    # Colors are joined into a display string; callers test membership with
    # `'red' in details['colors']`, which works on the joined form too.
    colors = ['red', 'blue', 'green', 'yellow', 'black', 'white', 'brown', 'pink', 'purple', 'orange']
    found_colors = [color for color in colors if color in text]
    if found_colors:
        details['colors'] = ', '.join(found_colors)

    # First matching clothing item wins; "red cape"/"red hat" get richer labels.
    clothing_items = ['cape', 'hat', 'dress', 'suit', 'shirt', 'coat', 'jacket', 'uniform', 'costume', 'robe']
    for item in clothing_items:
        if item in text:
            if 'red cape' in text:
                details['clothing'] = 'red cape'
                details['distinctive_feature'] = 'flowing red cape'
            elif 'hat' in text:
                if 'red hat' in text:
                    details['clothing'] = 'red hat'
                    details['distinctive_feature'] = 'red hat'
                else:
                    details['clothing'] = 'hat'
                    details['distinctive_feature'] = 'hat'
            else:
                details['clothing'] = item
                details['distinctive_feature'] = item
            break

    # Whole-word subject detection, most specific terms first.
    if 'man' in words:
        details['main_object'] = 'man'
    elif 'woman' in words:
        details['main_object'] = 'woman'
    elif 'person' in words:
        details['main_object'] = 'person'
    elif 'people' in words:
        details['main_object'] = 'people'

    setting_indicators = ['outdoor', 'indoor', 'street', 'room', 'building', 'nature', 'park', 'office']
    details['setting_clues'] = [indicator for indicator in setting_indicators if indicator in text]

    return details
| |
|
def get_contextual_subject(description, details):
    """Build a subject phrase for prompts from the caption and extracted details.

    Fix: whole-word matching so captions about a woman no longer fall into
    the "man" branch via substring matching ("man" in "woman").
    """
    words = set(re.findall(r"[a-z]+", description.lower()))

    if 'man' in words:
        noun = 'man'
    elif 'woman' in words:
        noun = 'woman'
    elif 'person' in words:
        return "The person"
    else:
        return "The subject"

    # Attach detected clothing for a more specific reference.
    if details.get('clothing'):
        return f"The {noun} in the {details['clothing']}"
    return f"The {noun}"
| |
|
def get_contextual_actions(description, details):
    """List actions that fit the scene; clothing and subject add extras.

    Fix: the "man"-specific additions now require the whole word "man",
    so captions about a woman no longer receive them via substring match.
    """
    actions = ['speaks', 'gestures', 'moves', 'looks', 'turns']

    clothing = details.get('clothing') or ''
    if 'cape' in clothing:
        actions += ['adjusts cape', 'moves dramatically', 'gestures with cape flowing']
    if 'hat' in clothing:
        actions += ['tips hat', 'adjusts hat', 'nods with hat']

    if 'man' in re.findall(r"[a-z]+", description.lower()):
        actions += ['speaks confidently', 'gestures authoritatively']

    return actions
| |
|
def get_contextual_adverbs(details):
    """Return adverbs suited to the scene's detected clothing."""
    adverbs = ['naturally', 'smoothly', 'slowly', 'gracefully']

    clothing = details.get('clothing') or ''
    if 'cape' in clothing:
        adverbs += ['dramatically', 'majestically', 'with flair']
    if 'hat' in clothing:
        adverbs += ['elegantly', 'with style', 'confidently']

    return adverbs
| |
|
def get_contextual_camera_movement(description, details):
    """Return camera movements suited to the scene's distinctive feature."""
    movements = ['Camera follows steadily', 'Locked camera captures', 'Handheld camera tracks']

    feature = details.get('distinctive_feature') or ''
    if 'cape' in feature:
        movements += ['Camera captures cape movement', 'Tracking shot follows cape flow']
    if 'hat' in feature:
        movements += ['Camera frames from chest up', 'Close tracking of upper body']

    return movements
| |
|
def get_contextual_environment(description, details):
    """Return an environmental effect matching the scene, or None."""
    # `colors` may be a joined string or a list; `in` works for both.
    if 'red' in (details.get('colors') or ''):
        return "lighting enhances red tones"

    if 'cape' in (details.get('clothing') or ''):
        return "cape fabric reacts to air movement"

    return None
| |
|
def get_contextual_style(details):
    """Return a style descriptor based on detected clothing."""
    clothing = details.get('clothing') or ''
    if 'cape' in clothing:
        return "dramatic cinematic style"
    if 'hat' in clothing:
        return "classic portrait style"
    return "professional documentary style"
| |
|
def get_contextual_atmosphere(details):
    """Return an atmosphere phrase matching the scene's colors/clothing."""
    if 'red' in (details.get('colors') or ''):
        return "dramatic atmosphere with rich red tones"

    clothing = details.get('clothing') or ''
    if 'cape' in clothing:
        return "heroic cinematic atmosphere"
    if 'hat' in clothing:
        return "elegant portrait atmosphere"

    return "professional cinematic atmosphere"
| |
|
def optimize_user_prompt(user_idea, scene_info=None):
    """Turn a free-form user idea into a structured English video prompt.

    Fix: guards against ``None`` input (previously raised AttributeError
    on ``user_idea.strip()``).
    """
    if not user_idea or not user_idea.strip():
        return "Please enter your idea first."

    try:
        idea = user_idea.strip()

        # Understand the idea's language/intent, then restructure it.
        analysis = analyze_user_idea(idea)
        optimized = create_optimized_prompt(idea, analysis, scene_info)

        return optimized

    except Exception as e:
        # Surface the failure to the UI rather than crashing the Gradio app.
        return f"Error optimizing prompt: {str(e)}"
| |
|
def analyze_user_idea(idea):
    """Inspect the user's idea for language, content elements, and complexity."""
    lowered = idea.lower()

    analysis = {
        'language': detect_language(idea),
        'has_action': False,
        'has_object': False,
        'has_emotion': False,
        'has_camera': False,
        'complexity': 'simple',
        'main_elements': []
    }

    # Multilingual action verbs -- one hit in any language is enough.
    action_words = {
        'en': ['removes', 'takes off', 'puts on', 'walks', 'runs', 'speaks', 'gestures', 'moves', 'turns', 'looks'],
        'es': ['quita', 'se quita', 'pone', 'camina', 'corre', 'habla', 'gesticula', 'mueve', 'gira', 'mira'],
        'fr': ['enlève', 'met', 'marche', 'court', 'parle', 'gesticule', 'bouge'],
        'de': ['nimmt ab', 'zieht aus', 'geht', 'lΓ€uft', 'spricht', 'bewegt']
    }
    analysis['has_action'] = any(
        verb in lowered for verbs in action_words.values() for verb in verbs
    )

    object_words = ['nose', 'nariz', 'hat', 'sombrero', 'costume', 'traje', 'cape', 'capa', 'mask', 'mΓ‘scara']
    analysis['has_object'] = any(obj in lowered for obj in object_words)

    emotion_words = ['dramatic', 'dramΓ‘tico', 'slow', 'lento', 'fast', 'rΓ‘pido', 'gentle', 'suave', 'powerful', 'poderoso']
    analysis['has_emotion'] = any(word in lowered for word in emotion_words)

    camera_words = ['camera', 'cΓ‘mara', 'shot', 'toma', 'angle', 'Γ‘ngulo', 'close', 'cerca', 'wide', 'amplio']
    analysis['has_camera'] = any(word in lowered for word in camera_words)

    # Complexity scales with how many element categories are present.
    hits = sum((analysis['has_action'], analysis['has_object'],
                analysis['has_emotion'], analysis['has_camera']))
    if hits >= 3:
        analysis['complexity'] = 'complex'
    elif hits >= 2:
        analysis['complexity'] = 'medium'

    return analysis
| |
|
def detect_language(text):
    """Guess the idea's language from common function words.

    Fix: indicators are matched as whole words against a space-padded,
    punctuation-stripped copy of the text, so English words that merely
    *contain* an indicator (e.g. "hello" containing "el") no longer trigger
    a false Spanish/French/German detection. Multi-word indicators such as
    "de la" still match.
    """
    spanish_indicators = ['el', 'la', 'se', 'que', 'con', 'por', 'para', 'del', 'de la', 'nariz', 'payaso']
    french_indicators = ['le', 'la', 'se', 'que', 'avec', 'pour', 'du', 'de la', 'nez', 'clown']
    german_indicators = ['der', 'die', 'das', 'sich', 'mit', 'fΓΌr', 'vom', 'nase', 'clown']

    # Replace punctuation with spaces and pad the ends so every indicator
    # (single- or multi-word) can be matched as " word ".
    padded = " " + re.sub(r"[^\w]+", " ", text.lower()) + " "

    def has_any(indicators):
        return any(f" {indicator} " in padded for indicator in indicators)

    if has_any(spanish_indicators):
        return 'spanish'
    if has_any(french_indicators):
        return 'french'
    if has_any(german_indicators):
        return 'german'
    return 'english'
| |
|
def create_optimized_prompt(idea, analysis, scene_info=None):
    """Create optimized English video prompt from user idea"""

    # Phrase-level dictionaries used to anglicise the idea before
    # structuring. Order matters: longer phrases are listed so that
    # multi-word forms are replaced before their sub-strings.
    translations = {
        'spanish': {
            'se quita': 'removes',
            'quita': 'removes',
            'pone': 'puts on',
            'camina': 'walks',
            'habla': 'speaks',
            'mueve': 'moves',
            'nariz': 'nose',
            'payaso': 'clown',
            'personaje': 'character',
            'sombrero': 'hat',
            'capa': 'cape',
            'lentamente': 'slowly',
            'rΓ‘pidamente': 'quickly',
            'dramΓ‘ticamente': 'dramatically'
        },
        'french': {
            'enlève': 'removes',
            'met': 'puts on',
            'marche': 'walks',
            'parle': 'speaks',
            'bouge': 'moves',
            'nez': 'nose',
            'clown': 'clown',
            'personnage': 'character',
            'chapeau': 'hat',
            'cape': 'cape'
        }
    }

    # Substitute known terms for the detected language (no-op for English).
    table = translations.get(analysis['language'], {})
    optimized_idea = idea
    for source_term, english_term in table.items():
        optimized_idea = optimized_idea.replace(source_term, english_term)

    # Hand the (now English) idea off to the prompt formatter.
    return structure_video_prompt(optimized_idea, analysis, scene_info)
| |
|
def structure_video_prompt(idea, analysis, scene_info=None):
    """Structure the idea into a professional video prompt"""
    lowered = idea.lower()

    # Choose the subject reference, preferring explicit mentions in the idea,
    # then falling back to the scene analysis, then a generic "The subject".
    if 'character' in lowered or 'personaje' in lowered:
        subject = "The character"
    elif 'person' in lowered or 'persona' in lowered:
        subject = "The person"
    elif scene_info and scene_info.get('has_person'):
        subject = extract_intelligent_subject_reference(scene_info)
    else:
        subject = "The subject"

    action = extract_action_from_idea(idea)
    complexity = analysis['complexity']

    # Scale the amount of detail with the detected complexity.
    if complexity == 'simple':
        prompt = f"{subject} {action} naturally. Camera captures the motion smoothly"
    elif complexity == 'medium':
        prompt = f"{subject} {action} while camera follows steadily"
        if analysis['has_emotion']:
            prompt += ", dramatic lighting enhances the mood"
        else:
            prompt += ", professional lighting"
    else:
        prompt = (
            f"{subject} {action} expressively while camera tracks the motion"
            ", lighting and environment support the action, cinematic atmosphere"
        )

    # Final pass upgrades casual wording into video terminology.
    return improve_technical_language(prompt)
| |
|
def extract_action_from_idea(idea):
    """Extract and refine the main action from user's idea"""
    lowered = idea.lower()

    # (trigger substring, canonical English verb); order preserved so
    # multi-word Spanish forms still resolve to the same verb.
    verb_map = [
        ('removes', 'removes'),
        ('quita', 'removes'),
        ('se quita', 'removes'),
        ('takes off', 'removes'),
        ('puts on', 'puts on'),
        ('pone', 'puts on'),
        ('walks', 'walks'),
        ('camina', 'walks'),
        ('speaks', 'speaks'),
        ('habla', 'speaks'),
        ('moves', 'moves'),
        ('mueve', 'moves'),
        ('turns', 'turns'),
        ('gira', 'turns'),
        ('looks', 'looks'),
        ('mira', 'looks'),
    ]
    removal_triggers = {'removes', 'quita', 'se quita', 'takes off'}

    action = "moves"  # fallback verb when nothing matches
    object_part = ""

    for trigger, verb in verb_map:
        if trigger not in lowered:
            continue
        action = verb
        if trigger in removal_triggers:
            # Removal verbs get a direct object when one is recognisable.
            if 'nose' in lowered or 'nariz' in lowered:
                if 'clown' in lowered or 'payaso' in lowered:
                    object_part = "the clown nose"
                else:
                    object_part = "the nose piece"
            elif 'hat' in lowered or 'sombrero' in lowered:
                object_part = "the hat"
            elif 'mask' in lowered or 'mΓ‘scara' in lowered:
                object_part = "the mask"
        break

    return f"{action} {object_part}" if object_part else action
| |
|
def improve_technical_language(prompt):
    """Improve the prompt with professional video terminology.

    Applies a fixed set of phrase upgrades in insertion order via plain
    substring replacement, so earlier upgrades can feed later ones
    (e.g. "moves naturally" is rewritten before "smoothly").

    Fix: the original table also mapped 'Camera captures' to itself —
    a dead identity entry — which has been removed.
    """
    improvements = {
        'moves naturally': 'moves with natural grace',
        'smoothly': 'with smooth motion',
        'follows steadily': 'follows with steady tracking',
        'dramatic lighting': 'dramatic lighting transitions',
        'professional lighting': 'professional lighting setup',
        'cinematic atmosphere': 'rich cinematic atmosphere'
    }

    improved_prompt = prompt
    for basic, enhanced in improvements.items():
        improved_prompt = improved_prompt.replace(basic, enhanced)

    return improved_prompt
| |
|
def refine_prompt_with_feedback(current_prompt, feedback, chat_history, scene_info=None):
    """Use AI to intelligently refine prompts based on user feedback"""
    # Blank feedback: nothing to refine, return state unchanged.
    if not feedback.strip():
        return current_prompt, chat_history

    # Understand the request, apply it, then explain what changed.
    request = analyze_refinement_request(feedback, current_prompt, scene_info)
    refined = apply_intelligent_refinement(current_prompt, request, scene_info)
    summary = create_refinement_explanation(request, current_prompt, refined)

    # Append a new (user, assistant) exchange without mutating the input list.
    updated_history = chat_history + [
        [feedback, f"π€ {summary}\n\nβ¨ **Refined Prompt**: {refined}"]
    ]

    return refined, updated_history
| |
|
def analyze_refinement_request(feedback, current_prompt, scene_info):
    """Analyze what the user wants to change using AI understanding"""
    text = feedback.lower()
    analysis = {
        'request_type': 'general',
        'intensity': 'moderate',
        'focus_area': 'action',
        'style_preference': None,
        'specific_elements': [],
        'language': detect_language(feedback)
    }

    # (keywords, updates) rules checked in priority order; first hit wins,
    # mirroring the original if/elif cascade.
    rules = [
        (['dramatic', 'dramΓ‘tico', 'dramatique', 'dramatisch'],
         {'request_type': 'dramatic', 'intensity': 'high'}),
        (['slow', 'slower', 'lento', 'mΓ‘s lento', 'lentement'],
         {'request_type': 'pace', 'intensity': 'slow'}),
        (['fast', 'faster', 'rΓ‘pido', 'mΓ‘s rΓ‘pido', 'rapide'],
         {'request_type': 'pace', 'intensity': 'fast'}),
        (['camera', 'cΓ‘mara', 'camΓ©ra', 'kamera'],
         {'request_type': 'camera', 'focus_area': 'cinematography'}),
        (['lighting', 'light', 'luz', 'lumière', 'licht'],
         {'request_type': 'lighting', 'focus_area': 'atmosphere'}),
        (['simple', 'simpler', 'mΓ‘s simple', 'plus simple'],
         {'request_type': 'simplify', 'intensity': 'low'}),
        (['complex', 'complicated', 'detalle', 'detail', 'dΓ©tail'],
         {'request_type': 'elaborate', 'intensity': 'high'}),
        (['elegant', 'elegante', 'Γ©lΓ©gant'],
         {'request_type': 'style', 'style_preference': 'elegant'}),
        (['powerful', 'poderoso', 'puissant'],
         {'request_type': 'style', 'style_preference': 'powerful'}),
        (['natural', 'natural', 'naturel'],
         {'request_type': 'style', 'style_preference': 'natural'}),
    ]
    for keywords, updates in rules:
        if any(word in text for word in keywords):
            analysis.update(updates)
            break

    # Collect any concrete scene elements the feedback mentions.
    for element in ['costume', 'dress', 'cape', 'hat', 'background', 'face', 'hands', 'movement']:
        if element in text:
            analysis['specific_elements'].append(element)

    return analysis
| |
|
def apply_intelligent_refinement(current_prompt, analysis, scene_info):
    """Apply intelligent refinement based on analysis"""
    # Dispatch table: request type -> refinement routine (deferred via
    # lambdas because the handlers take different argument sets).
    handlers = {
        'dramatic': lambda: enhance_dramatic_elements(current_prompt, analysis, scene_info),
        'pace': lambda: adjust_pace(current_prompt, analysis),
        'camera': lambda: enhance_camera_work(current_prompt, analysis, scene_info),
        'lighting': lambda: enhance_lighting(current_prompt, analysis, scene_info),
        'simplify': lambda: simplify_prompt(current_prompt),
        'elaborate': lambda: elaborate_prompt(current_prompt, scene_info),
        'style': lambda: apply_style_preference(current_prompt, analysis, scene_info),
    }

    # Unknown request types fall back to the general enhancer.
    handler = handlers.get(
        analysis['request_type'],
        lambda: apply_general_enhancement(current_prompt, analysis, scene_info),
    )
    return handler()
| |
|
def enhance_dramatic_elements(prompt, analysis, scene_info):
    """Enhance dramatic elements intelligently"""
    # Ordered phrase upgrades applied as plain substring replacement.
    upgrades = (
        ('naturally', 'dramatically with intensity'),
        ('smoothly', 'with powerful emphasis'),
        ('gently', 'boldly'),
        ('moves', 'commands attention'),
        ('speaks', 'declares passionately'),
        ('gestures', 'gestures with commanding presence'),
        ('professional lighting', 'dramatic lighting with stark contrasts'),
        ('cinematic lighting', 'theatrical lighting with deep shadows'),
    )
    result = prompt
    for plain, dramatic in upgrades:
        result = result.replace(plain, dramatic)

    # Call out scene props that can amplify the drama.
    if scene_info and scene_info.get('distinctive_elements'):
        elements_text = str(scene_info['distinctive_elements'])
        if 'costume' in elements_text:
            result += ". Costume elements amplify the dramatic presence"
        if 'cape' in elements_text:
            result += ". Cape billows dramatically with movement"

    # Make the camera language dynamic as well.
    if 'Camera captures' in result:
        result = result.replace('Camera captures', 'Dynamic camera captures')

    return result
| |
|
def adjust_pace(prompt, analysis):
    """Adjust the pace of action"""
    # Two fixed substitution tables, chosen by requested intensity.
    slow_map = (
        ('naturally', 'slowly and deliberately'),
        ('smoothly', 'in measured slow motion'),
        ('moves', 'moves with deliberate slowness'),
        ('speaks', 'speaks thoughtfully'),
        ('gestures', 'gestures with careful precision'),
    )
    fast_map = (
        ('naturally', 'with energetic quickness'),
        ('slowly', 'rapidly'),
        ('smoothly', 'with swift fluidity'),
        ('deliberate', 'rapid'),
        ('measured', 'quick'),
    )

    replacements = slow_map if analysis['intensity'] == 'slow' else fast_map

    result = prompt
    for before, after in replacements:
        result = result.replace(before, after)
    return result
| |
|
def enhance_camera_work(prompt, analysis, scene_info):
    """Enhance camera work based on scene context"""
    result = prompt

    # Upgrade generic camera phrases to more cinematic language.
    for plain, upgraded in (
        ('Camera captures', 'Dynamic camera work captures'),
        ('camera follows', 'cinematic camera tracks'),
        ('handheld camera', 'fluid handheld camera movement'),
        ('steady camera', 'precision camera operation'),
        ('locked camera', 'artistically locked camera'),
    ):
        result = result.replace(plain, upgraded)

    # Composition-aware framing notes from the scene analysis.
    if scene_info:
        composition = scene_info.get('composition', '')
        if 'Wide' in composition:
            result += ". Wide tracking shots reveal environmental context"
        elif 'Portrait' in composition:
            result += ". Intimate camera framing emphasizes character details"

    # Guarantee at least one camera direction in the prompt.
    if 'camera' not in result.lower():
        result += ". Sophisticated camera movement enhances the narrative"

    return result
| |
|
def enhance_lighting(prompt, analysis, scene_info):
    """Enhance lighting based on scene context"""
    result = prompt

    # Upgrade generic lighting phrases.
    for plain, upgraded in (
        ('professional lighting', 'artistic lighting design'),
        ('cinematic lighting', 'masterful cinematic lighting'),
        ('dramatic lighting', 'sculptural dramatic lighting'),
        ('natural lighting', 'beautiful natural light'),
    ):
        result = result.replace(plain, upgraded)

    # Tone-specific lighting notes from the scene analysis.
    if scene_info:
        tone_notes = {
            'dramatic': ". High-contrast lighting creates powerful shadows",
            'elegant': ". Soft, sophisticated lighting enhances refinement",
            'theatrical': ". Stage-quality lighting emphasizes performance",
        }
        note = tone_notes.get(scene_info.get('emotional_tone', 'neutral'))
        if note:
            result += note

    # Guarantee the prompt says something about light.
    if 'lighting' not in result.lower() and 'light' not in result.lower():
        result += ". Expressive lighting design supports the mood"

    return result
| |
|
def simplify_prompt(prompt):
    """Simplify prompt to essential elements"""
    sentences = prompt.split('.')

    # Single-sentence prompts are already minimal.
    if len(sentences) <= 1:
        return prompt

    # Keep only the first sentence, plus a minimal camera/lighting cue
    # when the original prompt mentioned one.
    simplified = sentences[0] + '.'
    lowered = prompt.lower()
    if 'camera' in lowered:
        simplified += " Camera follows naturally."
    elif 'lighting' in lowered:
        simplified += " Natural lighting."

    return simplified
| |
|
def elaborate_prompt(prompt, scene_info):
    """Add sophisticated details to the prompt"""
    pieces = [prompt]

    if scene_info:
        # Setting-specific flourish (only the three known settings add one).
        setting_notes = {
            'outdoor': ". Environmental elements respond subtly to the action",
            'indoor': ". Interior atmosphere enhances intimate connection",
            'performance': ". Stage environment supports theatrical presence",
        }
        note = setting_notes.get(scene_info.get('setting', 'neutral'))
        if note:
            pieces.append(note)

        # Highlight the first distinctive element when it is costume/color related.
        distinctive = scene_info.get('distinctive_elements', [])
        if distinctive:
            first = distinctive[0]
            if 'costume' in first:
                pieces.append(". Costume textures and details visible in motion")
            elif 'color' in first:
                pieces.append(". Color palette enhanced through dynamic lighting")

    # Universal closing flourish.
    pieces.append(". Multi-layered composition with depth and visual interest")

    return "".join(pieces)
| |
|
def apply_style_preference(prompt, analysis, scene_info):
    """Apply specific style preferences"""
    # Per-style substitution tables; unknown preferences leave the
    # prompt untouched.
    tables = {
        'elegant': {
            'dramatically': 'with refined elegance',
            'boldly': 'gracefully',
            'powerfully': 'with sophisticated poise',
            'dramatic lighting': 'elegant lighting transitions',
            'intensive': 'refined'
        },
        'powerful': {
            'gently': 'with commanding force',
            'naturally': 'with authoritative presence',
            'smoothly': 'with decisive power',
            'professional lighting': 'bold, impactful lighting'
        },
        'natural': {
            'dramatically': 'naturally',
            'theatrical': 'authentic',
            'commanding': 'genuine',
            'dramatic lighting': 'natural lighting'
        },
    }

    table = tables.get(analysis['style_preference'])
    if table is None:
        return prompt

    styled = prompt
    for original, replacement in table.items():
        styled = styled.replace(original, replacement)
    return styled
| |
|
def apply_general_enhancement(prompt, analysis, scene_info):
    """Apply general enhancements based on context"""
    result = prompt

    # Upgrade bare verbs unless the phrase already carries embellishment.
    if 'moves' in result and 'gracefully' not in result:
        result = result.replace('moves', 'moves with purposeful grace')
    if 'speaks' in result and 'expressively' not in result:
        result = result.replace('speaks', 'speaks with genuine expression')

    # Echo the detected emotional tone when it is informative and
    # not already present in the text.
    if scene_info:
        tone = scene_info.get('emotional_tone', 'neutral')
        if tone != 'neutral' and tone not in result:
            result += f". {tone.capitalize()} energy throughout"

    return result
| |
|
def create_refinement_explanation(analysis, original, refined):
    """Create an explanation of what was changed.

    Parameters
    ----------
    analysis : dict
        Output of ``analyze_refinement_request``; consults
        'request_type', 'language', and (depending on type)
        'intensity' / 'style_preference'.
    original, refined : str
        Prompts before/after refinement (currently unused; kept for
        interface compatibility).

    Fix: the original built every explanation string eagerly, so a
    partial analysis dict without 'intensity' or 'style_preference'
    raised KeyError even when the request type did not need those keys.
    Explanations are now built lazily with ``.get``.
    """
    request_type = analysis.get('request_type', 'general')

    if request_type == 'pace':
        pace = 'slow and deliberate' if analysis.get('intensity') == 'slow' else 'energetic and quick'
        base_explanation = f"I've adjusted the pacing to be more {pace}."
    elif request_type == 'style':
        base_explanation = f"I've adjusted the style to be more {analysis.get('style_preference')}."
    else:
        explanations = {
            'dramatic': "I've enhanced the dramatic intensity by upgrading the actions and adding powerful lighting elements.",
            'camera': "I've enhanced the camera work with more sophisticated cinematography techniques.",
            'lighting': "I've upgraded the lighting description to create more visual impact.",
            'simplify': "I've simplified the prompt to focus on the essential action.",
            'elaborate': "I've added more sophisticated details and environmental context.",
        }
        base_explanation = explanations.get(request_type, "I've enhanced the prompt based on your feedback.")

    # Non-English users get a short acknowledgement in their language;
    # the English body is lowercased so it reads as one sentence.
    if analysis.get('language') != 'english':
        language_notes = {
            'spanish': "Entiendo tu sugerencia y ",
            'french': "Je comprends votre suggestion et ",
            'german': "Ich verstehe Ihren Vorschlag und "
        }
        prefix = language_notes.get(analysis.get('language'), "")
        base_explanation = prefix + base_explanation.lower()

    return base_explanation
| |
|
def generate_gen4_prompts_local(scene_info, user_input=""):
    """Generate Gen-4 prompts using iterative building"""
    try:
        description = scene_info.get('description', '')

        # Scene specifics drive every stage of the build-up.
        details = extract_specific_details(description)
        subject_ref = get_contextual_subject(description, details)

        # Stage 1: bare subject + action.
        if scene_info.get('has_person', False):
            basic = f"{subject_ref} {random.choice(get_contextual_actions(description, details))} to camera"
        else:
            verb = random.choice(['moves', 'shifts', 'transforms'])
            basic = f"The {details.get('main_object', 'main element')} {verb}"

        # Stage 2: add a motion adverb to the subject.
        with_subject = f"{basic} {random.choice(get_contextual_adverbs(details))}"

        # Stage 3: layer in camera motion.
        with_camera = f"{with_subject}. {random.choice(get_contextual_camera_movement(description, details))}"

        # Stage 4: scene/style flavour, preferring detected colors.
        setting = scene_info.get('setting', 'neutral')
        if details.get('colors'):
            style_addition = f"{details['colors']} tones enhanced by lighting. {get_contextual_atmosphere(details)}"
        elif setting == 'outdoor':
            style_addition = "Natural lighting enhances the scene. Cinematic"
        else:
            style_addition = f"Professional lighting highlights {details.get('distinctive_feature', 'the subject')}. Documentary style"
        complete = f"{with_camera}. {style_addition}"

        stages = [
            f"**Basic**: {basic}",
            f"**+ Subject Motion**: {with_subject}",
            f"**+ Camera Motion**: {with_camera}",
            f"**+ Scene/Style**: {complete}",
        ]
        return "\n\n".join(stages)

    except Exception as e:
        # Surface failures as text so the UI never crashes.
        return f"Error generating Gen-4 prompts: {str(e)}"
| |
|
def build_custom_prompt_local(foundation, subject_motion, scene_motion, camera_motion, style, approach="SARA"):
    """Build custom prompt using selected approach"""
    if approach == "SARA":
        # SARA: foundation + combined motions + camera reference + style,
        # joined by single spaces.
        segments = []
        if foundation:
            segments.append(foundation)

        # Subject and scene motions merge into one comma-separated clause.
        motions = list(subject_motion or []) + list(scene_motion or [])
        if motions:
            segments.append(", ".join(motions))

        # Reference anchor: explicit camera motion or the steady default.
        if camera_motion:
            segments.append(f"while {camera_motion}")
        else:
            segments.append("while background remains steady")

        if style:
            segments.append(style)

        return " ".join(segments)

    # Gen-4 iterative: each element becomes its own sentence.
    segments = []
    if foundation:
        segments.append(foundation)
    if subject_motion:
        segments.extend(subject_motion)
    if camera_motion:
        segments.append(camera_motion)
    if scene_motion:
        segments.extend(scene_motion)
    if style:
        segments.append(style)

    return ". ".join(segments) if segments else "The subject moves naturally"
| |
|
def get_smart_suggestions_local(scene_info):
    """Generate intelligent suggestions using AI-enhanced analysis"""
    enhanced_description = scene_info.get('enhanced_description', '')

    # Nothing to suggest until an image has been analyzed.
    if not enhanced_description:
        return "Please analyze an image first to generate smart suggestions."

    emotional_tone = scene_info.get('emotional_tone', 'neutral')
    visual_style = scene_info.get('visual_style', 'cinematic')
    distinctive_elements = scene_info.get('distinctive_elements', [])
    motion_potential = scene_info.get('motion_potential', [])
    setting = scene_info.get('setting', 'neutral')
    cinematic_qualities = scene_info.get('cinematic_qualities', [])

    subject_ref = extract_intelligent_subject_reference(scene_info)
    tips = [
        f'π€ **AI Analysis**: {enhanced_description}',
        f'π― **Smart Reference**: Use "{subject_ref}" for optimal clarity',
    ]

    # Top three actions matched to the detected tone.
    actions = generate_tone_appropriate_actions(emotional_tone, scene_info)[:3]
    tips.append(f'π **Tone-Matched Actions**: {", ".join(actions)}')

    if motion_potential:
        tips.append(f'π¬ **Motion Opportunities**: {", ".join(motion_potential[:3])}')

    if distinctive_elements:
        tips.append(f'β¨ **Key Elements to Highlight**: {", ".join(distinctive_elements[:2])}')

    # Two camera suggestions matched to the visual style.
    style_cameras = generate_style_appropriate_cameras(visual_style, cinematic_qualities)[:2]
    tips.append(f'π₯ **Style-Appropriate Cameras**: {", ".join(style_cameras)}')

    # Three adverb samples for the detected tone.
    appropriate_adverbs = [get_tone_appropriate_adverb(emotional_tone) for _ in range(3)]
    tips.append(f'π« **Emotional Adverbs**: {", ".join(appropriate_adverbs)}')

    # One setting-specific tip, when the setting is recognised.
    setting_tips = {
        'performance': 'πͺ **Performance Context**: Focus on stage presence and audience engagement',
        'outdoor': 'πΏ **Outdoor Setting**: Leverage natural lighting and environmental elements',
        'indoor': 'π **Indoor Context**: Utilize controlled lighting and intimate framing',
    }
    if setting in setting_tips:
        tips.append(setting_tips[setting])

    if cinematic_qualities:
        tips.append(f'π¬ **Cinematic Opportunities**: {", ".join(cinematic_qualities[:2])}')

    tips.append(f'π **Recommended Atmosphere**: {get_style_appropriate_atmosphere(visual_style, emotional_tone)}')

    # Cap the list at ten suggestions.
    return "\n".join(tips[:10])
| |
|
def generate_instant_prompts(scene_info):
    """Generate sophisticated ready-to-use prompts based on AI-enhanced analysis"""
    # Pull analysis fields produced by the image-analysis step; every key
    # has a safe default so a partial scene_info dict still works.
    enhanced_description = scene_info.get('enhanced_description', '')
    emotional_tone = scene_info.get('emotional_tone', 'neutral')
    visual_style = scene_info.get('visual_style', 'cinematic')
    distinctive_elements = scene_info.get('distinctive_elements', [])
    cinematic_qualities = scene_info.get('cinematic_qualities', [])
    motion_potential = scene_info.get('motion_potential', [])

    # Guard: without an enhanced description there is nothing to build on.
    if not enhanced_description:
        return "Please analyze an image first to generate instant prompts."

    # How the subject is referred to throughout (e.g. "The man in costume").
    subject_ref = extract_intelligent_subject_reference(scene_info)

    # Candidate actions matched to the detected emotional tone.
    actions = generate_tone_appropriate_actions(emotional_tone, scene_info)

    # Candidate camera moves matched to the visual style.
    camera_movements = generate_style_appropriate_cameras(visual_style, cinematic_qualities)

    # Accumulates markdown lines; joined with newlines at the end.
    instant_prompts = []

    # Section 1: three short "subject action adverb" prompts.
    instant_prompts.append("π€ **AI-Powered Simple Prompts:**")
    for i in range(3):
        action = random.choice(actions)
        adverb = get_tone_appropriate_adverb(emotional_tone)
        instant_prompts.append(f" β’ {subject_ref} {action} {adverb}")

    # Section 2: SARA-style prompts (subject/action + camera + atmosphere),
    # randomly weaving in a distinctive scene element about half the time.
    instant_prompts.append("\nπ§ **Context-Aware SARA Prompts:**")
    for i in range(3):
        action = random.choice(actions)
        adverb = get_tone_appropriate_adverb(emotional_tone)
        camera = random.choice(camera_movements)
        atmosphere = get_style_appropriate_atmosphere(visual_style, emotional_tone)

        # Coin flip: optionally highlight one distinctive element.
        if distinctive_elements and random.choice([True, False]):
            distinctive = random.choice(distinctive_elements)
            instant_prompts.append(f" β’ {subject_ref} {action} {adverb} while {camera}, {distinctive} enhanced, {atmosphere}")
        else:
            instant_prompts.append(f" β’ {subject_ref} {action} {adverb} while {camera}, {atmosphere}")

    # Section 3: Gen-4 style prompts built iteratively
    # (basic -> motion -> camera -> style).
    instant_prompts.append("\n㪠**Intelligence-Enhanced Gen-4:**")
    for i in range(3):
        action = random.choice(actions)
        adverb = get_tone_appropriate_adverb(emotional_tone)
        camera = random.choice(camera_movements)

        # Layered construction: each stage extends the previous one.
        basic = f"{subject_ref} {action}"
        with_motion = f"{basic} {adverb}"
        with_camera = f"{with_motion}. {camera}"

        # Style layer: prefer a distinctive element when one exists.
        if distinctive_elements:
            distinctive = random.choice(distinctive_elements)
            style_addition = f"{distinctive} highlighted by {get_lighting_for_style(visual_style)}"
        else:
            style_addition = f"{get_lighting_for_style(visual_style)} enhances {emotional_tone} mood"

        complete = f"{with_camera}. {style_addition}"
        instant_prompts.append(f" β’ {complete}")

    # Section 4: specialized prompts keyed off detected motion potential
    # and cinematic qualities; each appears only when its trigger is present.
    instant_prompts.append("\n⨠**Specialized AI Prompts:**")

    if 'costume dynamics' in motion_potential:
        instant_prompts.append(f" π **Costume Dynamics**: {subject_ref} {random.choice(actions)} while camera captures fabric textures, costume elements react to movement, theatrical lighting")

    if 'facial expressions' in motion_potential:
        instant_prompts.append(f" π **Expression Focus**: {subject_ref} {random.choice(['expresses emotion', 'speaks meaningfully', 'reacts naturally'])} while camera maintains intimate framing, {emotional_tone} energy emphasized")

    if 'dramatic lighting potential' in cinematic_qualities:
        instant_prompts.append(f" π‘ **Dramatic Lighting**: {subject_ref} {random.choice(actions)} as lighting creates dramatic shadows, visual contrast enhances {emotional_tone} mood, cinematic depth")

    if 'color enhancement opportunities' in cinematic_qualities:
        # Only elements tagged with 'coloring' qualify for the color prompt.
        colors = [elem for elem in distinctive_elements if 'coloring' in elem]
        if colors:
            instant_prompts.append(f" π¨ **Color Enhanced**: {subject_ref} {random.choice(actions)} while lighting dramatically enhances {colors[0]}, color grading emphasizes mood, {visual_style} aesthetic")

    # Setting-specific prompt (performance stage vs. outdoor scene).
    setting = scene_info.get('setting', 'neutral')
    if setting == 'performance':
        instant_prompts.append(f" πͺ **Performance Mode**: {subject_ref} {random.choice(['performs', 'presents', 'commands attention'])} while audience perspective maintained, {emotional_tone} stage presence, professional capture")
    elif setting == 'outdoor':
        instant_prompts.append(f" πΏ **Environmental Harmony**: {subject_ref} {random.choice(actions)} as natural elements complement motion, environmental lighting, organic {visual_style} feel")

    # Section 5: one long composite prompt combining every dimension.
    instant_prompts.append("\nπ **Advanced AI Composite:**")

    advanced_action = random.choice(actions)
    advanced_adverb = get_tone_appropriate_adverb(emotional_tone)
    advanced_camera = random.choice(camera_movements)
    advanced_atmosphere = get_style_appropriate_atmosphere(visual_style, emotional_tone)

    if distinctive_elements:
        advanced_distinctive = random.choice(distinctive_elements)
        advanced_prompt = f"{subject_ref} {advanced_action} {advanced_adverb} as {advanced_camera} captures nuanced details, {advanced_distinctive} dynamically enhanced, lighting and color grading amplify {emotional_tone} undertones, {advanced_atmosphere} with {visual_style} cinematography"
    else:
        advanced_prompt = f"{subject_ref} {advanced_action} {advanced_adverb} while {advanced_camera} follows natural rhythm, environmental elements support the motion, {advanced_atmosphere} with intelligent {visual_style} direction"

    instant_prompts.append(f" β’ {advanced_prompt}")

    return "\n".join(instant_prompts)
| |
|
def extract_intelligent_subject_reference(scene_info):
    """Extract intelligent subject reference using AI analysis"""
    # Without a detected person, use the generic reference.
    if not scene_info.get('has_person', False):
        return "The subject"

    # Prefer a specific phrase found in the AI-enhanced description.
    enhanced = scene_info.get('enhanced_description', '')
    if isinstance(enhanced, str):
        lowered = enhanced.lower()
        # Checked in order; substring matching, mirroring the original chain.
        for phrase, reference in (
            ('man in costume', 'The man in costume'),
            ('woman in dress', 'The woman in dress'),
            ('man in suit', 'The man in suit'),
        ):
            if phrase in lowered:
                return reference

    # Fall back to the basic caption; note 'man' also matches inside
    # 'woman' (substring semantics preserved from the original).
    basic = scene_info.get('basic_description', '')
    if isinstance(basic, str):
        lowered = basic.lower()
        if 'man' in lowered:
            return "The man"
        elif 'woman' in lowered:
            return "The woman"
        elif 'person' in lowered:
            return "The person"

    return "The subject"
| |
|
def generate_tone_appropriate_actions(emotional_tone, scene_info):
    """Generate actions that match the emotional tone"""
    base_actions = {
        'dramatic': ['moves powerfully', 'gestures boldly', 'commands attention', 'strikes a pose', 'displays intensity'],
        'elegant': ['moves gracefully', 'gestures refined', 'poses elegantly', 'demonstrates poise', 'flows naturally'],
        'theatrical': ['performs dramatically', 'presents theatrically', 'expresses character', 'embodies role', 'captivates audience'],
        'serious': ['maintains composure', 'speaks authoritatively', 'gestures formally', 'projects confidence', 'demonstrates focus'],
        'cheerful': ['expresses joy', 'gestures enthusiastically', 'radiates energy', 'shows warmth', 'displays positivity'],
        'professional': ['presents professionally', 'maintains bearing', 'demonstrates expertise', 'projects authority', 'engages formally'],
        'neutral': ['moves naturally', 'gestures appropriately', 'maintains presence', 'expresses subtly', 'demonstrates character']
    }

    # Copy so callers never mutate the shared table; unknown tones fall
    # back to the neutral set.
    actions = list(base_actions.get(emotional_tone, base_actions['neutral']))

    # Add prop-specific actions for notable scene elements.
    for element in scene_info.get('distinctive_elements') or []:
        if 'costume' in element:
            actions += ['adjusts costume', 'displays costume details']
        elif 'cape' in element:
            actions += ['gestures with cape', 'moves dramatically with cape']
        elif 'flag' in element:
            actions += ['acknowledges flag', 'presents with flag']

    return actions
| |
|
def generate_style_appropriate_cameras(visual_style, cinematic_qualities):
    """Generate camera movements appropriate for the visual style"""
    # Baseline camera vocabulary per style; unknown styles use 'cinematic'.
    base_cameras = {
        'cinematic': ['camera glides smoothly', 'tracking shot follows', 'camera orbits elegantly', 'dolly movement captures', 'crane shot reveals'],
        'dramatic': ['camera emphasizes motion', 'dynamic camera movement', 'camera captures intensity', 'bold camera work follows', 'dramatic camera angles'],
        'theatrical': ['camera frames performance', 'audience perspective maintained', 'camera captures stage presence', 'performance-focused framing', 'theatrical camera work'],
        'professional': ['steady camera captures', 'professional camera movement', 'controlled camera work', 'camera maintains stability', 'precise camera tracking'],
        'documentary': ['handheld camera follows', 'natural camera movement', 'camera observes genuinely', 'documentary-style capture', 'authentic camera work']
    }

    cameras = list(base_cameras.get(visual_style, base_cameras['cinematic']))

    # Every detected cinematic quality contributes its extra moves
    # (independent checks, not mutually exclusive).
    quality_extras = (
        ('horizontal camera movements', ['camera pans horizontally', 'lateral camera movement']),
        ('vertical movement', ['camera tilts vertically', 'vertical camera motion']),
        ('environmental context', ['camera reveals environment', 'wide establishing shots']),
    )
    for quality, moves in quality_extras:
        if quality in cinematic_qualities:
            cameras.extend(moves)

    return cameras
| |
|
def get_tone_appropriate_adverb(emotional_tone):
    """Get adverbs that match the emotional tone"""
    # Per-tone adverb pools; an unrecognized tone falls back to 'neutral'.
    adverbs = {
        'dramatic': ['powerfully', 'intensely', 'dramatically', 'boldly', 'majestically'],
        'elegant': ['gracefully', 'refinedly', 'elegantly', 'smoothly', 'sophisticatedly'],
        'theatrical': ['dramatically', 'expressively', 'theatrically', 'charismatically', 'captivating'],
        'serious': ['authoritatively', 'professionally', 'formally', 'confidently', 'purposefully'],
        'cheerful': ['enthusiastically', 'energetically', 'warmly', 'positively', 'vibrantly'],
        'professional': ['professionally', 'precisely', 'competently', 'expertly', 'authoritatively'],
        'neutral': ['naturally', 'smoothly', 'appropriately', 'genuinely', 'authentically']
    }

    pool = adverbs.get(emotional_tone)
    if pool is None:
        pool = adverbs['neutral']
    return random.choice(pool)
| |
|
def get_style_appropriate_atmosphere(visual_style, emotional_tone):
    """Get atmosphere that combines style and tone"""
    # Each known style supplies an (adjective, noun) pair wrapped around
    # the emotional tone, e.g. 'dramatic' -> 'dramatic <tone> mood'.
    style_phrasing = {
        'cinematic': ('cinematic', 'atmosphere'),
        'dramatic': ('dramatic', 'mood'),
        'theatrical': ('theatrical', 'presence'),
        'professional': ('professional', 'environment'),
        'documentary': ('authentic', 'feeling'),
    }

    if visual_style in style_phrasing:
        adjective, noun = style_phrasing[visual_style]
        return f'{adjective} {emotional_tone} {noun}'

    # Unknown styles: use the style name itself as the adjective.
    return f'{visual_style} {emotional_tone} atmosphere'
| |
|
def get_lighting_for_style(visual_style):
    """Get appropriate lighting description for visual style"""
    # Map each known visual style to a lighting phrase; anything else
    # falls back to 'cinematic lighting'.
    lighting = {
        'cinematic': 'cinematic lighting',
        'dramatic': 'dramatic lighting',
        'theatrical': 'stage lighting',
        'professional': 'professional lighting',
        'documentary': 'natural lighting'
    }

    return lighting.get(visual_style, 'cinematic lighting')

    # NOTE(review): everything below is unreachable — it sits after the
    # return above. It looks like an orphaned duplicate of the prompt-building
    # loop in generate_instant_prompts: it references names that are never
    # defined in this function (contextual_actions, contextual_adverbs,
    # camera_moves, subject_ref, specific_details, instant_prompts) and would
    # raise NameError if it ever ran. Confirm against the original layout and
    # delete this block.
    for i in range(3):
        action = random.choice(contextual_actions)
        adverb = random.choice(contextual_adverbs)
        camera = random.choice(camera_moves)

        basic = f"{subject_ref} {action}"
        with_motion = f"{basic} {adverb}"
        with_camera = f"{with_motion}. {camera}"

        if specific_details.get('colors'):
            style_addition = f"{specific_details['colors']} tones enhanced by lighting"
        else:
            style_addition = "Cinematic lighting"
        complete = f"{with_camera}. {style_addition}"

        instant_prompts.append(f"π **Gen-4**: {complete}")

    if specific_details.get('clothing'):
        clothing = specific_details['clothing']
        if 'cape' in clothing:
            instant_prompts.append(f"π¦Έ **Cape Focus**: {subject_ref} moves dramatically while camera captures cape movement, wind effects enhance cape flow, heroic atmosphere")
        if 'dress' in clothing:
            instant_prompts.append(f"π **Dress Focus**: {subject_ref} moves gracefully while camera tracks smoothly, fabric reacts to movement, elegant atmosphere")
        if 'hat' in clothing:
            instant_prompts.append(f"π© **Hat Focus**: {subject_ref} tips hat confidently while camera frames from chest up, professional lighting")

    if specific_details.get('colors'):
        colors = specific_details['colors']
        instant_prompts.append(f"π¨ **Color Enhanced**: {subject_ref} {random.choice(contextual_actions)} while lighting dramatically enhances {colors} tones, cinematic depth")

    return "\n\n".join(instant_prompts)
| |
|
def copy_to_foundation(prompt_text, approach):
    """Extract the main prompt from formatted text for foundation field"""
    # Formatted prompts look like '<emoji> **Label**: actual prompt text';
    # return only the text after the first '**: ' marker when one exists.
    # (`approach` is accepted for interface compatibility but unused here.)
    if "**" in prompt_text:
        _, marker, remainder = prompt_text.partition("**: ")
        if marker:
            return remainder
    return prompt_text
| |
|
| | |
# Build the Gradio UI. Layout: a "Prompting Guide" tab (static markdown) and a
# generator tab with two columns — left: image upload/analysis + instant
# prompts; right: AI assistant / Gen-4 tabs plus a manual prompt builder.
# Event handlers (analyze_image_simple, get_smart_suggestions_local, etc.)
# are defined elsewhere in this file.
with gr.Blocks(theme=gr.themes.Soft(), title="Universal Video Prompting Tool") as demo:
    gr.Markdown("# π¬ Universal Video Prompting Tool")
    gr.Markdown("*Compatible with Gen-4, Sora, Pika, Luma, Runway & all AI video models*")
    gr.Markdown("**Combines official Gen-4 guidelines with advanced SARA Framework**")

    with gr.Tabs():
        with gr.TabItem("π Prompting Guide"):
            gr.Markdown(unified_instructions)

        with gr.TabItem("π¬ Quick Video Prompt Generator"):
            with gr.Row():
                with gr.Column(scale=1):
                    # --- Left column: image input and analysis ---
                    gr.Markdown("## π· Upload Your Frame 0")
                    image_input = gr.Image(type="pil", label="Upload your initial frame")
                    analyze_btn = gr.Button("π Analyze Image (Fast)", variant="primary")
                    image_analysis = gr.Textbox(
                        label="Image Analysis Results",
                        placeholder="Upload an image and click 'Analyze Image' for instant analysis...",
                        lines=10,
                        interactive=False
                    )

                    # Holds the structured scene analysis dict between events.
                    scene_info_state = gr.State({})

                    with gr.Group():
                        gr.Markdown("### π‘ Smart Suggestions")
                        get_suggestions_btn = gr.Button("Get Smart Tips", variant="secondary")
                        smart_suggestions = gr.Textbox(
                            label="Context-Aware Suggestions",
                            placeholder="Click 'Get Smart Tips' after image analysis...",
                            lines=5,
                            interactive=False
                        )

                    with gr.Group():
                        gr.Markdown("### π Ready-to-Use Prompts")
                        generate_instant_btn = gr.Button("Generate Instant Prompts", variant="primary")
                        instant_prompts = gr.Textbox(
                            label="Copy & Paste Ready Prompts",
                            placeholder="Click 'Generate Instant Prompts' to get ready-to-use prompts based on your image...",
                            lines=12,
                            interactive=True,
                            show_copy_button=True
                        )

                with gr.Column(scale=1):
                    # --- Right column: prompt-generation methods ---
                    gr.Markdown("## π Choose Your Method")

                    with gr.Tabs():
                        with gr.TabItem("π€ AI Prompt Assistant"):
                            gr.Markdown("*Describe your idea in any language - AI will create optimized English video prompts*")

                            with gr.Row():
                                with gr.Column(scale=2):
                                    user_idea = gr.Textbox(
                                        label="Your Idea (any language)",
                                        placeholder="e.g., 'el personaje se quita la nariz de payaso' or 'character walks slowly towards camera'",
                                        lines=3
                                    )
                                with gr.Column(scale=1):
                                    optimize_btn = gr.Button("π Optimize & Structure", variant="primary")

                            ai_optimized = gr.Textbox(
                                label="AI-Optimized Video Prompt",
                                placeholder="Your optimized prompt will appear here...",
                                lines=4,
                                interactive=True,
                                show_copy_button=True
                            )

                            # Iterative refinement chat for the optimized prompt.
                            gr.Markdown("### π¬ Refine Your Prompt")
                            chat_history = gr.Chatbot(
                                label="Prompt Refinement Chat",
                                height=250,
                                placeholder="Chat history will appear here as you refine your prompt..."
                            )

                            with gr.Row():
                                refine_input = gr.Textbox(
                                    label="Refine further",
                                    placeholder="e.g., 'make it more dramatic' or 'add camera movement' or 'mΓ‘s lento'",
                                    scale=3
                                )
                                refine_btn = gr.Button("π¬ Refine", scale=1)

                        with gr.TabItem("π Gen-4 Official"):
                            gr.Markdown("*Official method: Simple β Complex building*")
                            foundation_gen4 = gr.Textbox(
                                label="Foundation (Optional)",
                                placeholder="e.g., 'The subject walks forward'",
                                lines=1
                            )
                            generate_gen4_btn = gr.Button("Generate Gen-4 Prompts", variant="primary")
                            gen4_output = gr.Textbox(
                                label="Gen-4 Style Prompts",
                                lines=8,
                                interactive=False
                            )

                    # Manual builder: compose a prompt from checkbox/dropdown parts.
                    with gr.Group():
                        gr.Markdown("## π οΈ Custom Prompt Builder")

                        with gr.Row():
                            approach_selector = gr.Radio(
                                choices=["SARA", "Gen-4"],
                                value="SARA",
                                label="Approach",
                                interactive=True
                            )
                            custom_foundation = gr.Textbox(
                                label="Foundation",
                                placeholder="The subject...",
                                lines=1
                            )

                        with gr.Row():
                            subject_motion = gr.CheckboxGroup(
                                choices=["walks smoothly", "speaks clearly", "gestures naturally", "moves gracefully", "turns slowly"],
                                label="Subject Motion"
                            )
                            scene_motion = gr.CheckboxGroup(
                                choices=["dust swirls", "lighting changes", "wind effects", "water movement", "atmosphere shifts"],
                                label="Scene Motion"
                            )

                        with gr.Row():
                            camera_motion = gr.Dropdown(
                                choices=["camera remains steady", "handheld camera", "camera pans left", "camera pans right", "camera tracks forward", "camera zooms in"],
                                label="Camera Motion",
                                value="camera remains steady"
                            )
                            style_motion = gr.Dropdown(
                                choices=["cinematic", "documentary style", "live-action", "dramatic", "peaceful", "energetic", "professional"],
                                label="Style/Atmosphere",
                                value="cinematic"
                            )

                        build_custom_btn = gr.Button("π¨ Build Custom Prompt", variant="secondary")
                        custom_output = gr.Textbox(
                            label="Your Custom Prompt",
                            lines=3,
                            interactive=True
                        )

    # --- Event wiring (handlers defined elsewhere in this file) ---
    # NOTE(review): the bare gr.State() created inline in this outputs list is
    # suspect — it is a throwaway component that no other event can read, so
    # the second return value of analyze_image_simple is effectively dropped.
    # Confirm the handler's return arity and either bind a named State or
    # remove the slot.
    analyze_btn.click(
        fn=analyze_image_simple,
        inputs=[image_input],
        outputs=[image_analysis, gr.State(), scene_info_state]
    )

    get_suggestions_btn.click(
        fn=get_smart_suggestions_local,
        inputs=[scene_info_state],
        outputs=[smart_suggestions]
    )

    generate_instant_btn.click(
        fn=generate_instant_prompts,
        inputs=[scene_info_state],
        outputs=[instant_prompts]
    )

    optimize_btn.click(
        fn=optimize_user_prompt,
        inputs=[user_idea, scene_info_state],
        outputs=[ai_optimized]
    )

    # Refinement reuses the current optimized prompt plus chat context.
    refine_btn.click(
        fn=refine_prompt_with_feedback,
        inputs=[ai_optimized, refine_input, chat_history, scene_info_state],
        outputs=[ai_optimized, chat_history]
    )

    generate_gen4_btn.click(
        fn=generate_gen4_prompts_local,
        inputs=[scene_info_state, foundation_gen4],
        outputs=[gen4_output]
    )

    build_custom_btn.click(
        fn=build_custom_prompt_local,
        inputs=[custom_foundation, subject_motion, scene_motion, camera_motion, style_motion, approach_selector],
        outputs=[custom_output]
    )
| |
|
| | |
# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()