Spaces:

akhaliq
/

sora-2

Running

File size: 20,069 Bytes

d872fa5
 
bc2cb72
275a10f
b78c4e1
 
275a10f
d872fa5
29ea35b
 
 
6b2dd9c
29ea35b
d872fa5
 
b78c4e1
 
 
 
 
 
 
 
 
 
0ac76ac
b78c4e1
 
 
 
 
 
 
 
 
 
 
275a10f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d872fa5
 
137d0c0
 
d872fa5
 
 
137d0c0
bc2cb72
d872fa5
275a10f
 
 
d872fa5
 
 
29ea35b
 
d872fa5
29ea35b
d872fa5
 
 
29ea35b
137d0c0
d872fa5
29ea35b
 
 
137d0c0
d872fa5
 
275a10f
 
 
 
 
 
 
 
bc2cb72
29ea35b
 
d872fa5
 
bc2cb72
d872fa5
 
137d0c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b78c4e1
 
0ac76ac
b78c4e1
 
137d0c0
b78c4e1
efee1d5
 
b78c4e1
 
832af5b
b78c4e1
832af5b
efee1d5
 
b78c4e1
 
 
 
efee1d5
b78c4e1
832af5b
b78c4e1
d872fa5
137d0c0
 
 
0ac76ac
137d0c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b78c4e1
137d0c0
b78c4e1
 
170fe75
b78c4e1
 
170fe75
137d0c0
 
 
 
 
 
 
 
6b2dd9c
b78c4e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137d0c0
6b2dd9c
b78c4e1
 
137d0c0
b78c4e1
 
fea8f7a
 
b78c4e1
 
 
fea8f7a
 
 
6b2dd9c
 
 
b78c4e1
6b2dd9c
 
b78c4e1
 
 
 
 
 
 
 
 
 
 
 
 
bee1302
 
137d0c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b78c4e1
137d0c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b78c4e1
137d0c0
 
 
0ac76ac
137d0c0
 
 
 
b78c4e1
137d0c0
0ac76ac
137d0c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ac76ac
137d0c0
 
 
 
b78c4e1
 
 
 
 
 
 
 
6b2dd9c
b78c4e1
0ac76ac
 
 
b78c4e1
 
 
 
 
 
 
 
 
 
 
 
137d0c0
b78c4e1
 
 
 
137d0c0
b78c4e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b2dd9c
b78c4e1
0ac76ac
 
b78c4e1
 
0ac76ac
b78c4e1
6b2dd9c
d872fa5
6b2dd9c
d872fa5
 
 
275a10f
 
 
 
 
 
 
 
 
6b2dd9c
b78c4e1
170fe75
6b2dd9c
170fe75
 
 
275a10f
d872fa5

import os
import shutil
import tempfile
import time
from pathlib import Path
from typing import Optional, Tuple, Union

import gradio as gr
from huggingface_hub import InferenceClient, whoami

# Module-level Hugging Face Inference client, used by the generation functions
# whenever the caller does not supply its own API key. Requests go through the
# "fal-ai" provider, authenticate with the HF_TOKEN environment variable
# (api_key is None when the variable is unset), and pass bill_to="huggingface".
client = InferenceClient(
    provider="fal-ai",
    api_key=os.environ.get("HF_TOKEN"),
    bill_to="huggingface",
)

def verify_pro_status(token: Optional[Union[gr.OAuthToken, str]]) -> bool:
    """Report whether the token's owner is a PRO user or in an enterprise org.

    Accepts either a Gradio OAuth token object or a raw token string. A falsy
    token, an unsupported token type, or any failure while querying ``whoami``
    all result in ``False``.
    """
    if not token:
        return False

    # Reduce the two accepted token shapes to a plain string.
    if isinstance(token, gr.OAuthToken):
        raw_token = token.token
    elif isinstance(token, str):
        raw_token = token
    else:
        # Not expected given the type hints; refuse rather than guess.
        return False

    try:
        account = whoami(token=raw_token)
        pro_flag = account.get("isPro", False)
        if pro_flag:
            return pro_flag
        # Not PRO personally: membership in any enterprise org also qualifies.
        return any(
            membership.get("isEnterprise", False)
            for membership in account.get("orgs", [])
        )
    except Exception as e:
        print(f"Could not verify user's PRO/Enterprise status: {e}")
        return False

def cleanup_temp_files(max_age_seconds: float = 300) -> None:
    """Delete stale ``.mp4`` files from the system temp directory.

    Scans the top level of ``tempfile.gettempdir()`` for ``*.mp4`` files and
    removes those whose modification time is more than ``max_age_seconds``
    in the past. The scan is best-effort: a file that cannot be inspected or
    removed is skipped, and any other failure is printed rather than raised.

    Args:
        max_age_seconds: Minimum age, in seconds, a file must have before it
            is deleted. Defaults to 300 (five minutes).
    """
    try:
        temp_dir = Path(tempfile.gettempdir())
        # Compute the cutoff once; the previous code called the non-existent
        # ``os.time.time()``, so the age check always raised and nothing was
        # ever removed.
        cutoff = time.time() - max_age_seconds
        for file_path in temp_dir.glob("*.mp4"):
            try:
                if file_path.stat().st_mtime < cutoff:
                    file_path.unlink(missing_ok=True)
            except OSError:
                # The file may have vanished or be unremovable (e.g. owned by
                # another process); skip it and keep cleaning the rest.
                continue
    except Exception as e:
        print(f"Cleanup error: {e}")

def generate_video(
    prompt: str,
    duration: int = 8,  # Accepted for interface consistency; not forwarded to the model call
    size: str = "1280x720",  # Accepted for interface consistency; not forwarded to the model call
    api_key: Optional[str] = None
) -> Tuple[Optional[str], str]:
    """
    Produce a video from a text prompt via the Hugging Face Inference API (fal-ai provider).

    When ``api_key`` is given, a dedicated client is built with it; otherwise the
    shared module-level client is used, which requires ``HF_TOKEN`` to be set.
    The returned bytes are written to a persistent temporary ``.mp4`` file.

    Returns a ``(video_path, status_message)`` tuple; ``video_path`` is ``None``
    whenever generation did not succeed.
    """
    # Purge stale outputs before producing a new one.
    cleanup_temp_files()

    try:
        # Pick the client: caller-supplied key first, then the shared one.
        if api_key:
            active_client = InferenceClient(
                provider="fal-ai",
                api_key=api_key,
                bill_to="huggingface",
            )
        elif os.environ.get("HF_TOKEN"):
            active_client = client
        else:
            return None, "❌ Please set HF_TOKEN environment variable or provide an API key."

        payload = active_client.text_to_video(
            prompt,
            model="akhaliq/sora-2",
        )

        # delete=False keeps the file on disk after the handle is closed so
        # the returned path stays valid for the caller.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as out_file:
            out_file.write(payload)
            out_file.flush()
            saved_path = out_file.name

        return saved_path, "✅ Video generated successfully!"

    except Exception as exc:
        return None, f"❌ Error generating video: {str(exc)}"

def generate_image_to_video(
    image_path: str,
    prompt: str,
    api_key: Optional[str] = None
) -> Tuple[Optional[str], str]:
    """
    Produce a video from an image plus a text prompt via the Hugging Face
    Inference API (fal-ai provider).

    The image is read from ``image_path`` and sent as raw bytes. When
    ``api_key`` is given, a dedicated client is built with it; otherwise the
    shared module-level client is used, which requires ``HF_TOKEN`` to be set.

    Returns a ``(video_path, status_message)`` tuple; ``video_path`` is ``None``
    whenever validation or generation did not succeed.
    """
    # Purge stale outputs before doing anything else.
    cleanup_temp_files()

    # Input validation: both an image and a non-blank prompt are required.
    if not image_path:
        return None, "❌ Please upload an image."
    if not (prompt and prompt.strip()):
        return None, "❌ Please enter a prompt for the video generation."

    try:
        # Pick the client: caller-supplied key first, then the shared one.
        if api_key:
            active_client = InferenceClient(
                provider="fal-ai",
                api_key=api_key,
                bill_to="huggingface",
            )
        elif os.environ.get("HF_TOKEN"):
            active_client = client
        else:
            return None, "❌ Please set HF_TOKEN environment variable or provide an API key."

        with open(image_path, "rb") as source_image:
            image_payload = source_image.read()

        video_payload = active_client.image_to_video(
            image_payload,
            prompt=prompt,
            model="akhaliq/sora-2-image-to-video",
        )

        # delete=False keeps the file on disk after the handle is closed so
        # the returned path stays valid for the caller.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as out_file:
            out_file.write(video_payload)
            out_file.flush()
            saved_path = out_file.name

        return saved_path, "✅ Video generated successfully from image and prompt!"

    except Exception as exc:
        return None, f"❌ Error generating video from image: {str(exc)}"


def generate_with_pro_auth(
    prompt: str,
    oauth_token: Optional[gr.OAuthToken] = None # Gradio will auto-inject this based on type hint
) -> Tuple[Optional[str], str]:
    """
    Gate text-to-video generation behind a PRO/enterprise check.

    Raises ``gr.Error`` when ``verify_pro_status`` rejects the token. A blank
    prompt yields ``(None, <message>)`` without calling the model. Otherwise
    the result of ``generate_video`` is returned as ``(video_path, status)``.
    """
    is_authorized = verify_pro_status(oauth_token)
    if not is_authorized:
        raise gr.Error("Access Denied. This app is exclusively for Hugging Face PRO users. Please subscribe to PRO to use this app.")

    if not (prompt and prompt.strip()):
        return None, "❌ Please enter a prompt"

    # api_key=None makes generate_video use the shared client backed by the
    # environment HF_TOKEN; the user's OAuth token is only used for the
    # access check above, never for the generation call.
    result_path, message = generate_video(
        prompt,
        duration=8,
        size="1280x720",
        api_key=None,
    )
    return result_path, message

def generate_image_to_video_with_pro_auth(
    image_path: str,
    prompt: str,
    oauth_token: Optional[gr.OAuthToken] = None # Gradio will auto-inject this based on type hint
) -> Tuple[Optional[str], str]:
    """
    Gate image-to-video generation behind a PRO/enterprise check.

    Raises ``gr.Error`` when ``verify_pro_status`` rejects the token. A missing
    image or blank prompt yields ``(None, <message>)`` without calling the
    model. Otherwise the result of ``generate_image_to_video`` is returned as
    ``(video_path, status)``.
    """
    is_authorized = verify_pro_status(oauth_token)
    if not is_authorized:
        raise gr.Error("Access Denied. This app is exclusively for Hugging Face PRO users. Please subscribe to PRO to use this app.")

    if not image_path:
        return None, "❌ Please upload an image."
    if not (prompt and prompt.strip()):
        return None, "❌ Please enter a prompt"

    # api_key=None makes the callee use the shared client backed by the
    # environment HF_TOKEN.
    result_path, message = generate_image_to_video(
        image_path,
        prompt,
        api_key=None,
    )
    return result_path, message


def simple_generate(prompt: str) -> Optional[str]:
    """Text-to-video helper for the examples widget: returns only the video path.

    A missing or whitespace-only prompt short-circuits to ``None``; otherwise
    the status message from ``generate_video`` is discarded.
    """
    has_text = bool(prompt) and prompt.strip() != ""
    if not has_text:
        return None

    generated_path, _status = generate_video(
        prompt,
        duration=8,
        size="1280x720",
        api_key=None,
    )
    return generated_path

def simple_generate_image_to_video(image_path: str, prompt: str) -> Optional[str]:
    """Image-to-video helper for the examples widget: returns only the video path.

    Returns ``None`` without generating anything when the image is missing or
    the prompt is missing/whitespace-only; otherwise the status message from
    ``generate_image_to_video`` is discarded.
    """
    if not image_path:
        return None
    if not prompt:
        return None
    if prompt.strip() == "":
        return None

    generated_path, _status = generate_image_to_video(image_path, prompt, api_key=None)
    return generated_path

def create_ui() -> gr.Blocks:
    """Build and return the Gradio Blocks app with PRO-gated access.

    Layout, in creation order: a header banner, a login button, a hidden
    Markdown placeholder for the upgrade message, and a hidden main column
    holding the Text-to-Video and Image-to-Video tabs plus a footer. On page
    load, ``control_access`` decides which of the two hidden parts to reveal.

    Hiding the column is only a UI convenience: both generate buttons are
    wired to the ``*_with_pro_auth`` wrappers, which re-check PRO status on
    every click.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched).
    """
    
    # Custom CSS: light/dark logo toggling, subtitle spacing, and the "PRO" badge.
    css = '''
    .logo-dark{display: none}
    .dark .logo-dark{display: block !important}
    .dark .logo-light{display: none}
    #sub_title{margin-top: -20px !important}
    .pro-badge{
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 4px 12px;
        border-radius: 20px;
        font-size: 0.9em;
        font-weight: bold;
        display: inline-block;
        margin-left: 8px;
    }
    '''
    
    with gr.Blocks(title="Sora-2 Text & Image-to-Video Generator", theme=gr.themes.Soft(), css=css) as demo:
        # Header banner, always visible regardless of login state.
        gr.HTML("""
            <div style="text-align: center; max-width: 800px; margin: 0 auto;">
                <h1 style="font-size: 2.5em; margin-bottom: 0.5em;">
                    🎬 Sora-2 Text & Image-to-Video Generator
                    <span class="pro-badge">PRO</span>
                </h1>
                <p style="font-size: 1.1em; color: #666; margin-bottom: 20px;">Generate stunning videos using OpenAI's Sora-2 model</p>
                <p id="sub_title" style="font-size: 1em; margin-top: 20px; margin-bottom: 15px;">
                    <strong>Exclusive access for Hugging Face PRO users.</strong> 
                    <a href="http://huggingface.co/subscribe/pro?source=sora2_video" target="_blank" style="color: #667eea;">Subscribe to PRO →</a>
                </p>
                <p style="font-size: 0.9em; color: #999; margin-top: 15px;">
                    Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #667eea;">anycoder</a>
                </p>
            </div>
        """)
        
        # Login button for OAuth
        gr.LoginButton()
        
        # PRO message for non-PRO users (empty and hidden until control_access fills it)
        pro_message = gr.Markdown(visible=False)
        
        # Main interface (hidden by default; revealed by control_access for PRO users)
        main_interface = gr.Column(visible=False)
        
        with main_interface:
            gr.HTML("""
                <div style="text-align: center; margin: 20px 0;">
                    <p style="color: #28a745; font-weight: bold;">✨ Welcome PRO User! You have full access to Sora-2.</p>
                </div>
            """)
            
            # NOTE(review): tab_selector is bound but not referenced again in this function.
            with gr.Tabs() as tab_selector: 
                with gr.TabItem("Text-to-Video", id=0):
                    with gr.Row():
                        # Left column: prompt input and the generate button.
                        with gr.Column(scale=1):
                            prompt_input_text = gr.Textbox(
                                label="Enter your text prompt",
                                placeholder="Describe the video you want to create...",
                                lines=4
                            )
                            
                            with gr.Accordion("Advanced Settings", open=False):
                                gr.Markdown("*Coming soon: Duration and resolution controls*")
                            
                            generate_btn_text = gr.Button("🎥 Generate Video from Text", variant="primary", size="lg")
                        
                        # Right column: generated video and a read-only status line.
                        with gr.Column(scale=1):
                            video_output_text = gr.Video(
                                label="Generated Video",
                                height=400,
                                interactive=False,
                                show_download_button=True
                            )
                            status_output_text = gr.Textbox(
                                label="Status",
                                interactive=False,
                                visible=True
                            )
                    
                    # Example prompts. cache_examples=False, so no outputs are
                    # cached for them; no queue argument is passed here.
                    gr.Examples(
                        examples=[
                            "A serene beach at sunset with waves gently rolling onto the shore",
                            "A butterfly emerging from its chrysalis in slow motion",
                            "Northern lights dancing across a starry night sky",
                            "A bustling city street transitioning from day to night in timelapse",
                            "A close-up of coffee being poured into a cup with steam rising",
                            "Cherry blossoms falling in slow motion in a Japanese garden"
                        ],
                        inputs=prompt_input_text,
                        outputs=video_output_text,
                        fn=simple_generate,  # Examples use simplified function
                        cache_examples=False,
                        api_name=False,
                        show_api=False,
                    )
                    
                    # Event handler for generation with queue disabled
                    generate_btn_text.click(
                        fn=generate_with_pro_auth,
                        inputs=[prompt_input_text], # OAuth token is auto-injected by type hint
                        outputs=[video_output_text, status_output_text],
                        queue=False,
                        api_name=False,
                        show_api=False,
                    )

                with gr.TabItem("Image-to-Video", id=1):
                    with gr.Row():
                        # Left column: image upload, prompt input and the generate button.
                        with gr.Column(scale=1):
                            image_input = gr.Image(
                                label="Upload your input image",
                                type="filepath",
                                height=300,
                                value="https://huggingface.co/spaces/akhaliq/sora-2/raw/main/cat.png" # Example image
                            )
                            prompt_input_image = gr.Textbox(
                                label="Enter your text prompt for the video",
                                placeholder="Describe the action or style you want for the video (e.g., 'The cat starts to dance')",
                                lines=3
                            )
                            
                            generate_btn_image = gr.Button("🖼️ Generate Video from Image", variant="primary", size="lg")
                        
                        # Right column: generated video and a read-only status line.
                        with gr.Column(scale=1):
                            video_output_image = gr.Video(
                                label="Generated Video",
                                height=400,
                                interactive=False,
                                show_download_button=True
                            )
                            status_output_image = gr.Textbox(
                                label="Status",
                                interactive=False,
                                visible=True
                            )

                    # Example (image URL, prompt) pairs; same settings as the
                    # text-to-video examples.
                    gr.Examples(
                        examples=[
                            ["https://huggingface.co/spaces/akhaliq/sora-2/raw/main/cat.png", "The cat starts to dance"],
                            ["https://huggingface.co/spaces/akhaliq/sora-2/raw/main/forest.png", "A magical forest where trees shimmer with light"],
                            ["https://huggingface.co/spaces/akhaliq/sora-2/raw/main/car.png", "A classic car driving through a futuristic city"]
                        ],
                        inputs=[image_input, prompt_input_image],
                        outputs=video_output_image,
                        fn=simple_generate_image_to_video,
                        cache_examples=False,
                        api_name=False,
                        show_api=False,
                    )

                    # Event handler for image-to-video generation with queue disabled
                    generate_btn_image.click(
                        fn=generate_image_to_video_with_pro_auth,
                        inputs=[image_input, prompt_input_image], # OAuth token is auto-injected by type hint
                        outputs=[video_output_image, status_output_image],
                        queue=False,
                        api_name=False,
                        show_api=False,
                    )
            
            # Footer
            gr.HTML("""
                <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #e0e0e0;">
                    <h3 style="color: #667eea;">Thank you for being a PRO user! 🤗</h3>
                </div>
            """)
        
        def control_access(profile: Optional[gr.OAuthProfile] = None, oauth_token: Optional[gr.OAuthToken] = None):
            """Control interface visibility based on PRO status.

            Gradio automatically injects gr.OAuthProfile and gr.OAuthToken based on type hints
            when OAuth is enabled for the Space.

            Returns a pair of ``gr.update`` objects, applied to
            ``main_interface`` and ``pro_message`` respectively:
            not logged in -> both hidden; PRO -> main interface shown;
            logged in but not PRO -> upgrade message shown.
            """
            if not profile:
                # User not logged in
                return gr.update(visible=False), gr.update(visible=False)
            
            if verify_pro_status(oauth_token):
                # User is PRO - show main interface
                return gr.update(visible=True), gr.update(visible=False)
            else:
                # User is not PRO - show upgrade message
                message = """
                ## ✨ Exclusive Access for PRO Users
                
                Thank you for your interest in the Sora-2 Text & Image-to-Video Generator!
                
                This advanced AI video generation tool is available exclusively for Hugging Face **PRO** members.
                
                ### What you get with PRO:
                - ✅ Unlimited access to Sora-2 video generation (Text-to-Video & Image-to-Video)
                - ✅ High-quality video outputs up to 1280x720
                - ✅ Fast generation times with priority queue
                - ✅ Access to other exclusive PRO Spaces
                - ✅ Support the development of cutting-edge AI tools
                
                ### Ready to create amazing videos?
                
                <div style="text-align: center; margin: 30px 0;">
                    <a href="http://huggingface.co/subscribe/pro?source=sora2_video" target="_blank" style="
                        display: inline-block;
                        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                        color: white;
                        padding: 12px 30px;
                        border-radius: 25px;
                        text-decoration: none;
                        font-weight: bold;
                        font-size: 1.1em;
                        box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);
                        transition: transform 0.2s;
                    ">
                        🚀 Become a PRO Today!
                    </a>
                </div>
                
                <p style="text-align: center; color: #666; margin-top: 20px;">
                    Join thousands of creators who are already using PRO tools to bring their ideas to life.
                </p>
                """
                return gr.update(visible=False), gr.update(visible=True, value=message)
        
        # Check access on load
        # No explicit inputs are needed here as gr.OAuthProfile and gr.OAuthToken are
        # provided automatically by Gradio to the function based on type hints.
        demo.load(
            control_access,
            inputs=None, # Removed explicit instantiation of OAuthProfile and OAuthToken
            outputs=[main_interface, pro_message]
        )
    
    return demo

# Script entry point: tidy up leftovers, build the UI, then serve it.
if __name__ == "__main__":
    # Best-effort startup cleanup; a failure here must not prevent launch.
    try:
        cleanup_temp_files()
        # Drop Gradio's on-disk examples cache from earlier runs, if present.
        examples_cache_dir = "gradio_cached_examples"
        if os.path.exists(examples_cache_dir):
            shutil.rmtree(examples_cache_dir, ignore_errors=True)
    except Exception as e:
        print(f"Initial cleanup error: {e}")

    app = create_ui()

    # No auth arguments are passed to launch(); OAuth is enabled via Space
    # metadata (hf_oauth: true in README.md).
    launch_options = dict(
        show_api=False,
        enable_monitoring=False,
        quiet=True,
        max_threads=10,  # Limit threads to prevent resource exhaustion
    )
    app.launch(**launch_options)