Sahil committed on
Commit
71ad6f4
Β·
verified Β·
1 Parent(s): 66865aa

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +296 -0
app.py ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import io
import os
import shutil
import tempfile
from pathlib import Path

import gradio as gr
from huggingface_hub import InferenceClient
6
+
7
# Initialize the shared inference client used by every generation request.
# - provider="fal-ai": routes inference through the fal.ai provider.
# - api_key: read from the HF_TOKEN env var; None if unset, which defers
#   failure to request time rather than import time — TODO confirm that is
#   the intended behavior for local runs without a token.
# - bill_to="huggingface": presumably charges usage to the "huggingface"
#   org account — verify against the Space's billing setup.
client = InferenceClient(
    provider="fal-ai",
    api_key=os.environ.get("HF_TOKEN"),
    bill_to="huggingface",
)
13
+
14
def generate_video_with_auth(image, prompt, profile: gr.OAuthProfile | None, progress=gr.Progress()):
    """
    Generate a video from an image using the Ovi model, gated behind
    Hugging Face OAuth sign-in.

    Args:
        image: Input image — a file path (str) or a PIL Image object.
        prompt: Text prompt describing the desired motion/animation.
        profile: OAuth profile injected by Gradio; None when not signed in.
        progress: Gradio progress tracker (default instance is the Gradio
            convention for enabling progress reporting).

    Returns:
        Path to the generated .mp4 video file.

    Raises:
        gr.Error: when the user is not signed in, inputs are missing, or
            video generation fails for any reason.
    """
    if profile is None:
        raise gr.Error("Click Sign in with Hugging Face button to use this app for free")

    if image is None:
        raise gr.Error("Please upload an image first!")

    if not prompt or prompt.strip() == "":
        raise gr.Error("Please enter a prompt describing the desired motion!")

    try:
        progress(0.2, desc="Processing image...")

        # Obtain the raw image bytes for the inference call.
        if isinstance(image, str):
            input_image = Path(image).read_bytes()
        else:
            # PIL Image: encode to PNG entirely in memory.  The original
            # implementation wrote a delete=False temp file here and never
            # removed it, leaking one file per request.
            buffer = io.BytesIO()
            image.save(buffer, format="PNG")
            input_image = buffer.getvalue()

        progress(0.4, desc="Generating video with AI...")

        # Generate video using the shared inference client.
        video = client.image_to_video(
            input_image,
            prompt=prompt,
            model="chetwinlow1/Ovi",
        )

        progress(0.9, desc="Finalizing video...")

        # Persist the result to a temp .mp4 for Gradio to serve.  The file
        # is intentionally left on disk (delete=False) because Gradio reads
        # it after this function returns.  Close the handle before writing
        # by name so this also works on Windows.
        output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        output_file.close()

        if isinstance(video, bytes):
            Path(output_file.name).write_bytes(video)
        elif isinstance(video, str) and os.path.exists(video):
            # Provider returned a local file path: copy it into place.
            shutil.copy(video, output_file.name)
        else:
            # Unknown return type: assume it is bytes-like and write it out.
            with open(output_file.name, "wb") as f:
                f.write(video)

        progress(1.0, desc="Complete!")

        return output_file.name

    except Exception as e:
        # Chain the original exception so the real cause is preserved in logs.
        raise gr.Error(f"Error generating video: {str(e)}") from e
83
+
84
# ---------------------------------------------------------------------------
# Gradio interface.  Everything inside the `with gr.Blocks(...)` context is
# page layout, declared top-to-bottom; event wiring is done near the bottom.
# ---------------------------------------------------------------------------
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="indigo",
    ),
    # Custom CSS classes referenced by the gr.HTML snippets below.
    css="""
    .header-link {
        font-size: 0.9em;
        color: #666;
        text-decoration: none;
        margin-bottom: 1em;
        display: inline-block;
    }
    .header-link:hover {
        color: #333;
        text-decoration: underline;
    }
    .main-header {
        text-align: center;
        margin-bottom: 2em;
    }
    .info-box {
        background-color: #f0f7ff;
        border-left: 4px solid #4285f4;
        padding: 1em;
        margin: 1em 0;
        border-radius: 4px;
    }
    .auth-warning {
        color: #ff6b00;
        font-weight: bold;
        text-align: center;
        margin: 1em 0;
    }
    """,
    title="Image to Video Generator with Ovi",
) as demo:

    # Attribution banner.
    gr.HTML(
        """
        <div class="main-header">
            <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" class="header-link">
                Built with anycoder ✨
            </a>
        </div>
        """
    )

    # App title and short description.
    gr.Markdown(
        """
        # 🎬 Image to Video Generator with Ovi

        Transform your static images into dynamic videos with synchronized audio using AI! Upload an image and describe the motion you want to see.

        Powered by **Ovi: Twin Backbone Cross-Modal Fusion for Audio-Video Generation** via HuggingFace Inference API.
        """
    )

    # Sign-in requirement notice; generate_video_with_auth enforces it.
    gr.HTML(
        """
        <div class="auth-warning">
            ⚠️ You must Sign in with Hugging Face using the button below to use this app.
        </div>
        """
    )

    # OAuth login button — required for the gr.OAuthProfile parameter to be
    # populated in the click handler.
    gr.LoginButton()

    # Usage tips.
    gr.HTML(
        """
        <div class="info-box">
            <strong>💡 Tips for best results:</strong>
            <ul>
                <li>Use clear, well-lit images with a single main subject</li>
                <li>Write specific prompts describing the desired motion or action</li>
                <li>Keep prompts concise and focused on movement and audio elements</li>
                <li>Processing generates 5-second videos at 24 FPS with synchronized audio</li>
                <li>Processing may take 30-60 seconds depending on server load</li>
            </ul>
        </div>
        """
    )

    # Documentation of the Ovi prompt grammar (speech / audio caption tags).
    gr.HTML(
        """
        <div class="info-box">
            <strong>✨ Special Tokens for Enhanced Control:</strong>
            <ul>
                <li><strong>Speech:</strong> <code>&lt;S&gt;Your speech content here&lt;E&gt;</code> - Text enclosed in these tags will be converted to speech</li>
                <li><strong>Audio Description:</strong> <code>&lt;AUDCAP&gt;Audio description here&lt;ENDAUDCAP&gt;</code> - Describes the audio or sound effects present in the video</li>
            </ul>
            <br>
            <strong>📝 Example Prompt:</strong><br>
            <code>Dogs bark loudly at a man wearing a red shirt. The man says &lt;S&gt;Please stop barking at me!&lt;E&gt;. &lt;AUDCAP&gt;Dogs barking, angry man yelling in stern voice&lt;ENDAUDCAP&gt;.</code>
        </div>
        """
    )

    # Two-column layout: inputs on the left, output on the right.
    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" means generate_video_with_auth receives a str
            # path (its isinstance(image, str) branch).
            image_input = gr.Image(
                label="📸 Upload Image",
                type="filepath",
                sources=["upload", "clipboard"],
                height=400,
            )

            prompt_input = gr.Textbox(
                label="✍️ Text Prompt",
                lines=3,
            )

            generate_btn = gr.Button(
                "🎬 Generate Video",
                variant="primary",
                size="lg",
            )

            clear_btn = gr.Button(
                "🗑️ Clear",
                variant="secondary",
            )

            # One worked example; "5.png" is assumed to exist in the Space
            # repo next to app.py — TODO confirm the asset is present.
            gr.Examples(
                examples=[
                    [
                        "5.png",
                        'A bearded man wearing large dark sunglasses and a blue patterned cardigan sits in a studio, actively speaking into a large, suspended microphone. He has headphones on and gestures with his hands, displaying rings on his fingers. Behind him, a wall is covered with red, textured sound-dampening foam on the left, and a white banner on the right features the "CHOICE FM" logo and various social media handles like "@ilovechoicefm" with "RALEIGH" below it. The man intently addresses the microphone, articulating, <S>is talent. It\'s all about authenticity. You gotta be who you really are, especially if you\'re working<E>. He leans forward slightly as he speaks, maintaining a serious expression behind his sunglasses.. <AUDCAP>Clear male voice speaking into a microphone, a low background hum.<ENDAUDCAP>'
                    ]
                ],
                inputs=[image_input, prompt_input],
                label="Example",
            )

        with gr.Column(scale=1):
            video_output = gr.Video(
                label="🎥 Generated Video",
                height=400,
                autoplay=True,
            )

            # Model credits / feature summary.
            gr.Markdown(
                """
                ### About Ovi Model

                **Ovi: Twin Backbone Cross-Modal Fusion for Audio-Video Generation**

                Developed by Chetwin Low, Weimin Wang (Character AI) & Calder Katyal (Yale University)

                🌟 **Key Features:**
                - 🎬 **Video+Audio Generation**: Generates synchronized video and audio content simultaneously
                - 📝 **Flexible Input**: Supports text-only or text+image conditioning
                - ⏱️ **5-second Videos**: Generates 5-second videos at 24 FPS
                - 📐 **Multiple Aspect Ratios**: Supports 720×720 area at various ratios (9:16, 16:9, 1:1, etc)

                Ovi is a veo-3 like model that simultaneously generates both video and audio content from text or text+image inputs.
                """
            )

    # Event wiring.  api_name=False / show_api=False hide the endpoint from
    # the public API page.
    # NOTE(review): queue=False with a gr.Progress tracker — Gradio progress
    # updates generally require the queue; confirm progress bars actually
    # render with queuing disabled.
    generate_btn.click(
        fn=generate_video_with_auth,
        inputs=[image_input, prompt_input],
        outputs=[video_output],
        queue=False,
        api_name=False,
        show_api=False,
    )

    # Reset all three components (image, prompt, video) to empty.
    clear_btn.click(
        fn=lambda: (None, "", None),
        inputs=None,
        outputs=[image_input, prompt_input, video_output],
        queue=False,
    )

    # Footer: usage walkthrough, caveats, and external links.
    gr.Markdown(
        """
        ---

        ### 🚀 How it works

        1. **Sign in** with your Hugging Face account
        2. **Upload** your image - any photo or illustration
        3. **Describe** the motion you want to see in the prompt
        4. **Generate** and watch your image come to life!

        ### ⚠️ Notes

        - Video generation may take 30-60 seconds
        - Generates 5-second videos at 24 FPS with synchronized audio
        - Supports multiple aspect ratios (9:16, 16:9, 1:1, etc) at 720×720 area
        - Requires a valid HuggingFace token with Inference API access
        - Best results with clear, high-quality images
        - The model works best with realistic subjects and natural motions

        ### 🔗 Resources

        - [Ovi Model Card](https://huggingface.co/chetwinlow1/Ovi)
        - [HuggingFace Inference API](https://huggingface.co/docs/huggingface_hub/guides/inference)
        - [Character AI](https://character.ai)
        """
    )

# Launch the app (script entry point).
if __name__ == "__main__":
    demo.launch(
        show_api=False,
        enable_monitoring=False,
        quiet=True,
    )