# deepvision-prompt-builder
#
# Runtime error
#
# File size: 13,247 Bytes

"""
DeepVision Prompt Builder - Gradio Interface
Hugging Face Spaces Deployment

This is the main Gradio application for the DeepVision Prompt Builder.
It provides a web interface for uploading images/videos and viewing analysis results.
"""

import gradio as gr
import json
from pathlib import Path
from typing import Dict, Any, Tuple
import tempfile

# Import core components
from core.engine import AnalysisEngine
from plugins.loader import PluginLoader
from core.logging_config import setup_logging
from loguru import logger

# Configure logging at import time, before the app class below is defined or
# instantiated (setup_logging is imported from core.logging_config).
setup_logging()


class DeepVisionGradioApp:
    """
    Gradio web interface for DeepVision Prompt Builder.

    Owns an ``AnalysisEngine`` and a ``PluginLoader``, registers every plugin
    the loader returns with the engine, and builds a ``gr.Blocks`` UI that
    runs the engine on an uploaded file and shows the results as Markdown and
    as JSON (with a download button for the JSON).
    """

    # Lower-case file suffixes that are routed to video analysis; every other
    # file is treated as an image. The upload widget accepts any video type,
    # so the list covers more than the most common containers.
    VIDEO_EXTENSIONS = frozenset({
        '.mp4', '.avi', '.mov', '.mkv',
        '.webm', '.m4v', '.wmv', '.flv', '.mpg', '.mpeg',
    })

    def __init__(self):
        """Create the engine and plugin loader, then register the plugins."""
        self.engine = AnalysisEngine()
        self.plugin_loader = PluginLoader()
        self.setup_plugins()
        logger.info("DeepVision Gradio App initialized")

    def setup_plugins(self):
        """Load all available plugins and register them with the engine.

        Best-effort: failures are logged (with traceback) and never raised,
        so the app can still start. A plugin that fails to register does not
        prevent the remaining plugins from being registered.
        """
        try:
            plugins = self.plugin_loader.load_all_plugins()

            failed = []
            for name, plugin in plugins.items():
                try:
                    self.engine.register_plugin(name, plugin)
                except Exception as e:
                    failed.append(name)
                    logger.exception(f"Error registering plugin '{name}': {e}")
                else:
                    logger.info(f"Plugin registered: {name}")

            if failed:
                logger.warning(
                    f"Loaded {len(plugins) - len(failed)} of {len(plugins)} plugins; "
                    f"failed: {', '.join(map(str, failed))}"
                )
            else:
                logger.success(f"Loaded {len(plugins)} plugins successfully")
        except Exception as e:
            logger.exception(f"Error loading plugins: {e}")

    @classmethod
    def _is_video(cls, file_path: str) -> bool:
        """Return True when the file suffix (case-insensitive) is a known video type."""
        return Path(file_path).suffix.lower() in cls.VIDEO_EXTENSIONS

    @staticmethod
    def _format_number(value: Any, spec: str) -> str:
        """Format ``value`` with ``spec``; return ``'N/A'`` if it is not formattable."""
        try:
            return format(value, spec)
        except (TypeError, ValueError):
            # e.g. None or a string where a number was expected
            return "N/A"

    @staticmethod
    def _write_json_download(json_text: str) -> str:
        """Write ``json_text`` to a new UTF-8 ``.json`` temp file and return its path.

        The file is created with ``delete=False`` so it still exists when the
        download button serves it; it is not removed by this class.
        """
        with tempfile.NamedTemporaryFile(
            mode='w',
            suffix='.json',
            delete=False,
            encoding='utf-8'
        ) as temp_file:
            temp_file.write(json_text)
        return temp_file.name

    def analyze_media(
        self,
        file_path: str,
        use_color_analyzer: bool = True,
        use_object_detector: bool = False,
        use_caption_generator: bool = False,
        num_frames: int = 5
    ) -> Tuple[str, str]:
        """
        Analyze uploaded image or video

        Args:
            file_path: Path to uploaded file
            use_color_analyzer: Enable color analysis
            use_object_detector: Enable object detection (heavy)
            use_caption_generator: Enable caption generation (heavy)
            num_frames: Number of frames to extract from video (coerced to
                int, since UI sliders may deliver a float)

        Returns:
            Tuple of (formatted results text, JSON string). On any failure
            the exception is logged and the tuple holds an error message and
            a ``{"error": ...}`` JSON document instead; nothing is raised.
        """
        try:
            logger.info(f"Analyzing file: {file_path}")

            # Enable/disable plugins based on user selection
            self._configure_plugins(
                use_color_analyzer,
                use_object_detector,
                use_caption_generator
            )

            if self._is_video(file_path):
                frame_count = int(num_frames)
                logger.info(f"Processing video with {frame_count} frames")
                results = self.engine.analyze_video(
                    file_path,
                    extract_method="keyframes",
                    num_frames=frame_count
                )
            else:
                logger.info("Processing image")
                results = self.engine.analyze_image(file_path)

            # Format results for display
            formatted_text = self._format_results(results)
            json_output = json.dumps(results, indent=2, ensure_ascii=False)

            logger.success("Analysis completed successfully")
            return formatted_text, json_output

        except Exception as e:
            logger.exception(f"Analysis error: {e}")
            error_msg = f"❌ Error: {str(e)}"
            error_json = json.dumps({"error": str(e)}, indent=2)
            return error_msg, error_json

    def _configure_plugins(
        self,
        use_color: bool,
        use_object: bool,
        use_caption: bool
    ):
        """Enable/disable registered plugins based on user selection.

        Sets the ``enabled`` attribute of each known plugin found in
        ``self.engine.plugins``. A plugin the user enabled but that is not
        registered is reported with a warning instead of being ignored
        silently.
        """
        plugin_config = {
            'color_analyzer': use_color,
            'object_detector': use_object,
            'caption_generator': use_caption
        }

        for plugin_name, enabled in plugin_config.items():
            if plugin_name in self.engine.plugins:
                self.engine.plugins[plugin_name].enabled = enabled
                logger.info(f"Plugin '{plugin_name}': {'enabled' if enabled else 'disabled'}")
            elif enabled:
                logger.warning(f"Plugin '{plugin_name}' was requested but is not registered")

    def _format_results(self, results: Dict[str, Any]) -> str:
        """Format analysis results as readable Markdown text.

        Reads the optional ``metadata.file``, ``metadata.processing`` and
        ``results`` entries of ``results``. Missing or non-numeric metadata
        values are rendered as ``N/A`` rather than raising.
        """
        lines = ["# 🎯 Analysis Results\n"]
        metadata = results.get("metadata") or {}

        # File metadata
        if "file" in metadata:
            meta = metadata["file"] or {}
            lines.append("## 📁 File Information")
            lines.append(f"- **Filename**: {meta.get('filename', 'N/A')}")
            lines.append(f"- **Type**: {meta.get('type', 'N/A')}")
            lines.append(f"- **Size**: {self._format_number(meta.get('size_mb', 0), '.2f')} MB")

            width, height = meta.get('width'), meta.get('height')
            resolution = "N/A" if width is None or height is None else f"{width}x{height}"
            lines.append(f"- **Resolution**: {resolution}")

            if meta.get('type') == 'video':
                lines.append(
                    f"- **Duration**: {self._format_number(meta.get('duration', 0), '.2f')} seconds"
                )
                lines.append(f"- **FPS**: {self._format_number(meta.get('fps', 0), '.2f')}")
            lines.append("")

        # Processing info
        if "processing" in metadata:
            proc = metadata["processing"] or {}
            lines.append("## ⚡ Processing Information")
            lines.append(
                f"- **Duration**: {self._format_number(proc.get('duration_seconds', 0), '.3f')} seconds"
            )
            plugins_used = ', '.join(map(str, proc.get('plugins_used') or []))
            lines.append(f"- **Plugins Used**: {plugins_used}")
            if proc.get('frames_extracted'):
                lines.append(f"- **Frames Analyzed**: {proc.get('frames_extracted')}")
            lines.append("")

        # Analysis results
        if "results" in results:
            res = results["results"] or {}

            if "frames" in res:
                # Videos: only the per-plugin summary is rendered, not each frame
                lines.append(f"## 🎬 Video Analysis ({len(res['frames'])} frames)")
                sections = res.get("summary") or {}
            else:
                # Images: one section per plugin
                lines.append("## 🖼️ Image Analysis")
                sections = res

            for plugin_name, plugin_data in sections.items():
                lines.append(f"\n### {plugin_name.replace('_', ' ').title()}")
                lines.append(f"```json\n{json.dumps(plugin_data, indent=2, ensure_ascii=False)}\n```")

        return "\n".join(lines)

    def create_interface(self) -> "gr.Blocks":
        """Create and return the Gradio interface.

        Builds the upload/config column, the results column (Markdown and
        JSON tabs plus a hidden download button), and wires the Analyze
        button to run the analysis and expose the JSON for download.
        """

        with gr.Blocks(
            title="DeepVision Prompt Builder",
            theme="soft",
            css="""
                .output-text { font-family: 'Courier New', monospace; }
                .json-output { font-size: 12px; }
            """
        ) as demo:

            # Header
            gr.Markdown("""
            # 🎯 DeepVision Prompt Builder
            ### AI-Powered Image & Video Analysis with JSON Prompt Generation
            
            Upload an image or video to analyze its content and generate structured JSON prompts.
            """)

            with gr.Row():
                with gr.Column(scale=1):
                    # Input section
                    gr.Markdown("## 📤 Upload Media")

                    file_input = gr.File(
                        label="Upload Image or Video",
                        file_types=["image", "video"],
                        type="filepath"
                    )

                    gr.Markdown("### 🔌 Plugin Configuration")

                    color_checkbox = gr.Checkbox(
                        label="🎨 Color Analyzer (Fast)",
                        value=True,
                        info="Extract dominant colors and color schemes"
                    )

                    object_checkbox = gr.Checkbox(
                        label="🔍 Object Detector (Slow - CLIP)",
                        value=False,
                        info="Detect objects using CLIP model (~2-5GB download)"
                    )

                    caption_checkbox = gr.Checkbox(
                        label="💬 Caption Generator (Slow - BLIP-2)",
                        value=False,
                        info="Generate image captions (~2-5GB download)"
                    )

                    frames_slider = gr.Slider(
                        minimum=1,
                        maximum=20,
                        value=5,
                        step=1,
                        label="📹 Video Frames to Extract",
                        info="More frames = more accurate but slower"
                    )

                    analyze_btn = gr.Button(
                        "🚀 Analyze",
                        variant="primary",
                        size="lg"
                    )

                with gr.Column(scale=2):
                    # Output section
                    gr.Markdown("## 📊 Analysis Results")

                    with gr.Tabs():
                        with gr.Tab("📝 Formatted"):
                            output_text = gr.Markdown(
                                label="Results",
                                elem_classes=["output-text"]
                            )

                        with gr.Tab("📋 JSON"):
                            output_json = gr.Code(
                                label="JSON Output",
                                language="json",
                                elem_classes=["json-output"],
                                lines=20
                            )

                    # Hidden until an analysis has produced something to download
                    download_btn = gr.DownloadButton(
                        label="💾 Download JSON",
                        visible=False
                    )

            # Examples
            gr.Markdown("## 💡 Example Usage")
            gr.Markdown("""
            1. **Quick Test**: Upload an image with only Color Analyzer enabled
            2. **Full Analysis**: Enable all plugins (requires model downloads)
            3. **Video Analysis**: Upload a video and adjust frame count
            
            **Note**: First-time use of Object Detector and Caption Generator will download ~2-5GB models.
            """)

            # Footer
            gr.Markdown("""
            ---
            **DeepVision Prompt Builder v0.1.0** | Built with ❤️ by AI Dev Collective
            
            📚 [Documentation](https://github.com/yourusername/deepvision) | 
            🐛 [Report Issues](https://github.com/yourusername/deepvision/issues)
            """)

            # Event handlers
            def analyze_and_prepare_download(file, color, obj, cap, frames):
                """Run the analysis and return (markdown, json, download-button update)."""
                if file is None:
                    return "⚠️ Please upload a file first", "{}", gr.update(visible=False)

                text_result, json_result = self.analyze_media(
                    file, color, obj, cap, frames
                )

                # Save JSON to a temp file so the download button has a path to serve
                download_path = self._write_json_download(json_result)

                return (
                    text_result,
                    json_result,
                    gr.update(visible=True, value=download_path)
                )

            analyze_btn.click(
                fn=analyze_and_prepare_download,
                inputs=[
                    file_input,
                    color_checkbox,
                    object_checkbox,
                    caption_checkbox,
                    frames_slider
                ],
                outputs=[output_text, output_json, download_btn]
            )

        return demo


def main():
    """Build the DeepVision Gradio interface and start serving it."""
    interface = DeepVisionGradioApp().create_interface()

    launch_options = {
        "server_name": "0.0.0.0",  # Allow external connections (required for HF Spaces)
        "server_port": 7860,       # HF Spaces default port
        "share": False,
        "show_error": True,
    }
    interface.launch(**launch_options)


# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()