""" DeepVision Prompt Builder - Gradio Interface Hugging Face Spaces Deployment This is the main Gradio application for the DeepVision Prompt Builder. It provides a web interface for uploading images/videos and viewing analysis results. """ import gradio as gr import json from pathlib import Path from typing import Dict, Any, Tuple import tempfile # Import core components from core.engine import AnalysisEngine from plugins.loader import PluginLoader from core.logging_config import setup_logging from loguru import logger # Setup logging setup_logging() class DeepVisionGradioApp: """ Gradio web interface for DeepVision Prompt Builder """ def __init__(self): """Initialize the Gradio app""" self.engine = AnalysisEngine() self.plugin_loader = PluginLoader() self.setup_plugins() logger.info("DeepVision Gradio App initialized") def setup_plugins(self): """Load and register all available plugins""" try: # Load all plugins plugins = self.plugin_loader.load_all_plugins() # Register plugins with engine for name, plugin in plugins.items(): self.engine.register_plugin(name, plugin) logger.info(f"Plugin registered: {name}") logger.success(f"Loaded {len(plugins)} plugins successfully") except Exception as e: logger.error(f"Error loading plugins: {e}") def analyze_media( self, file_path: str, use_color_analyzer: bool = True, use_object_detector: bool = False, use_caption_generator: bool = False, num_frames: int = 5 ) -> Tuple[str, str]: """ Analyze uploaded image or video Args: file_path: Path to uploaded file use_color_analyzer: Enable color analysis use_object_detector: Enable object detection (heavy) use_caption_generator: Enable caption generation (heavy) num_frames: Number of frames to extract from video Returns: Tuple of (formatted results text, JSON string) """ try: logger.info(f"Analyzing file: {file_path}") # Enable/disable plugins based on user selection self._configure_plugins( use_color_analyzer, use_object_detector, use_caption_generator ) # Detect file type and analyze file_path_obj = Path(file_path) if file_path_obj.suffix.lower() in ['.mp4', '.avi', '.mov', '.mkv']: # Video analysis logger.info(f"Processing video with {num_frames} frames") results = self.engine.analyze_video( file_path, extract_method="keyframes", num_frames=num_frames ) else: # Image analysis logger.info("Processing image") results = self.engine.analyze_image(file_path) # Format results for display formatted_text = self._format_results(results) json_output = json.dumps(results, indent=2, ensure_ascii=False) logger.success("Analysis completed successfully") return formatted_text, json_output except Exception as e: logger.error(f"Analysis error: {e}") error_msg = f"❌ Error: {str(e)}" error_json = json.dumps({"error": str(e)}, indent=2) return error_msg, error_json def _configure_plugins( self, use_color: bool, use_object: bool, use_caption: bool ): """Enable/disable plugins based on user selection""" plugin_config = { 'color_analyzer': use_color, 'object_detector': use_object, 'caption_generator': use_caption } for plugin_name, enabled in plugin_config.items(): if plugin_name in self.engine.plugins: self.engine.plugins[plugin_name].enabled = enabled logger.info(f"Plugin '{plugin_name}': {'enabled' if enabled else 'disabled'}") def _format_results(self, results: Dict[str, Any]) -> str: """Format analysis results as readable text""" lines = ["# 🎯 Analysis Results\n"] # File metadata if "metadata" in results and "file" in results["metadata"]: meta = results["metadata"]["file"] lines.append("## 📁 File Information") lines.append(f"- **Filename**: {meta.get('filename', 'N/A')}") lines.append(f"- **Type**: {meta.get('type', 'N/A')}") lines.append(f"- **Size**: {meta.get('size_mb', 0):.2f} MB") if meta.get('type') == 'video': lines.append(f"- **Resolution**: {meta.get('width')}x{meta.get('height')}") lines.append(f"- **Duration**: {meta.get('duration', 0):.2f} seconds") lines.append(f"- **FPS**: {meta.get('fps', 0):.2f}") else: lines.append(f"- **Resolution**: {meta.get('width')}x{meta.get('height')}") lines.append("") # Processing info if "metadata" in results and "processing" in results["metadata"]: proc = results["metadata"]["processing"] lines.append("## ⚡ Processing Information") lines.append(f"- **Duration**: {proc.get('duration_seconds', 0):.3f} seconds") lines.append(f"- **Plugins Used**: {', '.join(proc.get('plugins_used', []))}") if proc.get('frames_extracted'): lines.append(f"- **Frames Analyzed**: {proc.get('frames_extracted')}") lines.append("") # Analysis results if "results" in results: res = results["results"] # For videos if "frames" in res: lines.append(f"## 🎬 Video Analysis ({len(res['frames'])} frames)") # Summary if "summary" in res: for plugin_name, summary_data in res["summary"].items(): lines.append(f"\n### {plugin_name.replace('_', ' ').title()}") lines.append(f"```json\n{json.dumps(summary_data, indent=2, ensure_ascii=False)}\n```") # For images else: lines.append("## 🖼️ Image Analysis") for plugin_name, plugin_data in res.items(): lines.append(f"\n### {plugin_name.replace('_', ' ').title()}") lines.append(f"```json\n{json.dumps(plugin_data, indent=2, ensure_ascii=False)}\n```") return "\n".join(lines) def create_interface(self) -> gr.Blocks: """Create and return the Gradio interface""" with gr.Blocks( title="DeepVision Prompt Builder", theme="soft", css=""" .output-text { font-family: 'Courier New', monospace; } .json-output { font-size: 12px; } """ ) as demo: # Header gr.Markdown(""" # 🎯 DeepVision Prompt Builder ### AI-Powered Image & Video Analysis with JSON Prompt Generation Upload an image or video to analyze its content and generate structured JSON prompts. """) with gr.Row(): with gr.Column(scale=1): # Input section gr.Markdown("## 📤 Upload Media") file_input = gr.File( label="Upload Image or Video", file_types=["image", "video"], type="filepath" ) gr.Markdown("### 🔌 Plugin Configuration") color_checkbox = gr.Checkbox( label="🎨 Color Analyzer (Fast)", value=True, info="Extract dominant colors and color schemes" ) object_checkbox = gr.Checkbox( label="🔍 Object Detector (Slow - CLIP)", value=False, info="Detect objects using CLIP model (~2-5GB download)" ) caption_checkbox = gr.Checkbox( label="💬 Caption Generator (Slow - BLIP-2)", value=False, info="Generate image captions (~2-5GB download)" ) frames_slider = gr.Slider( minimum=1, maximum=20, value=5, step=1, label="📹 Video Frames to Extract", info="More frames = more accurate but slower" ) analyze_btn = gr.Button( "🚀 Analyze", variant="primary", size="lg" ) with gr.Column(scale=2): # Output section gr.Markdown("## 📊 Analysis Results") with gr.Tabs(): with gr.Tab("📝 Formatted"): output_text = gr.Markdown( label="Results", elem_classes=["output-text"] ) with gr.Tab("📋 JSON"): output_json = gr.Code( label="JSON Output", language="json", elem_classes=["json-output"], lines=20 ) download_btn = gr.DownloadButton( label="💾 Download JSON", visible=False ) # Examples gr.Markdown("## 💡 Example Usage") gr.Markdown(""" 1. **Quick Test**: Upload an image with only Color Analyzer enabled 2. **Full Analysis**: Enable all plugins (requires model downloads) 3. **Video Analysis**: Upload a video and adjust frame count **Note**: First-time use of Object Detector and Caption Generator will download ~2-5GB models. """) # Footer gr.Markdown(""" --- **DeepVision Prompt Builder v0.1.0** | Built with ❤️ by AI Dev Collective 📚 [Documentation](https://github.com/yourusername/deepvision) | 🐛 [Report Issues](https://github.com/yourusername/deepvision/issues) """) # Event handlers def analyze_and_prepare_download(file, color, obj, cap, frames): """Analyze and prepare results for download""" if file is None: return "⚠️ Please upload a file first", "{}", gr.update(visible=False) text_result, json_result = self.analyze_media( file, color, obj, cap, frames ) # Save JSON to temp file for download temp_file = tempfile.NamedTemporaryFile( mode='w', suffix='.json', delete=False, encoding='utf-8' ) temp_file.write(json_result) temp_file.close() return ( text_result, json_result, gr.update(visible=True, value=temp_file.name) ) analyze_btn.click( fn=analyze_and_prepare_download, inputs=[ file_input, color_checkbox, object_checkbox, caption_checkbox, frames_slider ], outputs=[output_text, output_json, download_btn] ) return demo def main(): """Main entry point for the Gradio app""" app = DeepVisionGradioApp() demo = app.create_interface() # Launch the app demo.launch( server_name="0.0.0.0", # Allow external connections (required for HF Spaces) server_port=7860, # HF Spaces default port share=False, show_error=True ) if __name__ == "__main__": main()