File size: 13,247 Bytes
eb5a9e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bdb29e1
 
 
 
eb5a9e1
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
"""
DeepVision Prompt Builder - Gradio Interface
Hugging Face Spaces Deployment

This is the main Gradio application for the DeepVision Prompt Builder.
It provides a web interface for uploading images/videos and viewing analysis results.
"""

import gradio as gr
import json
from pathlib import Path
from typing import Dict, Any, Tuple
import tempfile

# Import core components
from core.engine import AnalysisEngine
from plugins.loader import PluginLoader
from core.logging_config import setup_logging
from loguru import logger

# Run the project's logging setup once, at import time, before any app
# object is constructed.
setup_logging()


class DeepVisionGradioApp:
    """
    Gradio web interface for DeepVision Prompt Builder
    """
    
    def __init__(self) -> None:
        """
        Build the app's collaborators and register plugins.

        Creates an ``AnalysisEngine`` and a ``PluginLoader``, then calls
        ``setup_plugins()`` to register whatever the loader returns with the
        engine.
        """
        self.engine = AnalysisEngine()
        self.plugin_loader = PluginLoader()
        # Must come after both attributes exist: setup_plugins() reads them.
        self.setup_plugins()
        logger.info("DeepVision Gradio App initialized")
    
    def setup_plugins(self):
        """
        Load every available plugin and register it with the engine.

        Plugins come from ``self.plugin_loader.load_all_plugins()``, which is
        iterated as a ``name -> plugin`` mapping and handed to
        ``self.engine.register_plugin``.

        Nothing is raised to the caller, so the app can still start:

        - If loading fails, the error is logged with its traceback and no
          plugin is registered.
        - If registering one plugin fails, that error is logged with its
          traceback and the remaining plugins are still registered.
        """
        try:
            plugins = self.plugin_loader.load_all_plugins()

            registered = 0
            for name, plugin in plugins.items():
                try:
                    self.engine.register_plugin(name, plugin)
                except Exception as e:
                    # One broken plugin must not prevent the others from
                    # being registered.
                    logger.exception(f"Error registering plugin '{name}': {e}")
                    continue
                registered += 1
                logger.info(f"Plugin registered: {name}")

            if registered == len(plugins):
                logger.success(f"Loaded {registered} plugins successfully")
            else:
                logger.warning(
                    f"Loaded {registered} of {len(plugins)} plugins; "
                    "see errors above"
                )
        except Exception as e:
            # Best-effort startup: keep the UI available even without plugins,
            # but keep the traceback so the failure can be diagnosed.
            logger.exception(f"Error loading plugins: {e}")
    
    def analyze_media(
        self,
        file_path: str,
        use_color_analyzer: bool = True,
        use_object_detector: bool = False,
        use_caption_generator: bool = False,
        num_frames: int = 5
    ) -> Tuple[str, str]:
        """
        Analyze an uploaded image or video.

        The plugin flags are applied to the engine first. The file is then
        routed by extension: ``.mp4``, ``.avi``, ``.mov`` and ``.mkv``
        (case-insensitive) go to ``engine.analyze_video`` with keyframe
        extraction, everything else goes to ``engine.analyze_image``.

        Args:
            file_path: Path to the uploaded file.
            use_color_analyzer: Enable color analysis.
            use_object_detector: Enable object detection (heavy).
            use_caption_generator: Enable caption generation (heavy).
            num_frames: Number of frames to extract from a video. Coerced to
                ``int`` because UI sliders may deliver a float.

        Returns:
            Tuple of (formatted results text, JSON string). This method does
            not raise: on any failure the first element is an error message
            and the second is a JSON object with an ``"error"`` key.
        """
        video_suffixes = {'.mp4', '.avi', '.mov', '.mkv'}

        try:
            if not file_path:
                # Fail with a readable message instead of a pathlib TypeError.
                raise ValueError("No file provided")

            logger.info(f"Analyzing file: {file_path}")

            # Enable/disable plugins based on user selection
            self._configure_plugins(
                use_color_analyzer,
                use_object_detector,
                use_caption_generator
            )

            if Path(file_path).suffix.lower() in video_suffixes:
                frame_count = int(num_frames)
                logger.info(f"Processing video with {frame_count} frames")
                results = self.engine.analyze_video(
                    file_path,
                    extract_method="keyframes",
                    num_frames=frame_count
                )
            else:
                logger.info("Processing image")
                results = self.engine.analyze_image(file_path)

            # Format results for display
            formatted_text = self._format_results(results)
            json_output = json.dumps(results, indent=2, ensure_ascii=False)

            logger.success("Analysis completed successfully")
            return formatted_text, json_output

        except Exception as e:
            # UI boundary: report the failure to the user instead of raising,
            # but keep the traceback in the log.
            logger.exception(f"Analysis error: {e}")
            error_msg = f"❌ Error: {str(e)}"
            error_json = json.dumps({"error": str(e)}, indent=2)
            return error_msg, error_json
    
    def _configure_plugins(
        self,
        use_color: bool,
        use_object: bool,
        use_caption: bool
    ):
        """
        Apply the user's plugin choices to the engine's registered plugins.

        Each flag is written to the ``enabled`` attribute of the matching
        entry in ``self.engine.plugins``. Names that are not registered are
        skipped without logging.
        """
        engine = self.engine
        selections = (
            ('color_analyzer', use_color),
            ('object_detector', use_object),
            ('caption_generator', use_caption),
        )

        for key, flag in selections:
            if key not in engine.plugins:
                continue
            engine.plugins[key].enabled = flag
            state = 'enabled' if flag else 'disabled'
            logger.info(f"Plugin '{key}': {state}")
    
    def _format_results(self, results: Dict[str, Any]) -> str:
        """Format analysis results as readable text"""
        lines = ["# 🎯 Analysis Results\n"]
        
        # File metadata
        if "metadata" in results and "file" in results["metadata"]:
            meta = results["metadata"]["file"]
            lines.append("## πŸ“ File Information")
            lines.append(f"- **Filename**: {meta.get('filename', 'N/A')}")
            lines.append(f"- **Type**: {meta.get('type', 'N/A')}")
            lines.append(f"- **Size**: {meta.get('size_mb', 0):.2f} MB")
            
            if meta.get('type') == 'video':
                lines.append(f"- **Resolution**: {meta.get('width')}x{meta.get('height')}")
                lines.append(f"- **Duration**: {meta.get('duration', 0):.2f} seconds")
                lines.append(f"- **FPS**: {meta.get('fps', 0):.2f}")
            else:
                lines.append(f"- **Resolution**: {meta.get('width')}x{meta.get('height')}")
            lines.append("")
        
        # Processing info
        if "metadata" in results and "processing" in results["metadata"]:
            proc = results["metadata"]["processing"]
            lines.append("## ⚑ Processing Information")
            lines.append(f"- **Duration**: {proc.get('duration_seconds', 0):.3f} seconds")
            lines.append(f"- **Plugins Used**: {', '.join(proc.get('plugins_used', []))}")
            if proc.get('frames_extracted'):
                lines.append(f"- **Frames Analyzed**: {proc.get('frames_extracted')}")
            lines.append("")
        
        # Analysis results
        if "results" in results:
            res = results["results"]
            
            # For videos
            if "frames" in res:
                lines.append(f"## 🎬 Video Analysis ({len(res['frames'])} frames)")
                
                # Summary
                if "summary" in res:
                    for plugin_name, summary_data in res["summary"].items():
                        lines.append(f"\n### {plugin_name.replace('_', ' ').title()}")
                        lines.append(f"```json\n{json.dumps(summary_data, indent=2, ensure_ascii=False)}\n```")
            
            # For images
            else:
                lines.append("## πŸ–ΌοΈ Image Analysis")
                for plugin_name, plugin_data in res.items():
                    lines.append(f"\n### {plugin_name.replace('_', ' ').title()}")
                    lines.append(f"```json\n{json.dumps(plugin_data, indent=2, ensure_ascii=False)}\n```")
        
        return "\n".join(lines)
    
    def create_interface(self) -> gr.Blocks:
        """
        Build the Gradio UI and wire it to ``analyze_media``.

        Layout: a header, then a row with two columns. The left column holds
        the file upload, the three plugin checkboxes, the video frame slider
        and the Analyze button. The right column holds the formatted and JSON
        result tabs and a download button that stays hidden until an analysis
        has run. Usage notes and a footer follow.

        Returns:
            The assembled ``gr.Blocks`` app (not yet launched).
        """

        with gr.Blocks(
            title="DeepVision Prompt Builder",
            theme="soft",
            css="""
                .output-text { font-family: 'Courier New', monospace; }
                .json-output { font-size: 12px; }
            """
        ) as demo:

            # Header
            gr.Markdown("""
            # 🎯 DeepVision Prompt Builder
            ### AI-Powered Image & Video Analysis with JSON Prompt Generation

            Upload an image or video to analyze its content and generate structured JSON prompts.
            """)

            with gr.Row():
                with gr.Column(scale=1):
                    # Input section
                    gr.Markdown("## 📤 Upload Media")

                    file_input = gr.File(
                        label="Upload Image or Video",
                        file_types=["image", "video"],
                        type="filepath"
                    )

                    gr.Markdown("### 🔌 Plugin Configuration")

                    color_checkbox = gr.Checkbox(
                        label="🎨 Color Analyzer (Fast)",
                        value=True,
                        info="Extract dominant colors and color schemes"
                    )

                    object_checkbox = gr.Checkbox(
                        label="🔍 Object Detector (Slow - CLIP)",
                        value=False,
                        info="Detect objects using CLIP model (~2-5GB download)"
                    )

                    caption_checkbox = gr.Checkbox(
                        label="💬 Caption Generator (Slow - BLIP-2)",
                        value=False,
                        info="Generate image captions (~2-5GB download)"
                    )

                    frames_slider = gr.Slider(
                        minimum=1,
                        maximum=20,
                        value=5,
                        step=1,
                        label="📹 Video Frames to Extract",
                        info="More frames = more accurate but slower"
                    )

                    analyze_btn = gr.Button(
                        "🚀 Analyze",
                        variant="primary",
                        size="lg"
                    )

                with gr.Column(scale=2):
                    # Output section
                    gr.Markdown("## 📊 Analysis Results")

                    with gr.Tabs():
                        with gr.Tab("📝 Formatted"):
                            output_text = gr.Markdown(
                                label="Results",
                                elem_classes=["output-text"]
                            )

                        with gr.Tab("📋 JSON"):
                            output_json = gr.Code(
                                label="JSON Output",
                                language="json",
                                elem_classes=["json-output"],
                                lines=20
                            )

                    # Hidden until the first analysis produces a file to serve.
                    download_btn = gr.DownloadButton(
                        label="💾 Download JSON",
                        visible=False
                    )

            # Examples
            gr.Markdown("## 💡 Example Usage")
            gr.Markdown("""
            1. **Quick Test**: Upload an image with only Color Analyzer enabled
            2. **Full Analysis**: Enable all plugins (requires model downloads)
            3. **Video Analysis**: Upload a video and adjust frame count

            **Note**: First-time use of Object Detector and Caption Generator will download ~2-5GB models.
            """)

            # Footer
            gr.Markdown("""
            ---
            **DeepVision Prompt Builder v0.1.0** | Built with ❤️ by AI Dev Collective

            📚 [Documentation](https://github.com/yourusername/deepvision) |
            🐛 [Report Issues](https://github.com/yourusername/deepvision/issues)
            """)

            # Event handlers
            def analyze_and_prepare_download(file, color, obj, cap, frames):
                """
                Run the analysis and expose its JSON as a downloadable file.

                Returns the three values bound to ``outputs`` below: the
                formatted text, the JSON string, and an update for the
                download button.
                """
                if file is None:
                    return "⚠️ Please upload a file first", "{}", gr.update(visible=False)

                text_result, json_result = self.analyze_media(
                    file, color, obj, cap, frames
                )

                # delete=False: the file has to outlive this handler so the
                # download button can serve it. The `with` block guarantees
                # the handle is closed even if the write fails.
                # NOTE(review): these files are never removed by this code;
                # confirm whether the host or Gradio cleans them up.
                with tempfile.NamedTemporaryFile(
                    mode='w',
                    suffix='.json',
                    delete=False,
                    encoding='utf-8'
                ) as temp_file:
                    temp_file.write(json_result)

                return (
                    text_result,
                    json_result,
                    gr.update(visible=True, value=temp_file.name)
                )

            analyze_btn.click(
                fn=analyze_and_prepare_download,
                inputs=[
                    file_input,
                    color_checkbox,
                    object_checkbox,
                    caption_checkbox,
                    frames_slider
                ],
                outputs=[output_text, output_json, download_btn]
            )

        return demo


def main():
    """Build the DeepVision interface and serve it with Gradio."""
    launch_options = {
        "server_name": "0.0.0.0",  # Allow external connections (required for HF Spaces)
        "server_port": 7860,       # HF Spaces default port
        "share": False,
        "show_error": True,
    }

    interface = DeepVisionGradioApp().create_interface()
    interface.launch(**launch_options)


# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()