# Hosting page header: Spaces - Runtime error
# File size: 13,247 Bytes
# eb5a9e1 bdb29e1 eb5a9e1
"""
DeepVision Prompt Builder - Gradio Interface
Hugging Face Spaces Deployment
This is the main Gradio application for the DeepVision Prompt Builder.
It provides a web interface for uploading images/videos and viewing analysis results.
"""
import gradio as gr
import json
from pathlib import Path
from typing import Dict, Any, Tuple
import tempfile
# Import core components
from core.engine import AnalysisEngine
from plugins.loader import PluginLoader
from core.logging_config import setup_logging
from loguru import logger
# Setup logging: runs once at import time, before the app class below is
# defined or instantiated, using the project's setup_logging helper.
setup_logging()
class DeepVisionGradioApp:
    """
    Gradio web interface for DeepVision Prompt Builder.

    Owns an ``AnalysisEngine`` and a ``PluginLoader``. Plugins are loaded and
    registered once at construction; every analysis request then switches
    the known plugins on or off before delegating to the engine.
    """

    # Lower-case file suffixes routed to video analysis. Anything else is
    # handed to the image analyzer. The upload widget accepts any video file,
    # so common containers beyond the original four are listed as well.
    VIDEO_EXTENSIONS = frozenset({
        '.mp4', '.avi', '.mov', '.mkv',
        '.webm', '.m4v', '.wmv', '.flv', '.mpg', '.mpeg',
    })

    def __init__(self):
        """Initialize the Gradio app: create the engine and register plugins."""
        self.engine = AnalysisEngine()
        self.plugin_loader = PluginLoader()
        self.setup_plugins()
        logger.info("DeepVision Gradio App initialized")

    def setup_plugins(self):
        """
        Load and register all available plugins.

        Errors are logged instead of raised, so construction of the app
        continues even when plugin loading fails.
        """
        try:
            # Load all plugins
            plugins = self.plugin_loader.load_all_plugins()

            # Register plugins with engine
            for name, plugin in plugins.items():
                self.engine.register_plugin(name, plugin)
                logger.info(f"Plugin registered: {name}")

            logger.success(f"Loaded {len(plugins)} plugins successfully")
        except Exception as e:
            # Deliberate best-effort boundary; keep the traceback in the log.
            logger.exception(f"Error loading plugins: {e}")

    @classmethod
    def _is_video(cls, file_path: str) -> bool:
        """Return True when the file suffix (case-insensitive) is a known video type."""
        return Path(file_path).suffix.lower() in cls.VIDEO_EXTENSIONS

    def analyze_media(
        self,
        file_path: str,
        use_color_analyzer: bool = True,
        use_object_detector: bool = False,
        use_caption_generator: bool = False,
        num_frames: int = 5
    ) -> Tuple[str, str]:
        """
        Analyze uploaded image or video.

        Args:
            file_path: Path to uploaded file
            use_color_analyzer: Enable color analysis
            use_object_detector: Enable object detection (heavy)
            use_caption_generator: Enable caption generation (heavy)
            num_frames: Number of frames to extract from video

        Returns:
            Tuple of (formatted results text, JSON string). On any failure
            the tuple holds an error message and a JSON object with an
            ``error`` key instead; no exception propagates to the caller.
        """
        try:
            logger.info(f"Analyzing file: {file_path}")

            # Enable/disable plugins based on user selection
            self._configure_plugins(
                use_color_analyzer,
                use_object_detector,
                use_caption_generator
            )

            # Detect file type and analyze
            if self._is_video(file_path):
                # UI sliders may deliver the count as a float; the engine is
                # given a plain integer.
                frame_count = int(num_frames)
                logger.info(f"Processing video with {frame_count} frames")
                results = self.engine.analyze_video(
                    file_path,
                    extract_method="keyframes",
                    num_frames=frame_count
                )
            else:
                logger.info("Processing image")
                results = self.engine.analyze_image(file_path)

            # Format results for display
            formatted_text = self._format_results(results)
            json_output = json.dumps(results, indent=2, ensure_ascii=False)

            logger.success("Analysis completed successfully")
            return formatted_text, json_output

        except Exception as e:
            # Top-level boundary for the UI: report the problem to the user
            # and keep the traceback in the log.
            logger.exception(f"Analysis error: {e}")
            error_msg = f"β Error: {str(e)}"
            error_json = json.dumps({"error": str(e)}, indent=2)
            return error_msg, error_json

    def _configure_plugins(
        self,
        use_color: bool,
        use_object: bool,
        use_caption: bool
    ):
        """
        Enable/disable plugins based on user selection.

        Only plugins actually registered with the engine are touched; the
        ``enabled`` attribute of each is overwritten with the requested flag.
        """
        plugin_config = {
            'color_analyzer': use_color,
            'object_detector': use_object,
            'caption_generator': use_caption
        }

        for plugin_name, enabled in plugin_config.items():
            if plugin_name in self.engine.plugins:
                self.engine.plugins[plugin_name].enabled = enabled
                logger.info(f"Plugin '{plugin_name}': {'enabled' if enabled else 'disabled'}")

    def _format_results(self, results: Dict[str, Any]) -> str:
        """
        Format analysis results as readable Markdown text.

        Args:
            results: Engine output. The optional keys read here are
                ``metadata.file``, ``metadata.processing`` and ``results``.

        Returns:
            Markdown string. Sections whose keys are absent are skipped;
            missing or ``None`` numeric fields are shown as 0 and missing
            dimensions as ``N/A``.
        """
        lines = ["# π― Analysis Results\n"]
        metadata = results.get("metadata") or {}

        # File metadata
        if "file" in metadata:
            meta = metadata["file"]
            width, height = meta.get('width'), meta.get('height')
            if width is not None and height is not None:
                resolution = f"{width}x{height}"
            else:
                resolution = "N/A"

            lines.append("## π File Information")
            lines.append(f"- **Filename**: {meta.get('filename', 'N/A')}")
            lines.append(f"- **Type**: {meta.get('type', 'N/A')}")
            # "or 0" keeps the float format spec from failing on None values.
            lines.append(f"- **Size**: {meta.get('size_mb') or 0:.2f} MB")
            lines.append(f"- **Resolution**: {resolution}")
            if meta.get('type') == 'video':
                lines.append(f"- **Duration**: {meta.get('duration') or 0:.2f} seconds")
                lines.append(f"- **FPS**: {meta.get('fps') or 0:.2f}")
            lines.append("")

        # Processing info
        if "processing" in metadata:
            proc = metadata["processing"]
            plugins_used = ', '.join(
                str(plugin) for plugin in (proc.get('plugins_used') or [])
            )
            lines.append("## β‘ Processing Information")
            lines.append(f"- **Duration**: {proc.get('duration_seconds') or 0:.3f} seconds")
            lines.append(f"- **Plugins Used**: {plugins_used}")
            if proc.get('frames_extracted'):
                lines.append(f"- **Frames Analyzed**: {proc.get('frames_extracted')}")
            lines.append("")

        # Analysis results
        if "results" in results:
            res = results["results"]

            if "frames" in res:
                # For videos: only the per-plugin summary is rendered.
                lines.append(f"## π¬ Video Analysis ({len(res['frames'])} frames)")
                if "summary" in res:
                    for plugin_name, summary_data in res["summary"].items():
                        lines.append(f"\n### {plugin_name.replace('_', ' ').title()}")
                        lines.append(f"```json\n{json.dumps(summary_data, indent=2, ensure_ascii=False)}\n```")
            else:
                # For images: one section per plugin.
                lines.append("## πΌοΈ Image Analysis")
                for plugin_name, plugin_data in res.items():
                    lines.append(f"\n### {plugin_name.replace('_', ' ').title()}")
                    lines.append(f"```json\n{json.dumps(plugin_data, indent=2, ensure_ascii=False)}\n```")

        return "\n".join(lines)

    def create_interface(self) -> gr.Blocks:
        """
        Create and return the Gradio interface.

        Builds a two-column layout (upload and plugin options on the left,
        results tabs on the right) and wires the analyze button to
        ``analyze_media``. The returned ``gr.Blocks`` is not launched here.
        """
        with gr.Blocks(
            title="DeepVision Prompt Builder",
            theme="soft",
            css="""
            .output-text { font-family: 'Courier New', monospace; }
            .json-output { font-size: 12px; }
            """
        ) as demo:
            # Header
            gr.Markdown("""
            # π― DeepVision Prompt Builder
            ### AI-Powered Image & Video Analysis with JSON Prompt Generation
            Upload an image or video to analyze its content and generate structured JSON prompts.
            """)

            with gr.Row():
                with gr.Column(scale=1):
                    # Input section
                    gr.Markdown("## π€ Upload Media")
                    file_input = gr.File(
                        label="Upload Image or Video",
                        file_types=["image", "video"],
                        type="filepath"
                    )

                    gr.Markdown("### π Plugin Configuration")
                    color_checkbox = gr.Checkbox(
                        label="π¨ Color Analyzer (Fast)",
                        value=True,
                        info="Extract dominant colors and color schemes"
                    )
                    object_checkbox = gr.Checkbox(
                        label="π Object Detector (Slow - CLIP)",
                        value=False,
                        info="Detect objects using CLIP model (~2-5GB download)"
                    )
                    caption_checkbox = gr.Checkbox(
                        label="π¬ Caption Generator (Slow - BLIP-2)",
                        value=False,
                        info="Generate image captions (~2-5GB download)"
                    )
                    frames_slider = gr.Slider(
                        minimum=1,
                        maximum=20,
                        value=5,
                        step=1,
                        label="πΉ Video Frames to Extract",
                        info="More frames = more accurate but slower"
                    )
                    analyze_btn = gr.Button(
                        "π Analyze",
                        variant="primary",
                        size="lg"
                    )

                with gr.Column(scale=2):
                    # Output section
                    gr.Markdown("## π Analysis Results")
                    with gr.Tabs():
                        with gr.Tab("π Formatted"):
                            output_text = gr.Markdown(
                                label="Results",
                                elem_classes=["output-text"]
                            )
                        with gr.Tab("π JSON"):
                            output_json = gr.Code(
                                label="JSON Output",
                                language="json",
                                elem_classes=["json-output"],
                                lines=20
                            )
                            # NOTE(review): nesting was reconstructed from a
                            # flattened source; confirm the button belongs
                            # inside the JSON tab.
                            download_btn = gr.DownloadButton(
                                label="πΎ Download JSON",
                                visible=False
                            )

            # Examples
            gr.Markdown("## π‘ Example Usage")
            gr.Markdown("""
            1. **Quick Test**: Upload an image with only Color Analyzer enabled
            2. **Full Analysis**: Enable all plugins (requires model downloads)
            3. **Video Analysis**: Upload a video and adjust frame count
            **Note**: First-time use of Object Detector and Caption Generator will download ~2-5GB models.
            """)

            # Footer
            gr.Markdown("""
            ---
            **DeepVision Prompt Builder v0.1.0** | Built with β€οΈ by AI Dev Collective
            π [Documentation](https://github.com/yourusername/deepvision) |
            π [Report Issues](https://github.com/yourusername/deepvision/issues)
            """)

            # Event handlers
            def analyze_and_prepare_download(file, color, obj, cap, frames):
                """Analyze the upload and expose the JSON result as a downloadable file."""
                if file is None:
                    return "β οΈ Please upload a file first", "{}", gr.update(visible=False)

                text_result, json_result = self.analyze_media(
                    file, color, obj, cap, frames
                )

                # Save JSON to temp file for download. delete=False keeps the
                # file on disk after the handle closes so the download button
                # can serve it by path.
                with tempfile.NamedTemporaryFile(
                    mode='w',
                    suffix='.json',
                    delete=False,
                    encoding='utf-8'
                ) as temp_file:
                    temp_file.write(json_result)

                return (
                    text_result,
                    json_result,
                    gr.update(visible=True, value=temp_file.name)
                )

            analyze_btn.click(
                fn=analyze_and_prepare_download,
                inputs=[
                    file_input,
                    color_checkbox,
                    object_checkbox,
                    caption_checkbox,
                    frames_slider
                ],
                outputs=[output_text, output_json, download_btn]
            )

        return demo
def main():
    """Build the DeepVision interface and start serving it."""
    launch_options = {
        # Allow external connections (required for HF Spaces)
        "server_name": "0.0.0.0",
        # HF Spaces default port
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }

    interface = DeepVisionGradioApp().create_interface()
    interface.launch(**launch_options)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|