#!/usr/bin/env python3 """ UI components for the Streamlit application """ import streamlit as st from typing import Dict, List, Any, Optional from local_models import get_local_model_manager # Available Hugging Face models for remote API AVAILABLE_MODELS = { "microsoft/kosmos-2-patch14-224": "Kosmos-2", "Salesforce/blip-image-captioning-large": "BLIP Image Captioning", "microsoft/DialoGPT-medium": "DialoGPT", "microsoft/git-large-coco": "GIT Large COCO", "nlpconnect/vit-gpt2-image-captioning": "ViT-GPT2" } def render_sidebar_config(settings: Dict, local_models_available: bool, local_manager: Optional[Any]) -> Dict[str, Any]: """ Render the sidebar configuration panel Returns configuration settings """ with st.sidebar: st.header("Konfiguration") # Model type selection available_options = [] if local_models_available: available_options.append("Local Models") available_options.append("Remote API") model_type = st.radio( "Model Type", available_options, help="Wähle zwischen den KI-Modellen" ) # Model selection based on type if model_type == "Local Models" and local_models_available: selected_model, api_token = _render_local_model_config(local_manager) else: selected_model, api_token = _render_remote_model_config(settings) # Frame extraction rate fps = st.slider( "Frames per second to extract", min_value=0.1, max_value=5.0, value=1.0, step=0.1 ) # Ontology settings st.subheader("Ontology Analysis") use_ontology = st.checkbox( "Enable Ontology Analysis", value=True, help="Use ontology-based classification (NONE/LOW/MEDIUM/HIGH/CRITICAL)" ) if not use_ontology: st.info("🔄 Ontology analysis disabled - showing raw model output only") return { "model_type": model_type, "selected_model": selected_model, "api_token": api_token, "fps": fps, "use_ontology": use_ontology } def _render_local_model_config(local_manager) -> tuple: """Render local model configuration""" available_local_models = local_manager.get_available_models() # Standard-Auswahl auf "Person on Track Detector" setzen (falls vorhanden) default_index = ( available_local_models.index("Person on Track Detector") if "Person on Track Detector" in available_local_models else 0 ) selected_model = st.selectbox( "Lokales Modell auswählen", options=available_local_models, index=default_index, # <-- diese Zeile ist neu help="Choose between CNN (fast) or Transformer (detailed) models" ) # Show model info model_info = local_manager.get_model_info() if selected_model in model_info: with st.expander("Model Information"): st.write(f"**Description:** {model_info[selected_model]['description']}") st.write(f"**Strengths:** {model_info[selected_model]['strengths']}") st.write(f"**Size:** {model_info[selected_model]['size']}") return selected_model, None # No API token needed for local models def _render_remote_model_config(settings: Dict) -> tuple: """Render remote API model configuration""" default_token = settings.get('hugging_face_api_token', '') api_token = st.text_input( "Hugging Face API Token", value=default_token, type="password", help="Get your token from https://huggingface.co/settings/tokens or save in settings.json" ) selected_model = st.selectbox( "Select Model", options=list(AVAILABLE_MODELS.keys()), format_func=lambda x: AVAILABLE_MODELS[x] ) return selected_model, api_token def render_input_section() -> Dict[str, Any]: """ Render the input section for video upload and prompts Returns input data """ st.header("Input") # Video upload video_file = st.file_uploader( "Upload Video", type=['mp4', 'avi', 'mov', 'mkv'], help="Upload a video file to analyze" ) return { "video_file": video_file } def render_prompt_section(config: Dict[str, Any]) -> str: """ Render prompt input section based on model configuration """ model_type = config["model_type"] selected_model = config["selected_model"] # Prompt input (conditional based on model) if (model_type == "Local Models" and selected_model == "Person on Track Detector"): # Person on Track Detector works automatically st.info("🤖Das Modell ist mit einem ontologiebasierten Ansatz gefüttert und erfordert keinen Prompt") return "automatic" else: # Regular models need user prompt return st.text_area( "Analysis Prompt", placeholder="Describe what you see in the image...", help="Enter the prompt to analyze each frame" ) def render_process_button() -> bool: """Render the process button""" return st.button("Process Video", type="primary") def render_results_header(): """Render the results section header""" st.header("Results") return st.container() def render_frame_result(result_data: Dict[str, Any]): """ Render a single frame result with ontology analysis """ ontology = result_data['ontology_analysis'] # Create expander title - only include severity if ontology is active if ontology.get('ontology_used', False): severity_icon = ontology.get('severity_icon', '✅') severity = ontology.get('severity', 'NONE') expander_title = f"{severity_icon} {severity} - Frame {result_data['frame_number']} (t={result_data['timestamp']:.1f}s)" else: # Clean title without severity symbols when ontology is disabled expander_title = f"Frame {result_data['frame_number']} (t={result_data['timestamp']:.1f}s)" with st.expander(expander_title): col_img, col_text = st.columns([1, 2]) with col_img: st.image( result_data['image'], caption=f"Frame {result_data['frame_number']}", use_container_width=True ) with col_text: # Display ontology analysis first if enabled if ontology.get('ontology_used', False): _render_ontology_analysis(ontology) st.divider() # Display original model results _render_model_output(result_data['result']) def _render_ontology_analysis(ontology: Dict[str, Any]): """Render ontology analysis section""" severity = ontology.get('severity', 'NONE') severity_icon = ontology.get('severity_icon', '✅') severity_color = ontology.get('severity_color', 'green') # Severity display with color st.markdown(f"**Safety Assessment:** :{severity_color}[{severity_icon} {severity}]") # Score display if ontology.get('score', 0) > 0: st.metric("Risk Score", f"{ontology['score']}/100") # Show explanations if available if ontology.get('explanations'): st.write("**Ontology Analysis:**") for explanation in ontology['explanations']: st.write(f"• {explanation}") # Show fired rules if available if ontology.get('fired_rules'): with st.expander("Technical Details"): st.write("**Triggered Rules:**") for rule in ontology['fired_rules']: st.code(rule) if ontology.get('labels'): st.write("**Detected Hazard Labels:**") for label in ontology['labels']: st.code(label) def _render_model_output(result: Dict[str, Any]): """Render original model output section""" st.write("**Model Output:**") if 'error' in result: st.error(f"Error: {result['error']}") elif 'person_on_track_detection' in result: _render_person_detection_result(result['person_on_track_detection']) else: _render_general_model_result(result) def _render_person_detection_result(detection: Dict[str, Any]): """Render person on track detection specific results""" people_count = detection.get('people_count', 0) confidence = detection.get('confidence', 0) analysis = detection.get('analysis', 'No analysis') st.write(f"**Detection Analysis:** {analysis}") # Show metrics col1, col2 = st.columns(2) with col1: st.metric("👥 People Detected", people_count) with col2: st.metric("📊 Model Confidence", f"{confidence:.0%}") def _render_general_model_result(result: Dict[str, Any]): """Render general model results (captioning, etc.)""" if 'generated_text' in result: st.write(f"*{result['generated_text']}*") elif isinstance(result, list) and len(result) > 0: if 'generated_text' in result[0]: st.write(f"*{result[0]['generated_text']}*") else: st.json(result[0]) else: st.json(result) def render_validation_errors(video_file, prompt, api_token, model_type, local_models_available, selected_model): """ Render validation error messages """ if not video_file: st.error("Please upload a video file") if not prompt and not (model_type == "Local Models" and selected_model == "Person on Track Detector"): st.error("Please enter an analysis prompt") if not api_token and model_type == "Remote API": st.error("Please provide your Hugging Face API token for remote models") if model_type == "Local Models" and not local_models_available: st.error("Local models failed to initialize. Check your installation.") def render_instructions(): """Render the instructions section""" with st.expander("How to use"): st.markdown(""" ## Local AI Models (Recommended) 1. **Upload a video**: Choose a video file (MP4, AVI, MOV, or MKV) 2. **Select model type**: Choose "Local Models" for offline processing 3. **Choose AI model**: - **CNN (BLIP)**: Fast, good for object detection (~1.2GB) - **Transformer (ViT-GPT2)**: Detailed descriptions (~1.8GB) 4. **Enter a prompt**: Describe what you want the AI to analyze 5. **Enable/Disable Ontology**: Toggle ontology-based risk assessment 6. **Adjust frame rate**: Set frames per second to extract (default: 1 fps) 7. **Click Process**: Frames are processed locally on your machine ## Ontology Analysis - **✅ NONE**: No safety concerns detected - **🟢 LOW**: Minor safety considerations - **🟠 MEDIUM**: Moderate safety risk - **⚠️ HIGH**: Significant safety risk - **🚨 CRITICAL**: Immediate safety hazard ## Remote API Models (Optional) 1. **Get API token**: Visit [Hugging Face Settings](https://huggingface.co/settings/tokens) 2. **Select "Remote API"** in model type 3. **Enter token** and select remote model ## Video Support Features - **Automatic corruption repair**: Handles videos with corrupted moov atoms - **FFmpeg integration**: Auto-repairs problematic video files - **Multiple formats**: MP4, AVI, MOV, MKV support ## Requirements - **Python packages**: torch, transformers, accelerate (see requirements.txt) - **Optional**: FFmpeg for video repair (download from https://ffmpeg.org) - **Storage**: ~3GB for both local models """)