#!/usr/bin/env python3
"""
UI components for the Streamlit application
"""
import streamlit as st
from typing import Dict, List, Any, Optional
from local_models import get_local_model_manager


# Available Hugging Face models for remote API
AVAILABLE_MODELS = {
    "microsoft/kosmos-2-patch14-224": "Kosmos-2",
    "Salesforce/blip-image-captioning-large": "BLIP Image Captioning",
    "microsoft/DialoGPT-medium": "DialoGPT",
    "microsoft/git-large-coco": "GIT Large COCO",
    "nlpconnect/vit-gpt2-image-captioning": "ViT-GPT2"
}


def render_sidebar_config(settings: Dict, local_models_available: bool, local_manager: Optional[Any]) -> Dict[str, Any]:
    """
    Render the sidebar configuration panel
    Returns configuration settings
    """
    with st.sidebar:
        st.header("Konfiguration")
        
        # Model type selection
        available_options = []
        if local_models_available:
            available_options.append("Local Models")
        available_options.append("Remote API")
        
        model_type = st.radio(
            "Model Type",
            available_options,
            help="Wähle zwischen den KI-Modellen"
        )
        
        # Model selection based on type
        if model_type == "Local Models" and local_models_available:
            selected_model, api_token = _render_local_model_config(local_manager)
        else:
            selected_model, api_token = _render_remote_model_config(settings)
        
        # Frame extraction rate
        fps = st.slider(
            "Frames per second to extract",
            min_value=0.1,
            max_value=5.0,
            value=1.0,
            step=0.1
        )
        
        # Ontology settings
        st.subheader("Ontology Analysis")
        use_ontology = st.checkbox(
            "Enable Ontology Analysis",
            value=True,
            help="Use ontology-based classification (NONE/LOW/MEDIUM/HIGH/CRITICAL)"
        )
        
        if not use_ontology:
            st.info("🔄 Ontology analysis disabled - showing raw model output only")
    
    return {
        "model_type": model_type,
        "selected_model": selected_model,
        "api_token": api_token,
        "fps": fps,
        "use_ontology": use_ontology
    }
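
# A hedged sketch of the configuration dict returned by render_sidebar_config
# (key names mirror the return statement above; the sample values are
# illustrative only):
#
# {
#     "model_type": "Local Models",              # or "Remote API"
#     "selected_model": "Person on Track Detector",
#     "api_token": None,                         # set only for "Remote API"
#     "fps": 1.0,
#     "use_ontology": True,
# }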


def _render_local_model_config(local_manager) -> tuple:
    """Render local model configuration"""
    available_local_models = local_manager.get_available_models()

    # Default to "Person on Track Detector" when it is available
    default_index = (
        available_local_models.index("Person on Track Detector")
        if "Person on Track Detector" in available_local_models else 0
    )

    selected_model = st.selectbox(
        "Select Local Model",
        options=available_local_models,
        index=default_index,
        help="Choose between CNN (fast) and Transformer (detailed) models"
    )

    # Show model info
    model_info = local_manager.get_model_info()
    if selected_model in model_info:
        with st.expander("Model Information"):
            st.write(f"**Description:** {model_info[selected_model]['description']}")
            st.write(f"**Strengths:** {model_info[selected_model]['strengths']}")
            st.write(f"**Size:** {model_info[selected_model]['size']}")
    
    return selected_model, None  # No API token needed for local models


def _render_remote_model_config(settings: Dict) -> tuple:
    """Render remote API model configuration"""
    default_token = settings.get('hugging_face_api_token', '')
    api_token = st.text_input(
        "Hugging Face API Token",
        value=default_token,
        type="password",
        help="Get your token from https://huggingface.co/settings/tokens or save in settings.json"
    )
    
    selected_model = st.selectbox(
        "Select Model",
        options=list(AVAILABLE_MODELS.keys()),
        format_func=lambda x: AVAILABLE_MODELS[x]
    )
    
    return selected_model, api_token


def render_input_section() -> Dict[str, Any]:
    """
    Render the input section for video upload and prompts
    Returns input data
    """
    st.header("Input")
    
    # Video upload
    video_file = st.file_uploader(
        "Upload Video",
        type=['mp4', 'avi', 'mov', 'mkv'],
        help="Upload a video file to analyze"
    )
    
    return {
        "video_file": video_file
    }


def render_prompt_section(config: Dict[str, Any]) -> str:
    """
    Render prompt input section based on model configuration
    """
    model_type = config["model_type"]
    selected_model = config["selected_model"]
    
    # Prompt input (conditional based on model)
    if (model_type == "Local Models" and 
        selected_model == "Person on Track Detector"):
        # Person on Track Detector works automatically
        st.info("🤖Das Modell ist mit einem ontologiebasierten Ansatz gefüttert und erfordert keinen Prompt")
        return "automatic"
    else:
        # Regular models need user prompt
        return st.text_area(
            "Analysis Prompt",
            placeholder="Describe what you see in the image...",
            help="Enter the prompt to analyze each frame"
        )


def render_process_button() -> bool:
    """Render the process button"""
    return st.button("Process Video", type="primary")


def render_results_header():
    """Render the results section header"""
    st.header("Results")
    return st.container()


def render_frame_result(result_data: Dict[str, Any]):
    """
    Render a single frame result with ontology analysis
    """
    ontology = result_data['ontology_analysis']
    
    # Create expander title - only include severity if ontology is active
    if ontology.get('ontology_used', False):
        severity_icon = ontology.get('severity_icon', '✅')
        severity = ontology.get('severity', 'NONE')
        expander_title = f"{severity_icon} {severity} - Frame {result_data['frame_number']} (t={result_data['timestamp']:.1f}s)"
    else:
        # Clean title without severity symbols when ontology is disabled
        expander_title = f"Frame {result_data['frame_number']} (t={result_data['timestamp']:.1f}s)"
    
    with st.expander(expander_title):
        col_img, col_text = st.columns([1, 2])
        
        with col_img:
            st.image(
                result_data['image'],
                caption=f"Frame {result_data['frame_number']}",
                use_container_width=True
            )
        
        with col_text:
            # Display ontology analysis first if enabled
            if ontology.get('ontology_used', False):
                _render_ontology_analysis(ontology)
                st.divider()
            
            # Display original model results
            _render_model_output(result_data['result'])
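
# A hedged reconstruction of the result_data shape that render_frame_result
# expects, based solely on the keys accessed above (the producing pipeline is
# not part of this module):
#
# result_data = {
#     "frame_number": 12,                 # int, shown in titles and captions
#     "timestamp": 12.0,                  # float seconds, formatted as t=12.0s
#     "image": ...,                       # anything st.image accepts (PIL image, bytes, ndarray)
#     "result": {...},                    # raw model output, see _render_model_output
#     "ontology_analysis": {...},         # see _render_ontology_analysis below
# }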


def _render_ontology_analysis(ontology: Dict[str, Any]):
    """Render ontology analysis section"""
    severity = ontology.get('severity', 'NONE')
    severity_icon = ontology.get('severity_icon', '✅')
    severity_color = ontology.get('severity_color', 'green')
    
    # Severity display with color
    st.markdown(f"**Safety Assessment:** :{severity_color}[{severity_icon} {severity}]")
    
    # Score display
    if ontology.get('score', 0) > 0:
        st.metric("Risk Score", f"{ontology['score']}/100")
    
    # Show explanations if available
    if ontology.get('explanations'):
        st.write("**Ontology Analysis:**")
        for explanation in ontology['explanations']:
            st.write(f"• {explanation}")
    
    # Show fired rules if available
    if ontology.get('fired_rules'):
        with st.expander("Technical Details"):
            st.write("**Triggered Rules:**")
            for rule in ontology['fired_rules']:
                st.code(rule)
            
            if ontology.get('labels'):
                st.write("**Detected Hazard Labels:**")
                for label in ontology['labels']:
                    st.code(label)
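
# Keys read from the ontology dict by _render_ontology_analysis, with hedged
# example values; all keys are optional thanks to .get(), and 'ontology_used'
# is checked by render_frame_result before this function is called:
#
# ontology = {
#     "ontology_used": True,
#     "severity": "HIGH",
#     "severity_icon": "⚠️",
#     "severity_color": "orange",         # a color name st.markdown supports
#     "score": 72,                        # 0-100 risk score
#     "explanations": ["..."],            # human-readable findings
#     "fired_rules": ["..."],             # triggered ontology rules
#     "labels": ["..."],                  # detected hazard labels
# }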


def _render_model_output(result: Dict[str, Any]):
    """Render original model output section"""
    st.write("**Model Output:**")
    
    if 'error' in result:
        st.error(f"Error: {result['error']}")
    elif 'person_on_track_detection' in result:
        _render_person_detection_result(result['person_on_track_detection'])
    else:
        _render_general_model_result(result)


def _render_person_detection_result(detection: Dict[str, Any]):
    """Render person on track detection specific results"""
    people_count = detection.get('people_count', 0)
    confidence = detection.get('confidence', 0)
    analysis = detection.get('analysis', 'No analysis')
    
    st.write(f"**Detection Analysis:** {analysis}")
    
    # Show metrics
    col1, col2 = st.columns(2)
    with col1:
        st.metric("👥 People Detected", people_count)
    with col2:
        st.metric("📊 Model Confidence", f"{confidence:.0%}")


def _render_general_model_result(result: Any):
    """Render general model results (captioning, etc.); accepts dict or list output"""
    if 'generated_text' in result:
        st.write(f"*{result['generated_text']}*")
    elif isinstance(result, list) and len(result) > 0:
        if 'generated_text' in result[0]:
            st.write(f"*{result[0]['generated_text']}*")
        else:
            st.json(result[0])
    else:
        st.json(result)


def render_validation_errors(video_file, prompt, api_token, model_type, local_models_available, selected_model):
    """
    Render validation error messages
    """
    if not video_file:
        st.error("Please upload a video file")
    if not prompt and not (model_type == "Local Models" and selected_model == "Person on Track Detector"):
        st.error("Please enter an analysis prompt")
    if not api_token and model_type == "Remote API":
        st.error("Please provide your Hugging Face API token for remote models")
    if model_type == "Local Models" and not local_models_available:
        st.error("Local models failed to initialize. Check your installation.")


def render_instructions():
    """Render the instructions section"""
    with st.expander("How to use"):
        st.markdown("""
        ## Local AI Models (Recommended)
        1. **Upload a video**: Choose a video file (MP4, AVI, MOV, or MKV)
        2. **Select model type**: Choose "Local Models" for offline processing
        3. **Choose AI model**: 
           - **CNN (BLIP)**: Fast, good for object detection (~1.2GB)
           - **Transformer (ViT-GPT2)**: Detailed descriptions (~1.8GB)
        4. **Enter a prompt**: Describe what you want the AI to analyze
        5. **Enable/Disable Ontology**: Toggle ontology-based risk assessment
        6. **Adjust frame rate**: Set frames per second to extract (default: 1 fps)
        7. **Click Process**: Frames are processed locally on your machine
        
        ## Ontology Analysis
        - **✅ NONE**: No safety concerns detected
        - **🟢 LOW**: Minor safety considerations
        - **🟠 MEDIUM**: Moderate safety risk
        - **⚠️ HIGH**: Significant safety risk
        - **🚨 CRITICAL**: Immediate safety hazard
        
        ## Remote API Models (Optional)
        1. **Get API token**: Visit [Hugging Face Settings](https://huggingface.co/settings/tokens)
        2. **Select "Remote API"** in model type
        3. **Enter token** and select remote model
        
        ## Video Support Features
        - **Automatic corruption repair**: Handles videos with corrupted moov atoms
        - **FFmpeg integration**: Auto-repairs problematic video files
        - **Multiple formats**: MP4, AVI, MOV, MKV support
        
        ## Requirements
        - **Python packages**: torch, transformers, accelerate (see requirements.txt)
        - **Optional**: FFmpeg for video repair (download from https://ffmpeg.org)
        - **Storage**: ~3GB for both local models
        """)