#!/usr/bin/env python3
"""
UI components for the Streamlit application
"""
import streamlit as st
from typing import Dict, List, Any, Optional
from local_models import get_local_model_manager

# Available Hugging Face models for remote API
AVAILABLE_MODELS = {
    "microsoft/kosmos-2-patch14-224": "Kosmos-2",
    "Salesforce/blip-image-captioning-large": "BLIP Image Captioning",
    "microsoft/DialoGPT-medium": "DialoGPT",
    "microsoft/git-large-coco": "GIT Large COCO",
    "nlpconnect/vit-gpt2-image-captioning": "ViT-GPT2"
}


def render_sidebar_config(settings: Dict, local_models_available: bool, local_manager: Optional[Any]) -> Dict[str, Any]:
    """
    Render the sidebar configuration panel
    Returns the configuration settings as a dict
    """
    with st.sidebar:
        st.header("Configuration")

        # Model type selection
        available_options = []
        if local_models_available:
            available_options.append("Local Models")
        available_options.append("Remote API")

        model_type = st.radio(
            "Model Type",
            available_options,
            help="Choose between the available AI models"
        )

        # Model selection based on type
        if model_type == "Local Models" and local_models_available:
            selected_model, api_token = _render_local_model_config(local_manager)
        else:
            selected_model, api_token = _render_remote_model_config(settings)

        # Frame extraction rate
        fps = st.slider(
            "Frames per second to extract",
            min_value=0.1,
            max_value=5.0,
            value=1.0,
            step=0.1
        )

        # Ontology settings
        st.subheader("Ontology Analysis")
        use_ontology = st.checkbox(
            "Enable Ontology Analysis",
            value=True,
            help="Use ontology-based classification (NONE/LOW/MEDIUM/HIGH/CRITICAL)"
        )
        if not use_ontology:
            st.info("🔄 Ontology analysis disabled - showing raw model output only")

        return {
            "model_type": model_type,
            "selected_model": selected_model,
            "api_token": api_token,
            "fps": fps,
            "use_ontology": use_ontology
        }


def _render_local_model_config(local_manager) -> tuple:
    """Render local model configuration"""
    available_local_models = local_manager.get_available_models()

    # Default the selection to "Person on Track Detector" (if available)
    default_index = (
        available_local_models.index("Person on Track Detector")
        if "Person on Track Detector" in available_local_models else 0
    )

    selected_model = st.selectbox(
        "Select Local Model",
        options=available_local_models,
        index=default_index,
        help="Choose between CNN (fast) or Transformer (detailed) models"
    )

    # Show model info
    model_info = local_manager.get_model_info()
    if selected_model in model_info:
        with st.expander("Model Information"):
            st.write(f"**Description:** {model_info[selected_model]['description']}")
            st.write(f"**Strengths:** {model_info[selected_model]['strengths']}")
            st.write(f"**Size:** {model_info[selected_model]['size']}")

    return selected_model, None  # No API token needed for local models


def _render_remote_model_config(settings: Dict) -> tuple:
    """Render remote API model configuration"""
    default_token = settings.get('hugging_face_api_token', '')

    api_token = st.text_input(
        "Hugging Face API Token",
        value=default_token,
        type="password",
        help="Get your token from https://huggingface.co/settings/tokens or save it in settings.json"
    )

    selected_model = st.selectbox(
        "Select Model",
        options=list(AVAILABLE_MODELS.keys()),
        format_func=lambda x: AVAILABLE_MODELS[x]
    )

    return selected_model, api_token


def render_input_section() -> Dict[str, Any]:
    """
    Render the input section for video upload and prompts
    Returns input data
    """
    st.header("Input")

    # Video upload
    video_file = st.file_uploader(
        "Upload Video",
        type=['mp4', 'avi', 'mov', 'mkv'],
        help="Upload a video file to analyze"
    )

    return {
        "video_file": video_file
    }


def render_prompt_section(config: Dict[str, Any]) -> str:
    """
    Render the prompt input section based on the model configuration
    """
    model_type = config["model_type"]
    selected_model = config["selected_model"]

    # Prompt input (conditional based on model)
    if (model_type == "Local Models" and
            selected_model == "Person on Track Detector"):
        # Person on Track Detector works automatically
        st.info("🤖 This model is driven by an ontology-based approach and requires no prompt")
        return "automatic"
    else:
        # Regular models need a user prompt
        return st.text_area(
            "Analysis Prompt",
            placeholder="Describe what you see in the image...",
            help="Enter the prompt to analyze each frame"
        )


def render_process_button() -> bool:
    """Render the process button"""
    return st.button("Process Video", type="primary")


def render_results_header():
    """Render the results section header"""
    st.header("Results")
    return st.container()
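

# Expected shape of the `result_data` dict consumed by render_frame_result() below,
# reconstructed from the keys it accesses. The example values are illustrative only;
# this dict is assumed to be built by the frame-processing code elsewhere in the app:
#
#   result_data = {
#       "frame_number": 12,
#       "timestamp": 12.0,                    # seconds into the video
#       "image": frame_image,                 # anything st.image() accepts (PIL image, numpy array, ...)
#       "result": {"generated_text": "..."},  # or {"person_on_track_detection": {...}} or {"error": "..."}
#       "ontology_analysis": {
#           "ontology_used": True,
#           "severity": "HIGH",
#           "severity_icon": "⚠️",
#           "severity_color": "orange",
#           "score": 72,
#           "explanations": ["..."],
#           "fired_rules": ["..."],
#           "labels": ["..."],
#       },
#   }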
def render_frame_result(result_data: Dict[str, Any]):
    """
    Render a single frame result with ontology analysis
    """
    ontology = result_data['ontology_analysis']

    # Create expander title - only include severity if ontology is active
    if ontology.get('ontology_used', False):
        severity_icon = ontology.get('severity_icon', '✅')
        severity = ontology.get('severity', 'NONE')
        expander_title = f"{severity_icon} {severity} - Frame {result_data['frame_number']} (t={result_data['timestamp']:.1f}s)"
    else:
        # Clean title without severity symbols when ontology is disabled
        expander_title = f"Frame {result_data['frame_number']} (t={result_data['timestamp']:.1f}s)"

    with st.expander(expander_title):
        col_img, col_text = st.columns([1, 2])

        with col_img:
            st.image(
                result_data['image'],
                caption=f"Frame {result_data['frame_number']}",
                use_container_width=True
            )

        with col_text:
            # Display ontology analysis first if enabled
            if ontology.get('ontology_used', False):
                _render_ontology_analysis(ontology)
                st.divider()
            # Display original model results
            _render_model_output(result_data['result'])


def _render_ontology_analysis(ontology: Dict[str, Any]):
    """Render ontology analysis section"""
    severity = ontology.get('severity', 'NONE')
    severity_icon = ontology.get('severity_icon', '✅')
    severity_color = ontology.get('severity_color', 'green')

    # Severity display with color
    st.markdown(f"**Safety Assessment:** :{severity_color}[{severity_icon} {severity}]")

    # Score display
    if ontology.get('score', 0) > 0:
        st.metric("Risk Score", f"{ontology['score']}/100")

    # Show explanations if available
    if ontology.get('explanations'):
        st.write("**Ontology Analysis:**")
        for explanation in ontology['explanations']:
            st.write(f"• {explanation}")

    # Show fired rules if available
    if ontology.get('fired_rules'):
        with st.expander("Technical Details"):
            st.write("**Triggered Rules:**")
            for rule in ontology['fired_rules']:
                st.code(rule)

    if ontology.get('labels'):
        st.write("**Detected Hazard Labels:**")
        for label in ontology['labels']:
            st.code(label)


def _render_model_output(result: Dict[str, Any]):
    """Render original model output section"""
    st.write("**Model Output:**")

    if 'error' in result:
        st.error(f"Error: {result['error']}")
    elif 'person_on_track_detection' in result:
        _render_person_detection_result(result['person_on_track_detection'])
    else:
        _render_general_model_result(result)


def _render_person_detection_result(detection: Dict[str, Any]):
    """Render person on track detection specific results"""
    people_count = detection.get('people_count', 0)
    confidence = detection.get('confidence', 0)
    analysis = detection.get('analysis', 'No analysis')

    st.write(f"**Detection Analysis:** {analysis}")

    # Show metrics
    col1, col2 = st.columns(2)
    with col1:
        st.metric("👥 People Detected", people_count)
    with col2:
        st.metric("📊 Model Confidence", f"{confidence:.0%}")


def _render_general_model_result(result: Any):
    """Render general model results (captioning, etc.)"""
    # Remote API responses may arrive as a list of dicts rather than a single dict
    if 'generated_text' in result:
        st.write(f"*{result['generated_text']}*")
    elif isinstance(result, list) and len(result) > 0:
        if 'generated_text' in result[0]:
            st.write(f"*{result[0]['generated_text']}*")
        else:
            st.json(result[0])
    else:
        st.json(result)


def render_validation_errors(video_file, prompt, api_token, model_type, local_models_available, selected_model):
    """
    Render validation error messages
    """
    if not video_file:
        st.error("Please upload a video file")
    if not prompt and not (model_type == "Local Models" and selected_model == "Person on Track Detector"):
        st.error("Please enter an analysis prompt")
    if not api_token and model_type == "Remote API":
        st.error("Please provide your Hugging Face API token for remote models")
    if model_type == "Local Models" and not local_models_available:
        st.error("Local models failed to initialize. Check your installation.")


def render_instructions():
    """Render the instructions section"""
    with st.expander("How to use"):
        st.markdown("""
## Local AI Models (Recommended)
1. **Upload a video**: Choose a video file (MP4, AVI, MOV, or MKV)
2. **Select model type**: Choose "Local Models" for offline processing
3. **Choose AI model**:
   - **CNN (BLIP)**: Fast, good for object detection (~1.2GB)
   - **Transformer (ViT-GPT2)**: Detailed descriptions (~1.8GB)
4. **Enter a prompt**: Describe what you want the AI to analyze
5. **Enable/Disable Ontology**: Toggle ontology-based risk assessment
6. **Adjust frame rate**: Set frames per second to extract (default: 1 fps)
7. **Click Process**: Frames are processed locally on your machine

## Ontology Analysis
- **✅ NONE**: No safety concerns detected
- **🟢 LOW**: Minor safety considerations
- **🟠 MEDIUM**: Moderate safety risk
- **⚠️ HIGH**: Significant safety risk
- **🚨 CRITICAL**: Immediate safety hazard

## Remote API Models (Optional)
1. **Get API token**: Visit [Hugging Face Settings](https://huggingface.co/settings/tokens)
2. **Select "Remote API"** in model type
3. **Enter token** and select remote model

## Video Support Features
- **Automatic corruption repair**: Handles videos with corrupted moov atoms
- **FFmpeg integration**: Auto-repairs problematic video files
- **Multiple formats**: MP4, AVI, MOV, MKV support

## Requirements
- **Python packages**: torch, transformers, accelerate (see requirements.txt)
- **Optional**: FFmpeg for video repair (download from https://ffmpeg.org)
- **Storage**: ~3GB for both local models
""")