#!/usr/bin/env python3
"""
UI components for the Streamlit application
"""
import streamlit as st
from typing import Dict, Any, Optional

from local_models import get_local_model_manager

# Available Hugging Face models for the remote Inference API
AVAILABLE_MODELS = {
    "microsoft/kosmos-2-patch14-224": "Kosmos-2",
    "Salesforce/blip-image-captioning-large": "BLIP Image Captioning",
    "microsoft/DialoGPT-medium": "DialoGPT",
    "microsoft/git-large-coco": "GIT Large COCO",
    "nlpconnect/vit-gpt2-image-captioning": "ViT-GPT2",
}
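
# Illustrative only: a minimal sketch of how one of the model IDs above could
# be queried through the hosted Hugging Face Inference API. The endpoint
# pattern and Bearer-token header follow that API's documented convention;
# the helper itself is an assumption and is not used elsewhere in this module.
def _example_remote_query(model_id: str, api_token: str, image_bytes: bytes) -> Any:
    """Post raw image bytes to the Inference API and return the parsed JSON."""
    import requests  # local import: only needed for this illustrative helper

    url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {"Authorization": f"Bearer {api_token}"}
    # Captioning models typically answer with JSON such as
    # [{"generated_text": "..."}]
    response = requests.post(url, headers=headers, data=image_bytes)
    response.raise_for_status()
    return response.json()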

def render_sidebar_config(settings: Dict, local_models_available: bool, local_manager: Optional[Any]) -> Dict[str, Any]:
    """
    Render the sidebar configuration panel.

    Returns the configuration settings as a dict.
    """
    with st.sidebar:
        st.header("Configuration")

        # Model type selection
        available_options = []
        if local_models_available:
            available_options.append("Local Models")
        available_options.append("Remote API")

        model_type = st.radio(
            "Model Type",
            available_options,
            help="Choose between the available AI model backends"
        )

        # Model selection based on type
        if model_type == "Local Models" and local_models_available:
            selected_model, api_token = _render_local_model_config(local_manager)
        else:
            selected_model, api_token = _render_remote_model_config(settings)

        # Frame extraction rate
        fps = st.slider(
            "Frames per second to extract",
            min_value=0.1,
            max_value=5.0,
            value=1.0,
            step=0.1
        )

        # Ontology settings
        st.subheader("Ontology Analysis")
        use_ontology = st.checkbox(
            "Enable Ontology Analysis",
            value=True,
            help="Use ontology-based classification (NONE/LOW/MEDIUM/HIGH/CRITICAL)"
        )

        if not use_ontology:
            st.info("🔄 Ontology analysis disabled - showing raw model output only")

    return {
        "model_type": model_type,
        "selected_model": selected_model,
        "api_token": api_token,
        "fps": fps,
        "use_ontology": use_ontology
    }
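
# For reference, the dict returned above has this shape (values illustrative):
#
#     {
#         "model_type": "Local Models",
#         "selected_model": "Person on Track Detector",
#         "api_token": None,
#         "fps": 1.0,
#         "use_ontology": True,
#     }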

def _render_local_model_config(local_manager) -> tuple:
    """Render the local model configuration"""
    available_local_models = local_manager.get_available_models()

    # Default the selection to "Person on Track Detector" if it is available
    default_index = (
        available_local_models.index("Person on Track Detector")
        if "Person on Track Detector" in available_local_models else 0
    )

    selected_model = st.selectbox(
        "Select Local Model",
        options=available_local_models,
        index=default_index,
        help="Choose between CNN (fast) or Transformer (detailed) models"
    )

    # Show model info
    model_info = local_manager.get_model_info()
    if selected_model in model_info:
        with st.expander("Model Information"):
            st.write(f"**Description:** {model_info[selected_model]['description']}")
            st.write(f"**Strengths:** {model_info[selected_model]['strengths']}")
            st.write(f"**Size:** {model_info[selected_model]['size']}")

    return selected_model, None  # No API token needed for local models


def _render_remote_model_config(settings: Dict) -> tuple:
    """Render the remote API model configuration"""
    default_token = settings.get('hugging_face_api_token', '')

    api_token = st.text_input(
        "Hugging Face API Token",
        value=default_token,
        type="password",
        help="Get your token from https://huggingface.co/settings/tokens or save it in settings.json"
    )

    selected_model = st.selectbox(
        "Select Model",
        options=list(AVAILABLE_MODELS.keys()),
        format_func=lambda x: AVAILABLE_MODELS[x]
    )

    return selected_model, api_token

def render_input_section() -> Dict[str, Any]:
    """
    Render the input section for video upload.

    Returns the input data as a dict.
    """
    st.header("Input")

    # Video upload
    video_file = st.file_uploader(
        "Upload Video",
        type=['mp4', 'avi', 'mov', 'mkv'],
        help="Upload a video file to analyze"
    )

    return {
        "video_file": video_file
    }

def render_prompt_section(config: Dict[str, Any]) -> str:
    """
    Render the prompt input section based on the model configuration
    """
    model_type = config["model_type"]
    selected_model = config["selected_model"]

    # Prompt input (conditional on the selected model)
    if (model_type == "Local Models" and
            selected_model == "Person on Track Detector"):
        # The Person on Track Detector runs automatically
        st.info("🤖 This model is driven by an ontology-based approach and does not require a prompt")
        return "automatic"
    else:
        # Regular models need a user prompt
        return st.text_area(
            "Analysis Prompt",
            placeholder="Describe what you see in the image...",
            help="Enter the prompt used to analyze each frame"
        )

def render_process_button() -> bool:
    """Render the process button"""
    return st.button("Process Video", type="primary")


def render_results_header():
    """Render the results section header"""
    st.header("Results")
    return st.container()

def render_frame_result(result_data: Dict[str, Any]):
    """
    Render a single frame result, including the ontology analysis
    """
    ontology = result_data.get('ontology_analysis', {})

    # Include the severity in the expander title only if ontology analysis ran
    if ontology.get('ontology_used', False):
        severity_icon = ontology.get('severity_icon', '✅')
        severity = ontology.get('severity', 'NONE')
        expander_title = f"{severity_icon} {severity} - Frame {result_data['frame_number']} (t={result_data['timestamp']:.1f}s)"
    else:
        # Plain title without severity symbols when ontology analysis is disabled
        expander_title = f"Frame {result_data['frame_number']} (t={result_data['timestamp']:.1f}s)"

    with st.expander(expander_title):
        col_img, col_text = st.columns([1, 2])

        with col_img:
            st.image(
                result_data['image'],
                caption=f"Frame {result_data['frame_number']}",
                use_container_width=True
            )

        with col_text:
            # Display the ontology analysis first if enabled
            if ontology.get('ontology_used', False):
                _render_ontology_analysis(ontology)
                st.divider()

            # Display the original model results
            _render_model_output(result_data['result'])
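
# For reference, a hedged sketch of the `result_data` dict this function
# expects; the keys are taken from the accesses above, the values are
# illustrative:
#
#     {
#         "frame_number": 12,
#         "timestamp": 12.0,
#         "image": ...,  # anything st.image accepts (PIL image, ndarray, bytes)
#         "result": {"generated_text": "A person standing near the tracks"},
#         "ontology_analysis": {
#             "ontology_used": True,
#             "severity": "HIGH",
#             "severity_icon": "⚠️",
#             "severity_color": "orange",
#             "score": 72,
#             "explanations": ["Person detected within the track area"],
#             "fired_rules": ["person_on_track -> HIGH"],
#             "labels": ["person_on_track"],
#         },
#     }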

def _render_ontology_analysis(ontology: Dict[str, Any]):
    """Render the ontology analysis section"""
    severity = ontology.get('severity', 'NONE')
    severity_icon = ontology.get('severity_icon', '✅')
    severity_color = ontology.get('severity_color', 'green')

    # Severity display with color
    st.markdown(f"**Safety Assessment:** :{severity_color}[{severity_icon} {severity}]")

    # Score display
    if ontology.get('score', 0) > 0:
        st.metric("Risk Score", f"{ontology['score']}/100")

    # Show explanations if available
    if ontology.get('explanations'):
        st.write("**Ontology Analysis:**")
        for explanation in ontology['explanations']:
            st.write(f"• {explanation}")

    # Show fired rules if available
    if ontology.get('fired_rules'):
        with st.expander("Technical Details"):
            st.write("**Triggered Rules:**")
            for rule in ontology['fired_rules']:
                st.code(rule)

    if ontology.get('labels'):
        st.write("**Detected Hazard Labels:**")
        for label in ontology['labels']:
            st.code(label)
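
# For reference: the Safety Assessment line above uses Streamlit's colored-text
# markdown, e.g. st.markdown(":orange[⚠️ HIGH]") renders the bracketed text in
# orange, so `severity_color` must be a color name Streamlit supports
# (e.g. green, orange, red).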

def _render_model_output(result: Dict[str, Any]):
    """Render the original model output section"""
    st.write("**Model Output:**")

    if 'error' in result:
        st.error(f"Error: {result['error']}")
    elif 'person_on_track_detection' in result:
        _render_person_detection_result(result['person_on_track_detection'])
    else:
        _render_general_model_result(result)


def _render_person_detection_result(detection: Dict[str, Any]):
    """Render person-on-track detection specific results"""
    people_count = detection.get('people_count', 0)
    confidence = detection.get('confidence', 0)
    analysis = detection.get('analysis', 'No analysis')

    st.write(f"**Detection Analysis:** {analysis}")

    # Show metrics
    col1, col2 = st.columns(2)
    with col1:
        st.metric("👥 People Detected", people_count)
    with col2:
        st.metric("📊 Model Confidence", f"{confidence:.0%}")

def _render_general_model_result(result: Any):
    """Render general model results (captioning, etc.)"""
    if isinstance(result, dict) and 'generated_text' in result:
        st.write(f"*{result['generated_text']}*")
    elif isinstance(result, list) and len(result) > 0:
        if 'generated_text' in result[0]:
            st.write(f"*{result[0]['generated_text']}*")
        else:
            st.json(result[0])
    else:
        st.json(result)
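
# For reference, the payload shapes handled above (values illustrative): a bare
# dict such as {"generated_text": "a train at a platform"}, a list of
# candidates such as [{"generated_text": "a train at a platform"}], and
# anything else, which is dumped via st.json.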

def render_validation_errors(video_file, prompt, api_token, model_type, local_models_available, selected_model):
    """
    Render validation error messages
    """
    if not video_file:
        st.error("Please upload a video file")
    if not prompt and not (model_type == "Local Models" and selected_model == "Person on Track Detector"):
        st.error("Please enter an analysis prompt")
    if not api_token and model_type == "Remote API":
        st.error("Please provide your Hugging Face API token for remote models")
    if model_type == "Local Models" and not local_models_available:
        st.error("Local models failed to initialize. Check your installation.")

def render_instructions():
    """Render the instructions section"""
    with st.expander("How to use"):
        st.markdown("""
        ## Local AI Models (Recommended)
        1. **Upload a video**: Choose a video file (MP4, AVI, MOV, or MKV)
        2. **Select model type**: Choose "Local Models" for offline processing
        3. **Choose AI model**:
           - **CNN (BLIP)**: Fast, good for object detection (~1.2GB)
           - **Transformer (ViT-GPT2)**: Detailed descriptions (~1.8GB)
        4. **Enter a prompt**: Describe what you want the AI to analyze
        5. **Enable/Disable Ontology**: Toggle ontology-based risk assessment
        6. **Adjust frame rate**: Set frames per second to extract (default: 1 fps)
        7. **Click Process**: Frames are processed locally on your machine

        ## Ontology Analysis
        - **✅ NONE**: No safety concerns detected
        - **🟢 LOW**: Minor safety considerations
        - **🟠 MEDIUM**: Moderate safety risk
        - **⚠️ HIGH**: Significant safety risk
        - **🚨 CRITICAL**: Immediate safety hazard

        ## Remote API Models (Optional)
        1. **Get an API token**: Visit [Hugging Face Settings](https://huggingface.co/settings/tokens)
        2. **Select "Remote API"** as the model type
        3. **Enter the token** and select a remote model

        ## Video Support Features
        - **Automatic corruption repair**: Handles videos with corrupted moov atoms
        - **FFmpeg integration**: Auto-repairs problematic video files
        - **Multiple formats**: MP4, AVI, MOV, and MKV support

        ## Requirements
        - **Python packages**: torch, transformers, accelerate (see requirements.txt)
        - **Optional**: FFmpeg for video repair (download from https://ffmpeg.org)
        - **Storage**: ~3GB for both local models
        """)