Spaces:

Wicky
/

bus-inspection-classifier

Sleeping

Sathwik P

Fix Gallery parameter for local Gradio compatibility

3cd80ad about 1 month ago

17.1 kB

	import gradio as gr
	import onnxruntime as ort
	import numpy as np
	from PIL import Image
	import time
	import pandas as pd
	import requests
	from io import BytesIO

	# Human-readable labels for the classifier. A predicted index is looked up
	# directly in this list, so the order must match the model's output order.
	CLASS_NAMES = [
	    "AC Mat",
	    "Alco brake camera",
	    "Alco-brake device",
	    "Back windshield",
	    "Bus back side",
	    "Bus front side",
	    "Bus side",
	    "Cabin",
	    "Driver grooming",
	    "First aid kit",
	    "Floormats & POS",
	    "Front windshield",
	    "Hat rack",
	    "ITMS Device",
	    "Jack & Spare tyre",
	    "Luggage compartment",
	    "RFID Card",
	    "Seats",
	]

	# Create one ONNX Runtime session at import time and reuse it for every
	# request; only the CPU execution provider is requested.
	MODEL_PATH = "siglip_v2.onnx"
	session = ort.InferenceSession(
	    MODEL_PATH,
	    providers=["CPUExecutionProvider"],
	)
	# Name of the model's first input, used as the key of the feed dict.
	input_name = session.get_inputs()[0].name

	def preprocess_image(image, size=(224, 224), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
	    """Convert a PIL image into a normalized float32 batch of one in NCHW layout.

	    Args:
	        image: PIL image in any mode. Anything that is not already RGB is
	            converted first, so grayscale, RGBA and palette images work too.
	        size: ``(width, height)`` handed to ``image.resize``.
	        mean: Per-channel mean subtracted after scaling pixels to [0, 1].
	        std: Per-channel divisor applied after the mean subtraction.

	    Returns:
	        ``np.float32`` array of shape ``(1, 3, height, width)``.
	    """
	    # Without this, a grayscale image yields a 2-D array (transpose fails) and
	    # an RGBA image yields four channels (wrong model input shape).
	    if image.mode != "RGB":
	        image = image.convert("RGB")

	    # Scale 8-bit pixel values to [0, 1].
	    pixels = np.array(image.resize(size)).astype(np.float32) / 255.0

	    # With the default mean/std of 0.5 this maps [0, 1] onto [-1, 1].
	    normalized = (pixels - np.array(mean)) / np.array(std)

	    # HWC -> CHW, then add the leading batch axis.
	    chw = np.transpose(normalized, (2, 0, 1))
	    return np.expand_dims(chw, axis=0).astype(np.float32)

	def predict_single_image(image):
	    """Classify one image and report the top class, its confidence and timing.

	    Args:
	        image: PIL Image, numpy array, or None (what the UI passes when the
	            user clicks "Classify" without uploading anything).

	    Returns:
	        dict: ``class_name``, ``confidence`` (percentage string) and
	        ``inference_time_ms`` (string, covers preprocessing + inference).
	        When ``image`` is None the three values are None and an ``error``
	        key explains why, mirroring the per-image error entries produced by
	        the batch path.
	    """
	    # Previously a missing image crashed with AttributeError on None.convert.
	    if image is None:
	        return {
	            "error": "No image provided",
	            "class_name": None,
	            "confidence": None,
	            "inference_time_ms": None,
	        }

	    if isinstance(image, np.ndarray):
	        image = Image.fromarray(image)
	    image = image.convert('RGB')

	    # perf_counter is monotonic, so the interval cannot go negative or jump
	    # if the wall clock is adjusted while the request is running.
	    started = time.perf_counter()

	    img_tensor = preprocess_image(image)
	    outputs = session.run(None, {input_name: img_tensor})[0]

	    # The batch always holds exactly one image; flatten to a plain score
	    # vector so argmax and the probability lookup use the same indexing.
	    logits = np.asarray(outputs).reshape(-1)

	    # Numerically stable softmax.
	    shifted = np.exp(logits - np.max(logits))
	    probs = shifted / shifted.sum()

	    pred_idx = int(np.argmax(probs))
	    confidence = float(probs[pred_idx])
	    pred_class = CLASS_NAMES[pred_idx]

	    inference_time = (time.perf_counter() - started) * 1000  # seconds -> ms

	    return {
	        "class_name": pred_class,
	        "confidence": f"{confidence:.2%}",
	        "inference_time_ms": f"{inference_time:.2f}"
	    }


	# Column names expected in an uploaded CSV.
	_URL_COLUMN = 'Answer'
	_LABEL_COLUMN = 'Questions - QuestionId → Name'

	# A progress update is yielded after this many processed images.
	_YIELD_EVERY = 5


	def _batch_error(message):
	    """Build the JSON payload used when a batch cannot be processed at all."""
	    return {"error": message, "total_images": 0, "results": []}


	def _labels_match(expected, predicted):
	    """Return True when one label contains the other, ignoring case.

	    A blank or non-text expected label never matches; a plain substring test
	    would otherwise report every prediction as correct for an empty cell.
	    """
	    if not isinstance(expected, str) or not isinstance(predicted, str):
	        return False
	    if not expected.strip():
	        return False
	    expected_lower = expected.lower()
	    predicted_lower = predicted.lower()
	    return expected_lower in predicted_lower or predicted_lower in expected_lower


	def _batch_summary(source, results, total, processed, elapsed_ms, final, with_accuracy):
	    """Build the JSON summary for a progress update or for the final result.

	    ``final`` switches between the progress keys (``elapsed_time_ms``,
	    ``last_results``) and the completion keys (``total_processing_time_ms``,
	    ``results``). ``with_accuracy`` adds the match statistics that only exist
	    when expected labels were supplied.
	    """
	    successful = [r for r in results if "error" not in r]
	    if final:
	        status = "✅ Complete!"
	    else:
	        status = f"Processing... {processed}/{total} ({(processed / total * 100):.1f}%)"

	    summary = {
	        "source": source,
	        "status": status,
	        "total_images": total,
	        "processed": processed,
	        "successful_predictions": len(successful),
	        "failed_predictions": len(results) - len(successful),
	    }

	    if with_accuracy:
	        matched = [r for r in successful if r["match"] == "✓"]
	        summary["matched_predictions"] = len(matched)
	        summary["accuracy"] = (
	            f"{(len(matched) / len(successful) * 100):.2f}%" if successful else "0%"
	        )

	    average = f"{elapsed_ms / processed:.2f}" if processed else "0.00"
	    if final:
	        summary["total_processing_time_ms"] = f"{elapsed_ms:.2f}"
	        summary["average_time_per_image_ms"] = average
	        summary["results"] = results  # Full results at the end
	    else:
	        summary["elapsed_time_ms"] = f"{elapsed_ms:.2f}"
	        summary["average_time_per_image_ms"] = average
	        summary["last_results"] = results[-5:]  # Show last 5 for reference
	    return summary


	def _open_upload(item):
	    """Return ``(rgb_image, path_or_None)`` for one uploaded item.

	    Accepts a file path, a numpy array or a PIL image. Objects that are none
	    of these but expose a path as ``.name`` are opened from that path.
	    NOTE(review): the ``.name`` case is meant for temp-file wrappers handed
	    over by some Gradio versions - confirm against the deployed version.
	    """
	    path = item if isinstance(item, str) else None
	    if path is None and not isinstance(item, np.ndarray) and not hasattr(item, "convert"):
	        candidate = getattr(item, "name", None)
	        if isinstance(candidate, str):
	            path = candidate

	    if path is not None:
	        # convert() returns an independent, fully loaded copy, so the file
	        # handle can be released right away.
	        with Image.open(path) as opened:
	            return opened.convert('RGB'), path
	    if isinstance(item, np.ndarray):
	        return Image.fromarray(item).convert('RGB'), None
	    return item.convert('RGB'), None


	def _predict_csv(csv_file):
	    """Yield progressive results for a CSV of image URLs and expected classes."""
	    try:
	        df = pd.read_csv(csv_file)

	        if _URL_COLUMN not in df.columns or _LABEL_COLUMN not in df.columns:
	            yield [], _batch_error(
	                "CSV must have 'Answer' and 'Questions - QuestionId → Name' columns"
	            )
	            return

	        total = len(df)
	        if total == 0:
	            # Used to surface as "CSV processing error: float division by zero".
	            yield [], _batch_error("CSV contains no image rows")
	            return

	        results = []
	        gallery_images = []
	        started = time.perf_counter()

	        # Number rows by position; the DataFrame index label is not
	        # guaranteed to be 0..n-1.
	        for position, (_, row) in enumerate(df.iterrows(), start=1):
	            try:
	                img_url = row[_URL_COLUMN]
	                given_class = row[_LABEL_COLUMN]

	                # SECURITY NOTE(review): the URL comes from a user-supplied
	                # file and is fetched server-side with no host restriction
	                # or size limit.
	                response = requests.get(img_url, timeout=10)
	                response.raise_for_status()
	                image = Image.open(BytesIO(response.content)).convert('RGB')

	                result = predict_single_image(image)
	                result["image_index"] = position
	                result["given_class"] = given_class
	                result["image_url"] = img_url
	                result["match"] = (
	                    "✓" if _labels_match(given_class, result["class_name"]) else "✗"
	                )
	                results.append(result)

	                caption = (
	                    f"#{position} {result['match']} Pred: {result['class_name']}\n"
	                    f"✓ Expected: {given_class}\n"
	                    f"{result['confidence']} | {result['inference_time_ms']}ms"
	                )
	                gallery_images.append((image, caption))

	            except Exception as e:
	                # One bad row (unreachable URL, corrupt image, ...) must not
	                # abort the whole batch; record it and carry on.
	                results.append({
	                    "image_index": position,
	                    "given_class": row.get(_LABEL_COLUMN, 'Unknown'),
	                    "image_url": row.get(_URL_COLUMN, 'Unknown'),
	                    "error": str(e),
	                    "class_name": None,
	                    "confidence": None,
	                    "inference_time_ms": None,
	                    "match": "✗"
	                })

	            if position % _YIELD_EVERY == 0 or position == total:
	                elapsed_ms = (time.perf_counter() - started) * 1000
	                yield gallery_images.copy(), _batch_summary(
	                    "CSV", results, total, position, elapsed_ms,
	                    final=False, with_accuracy=True,
	                )

	        total_ms = (time.perf_counter() - started) * 1000
	        yield gallery_images, _batch_summary(
	            "CSV", results, total, total, total_ms,
	            final=True, with_accuracy=True,
	        )

	    except Exception as e:
	        yield [], _batch_error(f"CSV processing error: {str(e)}")


	def _predict_uploads(images):
	    """Yield progressive results for directly uploaded images."""
	    total = len(images)
	    results = []
	    gallery_images = []
	    started = time.perf_counter()

	    for position, item in enumerate(images, start=1):
	        image = None
	        try:
	            image, img_path = _open_upload(item)

	            result = predict_single_image(image)
	            result["image_index"] = position
	            results.append(result)

	            caption = (
	                f"#{position} {result['class_name']}\n"
	                f"{result['confidence']} | {result['inference_time_ms']}ms"
	            )
	            # Use the file path if available, otherwise the PIL image.
	            gallery_images.append((img_path if img_path else image, caption))

	        except Exception as e:
	            results.append({
	                "image_index": position,
	                "error": str(e),
	                "class_name": None,
	                "confidence": None,
	                "inference_time_ms": None
	            })
	            # Show the image with an error caption when it could be loaded.
	            # If loading itself failed there is nothing to display.
	            if image is not None:
	                gallery_images.append((image, f"#{position}: ERROR - {str(e)}"))

	        if position % _YIELD_EVERY == 0 or position == total:
	            elapsed_ms = (time.perf_counter() - started) * 1000
	            yield gallery_images.copy(), _batch_summary(
	                "Direct Upload", results, total, position, elapsed_ms,
	                final=False, with_accuracy=False,
	            )

	    total_ms = (time.perf_counter() - started) * 1000
	    yield gallery_images, _batch_summary(
	        "Direct Upload", results, total, total, total_ms,
	        final=True, with_accuracy=False,
	    )


	def predict_batch(images, csv_file):
	    """Classify many images, yielding partial results while the batch runs.

	    When ``csv_file`` is given it takes precedence and ``images`` is ignored.
	    The CSV must contain an ``Answer`` column (image URL) and a
	    ``Questions - QuestionId → Name`` column (expected class).

	    Args:
	        images: List of file paths, PIL Images or numpy arrays (or None).
	        csv_file: CSV path or file-like object with image URLs (or None).

	    Yields:
	        tuple: ``(gallery_data, json_results)``. A progress update is
	        produced after every fifth image and after the last one, followed by
	        one final tuple carrying the complete ``results`` list. Invalid
	        input (nothing provided, missing columns, empty CSV, unreadable CSV)
	        produces a single tuple with an empty gallery and an ``error``
	        message.
	    """
	    if csv_file is not None:
	        yield from _predict_csv(csv_file)
	        return

	    if not images:
	        yield [], _batch_error("No images or CSV provided")
	        return

	    yield from _predict_uploads(images)

	# Build the UI: a single-image tab, a batch tab, and API usage notes below.
	with gr.Blocks(title="🚌 Bus Inspection Classifier") as demo:
	    gr.Markdown("# 🚌 Bus Inspection Classifier - SigLIP v2")
	    # The category line is generated from CLASS_NAMES so the page cannot
	    # drift out of sync with the labels the model actually returns.
	    gr.Markdown(
	        "Automated bus component classification using the SigLIP v2 vision model.\n\n"
	        f"{len(CLASS_NAMES)} Categories: {' | '.join(CLASS_NAMES)}"
	    )

	    with gr.Tabs():
	        # Single Image Tab
	        with gr.Tab("Single Image"):
	            gr.Markdown("### Upload a single bus inspection image")
	            with gr.Row():
	                with gr.Column():
	                    single_input = gr.Image(type="pil", label="Upload Image")
	                    single_button = gr.Button("Classify", variant="primary")
	                with gr.Column():
	                    single_output = gr.JSON(label="Prediction Result")

	            # Explicit api_name: the usage notes below call "/predict", which
	            # only exists if the endpoint is named here.
	            single_button.click(
	                fn=predict_single_image,
	                inputs=single_input,
	                outputs=single_output,
	                api_name="predict"
	            )

	            gr.Markdown("""
	            Returns:
	            - `class_name`: Predicted bus component category
	            - `confidence`: Model confidence score (%)
	            - `inference_time_ms`: Processing time in milliseconds
	            """)

	        # Batch Processing Tab
	        with gr.Tab("Batch Processing (Unlimited)"):
	            gr.Markdown("### Upload images OR CSV file with image URLs")
	            gr.Markdown("Option 1: Upload multiple images directly")
	            gr.Markdown("Option 2: Upload CSV with columns: `Questions - QuestionId → Name` (given class) and `Answer` (image URL)")

	            batch_input = gr.File(
	                file_count="multiple",
	                label="Upload Images",
	                file_types=["image"]
	            )

	            csv_input = gr.File(
	                file_count="single",
	                label="OR Upload CSV with Image URLs",
	                file_types=[".csv"]
	            )

	            batch_button = gr.Button("Classify Batch", variant="primary", size="lg")

	            # Gallery to show images with predictions - larger display
	            batch_gallery = gr.Gallery(
	                label="Classified Images with Predictions",
	                show_label=True,
	                columns=2,  # Reduced from 3 to show larger images
	                rows=4,  # Increased rows
	                height=600,  # Fixed height for better scrolling
	                object_fit="contain"
	            )

	            # JSON output for API/detailed results
	            batch_output = gr.JSON(label="Detailed JSON Results")

	            # predict_batch is a generator, so gallery and JSON refresh on
	            # every yield. The endpoint is named explicitly to match the
	            # usage notes below.
	            batch_button.click(
	                fn=predict_batch,
	                inputs=[batch_input, csv_input],
	                outputs=[batch_gallery, batch_output],
	                show_progress="full",  # Enable progress display
	                api_name="predict_batch"
	            ).then(
	                lambda: None,  # Completion callback (currently a no-op)
	                None,
	                None
	            )

	            gr.Markdown("""
	            Returns:
	            ```json
	            {
	              "total_images": 10,
	              "successful_predictions": 10,
	              "failed_predictions": 0,
	              "total_processing_time_ms": "456.78",
	              "average_time_per_image_ms": "45.68",
	              "results": [
	                {
	                  "image_index": 1,
	                  "class_name": "Bus front side",
	                  "confidence": "98.45%",
	                  "inference_time_ms": "43.21"
	                },
	                ...
	              ]
	            }
	            ```
	            """)

	    # API Documentation
	    gr.Markdown("""
	    ---

	    ## 🔌 API Usage

	    ### Single Image API

	    Using Gradio Client (Python):
	    ```python
	    from gradio_client import Client

	    client = Client("Wicky/bus-inspection-classifier")
	    result = client.predict("bus_image.jpg", api_name="/predict")
	    print(result)
	    ```

	    ### Batch Processing API

	    Using Gradio Client (Python):
	    ```python
	    from gradio_client import Client

	    client = Client("Wicky/bus-inspection-classifier")

	    # Upload multiple images (the second argument is the optional CSV file)
	    image_files = ["img1.jpg", "img2.jpg", "img3.jpg"]
	    gallery, result = client.predict(image_files, None, api_name="/predict_batch")

	    print(f"Total: {result['total_images']}")
	    print(f"Successful: {result['successful_predictions']}")

	    for res in result['results']:
	        print(f"Image {res['image_index']}: {res['class_name']} ({res['confidence']})")
	    ```

	    Using Python Requests:
	    ```python
	    import requests

	    files = [
	        ('files', open('img1.jpg', 'rb')),
	        ('files', open('img2.jpg', 'rb')),
	        ('files', open('img3.jpg', 'rb'))
	    ]

	    response = requests.post(
	        "https://Wicky-bus-inspection-classifier.hf.space/api/predict_batch",
	        files=files
	    )

	    results = response.json()
	    print(results)
	    ```
	    """)

	# Script entry point: start the Gradio server only when this file is run
	# directly, not when it is imported.
	if __name__ == "__main__":
	    demo.launch()