Spaces:

AlBaraa63
/

CleanCity

Sleeping

CleanCity / gemini_detector.py

AlBaraa63

Recover

5369733 26 days ago

8.16 kB

	"""
	Google Gemini Vision - Trash Detection Module

	Uses Gemini's multimodal capabilities to detect and describe trash in images.
	Complements YOLOv8 with natural language understanding and detailed analysis.
	"""

	import os
	from typing import Dict, List, Optional
	from PIL import Image
	import base64
	from io import BytesIO
	from dotenv import load_dotenv

	load_dotenv()


	class GeminiVisionDetector:
	"""Trash detection using Google Gemini Vision API."""

	def __init__(self):
	"""Initialize Gemini Vision detector."""
	self.enabled = False
	self.model = None

	api_key = os.getenv("GEMINI_API_KEY")
	if api_key:
	try:
	import google.generativeai as genai
	genai.configure(api_key=api_key)
	self.model = genai.GenerativeModel('gemini-1.5-flash-latest')
	self.enabled = True
	print("✓ Gemini Vision detector initialized")
	except ImportError:
	print("⚠ Google GenAI package not installed (pip install google-generativeai)")
	except Exception as e:
	print(f"⚠ Gemini Vision initialization failed: {e}")
	else:
	print("ℹ Gemini Vision disabled (no GEMINI_API_KEY)")

	def detect_trash(self, image: Image.Image) -> Dict:
	"""
	Detect trash items in image using Gemini Vision.

	Args:
	image: PIL Image object

	Returns:
	Dictionary with detection results:
	{
	'items': List[str], # List of detected items
	'count': int, # Total item count
	'description': str, # Detailed description
	'severity': str, # LOW, MEDIUM, HIGH
	'categories': Dict # Category counts
	}
	"""
	if not self.enabled:
	return self._offline_fallback()

	try:
	# Create prompt for Gemini
	prompt = """Analyze this image for trash and litter. Provide a detailed analysis:

	1. List all visible trash items (be specific: "plastic bottle", "cigarette butt", not just "trash")
	2. Estimate the total number of items
	3. Categorize items (plastic, paper, metal, organic, other)
	4. Assess severity (LOW: 1-5 items, MEDIUM: 6-15 items, HIGH: 16+ items)
	5. Describe the location/context

	Format your response as:
	ITEMS: item1, item2, item3, ...
	COUNT: <number>
	CATEGORIES: plastic:<count>, paper:<count>, metal:<count>, organic:<count>, other:<count>
	SEVERITY: <LOW\|MEDIUM\|HIGH>
	DESCRIPTION: <1-2 sentence description>"""

	# Generate response
	response = self.model.generate_content([prompt, image])

	# Parse response
	return self._parse_gemini_response(response.text)

	except Exception as e:
	print(f"⚠ Gemini Vision detection failed: {e}")
	return self._offline_fallback()

	def _parse_gemini_response(self, response_text: str) -> Dict:
	"""Parse Gemini's structured response."""
	lines = response_text.strip().split('\n')

	result = {
	'items': [],
	'count': 0,
	'description': '',
	'severity': 'MEDIUM',
	'categories': {}
	}

	for line in lines:
	line = line.strip()

	if line.startswith('ITEMS:'):
	items_str = line.replace('ITEMS:', '').strip()
	result['items'] = [item.strip() for item in items_str.split(',')]

	elif line.startswith('COUNT:'):
	try:
	result['count'] = int(line.replace('COUNT:', '').strip())
	except:
	result['count'] = len(result['items'])

	elif line.startswith('CATEGORIES:'):
	cats_str = line.replace('CATEGORIES:', '').strip()
	for cat_pair in cats_str.split(','):
	if ':' in cat_pair:
	cat, count = cat_pair.split(':')
	try:
	result['categories'][cat.strip()] = int(count.strip())
	except:
	pass

	elif line.startswith('SEVERITY:'):
	severity = line.replace('SEVERITY:', '').strip().upper()
	if severity in ['LOW', 'MEDIUM', 'HIGH']:
	result['severity'] = severity

	elif line.startswith('DESCRIPTION:'):
	result['description'] = line.replace('DESCRIPTION:', '').strip()

	return result

	def _offline_fallback(self) -> Dict:
	"""Return mock results when Gemini is not available."""
	return {
	'items': ['plastic bottle', 'food wrapper', 'paper cup'],
	'count': 3,
	'description': 'Gemini Vision not configured. Using mock detection.',
	'severity': 'MEDIUM',
	'categories': {
	'plastic': 2,
	'paper': 1
	}
	}

	def compare_with_yolo(self, yolo_results: Dict, gemini_results: Dict) -> str:
	"""
	Generate comparison between YOLOv8 and Gemini detections.

	Args:
	yolo_results: Results from YOLOv8 detector
	gemini_results: Results from Gemini Vision

	Returns:
	Formatted comparison text
	"""
	# Safely extract YOLO counts
	if isinstance(yolo_results, dict):
	yolo_count = yolo_results.get('total_items', 0)
	if yolo_count == 0 and 'detections' in yolo_results:
	yolo_count = len(yolo_results.get('detections', []))
	yolo_categories = list(yolo_results.get('categories', {}).keys())
	yolo_confidence = yolo_results.get('avg_confidence', 0)
	else:
	# yolo_results might be a list of detections
	yolo_count = len(yolo_results) if isinstance(yolo_results, list) else 0
	yolo_categories = []
	yolo_confidence = 0

	comparison = f"""
	🔍 Dual-Engine Detection Comparison

	YOLOv8 (Computer Vision):
	- Detected: {yolo_count} items
	- Categories: {', '.join(yolo_categories) if yolo_categories else 'Multiple types'}
	- Confidence: {yolo_confidence:.1f}%

	Gemini Vision (Multimodal AI):
	- Detected: {gemini_results['count']} items
	- Categories: {', '.join(gemini_results['categories'].keys())}
	- Severity: {gemini_results['severity']}
	- Context: {gemini_results['description']}

	Insights:
	{self._generate_insights(yolo_count, yolo_confidence, gemini_results)}
	"""
	return comparison.strip()

	def _generate_insights(self, yolo_count: int, yolo_confidence: float, gemini: Dict) -> str:
	"""Generate insights from comparing both detections."""
	insights = []

	# Compare counts
	gemini_count = gemini['count']

	if abs(yolo_count - gemini_count) <= 2:
	insights.append("✓ Both models agree on item count")
	else:
	insights.append(f"⚠ Count difference: YOLOv8={yolo_count}, Gemini={gemini_count}")

	# Highlight Gemini's advantage
	if gemini['description']:
	insights.append(f"💡 Gemini provides context: \"{gemini['description']}\"")

	# Highlight YOLOv8's advantage
	if yolo_confidence > 80:
	insights.append(f"🎯 YOLOv8 provides precise bounding boxes ({yolo_confidence:.0f}% confidence)")
	elif yolo_confidence > 0:
	insights.append(f"📍 YOLOv8 provides precise bounding boxes")

	return '\n'.join(insights) if insights else "Both models detected trash successfully"


	# Global instance
	_gemini_detector = None

	def get_gemini_detector() -> GeminiVisionDetector:
	"""Get singleton Gemini detector instance."""
	global _gemini_detector
	if _gemini_detector is None:
	_gemini_detector = GeminiVisionDetector()
	return _gemini_detector