Spaces:

RaghavenderReddy
/

LABEL-IT

Sleeping

File size: 8,383 Bytes

"""
Enhanced utility functions for LabelIt! application with real-time analytics
"""

import json
import os
from PIL import Image
from collections import Counter

# Storage layout: every path is derived from a single data directory
# (compatible with pre-existing folders).
DATA_DIR = 'data'
IMAGES_DIR = os.path.join(DATA_DIR, 'images')        # image directory
LABELS_FILE = os.path.join(DATA_DIR, 'labels.json')  # labels JSON file
USERS_FILE = os.path.join(DATA_DIR, 'users.json')    # users JSON file

def load_users():
    """Load the users data from ``USERS_FILE``.

    Loading is best-effort so the application can start with a missing or
    damaged pre-existing file.

    Returns:
        The decoded JSON content of the users file, or an empty dict when
        the file is missing, cannot be read, or does not hold valid JSON.
    """
    try:
        # Read as UTF-8 explicitly instead of depending on the platform's
        # locale encoding, which may differ from how the file was written.
        with open(USERS_FILE, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable path (FileNotFoundError,
        # PermissionError, a directory in place of the file, ...).
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}

def save_users(users):
    """Write the users data to ``USERS_FILE`` on a best-effort basis.

    Nothing is written when ``DATA_DIR`` does not exist, and I/O failures
    (for example a read-only filesystem) are deliberately ignored.

    Args:
        users: JSON-serializable users data.

    Raises:
        TypeError, ValueError: If ``users`` cannot be serialized to JSON.
            The existing file is left untouched in that case.
    """
    if not os.path.exists(DATA_DIR):
        return

    # Serialize before opening: mode 'w' truncates the file immediately, so
    # a serialization error raised mid-dump would destroy the saved data.
    payload = json.dumps(users, indent=2)
    try:
        with open(USERS_FILE, 'w', encoding='utf-8') as f:
            f.write(payload)
    except OSError:
        # PermissionError is a subclass of OSError. Saving is best-effort
        # in read-only environments, so the failure is swallowed on purpose.
        pass

def load_labels():
    """Load the labels mapping from ``LABELS_FILE``.

    Loading is best-effort so the application can start with a missing or
    damaged pre-existing file.

    Returns:
        The decoded JSON object as a dict, or an empty dict when the file is
        missing, cannot be read, does not hold valid JSON, or holds JSON
        whose top level is not an object.
    """
    try:
        # Read as UTF-8 explicitly instead of depending on the platform's
        # locale encoding, which may differ from how the file was written.
        with open(LABELS_FILE, 'r', encoding='utf-8') as f:
            labels = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable path (FileNotFoundError,
        # PermissionError, a directory in place of the file, ...).
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}

    # Callers in this module iterate ``.values()`` on the result, so any
    # other top-level JSON type (list, null, ...) is treated as "no labels".
    return labels if isinstance(labels, dict) else {}

def save_labels(labels):
    """Write the labels data to ``LABELS_FILE`` on a best-effort basis.

    Nothing is written when ``DATA_DIR`` does not exist, and I/O failures
    (for example a read-only filesystem) are deliberately ignored.

    Args:
        labels: JSON-serializable labels data.

    Raises:
        TypeError, ValueError: If ``labels`` cannot be serialized to JSON.
            The existing file is left untouched in that case.
    """
    if not os.path.exists(DATA_DIR):
        return

    # Serialize before opening: mode 'w' truncates the file immediately, so
    # a serialization error raised mid-dump would destroy the saved data.
    payload = json.dumps(labels, indent=2)
    try:
        with open(LABELS_FILE, 'w', encoding='utf-8') as f:
            f.write(payload)
    except OSError:
        # PermissionError is a subclass of OSError. Saving is best-effort
        # in read-only environments, so the failure is swallowed on purpose.
        pass

def validate_image(uploaded_file):
    """Check an uploaded file's size, declared type, and image integrity.

    The checks run in order: size limit, allowed MIME type, then an
    open-and-verify pass through PIL. On success the file pointer is
    rewound to the start.

    Returns:
        A dict with 'valid' (bool) and 'error' (an error code string, or
        None when the file is valid).
    """
    def outcome(error_code=None):
        # A missing error code means the upload passed every check.
        return {'valid': error_code is None, 'error': error_code}

    size_limit = 10 * 1024 * 1024  # 10MB in bytes
    if uploaded_file.size > size_limit:
        return outcome('file_too_large')

    accepted_types = ('image/png', 'image/jpeg', 'image/jpg', 'image/gif')
    if uploaded_file.type not in accepted_types:
        return outcome('invalid_file_type')

    try:
        Image.open(uploaded_file).verify()
        uploaded_file.seek(0)  # Rewind so the file can be read again
    except Exception:
        return outcome('image_processing_error')
    return outcome()

def get_categories():
    """Return the available category names as a new list."""
    category_names = (
        'Animals',
        'Food',
        'Objects',
        'Nature',
        'People',
        'Transportation',
    )
    return list(category_names)

def calculate_statistics():
    """Calculate aggregate statistics for the analytics dashboard.

    Reads the current data through ``load_users`` and ``load_labels`` on
    every call and tallies it in a single pass over the label entries.

    Returns:
        A dict with these keys:
            total_users: number of entries in the users data.
            total_images: number of entries in the labels data.
            total_labels: number of labels summed over all entries.
            languages_used: count of distinct label languages.
            language_breakdown: labels per language.
            category_breakdown: entries per category.
            images_with_location: entries that carry a latitude.
            gps_accuracy_breakdown: located entries per accuracy level.
            location_methods: located entries per capture method.
            country_breakdown / city_breakdown: located entries per
                country / city, counted only when the value is present.
    """
    users = load_users()
    labels_data = load_labels()

    total_labels = 0
    language_breakdown = Counter()
    category_breakdown = Counter()

    # Location-based statistics
    images_with_location = 0
    gps_accuracy_breakdown = Counter()
    location_methods = Counter()
    country_breakdown = Counter()
    city_breakdown = Counter()

    for entry_data in labels_data.values():
        # ``or []`` also covers an explicit null stored under 'labels',
        # which ``.get('labels', [])`` would pass through to len().
        image_labels = entry_data.get('labels') or []
        total_labels += len(image_labels)
        for label in image_labels:
            language_breakdown[label.get('language', 'unknown')] += 1

        category_breakdown[entry_data.get('category', 'Unknown')] += 1

        location = entry_data.get('location')
        # Only a missing/empty latitude means "no location"; a truthiness
        # test would also discard 0.0, which is a valid coordinate.
        # NOTE(review): this reads 'latitude' while format_location_display
        # reads 'lat'/'lon' - confirm which schema stored entries use.
        if not location or location.get('latitude') in (None, ''):
            continue
        images_with_location += 1

        location_methods[location.get('method', 'Unknown')] += 1

        # Use the shared classifier so the dashboard tiers cannot drift
        # from the ones used when a single location is displayed.
        level, _ = get_location_accuracy_level(location.get('accuracy'))
        gps_accuracy_breakdown[level] += 1

        country = location.get('country')
        if country:
            country_breakdown[country] += 1

        city = location.get('city')
        if city:
            city_breakdown[city] += 1

    return {
        'total_users': len(users),
        'total_images': len(labels_data),
        'total_labels': total_labels,
        # Every language seen is a key of the breakdown counter.
        'languages_used': len(language_breakdown),
        'language_breakdown': dict(language_breakdown),
        'category_breakdown': dict(category_breakdown),
        'images_with_location': images_with_location,
        'gps_accuracy_breakdown': dict(gps_accuracy_breakdown),
        'location_methods': dict(location_methods),
        'country_breakdown': dict(country_breakdown),
        'city_breakdown': dict(city_breakdown)
    }

def get_location_accuracy_level(accuracy):
    """Classify an accuracy value into a level name and a hex colour.

    Returns:
        A ``(level, colour)`` tuple: "Unknown" when ``accuracy`` is None,
        "High" for values up to 10, "Medium" for values up to 50, and
        "Low" for anything larger.
    """
    if accuracy is None:
        return "Unknown", "#95a5a6"

    # Tiers in ascending order of their inclusive upper bound; the first
    # bound the value does not exceed decides the level.
    tiers = (
        (10, "High", "#27ae60"),
        (50, "Medium", "#f39c12"),
    )
    for upper_bound, level, colour in tiers:
        if accuracy <= upper_bound:
            return level, colour
    return "Low", "#e74c3c"

def format_location_display(location_data):
    """Format location data as a single display string.

    Args:
        location_data: Mapping that may hold 'lat', 'lon', 'method',
            'accuracy', 'city' and 'country'; may also be None or empty.

    Returns:
        A string made of a method icon and the coordinates to six decimal
        places, followed by the accuracy and its level when an accuracy is
        present, and by "city, country" when both are present. When there
        is no data, or 'lat'/'lon' are missing or not numeric, a fixed
        "not available" message is returned instead.
    """
    unavailable = "📍 Location not available"
    if not location_data:
        return unavailable

    # NOTE(review): calculate_statistics reads 'latitude' from stored
    # locations while this reads 'lat'/'lon' - confirm the caller's schema.
    try:
        # Convert instead of testing truthiness: 0.0 is a valid coordinate,
        # and a missing or non-numeric 'lat'/'lon' must yield the fallback
        # message rather than a KeyError or a formatting error.
        lat = float(location_data.get('lat'))
        lon = float(location_data.get('lon'))
    except (TypeError, ValueError):
        return unavailable

    method = location_data.get('method', 'Unknown')
    accuracy = location_data.get('accuracy')

    # Method icon
    method_icons = {
        'GPS': '🛰️',
        'IP': '🌐',
        'Manual': '📝'
    }
    method_icon = method_icons.get(method, '📍')

    # Base location string
    location_str = f"{method_icon} {lat:.6f}, {lon:.6f}"

    # Add accuracy if available (a zero accuracy is treated as not reported)
    if accuracy:
        level, _ = get_location_accuracy_level(accuracy)
        location_str += f" (±{accuracy:.0f}m - {level} Accuracy)"

    # Add city/country only when both are available
    if location_data.get('city') and location_data.get('country'):
        location_str += f" - {location_data['city']}, {location_data['country']}"

    return location_str

def validate_coordinates(lat, lon):
    """Check that a latitude/longitude pair is numeric and within range.

    Returns:
        ``(True, lat, lon)`` with both values converted to float when the
        latitude lies in [-90, 90] and the longitude in [-180, 180];
        otherwise ``(False, None, None)``.
    """
    rejected = (False, None, None)

    try:
        latitude, longitude = float(lat), float(lon)
    except (ValueError, TypeError):
        return rejected

    within_bounds = -90 <= latitude <= 90 and -180 <= longitude <= 180
    if not within_bounds:
        return rejected
    return True, latitude, longitude

def get_user_contribution_stats(username):
    """Summarize what one user has contributed.

    Scans every entry returned by ``load_labels``.

    Returns:
        A dict with the number of images the user uploaded, the number of
        labels the user added, and the counts of distinct languages (from
        those labels) and distinct categories (from those uploads).
    """
    uploads = 0
    label_total = 0
    languages = set()
    categories = set()

    for record in load_labels().values():
        # Uploads credited to this user, with the category of each one
        if record.get('uploaded_by') == username:
            uploads += 1
            categories.add(record.get('category', 'Unknown'))

        # Labels credited to this user, on any image
        own_labels = [
            item for item in record.get('labels', [])
            if item.get('added_by') == username
        ]
        label_total += len(own_labels)
        languages.update(item.get('language', 'unknown') for item in own_labels)

    # The distinct sets are reported as counts only
    return {
        'images_uploaded': uploads,
        'labels_added': label_total,
        'languages_contributed': len(languages),
        'categories_contributed': len(categories)
    }