import os
import io
import sys

# Set up logging early
import logging

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

# Add current directory to Python path
sys.path.insert(0, '/app/EmoVIT')
sys.path.insert(0, '/app/EmoVIT/lib')

# Set cache directories before importing any ML libraries
os.environ['TRANSFORMERS_CACHE'] = os.environ.get('TRANSFORMERS_CACHE', '/app/.cache/transformers')
os.environ['HF_HOME'] = os.environ.get('HF_HOME', '/app/.cache/huggingface')
os.environ['TORCH_HOME'] = os.environ.get('TORCH_HOME', '/app/.cache/torch')
os.environ['HF_DATASETS_CACHE'] = os.environ.get('HF_DATASETS_CACHE', '/app/.cache/datasets')
os.environ['PYTHONUNBUFFERED'] = '1'

# Create cache directories if they don't exist
for cache_dir in ['/app/.cache/transformers', '/app/.cache/huggingface',
                  '/app/.cache/torch', '/app/.cache/datasets']:
    os.makedirs(cache_dir, exist_ok=True)

logger.info("🔧 Environment setup complete")
logger.info(f"PYTHONPATH: {sys.path[:3]}")

# Import basic dependencies
try:
    import torch
    import torchvision.transforms as transforms
    from flask import Flask, render_template, request, jsonify, url_for
    from PIL import Image
    import base64
    import numpy as np
    logger.info("✅ Basic dependencies loaded successfully")
except ImportError as e:
    logger.error(f"❌ Failed to import basic dependencies: {e}")
    sys.exit(1)

# Safe import with error handling for LAVIS
try:
    # Check numpy version compatibility
    numpy_version = np.__version__
    logger.info(f"NumPy version: {numpy_version}")

    from transformers import AutoTokenizer
    logger.info("✅ Transformers imported successfully")

    # Try to import LAVIS components
    import lavis
    logger.info("✅ LAVIS base imported successfully")

    from blip2_vicuna_instruct import Blip2VicunaInstruct
    logger.info("✅ Blip2VicunaInstruct imported successfully")

    MODEL_AVAILABLE = True
    logger.info("✅ All imports successful - Full model mode enabled")
except ImportError as e:
    logger.error(f"❌ Model import failed: {e}")
    logger.info("🔄 Running in demo mode without full model capabilities")
    MODEL_AVAILABLE = False
    Blip2VicunaInstruct = None
except Exception as e:
    logger.error(f"❌ Unexpected error during import: {e}")
    logger.info("🔄 Running in demo mode without full model capabilities")
    MODEL_AVAILABLE = False
    Blip2VicunaInstruct = None

app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max file size

# Global variables for the model
model = None
device = None


def load_model():
    """Load the BLIP-2 Vicuna model."""
    global model, device

    if not MODEL_AVAILABLE:
        logger.warning("⚠️ Model is not available due to import errors")
        return

    try:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info(f"🔧 Using device: {device}")

        # Check if we have CUDA support
        if torch.cuda.is_available():
            logger.info(f"🎮 CUDA available: {torch.cuda.get_device_name(0)}")
        else:
            logger.info("🖥️ Running on CPU")

        # For demo purposes, skip actual model loading if LAVIS isn't available
        if Blip2VicunaInstruct is None:
            logger.warning("⚠️ Blip2VicunaInstruct class not available - skipping model load")
            return

        # Model configuration - may need adjusting to match the actual config
        model_config = {
            "vit_model": "eva_clip_g",
            "img_size": 224,
            "drop_path_rate": 0,
            "use_grad_checkpoint": False,
            "vit_precision": "fp16",
            "freeze_vit": True,
            "num_query_token": 32,
            "llm_model": "vicuna-7b-v1.1",  # the path may need to be changed
            "prompt": "",
            "max_txt_len": 128,
            "max_output_txt_len": 256,
            "apply_lemmatizer": False,
            "qformer_text_input": True,
        }

        logger.info("🔄 Initializing model...")

        # Initialize the model
        model = Blip2VicunaInstruct(**model_config)
        model.to(device)
        model.eval()

        logger.info("✅ Model loaded successfully!")
    except Exception as e:
        logger.error(f"❌ Error loading model: {str(e)}")
        logger.info("🔄 Continuing in demo mode...")
        model = None
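
# NOTE: constructing Blip2VicunaInstruct directly bypasses LAVIS's own
# checkpoint loading. If a full upstream LAVIS install is available, an
# alternative (sketched here, not verified against this repo's fork) is the
# library's standard loader, which also returns matching image preprocessors:
#
#     from lavis.models import load_model_and_preprocess
#     model, vis_processors, _ = load_model_and_preprocess(
#         name="blip2_vicuna_instruct", model_type="vicuna7b",
#         is_eval=True, device=device,
#     )
#
# The registry name and model_type above are assumptions based on upstream LAVIS.
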
"qformer_text_input": True, } logger.info("🔄 Initializing model...") # Khởi tạo model model = Blip2VicunaInstruct(**model_config) model.to(device) model.eval() logger.info("✅ Model loaded successfully!") except Exception as e: logger.error(f"❌ Error loading model: {str(e)}") logger.info("🔄 Continuing in demo mode...") model = None def preprocess_image(image): """Preprocess image for model""" try: # Resize và normalize image if image.mode != 'RGB': image = image.convert('RGB') # Resize to model input size image = image.resize((224, 224)) # Convert to tensor import torchvision.transforms as transforms transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) image_tensor = transform(image).unsqueeze(0) return image_tensor except Exception as e: logger.error(f"❌ Error preprocessing image: {str(e)}") return None def predict_emotion(image_tensor, prompt="What emotion is shown in this image?"): """Predict emotion từ image""" global model, device if model is None: return "Model not loaded" try: with torch.no_grad(): # Prepare samples samples = { "image": image_tensor.to(device), "text_input": [prompt] } # Generate prediction result = model.generate( samples, use_nucleus_sampling=False, num_beams=3, max_length=50, min_length=1, temperature=0.1, repetition_penalty=1.1 ) return result[0] if result else "Unable to predict emotion" except Exception as e: logger.error(f"❌ Error predicting emotion: {str(e)}") return f"Error: {str(e)}" @app.route('/') def index(): """Home page""" return render_template('index.html') @app.route('/predict', methods=['POST']) def predict(): """Handle image upload and prediction""" try: if not MODEL_AVAILABLE: return jsonify({ 'error': 'Model is not available due to import errors. Please check dependencies.', 'details': 'The application is running in demo mode. Full model functionality requires proper LAVIS installation.' }), 500 if 'image' not in request.files: return jsonify({'error': 'No image file provided'}), 400 file = request.files['image'] if file.filename == '': return jsonify({'error': 'No image selected'}), 400 logger.info(f"📷 Processing image: {file.filename}") # Đọc và xử lý image image = Image.open(io.BytesIO(file.read())) # Get custom prompt if provided custom_prompt = request.form.get('prompt', 'What emotion is shown in this image?') # Convert image to base64 for display buffered = io.BytesIO() image.save(buffered, format="PNG") img_str = base64.b64encode(buffered.getvalue()).decode() # If model is not loaded, return a fallback response if model is None: emotion_result = "Model not loaded - unable to analyze emotion. This might be due to missing model weights or configuration issues. Running in demo mode." 
logger.warning("⚠️ Model not available, returning demo response") else: logger.info("🔄 Running model inference...") # Preprocess image image_tensor = preprocess_image(image) if image_tensor is None: return jsonify({'error': 'Failed to process image'}), 400 # Predict emotion emotion_result = predict_emotion(image_tensor, custom_prompt) logger.info(f"✅ Prediction complete: {emotion_result[:50]}...") return jsonify({ 'success': True, 'emotion': emotion_result, 'image': img_str, 'prompt': custom_prompt, 'model_available': MODEL_AVAILABLE, 'model_loaded': model is not None }) except Exception as e: logger.error(f"❌ Error in prediction: {str(e)}") return jsonify({'error': f'Prediction failed: {str(e)}'}), 500 @app.route('/health') def health(): """Health check endpoint""" return jsonify({ 'status': 'healthy', 'model_available': MODEL_AVAILABLE, 'model_loaded': model is not None, 'device': str(device) if device else 'unknown' }) if __name__ == '__main__': # Setup logging (already done above, but ensure it's configured) logger.info("🚀 Starting EmoVIT Flask application...") # Load model logger.info("📝 Loading model...") load_model() if MODEL_AVAILABLE and model is not None: logger.info("✅ Model loaded successfully - Full functionality available") else: logger.warning("⚠️ Model not available - Running in demo mode") # Determine port for Hugging Face Spaces port = int(os.environ.get("PORT", 7860)) logger.info(f"🌐 Starting server on port {port}") # Run app with proper logging app.run(host="0.0.0.0", port=port, debug=False)