# EmoVIT / app.py
# Uploaded by manhteky123 ("Upload 25 files", commit c802cc8 verified)
import os
import io
import sys
# Set up logging early
import logging
# Log to stdout so container platforms (e.g. Hugging Face Spaces) capture output.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

# Make the bundled EmoVIT sources importable.
sys.path.insert(0, '/app/EmoVIT')
sys.path.insert(0, '/app/EmoVIT/lib')

# Cache locations must be set before any ML library import reads them.
# The defaults apply only when the variable is not already set in the env.
_CACHE_ENV_DEFAULTS = {
    'TRANSFORMERS_CACHE': '/app/.cache/transformers',
    'HF_HOME': '/app/.cache/huggingface',
    'TORCH_HOME': '/app/.cache/torch',
    'HF_DATASETS_CACHE': '/app/.cache/datasets',
}
for _var, _default in _CACHE_ENV_DEFAULTS.items():
    os.environ.setdefault(_var, _default)
os.environ['PYTHONUNBUFFERED'] = '1'

# Create the directories that were actually selected. Fix: the previous
# version always created the hardcoded defaults, even when an environment
# override pointed a cache variable somewhere else.
for _var in _CACHE_ENV_DEFAULTS:
    os.makedirs(os.environ[_var], exist_ok=True)

logger.info("🔧 Environment setup complete")
logger.info(f"PYTHONPATH: {sys.path[:3]}")
# Import basic dependencies; these are hard requirements — abort if absent.
try:
    import torch
    from flask import Flask, render_template, request, jsonify, url_for
    from PIL import Image
    import base64
    import numpy as np
    logger.info("✅ Basic dependencies loaded successfully")
except ImportError as e:
    logger.error(f"❌ Failed to import basic dependencies: {e}")
    sys.exit(1)

# Optional model stack (transformers + LAVIS). Failure here is non-fatal:
# the app falls back to demo mode with MODEL_AVAILABLE = False.
try:
    # Log the NumPy version to help diagnose ABI/version mismatches with LAVIS.
    numpy_version = np.__version__
    logger.info(f"NumPy version: {numpy_version}")
    from transformers import AutoTokenizer
    logger.info("✅ Transformers imported successfully")
    # Try to import LAVIS components
    import lavis
    logger.info("✅ LAVIS base imported successfully")
    from blip2_vicuna_instruct import Blip2VicunaInstruct
    logger.info("✅ Blip2VicunaInstruct imported successfully")
    MODEL_AVAILABLE = True
    logger.info("✅ All imports successful - Full model mode enabled")
except ImportError as e:
    logger.error(f"❌ Model import failed: {e}")
    logger.info("🔄 Running in demo mode without full model capabilities")
    MODEL_AVAILABLE = False
    Blip2VicunaInstruct = None
except Exception as e:
    # LAVIS imports can raise non-ImportError exceptions (e.g. binary
    # incompatibility); treat those the same as a missing dependency.
    logger.error(f"❌ Unexpected error during import: {e}")
    logger.info("🔄 Running in demo mode without full model capabilities")
    MODEL_AVAILABLE = False
    Blip2VicunaInstruct = None
app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max upload size
# Global model state, populated by load_model(); both stay None in demo mode.
model = None
device = None
def _blip2_config():
    """Return the static constructor kwargs for Blip2VicunaInstruct."""
    return {
        "vit_model": "eva_clip_g",
        "img_size": 224,
        "drop_path_rate": 0,
        "use_grad_checkpoint": False,
        "vit_precision": "fp16",
        "freeze_vit": True,
        "num_query_token": 32,
        "llm_model": "vicuna-7b-v1.1",  # path may need changing per deployment
        "prompt": "",
        "max_txt_len": 128,
        "max_output_txt_len": 256,
        "apply_lemmatizer": False,
        "qformer_text_input": True,
    }

def load_model():
    """Load the BLIP2-Vicuna model into the module-level globals.

    Sets ``model`` and ``device``. Any failure is non-fatal: the app keeps
    running in demo mode with ``model`` left as ``None``.
    """
    global model, device

    # Nothing to do when the LAVIS import chain failed at startup.
    if not MODEL_AVAILABLE:
        logger.warning("⚠️ Model is not available due to import errors")
        return

    try:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info(f"🔧 Using device: {device}")
        if torch.cuda.is_available():
            logger.info(f"🎮 CUDA available: {torch.cuda.get_device_name(0)}")
        else:
            logger.info("🖥️ Running on CPU")

        # Without the LAVIS class there is nothing to instantiate.
        if Blip2VicunaInstruct is None:
            logger.warning("⚠️ Blip2VicunaInstruct class not available - skipping model load")
            return

        logger.info("🔄 Initializing model...")
        model = Blip2VicunaInstruct(**_blip2_config())
        model.to(device)
        model.eval()
        logger.info("✅ Model loaded successfully!")
    except Exception as e:
        logger.error(f"❌ Error loading model: {str(e)}")
        logger.info("🔄 Continuing in demo mode...")
        model = None
def preprocess_image(image, size=224):
    """Convert a PIL image into a normalized model input tensor.

    Args:
        image: PIL.Image in any mode; converted to RGB if needed.
        size: target square edge in pixels (default 224, the model input size).

    Returns:
        Float32 tensor of shape (1, 3, size, size) normalized with the
        ImageNet mean/std, or None if preprocessing fails.
    """
    try:
        if image.mode != 'RGB':
            image = image.convert('RGB')
        image = image.resize((size, size))

        # Equivalent of torchvision's ToTensor + Normalize, but without the
        # torchvision dependency (which this file never imports at top level):
        # scale to [0, 1], reorder HWC -> CHW, then standardize per channel
        # with the ImageNet statistics.
        arr = np.asarray(image, dtype=np.float32) / 255.0
        tensor = torch.from_numpy(arr).permute(2, 0, 1)
        mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
        std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
        tensor = (tensor - mean) / std

        # Add the batch dimension the model expects.
        return tensor.unsqueeze(0)
    except Exception as e:
        logger.error(f"❌ Error preprocessing image: {str(e)}")
        return None
def predict_emotion(image_tensor, prompt="What emotion is shown in this image?"):
    """Run the loaded model on a preprocessed image tensor.

    Returns the generated emotion text, or an error/placeholder string when
    the model is unavailable or generation fails.
    """
    global model, device

    if model is None:
        return "Model not loaded"

    try:
        # Inference only — no autograd bookkeeping needed.
        with torch.no_grad():
            samples = {
                "image": image_tensor.to(device),
                "text_input": [prompt],
            }
            output = model.generate(
                samples,
                use_nucleus_sampling=False,
                num_beams=3,
                max_length=50,
                min_length=1,
                temperature=0.1,
                repetition_penalty=1.1,
            )
        return output[0] if output else "Unable to predict emotion"
    except Exception as e:
        logger.error(f"❌ Error predicting emotion: {str(e)}")
        return f"Error: {str(e)}"
@app.route('/')
def index():
    """Home page: render the image-upload form."""
    return render_template('index.html')
@app.route('/predict', methods=['POST'])
def predict():
    """Accept an uploaded image, run emotion inference, and return JSON.

    Responds 400 on missing/invalid input, 500 when the model stack could
    not be imported, and a demo-mode placeholder when weights never loaded.
    """
    try:
        # Reject outright when the LAVIS import chain failed at startup.
        if not MODEL_AVAILABLE:
            return jsonify({
                'error': 'Model is not available due to import errors. Please check dependencies.',
                'details': 'The application is running in demo mode. Full model functionality requires proper LAVIS installation.'
            }), 500

        # Validate the multipart upload.
        upload = request.files.get('image')
        if upload is None:
            return jsonify({'error': 'No image file provided'}), 400
        if upload.filename == '':
            return jsonify({'error': 'No image selected'}), 400

        logger.info(f"📷 Processing image: {upload.filename}")
        pil_image = Image.open(io.BytesIO(upload.read()))

        custom_prompt = request.form.get('prompt', 'What emotion is shown in this image?')

        # Echo the image back as base64-encoded PNG for display in the UI.
        png_buffer = io.BytesIO()
        pil_image.save(png_buffer, format="PNG")
        encoded_image = base64.b64encode(png_buffer.getvalue()).decode()

        if model is None:
            # Demo-mode fallback when weights never loaded.
            emotion_result = "Model not loaded - unable to analyze emotion. This might be due to missing model weights or configuration issues. Running in demo mode."
            logger.warning("⚠️ Model not available, returning demo response")
        else:
            logger.info("🔄 Running model inference...")
            image_tensor = preprocess_image(pil_image)
            if image_tensor is None:
                return jsonify({'error': 'Failed to process image'}), 400
            emotion_result = predict_emotion(image_tensor, custom_prompt)
            logger.info(f"✅ Prediction complete: {emotion_result[:50]}...")

        return jsonify({
            'success': True,
            'emotion': emotion_result,
            'image': encoded_image,
            'prompt': custom_prompt,
            'model_available': MODEL_AVAILABLE,
            'model_loaded': model is not None
        })
    except Exception as e:
        logger.error(f"❌ Error in prediction: {str(e)}")
        return jsonify({'error': f'Prediction failed: {str(e)}'}), 500
@app.route('/health')
def health():
    """Health-check endpoint reporting model and device status."""
    payload = {
        'status': 'healthy',
        'model_available': MODEL_AVAILABLE,
        'model_loaded': model is not None,
        'device': str(device) if device else 'unknown',
    }
    return jsonify(payload)
if __name__ == '__main__':
    # Entry point: load the model once, then serve HTTP.
    logger.info("🚀 Starting EmoVIT Flask application...")
    # Load model (a failure here leaves model=None and the app in demo mode)
    logger.info("📝 Loading model...")
    load_model()
    if MODEL_AVAILABLE and model is not None:
        logger.info("✅ Model loaded successfully - Full functionality available")
    else:
        logger.warning("⚠️ Model not available - Running in demo mode")
    # Hugging Face Spaces injects PORT; fall back to its conventional 7860.
    port = int(os.environ.get("PORT", 7860))
    logger.info(f"🌐 Starting server on port {port}")
    # debug=False: the Werkzeug debugger must never run in a public Space
    app.run(host="0.0.0.0", port=port, debug=False)