import numpy as np
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.models import Model
from src.model import build_model

# Load the tokenizer that was fitted during training.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a tokenizer.pickle produced by a trusted training run.
try:
    with open('models/tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)
except (EOFError, FileNotFoundError, pickle.UnpicklingError):
    # UnpicklingError covers damaged files that are not merely truncated.
    print("Error: tokenizer.pickle is missing or corrupted. Please retrain the model.")
    # Raise SystemExit directly: the exit() helper is injected by the site
    # module for interactive use and is not guaranteed to exist in programs.
    raise SystemExit(1)

# Load max_length (stored as a single integer in a text file).
try:
    with open('models/max_length.txt', 'r') as f:
        max_length = int(f.read().strip())
except (ValueError, FileNotFoundError):
    print("Error: max_length.txt is missing or invalid. Please retrain the model.")
    raise SystemExit(1)

# Load InceptionV3 model for feature extraction. The network is re-wired so
# that its output is the second-to-last layer instead of the final layer.
inception_model = InceptionV3(weights='imagenet')
inception_model = Model(inception_model.input, inception_model.layers[-2].output)

# Rebuild the model with the same vocabulary size and sequence length.
# Index 0 is not present in word_index, hence the +1.
vocab_size = len(tokenizer.word_index) + 1
model = build_model(vocab_size, max_length)

# Load the weights
try:
    model.load_weights('models/model.h5')
except Exception as exc:
    # Catch Exception rather than using a bare except so that Ctrl-C and
    # SystemExit still propagate, and surface the underlying cause.
    print("Error: Failed to load model weights. Please retrain the model.")
    print(f"Details: {exc}")
    raise SystemExit(1)

def extract_features(image):
    """Run ``image`` through the module-level ``inception_model``.

    Args:
        image: Input passed unchanged to ``inception_model.predict``.
            Presumably an already preprocessed image batch sized for
            InceptionV3 -- TODO confirm against the caller.

    Returns:
        Whatever ``inception_model.predict`` returns for ``image``.
    """
    return inception_model.predict(image)

def word_for_id(integer, tokenizer):
    """Return the word whose index is ``integer``, or ``None`` if there is none.

    Args:
        integer: Vocabulary index to look up (a Python or NumPy integer).
        tokenizer: Object exposing a ``word_index`` mapping of word -> index
            and, optionally, an ``index_word`` mapping of index -> word.

    Returns:
        The matching word, or ``None`` when no word has that index.
    """
    # Fast path: use the reverse mapping when the tokenizer provides one, so
    # a lookup does not have to walk the whole vocabulary.
    reverse_index = getattr(tokenizer, 'index_word', None)
    if reverse_index:
        word = reverse_index.get(integer)
        if word is not None:
            return word

    # Fallback: scan word_index. Covers tokenizers without index_word and any
    # index that is missing from the reverse mapping.
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None

def predict_caption(image):
    """Generate a caption for ``image`` by greedy word-by-word decoding.

    Starting from the ``startseq`` marker, each step feeds the image features
    and the padded caption-so-far to ``model`` and appends the word whose
    index has the highest score. Decoding stops after ``max_length`` steps,
    when ``endseq`` is predicted, or when the predicted index has no word.

    Args:
        image: Input forwarded to ``extract_features``.

    Returns:
        The generated caption with every ``startseq`` occurrence removed and
        surrounding whitespace stripped.
    """
    image_features = extract_features(image)

    # The caption is seeded with the start marker and grows one word per step.
    tokens = ['startseq']

    for _step in range(max_length):
        partial_caption = ' '.join(tokens)
        encoded = tokenizer.texts_to_sequences([partial_caption])[0]
        padded = pad_sequences([encoded], maxlen=max_length)

        scores = model.predict([image_features, padded], verbose=0)
        # argmax without an axis works on the flattened output array.
        best_index = np.argmax(scores)

        next_word = word_for_id(best_index, tokenizer)
        if next_word is None:
            break
        if next_word == 'endseq':
            break

        tokens.append(next_word)

    return ' '.join(tokens).replace('startseq', '').strip()