Spaces:
Sleeping
Sleeping
File size: 2,176 Bytes
0f0bf0d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | import gradio as gr
import numpy as np
import pickle
import tensorflow as tf
from keras.preprocessing.image import load_img, img_to_array
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model
from keras.preprocessing.sequence import pad_sequences
# 1. Load Resources
# Trained captioning network; generate_caption feeds it an image feature
# vector together with the padded word-index sequence of the caption so far.
model = tf.keras.models.load_model('trained_model.keras')

# NOTE: pickle can execute arbitrary code while loading, so these two files
# must only ever come from a trusted source (they ship with the app).

# Lookup used to turn a predicted index back into a word.
with open('vocab.pkl', 'rb') as vocab_file:
    ixtoword = pickle.load(vocab_file)

# Lookup used to turn a caption word into its index.
with open('wordtoix.pkl', 'rb') as index_file:
    wordtoix = pickle.load(index_file)
# 2. Setup Feature Extractor (InceptionV3)
base_model = InceptionV3(weights='imagenet')
# Re-use the pretrained network but stop at its second-to-last layer, so the
# extractor outputs that layer's activations instead of the final layer's.
feature_extractor = Model(
    inputs=base_model.input,
    outputs=base_model.layers[-2].output,
)
def extract_features(image):
    """Return the feature vector produced by ``feature_extractor`` for one image.

    Args:
        image: A PIL image (anything exposing ``mode``, ``convert`` and
            ``resize`` the way ``PIL.Image.Image`` does).

    Returns:
        A 1-D NumPy array: the extractor output for the single image with
        the batch axis removed.
    """
    # The extractor needs three colour channels; grayscale, RGBA or palette
    # images would otherwise yield an array with the wrong channel count.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Resize and preprocess image for InceptionV3
    image = image.resize((299, 299))
    pixels = img_to_array(image)
    batch = np.expand_dims(pixels, axis=0)  # add the batch axis
    batch = preprocess_input(batch)

    # verbose=0 matches the caption model call and keeps a progress bar from
    # being printed for every request.
    feature = feature_extractor.predict(batch, verbose=0)
    return np.reshape(feature, (feature.shape[1]))
def generate_caption(image):
    """Generate a caption for an image by greedy, word-by-word decoding.

    Starting from the ``startseq`` marker, the caption model is asked for the
    most likely next word until it emits ``endseq``, predicts an index that
    is missing from ``ixtoword``, or ``max_length`` words have been produced.

    Args:
        image: The picture to describe, forwarded to ``extract_features``.
            May be ``None`` when no picture was supplied.

    Returns:
        The caption text with the ``startseq``/``endseq`` markers removed and
        surrounding whitespace stripped, or a short prompt asking for an
        image when ``image`` is ``None``.
    """
    # Without this guard a missing upload crashes inside extract_features
    # with an AttributeError instead of telling the user what to do.
    if image is None:
        return "Please upload an image first."

    # Get image features
    photo = extract_features(image)
    # The image half of the model input never changes between steps, so the
    # batched array is built once instead of on every iteration.
    photo_batch = np.array([photo])

    # Generate caption
    in_text = 'startseq'
    max_length = 34  # Match your training max_length
    for _ in range(max_length):
        # Encode the caption so far; words missing from wordtoix are skipped.
        sequence = [wordtoix[w] for w in in_text.split() if w in wordtoix]
        sequence = pad_sequences([sequence], maxlen=max_length)[0]
        sequence = np.expand_dims(sequence, axis=0)

        # Predict next word
        yhat = model.predict([photo_batch, sequence], verbose=0)
        word = ixtoword.get(np.argmax(yhat))
        if word is None:
            # Predicted index has no vocabulary entry: stop decoding.
            break
        in_text += ' ' + word
        if word == 'endseq':
            break

    final_caption = in_text.replace('startseq', '').replace('endseq', '')
    return final_caption.strip()
# 3. Launch Interface
# The image component hands generate_caption a PIL image (type="pil").
image_input = gr.Image(type="pil")

interface = gr.Interface(
    fn=generate_caption,
    inputs=image_input,
    outputs="text",
    title="Image Caption Generator",
    description="Upload an image and the AI will describe it.",
)

interface.launch()