Spaces:

pr0ximaCent
/

caption

Sleeping

App Files Files Community

pr0ximaCent committed on May 26, 2025

Commit

4145d27

verified ·

1 Parent(s): 46e1197

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -34

app.py CHANGED Viewed

@@ -7,50 +7,100 @@ from tensorflow.keras.preprocessing.image import img_to_array
 from tensorflow.keras.preprocessing.sequence import pad_sequences
 import pickle
 # Load your pre-trained model and tokenizer
-model = tf.keras.models.load_model("caption_model.h5")
 with open("tokenizer.pkl", "rb") as handle:
     tokenizer = pickle.load(handle)
-# Load your precomputed features if required (else comment out)
-# with open("features.pkl", "rb") as f:
-#     features = pickle.load(f)
 # Image feature extractor model
 feature_extractor = VGG16()
 feature_extractor = tf.keras.Model(feature_extractor.input, feature_extractor.layers[-2].output)
 # Description generation function
 def generate_caption(image):
-    # Preprocess the image
-    image = image.resize((224, 224))
-    image = img_to_array(image)
-    image = np.expand_dims(image, axis=0)
-    image = preprocess_input(image)
-    # Extract features
-    feature = feature_extractor.predict(image, verbose=0)
-    # Generate caption (mock example: replace with your real inference loop)
-    input_text = 'startseq'
-    max_length = 34  # set this to your model's max_length
-    for _ in range(max_length):
-        sequence = tokenizer.texts_to_sequences([input_text])[0]
-        sequence = pad_sequences([sequence], maxlen=max_length)
-        yhat = model.predict([feature, sequence], verbose=0)
-        yhat = np.argmax(yhat)
-        word = ''
-        for w, i in tokenizer.word_index.items():
-            if i == yhat:
-                word = w
                 break
-        if word == 'endseq' or word == '':
-            break
-        input_text += ' ' + word
-    caption = input_text.replace('startseq', '').strip()
-    return caption
 # Gradio Interface
 title = "📸 Image Caption Generator"
@@ -68,4 +118,4 @@ iface = gr.Interface(
 )
 if __name__ == "__main__":
-    iface.launch()

 from tensorflow.keras.preprocessing.sequence import pad_sequences
 import pickle
+# Custom Lambda layer with explicit output shape
+class CustomLambda(tf.keras.layers.Lambda):
+    """Lambda layer with a fallback rule for its output shape.
+    If no explicit ``output_shape`` was passed to the constructor, a list of
+    exactly two input shapes yields the first one, and any other input shape
+    is returned unchanged. Otherwise the parent implementation is used.
+    """
+    def __init__(self, function, output_shape=None, **kwargs):
+        super().__init__(function, output_shape=output_shape, **kwargs)
+    def compute_output_shape(self, input_shape):
+        """Return the configured output shape, or the fallback when unset."""
+        # Lambda stores the constructor argument as ``_output_shape``. The
+        # public ``output_shape`` attribute is not that argument and cannot
+        # be read before the layer has been called, so test the private one.
+        if getattr(self, "_output_shape", None) is None:
+            # Default behavior for attention-like operations
+            if isinstance(input_shape, list) and len(input_shape) == 2:
+                return input_shape[0]  # Return shape of first input
+            return input_shape
+        return super().compute_output_shape(input_shape)
+# Define custom objects for model loading
+# Both spellings of the layer name resolve to CustomLambda; this mapping is
+# what load_model_safely passes as ``custom_objects`` in its first strategy.
+custom_objects = {
+    'Lambda': CustomLambda,
+    'lambda': CustomLambda
+}
+# Multiple loading strategies
+def load_model_safely(model_path="caption_model.h5"):
+    """Load the caption model, trying several loading strategies in order.
+    Args:
+        model_path: Path of the saved model file; defaults to the file the
+            app has always loaded.
+    Returns:
+        The model returned by the first strategy that succeeds.
+    Raises:
+        RuntimeError: If every strategy fails. The error from the last
+            strategy is attached as the cause.
+    """
+    strategies = [
+        # Strategy 1: Load with custom objects
+        lambda: tf.keras.models.load_model(model_path, custom_objects=custom_objects),
+        # Strategy 2: Load without compilation
+        lambda: tf.keras.models.load_model(model_path, compile=False),
+        # Strategy 3: Load with different custom objects
+        lambda: tf.keras.models.load_model(model_path,
+                                         custom_objects={'Lambda': tf.keras.layers.Lambda}),
+    ]
+    last_error = None
+    for i, strategy in enumerate(strategies, 1):
+        try:
+            model = strategy()
+        except Exception as e:
+            # Any loader failure just moves on to the next strategy.
+            print(f"Strategy {i} failed: {e}")
+            last_error = e
+        else:
+            print(f"Model loaded successfully using strategy {i}!")
+            return model
+    # RuntimeError is still an Exception, so existing handlers keep working;
+    # chaining keeps the real cause visible in the traceback.
+    raise RuntimeError("All loading strategies failed") from last_error
 # Load your pre-trained model and tokenizer
+model = load_model_safely()
+# NOTE: pickle.load can execute arbitrary code stored in the file; only ship a
+# tokenizer.pkl that you produced yourself.
 with open("tokenizer.pkl", "rb") as handle:
     tokenizer = pickle.load(handle)
 # Image feature extractor model
+# VGG16 is rebuilt to end at its second-to-last layer, so predict() returns
+# that layer's activations instead of the final layer's output.
 feature_extractor = VGG16()
 feature_extractor = tf.keras.Model(feature_extractor.input, feature_extractor.layers[-2].output)
 # Description generation function
 def generate_caption(image):
+    """Generate a caption for ``image`` using the module-level model and tokenizer.
+    ``image`` is assumed to be a PIL image (the resize call already requires it).
+    Returns the caption text, "Error generating caption" if a prediction step
+    fails, or "Error processing image: ..." if anything else goes wrong.
+    """
+    try:
+        # Preprocess the image
+        # Force three channels first: RGBA or grayscale uploads would
+        # otherwise reach the feature extractor with the wrong channel count.
+        image = image.convert("RGB").resize((224, 224))
+        image = img_to_array(image)
+        image = np.expand_dims(image, axis=0)
+        image = preprocess_input(image)
+        # Extract features
+        feature = feature_extractor.predict(image, verbose=0)
+        # Build the index -> word table once instead of scanning
+        # tokenizer.word_index for every generated token.
+        index_to_word = {i: w for w, i in tokenizer.word_index.items()}
+        # Generate caption
+        input_text = 'startseq'
+        max_length = 34  # set this to your model's max_length
+        for _ in range(max_length):
+            sequence = tokenizer.texts_to_sequences([input_text])[0]
+            sequence = pad_sequences([sequence], maxlen=max_length)
+            try:
+                yhat = model.predict([feature, sequence], verbose=0)
+                yhat = int(np.argmax(yhat))
+            except Exception as e:
+                print(f"Prediction error: {e}")
+                return "Error generating caption"
+            # An index with no vocabulary entry maps to '' and ends the caption.
+            word = index_to_word.get(yhat, '')
+            if word == 'endseq' or word == '':
+                break
+            input_text += ' ' + word
+        caption = input_text.replace('startseq', '').strip()
+        return caption
+    except Exception as e:
+        return f"Error processing image: {str(e)}"
 # Gradio Interface
 title = "📸 Image Caption Generator"
 )
 if __name__ == "__main__":
+    iface.launch()