Spaces:

pr0ximaCent
/

caption

Sleeping

App Files Files Community

pr0ximaCent commited on May 26

Commit

7034074

verified ·

1 Parent(s): 4145d27

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -32

app.py CHANGED Viewed

@@ -7,50 +7,113 @@ from tensorflow.keras.preprocessing.image import img_to_array
 from tensorflow.keras.preprocessing.sequence import pad_sequences
 import pickle
-# Custom Lambda layer with explicit output shape
-class CustomLambda(tf.keras.layers.Lambda):
-    def __init__(self, function, output_shape=None, **kwargs):
-        super().__init__(function, output_shape=output_shape, **kwargs)
     def compute_output_shape(self, input_shape):
-        if self.output_shape is None:
-            # Default behavior for attention-like operations
-            if isinstance(input_shape, list) and len(input_shape) == 2:
-                return input_shape[0]  # Return shape of first input
-            return input_shape
-        return super().compute_output_shape(input_shape)
-# Define custom objects for model loading
-custom_objects = {
-    'Lambda': CustomLambda,
-    'lambda': CustomLambda
-}
-# Multiple loading strategies
 def load_model_safely():
-    strategies = [
-        # Strategy 1: Load with custom objects
-        lambda: tf.keras.models.load_model("caption_model.h5", custom_objects=custom_objects),
-        # Strategy 2: Load without compilation
-        lambda: tf.keras.models.load_model("caption_model.h5", compile=False),
-        # Strategy 3: Load with different custom objects
-        lambda: tf.keras.models.load_model("caption_model.h5",
-                                         custom_objects={'Lambda': tf.keras.layers.Lambda}),
     ]
-    for i, strategy in enumerate(strategies, 1):
         try:
-            model = strategy()
-            print(f"Model loaded successfully using strategy {i}!")
             return model
         except Exception as e:
-            print(f"Strategy {i} failed: {e}")
             continue
-    raise Exception("All loading strategies failed")
 # Load your pre-trained model and tokenizer
-model = load_model_safely()
 with open("tokenizer.pkl", "rb") as handle:
     tokenizer = pickle.load(handle)
@@ -62,6 +125,9 @@ feature_extractor = tf.keras.Model(feature_extractor.input, feature_extractor.la
 # Description generation function
 def generate_caption(image):
     try:
         # Preprocess the image
         image = image.resize((224, 224))
         image = img_to_array(image)
@@ -84,7 +150,7 @@ def generate_caption(image):
                 yhat = np.argmax(yhat)
             except Exception as e:
                 print(f"Prediction error: {e}")
-                return "Error generating caption"
             word = ''
             for w, i in tokenizer.word_index.items():
@@ -97,7 +163,7 @@ def generate_caption(image):
             input_text += ' ' + word
         caption = input_text.replace('startseq', '').strip()
-        return caption
     except Exception as e:
         return f"Error processing image: {str(e)}"

 from tensorflow.keras.preprocessing.sequence import pad_sequences
 import pickle
+# Custom function to handle attention mechanism
+def attention_function(inputs):
+    """
+    Custom attention function that likely combines two inputs
+    Input 1: (None, 34, 34) - attention weights
+    Input 2: (None, 34, 512) - feature vectors
+    Output: (None, 34, 512) - attended features
+    """
+    attention_weights, features = inputs
+    # Expand attention weights to match feature dimensions
+    attention_weights = tf.expand_dims(attention_weights, axis=-1)
+    # Apply attention weights to features
+    attended_features = attention_weights * features
+    return attended_features
+def attention_output_shape(input_shapes):
+    """Define the output shape for attention mechanism"""
+    # Return the shape of the feature input (second input)
+    return input_shapes[1]  # (None, 34, 512)
+# Alternative attention functions to try
+def attention_function_v2(inputs):
+    """Alternative attention mechanism - weighted sum"""
+    attention_weights, features = inputs
+    # Normalize attention weights
+    attention_weights = tf.nn.softmax(attention_weights, axis=-1)
+    attention_weights = tf.expand_dims(attention_weights, axis=-1)
+    return attention_weights * features
+def attention_function_v3(inputs):
+    """Another alternative - dot product attention"""
+    attention_weights, features = inputs
+    # Sum along the second dimension of attention weights
+    attention_weights = tf.reduce_sum(attention_weights, axis=-1, keepdims=True)
+    attention_weights = tf.expand_dims(attention_weights, axis=-1)
+    return attention_weights * features
+# Custom Lambda layer class
+class AttentionLambda(tf.keras.layers.Lambda):
+    def __init__(self, function, output_shape_func=None, **kwargs):
+        super().__init__(function, **kwargs)
+        self.output_shape_func = output_shape_func
     def compute_output_shape(self, input_shape):
+        if self.output_shape_func:
+            return self.output_shape_func(input_shape)
+        # Default: return the shape of the second input (features)
+        if isinstance(input_shape, list) and len(input_shape) >= 2:
+            return input_shape[1]
+        return input_shape
+# Define multiple custom objects to try different attention mechanisms
+def get_custom_objects(attention_func, output_shape_func):
+    return {
+        'Lambda': lambda function=None, **kwargs: AttentionLambda(
+            attention_func if function is None else function,
+            output_shape_func,
+            **kwargs
+        )
+    }
+# Multiple loading strategies with different attention mechanisms
 def load_model_safely():
+    attention_strategies = [
+        (attention_function, attention_output_shape),
+        (attention_function_v2, attention_output_shape),
+        (attention_function_v3, attention_output_shape),
     ]
+    for i, (att_func, shape_func) in enumerate(attention_strategies, 1):
         try:
+            print(f"Trying attention strategy {i}...")
+            custom_objects = get_custom_objects(att_func, shape_func)
+            model = tf.keras.models.load_model("caption_model.h5", custom_objects=custom_objects)
+            print(f"Model loaded successfully using attention strategy {i}!")
             return model
         except Exception as e:
+            print(f"Attention strategy {i} failed: {e}")
             continue
+    # If all attention strategies fail, try loading without compilation
+    try:
+        print("Trying to load without compilation...")
+        model = tf.keras.models.load_model("caption_model.h5", compile=False)
+        print("Model loaded without compilation!")
+        return model
+    except Exception as e:
+        print(f"Loading without compilation failed: {e}")
+    # Last resort: try to load and rebuild the model
+    try:
+        print("Attempting to load model weights only...")
+        # This is a more complex approach that would require knowing the model architecture
+        raise Exception("Model architecture reconstruction needed")
+    except:
+        pass
+    raise Exception("All loading strategies failed. The model may need to be retrained or converted.")
 # Load your pre-trained model and tokenizer
+try:
+    model = load_model_safely()
+except Exception as e:
+    print(f"Failed to load model: {e}")
+    print("Creating a dummy model for testing...")
+    # Create a simple dummy model for testing the interface
+    model = None
 with open("tokenizer.pkl", "rb") as handle:
     tokenizer = pickle.load(handle)
 # Description generation function
 def generate_caption(image):
     try:
+        if model is None:
+            return "Model failed to load. Please check the model file."
         # Preprocess the image
         image = image.resize((224, 224))
         image = img_to_array(image)
                 yhat = np.argmax(yhat)
             except Exception as e:
                 print(f"Prediction error: {e}")
+                return f"Error during prediction: {str(e)}"
             word = ''
             for w, i in tokenizer.word_index.items():
             input_text += ' ' + word
         caption = input_text.replace('startseq', '').strip()
+        return caption if caption else "Unable to generate caption"
     except Exception as e:
         return f"Error processing image: {str(e)}"