harshrajsaxena committed on
Commit
6123f34
·
1 Parent(s): a557dd6

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +36 -0
  2. image_captioning.py +57 -0
  3. ixtoword.npy +3 -0
  4. requirements.txt +4 -0
  5. wordtoix.npy +3 -0
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ from io import BytesIO
4
+ from image_captioning import generate_captions
5
+ import requests
6
+ import numpy as np
7
+
8
def main():
    """Render the Streamlit page: choose an image, preview it, caption it.

    The user either uploads a JPG/JPEG/PNG file or types an image URL. When
    an image has been loaded successfully it is previewed, passed to
    ``generate_captions`` and the result is written to the page. When no
    image is available (nothing uploaded yet, empty URL, or a URL that could
    not be fetched/decoded) the captioning step is skipped.
    """
    st.title("Image Captioning App")
    st.write("Upload an image or enter the URL of an image.")

    # Image upload section
    upload_option = st.radio("Select an option", ("Upload Image", "Image URL"))

    # Stays None until an image is actually loaded. Only one of the two
    # branches below runs, so the captioning step must not depend on names
    # that are bound in just one of them.
    image = None

    if upload_option == "Upload Image":
        uploaded_file = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png"])
        if uploaded_file is not None:
            image = Image.open(uploaded_file)
            st.image(image, caption="Uploaded Image", use_column_width=True)

    else:
        image_url = st.text_input("Enter the URL of an image")
        if image_url:
            try:
                # A timeout keeps the page from hanging on an unresponsive
                # host; raise_for_status turns 4xx/5xx replies into errors
                # instead of trying to decode an HTML error page as an image.
                response = requests.get(image_url, timeout=10)
                response.raise_for_status()
                image = Image.open(BytesIO(response.content))
                st.image(image, caption="Image from URL", use_column_width=True)
            except Exception:
                # Best-effort UI boundary: report any fetch/decode failure to
                # the user. Unlike a bare ``except:``, this does not trap
                # KeyboardInterrupt or SystemExit.
                image = None
                st.error("Invalid image URL. Please enter a valid URL.")

    if image is not None:
        captions = generate_captions(image)
        # Display the generated captions in the web interface
        st.write(captions)


if __name__ == '__main__':
    main()
image_captioning.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import numpy as np
3
+ from tensorflow.keras.applications.inception_v3 import preprocess_input
4
+ from tensorflow.keras.models import Model, load_model
5
+ from tensorflow.keras.preprocessing import image
6
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
7
+
8
# Paths, all relative to the process working directory. Of these, only
# ``model_path`` is referenced by the functions visible in this module; the
# three text-file paths are presumably Flickr8k caption/split listings kept
# from training (verify before removing).
token_path = "saved_models/Flickr8k.token.txt"
train_images_path = "saved_models/Flickr_8k.trainImages.txt"
test_images_path = "saved_models/Flickr_8k.testImages.txt"
model_path = "saved_models/Final_Image_Captioning.h5"
13
+
14
def preprocess_image(image_path):
    """Convert an image into a single-item batch ready for the model.

    Args:
        image_path: Either something ``image.load_img`` can open (e.g. a
            file path), or an already-opened image object exposing PIL-style
            ``convert`` and ``resize`` methods, such as the ``PIL.Image``
            the Streamlit front end produces.

    Returns:
        The image resized to 299x299, converted to an array, given a leading
        batch axis of size 1 and passed through InceptionV3's
        ``preprocess_input``.
    """
    target_size = (299, 299)

    if hasattr(image_path, "convert") and hasattr(image_path, "resize"):
        # Already-decoded image: normalise to three channels and resize here,
        # since load_img only handles inputs it has to open itself.
        # NOTE(review): this uses the image object's default resampling
        # filter, which may differ from load_img's — confirm if exact parity
        # with the file-path route matters.
        img = image_path.convert("RGB").resize(target_size)
    else:
        img = image.load_img(image_path, target_size=target_size)

    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)  # add the batch axis
    img = preprocess_input(img)
    return img
20
+
21
+ def generate_caption(image_file):
22
+ # Load the pre-trained model
23
+ model = load_model(model_path)
24
+
25
+ # Preprocess the image
26
+ img = preprocess_image(image_file)
27
+
28
+ # Generate the caption
29
+ caption = generate_caption_from_image(model, img)
30
+
31
+ return caption
32
+
33
def generate_caption_from_image(model, img):
    """Decode a caption for ``img`` with greedy search.

    Starting from the start token, the model is asked repeatedly for the
    next word given the image and the words chosen so far; the highest
    scoring word is appended each time. Decoding stops when the end token is
    predicted or after ``max_length`` steps.

    Args:
        model: Object with a ``predict`` method that accepts
            ``[img, sequence, state_a, state_b]`` and returns scores over the
            vocabulary indices (assumed from how it is called here — confirm
            against the saved model's inputs).
        img: Preprocessed image batch, as returned by ``preprocess_image``.

    Returns:
        The generated words joined by single spaces, without the start and
        end tokens.

    Raises:
        KeyError: If the predicted index has no entry in ``ixtoword``.
    """
    max_length = 34
    start_token = "<start>"
    end_token = "<end>"
    # NOTE(review): allow_pickle=True executes pickle deserialisation. That is
    # acceptable only because these vocabulary files ship with the app; never
    # point these loads at user-supplied files.
    wordtoix = np.load("wordtoix.npy", allow_pickle=True).item()
    ixtoword = np.load("ixtoword.npy", allow_pickle=True).item()

    initial_state = [np.zeros((1, 256)), np.zeros((1, 256))]

    # Generate caption using greedy search
    generated = []
    for _ in range(max_length):
        context = [start_token] + generated
        # Words missing from the vocabulary are skipped rather than raising.
        sequence = [wordtoix[word] for word in context if word in wordtoix]
        sequence = pad_sequences([sequence], maxlen=max_length)
        y_pred = model.predict([img, sequence] + initial_state)
        word = ixtoword[int(np.argmax(y_pred))]
        if word == end_token:
            break
        generated.append(word)

    # The start token was never stored and the end token is not appended, so
    # nothing needs slicing off. Slicing [1:-1] unconditionally would drop a
    # real word whenever decoding stops at max_length without an end token.
    return " ".join(generated)
ixtoword.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64e40c24c08b638ed9151153e94b1847ef8f337316bf75625fe44730c9b260fb
3
+ size 30612
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit==0.85.1
2
+ tensorflow==2.5.0
3
+ numpy~=1.19.2
4
+ Pillow==9.5.0
wordtoix.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0f69c0df696abb66e4713736e6e13ab506a941928b4d8a98ffe0683880805ba
3
+ size 30612