saurabhharak committed on
Commit
5deb389
·
1 Parent(s): e9b8ff0

code added

Browse files
Files changed (4) hide show
  1. .gitignore +162 -0
  2. model_init.py +44 -0
  3. model_use.py +62 -0
  4. streamilt_app.py +71 -0
.gitignore ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ content/
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # poetry
100
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104
+ #poetry.lock
105
+
106
+ # pdm
107
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108
+ #pdm.lock
109
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110
+ # in version control.
111
+ # https://pdm.fming.dev/#use-with-ide
112
+ .pdm.toml
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+
133
+ # Spyder project settings
134
+ .spyderproject
135
+ .spyproject
136
+
137
+ # Rope project settings
138
+ .ropeproject
139
+
140
+ # mkdocs documentation
141
+ /site
142
+
143
+ # mypy
144
+ .mypy_cache/
145
+ .dmypy.json
146
+ dmypy.json
147
+
148
+ # Pyre type checker
149
+ .pyre/
150
+
151
+ # pytype static type analyzer
152
+ .pytype/
153
+
154
+ # Cython debug symbols
155
+ cython_debug/
156
+
157
+ # PyCharm
158
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
161
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
+ #.idea/
model_init.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import VisionEncoderDecoderModel,ViTFeatureExtractor,AutoTokenizer,ViTImageProcessor
2
+ import torch
3
+ from PIL import Image
4
+
5
+ model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
6
+ feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
7
+ tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
8
+
9
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
+ model.to(device)
11
+
12
+ max_length = 16
13
+ num_beams = 4
14
+ gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
15
+
16
+
17
+ def predict_step(image_paths):
18
+ images = []
19
+ for image_path in image_paths:
20
+ i_image = Image.open(image_path)
21
+ if i_image.mode != "RGB":
22
+ i_image = i_image.convert(mode="RGB")
23
+ images.append(i_image)
24
+ pixel_values = feature_extractor(images=images, return_tensors="pt").pixel_values
25
+ pixel_values = pixel_values.to(device)
26
+ output_ids = model.generate(pixel_values, **gen_kwargs)
27
+ preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
28
+ preds = [pred.strip() for pred in preds]
29
+ return preds
30
+
31
+
32
+ # Define paths to save the components in your Google Drive
33
+ drive_folder = "/content/drive/My Drive/image_captioning_streamlit"
34
+
35
+ saved_model_directory = f"{drive_folder}/saved_model"
36
+ saved_feature_extractor_directory = f"{drive_folder}/saved_feature_extractor"
37
+ saved_tokenizer_directory = f"{drive_folder}/saved_tokenizer"
38
+
39
+
40
+ # Save the model and its components
41
+ model.save_pretrained(saved_model_directory)
42
+ feature_extractor.save_pretrained(saved_feature_extractor_directory)
43
+ tokenizer.save_pretrained(saved_tokenizer_directory)
44
+
model_use.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
2
+ import torch
3
+ from PIL import Image
4
+
5
+
6
+ drive_folder = "/content/drive/My Drive/image_captioning_streamlit"
7
+
8
+ saved_model_directory = f"{drive_folder}/saved_model"
9
+ saved_feature_extractor_directory = f"{drive_folder}/saved_feature_extractor"
10
+ saved_tokenizer_directory = f"{drive_folder}/saved_tokenizer"
11
+ # Define paths to save the components in your Google Drive
12
+
13
+
14
+ saved_model = VisionEncoderDecoderModel.from_pretrained(saved_model_directory)
15
+ saved_feature_extractor = ViTImageProcessor.from_pretrained(saved_feature_extractor_directory)
16
+ saved_tokenizer = AutoTokenizer.from_pretrained(saved_tokenizer_directory)
17
+
18
+ # Move the model to the appropriate device (GPU if available)
19
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20
+ saved_model.to(device)
21
+
22
+ # Define prediction parameters
23
+ max_length = 16
24
+ num_beams = 4
25
+ gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
26
+
27
+ # Define the prediction function
28
+ def predict_step(image_paths):
29
+ """
30
+ Generate predictions for a list of image paths.
31
+
32
+ Args:
33
+ image_paths (List[str]): A list of file paths to the images.
34
+
35
+ Returns:
36
+ List[str]: A list of predicted strings.
37
+
38
+ Raises:
39
+ None
40
+
41
+ Examples:
42
+ >>> image_paths = ["path/to/image1.jpg", "path/to/image2.jpg"]
43
+ >>> predict_step(image_paths)
44
+ ["prediction1", "prediction2"]
45
+ """
46
+ images = []
47
+ for image_path in image_paths:
48
+ i_image = Image.open(image_path)
49
+ if i_image.mode != "RGB":
50
+ i_image = i_image.convert(mode="RGB")
51
+ images.append(i_image)
52
+
53
+ pixel_values = saved_feature_extractor(images=images, return_tensors="pt").pixel_values
54
+ pixel_values = pixel_values.to(device)
55
+
56
+ output_ids = saved_model.generate(pixel_values, **gen_kwargs)
57
+
58
+ preds = saved_tokenizer.batch_decode(output_ids, skip_special_tokens=True)
59
+ preds = [pred.strip() for pred in preds]
60
+ return preds
61
+
62
+
streamilt_app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
3
+ import torch
4
+ from PIL import Image
5
+
6
+ # Load the saved model and its components
7
+ saved_model_directory = "content/saved_model"
8
+ saved_feature_extractor_directory = "content/saved_feature_extractor"
9
+ saved_tokenizer_directory = "content/saved_tokenizer"
10
+
11
+ saved_model = VisionEncoderDecoderModel.from_pretrained(saved_model_directory)
12
+ saved_feature_extractor = ViTImageProcessor.from_pretrained(saved_feature_extractor_directory)
13
+ saved_tokenizer = AutoTokenizer.from_pretrained(saved_tokenizer_directory)
14
+
15
+ # Move the model to the appropriate device (GPU if available)
16
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
+ saved_model.to(device)
18
+
19
+ # Define prediction parameters
20
+ max_length = 16
21
+ num_beams = 4
22
+ gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
23
+
24
+ # Define the prediction function
25
+ def predict_step(image):
26
+ i_image = Image.open(image)
27
+ if i_image.mode != "RGB":
28
+ i_image = i_image.convert(mode="RGB")
29
+
30
+ pixel_values = saved_feature_extractor(images=[i_image], return_tensors="pt").pixel_values
31
+ pixel_values = pixel_values.to(device)
32
+
33
+ output_ids = saved_model.generate(pixel_values, **gen_kwargs)
34
+
35
+ preds = saved_tokenizer.batch_decode(output_ids, skip_special_tokens=True)
36
+ preds = [pred.strip() for pred in preds]
37
+ return preds[0]
38
+
39
+
40
+ def main():
41
+ # Streamlit app
42
+ st.set_page_config(
43
+ page_title="⭐ Image Captioning App",
44
+ page_icon="⭐",
45
+ layout="centered"
46
+ )
47
+
48
+ st.title("πŸ“· Image Captioning Using Transformers")
49
+ image = None
50
+ # Upload image or provide URL
51
+ st.write("Upload an image πŸ“€ or provide an image URL πŸ”—:")
52
+ uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
53
+ image_url = st.text_input("Or provide an image URL:")
54
+
55
+ if uploaded_image is not None:
56
+ image = uploaded_image
57
+ elif image_url:
58
+ image = image_url
59
+ else:
60
+ st.warning("❗ Please upload an image or provide an image URL.")
61
+
62
+ if image:
63
+ try:
64
+ caption = predict_step(image)
65
+ st.image(image, caption=f"πŸ“ Predicted caption: {caption}", use_column_width=True)
66
+ except Exception as e:
67
+ st.error("❌ An error occurred while generating the caption.")
68
+ st.error(str(e))
69
+
70
+ if __name__ == "__main__":
71
+ main()