Spaces:

Harsh1306
/

ImageRecogineserConversationalBot

Sleeping

App Files Files Community

Harsh1306 committed on Apr 26, 2025

Commit

f6697b8

verified ·

1 Parent(s): 91e509c

Upload 7 files

Browse files

# ImageRecogniserConversationalChatbot
The app is designed to identify objects in images and then answer questions related to those objects using a conversational chatbot interface. It effectively bridges the gap between computer vision and natural language understanding, making it a versatile tool for various applications, including education, tourism, and general information retrieval.

Files changed (7) hide show

LICENSE +21 -0
README.md +2 -14
__pycache__/GeneriCaptioner.cpython-312.pyc +0 -0
__pycache__/final_captioner.cpython-312.pyc +0 -0
app.py +144 -0
final_captioner.py +254 -0
requirements.txt +11 -0

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2025 Harsh Sanga
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,14 +1,2 @@
----
-title: ImageRecogineserConversationalBot
-emoji: 🌍
-colorFrom: blue
-colorTo: blue
-sdk: streamlit
-sdk_version: 1.44.1
-app_file: app.py
-pinned: false
-license: mit
-short_description: ImageRecogniserConversationalChatbot
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


1	+ # ImageRecogniserConversationalChatbot
2	+ The app is designed to identify objects in images and then answer questions related to those objects using a conversational chatbot interface. It effectively bridges the gap between computer vision and natural language understanding, making it a versatile tool for various applications, including education, tourism, and general information retrieval.

__pycache__/GeneriCaptioner.cpython-312.pyc ADDED Viewed

Binary file (655 Bytes). View file

__pycache__/final_captioner.cpython-312.pyc ADDED Viewed

Binary file (7.09 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,144 @@

+import os
+import json
+import streamlit as st
+from groq import Groq
+from PIL import Image, UnidentifiedImageError, ExifTags
+import requests
+from io import BytesIO
+from transformers import pipeline
+from final_captioner import generate_final_caption
+import hashlib
+# Streamlit page title
+st.title("PicSamvaad : Image Conversational Chatbot")
+# # Load configuration
+# working_dir = os.path.dirname(os.path.abspath(__file__))
+# config_data = json.load(open(f"{working_dir}/config.json"))
+# GROQ_API_KEY = config_data["GROQ_API_KEY"]
+# Save the API key to environment variable
+os.environ["GROQ_API_KEY"] = GROQ_API_KEY
+client = Groq()
+# Sidebar for image upload and URL input
+with st.sidebar:
+    st.header("Upload Image or Enter URL")
+    uploaded_file = st.file_uploader(
+        "Upload an image to chat...", type=["jpg", "jpeg", "png"]
+    )
+    url = st.text_input("Or enter a valid image URL...")
+image = None
+error_message = None
+def correct_image_orientation(img):
+    try:
+        for orientation in ExifTags.TAGS.keys():
+            if ExifTags.TAGS[orientation] == "Orientation":
+                break
+        exif = img._getexif()
+        if exif is not None:
+            orientation = exif[orientation]
+            if orientation == 3:
+                img = img.rotate(180, expand=True)
+            elif orientation == 6:
+                img = img.rotate(270, expand=True)
+            elif orientation == 8:
+                img = img.rotate(90, expand=True)
+    except (AttributeError, KeyError, IndexError):
+        pass
+    return img
+def get_image_hash(image):
+    # Generate a unique hash for the image
+    img_bytes = image.tobytes()
+    return hashlib.md5(img_bytes).hexdigest()
+# Check if a new image or URL has been provided and reset chat history
+if "last_uploaded_hash" not in st.session_state:
+    st.session_state.last_uploaded_hash = None
+if uploaded_file is not None:
+    image = Image.open(uploaded_file)
+    image_hash = get_image_hash(image)
+    if st.session_state.last_uploaded_hash != image_hash:
+        st.session_state.chat_history = []  # Clear chat history
+        st.session_state.last_uploaded_hash = image_hash  # Update last uploaded hash
+    image = correct_image_orientation(image)
+    st.image(image, caption="Uploaded Image.", use_column_width=True)
+elif url:
+    try:
+        response = requests.get(url)
+        response.raise_for_status()  # Check if the request was successful
+        image = Image.open(BytesIO(response.content))
+        image_hash = get_image_hash(image)
+        if st.session_state.last_uploaded_hash != image_hash:
+            st.session_state.chat_history = []  # Clear chat history
+            st.session_state.last_uploaded_hash = (
+                image_hash  # Update last uploaded hash
+            )
+        image = correct_image_orientation(image)
+        st.image(image, caption="Image from URL.", use_column_width=True)
+    except (requests.exceptions.RequestException, UnidentifiedImageError) as e:
+        image = None
+        error_message = "Error: The provided URL is invalid or the image could not be loaded. Sometimes some image URLs don't work. We suggest you upload the downloaded image instead ;)"
+caption = ""
+if image is not None:
+    caption += generate_final_caption(image)
+    st.write("ChatBot : " + caption)
+# Display error message if any
+if error_message:
+    st.error(error_message)
+# Initialize chat history in Streamlit session state if not present already
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+# Display chat history
+for message in st.session_state.chat_history:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+# Input field for user's message
+user_prompt = st.chat_input("Ask the Chatbot about the image...")
+if user_prompt:
+    st.chat_message("user").markdown(user_prompt)
+    st.session_state.chat_history.append({"role": "user", "content": user_prompt})
+    # Send user's message to the LLM and get a response
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a helpful, accurate image conversational assistant. You don't hallucinate, and your answers are very precise and have a positive approach.The caption of the image is: "
+            + caption,
+        },
+        *st.session_state.chat_history,
+    ]
+    response = client.chat.completions.create(
+        model="llama-3.1-8b-instant", messages=messages
+    )
+    assistant_response = response.choices[0].message.content
+    st.session_state.chat_history.append(
+        {"role": "assistant", "content": assistant_response}
+    )
+    # Display the LLM's response
+    with st.chat_message("assistant"):
+        st.markdown(assistant_response)

final_captioner.py ADDED Viewed

	@@ -0,0 +1,254 @@

+from tensorflow.keras.preprocessing import image
+import tensorflow as tf
+from tensorflow.keras.models import load_model
+import numpy as np
+from transformers import pipeline
+import gdown
+import os
+git_pipe = pipeline("image-to-text", model="microsoft/git-large-textcaps")
+flower_output = "Flower_classifier.h5"
+flower_model_id = "1AlBunIPDg4HYYCqhcHtOiXxnPFhmsoSn"
+flower_url = f"https://drive.google.com/uc?id={flower_model_id}"
+if not os.path.exists(flower_output):
+    gdown.download(flower_url, flower_output, quiet=False)
+flower_model = load_model(flower_output)
+flower_model.summary()
+bird_output = "Bird_classifier.h5"
+bird_model_id = "1a6vqFERbrr_Cw-NyBqVHG7fsjU2-xKJ4"
+bird_url = f"https://drive.google.com/uc?id={bird_model_id}"
+if not os.path.exists(bird_output):
+    gdown.download(bird_url, bird_output, quiet=False)
+bird_model = load_model(bird_output)
+bird_model.summary()
+dog_output = "DogClassifier.h5"
+dog_model_id = "1UFn1NGVtP5rhvcWnAANQ_4E9YRJvDEad"
+dog_url = f"https://drive.google.com/uc?id={dog_model_id}"
+if not os.path.exists(dog_output):
+    gdown.download(dog_url, dog_output, quiet=False)
+dog_model = load_model(dog_output)
+dog_model.summary()
+landmark_output = "LandmarkClassifierV5.h5"
+landmark_model_id = "1PXixJsrUaVcHEEC-jDlv4tHT2qrCrf5c"  # Replace with your file ID
+landmark_url = f"https://drive.google.com/uc?id={landmark_model_id}"
+if not os.path.exists(landmark_output):
+    gdown.download(landmark_url, landmark_output, quiet=False)
+landmark_model = load_model(landmark_output)
+landmark_model.summary()
+dog_list = [  # Dog labels; identify_dog indexes this list with the classifier's argmax index
+    "Bulldog",
+    "Chihuahua (dog breed)",
+    "Dobermann",
+    "German Shepherd",
+    "Golden Retriever",
+    "Husky",
+    "Labrador Retriever",
+    "Pomeranian dog",
+    "Pug",
+    "Rottweiler",
+    "Street dog",
+]  # NOTE(review): order presumably mirrors the dog model's output classes - confirm before reordering
+flower_list = [  # Flower labels; identify_flower indexes this list with the classifier's argmax index
+    "Jasmine",
+    "Lavender",
+    "Lily",
+    "Lotus",
+    "Orchid",
+    "Rose",
+    "Sunflower",
+    "Tulip",
+    "daisy",
+    "dandelion",
+]  # NOTE(review): order presumably mirrors the flower model's output classes - confirm before reordering
+bird_list = [  # Bird labels; identify_bird indexes this list with the classifier's argmax index
+    "Crow",
+    "Eagle",
+    "Flamingo",
+    "Hummingbird",
+    "Parrot",
+    "Peacock",
+    "Pigeon",
+    "Sparrow",
+    "Swan",
+]  # NOTE(review): order presumably mirrors the bird model's output classes - confirm before reordering
+landmark_list = [  # Landmark labels; identify_landmark indexes this list with the classifier's argmax index
+    "The Agra Fort",
+    "Ajanta Caves",
+    "Alai Darwaza",
+    "Amarnath Temple",
+    "The Amber Fort",
+    "Basilica of Bom Jesus",
+    "Brihadisvara Temple",
+    "Charar-e-Sharief shrine",
+    "Charminar",
+    "Chhatrapati Shivaji Terminus",
+    "Chota Imambara",
+    "Dal Lake",
+    "The Elephanta Caves",
+    "Ellora Caves",
+    "Fatehpur Sikri",
+    "Gateway of India",
+    "Ghats in Varanasi",
+    "Gol Gumbaz",
+    "Golden Temple",
+    "Group of Monuments at Mahabalipuram",
+    "Hampi",
+    "Hawa Mahal",
+    "Humayun's Tomb",
+    "The India gate",
+    "Iron Pillar",
+    "Jagannath Temple, Puri",
+    "Jageshwar",
+    "Jama Masjid",
+    "Jamali Kamali Tomb",
+    "Jantar Mantar, Jaipur",
+    "Jantar Mantar, New Delhi",
+    "Kedarnath Temple",
+    "Khajuraho Temple",
+    "Konark Sun Temple",
+    "Mahabodhi Temple",
+    "Meenakshi Temple",
+    "Nalanda mahavihara",
+    "Parliament House, New Delhi",
+    "Qutb Minar",
+    "Qutb Minar Complex",
+    "Ram Mandir",
+    "Rani ki Vav",
+    "Rashtrapati Bhavan",
+    "The Red Fort",
+    "Sanchi",
+    "Supreme Court of India",
+    "Swaminarayan Akshardham (Delhi)",
+    "Taj Hotels",
+    "The Lotus Temple",
+    "The Mysore Palace",
+    "The Statue of Unity",
+    "The Taj Mahal",
+    "Vaishno Devi Temple",
+    "Venkateswara Temple, Tirumala",
+    "Victoria Memorial, Kolkata",
+    "Vivekananda Rock Memorial",
+]  # NOTE(review): order presumably mirrors the landmark model's output classes - confirm before reordering
+def identify_dog(img):
+    img = img.resize((224, 224))
+    img_array = image.img_to_array(img)
+    img_array = np.expand_dims(img_array, axis=0)
+    img_array /= 255.0
+    # Get predictions
+    predictions = dog_model.predict(img_array)
+    # Get the index of the class with the highest probability
+    predicted_class_index = np.argmax(predictions[0])
+    # Get the probability of the predicted class
+    predicted_probability = predictions[0][predicted_class_index]
+    # Map the predicted class index to the class label
+    predicted_class_label = dog_list[predicted_class_index]
+    return predicted_class_label
+def identify_flower(img):
+    img = img.resize((224, 224))
+    img_array = image.img_to_array(img)
+    img_array = np.expand_dims(img_array, axis=0)
+    img_array /= 255.0
+    # Get predictions
+    predictions = flower_model.predict(img_array)
+    # Get the index of the class with the highest probability
+    predicted_class_index = np.argmax(predictions[0])
+    # Get the probability of the predicted class
+    predicted_probability = predictions[0][predicted_class_index]
+    # Map the predicted class index to the class label
+    predicted_class_label = flower_list[predicted_class_index]
+    return predicted_class_label
+def identify_bird(img):
+    # Preprocess the image
+    img = img.resize((224, 224))
+    img_array = image.img_to_array(img)
+    img_array = np.expand_dims(img_array, axis=0)
+    img_array /= 255.0
+    # Get predictions
+    predictions = bird_model.predict(img_array)
+    # Get the index of the class with the highest probability
+    predicted_class_index = np.argmax(predictions[0])
+    # Get the probability of the predicted class
+    predicted_probability = predictions[0][predicted_class_index]
+    # Map the predicted class index to the class label
+    predicted_class_label = bird_list[predicted_class_index]
+    return predicted_class_label
+def identify_landmark(img):
+    # Preprocess the image
+    img = img.resize((224, 224))
+    img_array = image.img_to_array(img)
+    img_array = np.expand_dims(img_array, axis=0)
+    img_array /= 255.0
+    # Get predictions
+    predictions = landmark_model.predict(img_array)
+    # Get the index of the class with the highest probability
+    predicted_class_index = np.argmax(predictions[0])
+    # Get the probability of the predicted class
+    predicted_probability = predictions[0][predicted_class_index]
+    # Map the predicted class index to the class label
+    predicted_class_label = landmark_list[predicted_class_index]
+    return predicted_class_label
+def generate_final_caption(image):
+    caption_dict = git_pipe(image)
+    caption = caption_dict[0]["generated_text"]
+    image = image.resize((256, 256))
+    caption = caption_dict[0]["generated_text"]
+    phrases_to_cut = ["with the word", "that says"]
+    for phrase in phrases_to_cut:
+        index = caption.find(phrase)
+        if index != -1:
+            caption = caption[:index].strip()
+    if (
+        "building" in caption.lower()
+        or "monument" in caption.lower()
+        or "tower" in caption.lower()
+    ):
+        caption += "\nThe landmark is : " + identify_landmark(image)
+    elif "flower" in caption.lower() or "flowers" in caption.lower():
+        caption += "\nThe Flower is : " + identify_flower(image)
+    elif "dog" in caption.lower() or "puppy" in caption.lower():
+        caption += "\nThe Dog is : " + identify_dog(image)
+    elif "birds" in caption.lower() or "bird" in caption.lower():
+        caption += "\nThe Bird is : " + identify_bird(image)
+    return caption

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+streamlit
+pillow
+requests
+matplotlib
+tensorflow
+transformers
+torch
+tf-keras
+easygoogletranslate
+groq
+gdown