Harsh1306 committed on
Commit
6248b37
·
verified ·
1 Parent(s): 700534c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -49
app.py CHANGED
@@ -5,33 +5,22 @@ from PIL import Image, UnidentifiedImageError, ExifTags
5
  import requests
6
  from io import BytesIO
7
  import wikipedia
8
- from easygoogletranslate import EasyGoogleTranslate
9
  from BharatCaptioner import identify_landmark
10
  from groq import Groq
11
  import hashlib
 
12
 
13
- # Initialize EasyGoogleTranslate
14
- translator = EasyGoogleTranslate(source_language="en", target_language="hi", timeout=10)
15
-
16
- # Load configuration for Groq API key
17
- working_dir = os.path.dirname(os.path.abspath(__file__))
18
- config_data = json.load(open(f"{working_dir}/config.json"))
19
- GROQ_API_KEY = config_data["GROQ_API_KEY"]
20
- os.environ["GROQ_API_KEY"] = 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
21
-
22
  client = Groq()
23
 
24
- # Title of the Streamlit app
25
  st.title("BharatCaptioner with Conversational Chatbot")
26
  st.write(
27
  "A tool to identify/describe Indian Landmarks in Indic Languages and chat about the image."
28
  )
29
 
30
  # Sidebar details
31
- st.sidebar.title("Developed by Harshal and Harsh Pandey")
32
- st.sidebar.write(
33
- "**For the Model that I trained**: [Mail me here](mailto:harshal19052003@gmail.com)"
34
- )
35
  st.sidebar.write(
36
  "**For the Code**: [GitHub Repo](https://github.com/justharshal2023/BharatCaptioner)"
37
  )
@@ -43,13 +32,20 @@ st.sidebar.write(
43
  uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
44
  url = st.text_input("Or enter a valid image URL...")
45
 
 
 
 
 
 
 
 
 
46
  image = None
47
  error_message = None
48
  landmark = None
49
  summary = None
50
  caption = None
51
 
52
-
53
  # Function to correct image orientation
54
  def correct_image_orientation(img):
55
  try:
@@ -69,17 +65,15 @@ def correct_image_orientation(img):
69
  pass
70
  return img
71
 
72
-
73
  # Function to get a unique hash for the image
74
  def get_image_hash(image):
75
  img_bytes = image.tobytes()
76
  return hashlib.md5(img_bytes).hexdigest()
77
 
78
-
79
- # Check if new image or URL is uploaded and reset the chat history
80
  def reset_chat_if_new_image():
81
- if "last_uploaded_hash" not in st.session_state:
82
- st.session_state["last_uploaded_hash"] = None
83
 
84
  # Process the new image or URL
85
  if uploaded_file:
@@ -102,45 +96,43 @@ def reset_chat_if_new_image():
102
  st.error(error_message)
103
  else:
104
  image = None
105
- new_image_hash = None
106
 
107
  # If the image is new, reset the chat and session state
108
- if new_image_hash and new_image_hash != st.session_state["last_uploaded_hash"]:
109
- st.session_state.clear()
110
- st.session_state["last_uploaded_hash"] = new_image_hash
111
- st.experimental_rerun()
112
 
113
  return image
114
 
115
-
116
  # Call the reset function to check for new images or URL
117
  image = reset_chat_if_new_image()
118
 
119
  # If an image is provided
120
  if image is not None:
121
- # Resize image for processing
122
- image = image.resize((256, 256))
123
 
124
  # Identify the landmark using BharatCaptioner
125
- landmark, prob = identify_landmark(image)
126
  summary = wikipedia.summary(landmark, sentences=3) # Shortened summary
127
- st.write(f"**Landmark Identified:** {landmark} (Confidence: {prob:.2f})")
128
 
129
- # Display image and landmark name in the sidebar
130
  with st.sidebar:
131
- st.image(image, caption="Current Image", use_column_width=True)
132
- st.write(f"**Landmark:** {landmark}")
 
 
 
 
133
 
134
  # Chatbot functionality
135
  st.write("### Chat with the Chatbot about the Image")
136
  caption = f"The landmark in the image is {landmark}. {summary}"
137
 
138
- # Initialize chat history in session state if not present
139
- if "chat_history" not in st.session_state:
140
- st.session_state["chat_history"] = []
141
-
142
- # Chatbot introduction message with bold text for landmark and question
143
- if not st.session_state.get("chatbot_started"):
144
  chatbot_intro = f"Hello! I see the image is of **{landmark}**. {summary} **Would you like to know more** about this landmark?"
145
  st.session_state["chat_history"].append(
146
  {"role": "assistant", "content": chatbot_intro}
@@ -156,28 +148,37 @@ if image is not None:
156
  user_prompt = st.chat_input("Ask the Chatbot about the image...")
157
 
158
  if user_prompt:
 
159
  st.chat_message("user").markdown(user_prompt)
160
- st.session_state.chat_history.append({"role": "user", "content": user_prompt})
161
 
162
- # Send the user's message to the LLaMA chatbot
163
  messages = [
164
  {
165
  "role": "system",
166
- "content": "You are a helpful image conversational assistant. "
167
  + f"The caption of the image is: {caption}",
168
  },
169
- *st.session_state.chat_history,
170
  ]
171
 
 
172
  response = client.chat.completions.create(
173
  model="llama-3.1-8b-instant", messages=messages
174
  )
175
-
176
  assistant_response = response.choices[0].message.content
177
- st.session_state.chat_history.append(
 
 
 
 
 
 
 
 
 
 
 
178
  {"role": "assistant", "content": assistant_response}
179
  )
180
 
181
- # Display chatbot response
182
- with st.chat_message("assistant"):
183
- st.markdown(assistant_response)
 
5
  import requests
6
  from io import BytesIO
7
  import wikipedia
 
8
  from BharatCaptioner import identify_landmark
9
  from groq import Groq
10
  import hashlib
11
+ import time # To simulate character-by-character display
12
 
13
+ # Initialize Groq API client
14
+ os.environ["GROQ_API_KEY"] = "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"
 
 
 
 
 
 
 
15
  client = Groq()
16
 
 
17
  st.title("BharatCaptioner with Conversational Chatbot")
18
  st.write(
19
  "A tool to identify/describe Indian Landmarks in Indic Languages and chat about the image."
20
  )
21
 
22
  # Sidebar details
23
+ st.sidebar.title("Developed by Harshal")
 
 
 
24
  st.sidebar.write(
25
  "**For the Code**: [GitHub Repo](https://github.com/justharshal2023/BharatCaptioner)"
26
  )
 
32
  uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
33
  url = st.text_input("Or enter a valid image URL...")
34
 
35
+ # Initialize session state variables
36
+ if "image_hash" not in st.session_state:
37
+ st.session_state["image_hash"] = None
38
+ if "chat_history" not in st.session_state:
39
+ st.session_state["chat_history"] = []
40
+ if "chatbot_started" not in st.session_state:
41
+ st.session_state["chatbot_started"] = False
42
+
43
  image = None
44
  error_message = None
45
  landmark = None
46
  summary = None
47
  caption = None
48
 
 
49
  # Function to correct image orientation
50
  def correct_image_orientation(img):
51
  try:
 
65
  pass
66
  return img
67
 
 
68
  # Function to get a unique hash for the image
69
  def get_image_hash(image):
70
  img_bytes = image.tobytes()
71
  return hashlib.md5(img_bytes).hexdigest()
72
 
73
+ # Check if new image or URL is uploaded and reset the chat history if necessary
 
74
  def reset_chat_if_new_image():
75
+ global image, landmark, summary, caption
76
+ new_image_hash = None
77
 
78
  # Process the new image or URL
79
  if uploaded_file:
 
96
  st.error(error_message)
97
  else:
98
  image = None
 
99
 
100
  # If the image is new, reset the chat and session state
101
+ if new_image_hash and new_image_hash != st.session_state["image_hash"]:
102
+ st.session_state["image_hash"] = new_image_hash
103
+ st.session_state["chat_history"] = []
104
+ st.session_state["chatbot_started"] = False # Reset chatbot status
105
 
106
  return image
107
 
 
108
  # Call the reset function to check for new images or URL
109
  image = reset_chat_if_new_image()
110
 
111
  # If an image is provided
112
  if image is not None:
113
+ # Keep the original image size for processing
114
+ original_image = image.copy() # Create a copy for identification
115
 
116
  # Identify the landmark using BharatCaptioner
117
+ landmark, prob = identify_landmark(original_image)
118
  summary = wikipedia.summary(landmark, sentences=3) # Shortened summary
119
+ st.write(f"**Landmark Identified:** {landmark}")
120
 
121
+ # Display a smaller version of the image in the sidebar
122
  with st.sidebar:
123
+ small_image = original_image.resize((128, 128)) # Resize for display
124
+ st.image(small_image, caption=f"Landmark: {landmark}", use_column_width=True)
125
+ # st.write(f"**Landmark:** {landmark}")
126
+
127
+ # Display the original image before the conversation
128
+ st.image(original_image, caption=f"Image of {landmark}", use_column_width=True)
129
 
130
  # Chatbot functionality
131
  st.write("### Chat with the Chatbot about the Image")
132
  caption = f"The landmark in the image is {landmark}. {summary}"
133
 
134
+ # Chatbot introduction message
135
+ if not st.session_state["chatbot_started"]:
 
 
 
 
136
  chatbot_intro = f"Hello! I see the image is of **{landmark}**. {summary} **Would you like to know more** about this landmark?"
137
  st.session_state["chat_history"].append(
138
  {"role": "assistant", "content": chatbot_intro}
 
148
  user_prompt = st.chat_input("Ask the Chatbot about the image...")
149
 
150
  if user_prompt:
151
+ st.session_state["chat_history"].append({"role": "user", "content": user_prompt})
152
  st.chat_message("user").markdown(user_prompt)
 
153
 
154
+ # Send the user's message to the chatbot
155
  messages = [
156
  {
157
  "role": "system",
158
+ "content": "You are a helpful image conversational assistant, specialized in explaining about the monuments/landmarks of india. Give answer in points and in detail but dont hallucinate."
159
  + f"The caption of the image is: {caption}",
160
  },
161
+ *st.session_state["chat_history"],
162
  ]
163
 
164
+ # Request the complete reply from the Groq chat completions API
165
  response = client.chat.completions.create(
166
  model="llama-3.1-8b-instant", messages=messages
167
  )
 
168
  assistant_response = response.choices[0].message.content
169
+
170
+ # Character-by-character output simulation
171
+ with st.chat_message("assistant"):
172
+ response_container = st.empty() # Placeholder for response
173
+ response_text = ""
174
+ for char in assistant_response:
175
+ response_text += char
176
+ time.sleep(0.005) # Adjust speed of character display
177
+ response_container.markdown(response_text)
178
+
179
+ # Append full response after display
180
+ st.session_state["chat_history"].append(
181
  {"role": "assistant", "content": assistant_response}
182
  )
183
 
184
+