Deva committed on
Commit
72431ed
·
1 Parent(s): 856113e

handling weird exif element

Browse files
Files changed (1) hide show
  1. app.py +108 -24
app.py CHANGED
@@ -54,31 +54,28 @@ def decode_utf16_little_endian(binary_data):
54
  # print(f"Test:{binary_data.decode('utf-16-le')}")
55
  # print(f"Type:{type(binary_data)}")
56
  decoded_text = binary_data.decode("utf-16-le").rstrip("\x00")
57
- return decoded_text
58
  except Exception as e:
59
- return f"Error decoding UTF-16 LE: {e}"
 
60
 
61
 
 
62
  def get_exif(list_file_paths):
63
  metadata_all_file = {}
64
  df = pd.DataFrame()
65
  for file_path in list_file_paths:
66
  metadata = {}
 
67
  print(file_path)
68
  try:
69
- # file_path = file_path_.split("/")[-1]
70
- # df = pd.DataFrame()
71
- # df['file_name'] = [file_path]
72
- # print(df)
73
- # print("inside the tryin")
74
  image = Image.open(file_path)
75
  exifdata = image._getexif()
76
  if exifdata is not None:
77
- # print(metadata)
78
  for tagid, value in exifdata.items():
79
  # print(tagid, value)
80
  # print(f"Value:{value}")
81
- tagname = TAGS.get(tagid, tagid)
82
  # value = exifdata.get(tagid)
83
  # Handle binary data
84
  if isinstance(value, bytes):
@@ -86,18 +83,101 @@ def get_exif(list_file_paths):
86
  # print(f"Value bytes {type(value)}")
87
  # print(f"Value str {decode_utf16_little_endian(value)}")
88
  value = decode_utf16_little_endian(value)
89
- metadata[tagname] = value
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  # print(f"\t{metadata}")
91
- new_row = {"name": file_path, **metadata}
92
- df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
 
 
 
93
  # metadata_all_file[file_path] = metadata
94
  else:
95
  return "No EXIF metadata found."
96
  except Exception as e:
97
  return f"Error : {e}"
98
- print(df)
99
- df["name"] = df["name"].apply(lambda filepath: filepath.split("/")[-1])
100
- print(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  return df
102
 
103
 
@@ -127,7 +207,6 @@ def get_annotation(files_):
127
  """
128
  # df = pd.DataFrame(columns=["file_name", "label", "accuracy"])
129
  df_exif = get_exif(get_file_names(files_))
130
- print(df_exif)
131
  return df_exif
132
 
133
 
@@ -135,14 +214,22 @@ def update_dataframe(df):
135
  return df # Simply return the modified dataframe
136
 
137
 
138
- def df_to_csv(df_):
139
  """
140
  Get the df and convert it as an gradio file output ready for download
141
  Input: DF created
142
  Output: gr.File()
143
  """
144
- df_.to_csv("output.csv", index=False, escapechar="\\", quoting=csv.QUOTE_MINIMAL)
145
- return gr.File(value="output.csv", visible=True)
 
 
 
 
 
 
 
 
146
 
147
 
148
  ##################################################
@@ -179,15 +266,14 @@ with gr.Blocks() as interface:
179
  df = gr.DataFrame(interactive=False)
180
  download_raw_btn.click(
181
  fn=df_to_csv,
182
- inputs=df,
183
  outputs=gr.File(visible=False),
184
- show_progress=False,
185
  )
186
  gr.Markdown("## Modified results")
187
  df_modified = gr.DataFrame(interactive=True)
188
  download_modified_btn.click(
189
  fn=df_to_csv,
190
- inputs=df_modified,
191
  outputs=gr.File(visible=False),
192
  show_progress=False,
193
  )
@@ -198,6 +284,4 @@ with gr.Blocks() as interface:
198
 
199
 
200
  if __name__ == "__main__":
201
- # file_path = "../data/rat1.jpg"
202
- # get_exif(file_path)
203
  interface.launch(debug=True)
 
54
  # print(f"Test:{binary_data.decode('utf-16-le')}")
55
  # print(f"Type:{type(binary_data)}")
56
  decoded_text = binary_data.decode("utf-16-le").rstrip("\x00")
 
57
  except Exception as e:
58
+ decoded_text = "Encoded"
59
+ return decoded_text
60
 
61
 
62
+ '''
63
  def get_exif(list_file_paths):
64
  metadata_all_file = {}
65
  df = pd.DataFrame()
66
  for file_path in list_file_paths:
67
  metadata = {}
68
+ metadata["name"] = file_path.split("/")[-1]
69
  print(file_path)
70
  try:
 
 
 
 
 
71
  image = Image.open(file_path)
72
  exifdata = image._getexif()
73
  if exifdata is not None:
74
+ print(len(exifdata.items()))
75
  for tagid, value in exifdata.items():
76
  # print(tagid, value)
77
  # print(f"Value:{value}")
78
+ tagname = str(TAGS.get(tagid, tagid))
79
  # value = exifdata.get(tagid)
80
  # Handle binary data
81
  if isinstance(value, bytes):
 
83
  # print(f"Value bytes {type(value)}")
84
  # print(f"Value str {decode_utf16_little_endian(value)}")
85
  value = decode_utf16_little_endian(value)
86
+ print(tagname)
87
+ print(type(tagname))
88
+ print(value)
89
+ if type(tagname) is not str:
90
+ print(">>>>>>>>>>>> here " + type(tagname))
91
+ try:
92
+ metadata[str(tagname)] = value
93
+ except:
94
+ try:
95
+ metadata[repr(tagname)] = value
96
+ except:
97
+ pass
98
+ else:
99
+ metadata[tagname] = value
100
+ """
101
+ for key in metadata.keys():
102
+ if type(key) is not str:
103
+ try:
104
+ metadata[str(key)] = metadata[key]
105
+ except:
106
+ try:
107
+ metadata[repr(key)] = metadata[key]
108
+ except:
109
+ pass
110
+ del metadata[key]
111
+ """
112
  # print(f"\t{metadata}")
113
+ print(metadata)
114
+ print(pd.DataFrame([metadata]))
115
+ df = pd.concat([df, pd.DataFrame([metadata])], ignore_index=True)
116
+ # new_row = {"name": file_path, **metadata}
117
+ # df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
118
  # metadata_all_file[file_path] = metadata
119
  else:
120
  return "No EXIF metadata found."
121
  except Exception as e:
122
  return f"Error : {e}"
123
+ print(pd.concat([df, pd.DataFrame([metadata])], ignore_index=True))
124
+ print(f"FINAL DF \n \n \n {df}")
125
+ return df
126
+ '''
127
+ import pandas as pd
128
+ from PIL import Image
129
+ from PIL.ExifTags import TAGS
130
+
131
+
132
def decode_utf16_little_endian(value):
    """Decode a UTF-16 little-endian byte string into clean text.

    Trailing/leading NUL characters are removed in addition to whitespace:
    ``str.strip()`` alone leaves ``"\\x00"`` terminators in place, and those
    then leak into the resulting table/CSV.

    Args:
        value: Raw bytes to decode.

    Returns:
        The decoded text with surrounding NULs and whitespace removed, or
        ``value`` itself, unchanged, when it cannot be decoded.
    """
    try:
        text = value.decode("utf-16le")
    except (UnicodeDecodeError, AttributeError):
        # UnicodeDecodeError: payload is not valid UTF-16 LE (e.g. odd length).
        # AttributeError: value has no .decode (not bytes-like).
        # Either way, fall back to the original value as before.
        return value
    return text.strip("\x00").strip()
137
+
138
+
139
def extract_particular_value_from_exif_file(metadata, tagname, value):
    """Placeholder with no behavior yet.

    The arguments are accepted but not read or modified, and the function
    always returns ``None``.
    """
    return None
141
+
142
+
143
def get_exif(list_file_paths):
    """Collect the EXIF metadata of several images into one DataFrame.

    Each image that opens successfully and carries EXIF data contributes one
    row: a ``name`` column with the file's base name plus one column per EXIF
    tag. Images without EXIF data, and images that raise while being opened
    or parsed, are reported on stdout and skipped so that a single bad file
    does not abort the whole batch.

    Args:
        list_file_paths: Iterable of image file paths.

    Returns:
        pandas.DataFrame with one row per image that yielded EXIF data;
        an empty DataFrame when none did.
    """
    import os  # local import so the block does not depend on file-level edits

    rows = []

    for file_path in list_file_paths:
        metadata = {"name": os.path.basename(file_path)}
        print(file_path)

        try:
            # The context manager releases the file handle as soon as the
            # EXIF block has been read.
            with Image.open(file_path) as image:
                # NOTE(review): _getexif() is a private Pillow API and is not
                # defined for every image format; an AttributeError here is
                # reported by the handler below. Confirm before swapping it
                # for the public getexif().
                exifdata = image._getexif()

            if exifdata is None:
                print(f"No EXIF metadata found for {file_path}")
                continue

            for tagid, value in exifdata.items():
                # Unknown tag ids have no name in TAGS; use the numeric id as
                # text so every column label is a string.
                tagname = str(TAGS.get(tagid, tagid))
                if isinstance(value, bytes):
                    value = decode_utf16_little_endian(value)
                if isinstance(value, dict):
                    # Nested tag tables are flattened to their text form so
                    # they fit into a single cell.
                    value = str(value)
                metadata[tagname] = value

            rows.append(metadata)

        except Exception as e:
            # Best-effort boundary: log the failure and move on to the next file.
            print(f"Error processing {file_path}: {e}")

    # Build the frame once instead of concatenating one-row frames in the loop.
    df = pd.DataFrame(rows)
    print(f"FINAL DF:\n{df}")
    return df
182
 
183
 
 
207
  """
208
  # df = pd.DataFrame(columns=["file_name", "label", "accuracy"])
209
  df_exif = get_exif(get_file_names(files_))
 
210
  return df_exif
211
 
212
 
 
214
  return df # Simply return the modified dataframe
215
 
216
 
217
def df_to_csv(df_, encodings=None):
    """Write a DataFrame to ``output.csv`` and expose it as a Gradio file.

    The encodings are tried in order and the first one that writes the file
    successfully wins. If all of them fail, one last attempt is made in UTF-8
    with unencodable characters replaced, so the user still gets a download
    whenever the data can be written at all.

    Args:
        df_: The DataFrame to export.
        encodings: Optional list of encoding names to try, in order. Defaults
            to a list of common encodings.

    Returns:
        ``gr.File`` pointing at ``output.csv`` and marked visible, or ``None``
        when the file could not be written with any strategy.
    """
    if encodings is None:
        encodings = ["utf-8", "utf-8-sig", "latin1", "iso-8859-1", "cp1252"]

    for encoding in encodings:
        try:
            df_.to_csv("output.csv", encoding=encoding, index=False)
        except Exception as e:
            print(f"Failed with encoding {encoding}: {e}")
        else:
            return gr.File(value="output.csv", visible=True)

    # Last resort: lossy export rather than silently returning nothing.
    try:
        df_.to_csv("output.csv", encoding="utf-8", errors="replace", index=False)
    except Exception as e:
        print(f"Failed to write output.csv: {e}")
        return None
    return gr.File(value="output.csv", visible=True)
233
 
234
 
235
  ##################################################
 
266
  df = gr.DataFrame(interactive=False)
267
  download_raw_btn.click(
268
  fn=df_to_csv,
269
+ inputs=[df],
270
  outputs=gr.File(visible=False),
 
271
  )
272
  gr.Markdown("## Modified results")
273
  df_modified = gr.DataFrame(interactive=True)
274
  download_modified_btn.click(
275
  fn=df_to_csv,
276
+ inputs=[df_modified],
277
  outputs=gr.File(visible=False),
278
  show_progress=False,
279
  )
 
284
 
285
 
286
  if __name__ == "__main__":
 
 
287
  interface.launch(debug=True)