Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -177,36 +177,36 @@ def load_data_and_setup_chroma():
|
|
| 177 |
parsed_metadatas.append(parsed if isinstance(parsed, dict) else {})
|
| 178 |
except:
|
| 179 |
parsed_metadatas.append({})
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
logging.error(f"Error adding batch {i+1}/{num_batches} to in-memory Chroma: {e}")
|
| 211 |
error_count += 1
|
| 212 |
progress_bar.progress((i + 1) / num_batches, text=f"Loading embeddings... Batch {i+1}/{num_batches}")
|
|
|
|
| 177 |
parsed_metadatas.append(parsed if isinstance(parsed, dict) else {})
|
| 178 |
except:
|
| 179 |
parsed_metadatas.append({})
|
| 180 |
+
metadatas_list = parsed_metadatas # This line has the wrong indentation
|
| 181 |
+
|
| 182 |
+
# --- Clean None values from metadata ---
|
| 183 |
+
cleaned_metadatas = []
|
| 184 |
+
for meta_dict in metadatas_list:
|
| 185 |
+
cleaned_dict = {}
|
| 186 |
+
if isinstance(meta_dict, dict):
|
| 187 |
+
for key, value in meta_dict.items():
|
| 188 |
+
# Replace None with empty string, keep other valid types
|
| 189 |
+
if value is None:
|
| 190 |
+
cleaned_dict[key] = ""
|
| 191 |
+
elif isinstance(value, (str, int, float, bool)):
|
| 192 |
+
cleaned_dict[key] = value
|
| 193 |
+
else:
|
| 194 |
+
# Attempt to convert other types to string, or skip
|
| 195 |
+
try:
|
| 196 |
+
cleaned_dict[key] = str(value)
|
| 197 |
+
logging.warning(f"Converted unexpected metadata type ({type(value)}) to string for key '{key}'.")
|
| 198 |
+
except:
|
| 199 |
+
logging.warning(f"Skipping metadata key '{key}' with unconvertible type {type(value)}.")
|
| 200 |
+
cleaned_metadatas.append(cleaned_dict)
|
| 201 |
+
# -----------------------------------------
|
| 202 |
+
|
| 203 |
+
collection.add(
|
| 204 |
+
ids=batch_df['id'].tolist(),
|
| 205 |
+
embeddings=batch_df['embedding'].tolist(),
|
| 206 |
+
documents=batch_df['document'].tolist(),
|
| 207 |
+
metadatas=cleaned_metadatas # Use cleaned list
|
| 208 |
+
)
|
| 209 |
+
except Exception as e:
|
| 210 |
logging.error(f"Error adding batch {i+1}/{num_batches} to in-memory Chroma: {e}")
|
| 211 |
error_count += 1
|
| 212 |
progress_bar.progress((i + 1) / num_batches, text=f"Loading embeddings... Batch {i+1}/{num_batches}")
|