Spaces:
Build error
Build error
Commit
·
340cc83
1
Parent(s):
2734d11
Update weaviate_utils.py
Browse files- weaviate_utils.py +38 -1
weaviate_utils.py
CHANGED
|
@@ -23,7 +23,44 @@ def map_dtype_to_weaviate(dtype):
|
|
| 23 |
return "string"
|
| 24 |
|
| 25 |
def ingest_data_to_weaviate(client, dataframe, class_name, class_description):
|
| 26 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
def get_class_schema(client, class_name):
|
| 29 |
all_classes = client.schema.get()["classes"]
|
|
|
|
| 23 |
return "string"
|
| 24 |
|
| 25 |
def ingest_data_to_weaviate(client, dataframe, class_name, class_description):
    """Create a Weaviate class for *dataframe* and store each row as an object.

    The class is first created with an empty property list, then one property
    is added per DataFrame column (data type chosen by
    ``map_dtype_to_weaviate``), and finally every row is written as a separate
    data object.

    Args:
        client: Weaviate client exposing the v3-style ``schema`` and
            ``data_object`` APIs.
        dataframe: pandas DataFrame whose columns become class properties and
            whose rows become objects.
        class_name: Name of the Weaviate class to create and fill.
        class_description: Description stored on the class.

    Raises:
        Exceptions raised by ``client.data_object.create`` are not caught and
        propagate to the caller. Schema-creation failures are logged and
        skipped so that an already existing class/property does not abort
        the ingestion.
    """
    # Local import keeps this function self-contained; uuid is stdlib.
    import uuid

    # A server-side rejection (e.g. the class or property already exists) is
    # reported as UnexpectedStatusCodeException, a client-side validation
    # failure as SchemaValidationException. Both are treated as best-effort.
    tolerated_errors = (
        weaviate.exceptions.SchemaValidationException,
        weaviate.exceptions.UnexpectedStatusCodeException,
    )

    # Create the class without properties first; they are added one by one
    # below so that a pre-existing class can still receive new properties.
    class_schema = {
        "class": class_name,
        "description": class_description,
        "properties": [],
    }
    try:
        client.schema.create({"classes": [class_schema]})
    except tolerated_errors as err:
        log_debug_info(
            f"Class {class_name} was not created (it may already exist): {err}"
        )

    # Add one property per DataFrame column.
    for column_name, data_type in zip(dataframe.columns, dataframe.dtypes):
        property_schema = {
            "name": column_name,
            "description": f"Property for {column_name}",
            "dataType": [map_dtype_to_weaviate(data_type)],
        }
        try:
            client.schema.property.create(class_name, property_schema)
        except tolerated_errors as err:
            log_debug_info(
                f"Property {column_name} was not created on {class_name} "
                f"(it may already exist): {err}"
            )

    # Ingest data: the row's values are the object's properties; the class
    # name and the id are passed as separate arguments, not inside the payload.
    for index, row in dataframe.iterrows():
        # Weaviate ids must be UUIDs, so derive a deterministic one from the
        # class name and the row index instead of using the raw index.
        # NOTE(review): re-ingesting the same index will presumably be rejected
        # by Weaviate as a duplicate id - confirm this is the desired behaviour.
        object_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"{class_name}/{index}"))
        client.data_object.create(row.to_dict(), class_name, uuid=object_id)

    # Log data ingestion
    log_debug_info(f"Data ingested into Weaviate for class: {class_name}")
|
| 64 |
|
| 65 |
def get_class_schema(client, class_name):
|
| 66 |
all_classes = client.schema.get()["classes"]
|