jskinner215 committed on
Commit
1cb0871
·
1 Parent(s): 5e4315c

Update weaviate_utils.py

Browse files
Files changed (1) hide show
  1. weaviate_utils.py +31 -10
weaviate_utils.py CHANGED
@@ -36,22 +36,43 @@ def create_new_class_schema(client, class_name, class_description):
36
  st.success(f"Class {class_name} created successfully!")
37
  except Exception as e:
38
  st.error(f"Error creating class: {e}")
39
-
40
  def ingest_data_to_weaviate(client, csv_file, selected_class):
41
- # Convert CSV to DataFrame
42
  data = csv_file.read().decode("utf-8")
43
  dataframe = pd.read_csv(StringIO(data))
44
 
45
- # Check if columns match the selected class schema
46
  class_schema = get_class_schema(client, selected_class)
47
- if class_schema:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  schema_columns = [prop["name"] for prop in class_schema["properties"]]
49
- if set(dataframe.columns) == set(schema_columns):
50
- data = dataframe.to_dict(orient="records")
51
- client.data_object.create(data, selected_class)
52
- st.success("Data ingested successfully!")
53
- else:
54
- st.error("The columns in the uploaded CSV do not match the schema of the selected class.")
 
 
 
 
 
 
55
 
56
  def get_class_schema(client, class_name):
57
  try:
 
36
  st.success(f"Class {class_name} created successfully!")
37
  except Exception as e:
38
  st.error(f"Error creating class: {e}")
39
+
40
  def ingest_data_to_weaviate(client, csv_file, selected_class):
41
+ # Read the CSV data
42
  data = csv_file.read().decode("utf-8")
43
  dataframe = pd.read_csv(StringIO(data))
44
 
45
+ # Fetch the schema for the selected class
46
  class_schema = get_class_schema(client, selected_class)
47
+
48
+ # If the schema is empty, create it based on the CSV columns
49
+ if not class_schema or not class_schema["properties"]:
50
+ for column_name, data_type in zip(dataframe.columns, dataframe.dtypes):
51
+ property_schema = {
52
+ "name": column_name,
53
+ "description": f"Property for {column_name}",
54
+ "dataType": [map_dtype_to_weaviate(data_type)]
55
+ }
56
+ try:
57
+ client.schema.property.create(selected_class, property_schema)
58
+ except weaviate.exceptions.SchemaValidationException:
59
+ # Property might already exist, so we can continue
60
+ pass
61
+ else:
62
+ # If the schema is not empty, compare it with the CSV columns
63
  schema_columns = [prop["name"] for prop in class_schema["properties"]]
64
+ if set(dataframe.columns) != set(schema_columns):
65
+ st.error("The columns in the uploaded CSV do not match the schema of the selected class. Please check and upload the correct CSV or create a new class.")
66
+ return
67
+
68
+ # Ingest the data into Weaviate
69
+ data = dataframe.to_dict(orient="records")
70
+ client.data_object.create(data, selected_class)
71
+
72
+ # Display a preview of the ingested data
73
+ st.write(f"Your CSV was successfully integrated into the vector database under the class '{selected_class}'")
74
+ st.write(dataframe.head()) # Display the first few rows of the dataframe as a preview
75
+
76
 
77
  def get_class_schema(client, class_name):
78
  try: