RegBotBeta2.1

Sleeping

hbui committed on Aug 1, 2023

Commit

ecb943c

1 Parent(s): 794666f

Update models/llamaCustom.py

Files changed (1) hide show

models/llamaCustom.py CHANGED Viewed

@@ -87,6 +87,27 @@ class LlamaCustom:
     def __init__(self, name: str) -> None:
         """Create the instance and store the vector index for ``name``.

         Args:
             name: Forwarded to ``initialize_index`` as ``index_name``.
         """
         self.vector_index = self.initialize_index(index_name=name)
     def initialize_index(self, index_name):
         file_path = f"./vectorStores/{index_name}"
         if os.path.exists(path=file_path):

     def __init__(self, name: str) -> None:
         """Create the instance and store the vector index for ``name``.

         Args:
             name: Forwarded to ``initialize_index`` as ``index_name``.
         """
         self.vector_index = self.initialize_index(index_name=name)
+    def prepare_data(file_path: str):
+        df = pd.read_json(file_path)
+        df = df.replace(to_replace="", value=np.nan).dropna(axis=0)  # remove null values
+        parsed = loads(df.to_json(orient="records"))
+        documents = []
+        for item in parsed:
+            document = Document(
+                text=item["paragraphText"],
+                doc_id=item["_id"]["$oid"],
+                extra_info={
+                    "chapter": item["chapter"],
+                    "article": item["article"],
+                    "title": item["title"],
+                },
+            )
+            documents.append(document)
+        return documents
     def initialize_index(self, index_name):
         file_path = f"./vectorStores/{index_name}"
         if os.path.exists(path=file_path):