hbui committed on
Commit
ecb943c
·
1 Parent(s): 794666f

Update models/llamaCustom.py

Browse files
Files changed (1) hide show
  1. models/llamaCustom.py +21 -0
models/llamaCustom.py CHANGED
@@ -87,6 +87,27 @@ class LlamaCustom:
87
  def __init__(self, name: str) -> None:
88
  self.vector_index = self.initialize_index(index_name=name)
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  def initialize_index(self, index_name):
91
  file_path = f"./vectorStores/{index_name}"
92
  if os.path.exists(path=file_path):
 
87
  def __init__(self, name: str) -> None:
88
  self.vector_index = self.initialize_index(index_name=name)
89
 
90
# Declared static because the signature takes no ``self``: without the
# decorator, calling this on an instance would bind the instance itself to
# ``file_path``. Calls through the class keep working unchanged.
@staticmethod
def prepare_data(file_path: str) -> list:
    """Load a JSON file and build one ``Document`` per complete record.

    The file is read with ``pd.read_json``. Empty strings are treated as
    missing values, and every row that contains a missing value is dropped
    before any document is created.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A list of ``Document`` objects, one per remaining record. Each uses
        the record's ``paragraphText`` as text, ``_id["$oid"]`` as document
        id, and carries ``chapter``, ``article`` and ``title`` as extra info.

    Raises:
        KeyError: If a record lacks one of the fields read below.
    """
    frame = pd.read_json(file_path)
    # Treat empty strings as missing, then drop every incomplete row.
    frame = frame.replace(to_replace="", value=np.nan).dropna(axis=0)

    # Round-trip through JSON text so each record is a plain dict of
    # JSON-decoded values.
    records = loads(frame.to_json(orient="records"))

    return [
        Document(
            text=record["paragraphText"],
            doc_id=record["_id"]["$oid"],
            extra_info={
                "chapter": record["chapter"],
                "article": record["article"],
                "title": record["title"],
            },
        )
        for record in records
    ]
110
+
111
  def initialize_index(self, index_name):
112
  file_path = f"./vectorStores/{index_name}"
113
  if os.path.exists(path=file_path):