"""Convert pickled case batches into a corpus parquet file.

Loads a pickle file, wraps entries from the first ``MAX_BATCHES`` batches in
langchain ``Document`` objects, and hands them to
``langchain_documents_to_parquet`` to be written out.
"""

import pickle

from langchain.schema import Document

from autorag.data.corpus import langchain_documents_to_parquet

# Input pickle and output parquet locations used by this script.
CASES_PICKLE_PATH = "/Users/anpigon/Documents/Embed/법원판례/cases.pkl"
CORPUS_PARQUET_PATH = "evaluation/data/corpus.parquet"

# Upper bound on how many leading batches are converted.
MAX_BATCHES = 100

# NOTE: pickle.load can execute arbitrary code during deserialization; only
# point this script at a pickle file that comes from a trusted source.
with open(CASES_PICKLE_PATH, "rb") as file:
    data = pickle.load(file)

print(len(data))  # 2736 batches in total (per the original author's note)

docs = []
# Clamp to len(data) so a pickle with fewer than MAX_BATCHES batches does not
# raise IndexError; index access is kept so `data` only needs len() and [i].
for i in range(min(MAX_BATCHES, len(data))):
    # Element 1 of each batch is iterated item by item; presumably a
    # sequence of sentence strings -- confirm against the pickle's producer.
    for sentence in data[i][1]:
        print(sentence)
        docs.append(Document(page_content=sentence))

langchain_documents_to_parquet(docs, CORPUS_PARQUET_PATH)