Initial codes commit
Browse files
- db/initializer.py +5 -0
- modules/corpus.py +1 -1
db/initializer.py
CHANGED
|
@@ -26,6 +26,11 @@ def initialize_dbs():
|
|
| 26 |
corpus.prepare_corpus()
|
| 27 |
# 2) 인덱스/ID 매핑 메모리 로드
|
| 28 |
_load_index_in_memory()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
def force_update():
|
| 31 |
_load_index_in_memory()
|
|
|
|
| 26 |
corpus.prepare_corpus()
|
| 27 |
# 2) 인덱스/ID 매핑 메모리 로드
|
| 28 |
_load_index_in_memory()
|
| 29 |
+
# 3) Arrow 캐시 생성
|
| 30 |
+
datasets = corpus._get_datasets()
|
| 31 |
+
for _subset, ds in datasets.items():
|
| 32 |
+
# dummy 호출로 캐시 생성
|
| 33 |
+
_ = ds.filter(lambda r: False)
|
| 34 |
|
| 35 |
def force_update():
|
| 36 |
_load_index_in_memory()
|
modules/corpus.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# rag/modules/
|
| 2 |
from typing import List, Dict, Any
|
| 3 |
from datasets import load_dataset, DatasetDict, Dataset
|
| 4 |
from config import HF_CORPUS_REPO, HF_CORPUS_SUBSET, HF_CORPUS_SPLIT, MARKER_DIR, CORPUS_READY_MARK
|
|
|
|
| 1 |
+
# rag/modules/corpus.py
|
| 2 |
from typing import List, Dict, Any
|
| 3 |
from datasets import load_dataset, DatasetDict, Dataset
|
| 4 |
from config import HF_CORPUS_REPO, HF_CORPUS_SUBSET, HF_CORPUS_SPLIT, MARKER_DIR, CORPUS_READY_MARK
|