Spaces:
Build error
Build error
Upload data_processing.py
Browse files- data_processing.py +7 -7
data_processing.py
CHANGED
|
@@ -101,18 +101,18 @@ def load_query_dataset(q_dataset):
|
|
| 101 |
query_dataset_data[q_dataset] = load_dataset("rungalileo/ragbench", q_dataset)
|
| 102 |
return query_dataset_data[q_dataset]
|
| 103 |
|
| 104 |
-
def load_faiss(query_dataset):
|
| 105 |
global index
|
| 106 |
-
faiss_index_path = f"data_local/{query_dataset}_quantized.faiss"
|
| 107 |
if os.path.exists(faiss_index_path):
|
| 108 |
index = faiss.read_index(faiss_index_path)
|
| 109 |
print("FAISS index loaded successfully.")
|
| 110 |
else:
|
| 111 |
print("FAISS index file not found. Run create_faiss_index_file() first.")
|
| 112 |
|
| 113 |
-
def load_chunks(query_dataset):
|
| 114 |
global chunk_docs
|
| 115 |
-
metadata_path = f"data_local/{query_dataset}_chunked_docs.json"
|
| 116 |
if os.path.exists(metadata_path):
|
| 117 |
with open(metadata_path, "r") as f:
|
| 118 |
chunk_docs = json.load(f)
|
|
@@ -120,9 +120,9 @@ def load_chunks(query_dataset):
|
|
| 120 |
else:
|
| 121 |
print("Metadata file not found. Run create_faiss_index_file() first.")
|
| 122 |
|
| 123 |
-
def load_data_from_faiss(query_dataset):
|
| 124 |
-
load_faiss(query_dataset)
|
| 125 |
-
load_chunks(query_dataset)
|
| 126 |
|
| 127 |
def rerank_documents(query, retrieved_docs):
|
| 128 |
doc_texts = [doc for doc in retrieved_docs]
|
|
|
|
| 101 |
query_dataset_data[q_dataset] = load_dataset("rungalileo/ragbench", q_dataset)
|
| 102 |
return query_dataset_data[q_dataset]
|
| 103 |
|
| 104 |
+
def load_faiss(q_dataset):
|
| 105 |
global index
|
| 106 |
+
faiss_index_path = f"data_local/{q_dataset}_quantized.faiss"
|
| 107 |
if os.path.exists(faiss_index_path):
|
| 108 |
index = faiss.read_index(faiss_index_path)
|
| 109 |
print("FAISS index loaded successfully.")
|
| 110 |
else:
|
| 111 |
print("FAISS index file not found. Run create_faiss_index_file() first.")
|
| 112 |
|
| 113 |
+
def load_chunks(q_dataset):
|
| 114 |
global chunk_docs
|
| 115 |
+
metadata_path = f"data_local/{q_dataset}_chunked_docs.json"
|
| 116 |
if os.path.exists(metadata_path):
|
| 117 |
with open(metadata_path, "r") as f:
|
| 118 |
chunk_docs = json.load(f)
|
|
|
|
| 120 |
else:
|
| 121 |
print("Metadata file not found. Run create_faiss_index_file() first.")
|
| 122 |
|
| 123 |
+
def load_data_from_faiss(q_dataset):
|
| 124 |
+
load_faiss(q_dataset)
|
| 125 |
+
load_chunks(q_dataset)
|
| 126 |
|
| 127 |
def rerank_documents(query, retrieved_docs):
|
| 128 |
doc_texts = [doc for doc in retrieved_docs]
|