Spaces:
Runtime error
Runtime error
Commit
·
6b3fd91
1
Parent(s):
e2e1dca
remove duplicates
Browse files
- pages/1_Retrieval_App.py +6 -1
pages/1_Retrieval_App.py
CHANGED
|
@@ -130,7 +130,12 @@ def app_main(
|
|
| 130 |   ):
| 131 |       print("loading data")
| 132 |
| 133 | -     retrieval_df =
| 134 |       print("setting up retrieval_pipe")
| 135 |       doc_col = "dependencies"
| 136 |       retrieval_pipeline = setup_retrieval_pipeline(
| 130 |   ):
| 131 |       print("loading data")
| 132 |
| 133 | +     retrieval_df = (
| 134 | +         datasets.load_dataset(data_path)["train"]
| 135 | +         .to_pandas()
| 136 | +         .drop_duplicates(subset=["repo"])
| 137 | +         .reset_index(drop=True)
| 138 | +     )
| 139 |       print("setting up retrieval_pipe")
| 140 |       doc_col = "dependencies"
| 141 |       retrieval_pipeline = setup_retrieval_pipeline(