Spaces: Sleeping
Commit ·
3b72f75
1
Parent(s): 3a05861
new version of rag
Browse files
app.py
CHANGED
|
@@ -95,20 +95,18 @@ def get_embedding_model():
|
|
| 95 |
return HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
| 96 |
|
| 97 |
def list_zip_files_in_repo(repo_id: str) -> List[str]:
|
| 98 |
-
files = list_repo_files(repo_id, token=HF_TOKEN)
|
| 99 |
return [f for f in files if f.startswith(JSON_FILES_DIR) and f.endswith('.zip')]
|
| 100 |
|
| 101 |
def download_file_from_hf(repo_id: str, path_in_repo: str, dest_dir: str) -> str:
|
| 102 |
-
local_path = hf_hub_download(
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
pass
|
| 111 |
-
return dst
|
| 112 |
|
| 113 |
def read_jsons_from_zip(zip_path: str) -> List[Dict[str, Any]]:
|
| 114 |
docs = []
|
|
|
|
| 95 |
return HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
| 96 |
|
| 97 |
def list_zip_files_in_repo(repo_id: str) -> List[str]:
|
| 98 |
+
files = list_repo_files(repo_id, repo_type="dataset", token=HF_TOKEN) # Add repo_type="dataset"
|
| 99 |
return [f for f in files if f.startswith(JSON_FILES_DIR) and f.endswith('.zip')]
|
| 100 |
|
| 101 |
def download_file_from_hf(repo_id: str, path_in_repo: str, dest_dir: str) -> str:
|
| 102 |
+
local_path = hf_hub_download(
|
| 103 |
+
repo_id=repo_id,
|
| 104 |
+
filename=path_in_repo,
|
| 105 |
+
repo_type="dataset",
|
| 106 |
+
token=HF_TOKEN,
|
| 107 |
+
local_dir=dest_dir # Add this to download directly to dest_dir
|
| 108 |
+
)
|
| 109 |
+
return local_path # Return the path directly
|
|
|
|
|
|
|
| 110 |
|
| 111 |
def read_jsons_from_zip(zip_path: str) -> List[Dict[str, Any]]:
|
| 112 |
docs = []
|