Spaces:
Running
Running
Add dataset configuration files and scripts
Browse files- .gitignore +27 -0
- dataset.py +31 -0
- dataset.txt +25 -0
- terminal +25 -0
.gitignore
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Vector store files
|
| 2 |
+
status-law-knowledge-base/vector_store/
|
| 3 |
+
vector_store/
|
| 4 |
+
*.faiss
|
| 5 |
+
*.pkl
|
| 6 |
+
|
| 7 |
+
# Chat history
|
| 8 |
+
status-law-knowledge-base/chat_history/
|
| 9 |
+
chat_history/
|
| 10 |
+
*.json
|
| 11 |
+
|
| 12 |
+
# Hugging Face specific
|
| 13 |
+
.huggingface/
|
| 14 |
+
|
| 15 |
+
# Python specific
|
| 16 |
+
__pycache__/
|
| 17 |
+
*.py[cod]
|
| 18 |
+
*$py.class
|
| 19 |
+
.env
|
| 20 |
+
.venv
|
| 21 |
+
env/
|
| 22 |
+
venv/
|
| 23 |
+
ENV/
|
| 24 |
+
|
| 25 |
+
# IDE specific
|
| 26 |
+
.vscode/
|
| 27 |
+
.idea/
|
dataset.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys

from huggingface_hub import HfApi

api = HfApi()

# Name of the existing dataset repository on the Hugging Face Hub.
dataset_name = "Rulga/status-law-knowledge-base"

# Create the folder structure by uploading an empty ``.gitkeep`` placeholder
# file into each folder.
try:
    for folder in ("vector_store", "chat_history"):
        api.upload_file(
            path_or_fileobj=b"",  # empty file
            path_in_repo=f"{folder}/.gitkeep",
            repo_id=dataset_name,
            repo_type="dataset",
        )
        print(f"✓ Создана папка {folder}")

    print("\nСтруктура датасета успешно создана!")
except Exception as e:
    # Top-level boundary of the script: report the failure on stderr and
    # exit with a non-zero status so a calling shell/CI job can detect that
    # the structure was not (fully) created.
    print(f"Произошла ошибка: {e}", file=sys.stderr)
    sys.exit(1)
|
| 31 |
+
|
dataset.txt
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from huggingface_hub import HfApi

api = HfApi()

# Dataset repository to create on the Hugging Face Hub ("<namespace>/<name>").
dataset_name = "Rulga/status-law-knowledge-base"

# exist_ok=True keeps the script re-runnable: without it, create_repo raises
# when the repository already exists and the uploads below are never reached.
api.create_repo(
    repo_id=dataset_name,
    repo_type="dataset",
    private=False,
    exist_ok=True,
)

# Create the folder structure by uploading an empty ``.gitkeep`` placeholder
# file into each folder.
for folder in ("vector_store", "chat_history"):
    api.upload_file(
        path_or_fileobj=b"",  # empty file
        path_in_repo=f"{folder}/.gitkeep",
        repo_id=dataset_name,
        repo_type="dataset",
    )
|
terminal
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from huggingface_hub import HfApi

api = HfApi()

# Create a new dataset (replace YOUR_USERNAME with your Hugging Face user name).
dataset_name = "YOUR_USERNAME/status-law-knowledge-base"

# exist_ok=True keeps the script re-runnable: without it, create_repo raises
# when the repository already exists and the uploads below are never reached.
api.create_repo(
    repo_id=dataset_name,
    repo_type="dataset",
    private=True,  # or False if you want a public dataset
    exist_ok=True,
)

# Create the folder structure by uploading an empty ``.gitkeep`` placeholder
# file into each folder.
for folder in ("vector_store", "chat_history"):
    api.upload_file(
        path_or_fileobj=b"",  # empty file
        path_in_repo=f"{folder}/.gitkeep",
        repo_id=dataset_name,
        repo_type="dataset",
    )
|