Spaces:
Sleeping
Sleeping
| from api.pinecone_func import upsert_texts | |
| from pathlib import Path | |
| from utils.chunking import chunk_text | |
| import uuid | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
def load_documents(folder="docs"):
    """Collect text chunks and fresh IDs from the ``*.txt`` files in a folder.

    Every ``*.txt`` file directly inside *folder* is read as UTF-8 and split
    with ``chunk_text``; each resulting chunk is paired with a newly
    generated random UUID4 string.

    Args:
        folder: Directory to scan (non-recursive). Defaults to ``"docs"``.

    Returns:
        A ``(texts, ids)`` tuple of two equally long lists, where ``ids[i]``
        is the identifier generated for ``texts[i]``.
    """
    collected = []
    for txt_path in Path(folder).glob("*.txt"):
        raw = txt_path.read_text(encoding="utf-8")
        collected.extend(chunk_text(raw))

    # NOTE(review): IDs are random on every call, so the same chunk receives
    # a different ID each run -- confirm that is what the upload step expects.
    chunk_ids = [str(uuid.uuid4()) for _ in collected]
    return collected, chunk_ids
def main():
    """Load the document chunks and upload them via ``upsert_texts``.

    Prints a progress line with the number of chunks before uploading and a
    completion line afterwards.
    """
    texts, ids = load_documents()
    print(f"{len(texts)}件の知識チャンクをアップロード中")
    upsert_texts(texts, ids)
    print("アップロード完了")


if __name__ == "__main__":
    main()