# Dr.Yasuda_streamlit / upload_knowledge.py
# Blue2962
# a
# a8f4c3e
# raw
# history blame contribute delete
# 737 Bytes
from api.pinecone_func import upsert_texts
from pathlib import Path
from utils.chunking import chunk_text
import uuid
from dotenv import load_dotenv
load_dotenv()
def load_documents(folder="docs"):
    """Collect text chunks, and a fresh ID for each, from ``*.txt`` files.

    Every ``*.txt`` file directly inside ``folder`` (no recursion) is read
    as UTF-8 and passed to ``chunk_text`` (imported from ``utils.chunking``;
    its splitting rules are not visible in this file).

    Args:
        folder: Directory to scan. Defaults to ``"docs"``.

    Returns:
        A ``(texts, ids)`` tuple of two parallel lists: the chunks in the
        order they were produced, and one ``str(uuid.uuid4())`` per chunk.
        The IDs are random, so calling this twice on the same files yields
        different IDs.
    """
    all_chunks = []
    chunk_ids = []
    for txt_path in Path(folder).glob("*.txt"):
        raw_text = txt_path.read_text(encoding="utf-8")
        already_collected = len(all_chunks)
        all_chunks.extend(chunk_text(raw_text))
        # One random UUID per chunk just added keeps both lists aligned.
        added = len(all_chunks) - already_collected
        chunk_ids.extend(str(uuid.uuid4()) for _ in range(added))
    return all_chunks, chunk_ids
if __name__ == "__main__":
    # Script entry point: gather every chunk from the default folder and
    # hand the chunks plus their IDs to upsert_texts (api.pinecone_func).
    knowledge_chunks, knowledge_ids = load_documents()
    chunk_count = len(knowledge_chunks)
    # Progress message reads "Uploading <N> knowledge chunks".
    print(f"{chunk_count}件の知識チャンクをアップロード中")
    upsert_texts(knowledge_chunks, knowledge_ids)
    # Completion message reads "Upload complete".
    print("アップロード完了")