Blue2962 committed on
Commit
1fd68ae
·
1 Parent(s): b3884c2
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore CHANGED
@@ -1,2 +1,3 @@
1
  .env
2
- firebase_key.json
 
 
1
  .env
2
+ firebase_key.json
3
+ docs
README.md DELETED
@@ -1,12 +0,0 @@
1
- ---
2
- title: Dr.Yasuda Streamlit
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: Streamlit template space
12
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
__pycache__/pinecone_func.cpython-311.pyc DELETED
Binary file (2.57 kB)
 
index.html DELETED
@@ -1,17 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8">
5
- <title>Dr Yasuda App</title>
6
- <style>
7
- body { margin: 0; }
8
- canvas { display: block; }
9
- </style>
10
- </head>
11
- <body>
12
- <script src="https://cdn.jsdelivr.net/npm/three@0.158.0/build/three.min.js"></script>
13
- <script>
14
-
15
- </script>
16
- </body>
17
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
upload_knowledge.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from api.pinecone_func import upsert_texts
2
+ from pathlib import Path
3
+ from utils.chunking import chunk_text
4
+ import uuid
5
+
6
def load_documents(folder="docs"):
    """Read every ``*.txt`` file directly inside *folder* and split it into chunks.

    Files are visited in sorted path order so repeated runs yield the same
    sequence. Each file is decoded as UTF-8 and handed to ``chunk_text``;
    empty chunks are dropped.

    Each chunk receives a deterministic ID: a UUIDv5 derived from the file
    name and the chunk's position within that file. Unlike a random UUID,
    the same file/position pair always maps to the same ID.
    NOTE(review): this presumably lets ``upsert_texts`` overwrite earlier
    uploads instead of accumulating duplicates -- confirm against that helper.

    Args:
        folder: Directory to scan (non-recursive). Defaults to ``"docs"``.

    Returns:
        A ``(texts, ids)`` tuple of two index-aligned lists: the chunk
        strings and their string IDs.
    """
    texts = []
    ids = []
    # sorted(): Path.glob gives no ordering guarantee.
    for path in sorted(Path(folder).glob("*.txt")):
        content = path.read_text(encoding="utf-8")
        for index, chunk in enumerate(chunk_text(content)):
            if not chunk:
                # Nothing to upload; the index still advances so the IDs of
                # the remaining chunks stay tied to their position.
                continue
            texts.append(chunk)
            ids.append(str(uuid.uuid5(uuid.NAMESPACE_URL, f"{path.name}#{index}")))
    return texts, ids
17
+
18
if __name__ == "__main__":
    # Collect all chunks and their IDs from the default documents folder.
    documents = load_documents()
    chunk_texts, chunk_ids = documents
    print(f"{len(chunk_texts)}件の知識チャンクをアップロード中")
    # Hand everything to upsert_texts (imported from api.pinecone_func) in one call.
    upsert_texts(chunk_texts, chunk_ids)
    print("アップロード完了")
utils/chunking.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
def chunk_text(text: str, max_length: int = 1000):
    """Split *text* into chunks of at most *max_length* characters.

    While the remaining text is longer than *max_length*, the cut is made
    just after the last Japanese full stop ("。") found within the first
    *max_length* characters. If there is none, the text is cut hard at
    exactly *max_length* characters. Each chunk is stripped of surrounding
    whitespace, and chunks that are empty after stripping are omitted.

    Args:
        text: The text to split.
        max_length: Maximum number of characters per chunk; must be >= 1.

    Returns:
        A list of non-empty chunk strings in their original order.

    Raises:
        ValueError: If *max_length* is smaller than 1 (no progress could be
            made through the text).
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")

    chunks = []
    while len(text) > max_length:
        split_pos = text.rfind("。", 0, max_length)
        # cut_end is exclusive. With a delimiter the chunk includes the "。";
        # without one, cut at max_length so the chunk never exceeds the limit.
        cut_end = max_length if split_pos == -1 else split_pos + 1
        piece = text[:cut_end].strip()
        if piece:
            chunks.append(piece)
        text = text[cut_end:]

    tail = text.strip()
    if tail:
        chunks.append(tail)
    return chunks