Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files - src/streamlit_app.py +8 -12
src/streamlit_app.py
CHANGED
|
@@ -115,8 +115,8 @@ if "messages" not in st.session_state:
|
|
| 115 |
st.session_state.messages = []
|
| 116 |
|
| 117 |
# === 修改:檔案處理邏輯 (轉為 FAISS) ===
|
| 118 |
-
def process_file_to_faiss(uploaded_file, chunk_size, chunk_overlap=50):
|
| 119 |
-
"""讀取檔案 ->
|
| 120 |
text_content = ""
|
| 121 |
try:
|
| 122 |
# 1. 讀取文字
|
|
@@ -134,17 +134,13 @@ def process_file_to_faiss(uploaded_file, chunk_size, chunk_overlap=50):
|
|
| 134 |
if not text_content.strip():
|
| 135 |
return None, "File is empty"
|
| 136 |
|
| 137 |
-
# 2.
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
)
|
| 143 |
-
# 將純文字包裝成 LangChain Document 物件
|
| 144 |
-
docs = [Document(page_content=x) for x in text_splitter.split_text(text_content)]
|
| 145 |
|
| 146 |
-
#
|
| 147 |
-
# 如果 session 中已有 vector_store,可以選擇合併,這裡示範每次上傳新檔就重建一個新的
|
| 148 |
vector_store = FAISS.from_documents(docs, embedding_model)
|
| 149 |
|
| 150 |
return vector_store, f"Success: {len(docs)} chunks created."
|
|
|
|
| 115 |
st.session_state.messages = []
|
| 116 |
|
| 117 |
# === 修改:檔案處理邏輯 (轉為 FAISS) ===
|
| 118 |
+
def process_file_to_faiss(uploaded_file, chunk_overlap=0):
|
| 119 |
+
"""讀取檔案 -> 以 </Event> 分割 -> 建立 FAISS 索引"""
|
| 120 |
text_content = ""
|
| 121 |
try:
|
| 122 |
# 1. 讀取文字
|
|
|
|
| 134 |
if not text_content.strip():
|
| 135 |
return None, "File is empty"
|
| 136 |
|
| 137 |
+
# 2. 以 </Event> 做分割
|
| 138 |
+
events = [e + "</Event>" for e in text_content.split("</Event>") if e.strip()]
|
| 139 |
+
|
| 140 |
+
# 3. 封裝成 Document
|
| 141 |
+
docs = [Document(page_content=e) for e in events]
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
+
# 4. 建立 FAISS
|
|
|
|
| 144 |
vector_store = FAISS.from_documents(docs, embedding_model)
|
| 145 |
|
| 146 |
return vector_store, f"Success: {len(docs)} chunks created."
|