Donlagon007 committed on
Commit
348087f
·
verified ·
1 Parent(s): de9737a

Upload personalized_ht4.py

Browse files
Files changed (1) hide show
  1. personalized_ht4.py +111 -12
personalized_ht4.py CHANGED
@@ -10,6 +10,12 @@ import pandas as pd
10
  import matplotlib.pyplot as plt
11
  from typing import Dict, List, Any
12
 
 
 
 
 
 
 
13
  # LangChain imports
14
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
15
  from langchain_community.vectorstores import Chroma
@@ -30,6 +36,19 @@ st.set_page_config(
30
  initial_sidebar_state="expanded",
31
  )
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  # Initialize session state for chat histories
34
  if 'assistant_messages' not in st.session_state:
35
  st.session_state.assistant_messages = []
@@ -43,6 +62,33 @@ if 'vectorstore' not in st.session_state:
43
  st.session_state.vectorstore = None
44
  if 'cea_results' not in st.session_state:
45
  st.session_state.cea_results = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  # Header with OpenAI API Key input
48
  col1, col2 = st.columns([3, 1])
@@ -62,20 +108,29 @@ with col2:
62
  st.success("✓ API Key set")
63
  else:
64
  st.warning("⚠️ Enter API key")
 
65
 
66
-
67
- # Check if API key is provided
68
  def get_llm():
69
- """Initialize LangChain LLM with OpenAI"""
70
  if not openai_api_key:
71
  return None
72
 
 
 
 
 
 
73
  try:
74
  llm = ChatOpenAI(
75
  model="gpt-4o-mini",
76
  temperature=0.7,
77
  openai_api_key=openai_api_key
78
  )
 
 
 
 
79
  return llm
80
  except Exception as e:
81
  st.error(f"Error initializing OpenAI: {str(e)}")
@@ -84,11 +139,16 @@ def get_llm():
84
 
85
  # Create vector store from patient data
86
  def create_patient_vectorstore(patients_df: pd.DataFrame):
87
- """Create vector store from patient dataframe for RAG retrieval"""
88
  if not openai_api_key:
89
  return None
90
 
91
  try:
 
 
 
 
 
92
  documents = []
93
  for idx, row in patients_df.iterrows():
94
  patient_text = f"""Patient ID: {row['patient_id']}
@@ -102,12 +162,36 @@ Betel: {row.get('betel', 'No')}, Family History: {row['family_history']}"""
102
 
103
  doc = Document(
104
  page_content=patient_text,
105
- metadata={"patient_id": row['patient_id']}
 
 
 
106
  )
107
  documents.append(doc)
108
 
109
  embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
110
- vectorstore = Chroma.from_documents(documents=documents, embedding=embeddings)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  return vectorstore
112
 
113
  except Exception as e:
@@ -115,6 +199,7 @@ Betel: {row.get('betel', 'No')}, Family History: {row['family_history']}"""
115
  return None
116
 
117
 
 
118
  # Retrieve patient by ID
119
  def retrieve_patient_by_id(patient_id: str):
120
  """Retrieve patient from dataframe by ID"""
@@ -134,31 +219,44 @@ def retrieve_patient_by_id(patient_id: str):
134
  # Sidebar for patient information
135
  st.sidebar.header("👤 Patient Information")
136
 
137
- # ===== NEW: Patient Data Upload Section =====
138
- with st.sidebar.expander("📁 Upload Patient Database (Optional)", expanded=False):
 
139
  uploaded_file = st.file_uploader(
140
  "Upload CSV/Excel with patient data",
141
  type=['csv', 'xlsx'],
142
- help="Upload a file with multiple patients to enable quick retrieval by ID"
143
  )
144
 
145
  if uploaded_file is not None:
146
  try:
 
 
 
 
 
 
147
  if uploaded_file.name.endswith('.csv'):
148
  df = pd.read_csv(uploaded_file)
149
  else:
150
  df = pd.read_excel(uploaded_file)
151
 
152
  st.session_state.patients_df = df
153
- st.success(f"✅ Loaded {len(df)} patients")
 
 
 
 
 
 
154
 
155
  # Optionally create vector store
156
  if openai_api_key and st.button("🔄 Create Vector Store for Smart Search"):
157
- with st.spinner("Creating vector store..."):
158
  vectorstore = create_patient_vectorstore(df)
159
  if vectorstore:
160
  st.session_state.vectorstore = vectorstore
161
- st.success("✅ Vector store created!")
162
 
163
  except Exception as e:
164
  st.error(f"Error loading file: {str(e)}")
@@ -180,6 +278,7 @@ with st.sidebar.expander("📁 Upload Patient Database (Optional)", expanded=Fal
180
 
181
  st.sidebar.markdown("---")
182
 
 
183
  # Check if we have loaded patient data
184
  if 'loaded_patient' in st.session_state:
185
  # Use loaded patient data
 
10
  import matplotlib.pyplot as plt
11
  from typing import Dict, List, Any
12
 
13
+ import uuid
14
+ import os
15
+ import shutil
16
+ from datetime import datetime, timedelta
17
+ import hashlib
18
+
19
  # LangChain imports
20
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
21
  from langchain_community.vectorstores import Chroma
 
36
  initial_sidebar_state="expanded",
37
  )
38
 
39
+ # ===== 用戶隔離系統初始化 =====
40
+ # 為每個用戶生成唯一 ID
41
+ if 'user_id' not in st.session_state:
42
+ st.session_state.user_id = str(uuid.uuid4())
43
+ st.session_state.session_start = datetime.now()
44
+
45
+ # Session 過期檢查(2小時後過期)
46
+ if 'session_start' in st.session_state:
47
+ session_duration = datetime.now() - st.session_state.session_start
48
+ if session_duration > timedelta(hours=2):
49
+ st.warning("⏱️ Your session has expired (2 hours). Please refresh the page.")
50
+ st.stop()
51
+
52
  # Initialize session state for chat histories
53
  if 'assistant_messages' not in st.session_state:
54
  st.session_state.assistant_messages = []
 
62
  st.session_state.vectorstore = None
63
  if 'cea_results' not in st.session_state:
64
  st.session_state.cea_results = None
65
+ if 'uploaded_files' not in st.session_state:
66
+ st.session_state.uploaded_files = [] # ✅ 新增
67
+ if 'api_call_count' not in st.session_state:
68
+ st.session_state.api_call_count = 0 # ✅ 新增
69
+
70
+
71
+ # ✅ 啟動時清理超過 24 小時的舊資料
72
+ def cleanup_old_data():
73
+ """Delete vector stores older than 24 hours"""
74
+ data_dir = "./data/chroma"
75
+ if not os.path.exists(data_dir):
76
+ return
77
+
78
+ cutoff_time = datetime.now() - timedelta(hours=24)
79
+
80
+ try:
81
+ for user_folder in os.listdir(data_dir):
82
+ folder_path = os.path.join(data_dir, user_folder)
83
+ if os.path.isdir(folder_path):
84
+ mod_time = datetime.fromtimestamp(os.path.getmtime(folder_path))
85
+ if mod_time < cutoff_time:
86
+ shutil.rmtree(folder_path)
87
+ except Exception as e:
88
+ pass # 靜默失敗,不影響用戶
89
+
90
+ cleanup_old_data()
91
+
92
 
93
  # Header with OpenAI API Key input
94
  col1, col2 = st.columns([3, 1])
 
108
  st.success("✓ API Key set")
109
  else:
110
  st.warning("⚠️ Enter API key")
111
+
112
 
113
+ # Check if API key is provided
 
114
  def get_llm():
115
+ """Initialize LangChain LLM with OpenAI - with rate limiting"""
116
  if not openai_api_key:
117
  return None
118
 
119
+ # ✅ API 調用限制(每 session 最多 100 次)
120
+ if st.session_state.api_call_count >= 100:
121
+ st.error("⚠️ API call limit reached (100 calls per session). Please start a new session.")
122
+ st.stop()
123
+
124
  try:
125
  llm = ChatOpenAI(
126
  model="gpt-4o-mini",
127
  temperature=0.7,
128
  openai_api_key=openai_api_key
129
  )
130
+
131
+ # ✅ 記錄 API 調用
132
+ st.session_state.api_call_count += 1
133
+
134
  return llm
135
  except Exception as e:
136
  st.error(f"Error initializing OpenAI: {str(e)}")
 
139
 
140
  # Create vector store from patient data
141
  def create_patient_vectorstore(patients_df: pd.DataFrame):
142
+ """Create vector store from patient dataframe for RAG retrieval - USER ISOLATED"""
143
  if not openai_api_key:
144
  return None
145
 
146
  try:
147
+ # ✅ 建立用戶專屬目錄
148
+ user_id = st.session_state.user_id
149
+ persist_dir = f"./data/chroma/{user_id}"
150
+ os.makedirs(persist_dir, exist_ok=True)
151
+
152
  documents = []
153
  for idx, row in patients_df.iterrows():
154
  patient_text = f"""Patient ID: {row['patient_id']}
 
162
 
163
  doc = Document(
164
  page_content=patient_text,
165
+ metadata={
166
+ "patient_id": row['patient_id'],
167
+ "user_id": user_id # ✅ 加入 user_id 標記
168
+ }
169
  )
170
  documents.append(doc)
171
 
172
  embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
173
+
174
+ # ✅ 使用用戶專屬的 collection 和目錄
175
+ vectorstore = Chroma(
176
+ collection_name=f"user_{user_id}_patients",
177
+ embedding_function=embeddings,
178
+ persist_directory=persist_dir
179
+ )
180
+
181
+ # 清空舊資料(如果有)
182
+ try:
183
+ vectorstore.delete_collection()
184
+ vectorstore = Chroma(
185
+ collection_name=f"user_{user_id}_patients",
186
+ embedding_function=embeddings,
187
+ persist_directory=persist_dir
188
+ )
189
+ except:
190
+ pass
191
+
192
+ # 加入新文件
193
+ vectorstore.add_documents(documents)
194
+
195
  return vectorstore
196
 
197
  except Exception as e:
 
199
  return None
200
 
201
 
202
+
203
  # Retrieve patient by ID
204
  def retrieve_patient_by_id(patient_id: str):
205
  """Retrieve patient from dataframe by ID"""
 
219
  # Sidebar for patient information
220
  st.sidebar.header("👤 Patient Information")
221
 
222
+
223
+ # ===== NEW: Patient Data Upload Section =====
224
+ with st.sidebar.expander("📂 Upload Patient Database (Optional)", expanded=False):
225
  uploaded_file = st.file_uploader(
226
  "Upload CSV/Excel with patient data",
227
  type=['csv', 'xlsx'],
228
+ help="Upload a file with multiple patients (Max 10MB)"
229
  )
230
 
231
  if uploaded_file is not None:
232
  try:
233
+ # ✅ 檔案大小限制
234
+ file_size_mb = uploaded_file.size / (1024 * 1024)
235
+ if file_size_mb > 10:
236
+ st.error(f"❌ File too large ({file_size_mb:.1f}MB). Max 10MB.")
237
+ st.stop()
238
+
239
  if uploaded_file.name.endswith('.csv'):
240
  df = pd.read_csv(uploaded_file)
241
  else:
242
  df = pd.read_excel(uploaded_file)
243
 
244
  st.session_state.patients_df = df
245
+
246
+ # ✅ 記錄上傳的檔案
247
+ file_hash = hashlib.md5(uploaded_file.getvalue()).hexdigest()
248
+ if file_hash not in st.session_state.uploaded_files:
249
+ st.session_state.uploaded_files.append(file_hash)
250
+
251
+ st.success(f"✅ Loaded {len(df)} patients ({file_size_mb:.1f}MB)")
252
 
253
  # Optionally create vector store
254
  if openai_api_key and st.button("🔄 Create Vector Store for Smart Search"):
255
+ with st.spinner("Creating isolated vector store..."):
256
  vectorstore = create_patient_vectorstore(df)
257
  if vectorstore:
258
  st.session_state.vectorstore = vectorstore
259
+ st.success("✅ Vector store created! (Isolated to your session)")
260
 
261
  except Exception as e:
262
  st.error(f"Error loading file: {str(e)}")
 
278
 
279
  st.sidebar.markdown("---")
280
 
281
+
282
  # Check if we have loaded patient data
283
  if 'loaded_patient' in st.session_state:
284
  # Use loaded patient data