zzejiao commited on
Commit
3530638
·
1 Parent(s): d45930f

yash's hf bipolar demo code with github action set

Browse files
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.faiss filter=lfs diff=lfs merge=lfs -text
.github/workflows/sync-to-hf.yml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync hf-bipolar to HF Space main
2
+
3
+ on:
4
+ push:
5
+ branches: [ demo_bipolar ]
6
+ workflow_dispatch: {}
7
+
8
+ jobs:
9
+ sync:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - name: Checkout demo_bipolar (with LFS)
13
+ uses: actions/checkout@v4
14
+ with:
15
+ ref: demo_bipolar
16
+ lfs: true
17
+ fetch-depth: 0
18
+
19
+ - name: Setup Git LFS
20
+ run: |
21
+ git lfs install
22
+ git lfs fetch --all
23
+
24
+ - name: Push hf-demo_bipolar to HF Space main
25
+ env:
26
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
27
+ run: |
28
+ set -e
29
+ git config user.email "actions@github.com"
30
+ git config user.name "github-actions[bot]"
31
+ SPACE_ID="ymali/bipolar"
32
+ TARGET_BRANCH="main"
33
+ AUTHED_URL="https://user:${HF_TOKEN}@huggingface.co/spaces/${SPACE_ID}"
34
+ git remote add hf "$AUTHED_URL" || git remote set-url hf "$AUTHED_URL"
35
+ git lfs push hf --all
36
+ git push hf "HEAD:refs/heads/${TARGET_BRANCH}" --force
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/bipolar.iml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="jdk" jdkName="Python 3.13" jdkType="Python SDK" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ <component name="PyDocumentationSettings">
9
+ <option name="format" value="PLAIN" />
10
+ <option name="myDocStringFormat" value="Plain" />
11
+ </component>
12
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
5
+ <inspection_tool class="PyTypeCheckerInspection" enabled="false" level="WARNING" enabled_by_default="false" />
6
+ </profile>
7
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="Python 3.13" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/bipolar.iml" filepath="$PROJECT_DIR$/.idea/bipolar.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
6
+ </component>
7
+ </project>
.streamlit/config.toml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [server]
2
+ enableStaticServing = false
3
+ #
4
+ #[[theme.fontFaces]]
5
+ #family = "SpaceGrotesk"
6
+ #url = "app/static/SpaceGrotesk-VariableFont_wght.ttf"
7
+ #
8
+ #[[theme.fontFaces]]
9
+ #family = "SpaceMono"
10
+ #url = "app/static/SpaceMono-Bold.ttf"
11
+ #style = "normal"
12
+ #weight = 700
13
+ #
14
+ #[[theme.fontFaces]]
15
+ #family = "SpaceMono"
16
+ #url = "app/static/SpaceMono-BoldItalic.ttf"
17
+ #style = "italic"
18
+ #weight = 700
19
+ #
20
+ #[[theme.fontFaces]]
21
+ #family = "SpaceMono"
22
+ #url = "app/static/SpaceMono-Italic.ttf"
23
+ #style = "italic"
24
+ #weight = 400
25
+ #
26
+ #[[theme.fontFaces]]
27
+ #family = "SpaceMono"
28
+ #url = "app/static/SpaceMono-Regular.ttf"
29
+ #style = "normal"
30
+ #weight = 400
31
+
32
+ [theme]
33
+ primaryColor = "#cb785c"
34
+ backgroundColor = "#fdfdf8"
35
+ secondaryBackgroundColor = "#ecebe3"
36
+ textColor = "#3d3a2a"
37
+ linkColor = "#3d3a2a"
38
+ borderColor = "#d3d2ca"
39
+ showWidgetBorder = true
40
+ baseRadius = "0.75rem"
41
+ buttonRadius = "full"
42
+ #font = "SpaceGrotesk"
43
+ headingFontWeights = [600,500,500,500,500,500]
44
+ headingFontSizes = ["3rem", "2rem"]
45
+ #codeFont = "SpaceMono"
46
+ codeFontSize = ".75rem"
47
+ codeBackgroundColor = "#ecebe4"
48
+ showSidebarBorder = true
49
+ chartCategoricalColors = ["#0ea5e9", "#059669", "#fbbf24"]
50
+
51
+ [theme.sidebar]
52
+ backgroundColor = "#f0f0ec"
53
+ secondaryBackgroundColor = "#ecebe3"
54
+ headingFontSizes = ["1.6rem", "1.4rem", "1.2rem"]
55
+ dataframeHeaderBackgroundColor = "#e4e4e0"
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9-slim
2
+
3
+ WORKDIR /app
4
+
5
+ RUN apt-get update && apt-get install -y \
6
+ build-essential \
7
+ curl \
8
+ software-properties-common \
9
+ git \
10
+ && rm -rf /var/lib/apt/lists/*
11
+
12
+ COPY requirements.txt ./
13
+ COPY src/ ./src/
14
+
15
+ RUN pip3 install -r requirements.txt
16
+
17
+ EXPOSE 8501
18
+
19
+ HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
+
21
+ ENTRYPOINT ["streamlit", "run", "src/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md CHANGED
@@ -1,2 +1,19 @@
1
- # chatbot_nlp
2
- NLP experimentation, deployment and testing for the depression and bipolar Chatbot.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Bipolar Chatbot
3
+ emoji: 🚀
4
+ colorFrom: red
5
+ colorTo: red
6
+ sdk: streamlit
7
+ app_port: 8501
8
+ sdk_version: "1.45.1"
9
+ app_file: src/app.py
10
+ pinned: true
11
+ short_description: Streamlit template space
12
+ ---
13
+
14
+ # Welcome to Streamlit!
15
+
16
+ Edit `/src/app.py` to customize this app to your heart's desire. :heart:
17
+
18
+ If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
19
+ forums](https://discuss.streamlit.io).
data/embeddings/Qwen_Qwen3-Embedding-0.6B.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc3ceea12f082aff546c36423f436922abb5600de3c77d494a5d8bd87f9edf62
3
+ size 1495168
data/processed/guideline_db.json ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/referenced_table_chunks.json ADDED
The diff for this file is too large to render. See raw diff
 
data/readme.md ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ - The `embeddings/` directory contains the data embedded by the different embedders.
2
+
3
+ - The `faiss_index/` directory contains the indexes built by FAISS from the different embedding files.
4
+
5
+ - The `processed/` directory contains the chunked data that has been preprocessed and is used in the RAG system.
6
+
7
+ - The `raw/` directory contains the original data from the clinical guideline.
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ torch>=2.1.0 # >=2.1 preferred for compatibility on Apple Silicon
3
+ faiss-cpu>=1.7.4
4
+ numpy>=1.24.0
5
+ python-dotenv>=1.0.0
6
+
7
+ # Required by Qwen Embedding model
8
+ transformers>=4.51.0
9
+ sentence-transformers>=2.7.0
10
+
11
+ # LLM providers
12
+ openai
13
+ together>=0.2.8
src/Rag.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import time
4
+ import requests
5
+ import numpy as np
6
+
7
+ from dotenv import load_dotenv
8
+ from sentence_transformers import SentenceTransformer
9
+ from together import Together
10
+
11
+ global db, referenced_tables_db, embedder, index, llm_client
12
+
13
+
14
+ def load_json_to_db(file_path):
15
+ with open(file_path) as f:
16
+ db = json.load(f)
17
+ return db
18
+
19
+
20
+ # -------- Embedding Functions --------
21
+ def make_embeddings(embedder, embedder_name, db):
22
+ texts = [chunk['text'] for chunk in db]
23
+ embeddings = embedder.encode(texts, convert_to_numpy=True, batch_size=1, show_progress_bar=True)
24
+ return embeddings
25
+
26
+
27
+ def get_project_root():
28
+ return os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
29
+
30
+
31
+ def save_embeddings(embedder_name, embeddings):
32
+ root = get_project_root()
33
+ file_path = os.path.join(root, "data", "embeddings", f"{embedder_name.replace('/', '_')}.npy")
34
+ os.makedirs(os.path.dirname(file_path), exist_ok=True)
35
+ np.save(file_path, embeddings)
36
+ print(f"Saved embeddings to: {file_path}")
37
+
38
+
39
+ def load_embeddings(embedder_name):
40
+ root = get_project_root()
41
+ file_path = os.path.join(root, "data", "embeddings", f"{embedder_name.replace('/', '_')}.npy")
42
+
43
+ try:
44
+ embeddings = np.load(file_path, allow_pickle=True)
45
+ print(f"Loaded embeddings from: {file_path}")
46
+ except FileNotFoundError:
47
+ print(f"Embeddings not found. Recomputing for: {embedder_name}")
48
+ embeddings = make_embeddings(embedder, embedder_name, db)
49
+ save_embeddings(embedder_name, embeddings)
50
+
51
+ return embeddings
52
+
53
+
54
+ def load_embedder_with_fallbacks(embedder_name):
55
+ print(f"Loading embedder {embedder_name}")
56
+ model = SentenceTransformer(
57
+ embedder_name,
58
+ trust_remote_code=True,
59
+ tokenizer_kwargs={"padding_side": "left"},
60
+ device='cpu'
61
+ )
62
+ return model
63
+
64
+
65
+ # -------- Cosine Similarity Index (no FAISS) --------
66
+ def build_cosine_index(embeddings):
67
+ norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
68
+ return embeddings / norms
69
+
70
+
71
+ def load_cosine_index(embedder_name):
72
+ embeddings = load_embeddings(embedder_name)
73
+ normalized_embeddings = build_cosine_index(embeddings)
74
+ return normalized_embeddings
75
+
76
+
77
+ # -------- Cosine Similarity Search (Brute Force) --------
78
+ def vector_search(query, embedder, db, index, referenced_table_db, k=6):
79
+ def get_detailed_instruct(task_description: str, query: str) -> str:
80
+ return f'Instruct: {task_description}\nQuery:{query}'
81
+
82
+ task = 'Given a search query, retrieve relevant passages that answer the query'
83
+ query_embedding = embedder.encode([get_detailed_instruct(task, query)], convert_to_numpy=True)
84
+ query_vec = query_embedding / np.linalg.norm(query_embedding)
85
+
86
+ cosine_similarities = np.dot(index, query_vec.T).flatten()
87
+ top_k_indices = np.argsort(-cosine_similarities)[:k]
88
+
89
+ results = []
90
+ referenced_tables = set()
91
+ existed_tables = set()
92
+
93
+ for i in top_k_indices:
94
+ results.append({
95
+ "text": db[i]['text'],
96
+ "section": db[i]['metadata']['section'],
97
+ "chunk_id": db[i]['metadata']['chunk_id'],
98
+ "similarity": float(cosine_similarities[i]),
99
+ })
100
+ if db[i]['metadata']['referee_id']:
101
+ existed_tables.add(db[i]['metadata']['referee_id'])
102
+ try:
103
+ if db[i]['metadata']['referenced_tables']:
104
+ referenced_tables.update(db[i]['metadata']['referenced_tables'])
105
+ except KeyError:
106
+ continue
107
+
108
+ table_to_add = [table for table in referenced_tables if table not in existed_tables]
109
+
110
+ for chunk in referenced_table_db:
111
+ if chunk['metadata']['referee_id'] in table_to_add:
112
+ results.append({
113
+ "text": chunk['text'],
114
+ "section": chunk['metadata']['section'],
115
+ "chunk_id": chunk['metadata']['chunk_id'],
116
+ })
117
+ return results
118
+
119
+
120
+ def load_together_llm_client():
121
+ load_dotenv()
122
+ return Together(api_key=os.getenv("TOGETHER_API_KEY"))
123
+
124
+
125
+ # -------- Prompt Construction --------
126
+ def construct_prompt(query, faiss_results):
127
+ with open("src/system_prompt.txt", "r") as f:
128
+ system_prompt = f.read().strip()
129
+
130
+ prompt = f"""
131
+ ### System Prompt
132
+ {system_prompt}
133
+
134
+ ### User Query
135
+ {query}
136
+
137
+ ### Clinical Guidelines Context
138
+ """
139
+ for res in faiss_results:
140
+ prompt += f"- reference: {res['section']}\n- This paragraph is from section: {res['text']}\n"
141
+ return prompt
142
+
143
+
144
+ def construct_prompt_with_memory(query, faiss_results, chat_history=None, history_limit=4):
145
+ with open("src/system_prompt.txt", "r") as f:
146
+ system_prompt = f.read().strip()
147
+
148
+ prompt = f"### System Prompt\n{system_prompt}\n\n"
149
+
150
+ if chat_history:
151
+ prompt += "### Chat History\n"
152
+ for m in chat_history[-history_limit:]:
153
+ prompt += f"{m['role'].title()}: {m['content']}\n"
154
+ prompt += "\n"
155
+
156
+ prompt += f"### User Query\n{query}\n\n"
157
+ prompt += "### Clinical Guidelines Context\n"
158
+ for res in faiss_results:
159
+ prompt += f"- reference: {res['section']}\n- This paragraph is from section: {res['text']}\n"
160
+ return prompt
161
+
162
+
163
+ def call_llm(llm_client, prompt, stream_flag=False, max_tokens=500, temperature=0.05, top_p=0.9, model_name="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"):
164
+ print(f"Calling LLM with model: {model_name}")
165
+ try:
166
+ if stream_flag:
167
+ def stream_generator():
168
+ response = llm_client.chat.completions.create(
169
+ model=model_name,
170
+ messages=[{"role": "user", "content": prompt}],
171
+ max_tokens=max_tokens,
172
+ temperature=temperature,
173
+ top_p=top_p,
174
+ stream=True,
175
+ )
176
+ for chunk in response:
177
+ if chunk.choices and chunk.choices[0].delta.content:
178
+ yield chunk.choices[0].delta.content
179
+ return stream_generator()
180
+ else:
181
+ response = llm_client.chat.completions.create(
182
+ model=model_name,
183
+ messages=[{"role": "user", "content": prompt}],
184
+ max_tokens=max_tokens,
185
+ temperature=temperature,
186
+ top_p=top_p,
187
+ stream=False,
188
+ )
189
+ return response.choices[0].message.content
190
+ except Exception as e:
191
+ print("Error in call_llm:", str(e))
192
+ import traceback
193
+ traceback.print_exc()
194
+ raise
195
+
196
+
197
+ def call_ollama(prompt, model="mistral", stream_flag=False, max_tokens=500, temperature=0.05, top_p=0.9):
198
+ url = "http://localhost:11434/api/generate"
199
+ payload = {
200
+ "model": model,
201
+ "prompt": prompt,
202
+ "temperature": temperature,
203
+ "top_p": top_p,
204
+ "max_tokens": max_tokens,
205
+ "stream": True
206
+ }
207
+
208
+ with requests.post(url, json=payload, stream=True) as response:
209
+ for line in response.iter_lines():
210
+ if line:
211
+ try:
212
+ data = json.loads(line.decode("utf-8"))
213
+ yield data["response"]
214
+ except Exception:
215
+ continue
216
+
217
+
218
+ # -------- Main Assistant Entry Points --------
219
+ def launch_depression_assistant(embedder_name, designated_client=None):
220
+ global db, referenced_tables_db, embedder, index, llm_client
221
+
222
+ db = load_json_to_db("data/processed/guideline_db.json")
223
+ referenced_tables_db = load_json_to_db("data/processed/referenced_table_chunks.json")
224
+
225
+ embedder = load_embedder_with_fallbacks(embedder_name)
226
+ index = load_cosine_index(embedder_name)
227
+
228
+ if designated_client is None:
229
+ print("No LLM client provided. Loading Together LLM client...")
230
+ try:
231
+ llm_client = load_together_llm_client()
232
+ except Exception:
233
+ print("Failed to load Together LLM client. Please check your API key.")
234
+ else:
235
+ llm_client = designated_client
236
+
237
+ print("---------Depression Assistant is ready to use!--------------\n\n")
238
+
239
+
240
+ def depression_assistant(query, model_name="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free", max_tokens=500, temperature=0.05, top_p=0.9, stream_flag=False, chat_history=None):
241
+ results = vector_search(query, embedder, db, index, referenced_tables_db, k=3)
242
+ prompt = construct_prompt_with_memory(query, results, chat_history=chat_history)
243
+
244
+ if llm_client == "Run Ollama Locally":
245
+ return results, call_ollama(prompt, model_name, stream_flag, max_tokens, temperature, top_p)
246
+ else:
247
+ return results, call_llm(llm_client, prompt, stream_flag, max_tokens, temperature, top_p, model_name)
248
+
249
+
250
+ def load_queries_and_answers(query_file, answers_file):
251
+ with open(query_file, 'r') as f:
252
+ queries = f.readlines()
253
+ with open(answers_file, 'r') as f:
254
+ answers = f.readlines()
255
+ return queries, answers
256
+
257
+
258
+ def write_batched_results(embedder_name, result_path):
259
+ launch_depression_assistant(embedder_name)
260
+ queries, answers = load_queries_and_answers("data/raw/queries.txt", "data/raw/answers.txt")
261
+ embedder_filename = embedder_name.replace('/', '_')
262
+
263
+ with open(f"{result_path}Retrieved_Results_by_{embedder_filename}.md", "w") as f1, \
264
+ open(f"{result_path}Response_by_{embedder_filename}.md", "w") as f2:
265
+
266
+ for i, query in enumerate(queries):
267
+ result, response = depression_assistant(query)
268
+
269
+ f1.write(f"## Query {i+1}\n{query.strip()}\n\n## Answer\n{answers[i].strip()}\n\n## Retrieved Results\n")
270
+ for res in result:
271
+ f1.write(f"\n\n#### {res['section']}\n\n{res['text']}\n")
272
+ f1.write("\n\n---\n\n")
273
+
274
+ f2.write(f"## Query {i+1}\n{query.strip()}\n\n## Answer\n{answers[i].strip()}\n\n## Response\n{response}\n\n---\n\n")
275
+ break # remove this `break` if you want to process all queries
src/app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from Rag import launch_depression_assistant, depression_assistant
3
+ from openai import OpenAI
4
+ from together import Together
5
+ import time
6
+ import os
7
+ from dotenv import load_dotenv
8
+
9
+ load_dotenv()
10
+
11
+ if "embedder_loaded" not in st.session_state:
12
+ st.session_state.embedder_loaded = False
13
+ if "current_embedder_name" not in st.session_state:
14
+ st.session_state.current_embedder_name = None
15
+ if "last_sources" not in st.session_state:
16
+ st.session_state.last_sources = []
17
+
18
+ st.set_page_config(
19
+ page_title="Bipolar Assistant Chatbot",
20
+ page_icon=":robot_face:",
21
+ layout="wide",
22
+ initial_sidebar_state="collapsed"
23
+ )
24
+
25
+ model_options = [
26
+ "Qwen/Qwen3-Embedding-0.6B",
27
+ "jinaai/jina-embeddings-v3",
28
+ "BAAI/bge-large-en-v1.5",
29
+ "BAAI/bge-small-en-v1.5",
30
+ "BAAI/bge-base-en-v1.5",
31
+ "sentence-transformers/all-mpnet-base-v2",
32
+ "Other"
33
+ ]
34
+
35
+ st.sidebar.title("Settings")
36
+ with st.sidebar:
37
+ st.subheader("Model Selection")
38
+ embedder_name = st.selectbox("Select embedder model", model_options, index=0)
39
+
40
+ if embedder_name == "Other":
41
+ embedder_name = st.text_input('Enter the embedder model name')
42
+
43
+ llm_client = Together(api_key=os.getenv("TOGETHER_API_KEY"))
44
+
45
+ if (not st.session_state.embedder_loaded or
46
+ st.session_state.current_embedder_name != embedder_name):
47
+
48
+ with st.spinner(f"Loading embedding model: {embedder_name}..."):
49
+ launch_depression_assistant(embedder_name=embedder_name, designated_client=llm_client)
50
+ st.session_state.embedder_loaded = True
51
+ st.session_state.current_embedder_name = embedder_name
52
+ st.success(f"✅ Embedding model {embedder_name} loaded successfully!")
53
+ else:
54
+ st.info(f"📋 Current embedding model: {st.session_state.current_embedder_name}")
55
+
56
+ selected_model = st.selectbox('Choose a model for generation',
57
+ ["meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
58
+ "deepseek-ai/deepseek-r1",
59
+ "meta/llama-3.3-70b-instruct"],
60
+ key='selected_model')
61
+
62
+ if selected_model in ["deepseek-ai/deepseek-r1", "meta/llama-3.3-70b-instruct"]:
63
+ max_length = 1000
64
+ llm_client = OpenAI(
65
+ base_url="https://integrate.api.nvidia.com/v1",
66
+ api_key=os.getenv("NVIDIA_API_KEY", None),
67
+ )
68
+
69
+ temperature = st.slider('temperature', min_value=0.01, max_value=1.0, value=0.05, step=0.01)
70
+ top_p = st.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
71
+ max_length = st.slider('max_length', min_value=100, max_value=1000, value=500, step=10)
72
+
73
+ st.title("💬 Bipolar Assistant Chatbot")
74
+
75
+ if "messages" not in st.session_state:
76
+ st.session_state.messages = [{
77
+ "role": "assistant",
78
+ "content": "This is a simple Bipolar assistant bot that uses **RAG (Retrieval-Augmented Generation)** to answer questions related to bipolar. You can ask questions related to bipolar and get responses based on [CANMAT clinical guidelines](https://pmc.ncbi.nlm.nih.gov/articles/PMC11351064/)."
79
+ }]
80
+
81
+ chat_col, sources_col = st.columns([1, 1])
82
+
83
+ with sources_col:
84
+ st.markdown("### Sources")
85
+ sources_placeholder = st.empty()
86
+
87
+ with sources_placeholder.container():
88
+ if st.session_state.last_sources:
89
+ for i, result in enumerate(st.session_state.last_sources):
90
+ st.markdown(f"**Source {i + 1}** | Similarity: {result.get('similarity', 'N/A')}")
91
+ st.markdown(f"- **Section:** {result['section']}")
92
+ st.markdown(f"> {result['text']}")
93
+ st.markdown("---")
94
+ else:
95
+ st.markdown("*Sources will appear here after you ask a question.*")
96
+
97
+ with chat_col:
98
+ for message in st.session_state.messages:
99
+ with st.chat_message(message["role"]):
100
+ st.markdown(message["content"])
101
+
102
+ if user_input := st.chat_input("Ask me questions about the CANMAT bipolar guideline!"):
103
+ st.chat_message("user").markdown(user_input)
104
+ st.session_state.messages.append({"role": "user", "content": user_input})
105
+
106
+ history = st.session_state.messages[:-1][-4:]
107
+
108
+ collected = ""
109
+ t0 = time.perf_counter()
110
+ results, response = depression_assistant(user_input, model_name=selected_model, max_tokens=max_length,
111
+ temperature=temperature, top_p=top_p, stream_flag=True,
112
+ chat_history=history)
113
+
114
+ st.session_state.last_sources = results if results else []
115
+
116
+ with sources_placeholder.container():
117
+ if st.session_state.last_sources:
118
+ for i, result in enumerate(st.session_state.last_sources):
119
+ st.markdown(f"**Source {i + 1}** | Similarity: {result.get('similarity', 'N/A')}")
120
+ st.markdown(f"- **Section:** {result['section']}")
121
+ st.markdown(f"> {result['text']}")
122
+ st.markdown("---")
123
+ else:
124
+ st.markdown("*Sources will appear here after you ask a question.*")
125
+
126
+ placeholder = st.empty()
127
+ for chunk in response:
128
+ collected += chunk
129
+ placeholder.markdown(collected)
130
+
131
+ t1 = time.perf_counter()
132
+ print(f"[Time] Retriever + Generator takes: {t1 - t0:.2f} seconds in total.")
133
+ print(f"============== Finish R-A-Generation for Current Query {user_input} ==============")
134
+
135
+ st.session_state.messages.append({"role": "assistant", "content": collected})
136
+
137
+ st.rerun()
src/data_processing/bipolar.html ADDED
The diff for this file is too large to render. See raw diff
 
src/data_processing/bipolar_modified.html ADDED
The diff for this file is too large to render. See raw diff
 
src/data_processing/guideline_db copy.json ADDED
The diff for this file is too large to render. See raw diff
 
src/data_processing/image_processing.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ import json
3
+ import re
4
+
5
def get_graph_metadata(graph, url="https://pmc.ncbi.nlm.nih.gov/articles/PMC11351064/#"):
    """Collect descriptive metadata for one graphic element of the parsed article.

    A graphic that sits inside a ``<figure>`` is treated as a figure;
    otherwise it is treated as a table image and its nearest enclosing
    ``<section>`` supplies the name and caption.

    Args:
        graph: Parsed element for the graphic (presumably a BeautifulSoup
            ``Tag``; it must support ``find_parent`` and ``get``).
        url: Prefix to which the enclosing section's ``id`` is appended.

    Returns:
        Tuple ``(attribution, caption, figure_flag, headings, image_url,
        label, name, referee_id, section_url)``.
    """
    figure = graph.find_parent("figure")

    # Nearest ancestor whose id looks like "section<digits>-<digits>".
    section = graph.find_parent(id=re.compile(r'^section\d+-\d+$'))
    section_url = url + section.get("id")
    headings = section.find("h2").get_text() + " > " + section.find("h3").get_text()

    image_url = graph.get("src")

    if figure:
        figure_flag = True
        id_prefix = "figure"
        name = figure.select_one(".obj_head").get_text()
        # The caption is the first <p> inside the figure that has no attributes.
        plain_paragraphs = [p.get_text() for p in figure.find_all("p") if not p.attrs]
        caption = plain_paragraphs[0]
        attribution = "(" + figure.select_one('[aria-label="Attribution"]').get_text() + ")"
    else:
        figure_flag = False
        id_prefix = "table"
        table_section = graph.find_parent("section")
        name = table_section.select_one(".obj_head").get_text()
        caption = table_section.select_one(".caption p").get_text()
        attribution = ""

    label = name + " " + caption

    # NOTE(review): the pattern keeps exactly one character before each
    # period, so a name such as "Table 10." would produce "0" — confirm the
    # names never carry multi-digit numbers, and keep this in sync with
    # whatever builds the matching ids elsewhere before changing it.
    number = "_".join(re.findall(r"(.{1})\.", name)).lower()
    referee_id = f"{id_prefix}_{number}"

    return attribution, caption, figure_flag, headings, image_url, label, name, referee_id, section_url
54
+
55
def to_chunk(text_block, section_url, referee_id, headings):
    """Wrap a text block and its metadata in the chunk dictionary layout.

    Args:
        text_block: Text content of the chunk.
        section_url: Stored under ``metadata['section']``.
        referee_id: Stored under ``metadata['referee_id']``.
        headings: Stored under ``metadata['headings']``.

    Returns:
        ``{"text": ..., "metadata": {...}}`` with ``metadata['type']`` fixed
        to ``"table image"``.
    """
    metadata = {
        "section": section_url,
        "type": "table image",
        "referee_id": referee_id,
        "headings": headings,
    }
    return {"text": text_block, "metadata": metadata}
67
+
68
def main():
    """Pair each graphic in the source HTML with its parsed text and save the matches.

    Reads ``../data/raw/source.html`` and ``../data/processed/parsed_images.txt``
    (blocks separated by ``------``), walks the ``.graphic`` elements and text
    blocks in parallel, keeps a block only when it starts with the graphic's
    name, and writes the resulting chunks to ``../data/processed/graphs.json``.
    All paths are relative to the current working directory.
    """
    with open('../data/raw/source.html', encoding="utf-8") as f:
        html = f.read()
    # NOTE(review): no parser is named, so BeautifulSoup picks whichever one
    # is installed; consider pinning one so results are reproducible.
    soup = BeautifulSoup(html)

    with open("../data/processed/parsed_images.txt", encoding="utf-8") as f:
        text_blocks = f.read().split("------")

    docs = []
    for graph, raw_block in zip(soup.select(".graphic"), text_blocks):
        _, _, _, headings, _, _, name, referee_id, section_url = get_graph_metadata(graph)
        block = raw_block.strip()

        # Skip pairs whose text does not begin with the graphic's name.
        if not block.startswith(name):
            continue
        docs.append(to_chunk(block, section_url, referee_id, headings))

    with open("../data/processed/graphs.json", "w", encoding="utf-8") as f:
        json.dump(docs, f, indent=4)
89
+
90
+ if __name__ == "__main__":
91
+ main()
src/data_processing/main.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import json
3
+ from tables import get_table_metadata, to_text, get_table_data
4
+ from bs4 import BeautifulSoup, Tag, NavigableString
5
+
6
# Abbreviation -> expansion glossary appended to chunks by append_definition().
abbr_map = {
    "ACT": "Acceptance and commitment therapy",
    "ADHD": "Attention-deficit hyperactivity disorder",
    "AI": "Artificial intelligence",
    "BA": "Behavioural activation",
    "CAM": "Complementary and alternative medicine",
    # "CANMAT":"Canadian Network for Mood and Anxiety Treatments",
    "CBASP": "Cognitive behavioural analysis system of psychotherapy",
    "CBT": "Cognitive-behavioural therapy",
    "CPD": "Continuing professional development",
    "CYP": "Cytochrome P450",
    "DBS": "Deep brain stimulation",
    "DHI": "Digital health intervention",
    "DLPFC": "Dorsolateral prefrontal cortex",
    "DSM-5": "Diagnostic and Statistical Manual",
    "DSM-5-TR": "Diagnostic and Statistical Manual, 5th edition, Text Revision",
    "DSM-IV-TR": "Diagnostic and Statistical Manual, 4th edition, Text Revision",
    "DTD": "Difficult-to-treat depression",
    "ECG": "Electrocardiography",
    "ECT": "Electroconvulsive therapy",
    "EEG": "Electroencephalography",
    "GRADE": "Grading of Recommendations Assessment, Development, and Evaluation",
    "ICD": "International Classification of Diseases",
    "IPT": "Interpersonal therapy",
    "MAOI": "Monoamine oxidase inhibitor",
    "MBC": "Measurement-based care",
    "MBCT": "Mindfulness-based cognitive therapy",
    "MCT": "Metacognitive therapy",
    "MDD": "Major depressive disorder",
    "MDE": "Major depressive episode",
    "MI": "Motivational interviewing",
    "MST": "Magnetic seizure therapy",
    "NbN": "Neuroscience-based nomenclature",
    "NDRI": "Norepinephrine-dopamine reuptake inhibitor",
    "NMDA": "N-methyl-D-aspartate",
    "NSAID": "Nonsteroidal anti-inflammatory drug",
    "PDD": "Persistent depressive disorder",
    "PDT": "Psychodynamic psychotherapy",
    "PHQ": "Patient health questionnaire",
    "PST": "Problem-solving therapy",
    "RCT": "Randomized controlled trial",
    "rTMS": "Repetitive transcranial magnetic stimulation",
    "SDM": "Shared decision-making",
    "SNRI": "Serotonin-norepinephrine reuptake inhibitor",
    "SSRI": "Selective serotonin reuptake inhibitor",
    "STPP": "Short-term psychodynamic psychotherapy",
    "TBS": "Theta burst stimulation",
    "TCA": "Tricyclic antidepressants",
    "tDCS": "Transcranial direct current stimulation",
    "TMS": "Transcranial magnetic stimulation",
    "TRD": "Treatment-resistant depression",
    "VNS": "Vagus nerve stimulation",
    "WHO": "World Health Organization",
}


def append_definition(guideline):
    """Append an "Abbreviations: ..." glossary line to each chunk's text.

    For every chunk, the text is scanned for the keys of ``abbr_map``; when any
    are present, ``"\\n" + "Abbreviations: K: V; ..." + "\\n\\n"`` is appended,
    with entries sorted by abbreviation. Chunks whose
    ``metadata["referee_id"]`` is ``"table_c"`` are left untouched.

    The matcher is built from the glossary keys themselves rather than a
    generic "two or more capitals" pattern, so mixed-case and hyphenated keys
    (``rTMS``, ``tDCS``, ``NbN``, ``DSM-5-TR`` ...) are recognised too.

    Args:
        guideline: List of chunk dicts with ``"text"`` and ``"metadata"``
            entries. It is modified in place.

    Returns:
        The same ``guideline`` list.
    """
    # Longest keys first so "DSM-5-TR" wins over "DSM-5"; the lookarounds
    # require a non-word character (or string edge) on both sides, so "TMS"
    # does not fire inside "rTMS".
    alternatives = "|".join(
        re.escape(key) for key in sorted(abbr_map, key=len, reverse=True)
    )
    pattern = re.compile(r'(?<!\w)(?:' + alternatives + r')(?!\w)')

    for chunk in guideline:
        if chunk.get('metadata', {}).get('referee_id') == 'table_c':
            continue

        text = chunk['text']
        found_abbrs = {m for m in pattern.findall(text) if m in abbr_map}
        if not found_abbrs:
            continue

        definitions = [f"{abbr}: {abbr_map[abbr]}" for abbr in sorted(found_abbrs)]
        definitions_text = "Abbreviations: " + "; ".join(definitions) + "\n\n"
        chunk['text'] = text + "\n" + definitions_text

    return guideline
87
+
88
def parse_title(soup):
    """Build the title chunk from the document's first ``<h1>``.

    Args:
        soup: Parsed document exposing ``find``.

    Returns:
        A chunk dict whose text is the inner markup of the ``<h1>`` (as
        returned by ``decode_contents()``) with newlines removed, or ``None``
        when the document has no ``<h1>``.
    """
    h1 = soup.find("h1")
    if not h1:
        return None

    inner = h1.decode_contents().replace('\n', '')
    metadata = {
        "section": "title",
        "type": "title",
        "headings": "Title of the guideline document",
        "referenced_tables": [],
        "referee_id": "",
    }
    return {"text": inner, "metadata": metadata}
102
+
103
def prepend_headings_to_text(guideline):
    """Number the chunks and prefix each text with its heading trail.

    Every chunk gets ``metadata["chunk_id"]`` set to its list position, and its
    text becomes ``"<headings> > paragraph id: <position>\\n\\n<text>"``.
    The list is modified in place; nothing is returned.
    """
    for position, chunk in enumerate(guideline):
        meta = chunk['metadata']
        meta['chunk_id'] = position
        chunk['text'] = meta['headings'] + " > paragraph id: " + str(position) + "\n\n" + chunk['text']
107
+
108
+
109
def build_headings_trail(p):
    """Return the " > "-joined chain of h2-h6 headings that lead to ``p``.

    The trail starts with the nearest preceding sibling heading of ``p`` (or
    ``'No heading'`` when there is none). It then walks outwards: first to the
    closest enclosing ``<section>``/``<figure>`` that has an id, then through
    enclosing ``<section>`` elements with an id, prepending each one's nearest
    preceding sibling heading when it has one.

    Args:
        p: The element to locate, or ``None``. Callers in this module pass the
            result of ``find(...)``, which is ``None`` when nothing matched.

    Returns:
        The trail with surrounding whitespace stripped and newlines replaced by
        spaces; ``'No heading'`` when ``p`` is ``None``.
    """
    if p is None:
        # Nothing to anchor on; report the same placeholder used when an
        # element has no preceding heading instead of raising AttributeError.
        return 'No heading'

    def is_heading(tag):
        return bool(re.match(r'^h[2-6]$', tag.name))

    nearest = p.find_previous_sibling(is_heading)
    headings = nearest.get_text(strip=True) if nearest else 'No heading'

    parent_sec = p.find_parent(["section", 'figure'], id=True)
    while parent_sec:
        outer = parent_sec.find_previous_sibling(is_heading)
        if outer:
            headings = outer.get_text(strip=True) + ' > ' + headings
        parent_sec = parent_sec.find_parent("section", id=True)

    return headings.strip().replace('\n', ' ')
122
+
123
+
124
def delete_bib_links(soup):
    """Remove every ``<a>`` whose href starts with ``#bdi12609-bib-``.

    The matching anchors are destroyed in place with ``decompose()``.

    Returns:
        The same ``soup`` object.
    """
    bib_prefix = "#bdi12609-bib-"
    for anchor in soup.find_all("a", href=True):
        if not anchor["href"].startswith(bib_prefix):
            continue
        anchor.decompose()
    return soup
129
+
130
def delete_fig_and_tbl_sections(soup):
    """Strip figures and table sections out of the document, in place.

    First every ``<figure>`` is decomposed, then every ``<section>`` whose
    class attribute is ``"tw xbox font-sm"``.

    Returns:
        The same ``soup`` object.
    """
    targets = (
        ('figure', {}),
        ('section', {"class_": "tw xbox font-sm"}),
    )
    # The second lookup runs only after the figures are gone, as the two
    # passes are performed strictly one after the other.
    for tag_name, filters in targets:
        for node in soup.find_all(tag_name, **filters):
            node.decompose()
    return soup
138
+
139
+
140
def scan_links_and_tables(p):
    """Collect the figure/table ids that the links inside ``p`` point to.

    Looks at every ``<a>`` whose href contains ``-fig-`` or ``-tbl-`` and
    extracts each ``fig-<digits>`` / ``tbl-<digits>`` occurrence from it. Each
    hit is printed; a separator line is printed when at least one id was found.

    Returns:
        A set of strings such as ``"tbl-0012"`` (empty when nothing matched).
    """
    found = set()
    link_filter = re.compile(r'-(fig|tbl)-')
    for anchor in p.find_all('a', href=link_filter):
        for kind, digits in re.findall(r'(fig|tbl)-(\d+)', anchor['href']):
            print(f"Found table links: {kind}-{digits}")
            found.add(f"{kind}-{digits}")
    if found:
        print("--------")

    return found
153
+
154
def parse_paragraph(soup, output):
    """Append one "paragraph" chunk to ``output`` for every ``<p>`` in ``soup``.

    Each chunk carries the paragraph text, a section URL built from the id of
    the nearest enclosing ``<section>``, the heading trail, and the figure and
    table ids the paragraph links to.

    Args:
        soup: Parsed document (expected to have figures and table sections
            already removed by the caller).
        output: List that receives the chunk dicts; modified in place.
    """
    for p in soup.find_all('p'):
        parent = p.find_parent("section")
        # A <section> is not guaranteed to carry an id attribute; fall back to
        # the same placeholder used when there is no enclosing section at all
        # rather than raising KeyError.
        sec_id = parent.get("id", "unknown") if parent else "unknown"

        output.append({
            "text": p.get_text(strip=False),
            "metadata": {
                "section": f"https://pmc.ncbi.nlm.nih.gov/articles/PMC5947163/#{sec_id}",
                "type": "paragraph",
                "headings": build_headings_trail(p),
                "referenced_tables": list(scan_links_and_tables(p)),
                "referee_id": "",
            }
        })
170
+
171
+
172
def parse_figures(soup, output):
    """Append one "figure" chunk to ``output`` for every ``<figure>`` in ``soup``.

    The chunk text holds the image link (or a placeholder when the figure has
    no ``<img>``) and the caption taken from the first ``<p>`` of the
    ``<figcaption>``. The ``referee_id`` is the ``fig-<digits>`` part of the
    figure's id, or ``"fig_unknown"`` when the id has no such part.

    Args:
        soup: Parsed document.
        output: List that receives the chunk dicts; modified in place.
    """
    for fig in soup.find_all('figure'):
        sec_id = fig["id"]
        fig_caption = fig.find('figcaption').find('p').get_text()

        img = fig.find('img')
        img_src_link = img['src'] if img else "No image link found"

        id_match = re.search(r'(fig)-(\d+)', sec_id)
        first_p = fig.find('p')

        chunk_text = f" > Figure: Image link: {img_src_link}-----\nFigure Caption: {fig_caption}\n"
        metadata = {
            "section": f"https://pmc.ncbi.nlm.nih.gov/articles/PMC5947163/#{sec_id}",
            "type": "figure",
            "headings": build_headings_trail(first_p),
            "referenced_tables": list(scan_links_and_tables(first_p)),
            "referee_id": id_match.group(0) if id_match else "fig_unknown",
        }
        output.append({"text": chunk_text, "metadata": metadata})
191
+
192
def parse_tables(soup, output):
    """Append one "table" chunk to ``output`` per table section in ``soup``.

    Table sections are the ``<section>`` elements whose class attribute is
    ``"tw xbox font-sm"``. Two layouts are handled:

    * the section contains an ``<img>``: the chunk text holds the image link
      and the concatenated text of the ``div`` elements matched by
      ``class_='caption p'``;
    * otherwise: the text is produced by the ``tables`` helpers
      (``get_table_metadata`` -> ``get_table_data`` -> ``to_text``).

    The ``referee_id`` is the ``tbl-<digits>`` part of the section id, or
    ``"tbl_unknown"`` when the id has no such part.

    Args:
        soup: Parsed document.
        output: List that receives the chunk dicts; modified in place.
    """
    for table in soup.find_all("section", class_="tw xbox font-sm"):
        sec_id = table["id"]
        id_match = re.search(r'(tbl)-(\d+)', sec_id)

        img = table.find('img')
        if img:
            # Table published as an image.
            img_src_link = img['src']
            anchor = table.find('p')
            caption = "".join(
                cap.get_text(strip=True) + " "
                for cap in table.find_all('div', class_='caption p')
            )
            text = f" > Table: Image link: {img_src_link}-----\nTable Caption: {caption}\n"
        else:
            # Regular HTML table: delegate to the tables helpers.
            name, caption, footnotes, headings, label, ref_id, section_url = get_table_metadata(table, base_url="")
            table_data = get_table_data(table, footnotes)
            text = to_text(table_data, label, caption)
            anchor = table.find('div')

        output.append({
            "text": text,
            "metadata": {
                "section": f"https://pmc.ncbi.nlm.nih.gov/articles/PMC5947163/#{sec_id}",
                "type": "table",
                "headings": build_headings_trail(anchor),
                "referenced_tables": [],
                "referee_id": id_match.group(0) if id_match else "tbl_unknown",
            }
        })
232
+
233
+
234
+
235
def parse_main_article(soup, output):
    """Run the full extraction over the article and return ``output``.

    Order matters: bibliography links are removed first, figures and tables
    are extracted next, and only then are the figure/table sections deleted so
    that the paragraph pass does not pick up their ``<p>`` elements.

    Args:
        soup: Parsed document; it is modified in place along the way.
        output: List that receives the chunk dicts.

    Returns:
        The same ``output`` list.
    """
    cleaned = delete_bib_links(soup)

    for extract in (parse_figures, parse_tables):
        extract(cleaned, output)

    body_only = delete_fig_and_tbl_sections(cleaned)
    parse_paragraph(body_only, output)

    return output
248
+
249
+
250
def main():
    """Parse ``bipolar.html`` and write the chunk database to ``guideline_db.json``.

    The title chunk comes first, followed by the chunks produced by
    ``parse_main_article``. Afterwards every chunk is numbered and prefixed
    with its heading trail, and abbreviation glossaries are appended.
    """
    with open('bipolar.html', 'r', encoding='utf-8') as f:
        html = f.read()
    soup = BeautifulSoup(html, "html.parser")

    chunks = parse_main_article(soup, [parse_title(soup)])
    print(f"Parsed {len(chunks)} paragraphs from the main article.")

    prepend_headings_to_text(chunks)
    append_definition(chunks)

    with open("guideline_db.json", "w", encoding="utf-8") as f:
        json.dump(chunks, f, ensure_ascii=False, indent=4)
    print(f"guideline_db.json for bipolar created with {len(chunks)} chunks.")
267
+
268
+
269
def write_referenced_tables():
    """Append hand-written table texts to the database and export the table chunks.

    Reads ``guideline_db.json``, appends the stripped contents of
    ``table12_textual.txt``, ``table14_textual.txt`` and ``table17_textual.txt``
    to the chunks at positions 21, 23 and 26, writes the database back, and
    writes chunks ``[1:34]`` to ``referenced_table_chunks.json``.

    The positions are hard-coded and therefore only valid for the database
    produced by ``main()`` from the current ``bipolar.html``.

    The append is skipped when a chunk's text already ends with the table text,
    so running this function repeatedly does not duplicate the content.
    """
    textual_tables = (
        (21, "table12_textual.txt"),
        (23, "table14_textual.txt"),
        (26, "table17_textual.txt"),
    )

    with open("guideline_db.json", "r", encoding="utf-8") as f:
        guideline_db = json.load(f)

    for position, filename in textual_tables:
        with open(filename, "r", encoding="utf-8") as f:
            addition = f.read().strip()
        chunk = guideline_db[position]
        # Idempotence guard: the script entry point calls only this function,
        # so it is routinely re-run against an already-updated database.
        if not chunk['text'].endswith(addition):
            chunk['text'] += addition

    figures_and_tables = guideline_db[1:34]  # Assuming these are the table chunks

    # Write the updated database back to the original file.
    with open("guideline_db.json", "w", encoding="utf-8") as f:
        json.dump(guideline_db, f, ensure_ascii=False, indent=4)
    print("guideline_db.json updated with table 12 14 17 chunks.")

    with open("referenced_table_chunks.json", "w", encoding="utf-8") as f:
        json.dump(figures_and_tables, f, ensure_ascii=False, indent=4)
    print(f"referenced_table_chunks.json created with {len(figures_and_tables)} chunks.")


if __name__ == "__main__":
    # main() is intentionally not run here: this entry point expects an
    # existing guideline_db.json (produced by main()) and only post-processes it.
    # main()

    # Update guideline_db.json and create referenced_table_chunks.json.
    write_referenced_tables()
src/data_processing/table12_textual.txt ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ #### Table 12. Hierarchical rankings of first and second‐line treatments recommended for management of acute mania
3
+
4
+
5
+ ##### First-line treatments: Monotherapies
6
+
7
+
8
+ ###### Lithium
9
+
10
+
11
+ ####### Level of evidence by phase of treatment
12
+
13
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Acute depression: level 2 evidence.
14
+
15
+ ####### Considerations for treatment selection
16
+
17
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
18
+
19
+ ###### Quetiapine
20
+
21
+
22
+ ####### Level of evidence by phase of treatment
23
+
24
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Acute depression: level 1 evidence.
25
+
26
+ ####### Considerations for treatment selection
27
+
28
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
29
+
30
+ ###### Divalproex
31
+
32
+
33
+ ####### Level of evidence by phase of treatment
34
+
35
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of mania during maintenance: level 3 evidence. Prevention of depression during maintenance: level 2 evidence. Acute depression: level 2 evidence.
36
+
37
+ ####### Considerations for treatment selection
38
+
39
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
40
+
41
+ ###### Asenapine
42
+
43
+
44
+ ####### Level of evidence by phase of treatment
45
+
46
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Prevention of depression during maintenance: level 2 evidence. Acute depression: no data.
47
+
48
+ ####### Considerations for treatment selection
49
+
50
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
51
+
52
+ ###### Aripiprazole
53
+
54
+
55
+ ####### Level of evidence by phase of treatment
56
+
57
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; although monotherapies are listed above combination therapies in the hierarchy, combination therapies may be indicated as the preferred choice in patients with previous history of partial response to monotherapy and in those with psychotic mania or in situations where rapid response is desirable. Acute depression: level 1 negative evidence.
58
+
59
+ ####### Considerations for treatment selection
60
+
61
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
62
+
63
+ ###### Paliperidone (>6 mg)
64
+
65
+
66
+ ####### Level of evidence by phase of treatment
67
+
68
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; although monotherapies are listed above combination therapies in the hierarchy, combination therapies may be indicated as the preferred choice in patients with previous history of partial response to monotherapy and in those with psychotic mania or in situations where rapid response is desirable. Acute depression: no data.
69
+
70
+ ####### Considerations for treatment selection
71
+
72
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
73
+
74
+ ###### Risperidone
75
+
76
+
77
+ ####### Level of evidence by phase of treatment
78
+
79
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 3 evidence. Prevention of mania during maintenance: level 3 evidence. Prevention of depression during maintenance: no data. Acute depression: no data.
80
+
81
+ ####### Considerations for treatment selection
82
+
83
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
84
+
85
+ ###### Cariprazine
86
+
87
+
88
+ ####### Level of evidence by phase of treatment
89
+
90
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: no data. Prevention of mania during maintenance: no data. Prevention of depression during maintenance: no data. Acute depression: level 1 evidence.
91
+
92
+ ####### Considerations for treatment selection
93
+
94
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: limited impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
95
+
96
+ ##### First-line treatments: Combination therapies
97
+
98
+
99
+ ###### Quetiapine and Lithium/divalproex
100
+
101
+
102
+ ####### Level of evidence by phase of treatment
103
+
104
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Acute depression: level 4 evidence; no controlled trials; however, clinical experience suggests that it is a useful strategy.
105
+
106
+ ####### Considerations for treatment selection
107
+
108
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
109
+
110
+ ###### Aripiprazole and Lithium/divalproex
111
+
112
+
113
+ ####### Level of evidence by phase of treatment
114
+
115
+ Acute mania: level 2 evidence. Prevention of any mood episode during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Acute depression: level 4 evidence.
116
+
117
+ ####### Considerations for treatment selection
118
+
119
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
120
+
121
+ ###### Risperidone and Lithium/divalproex
122
+
123
+
124
+ ####### Level of evidence by phase of treatment
125
+
126
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Prevention of depression during maintenance: no data. Acute depression: level 4 evidence.
127
+
128
+ ####### Considerations for treatment selection
129
+
130
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
131
+
132
+ ###### Asenapine and Lithium/divalproex
133
+
134
+
135
+ ####### Level of evidence by phase of treatment
136
+
137
+ Acute mania: level 2 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Prevention of depression during maintenance: no data. Acute depression: level 4 evidence.
138
+
139
+ ####### Considerations for treatment selection
140
+
141
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
142
+
143
+ ##### Second-line treatments
144
+
145
+
146
+ ###### Olanzapine
147
+
148
+
149
+ ####### Level of evidence by phase of treatment
150
+
151
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Acute depression: level 1 evidence; did not separate from placebo on core symptoms of depression.
152
+
153
+ ####### Considerations for treatment selection
154
+
155
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
156
+
157
+ ###### Carbamazepine
158
+
159
+
160
+ ####### Level of evidence by phase of treatment
161
+
162
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Prevention of depression during maintenance: level 2 evidence. Acute depression: level 3 evidence.
163
+
164
+ ####### Considerations for treatment selection
165
+
166
+ Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
167
+
168
+ ###### Olanzapine and Lithium/divalproex
169
+
170
+
171
+ ####### Level of evidence by phase of treatment
172
+
173
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Prevention of depression during maintenance: level 4 evidence. Acute depression: no data.
174
+
175
+ ####### Considerations for treatment selection
176
+
177
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
178
+
179
+ ###### Lithium and divalproex
180
+
181
+
182
+ ####### Level of evidence by phase of treatment
183
+
184
+ Acute mania: level 2 evidence. Prevention of any mood episode during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Prevention of depression during maintenance: no data. Acute depression: no data.
185
+
186
+ ####### Considerations for treatment selection
187
+
188
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
189
+
190
+ ###### Ziprasidone
191
+
192
+
193
+ ####### Level of evidence by phase of treatment
194
+
195
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 3 evidence. Prevention of mania during maintenance: level 3 evidence. Prevention of depression during maintenance: no data. Acute depression: level 1 negative evidence.
196
+
197
+ ####### Considerations for treatment selection
198
+
199
+ Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
200
+
201
+ ###### Haloperidol
202
+
203
+
204
+ ####### Level of evidence by phase of treatment
205
+
206
+ Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: no data. Prevention of mania during maintenance: level 4 evidence. Prevention of depression during maintenance: level 4 negative evidence. Acute depression: no data.
207
+
208
+ ####### Considerations for treatment selection
209
+
210
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: moderate impact on treatment selection.
211
+
212
+ ###### ECT
213
+
214
+
215
+ ####### Level of evidence by phase of treatment
216
+
217
+ Acute mania: level 3 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Prevention of depression during maintenance: level 4 evidence. Acute depression: level 4 evidence.
218
+
219
+ ####### Considerations for treatment selection
220
+
221
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
222
+
223
+
src/data_processing/table14_textual.txt ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ #### Table 14. Hierarchical rankings of first and second‐line treatments recommended for management of acute bipolar I depression
4
+
5
+
6
+ ##### First-line treatments
7
+
8
+ ###### Quetiapine
9
+
10
+
11
+ ####### Level of evidence by phase of treatment
12
+
13
+ Acute depression: level 1 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Acute mania: level 1 evidence.
14
+
15
+ ####### Considerations for treatment selection
16
+
17
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
18
+
19
+ ###### Lurasidone and Lithium/divalproex
20
+
21
+
22
+ ####### Level of evidence by phase of treatment
23
+
24
+ Acute depression: level 1 evidence. Prevention of any mood episode during maintenance: level 3 evidence; trend for superiority on the primary efficacy measure, hence the lower rating. Prevention of depression during maintenance: level 3 evidence; effective in those with an index episode of depression. Prevention of mania during maintenance: level 4 evidence; negative data from the trial are probably due to methodological issues; rating based on expert opinion. Acute mania: no data.
25
+
26
+ ####### Considerations for treatment selection
27
+
28
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate / limited impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
29
+
30
+ ###### Lithium
31
+
32
+
33
+ ####### Level of evidence by phase of treatment
34
+
35
+ Acute depression: level 2 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Acute mania: no data.
36
+
37
+ ####### Considerations for treatment selection
38
+
39
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
40
+
41
+ ###### Lamotrigine
42
+
43
+
44
+ ####### Level of evidence by phase of treatment
45
+
46
+ Acute depression: level 2 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 2 evidence. Acute mania: level 1 negative evidence.
47
+
48
+ ####### Considerations for treatment selection
49
+
50
+ Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: limited impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: limited impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
51
+
52
+ ###### Lurasidone
53
+
54
+
55
+ ####### Level of evidence by phase of treatment
56
+
57
+ Acute depression: level 2 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of depression during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Acute mania: no data.
58
+
59
+ ####### Considerations for treatment selection
60
+
61
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
62
+
63
+ ###### Lamotrigine(adj)
64
+
65
+
66
+ ####### Level of evidence by phase of treatment
67
+
68
+ Acute depression: level 2 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of depression during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Acute mania: level 4 negative evidence.
69
+
70
+ ####### Considerations for treatment selection
71
+
72
+ Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
73
+
74
+ ##### Second-line treatments
75
+
76
+ ###### Divalproex
77
+
78
+
79
+ ####### Level of evidence by phase of treatment
80
+
81
+ Acute depression: level 2 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 2 evidence. Prevention of mania during maintenance: level 3 evidence. Acute mania: level 1 evidence.
82
+
83
+ ####### Considerations for treatment selection
84
+
85
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
86
+
87
+ ###### SSRIs/bupropion(adj)
88
+
89
+
90
+ ####### Level of evidence by phase of treatment
91
+
92
+ Acute depression: level 1 evidence. Prevention of any mood episode during maintenance: no data. Prevention of depression during maintenance: level 4 evidence. Prevention of mania during maintenance: no data. Acute mania: no data.
93
+
94
+ ####### Considerations for treatment selection
95
+
96
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of manic / hypomanic switch: minor impact on treatment selection.
97
+
98
+ ###### ECT
99
+
100
+
101
+ ####### Level of evidence by phase of treatment
102
+
103
+ Acute depression: level 4 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of depression during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Acute mania: level 3 evidence.
104
+
105
+ ####### Considerations for treatment selection
106
+
107
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
108
+
109
+ ###### Cariprazine
110
+
111
+
112
+ ####### Level of evidence by phase of treatment
113
+
114
+ Acute depression: level 1 evidence. Prevention of any mood episode during maintenance: no data. Prevention of depression during maintenance: no data. Prevention of mania during maintenance: no data. Acute mania: level 1 evidence.
115
+
116
+ ####### Considerations for treatment selection
117
+
118
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: limited impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
119
+
120
+ ###### Olanzapine-fluoxetine
121
+
122
+
123
+ ####### Level of evidence by phase of treatment
124
+
125
+ Acute depression: level 2 evidence. Prevention of any mood episode during maintenance: no data. Prevention of depression during maintenance: no data. Prevention of mania during maintenance: no data. Acute mania: no data.
126
+
127
+ ####### Considerations for treatment selection
128
+
129
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of manic / hypomanic switch: minor impact on treatment selection.
130
+
131
+
src/data_processing/table17_textual.txt ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+
4
+ #### Table 17. Hierarchical rankings of first‐ and second‐line treatments recommended for maintenance treatment in bipolar disorder
5
+
6
+
7
+ ##### First-line treatment
8
+
9
+
10
+ ###### Lithium
11
+
12
+
13
+ ####### Level of evidence by phase of treatment
14
+
15
+ Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Acute Depression: level 2 evidence. Acute Mania: level 1 evidence.
16
+
17
+ ####### Considerations for treatment selection
18
+
19
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection.
20
+
21
+ ###### Quetiapine
22
+
23
+
24
+ ####### Level of evidence by phase of treatment
25
+
26
+ Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Acute Depression: level 1 evidence. Acute Mania: level 1 evidence.
27
+
28
+ ####### Considerations for treatment selection
29
+
30
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection.
31
+
32
+ ###### Lamotrigine
33
+
34
+
35
+ ####### Level of evidence by phase of treatment
36
+
37
+ Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 2 evidence. Acute Depression: level 1 evidence. Acute Mania: level 1 negative evidence.
38
+
39
+ ####### Considerations for treatment selection
40
+
41
+ Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: limited impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: limited impact on treatment selection.
42
+
43
+ ###### Asenapine
44
+
45
+
46
+ ####### Level of evidence by phase of treatment
47
+
48
+ Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Acute Depression: no data. Acute Mania: level 1 evidence.
49
+
50
+ ####### Considerations for treatment selection
51
+
52
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection.
53
+
54
+ ###### Quetiapine + Lithium/divalproex
55
+
56
+
57
+ ####### Level of evidence by phase of treatment
58
+
59
+ Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Acute Depression: level 4 evidence. Acute Mania: level 1 evidence.
60
+
61
+ ####### Considerations for treatment selection
62
+
63
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection.
64
+
65
+ ###### Aripiprazole + Lithium/divalproex
66
+
67
+
68
+ ####### Level of evidence by phase of treatment
69
+
70
+ Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Prevention of mania during maintenance: level 2 evidence. Acute Depression: level 4 evidence. Acute Mania: level 2 evidence.
71
+
72
+ ####### Considerations for treatment selection
73
+
74
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection.
75
+
76
+ ###### Aripiprazole
77
+
78
+
79
+ ####### Level of evidence by phase of treatment
80
+
81
+ Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Prevention of mania during maintenance: level 2 evidence. Acute Depression: level 1 negative evidence. Acute Mania: level 1 evidence.
82
+
83
+ ####### Considerations for treatment selection
84
+
85
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection.
86
+
87
+ ###### Aripiprazole OM
88
+
89
+
90
+ ####### Level of evidence by phase of treatment
91
+
92
+ Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Prevention of mania during maintenance: level 2 evidence. Acute Depression: no data. Acute Mania: no data.
93
+
94
+ ####### Considerations for treatment selection
95
+
96
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection.
97
+
98
+ ##### Second-line treatments
99
+
100
+
101
+ ###### Olanzapine
102
+
103
+
104
+ ####### Level of evidence by phase of treatment
105
+
106
+ Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Acute Depression: level 1 evidence; did not separate from placebo on core symptoms of depression. Acute Mania: level 1 evidence.
107
+
108
+ ####### Considerations for treatment selection
109
+
110
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection.
111
+
112
+ ###### Risperidone LAI
113
+
114
+
115
+ ####### Level of evidence by phase of treatment
116
+
117
+ Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Prevention of mania during maintenance: level 1 evidence. Acute Depression: no data. Acute Mania: no data.
118
+
119
+ ####### Considerations for treatment selection
120
+
121
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection.
122
+
123
+ ###### Risperidone LAI (adj)
124
+
125
+
126
+ ####### Level of evidence by phase of treatment
127
+
128
+ Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: level 4 evidence. Prevention of mania during maintenance: level 2 evidence. Acute Depression: no data. Acute Mania: no data.
129
+
130
+ ####### Considerations for treatment selection
131
+
132
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection.
133
+
134
+ ###### Carbamazepine
135
+
136
+
137
+ ####### Level of evidence by phase of treatment
138
+
139
+ Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Acute Depression: level 3 evidence. Acute Mania: level 1 evidence.
140
+
141
+ ####### Considerations for treatment selection
142
+
143
+ Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection.
144
+
145
+ ###### Paliperidone (>6 mg)
146
+
147
+
148
+ ####### Level of evidence by phase of treatment
149
+
150
+ Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Prevention of mania during maintenance: level 2 evidence. Acute Depression: no data. Acute Mania: level 1 evidence.
151
+
152
+ ####### Considerations for treatment selection
153
+
154
+ Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection.
155
+
156
+ ###### Lurasidone + Lithium/divalproex
157
+
158
+
159
+ ####### Level of evidence by phase of treatment
160
+
161
+ Prevention of any mood episode during maintenance: level 3 evidence; trend for superiority on the primary efficacy measure, hence the lower rating. Prevention of depression during maintenance: level 3 evidence; effective in those with an index episode of depression. Prevention of mania during maintenance: level 4 evidence. Acute Depression: level 2 evidence. Acute Mania: no data.
162
+
163
+ ####### Considerations for treatment selection
164
+
165
+ Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate / limited impact on treatment selection.
166
+
167
+ ###### Ziprasidone + Lithium/divalproex
168
+
169
+
170
+ ####### Level of evidence by phase of treatment
171
+
172
+ Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Prevention of mania during maintenance: level 2 evidence. Acute Depression: level 3 negative evidence. Acute Mania: level 2 negative evidence.
173
+
174
+ ####### Considerations for treatment selection
175
+
176
+ Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: minor impact on treatment selection.
177
+
178
+
src/data_processing/tables.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ import json
3
+ import pandas as pd
4
+ import re
5
+
6
+
7
def get_table_metadata(table, base_url):
    """Collect naming, linking and footnote metadata for one HTML table.

    Args:
        table: The ``<table>`` element. It must support ``find_parent`` and
            ``find`` (presumably a BeautifulSoup tag -- confirm against caller).
        base_url: URL prefix; the id of the enclosing ``<section>`` is appended
            to it to form the link to the table's section.

    Returns:
        A 7-tuple ``(name, caption, footnotes, headings, label, referee_id,
        section_url)`` where ``footnotes`` maps a footnote marker to its text
        and ``referee_id`` is ``"table_<n>"`` (or ``"table_unknown"`` when no
        ``tbl-<digits>`` id is found).
    """
    # Find the nearest ancestor <section> that has a non-empty id.
    section = table.find_parent('section')
    while section and not section.get('id'):
        section = section.find_parent('section')

    section_id = section.get("id") if section else None
    if section_id:
        section_url = base_url + section_id
        # Quote (and escape) the id: an unquoted attribute value must be a
        # valid CSS identifier, which ids starting with a digit or containing
        # characters such as "." are not.
        quoted_id = section_id.replace("\\", "\\\\").replace('"', '\\"')
        heading_el = section.select_one(f'[data-anchor-id="{quoted_id}"]')
        section_heading = heading_el.get_text(strip=True) if heading_el else ""
        # The subheading is read from the parent section's title element.
        parent_sec = section.find_parent('section')
        subheading_el = parent_sec.select_one(".pmc_sec_title") if parent_sec else None
        section_subheading = subheading_el.get_text(strip=True) if subheading_el else ""
        headings = " > ".join(filter(None, [section_heading, section_subheading]))
    else:
        # Fallback if no section id is found.
        section_url = base_url
        headings = ""

    # Table name and caption: taken from the section when there is one,
    # otherwise from the table's own <caption>.
    name_el = section.find("h4") if section else table.find("caption")
    name = name_el.get_text(strip=True) if name_el else "Table"
    caption_el = section.select_one('.caption p') if section else table.find("caption")
    caption = caption_el.get_text(strip=True) if caption_el else ""

    # Derive referee_id from the id of the nearest <section> carrying an id
    # attribute, e.g. "...tbl-0017" -> number "17" -> referee_id "table_17".
    section = table.find_parent("section", id=True)
    table_id = section["id"] if section and "tbl-" in section["id"] else None
    match = re.search(r"tbl-(\d+)", table_id or "")
    number = match.group(1).lstrip("0") if match else ""
    referee_id = f"table_{number}" if number else "table_unknown"
    label = f"Table {number}. " + caption if number else "Table"

    # Collect footnotes, keyed by their marker.
    footnotes = {}
    # Case 1: the <sup> marker is followed by a sibling <p> holding the text.
    for sup in section.select('.fn sup') if section else []:
        sibling = sup.find_next_sibling("p")
        if sibling:
            key = sup.get_text(strip=True)
            footnotes[key] = sibling.get_text(strip=True)

    # Case 2: marker and text share one <p>, e.g. "* text" or "# text".
    for p in section.select('.fn p') if section else []:
        matches = re.findall(r"(?<=(\*|#))\s*(.*?)(?=\s\*|\s#|$)", p.get_text())
        for key, text in matches:
            footnotes[key] = text.strip()

    return name, caption, footnotes, headings, label, referee_id, section_url
63
+
64
+
65
def get_table_data(table, footnotes):
    """Flatten an HTML table into a list of rows of cell text.

    Args:
        table: The ``<table>`` element; must support ``find_all("tr")``, with
            each row supporting ``find_all(["th", "td"])``.
        footnotes: Mapping of footnote marker -> footnote text. When a cell
            contains a ``<sup>`` whose text is a key here, the footnote text is
            appended to the cell in parentheses.

    Returns:
        A list of rows, each a list of strings. Cells with ``colspan > 1`` are
        not emitted as cells; their text becomes the current subsection label,
        which is prepended to every subsequent non-empty row. Cells with
        ``rowspan > 1`` are repeated in the same column of the following rows.
    """
    table_data = []
    rowspan_tracker = {}  # column index -> (cell text, rows still to fill)
    subsec = ""

    def fill_carried(row, col_index):
        """Append cells carried down by rowspans starting at col_index; return the next free column."""
        while col_index in rowspan_tracker:
            value, remaining = rowspan_tracker[col_index]
            row.append(value)
            remaining -= 1
            if remaining:
                rowspan_tracker[col_index] = (value, remaining)
            else:
                del rowspan_tracker[col_index]
            col_index += 1
        return col_index

    for tr in table.find_all("tr"):
        row = []
        # Cells carried over into the leading columns of this row.
        col_index = fill_carried(row, 0)

        for cell in tr.find_all(["th", "td"]):
            cell_text = cell.get_text(separator="\n", strip=True)
            cell_sups = [sup.get_text() for sup in cell.find_all("sup")]

            # When superscripts are present, drop one-character fragments
            # (the markers themselves) and re-join the rest on one line.
            if cell_sups:
                lines = [t for t in cell_text.split("\n") if len(t) > 1]
                cell_text = " ".join(lines)

            # Append footnote text if any.
            for sup in cell_sups:
                if sup in footnotes:
                    cell_text += f" ({footnotes[sup]})"

            # A cell spanning several columns is treated as a subsection marker.
            colspan = int(cell.get("colspan", 1))
            if colspan > 1:
                subsec = cell_text
                continue

            row.append(cell_text)

            # Remember cells that extend into the following rows.
            rowspan = int(cell.get("rowspan", 1))
            if rowspan > 1:
                rowspan_tracker[col_index] = (cell_text, rowspan - 1)

            # The next column(s) may be occupied by a rowspan from an earlier
            # row; fill them now so later cells land in the right column.
            col_index = fill_carried(row, col_index + 1)

        if row:
            if subsec:
                row.insert(0, subsec)
            table_data.append(row)

    return table_data
121
+
122
+
123
def to_text(table_data, label, caption):
    """Render table rows as a bracketed, line-per-row text block.

    Args:
        table_data: List of rows; the first row supplies the column headers.
        label: Title placed in bold on the first line.
        caption: Accepted for signature compatibility; not used in the output.

    Returns:
        A string of the form ``[**label**\\n{Row 1 - h: v, ...}\\n...]``. Cells
        with empty values are omitted from their row.
    """
    header_cells = table_data[0] if table_data else []
    rendered = [f"**{label}**"]

    # The header row itself is not emitted; data rows are numbered from 1.
    for row_number, cells in enumerate(table_data[1:], start=1):
        pairs = [f"{h}: {v}" for h, v in zip(header_cells, cells) if v]
        joined = ", ".join(pairs)
        rendered.append(f"{{Row {row_number} - {joined}}}")

    return "[" + "\n".join(rendered) + "]"
134
+
135
+
136
def to_chunk(text_block, section_url, referee_id, headings):
    """Wrap a rendered table in the chunk dictionary structure.

    Args:
        text_block: The table text to store under ``"text"``.
        section_url: Stored under ``metadata["section"]``.
        referee_id: Stored under ``metadata["referee_id"]``.
        headings: Accepted but currently not stored in the chunk.

    Returns:
        ``{"text": ..., "metadata": {"section": ..., "type": "HTML table",
        "referee_id": ...}}``.
    """
    metadata = {
        "section": section_url,
        "type": "HTML table",
        "referee_id": referee_id,
    }
    # "headings" is intentionally left out of the metadata for now.
    return {"text": text_block, "metadata": metadata}
146
+
147
+
148
def tables_to_json(input_path="bipolar.html", base_url="https://pmc.ncbi.nlm.nih.gov/articles/PMC5947163/#"):
    """Parse an HTML file and turn every ``<table>`` in it into a chunk dict.

    Args:
        input_path: Path of the HTML file, read as UTF-8.
        base_url: URL prefix passed on to ``get_table_metadata``.

    Returns:
        A list with one ``to_chunk`` dictionary per table, in document order.
    """
    with open(input_path, encoding="utf-8") as handle:
        markup = handle.read()

    tables = BeautifulSoup(markup, features="html.parser").find_all("table")
    print(f"Found {len(tables)} tables in document.")

    chunks = []
    for tbl in tables:
        # The table name (first element) is not needed for the chunk.
        _name, caption, footnotes, headings, label, referee_id, section_url = \
            get_table_metadata(tbl, base_url)
        rows = get_table_data(tbl, footnotes)
        rendered = to_text(rows, label, caption)
        chunks.append(to_chunk(rendered, section_url, referee_id, headings))

    return chunks
166
+
167
+
168
if __name__ == "__main__":
    # Script entry point. The export below is currently disabled; when enabled
    # it would build the table chunks and write them to "tables.json".
    # doc = tables_to_json()
    # with open("tables.json", "w", encoding="utf-8") as f:
    #     json.dump(doc, f, indent=4)
    pass
src/precompute.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from sentence_transformers import SentenceTransformer
3
+ from Rag import load_json_to_db, make_embeddings, save_embeddings # Adjust import
4
+
5
def precompute_and_save(embedder_name, db_path):
    """Compute embeddings for the guideline database and persist them.

    Runs four steps in order, printing a progress line before each: load the
    database with ``load_json_to_db``, load the ``SentenceTransformer`` model,
    compute embeddings with ``make_embeddings``, and store them with
    ``save_embeddings``.

    Args:
        embedder_name: Model identifier passed to ``SentenceTransformer`` and
            to the embedding helpers.
        db_path: Path handed to ``load_json_to_db``.
    """
    print("Loading database...")
    records = load_json_to_db(db_path)

    print(f"Loading embedder: {embedder_name}")
    # NOTE(review): trust_remote_code=True lets the model repository run its
    # own code on load -- only use with model names you trust.
    encoder = SentenceTransformer(embedder_name, trust_remote_code=True)

    print("Computing embeddings...")
    vectors = make_embeddings(encoder, embedder_name, records)

    print("Saving embeddings...")
    save_embeddings(embedder_name, vectors)

    print("Done.")
19
+
20
if __name__ == "__main__":
    # Script entry point: precompute embeddings for the processed guideline DB.
    embedder_name = "Qwen/Qwen3-Embedding-0.6B"  # Example embedder name
    # Relative path: it is resolved against the current working directory.
    db_path = "../data/processed/guideline_db.json"
    precompute_and_save(embedder_name, db_path)
src/readme.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ - `Rag.py` is the core RAG pipeline (backend)
2
+
3
+ - `app.py` is the frontend code, implemented with Streamlit
4
+
5
+ - `data_processing/` contains code to process the original guideline knowledge base for the RAG system
6
+
7
+ - `system_prompt.txt` is the system prompt we give to the LLM
8
+
9
+ - `run_batched_queries/` contains code to run multiple queries using the system and write results to a markdown file
src/system_prompt.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are a clinical decision support assistant. Use the provided Clinical Guidelines Context to answer the user's question.
2
+
3
+ - Carefully review the retrieved text and find any relevant medication recommendations, treatment considerations, or patient-specific factors.
4
+ - If multiple options exist, summarize the top 1–2 and explain briefly why they are preferred.
5
+ - Quote the reference link provided if you used the info in that context to answer the question.
6
+ - If there is no explicit answer, you may reasonably infer from related sections (e.g. similar symptoms, comorbidities, or past treatments), but make that clear.
7
+ - If absolutely no relevant information is available in the manual, respond with: "No clear recommendation found in the clinical guideline."
8
+ - Use EXACT medication names from the context, and format every medication name in markdown bold.
9
+ - Specify the treatment line (first-line, second-line, etc.). If not mentioned, providing the first-line option is sufficient.
10
+ - Include relevant clinical details (dosing, monitoring, contraindications)
11
+ - If multiple options exist, list them clearly
12
+ - Base recommendations STRICTLY on the provided context
13
+
14
+ - Output in a structured format with bullet points and bold fonts where necessary, and quote the important reference links that you used like this:
15
+ - **Reference:** [Table 3.1](https://pmc.ncbi.nlm.nih.gov/articles/PMC11351064/#section4F-07067437241245384)