yash's hf bipolar demo code with github action set
Browse files- .gitattributes +36 -0
- .github/workflows/sync-to-hf.yml +36 -0
- .gitignore +1 -0
- .idea/.gitignore +8 -0
- .idea/bipolar.iml +12 -0
- .idea/inspectionProfiles/Project_Default.xml +7 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +7 -0
- .idea/modules.xml +8 -0
- .idea/vcs.xml +7 -0
- .streamlit/config.toml +55 -0
- Dockerfile +21 -0
- README.md +19 -2
- data/embeddings/Qwen_Qwen3-Embedding-0.6B.npy +3 -0
- data/processed/guideline_db.json +0 -0
- data/processed/referenced_table_chunks.json +0 -0
- data/readme.md +7 -0
- requirements.txt +13 -0
- src/Rag.py +275 -0
- src/app.py +137 -0
- src/data_processing/bipolar.html +0 -0
- src/data_processing/bipolar_modified.html +0 -0
- src/data_processing/guideline_db copy.json +0 -0
- src/data_processing/image_processing.py +91 -0
- src/data_processing/main.py +294 -0
- src/data_processing/table12_textual.txt +223 -0
- src/data_processing/table14_textual.txt +131 -0
- src/data_processing/table17_textual.txt +178 -0
- src/data_processing/tables.py +172 -0
- src/precompute.py +23 -0
- src/readme.md +9 -0
- src/system_prompt.txt +15 -0
.gitattributes
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.faiss filter=lfs diff=lfs merge=lfs -text
|
.github/workflows/sync-to-hf.yml
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Sync hf-bipolar to HF Space main
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [ demo_bipolar ]
|
| 6 |
+
workflow_dispatch: {}
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
sync:
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
steps:
|
| 12 |
+
- name: Checkout demo_bipolar (with LFS)
|
| 13 |
+
uses: actions/checkout@v4
|
| 14 |
+
with:
|
| 15 |
+
ref: demo_bipolar
|
| 16 |
+
lfs: true
|
| 17 |
+
fetch-depth: 0
|
| 18 |
+
|
| 19 |
+
- name: Setup Git LFS
|
| 20 |
+
run: |
|
| 21 |
+
git lfs install
|
| 22 |
+
git lfs fetch --all
|
| 23 |
+
|
| 24 |
+
- name: Push hf-demo_bipolar to HF Space main
|
| 25 |
+
env:
|
| 26 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 27 |
+
run: |
|
| 28 |
+
set -e
|
| 29 |
+
git config user.email "actions@github.com"
|
| 30 |
+
git config user.name "github-actions[bot]"
|
| 31 |
+
SPACE_ID="ymali/bipolar"
|
| 32 |
+
TARGET_BRANCH="main"
|
| 33 |
+
AUTHED_URL="https://user:${HF_TOKEN}@huggingface.co/spaces/${SPACE_ID}"
|
| 34 |
+
git remote add hf "$AUTHED_URL" || git remote set-url hf "$AUTHED_URL"
|
| 35 |
+
git lfs push hf --all
|
| 36 |
+
git push hf "HEAD:refs/heads/${TARGET_BRANCH}" --force
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.env
|
.idea/.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Default ignored files
|
| 2 |
+
/shelf/
|
| 3 |
+
/workspace.xml
|
| 4 |
+
# Editor-based HTTP Client requests
|
| 5 |
+
/httpRequests/
|
| 6 |
+
# Datasource local storage ignored files
|
| 7 |
+
/dataSources/
|
| 8 |
+
/dataSources.local.xml
|
.idea/bipolar.iml
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<module type="PYTHON_MODULE" version="4">
|
| 3 |
+
<component name="NewModuleRootManager">
|
| 4 |
+
<content url="file://$MODULE_DIR$" />
|
| 5 |
+
<orderEntry type="jdk" jdkName="Python 3.13" jdkType="Python SDK" />
|
| 6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
| 7 |
+
</component>
|
| 8 |
+
<component name="PyDocumentationSettings">
|
| 9 |
+
<option name="format" value="PLAIN" />
|
| 10 |
+
<option name="myDocStringFormat" value="Plain" />
|
| 11 |
+
</component>
|
| 12 |
+
</module>
|
.idea/inspectionProfiles/Project_Default.xml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<profile version="1.0">
|
| 3 |
+
<option name="myName" value="Project Default" />
|
| 4 |
+
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
|
| 5 |
+
<inspection_tool class="PyTypeCheckerInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
| 6 |
+
</profile>
|
| 7 |
+
</component>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<settings>
|
| 3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
| 4 |
+
<version value="1.0" />
|
| 5 |
+
</settings>
|
| 6 |
+
</component>
|
.idea/misc.xml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="Black">
|
| 4 |
+
<option name="sdkName" value="Python 3.13" />
|
| 5 |
+
</component>
|
| 6 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13" project-jdk-type="Python SDK" />
|
| 7 |
+
</project>
|
.idea/modules.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectModuleManager">
|
| 4 |
+
<modules>
|
| 5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/bipolar.iml" filepath="$PROJECT_DIR$/.idea/bipolar.iml" />
|
| 6 |
+
</modules>
|
| 7 |
+
</component>
|
| 8 |
+
</project>
|
.idea/vcs.xml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="VcsDirectoryMappings">
|
| 4 |
+
<mapping directory="" vcs="Git" />
|
| 5 |
+
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
| 6 |
+
</component>
|
| 7 |
+
</project>
|
.streamlit/config.toml
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[server]
|
| 2 |
+
enableStaticServing = false
|
| 3 |
+
#
|
| 4 |
+
#[[theme.fontFaces]]
|
| 5 |
+
#family = "SpaceGrotesk"
|
| 6 |
+
#url = "app/static/SpaceGrotesk-VariableFont_wght.ttf"
|
| 7 |
+
#
|
| 8 |
+
#[[theme.fontFaces]]
|
| 9 |
+
#family = "SpaceMono"
|
| 10 |
+
#url = "app/static/SpaceMono-Bold.ttf"
|
| 11 |
+
#style = "normal"
|
| 12 |
+
#weight = 700
|
| 13 |
+
#
|
| 14 |
+
#[[theme.fontFaces]]
|
| 15 |
+
#family = "SpaceMono"
|
| 16 |
+
#url = "app/static/SpaceMono-BoldItalic.ttf"
|
| 17 |
+
#style = "italic"
|
| 18 |
+
#weight = 700
|
| 19 |
+
#
|
| 20 |
+
#[[theme.fontFaces]]
|
| 21 |
+
#family = "SpaceMono"
|
| 22 |
+
#url = "app/static/SpaceMono-Italic.ttf"
|
| 23 |
+
#style = "italic"
|
| 24 |
+
#weight = 400
|
| 25 |
+
#
|
| 26 |
+
#[[theme.fontFaces]]
|
| 27 |
+
#family = "SpaceMono"
|
| 28 |
+
#url = "app/static/SpaceMono-Regular.ttf"
|
| 29 |
+
#style = "normal"
|
| 30 |
+
#weight = 400
|
| 31 |
+
|
| 32 |
+
[theme]
|
| 33 |
+
primaryColor = "#cb785c"
|
| 34 |
+
backgroundColor = "#fdfdf8"
|
| 35 |
+
secondaryBackgroundColor = "#ecebe3"
|
| 36 |
+
textColor = "#3d3a2a"
|
| 37 |
+
linkColor = "#3d3a2a"
|
| 38 |
+
borderColor = "#d3d2ca"
|
| 39 |
+
showWidgetBorder = true
|
| 40 |
+
baseRadius = "0.75rem"
|
| 41 |
+
buttonRadius = "full"
|
| 42 |
+
#font = "SpaceGrotesk"
|
| 43 |
+
headingFontWeights = [600,500,500,500,500,500]
|
| 44 |
+
headingFontSizes = ["3rem", "2rem"]
|
| 45 |
+
#codeFont = "SpaceMono"
|
| 46 |
+
codeFontSize = ".75rem"
|
| 47 |
+
codeBackgroundColor = "#ecebe4"
|
| 48 |
+
showSidebarBorder = true
|
| 49 |
+
chartCategoricalColors = ["#0ea5e9", "#059669", "#fbbf24"]
|
| 50 |
+
|
| 51 |
+
[theme.sidebar]
|
| 52 |
+
backgroundColor = "#f0f0ec"
|
| 53 |
+
secondaryBackgroundColor = "#ecebe3"
|
| 54 |
+
headingFontSizes = ["1.6rem", "1.4rem", "1.2rem"]
|
| 55 |
+
dataframeHeaderBackgroundColor = "#e4e4e0"
|
Dockerfile
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.9-slim

WORKDIR /app

# System packages: a compiler toolchain for wheels that must build from
# source, curl for the HEALTHCHECK below, and git.
# NOTE(review): software-properties-common was dropped from this list. Nothing
# in this image calls add-apt-repository, and the package is reportedly not
# available on Debian 13 ("trixie") based slim images - confirm before re-adding.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies before copying the sources so that code-only changes
# reuse the cached dependency layer.
COPY requirements.txt ./
RUN pip3 install --no-cache-dir -r requirements.txt

# The app reads data/processed/*.json and data/embeddings/*.npy at startup,
# and Streamlit reads .streamlit/config.toml from the working directory, so
# both must be present in the image alongside src/.
COPY src/ ./src/
COPY data/ ./data/
COPY .streamlit/ ./.streamlit/

EXPOSE 8501

HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

ENTRYPOINT ["streamlit", "run", "src/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
README.md
CHANGED
|
@@ -1,2 +1,19 @@
|
|
| 1 |
-
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Bipolar Chatbot
|
| 3 |
+
emoji: 🚀
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: red
|
| 6 |
+
sdk: streamlit
|
| 7 |
+
app_port: 8501
|
| 8 |
+
sdk_version: "1.45.1"
|
| 9 |
+
app_file: src/app.py
|
| 10 |
+
pinned: true
|
| 11 |
+
short_description: Streamlit template space
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Welcome to Streamlit!
|
| 15 |
+
|
| 16 |
+
Edit `/src/app.py` to customize this app to your heart's desire. :heart:
|
| 17 |
+
|
| 18 |
+
If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
|
| 19 |
+
forums](https://discuss.streamlit.io).
|
data/embeddings/Qwen_Qwen3-Embedding-0.6B.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc3ceea12f082aff546c36423f436922abb5600de3c77d494a5d8bd87f9edf62
|
| 3 |
+
size 1495168
|
data/processed/guideline_db.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/processed/referenced_table_chunks.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/readme.md
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- The `embeddings/` contains the embedded data by different embedders.
|
| 2 |
+
|
| 3 |
+
- The `faiss_index/` contains the index built by faiss on different embedding files
|
| 4 |
+
|
| 5 |
+
- The `processed/` contains the chunked data that has been preprocessed and is used in the RAG system
|
| 6 |
+
|
| 7 |
+
- The `raw/` contains the original data from the clinical guideline
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
torch>=2.1.0 # >=2.1 preferred for compatibility on Apple Silicon
|
| 3 |
+
faiss-cpu>=1.7.4
|
| 4 |
+
numpy>=1.24.0
|
| 5 |
+
python-dotenv>=1.0.0
|
| 6 |
+
|
| 7 |
+
# Required by Qwen Embedding model
|
| 8 |
+
transformers>=4.51.0
|
| 9 |
+
sentence-transformers>=2.7.0
|
| 10 |
+
|
| 11 |
+
# LLM providers
|
| 12 |
+
openai
|
| 13 |
+
together>=0.2.8
|
src/Rag.py
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import time
|
| 4 |
+
import requests
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
from sentence_transformers import SentenceTransformer
|
| 9 |
+
from together import Together
|
| 10 |
+
|
| 11 |
+
global db, referenced_tables_db, embedder, index, llm_client
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def load_json_to_db(file_path):
    """Read a JSON file and return the parsed content.

    Args:
        file_path: Path of the JSON file to load.

    Returns:
        The object produced by ``json.load`` for the file content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's default
    # text encoding, which would break on non-ASCII guideline text.
    with open(file_path, encoding="utf-8") as f:
        return json.load(f)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# -------- Embedding Functions --------
|
| 21 |
+
def make_embeddings(embedder, embedder_name, db):
    """Encode the ``'text'`` field of every chunk in ``db``.

    Args:
        embedder: Object exposing ``encode(texts, convert_to_numpy=...,
            batch_size=..., show_progress_bar=...)``.
        embedder_name: Not used by this function; kept for interface parity.
        db: Iterable of chunks, each a mapping with a ``'text'`` key.

    Returns:
        Whatever ``embedder.encode`` returns for the collected texts.
    """
    corpus = []
    for entry in db:
        corpus.append(entry['text'])

    # One text per batch, with a progress bar, exactly as the encoder is
    # asked to run in the rest of this module.
    return embedder.encode(
        corpus,
        convert_to_numpy=True,
        batch_size=1,
        show_progress_bar=True,
    )
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def get_project_root():
    """Return the absolute path of the directory one level above this file's directory."""
    here = os.path.dirname(__file__)
    parent = os.path.join(here, "..")
    return os.path.abspath(parent)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def save_embeddings(embedder_name, embeddings):
    """Write ``embeddings`` to ``<project root>/data/embeddings/<name>.npy``.

    ``<name>`` is ``embedder_name`` with every ``/`` replaced by ``_``. The
    target directory is created when it does not exist yet.

    Args:
        embedder_name: Embedder identifier used to derive the file name.
        embeddings: Array-like passed to ``np.save``.
    """
    safe_name = embedder_name.replace('/', '_')
    target = os.path.join(get_project_root(), "data", "embeddings", f"{safe_name}.npy")

    target_dir = os.path.dirname(target)
    os.makedirs(target_dir, exist_ok=True)

    np.save(target, embeddings)
    print(f"Saved embeddings to: {target}")
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def load_embeddings(embedder_name):
    """Load cached embeddings for ``embedder_name``, computing them if absent.

    The cache file is ``<project root>/data/embeddings/<name>.npy`` where
    ``<name>`` is ``embedder_name`` with ``/`` replaced by ``_``.

    When the file does not exist, the embeddings are recomputed with the
    module-level ``embedder`` and ``db`` globals (assigned by
    ``launch_depression_assistant``) and then saved to the cache.

    Args:
        embedder_name: Embedder identifier used to derive the file name.

    Returns:
        The embeddings array.

    Raises:
        ValueError: If the cache file holds pickled objects. Pickle loading is
            disabled on purpose, see the comment below.
    """
    root = get_project_root()
    file_path = os.path.join(root, "data", "embeddings", f"{embedder_name.replace('/', '_')}.npy")

    try:
        # Embeddings are written by np.save from a numeric array, so pickle
        # support is not needed. Leaving it on would let a tampered .npy file
        # execute arbitrary code at load time.
        embeddings = np.load(file_path, allow_pickle=False)
        print(f"Loaded embeddings from: {file_path}")
    except FileNotFoundError:
        print(f"Embeddings not found. Recomputing for: {embedder_name}")
        # `embedder` and `db` are module globals; they must have been set
        # before this fallback runs.
        embeddings = make_embeddings(embedder, embedder_name, db)
        save_embeddings(embedder_name, embeddings)

    return embeddings
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def load_embedder_with_fallbacks(embedder_name):
    """Instantiate a ``SentenceTransformer`` for ``embedder_name`` on the CPU.

    Despite the name, no fallback is attempted here: any exception raised by
    the constructor propagates to the caller.

    Args:
        embedder_name: Model identifier handed to ``SentenceTransformer``.

    Returns:
        The constructed model.
    """
    print(f"Loading embedder {embedder_name}")

    # NOTE(review): trust_remote_code=True allows the model repository to run
    # its own Python code while loading.
    tokenizer_options = {"padding_side": "left"}
    return SentenceTransformer(
        embedder_name,
        trust_remote_code=True,
        tokenizer_kwargs=tokenizer_options,
        device='cpu',
    )
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# -------- Cosine Similarity Index (no FAISS) --------
|
| 66 |
+
def build_cosine_index(embeddings):
    """Scale every row of ``embeddings`` to unit L2 norm.

    With unit-length rows, a dot product against a unit-length query vector
    is the cosine similarity.

    Args:
        embeddings: 2-D array; each row is normalised independently
            (the norm is taken along ``axis=1``).

    Returns:
        Array of the same shape with each row divided by its L2 norm.
        All-zero rows are returned unchanged instead of becoming NaN.
    """
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    # Guard against division by zero: a zero vector has no direction, so keep
    # it as zeros (similarity 0) rather than poisoning the ranking with NaN.
    safe_norms = np.where(norms == 0, 1.0, norms)
    return embeddings / safe_norms
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def load_cosine_index(embedder_name):
    """Load the embeddings for ``embedder_name`` and return them row-normalised.

    Args:
        embedder_name: Embedder identifier forwarded to ``load_embeddings``.

    Returns:
        The result of ``build_cosine_index`` applied to the loaded embeddings.
    """
    return build_cosine_index(load_embeddings(embedder_name))
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# -------- Cosine Similarity Search (Brute Force) --------
|
| 78 |
+
def vector_search(query, embedder, db, index, referenced_table_db, k=6):
    """Return the ``k`` chunks most similar to ``query`` plus the tables they reference.

    The query is wrapped in an instruction prefix, encoded, scaled to unit
    length and compared against ``index`` with a dot product (brute force).

    Args:
        query: User query text.
        embedder: Object exposing ``encode(texts, convert_to_numpy=True)``.
        db: Sequence of chunks; each has ``'text'`` and a ``'metadata'``
            mapping with ``'section'`` and ``'chunk_id'``, and optionally
            ``'referee_id'`` and ``'referenced_tables'``.
        index: 2-D array with one row per entry of ``db``
            (presumably the output of ``build_cosine_index`` - verify at caller).
        referenced_table_db: Iterable of table chunks with the same layout as
            ``db`` entries.
        k: Number of nearest chunks to return.

    Returns:
        A list of dicts. The first ``k`` entries carry ``text``, ``section``,
        ``chunk_id`` and ``similarity``. They are followed by one entry
        (without ``similarity``) for every chunk of ``referenced_table_db``
        whose ``referee_id`` is referenced by a hit and is not itself a hit.
    """
    def get_detailed_instruct(task_description: str, query: str) -> str:
        return f'Instruct: {task_description}\nQuery:{query}'

    task = 'Given a search query, retrieve relevant passages that answer the query'
    query_embedding = embedder.encode([get_detailed_instruct(task, query)], convert_to_numpy=True)
    query_vec = query_embedding / np.linalg.norm(query_embedding)

    cosine_similarities = np.dot(index, query_vec.T).flatten()
    # Negate so that argsort's ascending order yields the highest similarity first.
    top_k_indices = np.argsort(-cosine_similarities)[:k]

    results = []
    referenced_tables = set()
    existed_tables = set()

    for i in top_k_indices:
        metadata = db[i]['metadata']
        results.append({
            "text": db[i]['text'],
            "section": metadata['section'],
            "chunk_id": metadata['chunk_id'],
            "similarity": float(cosine_similarities[i]),
        })
        # Both keys are optional: a chunk without 'referee_id' must not abort
        # the search, just as one without 'referenced_tables' never did.
        referee_id = metadata.get('referee_id')
        if referee_id:
            existed_tables.add(referee_id)
        referenced_tables.update(metadata.get('referenced_tables') or ())

    # Tables that are referenced by a hit but were not retrieved themselves.
    tables_to_add = referenced_tables - existed_tables

    for chunk in referenced_table_db:
        if chunk['metadata'].get('referee_id') in tables_to_add:
            results.append({
                "text": chunk['text'],
                "section": chunk['metadata']['section'],
                "chunk_id": chunk['metadata']['chunk_id'],
            })
    return results
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def load_together_llm_client():
    """Create a ``Together`` client from the ``TOGETHER_API_KEY`` environment variable.

    ``load_dotenv()`` is called first so that a local ``.env`` file can
    provide the key.
    """
    load_dotenv()
    api_key = os.getenv("TOGETHER_API_KEY")
    return Together(api_key=api_key)
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
# -------- Prompt Construction --------
|
| 126 |
+
def construct_prompt(query, faiss_results):
    """Build a single-turn prompt from the system prompt, the query and retrieved chunks.

    Args:
        query: User query text.
        faiss_results: Iterable of retrieved chunks; each is a mapping with
            ``'section'`` and ``'text'`` keys.

    Returns:
        The assembled prompt string.
    """
    # Resolve the prompt file next to this module instead of relative to the
    # current working directory, so the function works from any CWD.
    prompt_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "system_prompt.txt")
    with open(prompt_path, "r", encoding="utf-8") as f:
        system_prompt = f.read().strip()

    prompt = f"""
### System Prompt
{system_prompt}

### User Query
{query}

### Clinical Guidelines Context
"""
    # NOTE(review): the labels read as if 'section' and 'text' might be
    # swapped; they are kept verbatim because they are part of the prompt.
    for res in faiss_results:
        prompt += f"- reference: {res['section']}\n- This paragraph is from section: {res['text']}\n"
    return prompt
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def construct_prompt_with_memory(query, faiss_results, chat_history=None, history_limit=4):
    """Build a prompt that also carries the most recent chat turns.

    Args:
        query: User query text.
        faiss_results: Iterable of retrieved chunks; each is a mapping with
            ``'section'`` and ``'text'`` keys.
        chat_history: Optional list of messages, each a mapping with
            ``'role'`` and ``'content'``. Only the last ``history_limit``
            messages are included.
        history_limit: Maximum number of history messages to include. Zero or
            a negative value omits the history section.

    Returns:
        The assembled prompt string.
    """
    # Resolve the prompt file next to this module instead of relative to the
    # current working directory, so the function works from any CWD.
    prompt_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "system_prompt.txt")
    with open(prompt_path, "r", encoding="utf-8") as f:
        system_prompt = f.read().strip()

    prompt = f"### System Prompt\n{system_prompt}\n\n"

    # `history_limit > 0` matters: a slice of [-0:] would return the WHOLE
    # history instead of nothing.
    if chat_history and history_limit > 0:
        prompt += "### Chat History\n"
        for m in chat_history[-history_limit:]:
            prompt += f"{m['role'].title()}: {m['content']}\n"
        prompt += "\n"

    prompt += f"### User Query\n{query}\n\n"
    prompt += "### Clinical Guidelines Context\n"
    for res in faiss_results:
        prompt += f"- reference: {res['section']}\n- This paragraph is from section: {res['text']}\n"
    return prompt
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def call_llm(llm_client, prompt, stream_flag=False, max_tokens=500, temperature=0.05, top_p=0.9, model_name="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"):
    """Send ``prompt`` as a single user message to a chat-completions client.

    Args:
        llm_client: Client exposing ``chat.completions.create(...)``.
        prompt: Full prompt text, sent as the only ``user`` message.
        stream_flag: When true, return a generator of text pieces; otherwise
            return the complete answer text.
        max_tokens: Forwarded to the API.
        temperature: Forwarded to the API.
        top_p: Forwarded to the API.
        model_name: Model identifier forwarded to the API.

    Returns:
        A generator of non-empty content pieces when ``stream_flag`` is true,
        else ``response.choices[0].message.content``.

    Raises:
        Exception: Any error raised while this function runs is printed with
            its traceback and re-raised. In streaming mode the request is only
            issued once the generator is iterated, so errors from the request
            surface at iteration time, outside this handler.
    """
    print(f"Calling LLM with model: {model_name}")

    # Arguments shared by the streaming and non-streaming requests.
    request = dict(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    try:
        if not stream_flag:
            completion = llm_client.chat.completions.create(stream=False, **request)
            return completion.choices[0].message.content

        def stream_generator():
            # The request is created lazily, on the first iteration.
            events = llm_client.chat.completions.create(stream=True, **request)
            for event in events:
                if not event.choices:
                    continue
                piece = event.choices[0].delta.content
                if piece:
                    yield piece

        return stream_generator()
    except Exception as e:
        print("Error in call_llm:", str(e))
        import traceback
        traceback.print_exc()
        raise
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def call_ollama(prompt, model="mistral", stream_flag=False, max_tokens=500, temperature=0.05, top_p=0.9):
    """Stream a completion from a local Ollama server.

    This function is a generator: it always streams, regardless of
    ``stream_flag`` (accepted only for signature parity with ``call_llm``).

    Args:
        prompt: Prompt text.
        model: Ollama model name.
        stream_flag: Unused.
        max_tokens: Maximum number of tokens to generate (sent as
            ``options.num_predict``).
        temperature: Sampling temperature (sent as ``options.temperature``).
        top_p: Nucleus sampling value (sent as ``options.top_p``).

    Yields:
        The ``"response"`` field of every JSON line returned by the server.
        Lines that are empty, not valid JSON, or lack that field are skipped.
    """
    url = "http://localhost:11434/api/generate"
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": True,
        # Ollama reads sampling parameters from the nested "options" object,
        # and the token limit is called "num_predict". Top-level
        # "temperature"/"top_p"/"max_tokens" keys are not applied.
        "options": {
            "temperature": temperature,
            "top_p": top_p,
            "num_predict": max_tokens,
        },
    }

    with requests.post(url, json=payload, stream=True) as response:
        for line in response.iter_lines():
            if not line:
                continue
            try:
                data = json.loads(line.decode("utf-8"))
                yield data["response"]
            except (ValueError, KeyError, TypeError):
                # Best effort: skip lines that are malformed or carry no
                # "response" (ValueError covers JSON and UTF-8 decode errors).
                continue
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
# -------- Main Assistant Entry Points --------
|
| 219 |
+
def launch_depression_assistant(embedder_name, designated_client=None):
    """Initialise the module-level state used by ``depression_assistant``.

    Sets the globals ``db``, ``referenced_tables_db``, ``embedder``, ``index``
    and ``llm_client``.

    Args:
        embedder_name: Embedder identifier used to load the model and its
            cached embeddings.
        designated_client: LLM client to use. When ``None``, a Together client
            is created from the environment; if that fails, ``llm_client`` is
            set to ``None`` and a message is printed.
    """
    global db, referenced_tables_db, embedder, index, llm_client

    # Anchor the data files at the project root (as the embeddings cache
    # already is) so start-up does not depend on the working directory.
    processed_dir = os.path.join(get_project_root(), "data", "processed")
    db = load_json_to_db(os.path.join(processed_dir, "guideline_db.json"))
    referenced_tables_db = load_json_to_db(os.path.join(processed_dir, "referenced_table_chunks.json"))

    # `db` and `embedder` must be assigned before the index is loaded:
    # load_embeddings falls back to them when no cached file exists.
    embedder = load_embedder_with_fallbacks(embedder_name)
    index = load_cosine_index(embedder_name)

    if designated_client is None:
        print("No LLM client provided. Loading Together LLM client...")
        try:
            llm_client = load_together_llm_client()
        except Exception:
            # Keep the global defined (and do not silently keep a client from
            # an earlier launch) so later use fails predictably.
            llm_client = None
            print("Failed to load Together LLM client. Please check your API key.")
    else:
        llm_client = designated_client

    print("---------Depression Assistant is ready to use!--------------\n\n")
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def depression_assistant(query, model_name="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free", max_tokens=500, temperature=0.05, top_p=0.9, stream_flag=False, chat_history=None):
    """Retrieve context for ``query`` and ask the configured LLM to answer.

    Uses the module globals ``embedder``, ``db``, ``index``,
    ``referenced_tables_db`` and ``llm_client``.

    Args:
        query: User query text.
        model_name: Model identifier forwarded to the LLM backend.
        max_tokens: Forwarded to the LLM backend.
        temperature: Forwarded to the LLM backend.
        top_p: Forwarded to the LLM backend.
        stream_flag: Forwarded to the LLM backend.
        chat_history: Optional list of earlier messages for the prompt.

    Returns:
        A ``(results, response)`` tuple: the retrieved chunks (top 3 plus
        referenced tables) and whatever the LLM backend call returned.
    """
    retrieved = vector_search(query, embedder, db, index, referenced_tables_db, k=3)
    full_prompt = construct_prompt_with_memory(query, retrieved, chat_history=chat_history)

    # The sentinel string selects the local Ollama backend; anything else is
    # treated as a chat-completions client object.
    if llm_client == "Run Ollama Locally":
        reply = call_ollama(full_prompt, model_name, stream_flag, max_tokens, temperature, top_p)
    else:
        reply = call_llm(llm_client, full_prompt, stream_flag, max_tokens, temperature, top_p, model_name)
    return retrieved, reply
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
def load_queries_and_answers(query_file, answers_file):
    """Read two text files and return their lines.

    Args:
        query_file: Path of the file holding one query per line.
        answers_file: Path of the file holding one answer per line.

    Returns:
        ``(queries, answers)``: two lists of lines, line endings included.
    """
    loaded = []
    for path in (query_file, answers_file):
        with open(path, 'r') as handle:
            loaded.append(handle.readlines())
    queries, answers = loaded
    return queries, answers
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
def write_batched_results(embedder_name, result_path):
    """Run the assistant on the stored queries and write two markdown reports.

    Initialises the assistant, reads ``data/raw/queries.txt`` and
    ``data/raw/answers.txt``, then writes
    ``{result_path}Retrieved_Results_by_<name>.md`` and
    ``{result_path}Response_by_<name>.md``, where ``<name>`` is
    ``embedder_name`` with ``/`` replaced by ``_``. ``result_path`` is used as
    a plain string prefix, so it needs its own trailing separator.

    Only the first query is processed because of the ``break`` at the end of
    the loop.

    Args:
        embedder_name: Embedder identifier used for retrieval and file names.
        result_path: String prefix for both output files.
    """
    launch_depression_assistant(embedder_name)
    queries, answers = load_queries_and_answers("data/raw/queries.txt", "data/raw/answers.txt")
    embedder_filename = embedder_name.replace('/', '_')

    retrieved_report = f"{result_path}Retrieved_Results_by_{embedder_filename}.md"
    response_report = f"{result_path}Response_by_{embedder_filename}.md"

    with open(retrieved_report, "w") as retrieved_out, open(response_report, "w") as response_out:
        for number, query in enumerate(queries, start=1):
            hits, response = depression_assistant(query)

            # Header shared by both reports: the query and its reference answer.
            header = f"## Query {number}\n{query.strip()}\n\n## Answer\n{answers[number - 1].strip()}\n\n"

            retrieved_out.write(header + "## Retrieved Results\n")
            for hit in hits:
                retrieved_out.write(f"\n\n#### {hit['section']}\n\n{hit['text']}\n")
            retrieved_out.write("\n\n---\n\n")

            response_out.write(header + f"## Response\n{response}\n\n---\n\n")
            break  # remove this `break` if you want to process all queries
|
src/app.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from Rag import launch_depression_assistant, depression_assistant
|
| 3 |
+
from openai import OpenAI
|
| 4 |
+
from together import Together
|
| 5 |
+
import time
|
| 6 |
+
import os
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
|
| 9 |
+
load_dotenv()

# Module handle used further down to swap the LLM client that
# depression_assistant() reads from Rag's module-level state.
import Rag


def _render_sources(placeholder, sources):
    """Draw the retrieved sources (or a hint when there are none) into ``placeholder``."""
    with placeholder.container():
        if sources:
            for i, result in enumerate(sources):
                st.markdown(f"**Source {i + 1}** | Similarity: {result.get('similarity', 'N/A')}")
                st.markdown(f"- **Section:** {result['section']}")
                st.markdown(f"> {result['text']}")
                st.markdown("---")
        else:
            st.markdown("*Sources will appear here after you ask a question.*")


# Per-session defaults.
if "embedder_loaded" not in st.session_state:
    st.session_state.embedder_loaded = False
if "current_embedder_name" not in st.session_state:
    st.session_state.current_embedder_name = None
if "last_sources" not in st.session_state:
    st.session_state.last_sources = []

st.set_page_config(
    page_title="Bipolar Assistant Chatbot",
    page_icon=":robot_face:",
    layout="wide",
    initial_sidebar_state="collapsed"
)

model_options = [
    "Qwen/Qwen3-Embedding-0.6B",
    "jinaai/jina-embeddings-v3",
    "BAAI/bge-large-en-v1.5",
    "BAAI/bge-small-en-v1.5",
    "BAAI/bge-base-en-v1.5",
    "sentence-transformers/all-mpnet-base-v2",
    "Other"
]

st.sidebar.title("Settings")
with st.sidebar:
    st.subheader("Model Selection")
    embedder_name = st.selectbox("Select embedder model", model_options, index=0)

    if embedder_name == "Other":
        embedder_name = st.text_input('Enter the embedder model name')
        # The text box starts empty; do not try to load a model named "".
        if not embedder_name:
            st.warning("Please enter an embedder model name.")
            st.stop()

    llm_client = Together(api_key=os.getenv("TOGETHER_API_KEY"))

    # (Re)load the retrieval state only when this session has not loaded it
    # yet or the selected embedder changed.
    if (not st.session_state.embedder_loaded or
            st.session_state.current_embedder_name != embedder_name):

        with st.spinner(f"Loading embedding model: {embedder_name}..."):
            launch_depression_assistant(embedder_name=embedder_name, designated_client=llm_client)
            st.session_state.embedder_loaded = True
            st.session_state.current_embedder_name = embedder_name
        st.success(f"✅ Embedding model {embedder_name} loaded successfully!")
    else:
        st.info(f"📋 Current embedding model: {st.session_state.current_embedder_name}")

    selected_model = st.selectbox('Choose a model for generation',
                                  ["meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
                                   "deepseek-ai/deepseek-r1",
                                   "meta/llama-3.3-70b-instruct"],
                                  key='selected_model')

    # These two models are served through NVIDIA's OpenAI-compatible endpoint.
    if selected_model in ["deepseek-ai/deepseek-r1", "meta/llama-3.3-70b-instruct"]:
        llm_client = OpenAI(
            base_url="https://integrate.api.nvidia.com/v1",
            api_key=os.getenv("NVIDIA_API_KEY", None),
        )

    temperature = st.slider('temperature', min_value=0.01, max_value=1.0, value=0.05, step=0.01)
    top_p = st.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
    max_length = st.slider('max_length', min_value=100, max_value=1000, value=500, step=10)

# depression_assistant() uses the client stored in Rag's module globals, which
# launch_depression_assistant() only sets when the embedder is (re)loaded.
# Push the client matching the currently selected generation model on every
# run; otherwise NVIDIA-hosted models would be sent to the Together client.
Rag.llm_client = llm_client

st.title("💬 Bipolar Assistant Chatbot")

if "messages" not in st.session_state:
    st.session_state.messages = [{
        "role": "assistant",
        "content": "This is a simple Bipolar assistant bot that uses **RAG (Retrieval-Augmented Generation)** to answer questions related to bipolar. You can ask questions related to bipolar and get responses based on [CANMAT clinical guidelines](https://pmc.ncbi.nlm.nih.gov/articles/PMC11351064/)."
    }]

chat_col, sources_col = st.columns([1, 1])

with sources_col:
    st.markdown("### Sources")
    sources_placeholder = st.empty()

_render_sources(sources_placeholder, st.session_state.last_sources)

with chat_col:
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if user_input := st.chat_input("Ask me questions about the CANMAT bipolar guideline!"):
        st.chat_message("user").markdown(user_input)
        st.session_state.messages.append({"role": "user", "content": user_input})

        # Last four messages before the one just appended.
        history = st.session_state.messages[:-1][-4:]

        collected = ""
        t0 = time.perf_counter()
        results, response = depression_assistant(user_input, model_name=selected_model, max_tokens=max_length,
                                                 temperature=temperature, top_p=top_p, stream_flag=True,
                                                 chat_history=history)

        st.session_state.last_sources = results if results else []
        _render_sources(sources_placeholder, st.session_state.last_sources)

        # Stream the answer into a single placeholder as pieces arrive.
        placeholder = st.empty()
        for chunk in response:
            collected += chunk
            placeholder.markdown(collected)

        t1 = time.perf_counter()
        print(f"[Time] Retriever + Generator takes: {t1 - t0:.2f} seconds in total.")
        print(f"============== Finish R-A-Generation for Current Query {user_input} ==============")

        st.session_state.messages.append({"role": "assistant", "content": collected})

        st.rerun()
|
src/data_processing/bipolar.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/data_processing/bipolar_modified.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/data_processing/guideline_db copy.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/data_processing/image_processing.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from bs4 import BeautifulSoup
|
| 2 |
+
import json
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
def get_graph_metadata(graph, url="https://pmc.ncbi.nlm.nih.gov/articles/PMC11351064/#"):
    """Collect caption, heading and identifier metadata for one graphic element.

    Args:
        graph: A parsed HTML element (an image) that exposes the BeautifulSoup
            ``find_parent`` / ``get`` API.
        url: Prefix that the enclosing section id is appended to.
            NOTE(review): the default points at a different PMC article than
            the URLs used in main.py -- confirm which one is intended.

    Returns:
        A 9-tuple ``(attribution, caption, figure_flag, headings, image_url,
        label, name, referee_id, section_url)``.

    Raises:
        ValueError: if the graphic has no ancestor whose id matches
            ``section<N>-<M>``, or (for non-figure graphics) no enclosing
            ``<section>``.
    """
    def _text_of(node):
        # Optional elements are allowed to be absent; treat them as empty text.
        return node.get_text() if node is not None else ""

    section = graph.find_parent(id=re.compile(r'^section\d+-\d+$'))
    if section is None:
        raise ValueError("graphic has no ancestor with an id like 'section<N>-<M>'")
    section_url = url + section.get("id")

    # Join whichever of the h2/h3 headings exist instead of crashing on a
    # section that has no sub-heading.
    heading_nodes = [section.find(tag) for tag in ("h2", "h3")]
    headings = " > ".join(node.get_text() for node in heading_nodes if node is not None)

    figure = graph.find_parent("figure")
    figure_flag = figure is not None
    image_url = graph.get("src")
    attribution = ""

    if figure_flag:
        container, id_prefix = figure, "figure"
        # The caption is the first attribute-less <p> inside the figure.
        plain_paragraphs = [p.get_text() for p in figure.find_all("p") if not p.attrs]
        caption = plain_paragraphs[0] if plain_paragraphs else ""
        credit = figure.select_one('[aria-label="Attribution"]')
        if credit is not None:
            attribution = "(" + credit.get_text() + ")"
    else:
        container, id_prefix = graph.find_parent("section"), "table"
        if container is None:
            raise ValueError("table graphic is not nested inside a <section>")
        caption = _text_of(container.select_one(".caption p"))

    name = _text_of(container.select_one(".obj_head"))
    label = name + " " + caption

    # Take every dot-terminated token of the name ("Table 3.1." -> "3_1").
    # Capturing the whole token rather than one character keeps multi-digit
    # numbers distinct ("Table 12." -> "12", not "2").
    number = "_".join(re.findall(r"(\w+)\.", name)).lower()
    referee_id = f"{id_prefix}_{number}"

    return attribution, caption, figure_flag, headings, image_url, label, name, referee_id, section_url
|
| 54 |
+
|
| 55 |
+
def to_chunk(text_block, section_url, referee_id, headings):
    """Wrap one parsed text block and its metadata in the chunk dict layout.

    Returns a dict with the block under ``"text"`` and a ``"metadata"`` dict
    holding the section URL, the fixed type ``"table image"``, the referee id
    and the headings trail.
    """
    metadata = dict(
        section=section_url,
        type="table image",
        referee_id=referee_id,
        headings=headings,
    )
    return {"text": text_block, "metadata": metadata}
|
| 67 |
+
|
| 68 |
+
def main():
    """Pair every ``.graphic`` element of the source page with its parsed text.

    Reads ``../data/raw/source.html`` and ``../data/processed/parsed_images.txt``
    (text blocks separated by ``------``), keeps the blocks whose text starts
    with the name of the graphic at the same position, and writes the resulting
    chunks to ``../data/processed/graphs.json``.
    """
    with open('../data/raw/source.html', encoding="utf-8") as f:
        html = f.read()
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed, so the tree can differ between machines.
    soup = BeautifulSoup(html, "html.parser")

    with open("../data/processed/parsed_images.txt", encoding="utf-8") as f:
        text_blocks = f.read().split("------")

    docs = []
    # zip() stops at the shorter sequence, so surplus graphics or text blocks
    # are ignored.
    for graph, text_block in zip(soup.select(".graphic"), text_blocks):
        (_attribution, _caption, _figure_flag, headings, _image_url,
         _label, name, referee_id, section_url) = get_graph_metadata(graph)
        text_block = text_block.strip()

        # Only keep a block when it really belongs to this graphic.
        if text_block.startswith(name):
            docs.append(to_chunk(text_block, section_url, referee_id, headings))

    with open("../data/processed/graphs.json", "w", encoding="utf-8") as f:
        json.dump(docs, f, indent=4)
|
| 89 |
+
|
| 90 |
+
# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
src/data_processing/main.py
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import json
|
| 3 |
+
from tables import get_table_metadata, to_text, get_table_data
|
| 4 |
+
from bs4 import BeautifulSoup, Tag, NavigableString
|
| 5 |
+
|
| 6 |
+
# Abbreviation -> expansion lookup; append_definition() reads it to build the
# "Abbreviations: ..." footer appended to chunk text.
# NOTE(review): many entries (MDD, TRD, DTD, ...) look depression-specific --
# confirm the glossary suits the document being parsed.
abbr_map = {
    "ACT": "Acceptance and commitment therapy",
    "ADHD": "Attention-deficit hyperactivity disorder",
    "AI": "Artificial intelligence",
    "BA": "Behavioural activation",
    "CAM": "Complementary and alternative medicine",
    # "CANMAT":"Canadian Network for Mood and Anxiety Treatments",
    "CBASP": "Cognitive behavioural analysis system of psychotherapy",
    "CBT": "Cognitive-behavioural therapy",
    "CPD": "Continuing professional development",
    "CYP": "Cytochrome P450",
    "DBS": "Deep brain stimulation",
    "DHI": "Digital health intervention",
    "DLPFC": "Dorsolateral prefrontal cortex",
    "DSM-5": "Diagnostic and Statistical Manual",
    "DSM-5-TR": "Diagnostic and Statistical Manual, 5th edition, Text Revision",
    "DSM-IV-TR":"Diagnostic and Statistical Manual, 4th edition, Text Revision",
    "DTD": "Difficult-to-treat depression",
    "ECG": "Electrocardiography",
    "ECT": "Electroconvulsive therapy",
    "EEG": "Electroencephalography",
    "GRADE": "Grading of Recommendations Assessment, Development, and Evaluation",
    "ICD": "International Classification of Diseases",
    "IPT": "Interpersonal therapy",
    "MAOI": "Monoamine oxidase inhibitor",
    "MBC": "Measurement-based care",
    "MBCT": "Mindfulness-based cognitive therapy",
    "MCT": "Metacognitive therapy",
    "MDD": "Major depressive disorder",
    "MDE": "Major depressive episode",
    "MI": "Motivational interviewing",
    "MST": "Magnetic seizure therapy",
    "NbN": "Neuroscience-based nomenclature",
    "NDRI": "Norepinephrine-dopamine reuptake inhibitor",
    "NMDA": "N-methyl-D-aspartate",
    "NSAID": "Nonsteroidal anti-inflammatory drug",
    "PDD": "Persistent depressive disorder",
    "PDT": "Psychodynamic psychotherapy",
    "PHQ": "Patient health questionnaire",
    "PST": "Problem-solving therapy",
    "RCT": "Randomized controlled trial",
    "rTMS": "Repetitive transcranial magnetic stimulation",
    "SDM": "Shared decision-making",
    "SNRI": "Serotonin-norepinephrine reuptake inhibitor",
    "SSRI": "Selective serotonin reuptake inhibitor",
    "STPP": "Short-term psychodynamic psychotherapy",
    "TBS": "Theta burst stimulation",
    "TCA": "Tricyclic antidepressants",
    "tDCS": "Transcranial direct current stimulation",
    "TMS": "Transcranial magnetic stimulation",
    "TRD": "Treatment-resistant depression",
    "VNS": "Vagus nerve stimulation",
    "WHO": "World Health Organization",
}
|
| 60 |
+
|
| 61 |
+
def append_definition(guideline, abbreviations=None):
    """Append an "Abbreviations: ..." footer to every chunk that uses known ones.

    Args:
        guideline: List of chunk dicts with ``'text'`` and ``'metadata'`` keys.
            The list is modified in place.
        abbreviations: Optional mapping of abbreviation -> expansion. Defaults
            to the module-level ``abbr_map``.

    Returns:
        The same ``guideline`` list, for convenience.

    Chunks whose ``referee_id`` is ``'table_c'`` are left untouched.
    """
    mapping = abbr_map if abbreviations is None else abbreviations
    if not mapping:
        return guideline

    # Build the pattern from the keys themselves so that mixed-case or
    # hyphenated abbreviations ("rTMS", "DSM-5-TR") can be found; a plain
    # run-of-capitals pattern can never match them. Longest keys come first so
    # "DSM-5-TR" wins over "DSM-5", and the lookarounds reject matches that are
    # embedded in a longer word ("TMS" inside "rTMS").
    alternatives = "|".join(re.escape(key) for key in sorted(mapping, key=len, reverse=True))
    pattern = re.compile(r'(?<!\w)(?:' + alternatives + r')(?!\w)')

    for chunk in guideline:
        if chunk['metadata'].get('referee_id') == 'table_c':
            continue

        text = chunk['text']
        if not text:
            continue

        found_abbrs = set(pattern.findall(text))
        if found_abbrs:
            definitions = [f"{abbr}: {mapping[abbr]}" for abbr in sorted(found_abbrs)]
            definitions_text = "Abbreviations: " + "; ".join(definitions) + "\n\n"
            chunk['text'] = text + "\n" + definitions_text

    return guideline
|
| 87 |
+
|
| 88 |
+
def parse_title(soup):
    """Build the title chunk from the first ``<h1>`` of the document.

    Args:
        soup: Parsed document exposing ``find``.

    Returns:
        A chunk dict whose ``"text"`` is the heading's inner content (as given
        by ``decode_contents()``) with newlines removed, or an empty string
        when the document has no ``<h1>``.
    """
    heading = soup.find("h1")
    # Fall back to "" rather than None so later string concatenation on the
    # chunk text cannot fail.
    title = heading.decode_contents().replace('\n', '') if heading is not None else ""
    return {
        "text": title,
        "metadata": {
            "section": "title",
            "type": "title",
            "headings": "Title of the guideline document",
            "referenced_tables": [],
            "referee_id": ""
        }
    }
|
| 102 |
+
|
| 103 |
+
def prepend_headings_to_text(guideline):
    """Number every chunk and prefix its text with its headings trail.

    Each chunk gets ``metadata['chunk_id']`` set to its position in the list,
    and its text becomes ``"<headings> > paragraph id: <position>\\n\\n<text>"``.
    The list is modified in place; nothing is returned.
    """
    for position, chunk in enumerate(guideline):
        meta = chunk['metadata']
        meta['chunk_id'] = position
        prefix = meta['headings'] + " > paragraph id: " + str(position) + "\n\n"
        chunk['text'] = prefix + chunk['text']
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def build_headings_trail(p):
    """Return the " > "-joined chain of headings that lead to element ``p``.

    The innermost entry is the nearest preceding h2-h6 sibling of ``p``
    (``'No heading'`` if there is none). The function then walks outwards
    through enclosing elements that carry an id (``<section>``/``<figure>``
    first, ``<section>`` afterwards) and prepends the heading that precedes
    each of them.

    Args:
        p: Element exposing ``find_previous_sibling`` / ``find_parent``, or
            ``None`` when the caller found no anchor element.

    Returns:
        The trail as a single-line string; ``'No heading'`` when ``p`` is None.
    """
    # Callers pass the result of .find(...), which is None when nothing matched.
    if p is None:
        return 'No heading'

    def _is_heading(tag):
        return bool(re.match(r'^h[2-6]$', tag.name or ""))

    heading = p.find_previous_sibling(_is_heading)
    headings = heading.get_text(strip=True) if heading is not None else 'No heading'

    parent_sec = p.find_parent(["section", 'figure'], id=True)
    while parent_sec is not None:
        heading = parent_sec.find_previous_sibling(_is_heading)
        if heading is not None:
            headings = heading.get_text(strip=True) + ' > ' + headings
        parent_sec = parent_sec.find_parent("section", id=True)

    return headings.strip().replace('\n', ' ')
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def delete_bib_links(soup):
    """Remove every anchor whose href starts with ``#bdi12609-bib-``.

    The tree is edited in place and the same ``soup`` object is returned.
    """
    bib_prefix = "#bdi12609-bib-"
    for anchor in soup.find_all("a", href=True):
        if not anchor["href"].startswith(bib_prefix):
            continue
        anchor.decompose()
    return soup
|
| 129 |
+
|
| 130 |
+
def delete_fig_and_tbl_sections(soup):
    """Strip figures and table sections from the tree, in place.

    First every ``<figure>`` is removed, then every ``<section>`` found with
    ``class_="tw xbox font-sm"``. The same ``soup`` object is returned.
    """
    removal_queries = (
        ('figure', {}),
        ('section', {'class_': "tw xbox font-sm"}),
    )
    # The queries run one after the other, so the section lookup happens only
    # after the figures are already gone.
    for tag_name, filters in removal_queries:
        for node in soup.find_all(tag_name, **filters):
            node.decompose()
    return soup
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def scan_links_and_tables(p):
    """Collect the figure/table ids that element ``p`` links to.

    Every ``<a>`` whose href contains ``-fig-`` or ``-tbl-`` is inspected, and
    each ``fig-<digits>`` / ``tbl-<digits>`` fragment of the href is recorded.
    Each hit is also printed as a progress trace.

    Args:
        p: Element exposing ``find_all``, or ``None`` when the caller found no
            element to scan.

    Returns:
        A set of strings such as ``"tbl-0012"``; empty when ``p`` is None or
        holds no such links.
    """
    referenced_tables = set()
    # Callers pass the result of .find(...), which may be None.
    if p is None:
        return referenced_tables

    for link in p.find_all('a', href=re.compile(r'-(fig|tbl)-')):
        for kind, number in re.findall(r'(fig|tbl)-(\d+)', link['href']):
            print(f"Found table links: {kind}-{number}")
            referenced_tables.add(f"{kind}-{number}")
    if referenced_tables:
        print("--------")

    return referenced_tables
|
| 153 |
+
|
| 154 |
+
def parse_paragraph(soup, output):
    """Append one "paragraph" chunk to ``output`` for every ``<p>`` in ``soup``.

    Args:
        soup: Parsed document exposing ``find_all``.
        output: List that the chunk dicts are appended to (modified in place).

    The chunk's section URL uses the id of the nearest enclosing ``<section>``;
    ``"unknown"`` is used when there is no such section or it has no id.
    """
    for p in soup.find_all('p'):
        parent = p.find_parent("section")
        # .get() instead of ["id"]: a <section> without an id must not abort
        # the whole run.
        sec_id = parent.get("id", "unknown") if parent is not None else "unknown"

        output.append({
            "text": p.get_text(strip=False),
            "metadata": {
                "section": f"https://pmc.ncbi.nlm.nih.gov/articles/PMC5947163/#{sec_id}",
                "type": "paragraph",
                "headings": build_headings_trail(p),
                "referenced_tables": list(scan_links_and_tables(p)),
                "referee_id": "",
            }
        })
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def parse_figures(soup, output):
    """Append one "figure" chunk to ``output`` for every ``<figure>`` in ``soup``.

    Args:
        soup: Parsed document exposing ``find_all``.
        output: List that the chunk dicts are appended to (modified in place).

    Missing pieces (figure id, caption paragraph, image or its ``src``, first
    paragraph) are replaced by placeholders instead of raising, so one
    incomplete figure does not abort the run.
    """
    no_image = "No image link found"
    for fig in soup.find_all('figure'):
        sec_id = fig.get("id", "unknown")

        figcaption = fig.find('figcaption')
        caption_p = figcaption.find('p') if figcaption is not None else None
        fig_caption = caption_p.get_text() if caption_p is not None else ""

        img = fig.find('img')
        img_src_link = img.get('src', no_image) if img is not None else no_image

        # Yields e.g. "fig-0001" when the id contains such a fragment.
        referee_id = re.search(r'(fig)-(\d+)', sec_id)

        # The first paragraph anchors the heading lookup and the link scan;
        # guard here because it may be absent.
        p = fig.find('p')
        headings = build_headings_trail(p) if p is not None else "No heading"
        referenced = list(scan_links_and_tables(p)) if p is not None else []

        output.append({
            "text": f" > Figure: Image link: {img_src_link}-----\nFigure Caption: {fig_caption}\n",
            "metadata": {
                "section": f"https://pmc.ncbi.nlm.nih.gov/articles/PMC5947163/#{sec_id}",
                "type": "figure",
                "headings": headings,
                "referenced_tables": referenced,
                "referee_id": referee_id.group(0) if referee_id else "fig_unknown",
            }
        })
|
| 191 |
+
|
| 192 |
+
def parse_tables(soup, output):
    """Append one "table" chunk to ``output`` for every table section in ``soup``.

    Table sections are the ``<section>`` elements found with
    ``class_="tw xbox font-sm"``. A section containing an ``<img>`` is treated
    as an image table and described by its image link and caption; any other
    section is treated as an HTML table and rendered to text through
    ``get_table_metadata`` / ``get_table_data`` / ``to_text``.

    Args:
        soup: Parsed document exposing ``find_all``.
        output: List that the chunk dicts are appended to (modified in place).
    """
    for table in soup.find_all("section", class_="tw xbox font-sm"):
        sec_id = table.get("id", "unknown")
        # Yields e.g. "tbl-0012" when the id contains such a fragment.
        referee_id = re.search(r'(tbl)-(\d+)', sec_id)

        img = table.find('img')
        if img is not None:  # image table
            img_src_link = img.get('src', "No image link found")
            caption = "".join(
                cap.get_text(strip=True) + " "
                for cap in table.find_all('div', class_='caption p')
            )
            text = f" > Table: Image link: {img_src_link}-----\nTable Caption: {caption}\n"
            anchor = table.find('p')
        else:  # HTML table
            name, caption, footnotes, headings, label, ref_id, section_url = get_table_metadata(table, base_url="")
            table_data = get_table_data(table, footnotes)
            text = to_text(table_data, label, caption)
            anchor = table.find('div')

        # Both kinds share one chunk layout; only the text and the element
        # used to locate the headings differ. The anchor may be absent.
        output.append({
            "text": text,
            "metadata": {
                "section": f"https://pmc.ncbi.nlm.nih.gov/articles/PMC5947163/#{sec_id}",
                "type": "table",
                "headings": build_headings_trail(anchor) if anchor is not None else "No heading",
                "referenced_tables": [],
                "referee_id": referee_id.group(0) if referee_id else "tbl_unknown",
            }
        })
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def parse_main_article(soup, output):
    """Run the figure, table and paragraph extractors over the article.

    Bibliography links are removed first. Figures and tables are extracted
    while they are still in the tree, then stripped so that the paragraph pass
    only sees what remains. Chunks are appended to ``output``, which is also
    returned.
    """
    cleaned = delete_bib_links(soup)

    for extract in (parse_figures, parse_tables):
        extract(cleaned, output)

    parse_paragraph(delete_fig_and_tbl_sections(cleaned), output)
    return output
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
def main():
    """Parse ``bipolar.html`` into chunks and write them to ``guideline_db.json``.

    The title chunk comes first, followed by whatever ``parse_main_article``
    appends. Every chunk then gets its headings prefix and, where applicable,
    an abbreviations footer before the list is dumped as JSON.
    """
    with open('bipolar.html', 'r', encoding='utf-8') as source:
        soup = BeautifulSoup(source.read(), "html.parser")

    chunks = parse_main_article(soup, [parse_title(soup)])
    print(f"Parsed {len(chunks)} paragraphs from the main article.")

    prepend_headings_to_text(chunks)
    append_definition(chunks)

    with open("guideline_db.json", "w", encoding="utf-8") as target:
        json.dump(chunks, target, ensure_ascii=False, indent=4)
    print(f"guideline_db.json for bipolar created with {len(chunks)} chunks.")
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def write_referenced_tables():
    """Attach hand-written table text to three chunks and export chunks 1-33.

    Reads ``guideline_db.json``, appends the stripped content of
    ``table12_textual.txt``, ``table14_textual.txt`` and ``table17_textual.txt``
    to the chunks at positions 21, 23 and 26, writes the database back, and
    writes the slice ``[1:34]`` to ``referenced_table_chunks.json``.

    NOTE(review): the positions and the slice are hard-coded and presumably
    match the order produced by main(); re-check them if parsing changes.
    """
    table_text_by_position = (
        (21, "table12_textual.txt"),
        (23, "table14_textual.txt"),
        (26, "table17_textual.txt"),
    )

    with open("guideline_db.json", "r", encoding="utf-8") as f:
        guideline_db = json.load(f)

    for position, path in table_text_by_position:
        with open(path, "r", encoding="utf-8") as f:
            table_text = f.read().strip()
        chunk = guideline_db[position]
        # The database is read from and written back to the same file, so a
        # second run would otherwise append the same table text again.
        if not chunk['text'].endswith(table_text):
            chunk['text'] += table_text

    figures_and_tables = guideline_db[1:34]  # Assuming these are the table chunks

    # Write back to the original file.
    with open("guideline_db.json", "w", encoding="utf-8") as f:
        json.dump(guideline_db, f, ensure_ascii=False, indent=4)
    print("guideline_db.json updated with table 12 14 17 chunks.")

    with open("referenced_table_chunks.json", "w", encoding="utf-8") as f:
        json.dump(figures_and_tables, f, ensure_ascii=False, indent=4)
    print(f"referenced_table_chunks.json created with {len(figures_and_tables)} chunks.")
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
if __name__ == "__main__":
    # main()
    # NOTE(review): main() is disabled here, so guideline_db.json must already
    # exist in the working directory -- write_referenced_tables() opens it for
    # reading before rewriting it.

    # Attach the textual tables and write referenced_table_chunks.json.
    write_referenced_tables()
|
src/data_processing/table12_textual.txt
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
#### Table 12. Hierarchical rankings of first and second‐line treatments recommended for management of acute mania
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
##### First-line treatments: Monotherapies
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
###### Lithium
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
####### Level of evidence by phase of treatment
|
| 12 |
+
|
| 13 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Acute depression: level 2 evidence.
|
| 14 |
+
|
| 15 |
+
####### Considerations for treatment selection
|
| 16 |
+
|
| 17 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 18 |
+
|
| 19 |
+
###### Quetiapine
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
####### Level of evidence by phase of treatment
|
| 23 |
+
|
| 24 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Acute depression: level 1 evidence.
|
| 25 |
+
|
| 26 |
+
####### Considerations for treatment selection
|
| 27 |
+
|
| 28 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 29 |
+
|
| 30 |
+
###### Divalproex
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
####### Level of evidence by phase of treatment
|
| 34 |
+
|
| 35 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of mania during maintenance: level 3 evidence. Prevention of depression during maintenance: level 2 evidence. Acute depression: level 2 evidence.
|
| 36 |
+
|
| 37 |
+
####### Considerations for treatment selection
|
| 38 |
+
|
| 39 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 40 |
+
|
| 41 |
+
###### Asenapine
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
####### Level of evidence by phase of treatment
|
| 45 |
+
|
| 46 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Prevention of depression during maintenance: level 2 evidence. Acute depression: no data.
|
| 47 |
+
|
| 48 |
+
####### Considerations for treatment selection
|
| 49 |
+
|
| 50 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 51 |
+
|
| 52 |
+
###### Aripiprazole
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
####### Level of evidence by phase of treatment
|
| 56 |
+
|
| 57 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; although monotherapies are listed above combination therapies in the hierarchy, combination therapies may be indicated as the preferred choice in patients with previous history of partial response to monotherapy and in those with psychotic mania or in situations where rapid response is desirable. Acute depression: level 1 negative evidence.
|
| 58 |
+
|
| 59 |
+
####### Considerations for treatment selection
|
| 60 |
+
|
| 61 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 62 |
+
|
| 63 |
+
###### Paliperidone (>6 mg)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
####### Level of evidence by phase of treatment
|
| 67 |
+
|
| 68 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; although monotherapies are listed above combination therapies in the hierarchy, combination therapies may be indicated as the preferred choice in patients with previous history of partial response to monotherapy and in those with psychotic mania or in situations where rapid response is desirable. Acute depression: no data.
|
| 69 |
+
|
| 70 |
+
####### Considerations for treatment selection
|
| 71 |
+
|
| 72 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 73 |
+
|
| 74 |
+
###### Risperidone
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
####### Level of evidence by phase of treatment
|
| 78 |
+
|
| 79 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 3 evidence. Prevention of mania during maintenance: level 3 evidence. Prevention of depression during maintenance: no data. Acute depression: no data.
|
| 80 |
+
|
| 81 |
+
####### Considerations for treatment selection
|
| 82 |
+
|
| 83 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 84 |
+
|
| 85 |
+
###### Cariprazine
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
####### Level of evidence by phase of treatment
|
| 89 |
+
|
| 90 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: no data. Prevention of mania during maintenance: no data. Prevention of depression during maintenance: no data. Acute depression: level 1 evidence.
|
| 91 |
+
|
| 92 |
+
####### Considerations for treatment selection
|
| 93 |
+
|
| 94 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: limited impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 95 |
+
|
| 96 |
+
##### First-line treatments: Combination therapies
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
###### Quetiapine and Lithium/divalproex
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
####### Level of evidence by phase of treatment
|
| 103 |
+
|
| 104 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Acute depression: level 4 evidence; no controlled trials; however, clinical experience suggests that it is a useful strategy.
|
| 105 |
+
|
| 106 |
+
####### Considerations for treatment selection
|
| 107 |
+
|
| 108 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 109 |
+
|
| 110 |
+
###### Aripiprazole and Lithium/divalproex
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
####### Level of evidence by phase of treatment
|
| 114 |
+
|
| 115 |
+
Acute mania: level 2 evidence. Prevention of any mood episode during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Acute depression: level 4 evidence.
|
| 116 |
+
|
| 117 |
+
####### Considerations for treatment selection
|
| 118 |
+
|
| 119 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 120 |
+
|
| 121 |
+
###### Risperidone and Lithium/divalproex
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
####### Level of evidence by phase of treatment
|
| 125 |
+
|
| 126 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Prevention of depression during maintenance: no data. Acute depression: level 4 evidence.
|
| 127 |
+
|
| 128 |
+
####### Considerations for treatment selection
|
| 129 |
+
|
| 130 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 131 |
+
|
| 132 |
+
###### Asenapine and Lithium/divalproex
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
####### Level of evidence by phase of treatment
|
| 136 |
+
|
| 137 |
+
Acute mania: level 2 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Prevention of depression during maintenance: no data. Acute depression: level 4 evidence.
|
| 138 |
+
|
| 139 |
+
####### Considerations for treatment selection
|
| 140 |
+
|
| 141 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 142 |
+
|
| 143 |
+
##### Second-line treatments
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
###### Olanzapine
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
####### Level of evidence by phase of treatment
|
| 150 |
+
|
| 151 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Acute depression: level 1 evidence; did not separate from placebo on core symptoms of depression.
|
| 152 |
+
|
| 153 |
+
####### Considerations for treatment selection
|
| 154 |
+
|
| 155 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 156 |
+
|
| 157 |
+
###### Carbamazepine
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
####### Level of evidence by phase of treatment
|
| 161 |
+
|
| 162 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Prevention of depression during maintenance: level 2 evidence. Acute depression: level 3 evidence.
|
| 163 |
+
|
| 164 |
+
####### Considerations for treatment selection
|
| 165 |
+
|
| 166 |
+
Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 167 |
+
|
| 168 |
+
###### Olanzapine and Lithium/divalproex
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
####### Level of evidence by phase of treatment
|
| 172 |
+
|
| 173 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Prevention of depression during maintenance: level 4 evidence. Acute depression: no data.
|
| 174 |
+
|
| 175 |
+
####### Considerations for treatment selection
|
| 176 |
+
|
| 177 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 178 |
+
|
| 179 |
+
###### Lithium and divalproex
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
####### Level of evidence by phase of treatment
|
| 183 |
+
|
| 184 |
+
Acute mania: level 2 evidence. Prevention of any mood episode during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Prevention of depression during maintenance: no data. Acute depression: no data.
|
| 185 |
+
|
| 186 |
+
####### Considerations for treatment selection
|
| 187 |
+
|
| 188 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 189 |
+
|
| 190 |
+
###### Ziprasidone
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
####### Level of evidence by phase of treatment
|
| 194 |
+
|
| 195 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: level 3 evidence. Prevention of mania during maintenance: level 3 evidence. Prevention of depression during maintenance: no data. Acute depression: level 1 negative evidence.
|
| 196 |
+
|
| 197 |
+
####### Considerations for treatment selection
|
| 198 |
+
|
| 199 |
+
Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 200 |
+
|
| 201 |
+
###### Haloperidol
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
####### Level of evidence by phase of treatment
|
| 205 |
+
|
| 206 |
+
Acute mania: level 1 evidence. Prevention of any mood episode during maintenance: no data. Prevention of mania during maintenance: level 4 evidence. Prevention of depression during maintenance: level 4 negative evidence. Acute depression: no data.
|
| 207 |
+
|
| 208 |
+
####### Considerations for treatment selection
|
| 209 |
+
|
| 210 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: moderate impact on treatment selection.
|
| 211 |
+
|
| 212 |
+
###### ECT
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
####### Level of evidence by phase of treatment
|
| 216 |
+
|
| 217 |
+
Acute mania: level 3 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Prevention of depression during maintenance: level 4 evidence. Acute depression: level 4 evidence.
|
| 218 |
+
|
| 219 |
+
####### Considerations for treatment selection
|
| 220 |
+
|
| 221 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of depressive switch: limited impact on treatment selection.
|
| 222 |
+
|
| 223 |
+
|
src/data_processing/table14_textual.txt
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
#### Table 14. Hierarchical rankings of first‐ and second‐line treatments recommended for management of acute bipolar I depression
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
##### First-line treatments
|
| 7 |
+
|
| 8 |
+
###### Quetiapine
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
####### Level of evidence by phase of treatment
|
| 12 |
+
|
| 13 |
+
Acute depression: level 1 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Acute mania: level 1 evidence.
|
| 14 |
+
|
| 15 |
+
####### Considerations for treatment selection
|
| 16 |
+
|
| 17 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
|
| 18 |
+
|
| 19 |
+
###### Lurasidone and Lithium/divalproex
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
####### Level of evidence by phase of treatment
|
| 23 |
+
|
| 24 |
+
Acute depression: level 1 evidence. Prevention of any mood episode during maintenance: level 3 evidence; trend for superiority on the primary efficacy measure, hence the lower rating. Prevention of depression during maintenance: level 3 evidence; effective in those with an index episode of depression. Prevention of mania during maintenance: level 4 evidence; negative data from the trial are probably due to methodological issues; rating based on expert opinion. Acute mania: no data.
|
| 25 |
+
|
| 26 |
+
####### Considerations for treatment selection
|
| 27 |
+
|
| 28 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate / limited impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
|
| 29 |
+
|
| 30 |
+
###### Lithium
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
####### Level of evidence by phase of treatment
|
| 34 |
+
|
| 35 |
+
Acute depression: level 2 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Acute mania: no data.
|
| 36 |
+
|
| 37 |
+
####### Considerations for treatment selection
|
| 38 |
+
|
| 39 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
|
| 40 |
+
|
| 41 |
+
###### Lamotrigine
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
####### Level of evidence by phase of treatment
|
| 45 |
+
|
| 46 |
+
Acute depression: level 2 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 2 evidence. Acute mania: level 1 negative evidence.
|
| 47 |
+
|
| 48 |
+
####### Considerations for treatment selection
|
| 49 |
+
|
| 50 |
+
Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: limited impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: limited impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
|
| 51 |
+
|
| 52 |
+
###### Lurasidone
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
####### Level of evidence by phase of treatment
|
| 56 |
+
|
| 57 |
+
Acute depression: level 2 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of depression during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Acute mania: no data.
|
| 58 |
+
|
| 59 |
+
####### Considerations for treatment selection
|
| 60 |
+
|
| 61 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
|
| 62 |
+
|
| 63 |
+
###### Lamotrigine (adj)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
####### Level of evidence by phase of treatment
|
| 67 |
+
|
| 68 |
+
Acute depression: level 2 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of depression during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Acute mania: level 4 negative evidence.
|
| 69 |
+
|
| 70 |
+
####### Considerations for treatment selection
|
| 71 |
+
|
| 72 |
+
Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
|
| 73 |
+
|
| 74 |
+
##### Second-line treatments
|
| 75 |
+
|
| 76 |
+
###### Divalproex
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
####### Level of evidence by phase of treatment
|
| 80 |
+
|
| 81 |
+
Acute depression: level 2 evidence. Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 2 evidence. Prevention of mania during maintenance: level 3 evidence. Acute mania: level 1 evidence.
|
| 82 |
+
|
| 83 |
+
####### Considerations for treatment selection
|
| 84 |
+
|
| 85 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
|
| 86 |
+
|
| 87 |
+
###### SSRIs/bupropion (adj)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
####### Level of evidence by phase of treatment
|
| 91 |
+
|
| 92 |
+
Acute depression: level 1 evidence. Prevention of any mood episode during maintenance: no data. Prevention of depression during maintenance: level 4 evidence. Prevention of mania during maintenance: no data. Acute mania: no data.
|
| 93 |
+
|
| 94 |
+
####### Considerations for treatment selection
|
| 95 |
+
|
| 96 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of manic / hypomanic switch: minor impact on treatment selection.
|
| 97 |
+
|
| 98 |
+
###### ECT
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
####### Level of evidence by phase of treatment
|
| 102 |
+
|
| 103 |
+
Acute depression: level 4 evidence. Prevention of any mood episode during maintenance: level 4 evidence. Prevention of depression during maintenance: level 4 evidence. Prevention of mania during maintenance: level 4 evidence. Acute mania: level 3 evidence.
|
| 104 |
+
|
| 105 |
+
####### Considerations for treatment selection
|
| 106 |
+
|
| 107 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
|
| 108 |
+
|
| 109 |
+
###### Cariprazine
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
####### Level of evidence by phase of treatment
|
| 113 |
+
|
| 114 |
+
Acute depression: level 1 evidence. Prevention of any mood episode during maintenance: no data. Prevention of depression during maintenance: no data. Prevention of mania during maintenance: no data. Acute mania: level 1 evidence.
|
| 115 |
+
|
| 116 |
+
####### Considerations for treatment selection
|
| 117 |
+
|
| 118 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: limited impact on treatment selection. Risk of manic / hypomanic switch: limited impact on treatment selection.
|
| 119 |
+
|
| 120 |
+
###### Olanzapine-fluoxetine
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
####### Level of evidence by phase of treatment
|
| 124 |
+
|
| 125 |
+
Acute depression: level 2 evidence. Prevention of any mood episode during maintenance: no data. Prevention of depression during maintenance: no data. Prevention of mania during maintenance: no data. Acute mania: no data.
|
| 126 |
+
|
| 127 |
+
####### Considerations for treatment selection
|
| 128 |
+
|
| 129 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection. Risk of manic / hypomanic switch: minor impact on treatment selection.
|
| 130 |
+
|
| 131 |
+
|
src/data_processing/table17_textual.txt
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
#### Table 17. Hierarchical rankings of first‐ and second‐line treatments recommended for maintenance treatment in bipolar disorder
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
##### First-line treatments
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
###### Lithium
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
####### Level of evidence by phase of treatment
|
| 14 |
+
|
| 15 |
+
Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Acute Depression: level 2 evidence. Acute Mania: level 1 evidence.
|
| 16 |
+
|
| 17 |
+
####### Considerations for treatment selection
|
| 18 |
+
|
| 19 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection.
|
| 20 |
+
|
| 21 |
+
###### Quetiapine
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
####### Level of evidence by phase of treatment
|
| 25 |
+
|
| 26 |
+
Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Acute Depression: level 1 evidence. Acute Mania: level 1 evidence.
|
| 27 |
+
|
| 28 |
+
####### Considerations for treatment selection
|
| 29 |
+
|
| 30 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection.
|
| 31 |
+
|
| 32 |
+
###### Lamotrigine
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
####### Level of evidence by phase of treatment
|
| 36 |
+
|
| 37 |
+
Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 2 evidence. Acute Depression: level 1 evidence. Acute Mania: level 1 negative evidence.
|
| 38 |
+
|
| 39 |
+
####### Considerations for treatment selection
|
| 40 |
+
|
| 41 |
+
Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: limited impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: limited impact on treatment selection.
|
| 42 |
+
|
| 43 |
+
###### Asenapine
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
####### Level of evidence by phase of treatment
|
| 47 |
+
|
| 48 |
+
Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Acute Depression: no data. Acute Mania: level 1 evidence.
|
| 49 |
+
|
| 50 |
+
####### Considerations for treatment selection
|
| 51 |
+
|
| 52 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection.
|
| 53 |
+
|
| 54 |
+
###### Quetiapine + Lithium/divalproex
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
####### Level of evidence by phase of treatment
|
| 58 |
+
|
| 59 |
+
Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Acute Depression: level 4 evidence. Acute Mania: level 1 evidence.
|
| 60 |
+
|
| 61 |
+
####### Considerations for treatment selection
|
| 62 |
+
|
| 63 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection.
|
| 64 |
+
|
| 65 |
+
###### Aripiprazole + Lithium/divalproex
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
####### Level of evidence by phase of treatment
|
| 69 |
+
|
| 70 |
+
Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Prevention of mania during maintenance: level 2 evidence. Acute Depression: level 4 evidence. Acute Mania: level 2 evidence.
|
| 71 |
+
|
| 72 |
+
####### Considerations for treatment selection
|
| 73 |
+
|
| 74 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection.
|
| 75 |
+
|
| 76 |
+
###### Aripiprazole
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
####### Level of evidence by phase of treatment
|
| 80 |
+
|
| 81 |
+
Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Prevention of mania during maintenance: level 2 evidence. Acute Depression: level 1 negative evidence. Acute Mania: level 1 evidence.
|
| 82 |
+
|
| 83 |
+
####### Considerations for treatment selection
|
| 84 |
+
|
| 85 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection.
|
| 86 |
+
|
| 87 |
+
###### Aripiprazole OM
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
####### Level of evidence by phase of treatment
|
| 91 |
+
|
| 92 |
+
Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Prevention of mania during maintenance: level 2 evidence. Acute Depression: no data. Acute Mania: no data.
|
| 93 |
+
|
| 94 |
+
####### Considerations for treatment selection
|
| 95 |
+
|
| 96 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: limited impact on treatment selection. Maintenance phase tolerability concerns: minor impact on treatment selection.
|
| 97 |
+
|
| 98 |
+
##### Second-line treatments
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
###### Olanzapine
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
####### Level of evidence by phase of treatment
|
| 105 |
+
|
| 106 |
+
Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: level 1 evidence. Prevention of mania during maintenance: level 1 evidence. Acute Depression: level 1 evidence; did not separate from placebo on core symptoms of depression. Acute Mania: level 1 evidence.
|
| 107 |
+
|
| 108 |
+
####### Considerations for treatment selection
|
| 109 |
+
|
| 110 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection.
|
| 111 |
+
|
| 112 |
+
###### Risperidone LAI
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
####### Level of evidence by phase of treatment
|
| 116 |
+
|
| 117 |
+
Prevention of any mood episode during maintenance: level 1 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Prevention of mania during maintenance: level 1 evidence. Acute Depression: no data. Acute Mania: no data.
|
| 118 |
+
|
| 119 |
+
####### Considerations for treatment selection
|
| 120 |
+
|
| 121 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection.
|
| 122 |
+
|
| 123 |
+
###### Risperidone LAI (adj)
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
####### Level of evidence by phase of treatment
|
| 127 |
+
|
| 128 |
+
Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: level 4 evidence. Prevention of mania during maintenance: level 2 evidence. Acute Depression: no data. Acute Mania: no data.
|
| 129 |
+
|
| 130 |
+
####### Considerations for treatment selection
|
| 131 |
+
|
| 132 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: significant impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection.
|
| 133 |
+
|
| 134 |
+
###### Carbamazepine
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
####### Level of evidence by phase of treatment
|
| 138 |
+
|
| 139 |
+
Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: level 2 evidence. Prevention of mania during maintenance: level 2 evidence. Acute Depression: level 3 evidence. Acute Mania: level 1 evidence.
|
| 140 |
+
|
| 141 |
+
####### Considerations for treatment selection
|
| 142 |
+
|
| 143 |
+
Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate impact on treatment selection.
|
| 144 |
+
|
| 145 |
+
###### Paliperidone (>6 mg)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
####### Level of evidence by phase of treatment
|
| 149 |
+
|
| 150 |
+
Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Prevention of mania during maintenance: level 2 evidence. Acute Depression: no data. Acute Mania: level 1 evidence.
|
| 151 |
+
|
| 152 |
+
####### Considerations for treatment selection
|
| 153 |
+
|
| 154 |
+
Acute phase safety concerns: limited impact on treatment selection. Acute phase tolerability concerns: minor impact on treatment selection. Maintenance phase safety concerns: minor impact on treatment selection. Maintenance phase tolerability concerns: moderate impact on treatment selection.
|
| 155 |
+
|
| 156 |
+
###### Lurasidone + Lithium/divalproex
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
####### Level of evidence by phase of treatment
|
| 160 |
+
|
| 161 |
+
Prevention of any mood episode during maintenance: level 3 evidence; trend for superiority on the primary efficacy measure, hence the lower rating. Prevention of depression during maintenance: level 3 evidence; effective in those with an index episode of depression. Prevention of mania during maintenance: level 4 evidence. Acute Depression: level 2 evidence. Acute Mania: no data.
|
| 162 |
+
|
| 163 |
+
####### Considerations for treatment selection
|
| 164 |
+
|
| 165 |
+
Acute phase safety concerns: minor impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: moderate / limited impact on treatment selection.
|
| 166 |
+
|
| 167 |
+
###### Ziprasidone + Lithium/divalproex
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
####### Level of evidence by phase of treatment
|
| 171 |
+
|
| 172 |
+
Prevention of any mood episode during maintenance: level 2 evidence. Prevention of depression during maintenance: no data; did not separate from placebo in those with index mania; no studies available in index depression. Prevention of mania during maintenance: level 2 evidence. Acute Depression: level 3 negative evidence. Acute Mania: level 2 negative evidence.
|
| 173 |
+
|
| 174 |
+
####### Considerations for treatment selection
|
| 175 |
+
|
| 176 |
+
Acute phase safety concerns: moderate impact on treatment selection. Acute phase tolerability concerns: moderate impact on treatment selection. Maintenance phase safety concerns: moderate impact on treatment selection; divalproex and carbamazepine should be used with caution in women of childbearing age. Maintenance phase tolerability concerns: minor impact on treatment selection.
|
| 177 |
+
|
| 178 |
+
|
src/data_processing/tables.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from bs4 import BeautifulSoup
|
| 2 |
+
import json
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import re
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def get_table_metadata(table, base_url):
    """Collect name, caption, footnotes and location metadata for one table.

    Args:
        table: A BeautifulSoup ``<table>`` tag (only ``find_parent`` and
            ``find`` are called on it directly).
        base_url: URL prefix; the id of the enclosing ``<section>`` is
            appended to it to build ``section_url``.

    Returns:
        A 7-tuple ``(name, caption, footnotes, headings, label, referee_id,
        section_url)``:
          * name: text of the section's first ``<h4>`` (or of the table's
            ``<caption>`` when there is no section), default ``"Table"``.
          * caption: text of ``.caption p`` in the section (or of the table's
            ``<caption>``), default ``""``.
          * footnotes: dict mapping a footnote marker to its text.
          * headings: ``"heading > parent heading"``, skipping empty parts.
          * label: ``"Table N. <caption>"`` or ``"Table"`` if N is unknown.
          * referee_id: ``"table_N"`` or ``"table_unknown"``.
          * section_url: ``base_url + section id`` or just ``base_url``.

    Side effects:
        Prints the detected table section id and the table name to stdout.
    """
    # Find the nearest ancestor <section> that has a non-empty id.
    section = table.find_parent('section')
    while section and not section.get('id'):
        section = section.find_parent('section')

    section_id = section.get("id") if section else None
    if section_id:
        section_url = base_url + section_id
        # Main heading: the element whose data-anchor-id equals the section id.
        # Match the attribute value directly instead of interpolating it into
        # a CSS selector: an unquoted selector value must be a valid CSS
        # identifier, so ids such as "1.2-tbl-0003" would raise a selector
        # syntax error.
        heading_el = section.find(attrs={"data-anchor-id": section_id})
        section_heading = heading_el.get_text(strip=True) if heading_el else ""
        # Sub-heading: the title of the parent section, if there is one.
        parent_sec = section.find_parent('section')
        subheading_el = parent_sec.select_one(".pmc_sec_title") if parent_sec else None
        section_subheading = subheading_el.get_text(strip=True) if subheading_el else ""
        headings = " > ".join(filter(None, [section_heading, section_subheading]))
    else:
        # Fallback if no section id is found.
        section_url = base_url
        headings = ""

    # Table name and caption.
    name_el = section.find("h4") if section else table.find("caption")
    name = name_el.get_text(strip=True) if name_el else "Table"
    caption_el = section.select_one('.caption p') if section else table.find("caption")
    caption = caption_el.get_text(strip=True) if caption_el else ""

    # Derive referee_id from the id of the enclosing section,
    # e.g. "...-tbl-0003" -> number "3" -> referee_id "table_3".
    # This lookup accepts any section that merely *has* an id attribute, so it
    # is kept separate from `section` above (which requires a non-empty id).
    id_section = table.find_parent("section", id=True)
    table_id = id_section["id"] if id_section and "tbl-" in id_section["id"] else None
    print(table_id)
    match = re.search(r"tbl-(\d+)", table_id or "")
    number = match.group(1).lstrip("0") if match else ""
    referee_id = f"table_{number}" if number else "table_unknown"
    label = f"Table {number}. " + caption if number else "Table"

    # Collect footnotes.
    footnotes = {}
    # Case 1: <sup> marker is a sibling placed before the <p> holding the text.
    for sup in id_section.select('.fn sup') if id_section else []:
        sibling = sup.find_next_sibling("p")
        if sibling:
            key = sup.get_text(strip=True)
            footnotes[key] = sibling.get_text(strip=True)

    # Case 2: marker is inside the <p>; only "*" and "#" markers are recognised,
    # e.g. "* text # other text". These overwrite case-1 entries on key clash.
    for p in id_section.select('.fn p') if id_section else []:
        matches = re.findall(r"(?<=(\*|#))\s*(.*?)(?=\s\*|\s#|$)", p.get_text())
        for key, text in matches:
            footnotes[key] = text.strip()
    print(name)
    return name, caption, footnotes, headings, label, referee_id, section_url
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _span(cell, attr):
    """Return a cell's ``colspan``/``rowspan`` as an int, or 1 if absent or malformed."""
    try:
        return int(cell.get(attr, 1))
    except (TypeError, ValueError):
        return 1


def _consume_carried(rowspan_tracker, row, col_index):
    """Append values carried into ``col_index`` (and following columns) by earlier rowspans.

    Returns the first column index that is not occupied by a carried cell.
    """
    while col_index in rowspan_tracker:
        value, remaining = rowspan_tracker[col_index]
        row.append(value)
        remaining -= 1
        if remaining:
            rowspan_tracker[col_index] = (value, remaining)
        else:
            del rowspan_tracker[col_index]
        col_index += 1
    return col_index


def get_table_data(table, footnotes):
    """Flatten an HTML table into a list of rows of cell strings.

    Args:
        table: A BeautifulSoup ``<table>`` tag (``find_all("tr")`` is called on it).
        footnotes: dict mapping a footnote marker (the text of a ``<sup>``) to
            the footnote text; matching text is appended to the cell in
            parentheses.

    Returns:
        A list of rows, each a list of strings. Cells with ``rowspan`` are
        repeated in the rows they span. A cell with ``colspan > 1`` is not
        emitted as a cell; its text becomes the current "subsection" label,
        which is inserted at index 0 of that row (if it has other cells) and
        of every following row. Rows that end up empty are dropped.

    Note:
        A cell that has both ``colspan > 1`` and ``rowspan > 1`` is still only
        treated as a subsection label; its rowspan is not tracked.
    """
    table_data = []
    # Maps grid column -> (cell text, number of further rows it must fill).
    rowspan_tracker = {}
    subsec = ""

    for tr in table.find_all("tr"):
        row = []
        col_index = 0

        for cell in tr.find_all(["th", "td"]):
            # Cells carried over by a rowspan may sit in ANY column, not only
            # the leading ones, so check before placing every cell.
            col_index = _consume_carried(rowspan_tracker, row, col_index)

            cell_text = cell.get_text(separator="\n", strip=True)
            cell_sups = [sup.get_text() for sup in cell.find_all("sup")]

            # Normalize text if superscripts are inside: drop one-character
            # fragments (heuristic for the footnote markers themselves).
            if cell_sups:
                lines = [t for t in cell_text.split("\n") if len(t) > 1]
                cell_text = " ".join(lines)

            # Append footnote text if any.
            for sup in cell_sups:
                if sup in footnotes:
                    cell_text += f" ({footnotes[sup]})"

            # Handle colspan as a subsection marker.
            colspan = _span(cell, "colspan")
            if colspan > 1:
                subsec = cell_text
                # Still account for the grid columns it occupies so rowspans
                # recorded later in this row are keyed by their true column.
                col_index += colspan
                continue

            row.append(cell_text)

            # Track rowspan for this column.
            rowspan = _span(cell, "rowspan")
            if rowspan > 1:
                rowspan_tracker[col_index] = (cell_text, rowspan - 1)

            col_index += 1

        # Carried cells that come after the last real cell of this row.
        _consume_carried(rowspan_tracker, row, col_index)

        if row:
            if subsec:
                row.insert(0, subsec)
            table_data.append(row)

    return table_data
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def to_text(table_data, label, caption):
    """Render table rows as a bracketed text block for retrieval.

    The first row of ``table_data`` is used as the header; every following row
    is written as ``{Row i - header: value, ...}`` with empty values skipped.

    Args:
        table_data: list of rows (lists of strings), header row first.
        label: title placed in bold on the first line.
        caption: unused; kept so existing callers keep working.

    Returns:
        ``"[" + lines joined by newlines + "]"``.
    """
    lines = [f"**{label}**"]

    # Skip header row when enumerating data rows.
    headers = table_data[0] if table_data else []
    for i, row in enumerate(table_data[1:], start=1):
        parts = []
        values = row
        # A row exactly one element longer than the header carries a
        # subsection label at index 0 (get_table_data prepends it). Pairing it
        # with the headers positionally would shift every value under the
        # wrong header and drop the last column, so emit it on its own.
        if len(row) == len(headers) + 1:
            section, values = row[0], row[1:]
            if section:
                parts.append(f"Section: {section}")
        parts.extend(f"{h}: {v}" for h, v in zip(headers, values) if v)
        row_text = ", ".join(parts)
        lines.append(f"{{Row {i} - {row_text}}}")

    return "[" + "\n".join(lines) + "]"
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def to_chunk(text_block, section_url, referee_id, headings):
    """Wrap a rendered table in the chunk dict format.

    Args:
        text_block: the table text.
        section_url: stored under ``metadata["section"]``.
        referee_id: stored under ``metadata["referee_id"]``.
        headings: accepted but currently not stored in the chunk.

    Returns:
        ``{"text": ..., "metadata": {"section", "type", "referee_id"}}``.
    """
    metadata = dict(
        section=section_url,
        type="HTML table",
        referee_id=referee_id,
    )
    return dict(text=text_block, metadata=metadata)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def tables_to_json(input_path="bipolar.html", base_url="https://pmc.ncbi.nlm.nih.gov/articles/PMC5947163/#"):
    """Convert every ``<table>`` in an HTML file into a chunk dict.

    Args:
        input_path: path of the HTML file, read as UTF-8.
        base_url: URL prefix passed on to ``get_table_metadata``.

    Returns:
        A list with one ``to_chunk`` dict per table, in document order.

    Side effects:
        Prints the number of tables found (plus whatever
        ``get_table_metadata`` prints per table).
    """
    with open(input_path, encoding="utf-8") as handle:
        markup = handle.read()
    all_tables = BeautifulSoup(markup, features="html.parser").find_all("table")

    print(f"Found {len(all_tables)} tables in document.")

    chunks = []
    for tbl in all_tables:
        meta = get_table_metadata(tbl, base_url)
        # The table name (first item) is not needed for the chunk.
        _name, caption, footnotes, headings, label, referee_id, section_url = meta
        rows = get_table_data(tbl, footnotes)
        rendered = to_text(rows, label, caption)
        chunks.append(to_chunk(rendered, section_url, referee_id, headings))

    return chunks
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
if __name__ == "__main__":
    # Script entry point. The export below is currently disabled; when
    # enabled it converts the tables of the default input file into chunks
    # and writes them to tables.json.
    # doc = tables_to_json()
    # with open("tables.json", "w", encoding="utf-8") as f:
    #     json.dump(doc, f, indent=4)
    pass
|
src/precompute.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from sentence_transformers import SentenceTransformer
|
| 3 |
+
from Rag import load_json_to_db, make_embeddings, save_embeddings # Adjust import
|
| 4 |
+
|
| 5 |
+
def precompute_and_save(embedder_name, db_path):
    """Compute embeddings for the guideline database and persist them.

    Steps, each announced on stdout: load the database from ``db_path``,
    load the SentenceTransformer named ``embedder_name``, compute the
    embeddings, and save them.

    Args:
        embedder_name: model identifier handed to ``SentenceTransformer`` and
            to the ``Rag`` helpers.
        db_path: path passed to ``load_json_to_db``.
    """
    print("Loading database...")
    records = load_json_to_db(db_path)

    print(f"Loading embedder: {embedder_name}")
    # NOTE(review): trust_remote_code=True lets the model repository run its
    # own Python code on load; only use with model ids you trust.
    embedder = SentenceTransformer(embedder_name, trust_remote_code=True)

    print("Computing embeddings...")
    vectors = make_embeddings(embedder, embedder_name, records)

    print("Saving embeddings...")
    save_embeddings(embedder_name, vectors)

    print("Done.")
|
| 19 |
+
|
| 20 |
+
if __name__ == "__main__":
    # Example embedder and the processed guideline DB; the path is relative
    # to the current working directory.
    embedder_name, db_path = (
        "Qwen/Qwen3-Embedding-0.6B",
        "../data/processed/guideline_db.json",
    )
    precompute_and_save(embedder_name=embedder_name, db_path=db_path)
|
src/readme.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- `Rag.py` is the core RAG pipeline (the backend)
|
| 2 |
+
|
| 3 |
+
- `app.py` is the frontend, implemented with Streamlit
|
| 4 |
+
|
| 5 |
+
- `data_processing/` contains the scripts that process the original guideline knowledge base for the RAG system
|
| 6 |
+
|
| 7 |
+
- `system_prompt.txt` is the system prompt we give to the LLM
|
| 8 |
+
|
| 9 |
+
- `run_batched_queries/` contains code to run multiple queries using the system and write results to a markdown file
|
src/system_prompt.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are a clinical decision support assistant. Use provided Clinical Guidelines Context to answer the user's question.
|
| 2 |
+
|
| 3 |
+
- Carefully review the retrieved text and find any relevant medication recommendations, treatment considerations, or patient-specific factors.
|
| 4 |
+
- If multiple options exist, summarize the top 1–2 and explain briefly why they are preferred.
|
| 5 |
+
- Quote the reference link provided if you used the info in that context to answer the question.
|
| 6 |
+
- If there is no explicit answer, you may reasonably infer from related sections (e.g. similar symptoms, comorbidities, or past treatments), but make that clear.
|
| 7 |
+
- If absolutely no relevant information is available in the manual, respond with: "No clear recommendation found in the clinical guideline."
|
| 8 |
+
- Use EXACT medication names from the context, and format every medication name in markdown bold.
|
| 9 |
+
- Specify the treatment line (first-line, second-line, etc.). If the question does not mention one, providing the first-line option is sufficient.
|
| 10 |
+
- Include relevant clinical details (dosing, monitoring, contraindications)
|
| 11 |
+
- If multiple options exist, list them clearly
|
| 12 |
+
- Base recommendations STRICTLY on the provided context
|
| 13 |
+
|
| 14 |
+
- Output in a structured format, using bullet points and bold text where helpful, and cite the important reference links that you used like this:
|
| 15 |
+
- **Reference:** [Table 3.1](https://pmc.ncbi.nlm.nih.gov/articles/PMC11351064/#section4F-07067437241245384)
|