File size: 3,686 Bytes
46a030d
 
 
 
 
 
 
926183f
7b7d97b
616c59c
da90a83
46a030d
bee48d6
46a030d
 
 
926183f
 
46a030d
 
 
 
 
 
 
 
 
 
 
 
926183f
 
46a030d
 
 
 
 
 
bee48d6
46a030d
 
 
 
 
 
 
 
bee48d6
6ffd015
 
46a030d
 
 
 
 
 
7b7d97b
46a030d
 
 
 
 
 
 
 
 
 
1286697
7b7d97b
46a030d
7b7d97b
46a030d
7b7d97b
 
46a030d
 
 
7b7d97b
46a030d
 
 
 
 
 
 
 
7b7d97b
e5c0423
0abdfd0
46a030d
 
81d3143
46a030d
 
 
 
 
 
 
 
4295a5f
46a030d
 
 
 
 
 
 
 
 
 
 
 
 
4295a5f
7b7d97b
 
 
 
4295a5f
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import streamlit as st
import numpy as np
from pandas import DataFrame
import run_segbot
from functionforDownloadButtons import download_button
import os
import json


# Fetch the mecab-ipadic-neologd repository and run its installer script.
# Streamlit re-executes this script from top to bottom on every user
# interaction, so only shell out when the checkout is missing. When the
# checkout already exists, `git clone` refuses to write into the non-empty
# directory and the `&&` chain stops there, so skipping the call has the same
# outcome without spawning a shell and git on every rerun.
if not os.path.isdir(os.path.join('mecab-ipadic-neologd', '.git')):
    os.system('git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git && cd mecab-ipadic-neologd && ./bin/install-mecab-ipadic-neologd -n -y -u -p $PWD')

# Page-level configuration: browser tab title/icon and the wide layout.
st.set_page_config(
    page_title="Clinical Segment Splitter",
    page_icon="🚑",
    layout="wide"
)


def _max_width_(max_width_px: int = 1400) -> None:
    """Inject a CSS rule that caps the width of the main content container.

    Args:
        max_width_px: Maximum width in pixels. Defaults to 1400, the value
            that used to be hard-coded, so existing zero-argument calls
            produce the same markup as before.
    """
    max_width_str = f"max-width: {max_width_px}px;"
    # The doubled braces are literal CSS braces inside the f-string.
    st.markdown(
        f"""
    <style>
    .reportview-container .main .block-container{{
        {max_width_str}
    }}
    </style>    
    """,
        unsafe_allow_html=True,
    )


#_max_width_()

#c30 = st.columns([1,])

#with c30:
# st.image("logo.png", width=400)
st.title("🚑 Clinical Segment Splitter")
st.header("")  # renders an empty header (presumably used as vertical spacing)



with st.expander("ℹ️ - About this app", expanded=True):

    # The article number in the citation must agree with the DOI suffix
    # (journal.pdig.0000099), i.e. e0000099.
    st.write(
        """     
-   *Clinical segment splitter* app is an implementation of our paper.
    >Kenichiro Ando, Takashi Okumura, Mamoru Komachi, Hiromasa Horiguchi, Yuji Matsumoto (2022) [Exploring optimal granularity for extractive summarization of unstructured health records: Analysis of the largest multi-institutional archive of health records in Japan.](https://doi.org/10.1371/journal.pdig.0000099) PLOS Digital Health 1(9): e0000099.
-   This app automatically splits Japanese sentences into smaller units representing medical meanings.
	    """
    )

    st.markdown("")

st.markdown("")
#st.markdown("## 📌 Paste document")
def _resource_cache():
    """Return the Streamlit decorator used to cache the loaded model.

    ``st.cache`` is deprecated in favour of ``st.cache_resource``. Use the
    newer API when the installed Streamlit provides it; otherwise fall back to
    the original ``st.cache(allow_output_mutation=True)`` so older
    installations behave exactly as before.
    """
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        return cache_resource
    return st.cache(allow_output_mutation=True)


@_resource_cache()
def model_load():
    """Call ``run_segbot.setup()`` behind Streamlit's cache.

    Returns:
        Whatever ``run_segbot.setup()`` returns; the caller unpacks it into
        three values (``model, fm, index``).
    """
    return run_segbot.setup()
# Unpack the three objects handed back by the cached loader.
model, fm, index = model_load()

with st.form(key="my_form"):
    # Five columns are created but only the second and fourth receive
    # widgets; the 0.07-wide ones are never written to.
    _, c1, _, c2, _ = st.columns([0.07, 1, 0.07, 5, 0.07])

    with c1:
        split_choice = st.radio(
            "Select the sentence split method",
            ["pySBD (Default)", "full stop & linebreak"],
            help="""
            We have prepared 2 methods to break input text into sentences. 

            - The [pySBD](https://github.com/nipunsadvilkar/pySBD) is a more accurate method.
            - The full stop & linebreak is naive and has low accuracy, but can be robust to noise.
            """,
        )
        # Map the radio label to the identifier passed to run_segbot.generate.
        split_method = "fullstop" if split_choice == "full stop & linebreak" else "pySBD"

    with c2:
        doc = st.text_area(
            "Paste your text",
            "グラム染色するも明らかな菌が見つからず、 髄液培養でも優位な菌は培養されなかった。細菌性髄膜炎に対するグラム染色の感度は60%程度であり、培養に関しても感度は高くない。また髄液中の糖はもう少し減るのではないだろうか。確定診断はつかないものの、最も疑わしい疾患であった。起因菌はMRSA,腸内細菌等を広域にカバーするためバンコマイシン,メロペネム(髄膜炎dose)とした。",
            height=300,
        )
        submit_button = st.form_submit_button(label="Go to split ✂︎")

# Halt this script run here unless the form has just been submitted.
if not submit_button:
    st.stop()

# Split the submitted text with the loaded model and the chosen split method.
keywords = run_segbot.generate(doc, model, fm, index, split_method)


st.markdown("## Results")

st.header("")


# Only the three middle columns are used (one download button each); the
# outer two are never written to.
_, c1, c2, c3, _ = st.columns([2, 1.5, 1.5, 1.5, 2])

st.header("")

df = DataFrame(keywords)
if df.empty:
    # A frame built from an empty result has no column to rename, so the
    # `df.columns` assignment below would raise ValueError (length mismatch).
    st.warning("No segments were produced for the given text.")
    st.stop()
df.index += 1  # number the rows from 1 instead of 0
df.columns = ['Segment']
# NOTE(review): this echoes the user's pasted text to the server's stdout;
# kept for backward compatibility, but consider removing it.
print(df)


st.table(df)


# One download button per column. All three receive the same `keywords`
# object; only the file name and label differ.
for column, filename, label in (
    (c1, "Data.csv", "📥 Download (.csv)"),
    (c2, "Data.txt", "📥 Download (.txt)"),
    (c3, "Data.json", "📥 Download (.json)"),
):
    with column:
        download_button(keywords, filename, label)