Spaces:

kyauy
/

ClinFly

Running

App Files Files Community

GERNET Enody commited on Apr 2, 2024

Commit

44a9dbf

unverified ·

1 Parent(s): ccc4c35

Add files via upload

Browse files

Files changed (3) hide show

README.md +32 -12
clinfly_app_cli.py +152 -0
clinfly_app_st.py +195 -0

README.md CHANGED Viewed

@@ -1,9 +1,9 @@
 ---
 title: ClinFly
 emoji: small_airplane
-sdk: streamlit
 sdk_version: 1.21.0
-app_file: clinfly_app.py
 pinned: true
 ---
@@ -26,24 +26,44 @@ By facilitating the translation and anonymization of clinical reports, ClinFly h
 ![](img/pipeline.png)
-## Run the framework
-A webapp is accessible at [https://huggingface.co/spaces/kyauy/ClinFly](https://huggingface.co/spaces/kyauy/ClinFly), **please try it !**
-It's a streamlit application, where code is accessible in ̀`clinfly_app.py` file.
 To install on your local machine, you need `poetry` package manager and launch in the folder:
 ```
 poetry install
 ```
-To make it run in your local computer:
 ```
 poetry shell
-streamlit run clinfly_app.py
 ```
-Using requirement ?
 ```
-poetry export --without-hashes --format=requirements.txt > requirements.txt
-```

 ---
 title: ClinFly
 emoji: small_airplane
 sdk_version: 1.21.0
+streamlit_file: clinfly_app_st.py
+CLI_file: clinfly_app_cli.py
 pinned: true
 ---
 ![](img/pipeline.png)
+## Poetry Installation
 To install on your local machine, you need `poetry` package manager and launch in the folder:
 ```
 poetry install
 ```
+Prefer a `requirements.txt` file? Export one with:
+```
+poetry export --without-hashes --format=requirements.txt > requirements.txt
+```
+## Run the code
+### Graphical User Interface - Single report usage with interactive analysis
+A web app is available at https://huggingface.co/spaces/kyauy/ClinFly, please try it!
+It is a Streamlit application whose code is in the `clinfly_app_st.py` file. The functions it uses are in the `utilities` folder.
+To run the Streamlit application on your local computer:
 ```
 poetry shell
+streamlit run clinfly_app_st.py
 ```
+### Command Line Interface - Multiple report usage with offline options
+The code is in the `clinfly_app_cli.py` file. The functions it uses are in the `utilities` folder.
+The output files are placed in sub-folders of the `results` folder according to their file extension.
+The translated and de-identified report is written to the `results/Reports` folder.
+Three HPO extraction outputs are generated: TSV, TXT and JSON.
+To run the CLI application on your local computer:
+```
+poetry shell
+<python executable> clinfly_app_cli.py --file <input CSV file, one report per line: last name;first name;report> --language <language of the reports: fr, es or de> --output_dir <directory where the models are downloaded (OPTIONAL, defaults to the home directory)>
 ```

clinfly_app_cli.py ADDED Viewed

	@@ -0,0 +1,152 @@

+import csv
+import os
+import argparse
+import pandas as pd
+from utilities.anonymize import get_cities_list,get_abbreviation_dict_correction, reformat_to_report, anonymize_analyzer, anonymize_engine, add_space_to_comma_endpoint,get_list_not_deidentify, config_deidentify
+from utilities.translate import get_translation_dict_correction, translate_report
+from utilities.convert import convert_df_no_header, convert_df, convert_json, convert_list_phenogenius
+from utilities.extract_hpo import add_biometrics, extract_hpo
+from utilities.get_model import get_models, get_nlp_marian
+import gc
+def main():
+    print("Code Starting")
+    MarianText, _, _ = translate_report(
+        Report,
+        Last_name,
+        First_name,
+        nlp_fr,
+        marian_fr_en,
+        dict_correction,
+        dict_abbreviation_correction,
+    )
+    MarianText_report = reformat_to_report(MarianText, nlp_fr)
+    del MarianText
+    print("Translation and De-identification")
+    (
+        MarianText_anonymize_report_analyze,
+        analyzer_results_return,
+        _,
+        _,
+    ) = anonymize_analyzer(MarianText_report, analyzer, proper_noun, Last_name, First_name)
+    print(MarianText_anonymize_report_analyze)
+    MarianText_anonymize_report_engine = anonymize_engine(
+        MarianText_report, analyzer_results_return, engine, nlp_fr
+    )
+    MarianText_anonymize_report_engine_modif = pd.DataFrame(
+        [x for x in MarianText_anonymize_report_engine.split("\n")]
+    )
+    MarianText_anonymize_report_engine_df = MarianText_anonymize_report_engine_modif
+    with open("results/Reports/" + Last_name + "_" + First_name + "_translated_and_deindentified_report.txt", 'w') as file:
+            file.write(convert_df_no_header(MarianText_anonymize_report_engine_df).decode("utf-8"))
+    print("Text file created successfully : " + Last_name + "_" + First_name + "_translated_and_deindentified_report.txt")
+    print("Summarization")
+    MarianText_anonymized_reformat_space = add_space_to_comma_endpoint(
+        MarianText_anonymize_report_engine, nlp_fr
+    )
+    MarianText_anonymized_reformat_biometrics, _ = add_biometrics(
+        MarianText_anonymized_reformat_space, nlp_fr
+    )
+    clinphen, clinphen_unsafe = extract_hpo(MarianText_anonymized_reformat_biometrics)
+    del MarianText_anonymize_report_engine
+    del MarianText_anonymized_reformat_space
+    del MarianText_anonymized_reformat_biometrics
+    clinphen_unsafe_check_raw = clinphen_unsafe
+    clinphen_unsafe_check_raw["To keep in list"] = False
+    clinphen_unsafe_check_raw["Confidence on extraction"] = "low"
+    del clinphen_unsafe
+    clinphen["Confidence on extraction"] = "high"
+    clinphen["To keep in list"] = True
+    cols = [
+        "HPO ID",
+        "Phenotype name",
+        "To keep in list",
+        "No. occurrences",
+        "Earliness (lower = earlier)",
+        "Confidence on extraction",
+        "Example sentence",
+    ]
+    clinphen_all = pd.concat([clinphen, clinphen_unsafe_check_raw]).reset_index()
+    clinphen_all = clinphen_all[cols]
+    clinphen_df = clinphen_all
+    clinphen_df_without_low_confidence = clinphen_df[clinphen_df["To keep in list"]== True]
+    del clinphen
+    del clinphen_unsafe_check_raw
+    gc.collect()
+    with open("results/TSV/" + Last_name + "_" + First_name + "_summarized_report.tsv", 'w') as file:
+            file.write(convert_df(clinphen_df).decode("utf-8"))
+    print("Tsv file created successfully : " + Last_name + "_" + First_name + "_summarized_report.tsv")
+    with open("results/JSON/" + Last_name + "_" + First_name + "_summarized_report.json", 'w') as file:
+            file.write(convert_json(clinphen_df_without_low_confidence))
+    print("JSON file created successfully : " + Last_name + "_" + First_name + "_summarized_report.json")
+    with open("results/TXT/" + Last_name + "_" + First_name + "_summarized_report.txt", 'w') as file:
+            file.write(convert_list_phenogenius(clinphen_df_without_low_confidence))
+    print("Text file created successfully : " + Last_name + "_" + First_name + "_summarized_report.txt")
+if __name__ == "__main__":
+    print("Welcome to the Clinfly app")
+    parser = argparse.ArgumentParser(description="How to use Clinfly.")
+    parser.add_argument("--file", type=str,help="the input file which contains the visits informations", required=True)
+    parser.add_argument("--language", choices=['fr', 'es', 'de'],type=str, help="The language of the input : fr, es , de",required=True)
+    parser.add_argument("--output_dir",default=os.path.expanduser("~"),type=str, help="The directory where the models will be downloaded.")
+    args = parser.parse_args()
+    if not os.path.exists(args.output_dir):
+        os.makedirs(args.output_dir)
+    print("Language chosen :", args.language)
+    models_status = get_models(args.language,args.output_dir)
+    dict_correction = get_translation_dict_correction()
+    dict_abbreviation_correction = get_abbreviation_dict_correction()
+    proper_noun = get_list_not_deidentify()
+    cities_list = get_cities_list()
+    analyzer, engine = config_deidentify(cities_list)
+    nlp_fr, marian_fr_en = get_nlp_marian(args.language)
+    file_name = args.file
+    Last_name :str
+    First_name : str
+    Report : str
+    with open(file_name, newline='', encoding='utf-8-sig') as fichier_csv:
+        lecteur_csv = csv.reader(fichier_csv, delimiter=";")
+        for ligne in lecteur_csv:
+            Last_name, First_name, Report = ligne
+            print("Last_name:", Last_name)
+            print("First_name:", First_name)
+            print("Report:", Report)
+            main()
+            print()

clinfly_app_st.py ADDED Viewed

	@@ -0,0 +1,195 @@

+import pandas as pd
+from utilities.web_utilities import display_page_title, display_sidebar, stack_checker
+from utilities.anonymize import get_cities_list,get_abbreviation_dict_correction, reformat_to_report, anonymize_analyzer, anonymize_engine, add_space_to_comma_endpoint,get_list_not_deidentify, config_deidentify
+from utilities.translate import get_translation_dict_correction, translate_report
+from utilities.convert import convert_df_no_header, convert_df, convert_json, convert_list_phenogenius
+from utilities.extract_hpo import add_biometrics, extract_hpo
+from utilities.get_model import get_models, get_nlp_marian
+import streamlit as st
+import gc
+# -- Set page config
+app_title: str = "ClinFly"
+display_page_title(app_title)
+display_sidebar()
+cities_list = get_cities_list()
+dict_correction = get_translation_dict_correction()
+dict_abbreviation_correction = get_abbreviation_dict_correction()
+nom_propre = get_list_not_deidentify()
+analyzer, engine = config_deidentify(cities_list)
+if "load_models" not in st.session_state:
+    st.session_state.load_models = False
+if "select_lang" not in st.session_state:
+    st.session_state.select_lang = False
+if "nlp_fr" not in st.session_state:
+    st.session_state.nlp_fr = False
+if "marian_fr_en" not in st.session_state:
+    st.session_state.marian_fr_en = False
+if "load_report" not in st.session_state:
+    st.session_state.load_report = False
+if st.session_state.load_models is False:
+    with st.form("language"):
+        source_lang = st.selectbox(
+            "Which is the language of the letter :fr: :es: :de: ?", ("fr", "es", "de")  # "it"
+        )
+        submit_button_L = st.form_submit_button(label="Submit language")
+    if submit_button_L:
+        with st.spinner('Downloading models, it takes a moment, please wait'):
+            models_status = get_models(source_lang)
+            nlp_fr, marian_fr_en = get_nlp_marian(source_lang)
+            st.session_state.select_lang = source_lang
+            st.session_state.nlp_fr = nlp_fr
+            st.session_state.marian_fr_en = marian_fr_en
+            st.session_state.load_models = True
+if st.session_state.load_models is True:
+    st.info('Selected language is : ' + st.session_state.select_lang)
+    with st.form("my_form"):
+        c1, c2 = st.columns(2)
+        with c1:
+            nom = st.text_input("Last name", "Doe", key="name")
+        with c2:
+            prenom = st.text_input("First name", "John", key="surname")
+        courrier = st.text_area(
+            "Paste medical letter",
+            "Chers collegues, j'ai recu en consultation M. John Doe né le 14/07/1789 pour une fièvre récurrente et une maladie de Crohn. Il a pour antécédent des epistaxis recurrents. Parmi les antécédants familiaux, sa maman a présenté un cancer des ovaires. Il mesure 1.90 m (+2.5  DS),  pèse 93 kg (+3.6 DS) et son PC est à 57 cm (+0DS) ...",
+            height=200,
+            key="letter",
+        )
+        submit_button = st.form_submit_button(label="Submit report")
+    if submit_button or st.session_state.load_report:
+        st.session_state.load_report = True
+        MarianText, list_replaced, list_replaced_abb_name = translate_report(
+            courrier,
+            nom,
+            prenom,
+            st.session_state.nlp_fr,
+            st.session_state.marian_fr_en,
+            dict_correction,
+            dict_abbreviation_correction,
+        )
+        MarianText_letter = reformat_to_report(MarianText, st.session_state.nlp_fr)
+        del MarianText
+        st.subheader("Translation and De-identification")
+        (
+            MarianText_anonymize_letter_analyze,
+            analyzer_results_return,
+            analyzer_results_keep,
+            analyzer_results_saved,
+        ) = anonymize_analyzer(MarianText_letter, analyzer, nom_propre, nom, prenom)
+        st.caption(MarianText_anonymize_letter_analyze)
+        MarianText_anonymize_letter_engine = anonymize_engine(
+            MarianText_letter, analyzer_results_return, engine, st.session_state.nlp_fr
+        )
+        MarianText_anonymize_letter_engine_modif = pd.DataFrame(
+            [x for x in MarianText_anonymize_letter_engine.split("\n")]
+        )
+        MarianText_anonymize_letter_engine_modif.columns = [
+            "Modify / curate the automatically translated and de-identified letter before downloading:"
+        ]
+        MarianText_anonymize_letter_engine_df = st.data_editor(
+            MarianText_anonymize_letter_engine_modif,
+            num_rows="dynamic",
+            key="letter_editor",
+            use_container_width=True,
+        )
+        st.caption("Modify cells above 👆 or even ➕ add rows, before downloading 👇")
+        st.download_button(
+            "Download translated and de-identified letter",
+            convert_df_no_header(MarianText_anonymize_letter_engine_df),
+            nom + "_" + prenom + "_translated_and_deindentified_letter.txt",
+            "text",
+            key="download-translation-deindentification",
+        )
+        st.subheader("Summarization")
+        MarianText_anonymized_reformat_space = add_space_to_comma_endpoint(
+            MarianText_anonymize_letter_engine, st.session_state.nlp_fr
+        )
+        MarianText_anonymized_reformat_biometrics, additional_terms = add_biometrics(
+            MarianText_anonymized_reformat_space, st.session_state.nlp_fr
+        )
+        clinphen, clinphen_unsafe = extract_hpo(MarianText_anonymized_reformat_biometrics)
+        del MarianText_anonymize_letter_engine
+        del MarianText_anonymized_reformat_space
+        del MarianText_anonymized_reformat_biometrics
+        clinphen_unsafe_check_raw = clinphen_unsafe
+        # clinphen_unsafe_check_raw["name"] = nom
+        # clinphen_unsafe_check_raw["surname"] = prenom
+        clinphen_unsafe_check_raw["To keep in list"] = False
+        clinphen_unsafe_check_raw["Confidence on extraction"] = "low"
+        del clinphen_unsafe
+        # clinphen["name"] = nom
+        # clinphen["surname"] = prenom
+        clinphen["Confidence on extraction"] = "high"
+        clinphen["To keep in list"] = True
+        cols = [
+            "HPO ID",
+            "Phenotype name",
+            "To keep in list",
+            "No. occurrences",
+            "Earliness (lower = earlier)",
+            "Confidence on extraction",
+            "Example sentence",
+        ]
+        clinphen_all = pd.concat([clinphen, clinphen_unsafe_check_raw]).reset_index()
+        clinphen_all = clinphen_all[cols]
+        clinphen_df = st.data_editor(
+            clinphen_all, num_rows="dynamic", key="data_editor"
+        )
+        clinphen_df_without_low_confidence = clinphen_df[clinphen_df["To keep in list"]== True]
+        del clinphen
+        del clinphen_unsafe_check_raw
+        gc.collect()
+        st.caption(
+            "Modify cells above 👆, click ☐ to keep low confidence symptoms in list, or even ➕ add rows, before downloading 👇"
+        )
+        st.download_button(
+            "Download summarized letter in HPO CSV format",
+            convert_df(clinphen_df),
+            nom + "_" + prenom + "_summarized_letter.tsv",
+            "text/csv",
+            key="download-summarization",
+        )
+        st.download_button(
+            "Download summarized letter in Phenotips JSON format (hygen compatible)",
+            convert_json(clinphen_df_without_low_confidence),
+            nom + "_" + prenom + "_summarized_letter.json",
+            "json",
+            key="download-summarization-json",
+        )
+        st.download_button(
+            "Download summarized letter in PhenoGenius list of HPO format",
+            convert_list_phenogenius(clinphen_df_without_low_confidence),
+            nom + "_" + prenom + "_summarized_letter.txt",
+            "text",
+            key="download-summarization-phenogenius",
+        )