GERNET Enody committed on
Commit
ae03dcd
·
unverified ·
1 Parent(s): 52d408f

Add files via upload

Browse files
Files changed (3) hide show
  1. clinfly_app_cli.py +18 -5
  2. clinfly_app_st.py +7 -2
  3. pyproject.toml +2 -0
clinfly_app_cli.py CHANGED
@@ -18,6 +18,7 @@ from utilities.convert import (
18
  convert_df,
19
  convert_json,
20
  convert_list_phenogenius,
 
21
  )
22
  from utilities.extract_hpo import add_biometrics, extract_hpo
23
  from utilities.get_model import get_models, get_nlp_marian
@@ -258,13 +259,25 @@ if __name__ == "__main__":
258
  Last_name: str
259
  First_name: str
260
  Report: str
261
- with open(file_name, "r") as fichier:
262
- for ligne in fichier:
263
- elements = ligne.strip().split("\t")
264
- Report_id, Last_name, First_name, Report = elements
 
 
265
  print("Report_id:", Report_id)
266
  print("Last_name:", Last_name)
267
  print("First_name:", First_name)
268
- print("Report:", Report)
 
 
 
 
 
 
 
 
269
  main()
270
  print()
 
 
 
18
  convert_df,
19
  convert_json,
20
  convert_list_phenogenius,
21
+ convert_pdf_to_text,
22
  )
23
  from utilities.extract_hpo import add_biometrics, extract_hpo
24
  from utilities.get_model import get_models, get_nlp_marian
 
259
  Last_name: str
260
  First_name: str
261
  Report: str
262
+
263
+ if os.path.isfile(args.file):
264
+ with open(file_name, 'r') as fichier:
265
+ for ligne in fichier:
266
+ elements = ligne.strip().split('\t')
267
+ Report_id, Last_name, First_name, text_or_link = elements
268
  print("Report_id:", Report_id)
269
  print("Last_name:", Last_name)
270
  print("First_name:", First_name)
271
+ if os.path.exists(text_or_link):
272
+ if text_or_link.lower().endswith('.pdf'):
273
+ print(f"Processing PDF file: {text_or_link}")
274
+ Report = convert_pdf_to_text(text_or_link)
275
+ else:
276
+ print(f"Unsupported file type. Please provide a link to a PDF files.")
277
+ else:
278
+ Report = text_or_link
279
+ print("Report:", Report)
280
  main()
281
  print()
282
+ else:
283
+ print("Input is not a file. Please provide a valid input.")
clinfly_app_st.py CHANGED
@@ -2,7 +2,7 @@ import pandas as pd
2
  from utilities.web_utilities import display_page_title, display_sidebar, stack_checker
3
  from utilities.anonymize import get_cities_list,get_abbreviation_dict_correction, reformat_to_report, anonymize_analyzer, anonymize_engine, add_space_to_comma_endpoint,get_list_not_deidentify, config_deidentify
4
  from utilities.translate import get_translation_dict_correction, translate_report
5
- from utilities.convert import convert_df_no_header, convert_df, convert_json, convert_list_phenogenius
6
  from utilities.extract_hpo import add_biometrics, extract_hpo
7
  from utilities.get_model import get_models, get_nlp_marian
8
  import streamlit as st
@@ -60,14 +60,19 @@ if st.session_state.load_models is True:
60
  with c2:
61
  prenom = st.text_input("First name", "John", key="surname")
62
  courrier = st.text_area(
63
- "Paste medical letter",
64
  "Chers collegues, j'ai recu en consultation M. John Doe né le 14/07/1789 pour une fièvre récurrente et une maladie de Crohn. Il a pour antécédent des epistaxis recurrents. Parmi les antécédants familiaux, sa maman a présenté un cancer des ovaires. Il mesure 1.90 m (+2.5 DS), pèse 93 kg (+3.6 DS) et son PC est à 57 cm (+0DS) ...",
65
  height=200,
66
  key="letter",
67
  )
 
68
 
69
  submit_button = st.form_submit_button(label="Submit report")
70
 
 
 
 
 
71
 
72
  if submit_button or st.session_state.load_report:
73
  st.session_state.load_report = True
 
2
  from utilities.web_utilities import display_page_title, display_sidebar, stack_checker
3
  from utilities.anonymize import get_cities_list,get_abbreviation_dict_correction, reformat_to_report, anonymize_analyzer, anonymize_engine, add_space_to_comma_endpoint,get_list_not_deidentify, config_deidentify
4
  from utilities.translate import get_translation_dict_correction, translate_report
5
+ from utilities.convert import convert_df_no_header, convert_df, convert_json, convert_list_phenogenius, convert_pdf_to_text
6
  from utilities.extract_hpo import add_biometrics, extract_hpo
7
  from utilities.get_model import get_models, get_nlp_marian
8
  import streamlit as st
 
60
  with c2:
61
  prenom = st.text_input("First name", "John", key="surname")
62
  courrier = st.text_area(
63
+ "You can paste the medical letter",
64
  "Chers collegues, j'ai recu en consultation M. John Doe né le 14/07/1789 pour une fièvre récurrente et une maladie de Crohn. Il a pour antécédent des epistaxis recurrents. Parmi les antécédants familiaux, sa maman a présenté un cancer des ovaires. Il mesure 1.90 m (+2.5 DS), pèse 93 kg (+3.6 DS) et son PC est à 57 cm (+0DS) ...",
65
  height=200,
66
  key="letter",
67
  )
68
+ uploaded_file = st.file_uploader("Or upload it (only pdf files are supported)")
69
 
70
  submit_button = st.form_submit_button(label="Submit report")
71
 
72
+ if uploaded_file is not None:
73
+ # To read file as bytes:
74
+ bytes_data = uploaded_file.getvalue()
75
+ courrier = convert_pdf_to_text(bytes_data)
76
 
77
  if submit_button or st.session_state.load_report:
78
  st.session_state.load_report = True
pyproject.toml CHANGED
@@ -20,6 +20,8 @@ streamlit = "^1.20.0"
20
  memory-profiler = "^0.61.0"
21
  Unidecode = "^1.3.6"
22
  pydantic = "1.10.13"
 
 
23
 
24
  [tool.poetry.dev-dependencies]
25
  pytest = "^5.2"
 
20
  memory-profiler = "^0.61.0"
21
  Unidecode = "^1.3.6"
22
  pydantic = "1.10.13"
23
+ pdf2image = "^1.17.0"
24
+ pytesseract = "^0.3.10"
25
 
26
  [tool.poetry.dev-dependencies]
27
  pytest = "^5.2"