DavidNgoue committed on
Commit
971ca2e
·
verified ·
1 Parent(s): 9f01360

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +180 -56
src/streamlit_app.py CHANGED
@@ -28,25 +28,138 @@ import subprocess
28
  import requests
29
  from transformers import pipeline, BioGptTokenizer, BioGptForCausalLM
30
  from transformers import MarianMTModel, MarianTokenizer
 
 
 
 
 
31
 
32
  # Set page configuration as the first Streamlit command
33
  st.set_page_config(page_title="Omics Risk Predictor", layout="wide", initial_sidebar_state="expanded")
34
 
 
 
 
 
35
  # Load translations without Streamlit commands
36
  def load_translations_raw():
37
  try:
 
 
38
  with open("src/translations.json", "r", encoding="utf-8") as f:
39
- return json.load(f)
40
- except FileNotFoundError:
41
- return {}
42
- except json.JSONDecodeError:
43
- return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  translations = load_translations_raw()
46
 
47
  # Get translation function
48
  def t(key, lang="fr"):
49
- return translations.get(lang, {}).get(key, key)
50
 
51
  # Check for translation file errors after set_page_config
52
  if not translations:
@@ -93,7 +206,7 @@ Session = sessionmaker(bind=engine)
93
  # Sidebar
94
  with st.sidebar:
95
  st.markdown(f"<div class='sidebar-header'>{t('app_title')}</div>", unsafe_allow_html=True)
96
- st.image("https://via.placeholder.com/150", caption=t("author"))
97
  st.markdown(f"<div class='sidebar-subheader'>{t('navigation')}</div>", unsafe_allow_html=True)
98
 
99
  # Language selector
@@ -107,6 +220,7 @@ with st.sidebar:
107
 
108
  # Device configuration
109
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
110
 
111
  # IRC biomarkers
112
  irc_biomarkers = [
@@ -327,9 +441,9 @@ def generate_pdf_report(risk_score, important_biomarkers, shap_plot_path, recomm
327
  try:
328
  c = canvas.Canvas(output_path, pagesize=letter)
329
  c.setFont("Helvetica-Bold", 16)
330
- c.drawString(100, 750, t('report_title', lang))
331
  c.setFont("Helvetica", 12)
332
- c.drawString(100, 730, t('author', lang))
333
  c.drawString(100, 710, t('footer', lang))
334
 
335
  c.setFont("Helvetica-Bold", 14)
@@ -362,13 +476,19 @@ def generate_pdf_report(risk_score, important_biomarkers, shap_plot_path, recomm
362
  raise Exception(t("pdf_error").format(error=str(e)))
363
 
364
  # Load BioGPT for recommendations
 
 
365
  try:
 
366
  biogpt_tokenizer = BioGptTokenizer.from_pretrained("microsoft/biogpt")
 
367
  biogpt_model = BioGptForCausalLM.from_pretrained("microsoft/biogpt").to(device)
 
368
  biogpt_model.eval()
 
369
  except Exception as e:
370
- st.error(t("biogpt_load_error"))
371
- st.stop()
372
 
373
  # Load translation model (English to French)
374
  try:
@@ -381,6 +501,8 @@ except Exception as e:
381
  # Generate recommendation using BioGPT
382
  @st.cache_data
383
  def generate_recommendation(prompt, max_length=100):
 
 
384
  try:
385
  inputs = biogpt_tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).to(device)
386
  outputs = biogpt_model.generate(
@@ -515,7 +637,7 @@ elif page == t("about_omics"):
515
  <li><b>{t('genomics')}</b>: {t('genomics_format')}</li>
516
  <li><b>{t('transcriptomics')}</b>: {t('transcriptomics_format')}</li>
517
  <li><b>{t('proteomics')}</b>: {t('proteomics_format')}</li>
518
- <li><b>{t('metabolomics')}</b>: {t('metabolomics_format')}</li>
519
  </ul>
520
  <p>{t('raw_omics_processing')}</p>
521
  </div>
@@ -527,52 +649,52 @@ elif page == t("about_omics"):
527
  <p>{t('preprocessing_desc')}</p>
528
  <ol>
529
  <li><b>{t('quality_control')}</b>: {t('quality_control_desc')}</li>
530
- <li><b>{t('normalization')}</b>: {t('normalization_desc')}</li>
531
- <li><b>{t('imputation')}</b>: {t('imputation_desc')}</li>
532
- <li><b>{t('standardization')}</b>: {t('standardization_desc')}</li>
533
  <li><b>{t('feature_selection')}</b>: {t('feature_selection_desc')}</li>
534
  </ol>
535
  <p>{t('preprocessing_importance')}</p>
536
  </div>
537
  """, unsafe_allow_html=True)
538
 
539
- st.markdown(f"<div class='section-header'>{t('csv_format')}</div>", unsafe_allow_html=True)
540
- st.markdown(f"""
541
  <div class='content animate__fadeIn'>
542
  <p>{t('csv_format_desc')}</p>
543
  <ul>
544
  <li><b>{t('columns')}</b>: {t('columns_desc')}</li>
545
- <li><b>{t('rows')}</b>: {t('rows_desc')}</li>
546
  <li><b>{t('values')}</b>: {t('values_desc')}</li>
547
- <li><b>{t('separator')}</b>: {t('separator_desc')}</li>
548
- <li><b>{t('header')}</b>: {t('header_desc')}</li>
549
  </ul>
550
  <p>{t('csv_example')}</p>
551
  <pre class='code-block'>
552
- PatientID,UMOD_rs12917707,APOL1_rs73885319,MYH9_rs4821480,Status
553
- P001,0,1,0,Unknown
554
  </pre>
555
  <p>{t('csv_patient_id_desc')}</p>
556
  </div>
557
  """, unsafe_allow_html=True)
558
 
559
- st.markdown(f"<div class='section-header'>{t('data_quality')}</div>", unsafe_allow_html=True)
560
  st.markdown(f"""
561
  <div class='content animate__fadeIn'>
562
  <p>{t('data_quality_desc')}</p>
563
  <ul>
564
- <li><b>{t('consistency')}</b>: {t('consistency_desc')}</li>
565
  <li><b>{t('completeness')}</b>: {t('completeness_desc')}</li>
566
- <li><b>{t('accuracy')}</b>: {t('accuracy_desc')}</li>
567
- <li><b>{t('alignment')}</b>: {t('alignment_desc')}</li>
568
  </ul>
569
- <p>{t('data_quality_importance')}</p>
570
  </div>
571
  """, unsafe_allow_html=True)
572
 
573
  # Predict Risk Page
574
  elif page == t("predict_risk"):
575
- st.markdown(f"<div class='title animate__fadeIn'>{t('predict_risk_title')}</div>", unsafe_allow_html=True)
576
  st.markdown(f"""
577
  <div class='content animate__fadeIn'>
578
  <p>{t('predict_risk_intro')}</p>
@@ -607,7 +729,7 @@ elif page == t("predict_risk"):
607
 
608
  # Validate and process files
609
  if st.button(t("run_prediction"), key="run_prediction"):
610
- if all(file_paths[omic] for omic in file_paths) and st.session_state.patient_id:
611
  try:
612
  with st.spinner(t("processing")):
613
  # Load and preprocess data
@@ -620,7 +742,7 @@ elif page == t("predict_risk"):
620
  patient_tensors = [torch.tensor(data_dict[omic].values, dtype=torch.float32) for omic in data_dict]
621
  patient_dataset = TensorDataset(*patient_tensors)
622
  patient_loader = DataLoader(patient_dataset, batch_size=best_hyperparams['batch_size'],
623
- shuffle=False)
624
 
625
  # Extract latent representations
626
  encoded_data = extract_latent_representations(model, patient_loader, device)
@@ -666,7 +788,7 @@ elif page == t("predict_risk"):
666
  # Store results
667
  st.session_state.results = {
668
  'umap_df': umap_df,
669
- 'important_biomarkers': important_biomarkers,
670
  'output_dir': output_dir,
671
  'pdf_path': pdf_path,
672
  'recommendations': recommendations
@@ -674,8 +796,8 @@ elif page == t("predict_risk"):
674
 
675
  # Display results
676
  st.markdown(f"<div class='section-header'>{t('prediction_results')}</div>", unsafe_allow_html=True)
677
- st.markdown(f"<div class='result-box'>{t('ckd_risk_score').format(score=risk_score):.2f}</div>",
678
- unsafe_allow_html=True)
679
 
680
  # Risk gauge
681
  fig_gauge = go.Figure(go.Indicator(
@@ -686,7 +808,7 @@ elif page == t("predict_risk"):
686
  'axis': {'range': [0, 100], 'tickcolor': '#FFF', 'tickfont': {'color': '#FFF'}},
687
  'bar': {'color': '#0000FF'},
688
  'steps': [
689
- {'range': [0, 33], 'color': '#4ADE80'},
690
  {'range': [33, 66], 'color': '#FACC15'},
691
  {'range': [66, 100], 'color': '#EF4444'}
692
  ],
@@ -699,7 +821,7 @@ elif page == t("predict_risk"):
699
  ))
700
  fig_gauge.update_layout(
701
  paper_bgcolor='#0E1117',
702
- font={'color': "#FFFFFF"},
703
  margin={'l': 20, 'r': 20, 't': 50, 'b': 20}
704
  )
705
  st.plotly_chart(fig_gauge, use_container_width=True)
@@ -708,7 +830,7 @@ elif page == t("predict_risk"):
708
  st.markdown(f"<div class='subheader'>{t('key_biomarkers')}</div>", unsafe_allow_html=True)
709
  shap_fig = plt.figure(figsize=(12, 8))
710
  sns.barplot(data=important_biomarkers.head(20), x='SHAP_Mean_Abs', y=t('feature'), hue=t('omic'),
711
- palette=['#FF6F61', '#6B5B95', '#4CAF50', '#2196F3'])
712
  plt.title(t('shap_top_features'), fontsize=16, color='#FFFFFF')
713
  plt.xlabel(t('shap_mean_value'), fontsize=12, color='#FFFFFF')
714
  plt.ylabel(t('feature'), fontsize=12, color='#FFFFFF')
@@ -720,26 +842,26 @@ elif page == t("predict_risk"):
720
 
721
  # Display saved SHAP plot
722
  if os.path.exists(shap_plot_path):
723
- st.image(shap_plot_path, caption=t('shap'), use_column_width=True)
724
 
725
  # Recommendations
726
  st.markdown(f"<div class='subheader'>{t('recommendations')}</div>", unsafe_allow_html=True)
727
  for rec in recommendations:
728
- st.markdown(f"<div class='recommendation'>{rec}</div>", unsafe_allow_html=True)
729
 
730
  # Download results
731
  st.markdown(f"<div class='subheader'>{t('download_results')}</div>", unsafe_allow_html=True)
732
  col1, col2 = st.columns(2)
733
  with col1:
734
- csv = important_biomarkers.to_csv(index=False).encode('utf-8')
735
  b64 = base64.b64encode(csv).decode()
736
- href = f'<a href="data:file/csv;base64,{b64}" download="important_biomarkers.csv" class="download-btn">{t("download_biomarkers", lang)}</a>'
737
- st.markdown(href, unsafe_allow_html=True)
738
  with col2:
739
  with open(pdf_path, "rb") as f:
740
  pdf_data = f.read()
741
  b64_pdf = base64.b64encode(pdf_data).decode()
742
- href_pdf = f'<a href="data:application/pdf;base64,{b64_pdf}" download="ckd_risk_report.pdf" class="download-btn">{t("download_report", lang)}</a>'
743
  st.markdown(href_pdf, unsafe_allow_html=True)
744
 
745
  except Exception as e:
@@ -753,7 +875,7 @@ elif page == t("chatbot"):
753
  st.markdown(f"<div class='title animate__fadeIn'>{t('chatbot_title')}</div>", unsafe_allow_html=True)
754
  st.markdown(f"""
755
  <div class='content animate__fadeIn'>
756
- <p>{t('chatbot_intro')}</p>
757
  </div>
758
  """, unsafe_allow_html=True)
759
 
@@ -768,18 +890,18 @@ elif page == t("chatbot"):
768
  st.markdown(f"<div class='chat-bubble bot'>{message['content']}</div>", unsafe_allow_html=True)
769
 
770
  # User input
771
- user_input = st.text_area(t("type_message"), key="chat_input")
772
- if st.button(t("send"), key="button_send"):
773
  if user_input:
774
  try:
775
- # Add user message to history
776
  st.session_state.chat_history.append({'role': 'user', 'content': user_input})
777
 
778
  # Check if user is asking about results
779
  if st.session_state.results and any(
780
  keyword in user_input.lower() for keyword in t('chat_risk_keywords').split(',')):
781
  risk_score = st.session_state.results['umap_df'][t('risk_score')].iloc[0]
782
- biomarkers = st.session_state.results['important_biomarkers'][t('feature')].head(5).tolist()
783
  context = t('chat_risk_response').format(score=risk_score, biomarkers=', '.join(biomarkers))
784
  response = send_to_rasa(f"{context} {user_input}", lang)
785
  else:
@@ -794,7 +916,7 @@ elif page == t("chatbot"):
794
 
795
  if intent != 'None' and st.session_state.results:
796
  risk_score = st.session_state.results['umap_df'][t('risk_score')].iloc[0]
797
- biomarkers = st.session_state.results['important_biomarkers'][t('feature')].head(5).tolist()
798
  prompt = formulate_prompt(intent, risk_score, biomarkers)
799
  response = generate_recommendation(prompt)
800
  if validate_recommendation(response):
@@ -804,11 +926,12 @@ elif page == t("chatbot"):
804
  else:
805
  response = send_to_rasa(user_input, lang)
806
 
807
- # Add bot response to history
808
  st.session_state.chat_history.append({'role': 'bot', 'content': response})
809
 
810
  # Rerender chat
811
  st.rerun()
 
812
 
813
  except Exception as e:
814
  st.error(t("chatbot_error").format(error=str(e)))
@@ -818,7 +941,7 @@ elif page == t("dashboard"):
818
  st.markdown(f"<div class='title animate__fadeIn'>{t('dashboard_title')}</div>", unsafe_allow_html=True)
819
  st.markdown(f"""
820
  <div class='content animate__fadeIn'>
821
- <p>{t('dashboard_intro')}</p>
822
  </div>
823
  """, unsafe_allow_html=True)
824
 
@@ -834,17 +957,18 @@ elif page == t("dashboard"):
834
  } for p in patients]
835
  df_patients = pd.DataFrame(patient_data)
836
 
837
- # Filters
838
- st.markdown(f"<div class='subheader'>{t('filters')}</div>", unsafe_allow_html=True)
839
- risk_threshold = st.slider(t("risk_threshold"), 0, 100, 50)
 
840
  filtered_df = df_patients[df_patients[t('risk_score')] >= risk_threshold]
841
 
842
  # Display table
843
- st.dataframe(filtered_df, use_container_width=True)
844
 
845
  # Plot
846
  fig = px.scatter(
847
- filtered_df,
848
  x=t('risk_score'),
849
  y=t('patient_id'),
850
  color=t('risk_score'),
@@ -874,7 +998,7 @@ if rasa_process:
874
  try:
875
  rasa_process.terminate()
876
  except Exception as e:
877
- print(f"Error terminating Rasa server: {str(e)}")
878
 
879
  st.markdown(f"""
880
  <div class='footer'>
 
28
  import requests
29
  from transformers import pipeline, BioGptTokenizer, BioGptForCausalLM
30
  from transformers import MarianMTModel, MarianTokenizer
31
+ import logging
32
+
33
+ # Configure logging
34
+ logging.basicConfig(level=logging.DEBUG)
35
+ logger = logging.getLogger(__name__)
36
 
37
  # Set page configuration as the first Streamlit command
38
  st.set_page_config(page_title="Omics Risk Predictor", layout="wide", initial_sidebar_state="expanded")
39
 
40
+ # Set Hugging Face cache
41
+ os.environ["HF_HOME"] = "/app/hf_cache"
42
+ os.makedirs("/app/hf_cache", exist_ok=True)
43
+
44
  # Load translations without Streamlit commands
def _fallback_translations():
    """Return the built-in English strings used when translations.json is unusable.

    A fresh dictionary is built on every call so callers can never mutate a
    shared module-level default.
    """
    return {
        "en": {
            "app_title": "Omics Risk Predictor",
            "patient_id": "Patient ID",
            "select_language": "Select Language",
            "home": "Home",
            "about_omics": "About Omics Data",
            "predict_risk": "Predict Risk",
            "chatbot": "Chatbot Assistant",
            "dashboard": "Dashboard",
            "footer": "© 2025, Ngoue David Roger Yannick",
            "upload_warning": "Please upload all omics files and provide a patient ID.",
            "risk_score": "Risk Score",
            "ckd_risk_score": "CKD Risk Score: {score:.2f}%",
            "key_biomarkers": "Key Biomarkers",
            "download_results": "Download Results",
            "chatbot_title": "Chatbot Assistant",
            "chatbot_intro": "Interact with our assistant for recommendations.",
            "chat_with_assistant": "Chat with Assistant",
            "type_message": "Type your message",
            "send": "Send",
            "biogpt_load_error": "Error loading BioGPT",
            "recommendation_error": "Error generating recommendation",
            "medication_error": "Error generating medication recommendation",
            "diet_error": "Error generating diet recommendation",
            "exercise_error": "Error generating exercise recommendation",
            "css_error": "CSS file not found",
            "model_weights_error": "Model weights not found",
            "model_load_error": "Error loading model: {error}",
            "file_process_error": "Error processing files: {error}",
            "chat_risk_keywords": "risk,score",
            "chat_medication_keywords": "medication,drug",
            "chat_diet_keywords": "diet,food",
            "chat_exercise_keywords": "exercise,workout",
            "chat_risk_response": "Risk score: {score:.2f}%. Biomarkers: {biomarkers}.",
            "shap_top_features": "Top SHAP Features",
            "shap_mean_value": "SHAP Mean Value",
            "feature": "Feature",
            "omic": "Omic",
            "omics": "Omics",
            "genomics": "Genomics",
            "transcriptomics": "Transcriptomics",
            "proteomics": "Proteomics",
            "metabolomics": "Metabolomics",
            "recommendations": "Recommendations",
            "download_biomarkers": "Download Biomarkers",
            "download_report": "Download Report",
            "shap": "SHAP Analysis",
        }
    }


def load_translations_raw():
    """Load the UI translations from ``src/translations.json``.

    No Streamlit command is issued here, so the function can run before or
    after ``st.set_page_config``.

    Returns:
        dict: The parsed JSON content on success. If the file is missing or
        is not valid JSON, the built-in English fallback (a dict with a single
        ``"en"`` entry) is returned instead and the problem is logged.
    """
    translations_path = "src/translations.json"
    try:
        # The working directory differs between local runs and the container,
        # so both locations are logged to make a missing file diagnosable.
        logger.debug("Current directory: %s", os.getcwd())
        logger.debug("Translations file path: %s", os.path.abspath(translations_path))
        with open(translations_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError as e:
        logger.error("Translation file not found: %s", e)
        return _fallback_translations()
    except json.JSONDecodeError as e:
        logger.error("JSON decode error: %s", e)
        return _fallback_translations()
    logger.debug("Translations loaded successfully")
    return data
157
 
158
  translations = load_translations_raw()
159
 
160
  # Get translation function
def t(key, lang="fr"):
    """Translate ``key`` into ``lang``.

    Falls back to the English entry when ``lang`` has no value for ``key``,
    and to ``key`` itself when English has none either.
    """
    english_or_key = translations.get("en", {}).get(key, key)
    language_table = translations.get(lang, {})
    return language_table.get(key, english_or_key)
163
 
164
  # Check for translation file errors after set_page_config
165
  if not translations:
 
206
  # Sidebar
207
  with st.sidebar:
208
  st.markdown(f"<div class='sidebar-header'>{t('app_title')}</div>", unsafe_allow_html=True)
209
+ st.image("https://via.placeholder.com/150", caption="Ngoue David Roger Yannick")
210
  st.markdown(f"<div class='sidebar-subheader'>{t('navigation')}</div>", unsafe_allow_html=True)
211
 
212
  # Language selector
 
220
 
221
  # Device configuration
222
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
223
+ logger.debug(f"Using device: {device}")
224
 
225
  # IRC biomarkers
226
  irc_biomarkers = [
 
441
  try:
442
  c = canvas.Canvas(output_path, pagesize=letter)
443
  c.setFont("Helvetica-Bold", 16)
444
+ c.drawString(100, 750, t('app_title', lang))
445
  c.setFont("Helvetica", 12)
446
+ c.drawString(100, 730, "Ngoue David Roger Yannick")
447
  c.drawString(100, 710, t('footer', lang))
448
 
449
  c.setFont("Helvetica-Bold", 14)
 
476
  raise Exception(t("pdf_error").format(error=str(e)))
477
 
# Load BioGPT for recommendations.
# Both handles start as None so the rest of the app can detect a failed load
# (generate_recommendation checks `biogpt_model is None`) instead of crashing.
biogpt_tokenizer = None
biogpt_model = None
try:
    logger.debug("Loading BioGPT tokenizer...")
    biogpt_tokenizer = BioGptTokenizer.from_pretrained("microsoft/biogpt")
    logger.debug("Loading BioGPT model...")
    biogpt_model = BioGptForCausalLM.from_pretrained("microsoft/biogpt").to(device)
    logger.debug("Setting BioGPT model to eval mode...")
    biogpt_model.eval()
    logger.debug("BioGPT loaded successfully")
except Exception as e:
    # Keep the pair consistent: never leave a tokenizer without a model.
    biogpt_tokenizer = None
    biogpt_model = None
    # logger.exception records the traceback, which logger.error(f"...") dropped.
    logger.exception("Failed to load BioGPT: %s", e)
    st.warning(t("biogpt_load_error") + f": {str(e)}. Using mock recommendations.")
492
 
493
  # Load translation model (English to French)
494
  try:
 
501
  # Generate recommendation using BioGPT
502
  @st.cache_data
503
  def generate_recommendation(prompt, max_length=100):
504
+ if biogpt_model is None:
505
+ return t("recommendation_error") + " (BioGPT not loaded, using mock response)"
506
  try:
507
  inputs = biogpt_tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).to(device)
508
  outputs = biogpt_model.generate(
 
637
  <li><b>{t('genomics')}</b>: {t('genomics_format')}</li>
638
  <li><b>{t('transcriptomics')}</b>: {t('transcriptomics_format')}</li>
639
  <li><b>{t('proteomics')}</b>: {t('proteomics_format')}</li>
640
+ <li><b>{t('metabolomics')}</b></li>
641
  </ul>
642
  <p>{t('raw_omics_processing')}</p>
643
  </div>
 
649
  <p>{t('preprocessing_desc')}</p>
650
  <ol>
651
  <li><b>{t('quality_control')}</b>: {t('quality_control_desc')}</li>
652
+ <li><b>{t('normalization')}</b></li>
653
+ <li><b>{t('imputation')}</b>: {t('imputation_desc')}</b></li>
654
+ <li><b>{t('standardization')}</b></li>
655
  <li><b>{t('feature_selection')}</b>: {t('feature_selection_desc')}</li>
656
  </ol>
657
  <p>{t('preprocessing_importance')}</p>
658
  </div>
659
  """, unsafe_allow_html=True)
660
 
# CSV format section. The body must be an f-string, otherwise every
# {t(...)} placeholder is shown to the user literally.
st.markdown(f"<div class='section-title'>{t('csv_format')}</div>", unsafe_allow_html=True)
st.markdown(f"""
<div class='content animate__fadeIn'>
<p>{t('csv_format_desc')}</p>
<ul>
<li><b>{t('columns')}</b>: {t('columns_desc')}</li>
<li><b>{t('rows')}</b>: {t('rows_desc')}</li>
<li><b>{t('values')}</b>: {t('values_desc')}</li>
<li><b>{t('separator')}</b></li>
<li><b>{t('header')}</b>: {t('header_desc')}</li>
</ul>
<p>{t('csv_example')}</p>
<pre class='code-block'>
Patient_ID,UMOD_rs12917707,APOL1_rs73885319,MYH9_rs4821480
P001,0,0,1
</pre>
<p>{t('csv_patient_id_desc')}</p>
</div>
""", unsafe_allow_html=True)
680
 
# Data quality section: intro paragraph, checklist, then the closing
# "why it matters" paragraph.
st.markdown(f"<div class='section-title'>{t('data_quality')}</div>", unsafe_allow_html=True)
st.markdown(f"""
<div class='content animate__fadeIn'>
<p>{t('data_quality_desc')}</p>
<ul>
<li><b>{t('consistency')}</b></li>
<li><b>{t('completeness')}</b>: {t('completeness_desc')}</li>
<li><b>{t('accuracy')}</b>: {t('accuracy_desc')}</li>
<li><b>{t('alignment')}</b>: {t('alignment_desc')}</li>
</ul>
<p>{t('data_quality_importance')}</p>
</div>
""", unsafe_allow_html=True)
694
 
695
  # Predict Risk Page
696
  elif page == t("predict_risk"):
697
+ st.markdown(f"<div class='title animate__fadeIn'>{t('predict_risk')}</div>", unsafe_allow_html=True)
698
  st.markdown(f"""
699
  <div class='content animate__fadeIn'>
700
  <p>{t('predict_risk_intro')}</p>
 
729
 
730
  # Validate and process files
731
  if st.button(t("run_prediction"), key="run_prediction"):
732
+ if all(file_paths.values()) and st.session_state.patient_id:
733
  try:
734
  with st.spinner(t("processing")):
735
  # Load and preprocess data
 
742
  patient_tensors = [torch.tensor(data_dict[omic].values, dtype=torch.float32) for omic in data_dict]
743
  patient_dataset = TensorDataset(*patient_tensors)
744
  patient_loader = DataLoader(patient_dataset, batch_size=best_hyperparams['batch_size'],
745
+ shuffle=False)
746
 
747
  # Extract latent representations
748
  encoded_data = extract_latent_representations(model, patient_loader, device)
 
788
  # Store results
789
  st.session_state.results = {
790
  'umap_df': umap_df,
791
+ 'important_biom': important_biomarkers,
792
  'output_dir': output_dir,
793
  'pdf_path': pdf_path,
794
  'recommendations': recommendations
 
796
 
# Display results
st.markdown(f"<div class='section-header'>{t('prediction_results')}</div>", unsafe_allow_html=True)
# The 'ckd_risk_score' template carries its own numeric format ({score:.2f}).
# .format() returns a str, so applying a float format spec to that result
# raises ValueError; the text is inserted as-is.
risk_score_text = t('ckd_risk_score').format(score=risk_score)
st.markdown(f"<div class='result-box'>{risk_score_text}</div>", unsafe_allow_html=True)
801
 
802
  # Risk gauge
803
  fig_gauge = go.Figure(go.Indicator(
 
808
  'axis': {'range': [0, 100], 'tickcolor': '#FFF', 'tickfont': {'color': '#FFF'}},
809
  'bar': {'color': '#0000FF'},
810
  'steps': [
811
+ {'range': [0, 33], 'color': '#FFD700'},
812
  {'range': [33, 66], 'color': '#FACC15'},
813
  {'range': [66, 100], 'color': '#EF4444'}
814
  ],
 
821
  ))
822
  fig_gauge.update_layout(
823
  paper_bgcolor='#0E1117',
824
+ font={'color': '#FFFFFF'},
825
  margin={'l': 20, 'r': 20, 't': 50, 'b': 20}
826
  )
827
  st.plotly_chart(fig_gauge, use_container_width=True)
 
830
  st.markdown(f"<div class='subheader'>{t('key_biomarkers')}</div>", unsafe_allow_html=True)
831
  shap_fig = plt.figure(figsize=(12, 8))
832
  sns.barplot(data=important_biomarkers.head(20), x='SHAP_Mean_Abs', y=t('feature'), hue=t('omic'),
833
+ palette=['#FF6F61', '#6B5B95', '#4CAF50', '#2196F3'])
834
  plt.title(t('shap_top_features'), fontsize=16, color='#FFFFFF')
835
  plt.xlabel(t('shap_mean_value'), fontsize=12, color='#FFFFFF')
836
  plt.ylabel(t('feature'), fontsize=12, color='#FFFFFF')
 
842
 
# Display saved SHAP plot
if os.path.exists(shap_plot_path):
    st.image(shap_plot_path, caption=t('shap'), use_column_width=True)

# Recommendations
st.markdown(f"<div class='subheader'>{t('recommendations')}</div>", unsafe_allow_html=True)
for rec in recommendations:
    # Accept both plain-string entries and dict entries carrying a 'text' field.
    rec_text = rec['text'] if isinstance(rec, dict) else rec
    st.markdown(f"<div class='recommendation'>{rec_text}</div>", unsafe_allow_html=True)

# Download results: both files are embedded as base64 data URIs so no
# server-side download endpoint is required.
st.markdown(f"<div class='subheader'>{t('download_results')}</div>", unsafe_allow_html=True)
col1, col2 = st.columns(2)
with col1:
    csv_bytes = important_biomarkers.to_csv(index=False).encode('utf-8')
    b64 = base64.b64encode(csv_bytes).decode()
    href = f'<a href="data:text/csv;base64,{b64}" download="important_biomarkers.csv" class="download-btn">{t("download_biomarkers", lang)}</a>'
    st.markdown(href, unsafe_allow_html=True)
with col2:
    with open(pdf_path, "rb") as f:
        pdf_data = f.read()
    b64_pdf = base64.b64encode(pdf_data).decode()
    href_pdf = f'<a href="data:application/pdf;base64,{b64_pdf}" download="ckd_risk_report.pdf" class="download-btn">{t("download_report", lang)}</a>'
    st.markdown(href_pdf, unsafe_allow_html=True)
866
 
867
  except Exception as e:
 
875
  st.markdown(f"<div class='title animate__fadeIn'>{t('chatbot_title')}</div>", unsafe_allow_html=True)
876
  st.markdown(f"""
877
  <div class='content animate__fadeIn'>
878
+ <p>{t('chatbot_int')}</p>
879
  </div>
880
  """, unsafe_allow_html=True)
881
 
 
890
  st.markdown(f"<div class='chat-bubble bot'>{message['content']}</div>", unsafe_allow_html=True)
891
 
892
  # User input
893
+ user_input = st.text_area(t("type_message"), key="chat_input"))
894
+ if st.button(t("send"), key="button_send")):
895
  if user_input:
896
  try:
897
+ # Append user message to history
898
  st.session_state.chat_history.append({'role': 'user', 'content': user_input})
899
 
900
  # Check if user is asking about results
901
  if st.session_state.results and any(
902
  keyword in user_input.lower() for keyword in t('chat_risk_keywords').split(',')):
903
  risk_score = st.session_state.results['umap_df'][t('risk_score')].iloc[0]
904
+ biomarkers = st.session_state.results['important_biom'][t('feature')].head(5).tolist()
905
  context = t('chat_risk_response').format(score=risk_score, biomarkers=', '.join(biomarkers))
906
  response = send_to_rasa(f"{context} {user_input}", lang)
907
  else:
 
916
 
917
  if intent != 'None' and st.session_state.results:
918
  risk_score = st.session_state.results['umap_df'][t('risk_score')].iloc[0]
919
+ biomarkers = st.session_state.results['important_biom'][t('feature')].head(5).tolist()
920
  prompt = formulate_prompt(intent, risk_score, biomarkers)
921
  response = generate_recommendation(prompt)
922
  if validate_recommendation(response):
 
926
  else:
927
  response = send_to_rasa(user_input, lang)
928
 
929
+ # Append bot response to history
930
  st.session_state.chat_history.append({'role': 'bot', 'content': response})
931
 
932
  # Rerender chat
933
  st.rerun()
934
+ )
935
 
936
  except Exception as e:
937
  st.error(t("chatbot_error").format(error=str(e)))
 
941
  st.markdown(f"<div class='title animate__fadeIn'>{t('dashboard_title')}</div>", unsafe_allow_html=True)
942
  st.markdown(f"""
943
  <div class='content animate__fadeIn'>
944
+ <p>{t('dashboard_int')}</p>
945
  </div>
946
  """, unsafe_allow_html=True)
947
 
 
957
  } for p in patients]
958
  df_patients = pd.DataFrame(patient_data)
959
 
# Filter data
st.markdown(f"<div class='section-title'>{t('filters')}</div>", unsafe_allow_html=True)
# The threshold must be a number from a real slider widget: it is compared
# against the risk-score column on the next line.
risk_threshold = st.slider(t("risk_threshold"), 0, 100, 50)
filtered_df = df_patients[df_patients[t('risk_score')] >= risk_threshold]

# Display table
st.dataframe(filtered_df, use_container_width=True)
968
 
969
  # Plot
970
  fig = px.scatter(
971
+ data=filtered_df,
972
  x=t('risk_score'),
973
  y=t('patient_id'),
974
  color=t('risk_score'),
 
998
  try:
999
  rasa_process.terminate()
1000
  except Exception as e:
1001
+ print(f"Error occurred terminating Rasa server: {str(e)}")
1002
 
1003
  st.markdown(f"""
1004
  <div class='footer'>