Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +180 -56
src/streamlit_app.py
CHANGED
|
@@ -28,25 +28,138 @@ import subprocess
|
|
| 28 |
import requests
|
| 29 |
from transformers import pipeline, BioGptTokenizer, BioGptForCausalLM
|
| 30 |
from transformers import MarianMTModel, MarianTokenizer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
# Set page configuration as the first Streamlit command
|
| 33 |
st.set_page_config(page_title="Omics Risk Predictor", layout="wide", initial_sidebar_state="expanded")
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
# Load translations without Streamlit commands
|
| 36 |
def load_translations_raw():
|
| 37 |
try:
|
|
|
|
|
|
|
| 38 |
with open("src/translations.json", "r", encoding="utf-8") as f:
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
except
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
translations = load_translations_raw()
|
| 46 |
|
| 47 |
# Get translation function
|
| 48 |
def t(key, lang="fr"):
    """Return the translation of *key* for *lang*.

    Falls back to *key* itself when the language table or the key is missing.
    """
    table = translations.get(lang, {})
    return table.get(key, key)
|
| 50 |
|
| 51 |
# Check for translation file errors after set_page_config
|
| 52 |
if not translations:
|
|
@@ -93,7 +206,7 @@ Session = sessionmaker(bind=engine)
|
|
| 93 |
# Sidebar
|
| 94 |
with st.sidebar:
|
| 95 |
st.markdown(f"<div class='sidebar-header'>{t('app_title')}</div>", unsafe_allow_html=True)
|
| 96 |
-
st.image("https://via.placeholder.com/150", caption=
|
| 97 |
st.markdown(f"<div class='sidebar-subheader'>{t('navigation')}</div>", unsafe_allow_html=True)
|
| 98 |
|
| 99 |
# Language selector
|
|
@@ -107,6 +220,7 @@ with st.sidebar:
|
|
| 107 |
|
| 108 |
# Device configuration
|
| 109 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
|
|
|
| 110 |
|
| 111 |
# IRC biomarkers
|
| 112 |
irc_biomarkers = [
|
|
@@ -327,9 +441,9 @@ def generate_pdf_report(risk_score, important_biomarkers, shap_plot_path, recomm
|
|
| 327 |
try:
|
| 328 |
c = canvas.Canvas(output_path, pagesize=letter)
|
| 329 |
c.setFont("Helvetica-Bold", 16)
|
| 330 |
-
c.drawString(100, 750, t('
|
| 331 |
c.setFont("Helvetica", 12)
|
| 332 |
-
c.drawString(100, 730,
|
| 333 |
c.drawString(100, 710, t('footer', lang))
|
| 334 |
|
| 335 |
c.setFont("Helvetica-Bold", 14)
|
|
@@ -362,13 +476,19 @@ def generate_pdf_report(risk_score, important_biomarkers, shap_plot_path, recomm
|
|
| 362 |
raise Exception(t("pdf_error").format(error=str(e)))
|
| 363 |
|
| 364 |
# Load BioGPT for recommendations
|
|
|
|
|
|
|
| 365 |
try:
|
|
|
|
| 366 |
biogpt_tokenizer = BioGptTokenizer.from_pretrained("microsoft/biogpt")
|
|
|
|
| 367 |
biogpt_model = BioGptForCausalLM.from_pretrained("microsoft/biogpt").to(device)
|
|
|
|
| 368 |
biogpt_model.eval()
|
|
|
|
| 369 |
except Exception as e:
|
| 370 |
-
|
| 371 |
-
st.
|
| 372 |
|
| 373 |
# Load translation model (English to French)
|
| 374 |
try:
|
|
@@ -381,6 +501,8 @@ except Exception as e:
|
|
| 381 |
# Generate recommendation using BioGPT
|
| 382 |
@st.cache_data
|
| 383 |
def generate_recommendation(prompt, max_length=100):
|
|
|
|
|
|
|
| 384 |
try:
|
| 385 |
inputs = biogpt_tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).to(device)
|
| 386 |
outputs = biogpt_model.generate(
|
|
@@ -515,7 +637,7 @@ elif page == t("about_omics"):
|
|
| 515 |
<li><b>{t('genomics')}</b>: {t('genomics_format')}</li>
|
| 516 |
<li><b>{t('transcriptomics')}</b>: {t('transcriptomics_format')}</li>
|
| 517 |
<li><b>{t('proteomics')}</b>: {t('proteomics_format')}</li>
|
| 518 |
-
<li><b>{t('metabolomics')}</b>
|
| 519 |
</ul>
|
| 520 |
<p>{t('raw_omics_processing')}</p>
|
| 521 |
</div>
|
|
@@ -527,52 +649,52 @@ elif page == t("about_omics"):
|
|
| 527 |
<p>{t('preprocessing_desc')}</p>
|
| 528 |
<ol>
|
| 529 |
<li><b>{t('quality_control')}</b>: {t('quality_control_desc')}</li>
|
| 530 |
-
<li><b>{t('normalization')}</b>
|
| 531 |
-
<li><b>{t('imputation')}</b>: {t('imputation_desc')}</li>
|
| 532 |
-
<li><b>{t('standardization')}</b>
|
| 533 |
<li><b>{t('feature_selection')}</b>: {t('feature_selection_desc')}</li>
|
| 534 |
</ol>
|
| 535 |
<p>{t('preprocessing_importance')}</p>
|
| 536 |
</div>
|
| 537 |
""", unsafe_allow_html=True)
|
| 538 |
|
| 539 |
-
st.markdown(f"<div class='section-
|
| 540 |
-
st.markdown(
|
| 541 |
<div class='content animate__fadeIn'>
|
| 542 |
<p>{t('csv_format_desc')}</p>
|
| 543 |
<ul>
|
| 544 |
<li><b>{t('columns')}</b>: {t('columns_desc')}</li>
|
| 545 |
-
<li><b>{t('rows')}</b>: {t('rows_desc')}</
|
| 546 |
<li><b>{t('values')}</b>: {t('values_desc')}</li>
|
| 547 |
-
<li><b>{t('separator')}</b>
|
| 548 |
-
<li><b>{t('header')}</b>: {t('header_desc')}</
|
| 549 |
</ul>
|
| 550 |
<p>{t('csv_example')}</p>
|
| 551 |
<pre class='code-block'>
|
| 552 |
-
|
| 553 |
-
P001,0,
|
| 554 |
</pre>
|
| 555 |
<p>{t('csv_patient_id_desc')}</p>
|
| 556 |
</div>
|
| 557 |
""", unsafe_allow_html=True)
|
| 558 |
|
| 559 |
-
st.markdown(f"<div class='section-
|
| 560 |
st.markdown(f"""
|
| 561 |
<div class='content animate__fadeIn'>
|
| 562 |
<p>{t('data_quality_desc')}</p>
|
| 563 |
<ul>
|
| 564 |
-
<li><b>{t('consistency')}</b>
|
| 565 |
<li><b>{t('completeness')}</b>: {t('completeness_desc')}</li>
|
| 566 |
-
<li><b>{t('accuracy')}</b>: {t('accuracy_desc')}</
|
| 567 |
-
<li><b>{t('alignment')}</b>: {t('alignment_desc')}</li>
|
| 568 |
</ul>
|
| 569 |
-
<p>{t('
|
| 570 |
</div>
|
| 571 |
""", unsafe_allow_html=True)
|
| 572 |
|
| 573 |
# Predict Risk Page
|
| 574 |
elif page == t("predict_risk"):
|
| 575 |
-
st.markdown(f"<div class='title animate__fadeIn'>{t('
|
| 576 |
st.markdown(f"""
|
| 577 |
<div class='content animate__fadeIn'>
|
| 578 |
<p>{t('predict_risk_intro')}</p>
|
|
@@ -607,7 +729,7 @@ elif page == t("predict_risk"):
|
|
| 607 |
|
| 608 |
# Validate and process files
|
| 609 |
if st.button(t("run_prediction"), key="run_prediction"):
|
| 610 |
-
if all(file_paths
|
| 611 |
try:
|
| 612 |
with st.spinner(t("processing")):
|
| 613 |
# Load and preprocess data
|
|
@@ -620,7 +742,7 @@ elif page == t("predict_risk"):
|
|
| 620 |
patient_tensors = [torch.tensor(data_dict[omic].values, dtype=torch.float32) for omic in data_dict]
|
| 621 |
patient_dataset = TensorDataset(*patient_tensors)
|
| 622 |
patient_loader = DataLoader(patient_dataset, batch_size=best_hyperparams['batch_size'],
|
| 623 |
-
|
| 624 |
|
| 625 |
# Extract latent representations
|
| 626 |
encoded_data = extract_latent_representations(model, patient_loader, device)
|
|
@@ -666,7 +788,7 @@ elif page == t("predict_risk"):
|
|
| 666 |
# Store results
|
| 667 |
st.session_state.results = {
|
| 668 |
'umap_df': umap_df,
|
| 669 |
-
'
|
| 670 |
'output_dir': output_dir,
|
| 671 |
'pdf_path': pdf_path,
|
| 672 |
'recommendations': recommendations
|
|
@@ -674,8 +796,8 @@ elif page == t("predict_risk"):
|
|
| 674 |
|
| 675 |
# Display results
|
| 676 |
st.markdown(f"<div class='section-header'>{t('prediction_results')}</div>", unsafe_allow_html=True)
|
| 677 |
-
st.markdown(f"<div class='result-box'>{t('ckd_risk_score').format(score=risk_score):.2f}</div>",
|
| 678 |
-
|
| 679 |
|
| 680 |
# Risk gauge
|
| 681 |
fig_gauge = go.Figure(go.Indicator(
|
|
@@ -686,7 +808,7 @@ elif page == t("predict_risk"):
|
|
| 686 |
'axis': {'range': [0, 100], 'tickcolor': '#FFF', 'tickfont': {'color': '#FFF'}},
|
| 687 |
'bar': {'color': '#0000FF'},
|
| 688 |
'steps': [
|
| 689 |
-
{'range': [0, 33], 'color': '#
|
| 690 |
{'range': [33, 66], 'color': '#FACC15'},
|
| 691 |
{'range': [66, 100], 'color': '#EF4444'}
|
| 692 |
],
|
|
@@ -699,7 +821,7 @@ elif page == t("predict_risk"):
|
|
| 699 |
))
|
| 700 |
fig_gauge.update_layout(
|
| 701 |
paper_bgcolor='#0E1117',
|
| 702 |
-
font={'color':
|
| 703 |
margin={'l': 20, 'r': 20, 't': 50, 'b': 20}
|
| 704 |
)
|
| 705 |
st.plotly_chart(fig_gauge, use_container_width=True)
|
|
@@ -708,7 +830,7 @@ elif page == t("predict_risk"):
|
|
| 708 |
st.markdown(f"<div class='subheader'>{t('key_biomarkers')}</div>", unsafe_allow_html=True)
|
| 709 |
shap_fig = plt.figure(figsize=(12, 8))
|
| 710 |
sns.barplot(data=important_biomarkers.head(20), x='SHAP_Mean_Abs', y=t('feature'), hue=t('omic'),
|
| 711 |
-
|
| 712 |
plt.title(t('shap_top_features'), fontsize=16, color='#FFFFFF')
|
| 713 |
plt.xlabel(t('shap_mean_value'), fontsize=12, color='#FFFFFF')
|
| 714 |
plt.ylabel(t('feature'), fontsize=12, color='#FFFFFF')
|
|
@@ -720,26 +842,26 @@ elif page == t("predict_risk"):
|
|
| 720 |
|
| 721 |
# Display saved SHAP plot
|
| 722 |
if os.path.exists(shap_plot_path):
|
| 723 |
-
st.image(shap_plot_path, caption=t('shap'), use_column_width=True
|
| 724 |
|
| 725 |
# Recommendations
|
| 726 |
st.markdown(f"<div class='subheader'>{t('recommendations')}</div>", unsafe_allow_html=True)
|
| 727 |
for rec in recommendations:
|
| 728 |
-
st.markdown(f"<div class='recommendation'>{rec}</div>", unsafe_allow_html=True)
|
| 729 |
|
| 730 |
# Download results
|
| 731 |
st.markdown(f"<div class='subheader'>{t('download_results')}</div>", unsafe_allow_html=True)
|
| 732 |
col1, col2 = st.columns(2)
|
| 733 |
with col1:
|
| 734 |
-
csv =
|
| 735 |
b64 = base64.b64encode(csv).decode()
|
| 736 |
-
href = f'<a href="data:
|
| 737 |
-
st.markdown(href, unsafe_allow_html=True)
|
| 738 |
with col2:
|
| 739 |
with open(pdf_path, "rb") as f:
|
| 740 |
pdf_data = f.read()
|
| 741 |
b64_pdf = base64.b64encode(pdf_data).decode()
|
| 742 |
-
href_pdf = f'<a href="data:application/pdf;base64,
|
| 743 |
st.markdown(href_pdf, unsafe_allow_html=True)
|
| 744 |
|
| 745 |
except Exception as e:
|
|
@@ -753,7 +875,7 @@ elif page == t("chatbot"):
|
|
| 753 |
st.markdown(f"<div class='title animate__fadeIn'>{t('chatbot_title')}</div>", unsafe_allow_html=True)
|
| 754 |
st.markdown(f"""
|
| 755 |
<div class='content animate__fadeIn'>
|
| 756 |
-
<p>{t('
|
| 757 |
</div>
|
| 758 |
""", unsafe_allow_html=True)
|
| 759 |
|
|
@@ -768,18 +890,18 @@ elif page == t("chatbot"):
|
|
| 768 |
st.markdown(f"<div class='chat-bubble bot'>{message['content']}</div>", unsafe_allow_html=True)
|
| 769 |
|
| 770 |
# User input
|
| 771 |
-
user_input = st.text_area(t("type_message"), key="chat_input")
|
| 772 |
-
if st.button(t("send"), key="button_send"):
|
| 773 |
if user_input:
|
| 774 |
try:
|
| 775 |
-
#
|
| 776 |
st.session_state.chat_history.append({'role': 'user', 'content': user_input})
|
| 777 |
|
| 778 |
# Check if user is asking about results
|
| 779 |
if st.session_state.results and any(
|
| 780 |
keyword in user_input.lower() for keyword in t('chat_risk_keywords').split(',')):
|
| 781 |
risk_score = st.session_state.results['umap_df'][t('risk_score')].iloc[0]
|
| 782 |
-
biomarkers = st.session_state.results['
|
| 783 |
context = t('chat_risk_response').format(score=risk_score, biomarkers=', '.join(biomarkers))
|
| 784 |
response = send_to_rasa(f"{context} {user_input}", lang)
|
| 785 |
else:
|
|
@@ -794,7 +916,7 @@ elif page == t("chatbot"):
|
|
| 794 |
|
| 795 |
if intent != 'None' and st.session_state.results:
|
| 796 |
risk_score = st.session_state.results['umap_df'][t('risk_score')].iloc[0]
|
| 797 |
-
biomarkers = st.session_state.results['
|
| 798 |
prompt = formulate_prompt(intent, risk_score, biomarkers)
|
| 799 |
response = generate_recommendation(prompt)
|
| 800 |
if validate_recommendation(response):
|
|
@@ -804,11 +926,12 @@ elif page == t("chatbot"):
|
|
| 804 |
else:
|
| 805 |
response = send_to_rasa(user_input, lang)
|
| 806 |
|
| 807 |
-
#
|
| 808 |
st.session_state.chat_history.append({'role': 'bot', 'content': response})
|
| 809 |
|
| 810 |
# Rerender chat
|
| 811 |
st.rerun()
|
|
|
|
| 812 |
|
| 813 |
except Exception as e:
|
| 814 |
st.error(t("chatbot_error").format(error=str(e)))
|
|
@@ -818,7 +941,7 @@ elif page == t("dashboard"):
|
|
| 818 |
st.markdown(f"<div class='title animate__fadeIn'>{t('dashboard_title')}</div>", unsafe_allow_html=True)
|
| 819 |
st.markdown(f"""
|
| 820 |
<div class='content animate__fadeIn'>
|
| 821 |
-
<p>{t('
|
| 822 |
</div>
|
| 823 |
""", unsafe_allow_html=True)
|
| 824 |
|
|
@@ -834,17 +957,18 @@ elif page == t("dashboard"):
|
|
| 834 |
} for p in patients]
|
| 835 |
df_patients = pd.DataFrame(patient_data)
|
| 836 |
|
| 837 |
-
#
|
| 838 |
-
st.markdown(f"<div class='
|
| 839 |
-
risk_threshold =
|
|
|
|
| 840 |
filtered_df = df_patients[df_patients[t('risk_score')] >= risk_threshold]
|
| 841 |
|
| 842 |
# Display table
|
| 843 |
-
st.
|
| 844 |
|
| 845 |
# Plot
|
| 846 |
fig = px.scatter(
|
| 847 |
-
filtered_df,
|
| 848 |
x=t('risk_score'),
|
| 849 |
y=t('patient_id'),
|
| 850 |
color=t('risk_score'),
|
|
@@ -874,7 +998,7 @@ if rasa_process:
|
|
| 874 |
try:
|
| 875 |
rasa_process.terminate()
|
| 876 |
except Exception as e:
|
| 877 |
-
print(f"Error terminating Rasa server: {str(e)}")
|
| 878 |
|
| 879 |
st.markdown(f"""
|
| 880 |
<div class='footer'>
|
|
|
|
| 28 |
import requests
|
| 29 |
from transformers import pipeline, BioGptTokenizer, BioGptForCausalLM
|
| 30 |
from transformers import MarianMTModel, MarianTokenizer
|
| 31 |
+
import logging
|
| 32 |
+
|
| 33 |
+
# Configure logging
|
| 34 |
+
logging.basicConfig(level=logging.DEBUG)
|
| 35 |
+
logger = logging.getLogger(__name__)
|
| 36 |
|
| 37 |
# Set page configuration as the first Streamlit command
|
| 38 |
st.set_page_config(page_title="Omics Risk Predictor", layout="wide", initial_sidebar_state="expanded")
|
| 39 |
|
| 40 |
+
# Set Hugging Face cache
|
| 41 |
+
os.environ["HF_HOME"] = "/app/hf_cache"
|
| 42 |
+
os.makedirs("/app/hf_cache", exist_ok=True)
|
| 43 |
+
|
| 44 |
# Load translations without Streamlit commands
|
| 45 |
def _fallback_translations():
    """Return a fresh copy of the built-in English strings.

    Used when ``src/translations.json`` is missing or is not valid JSON.
    A new dict is built on every call so callers may mutate the result.
    """
    return {
        "en": {
            "app_title": "Omics Risk Predictor",
            "patient_id": "Patient ID",
            "select_language": "Select Language",
            "home": "Home",
            "about_omics": "About Omics Data",
            "predict_risk": "Predict Risk",
            "chatbot": "Chatbot Assistant",
            "dashboard": "Dashboard",
            "footer": "© 2025, Ngoue David Roger Yannick",
            "upload_warning": "Please upload all omics files and provide a patient ID.",
            "risk_score": "Risk Score",
            "ckd_risk_score": "CKD Risk Score: {score:.2f}%",
            "key_biomarkers": "Key Biomarkers",
            "download_results": "Download Results",
            "chatbot_title": "Chatbot Assistant",
            "chatbot_intro": "Interact with our assistant for recommendations.",
            "chat_with_assistant": "Chat with Assistant",
            "type_message": "Type your message",
            "send": "Send",
            "biogpt_load_error": "Error loading BioGPT",
            "recommendation_error": "Error generating recommendation",
            "medication_error": "Error generating medication recommendation",
            "diet_error": "Error generating diet recommendation",
            "exercise_error": "Error generating exercise recommendation",
            "css_error": "CSS file not found",
            "model_weights_error": "Model weights not found",
            "model_load_error": "Error loading model: {error}",
            "file_process_error": "Error processing files: {error}",
            "chat_risk_keywords": "risk,score",
            "chat_medication_keywords": "medication,drug",
            "chat_diet_keywords": "diet,food",
            "chat_exercise_keywords": "exercise,workout",
            "chat_risk_response": "Risk score: {score:.2f}%. Biomarkers: {biomarkers}.",
            "shap_top_features": "Top SHAP Features",
            "shap_mean_value": "SHAP Mean Value",
            "feature": "Feature",
            "omic": "Omic",
            "omics": "Omics",
            "genomics": "Genomics",
            "transcriptomics": "Transcriptomics",
            "proteomics": "Proteomics",
            "metabolomics": "Metabolomics",
            "recommendations": "Recommendations",
            "download_biomarkers": "Download Biomarkers",
            "download_report": "Download Report",
            "shap": "SHAP Analysis",
        }
    }


def load_translations_raw():
    """Load the translation table from ``src/translations.json``.

    The path is relative to the current working directory. No Streamlit
    command is issued here.

    Returns:
        dict: the parsed JSON document on success. When the file is missing
        or cannot be decoded as JSON, the built-in English fallback table
        (``{"en": {...}}``) is returned instead and the failure is logged.
    """
    # Same logger object as the module-level ``logger`` (same name).
    log = logging.getLogger(__name__)
    path = "src/translations.json"
    try:
        log.debug("Current directory: %s", os.getcwd())
        log.debug("Translations file path: %s", os.path.abspath(path))
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        log.debug("Translations loaded successfully")
        return data
    except FileNotFoundError as e:
        log.error("Translation file not found: %s", e)
        return _fallback_translations()
    except json.JSONDecodeError as e:
        log.error("JSON decode error: %s", e)
        return _fallback_translations()
|
| 157 |
|
| 158 |
translations = load_translations_raw()
|
| 159 |
|
| 160 |
# Get translation function
|
| 161 |
def t(key, lang="fr"):
    """Return the translated string for *key* in language *lang*.

    Lookup order: the requested language table, then the English table,
    and finally *key* itself when neither table defines it.
    """
    english_or_key = translations.get("en", {}).get(key, key)
    requested = translations.get(lang, {})
    return requested.get(key, english_or_key)
|
| 163 |
|
| 164 |
# Check for translation file errors after set_page_config
|
| 165 |
if not translations:
|
|
|
|
| 206 |
# Sidebar
|
| 207 |
with st.sidebar:
|
| 208 |
st.markdown(f"<div class='sidebar-header'>{t('app_title')}</div>", unsafe_allow_html=True)
|
| 209 |
+
st.image("https://via.placeholder.com/150", caption="Ngoue David Roger Yannick")
|
| 210 |
st.markdown(f"<div class='sidebar-subheader'>{t('navigation')}</div>", unsafe_allow_html=True)
|
| 211 |
|
| 212 |
# Language selector
|
|
|
|
| 220 |
|
| 221 |
# Device configuration
|
| 222 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 223 |
+
logger.debug(f"Using device: {device}")
|
| 224 |
|
| 225 |
# IRC biomarkers
|
| 226 |
irc_biomarkers = [
|
|
|
|
| 441 |
try:
|
| 442 |
c = canvas.Canvas(output_path, pagesize=letter)
|
| 443 |
c.setFont("Helvetica-Bold", 16)
|
| 444 |
+
c.drawString(100, 750, t('app_title', lang))
|
| 445 |
c.setFont("Helvetica", 12)
|
| 446 |
+
c.drawString(100, 730, "Ngoue David Roger Yannick")
|
| 447 |
c.drawString(100, 710, t('footer', lang))
|
| 448 |
|
| 449 |
c.setFont("Helvetica-Bold", 14)
|
|
|
|
| 476 |
raise Exception(t("pdf_error").format(error=str(e)))
|
| 477 |
|
| 478 |
# Load BioGPT for recommendations
|
| 479 |
+
biogpt_tokenizer = None
|
| 480 |
+
biogpt_model = None
|
| 481 |
try:
|
| 482 |
+
logger.debug("Loading BioGPT tokenizer...")
|
| 483 |
biogpt_tokenizer = BioGptTokenizer.from_pretrained("microsoft/biogpt")
|
| 484 |
+
logger.debug("Loading BioGPT model...")
|
| 485 |
biogpt_model = BioGptForCausalLM.from_pretrained("microsoft/biogpt").to(device)
|
| 486 |
+
logger.debug("Setting BioGPT model to eval mode...")
|
| 487 |
biogpt_model.eval()
|
| 488 |
+
logger.debug("BioGPT loaded successfully")
|
| 489 |
except Exception as e:
|
| 490 |
+
logger.error(f"Failed to load BioGPT: {str(e)}")
|
| 491 |
+
st.warning(t("biogpt_load_error") + f": {str(e)}. Using mock recommendations.")
|
| 492 |
|
| 493 |
# Load translation model (English to French)
|
| 494 |
try:
|
|
|
|
| 501 |
# Generate recommendation using BioGPT
|
| 502 |
@st.cache_data
|
| 503 |
def generate_recommendation(prompt, max_length=100):
|
| 504 |
+
if biogpt_model is None:
|
| 505 |
+
return t("recommendation_error") + " (BioGPT not loaded, using mock response)"
|
| 506 |
try:
|
| 507 |
inputs = biogpt_tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).to(device)
|
| 508 |
outputs = biogpt_model.generate(
|
|
|
|
| 637 |
<li><b>{t('genomics')}</b>: {t('genomics_format')}</li>
|
| 638 |
<li><b>{t('transcriptomics')}</b>: {t('transcriptomics_format')}</li>
|
| 639 |
<li><b>{t('proteomics')}</b>: {t('proteomics_format')}</li>
|
| 640 |
+
<li><b>{t('metabolomics')}</b></li>
|
| 641 |
</ul>
|
| 642 |
<p>{t('raw_omics_processing')}</p>
|
| 643 |
</div>
|
|
|
|
| 649 |
<p>{t('preprocessing_desc')}</p>
|
| 650 |
<ol>
|
| 651 |
<li><b>{t('quality_control')}</b>: {t('quality_control_desc')}</li>
|
| 652 |
+
<li><b>{t('normalization')}</b></li>
|
| 653 |
+
<li><b>{t('imputation')}</b>: {t('imputation_desc')}</li>
|
| 654 |
+
<li><b>{t('standardization')}</b></li>
|
| 655 |
<li><b>{t('feature_selection')}</b>: {t('feature_selection_desc')}</li>
|
| 656 |
</ol>
|
| 657 |
<p>{t('preprocessing_importance')}</p>
|
| 658 |
</div>
|
| 659 |
""", unsafe_allow_html=True)
|
| 660 |
|
| 661 |
+
st.markdown(f"<div class='section-title'>{t('csv_format')}</div>", unsafe_allow_html=True)
|
| 662 |
+
# Render the CSV-format help section.
# The f-prefix is required: without it the {t(...)} placeholders are shown
# literally instead of being replaced by the translated strings.
st.markdown(f"""
<div class='content animate__fadeIn'>
<p>{t('csv_format_desc')}</p>
<ul>
<li><b>{t('columns')}</b>: {t('columns_desc')}</li>
<li><b>{t('rows')}</b>: {t('rows_desc')}</li>
<li><b>{t('values')}</b>: {t('values_desc')}</li>
<li><b>{t('separator')}</b></li>
<li><b>{t('header')}</b>: {t('header_desc')}</li>
</ul>
<p>{t('csv_example')}</p>
<pre class='code-block'>
Patient_ID,UMOD_rs12917707,APOL1_rs73885319,MYH9_rs4821480
P001,0,0,1
</pre>
<p>{t('csv_patient_id_desc')}</p>
</div>
""", unsafe_allow_html=True)
|
| 680 |
|
| 681 |
+
st.markdown(f"<div class='section-title'>{t('data_quality')}</div>", unsafe_allow_html=True)
|
| 682 |
# Render the data-quality help section; every <li> is closed with </li>.
# NOTE(review): the closing paragraph repeats 'data_quality_desc'; a separate
# summary key may have been intended — confirm against translations.json.
st.markdown(f"""
<div class='content animate__fadeIn'>
<p>{t('data_quality_desc')}</p>
<ul>
<li><b>{t('consistency')}</b></li>
<li><b>{t('completeness')}</b>: {t('completeness_desc')}</li>
<li><b>{t('accuracy')}</b>: {t('accuracy_desc')}</li>
<li><b>{t('alignment')}</b>: {t('alignment_desc')}</li>
</ul>
<p>{t('data_quality_desc')}</p>
</div>
""", unsafe_allow_html=True)
|
| 694 |
|
| 695 |
# Predict Risk Page
|
| 696 |
elif page == t("predict_risk"):
|
| 697 |
+
st.markdown(f"<div class='title animate__fadeIn'>{t('predict_risk')}</div>", unsafe_allow_html=True)
|
| 698 |
st.markdown(f"""
|
| 699 |
<div class='content animate__fadeIn'>
|
| 700 |
<p>{t('predict_risk_intro')}</p>
|
|
|
|
| 729 |
|
| 730 |
# Validate and process files
|
| 731 |
if st.button(t("run_prediction"), key="run_prediction"):
|
| 732 |
+
if all(file_paths.values()) and st.session_state.patient_id:
|
| 733 |
try:
|
| 734 |
with st.spinner(t("processing")):
|
| 735 |
# Load and preprocess data
|
|
|
|
| 742 |
patient_tensors = [torch.tensor(data_dict[omic].values, dtype=torch.float32) for omic in data_dict]
|
| 743 |
patient_dataset = TensorDataset(*patient_tensors)
|
| 744 |
patient_loader = DataLoader(patient_dataset, batch_size=best_hyperparams['batch_size'],
|
| 745 |
+
shuffle=False)
|
| 746 |
|
| 747 |
# Extract latent representations
|
| 748 |
encoded_data = extract_latent_representations(model, patient_loader, device)
|
|
|
|
| 788 |
# Store results
|
| 789 |
st.session_state.results = {
|
| 790 |
'umap_df': umap_df,
|
| 791 |
+
'important_biom': important_biomarkers,
|
| 792 |
'output_dir': output_dir,
|
| 793 |
'pdf_path': pdf_path,
|
| 794 |
'recommendations': recommendations
|
|
|
|
| 796 |
|
| 797 |
# Display results
|
| 798 |
st.markdown(f"<div class='section-header'>{t('prediction_results')}</div>", unsafe_allow_html=True)
|
| 799 |
+
# The translated template already formats the score ("{score:.2f}"), so no
# further format spec is applied: a numeric spec such as ",.2f" on the
# resulting str raises ValueError.
st.markdown(f"<div class='result-box'>{t('ckd_risk_score').format(score=risk_score)}</div>",
            unsafe_allow_html=True)
|
| 801 |
|
| 802 |
# Risk gauge
|
| 803 |
fig_gauge = go.Figure(go.Indicator(
|
|
|
|
| 808 |
'axis': {'range': [0, 100], 'tickcolor': '#FFF', 'tickfont': {'color': '#FFF'}},
|
| 809 |
'bar': {'color': '#0000FF'},
|
| 810 |
'steps': [
|
| 811 |
+
{'range': [0, 33], 'color': '#FFD700'},
|
| 812 |
{'range': [33, 66], 'color': '#FACC15'},
|
| 813 |
{'range': [66, 100], 'color': '#EF4444'}
|
| 814 |
],
|
|
|
|
| 821 |
))
|
| 822 |
fig_gauge.update_layout(
|
| 823 |
paper_bgcolor='#0E1117',
|
| 824 |
+
font={'color': '#FFFFFF'},
|
| 825 |
margin={'l': 20, 'r': 20, 't': 50, 'b': 20}
|
| 826 |
)
|
| 827 |
st.plotly_chart(fig_gauge, use_container_width=True)
|
|
|
|
| 830 |
st.markdown(f"<div class='subheader'>{t('key_biomarkers')}</div>", unsafe_allow_html=True)
|
| 831 |
shap_fig = plt.figure(figsize=(12, 8))
|
| 832 |
sns.barplot(data=important_biomarkers.head(20), x='SHAP_Mean_Abs', y=t('feature'), hue=t('omic'),
|
| 833 |
+
palette=['#FF6F61', '#6B5B95', '#4CAF50', '#2196F3'])
|
| 834 |
plt.title(t('shap_top_features'), fontsize=16, color='#FFFFFF')
|
| 835 |
plt.xlabel(t('shap_mean_value'), fontsize=12, color='#FFFFFF')
|
| 836 |
plt.ylabel(t('feature'), fontsize=12, color='#FFFFFF')
|
|
|
|
| 842 |
|
| 843 |
# Display saved SHAP plot
|
| 844 |
if os.path.exists(shap_plot_path):
|
| 845 |
+
st.image(shap_plot_path, caption=t('shap'), use_column_width=True)
|
| 846 |
|
| 847 |
# Recommendations
|
| 848 |
st.markdown(f"<div class='subheader'>{t('recommendations')}</div>", unsafe_allow_html=True)
|
| 849 |
for rec in recommendations:
|
| 850 |
+
st.markdown(f"<div class='recommendation'>{rec['text']}</div>", unsafe_allow_html=True)
|
| 851 |
|
| 852 |
# Download results
|
| 853 |
st.markdown(f"<div class='subheader'>{t('download_results')}</div>", unsafe_allow_html=True)
|
| 854 |
col1, col2 = st.columns(2)
|
| 855 |
with col1:
|
| 856 |
+
# Offer the biomarker table as a base64 data-URI download link.
# The DataFrame in scope is `important_biomarkers`; 'important_biom' is only
# the key used when storing it in st.session_state.results.
csv = important_biomarkers.to_csv(index=False).encode('utf-8')
b64 = base64.b64encode(csv).decode()
href = f'<a href="data:text/csv;base64,{b64}" download="important_biomarkers.csv" class="download-btn">{t("download_biomarkers", lang)}</a>'
st.markdown(href, unsafe_allow_html=True)
|
| 860 |
with col2:
|
| 861 |
with open(pdf_path, "rb") as f:
|
| 862 |
pdf_data = f.read()
|
| 863 |
b64_pdf = base64.b64encode(pdf_data).decode()
|
| 864 |
+
href_pdf = f'<a href="data:application/pdf;base64,{b64_pdf}" download="ckd_risk_report.pdf" class="download-btn">{t("download_report", lang)}</a>'
|
| 865 |
st.markdown(href_pdf, unsafe_allow_html=True)
|
| 866 |
|
| 867 |
except Exception as e:
|
|
|
|
| 875 |
st.markdown(f"<div class='title animate__fadeIn'>{t('chatbot_title')}</div>", unsafe_allow_html=True)
|
| 876 |
# Chatbot page introduction; the key matches 'chatbot_intro' as defined in
# the built-in fallback translations.
st.markdown(f"""
<div class='content animate__fadeIn'>
<p>{t('chatbot_intro')}</p>
</div>
""", unsafe_allow_html=True)
|
| 881 |
|
|
|
|
| 890 |
st.markdown(f"<div class='chat-bubble bot'>{message['content']}</div>", unsafe_allow_html=True)
|
| 891 |
|
| 892 |
# User input
|
| 893 |
+
user_input = st.text_area(t("type_message"), key="chat_input"))
|
| 894 |
+
if st.button(t("send"), key="button_send")):
|
| 895 |
if user_input:
|
| 896 |
try:
|
| 897 |
+
# Append user message to history
|
| 898 |
st.session_state.chat_history.append({'role': 'user', 'content': user_input})
|
| 899 |
|
| 900 |
# Check if user is asking about results
|
| 901 |
if st.session_state.results and any(
|
| 902 |
keyword in user_input.lower() for keyword in t('chat_risk_keywords').split(',')):
|
| 903 |
risk_score = st.session_state.results['umap_df'][t('risk_score')].iloc[0]
|
| 904 |
+
biomarkers = st.session_state.results['important_biom'][t('feature')].head(5).tolist()
|
| 905 |
context = t('chat_risk_response').format(score=risk_score, biomarkers=', '.join(biomarkers))
|
| 906 |
response = send_to_rasa(f"{context} {user_input}", lang)
|
| 907 |
else:
|
|
|
|
| 916 |
|
| 917 |
if intent != 'None' and st.session_state.results:
|
| 918 |
risk_score = st.session_state.results['umap_df'][t('risk_score')].iloc[0]
|
| 919 |
+
biomarkers = st.session_state.results['important_biom'][t('feature')].head(5).tolist()
|
| 920 |
prompt = formulate_prompt(intent, risk_score, biomarkers)
|
| 921 |
response = generate_recommendation(prompt)
|
| 922 |
if validate_recommendation(response):
|
|
|
|
| 926 |
else:
|
| 927 |
response = send_to_rasa(user_input, lang)
|
| 928 |
|
| 929 |
+
# Append bot response to history
|
| 930 |
st.session_state.chat_history.append({'role': 'bot', 'content': response})
|
| 931 |
|
| 932 |
# Rerender chat
|
| 933 |
st.rerun()
|
| 935 |
|
| 936 |
except Exception as e:
|
| 937 |
st.error(t("chatbot_error").format(error=str(e)))
|
|
|
|
| 941 |
st.markdown(f"<div class='title animate__fadeIn'>{t('dashboard_title')}</div>", unsafe_allow_html=True)
|
| 942 |
st.markdown(f"""
|
| 943 |
<div class='content animate__fadeIn'>
|
| 944 |
+
<p>{t('dashboard_int')}</p>
|
| 945 |
</div>
|
| 946 |
""", unsafe_allow_html=True)
|
| 947 |
|
|
|
|
| 957 |
} for p in patients]
|
| 958 |
df_patients = pd.DataFrame(patient_data)
|
| 959 |
|
| 960 |
+
# Filter data
|
| 961 |
+
st.markdown(f"<div class='section-title'>{t('filters')}</div>", unsafe_allow_html=True)
|
| 962 |
+
# Let the user choose the minimum risk score to show (0-100, default 50).
# The threshold must be numeric because it is compared against the
# risk-score column below.
risk_threshold = st.slider(t('risk_threshold'), 0, 100, 50)
filtered_df = df_patients[df_patients[t('risk_score')] >= risk_threshold]

# Display table
st.dataframe(filtered_df, use_container_width=True)
|
| 968 |
|
| 969 |
# Plot
|
| 970 |
fig = px.scatter(
|
| 971 |
+
data_frame=filtered_df,
|
| 972 |
x=t('risk_score'),
|
| 973 |
y=t('patient_id'),
|
| 974 |
color=t('risk_score'),
|
|
|
|
| 998 |
try:
|
| 999 |
rasa_process.terminate()
|
| 1000 |
except Exception as e:
|
| 1001 |
+
print(f"Error occurred terminating Rasa server: {str(e)}")
|
| 1002 |
|
| 1003 |
st.markdown(f"""
|
| 1004 |
<div class='footer'>
|