Spaces:

Ci-Dave
/

DR_Classification

Runtime error

App Files Community

3v324v23 committed on May 12, 2025

Commit

b925d8f

1 Parent(s): da6f0a0

revert changes

Browse files

Files changed (1) hide show

pages/Model_Evaluation.py +54 -32

pages/Model_Evaluation.py CHANGED Viewed

@@ -16,7 +16,7 @@ import streamlit as st
 import matplotlib.pyplot as plt
 from fpdf import FPDF
 from datasets import load_dataset
-from huggingface_hub import hf_hub_download
 # ---- Streamlit State Initialization ----
 if 'stop_eval' not in st.session_state:
@@ -27,7 +27,7 @@ if 'trigger_eval' not in st.session_state:
     st.session_state.trigger_eval = False
 # ---- Streamlit Title ----
-st.markdown("<h2 style='color: #2E86C1;'>\ud83d\udcc8 Model Evaluation</h2>", unsafe_allow_html=True)
 # ---- Class Names & Label Mapping ----
 class_names = ['No_DR', 'Mild', 'Moderate', 'Severe', 'Proliferative_DR']
@@ -93,13 +93,21 @@ val_transform = transforms.Compose([
     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
 ])
-# ---- Load Data ----
 @st.cache_resource
-def load_test_data():
-    dataset = DDRDataset(csv_path="splits/test_labels_with_paths.csv", transform=val_transform)
     return DataLoader(dataset, batch_size=32, shuffle=False)
-# ---- Load Model ----
 @st.cache_resource
 def load_model():
     model_path = hf_hub_download(repo_id="Ci-Dave/Densenet121", filename="Pretrained_Densenet-121.pth")
@@ -111,43 +119,55 @@ def load_model():
 # ---- UI Buttons ----
 model = load_model()
-test_loader = load_test_data()
 col1, col2 = st.columns([1, 1])
 with col1:
-    if st.button("\ud83d\ude80 Start Evaluation"):
         st.session_state.stop_eval = False
         st.session_state.evaluation_done = False
         st.session_state.trigger_eval = True
 with col2:
-    if st.button("\ud83d\udea9 Stop Evaluation"):
         st.session_state.stop_eval = True
 if st.session_state.evaluation_done:
     reevaluate_col, download_col = st.columns([1, 1])
-# ---- Model Evaluation Explanation ----
-with st.expander("\u2139\ufe0f **What is Model Evaluation?**", expanded=True):
     st.markdown("""
     <div style='font-size:16px;'>
-    The <strong>Model Evaluation</strong> section tests how well the trained AI model performs on the test set of retinal images.
-    #### What It Does:
-    - Loads the test dataset
     - Runs the model to predict labels
     - Compares predictions vs. true labels
     - Computes:
-        - Classification Report
-        - Confusion Matrix
-        - ROC Curve
-        - Misclassified Samples
-    - Saves a downloadable PDF report
     </div>
     """, unsafe_allow_html=True)
 # ---- Evaluation Logic ----
 if st.session_state.trigger_eval:
-    st.markdown("### \u23f1\ufe0f Evaluation Results")
     start_time = time.time()
     y_true = []
@@ -163,14 +183,14 @@ if st.session_state.trigger_eval:
     with torch.no_grad():
         for i, (images, labels) in enumerate(test_loader):
             if st.session_state.stop_eval:
-                stop_info.warning("\ud83d\udea9 Evaluation stopped by user.")
                 break
             outputs = model(images)
             _, predicted = torch.max(outputs, 1)
             y_true.extend(labels.numpy())
             y_pred.extend(predicted.numpy())
-            y_score.extend(outputs.numpy())
             for j in range(len(labels)):
                 if predicted[j] != labels[j]:
@@ -178,14 +198,15 @@ if st.session_state.trigger_eval:
             percent_complete = (i + 1) / total_batches
             progress_bar.progress(min(percent_complete, 1.0))
-            status_text.text(f"Evaluating: {int(percent_complete * 100)}% | Batch {i+1}/{total_batches}")
     end_time = time.time()
     eval_time = end_time - start_time
     if not st.session_state.stop_eval:
         st.session_state.evaluation_done = True
-        st.session_state.trigger_eval = False
         st.success(f"✅ Evaluation completed in **{eval_time:.2f} seconds**")
         report = classification_report(y_true, y_pred, target_names=class_names, output_dict=True)
@@ -195,20 +216,21 @@ if st.session_state.trigger_eval:
         pdf = FPDF()
         pdf.add_page()
         pdf.set_font("Arial", size=12)
-        pdf.cell(200, 10, txt="Classification Report", ln=True, align='C')
         headers = ["Class", "Precision", "Recall", "F1-Score"]
-        for header in headers:
-            pdf.cell(40, 10, header, border=1)
         pdf.ln()
         for idx, row in report_df.iterrows():
             if idx in ['accuracy', 'macro avg', 'weighted avg']:
                 continue
-            pdf.cell(40, 10, str(idx), border=1)
-            pdf.cell(40, 10, f"{row['precision']:.2f}", border=1)
-            pdf.cell(40, 10, f"{row['recall']:.2f}", border=1)
-            pdf.cell(40, 10, f"{row['f1-score']:.2f}", border=1)
             pdf.ln()
         cm = confusion_matrix(y_true, y_pred)
@@ -257,4 +279,4 @@ if st.session_state.trigger_eval:
         with open(output_pdf, "rb") as f:
             reevaluate_col, download_col = st.columns([1, 1])
             with download_col:
-                st.download_button("\ud83d\udcc4 Download Full Evaluation PDF", f, file_name="evaluation_report.pdf")

 import matplotlib.pyplot as plt
 from fpdf import FPDF
 from datasets import load_dataset
+from huggingface_hub import hf_hub_download  # ✅ NEW
 # ---- Streamlit State Initialization ----
 if 'stop_eval' not in st.session_state:
     st.session_state.trigger_eval = False
 # ---- Streamlit Title ----
+st.markdown("<h2 style='color: #2E86C1;'>📈 Model Evaluation</h2>", unsafe_allow_html=True)
 # ---- Class Names & Label Mapping ----
 class_names = ['No_DR', 'Mild', 'Moderate', 'Severe', 'Proliferative_DR']
     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
 ])
+# ---- Load Data from Hugging Face (cached) ----
 @st.cache_resource
+def load_test_data_from_huggingface():
+    dataset = load_dataset(
+        "Ci-Dave/DDR_dataset_train_test",
+        data_files={"test": "splits/test_labels.csv"},
+        split="test"
+    )
+    df = dataset.to_pandas()
+    csv_path = "test_labels_temp.csv"
+    df.to_csv(csv_path, index=False)
+    dataset = DDRDataset(csv_path=csv_path, transform=val_transform)
     return DataLoader(dataset, batch_size=32, shuffle=False)
+# ---- Load Model from Hugging Face (cached) ----
 @st.cache_resource
 def load_model():
     model_path = hf_hub_download(repo_id="Ci-Dave/Densenet121", filename="Pretrained_Densenet-121.pth")
 # ---- UI Buttons ----
 model = load_model()
+test_loader = load_test_data_from_huggingface()
 col1, col2 = st.columns([1, 1])
 with col1:
+    if st.button("🚀 Start Evaluation"):
         st.session_state.stop_eval = False
         st.session_state.evaluation_done = False
         st.session_state.trigger_eval = True
 with col2:
+    if st.button("🚩 Stop Evaluation"):
         st.session_state.stop_eval = True
 if st.session_state.evaluation_done:
     reevaluate_col, download_col = st.columns([1, 1])
+    # ---- Description for Model Evaluation ----
+with st.expander("ℹ️ **What is Model Evaluation?**", expanded=True):
     st.markdown("""
     <div style='font-size:16px;'>
+    The **Model Evaluation** section tests how well the trained AI model performs on the unseen <strong>test set</strong> of retinal images. This provides insights into the reliability and performance of the model when deployed in real scenarios.
+    #### 🔍 What It Does:
+    - Loads the test dataset of labeled retinal images
     - Runs the model to predict labels
     - Compares predictions vs. true labels
     - Computes:
+        - 📋 **Classification Report** (Precision, Recall, F1-Score)
+        - 🧊 **Confusion Matrix**
+        - 📈 **Multi-class ROC Curve**
+        - ❌ **Misclassified Image Samples**
+    - Saves the full report as a downloadable PDF
+    #### 🧭 How to Use:
+    1. Click **🚀 Start Evaluation** to begin analyzing the model’s performance.
+    2. Wait for the evaluation to finish (shows progress bar and batch updates).
+    3. Once done:
+        - Check performance scores for each DR class
+        - View visual summaries like confusion matrix and ROC curve
+        - See the top 5 misclassified examples
+    4. Optionally, download the full evaluation report via **📄 Download PDF**
+    ⚠️ <i>Note: This evaluation runs on the full test set and might take several seconds depending on hardware.</i>
     </div>
     """, unsafe_allow_html=True)
 # ---- Evaluation Logic ----
 if st.session_state.trigger_eval:
+    st.markdown("### ⏱️ Evaluation Results")
     start_time = time.time()
     y_true = []
     with torch.no_grad():
         for i, (images, labels) in enumerate(test_loader):
             if st.session_state.stop_eval:
+                stop_info.warning("🚩 Evaluation stopped by user.")
                 break
             outputs = model(images)
             _, predicted = torch.max(outputs, 1)
             y_true.extend(labels.numpy())
             y_pred.extend(predicted.numpy())
+            y_score.extend(outputs.detach().numpy())
             for j in range(len(labels)):
                 if predicted[j] != labels[j]:
             percent_complete = (i + 1) / total_batches
             progress_bar.progress(min(percent_complete, 1.0))
+            status_text.text(f"Evaluating on Test Set: {int(percent_complete * 100)}% | Batch {i+1}/{total_batches}")
+            time.sleep(0.1)
     end_time = time.time()
     eval_time = end_time - start_time
     if not st.session_state.stop_eval:
         st.session_state.evaluation_done = True
+        st.session_state.trigger_eval = False  # ✅ Reset the trigger
         st.success(f"✅ Evaluation completed in **{eval_time:.2f} seconds**")
         report = classification_report(y_true, y_pred, target_names=class_names, output_dict=True)
         pdf = FPDF()
         pdf.add_page()
         pdf.set_font("Arial", size=12)
+        pdf.cell(200, 10, txt=clean_text("Classification Report"), ln=True, align='C')
+        col_widths = [40, 40, 40, 40]
         headers = ["Class", "Precision", "Recall", "F1-Score"]
+        for i, header in enumerate(headers):
+            pdf.cell(col_widths[i], 10, header, border=1)
         pdf.ln()
         for idx, row in report_df.iterrows():
             if idx in ['accuracy', 'macro avg', 'weighted avg']:
                 continue
+            pdf.cell(col_widths[0], 10, str(idx), border=1)
+            pdf.cell(col_widths[1], 10, f"{row['precision']:.2f}", border=1)
+            pdf.cell(col_widths[2], 10, f"{row['recall']:.2f}", border=1)
+            pdf.cell(col_widths[3], 10, f"{row['f1-score']:.2f}", border=1)
             pdf.ln()
         cm = confusion_matrix(y_true, y_pred)
         with open(output_pdf, "rb") as f:
             reevaluate_col, download_col = st.columns([1, 1])
             with download_col:
+                st.download_button("📄 Download Full Evaluation PDF", f, file_name="evaluation_report.pdf")