| import streamlit as st |
| import joblib |
| import numpy as np |
| from skimage.io import imread |
| from skimage.transform import resize |
| from skimage.feature import hog |
| import matplotlib.pyplot as plt |
| import os |
| import base64 |
| import shap |
| from lime.lime_tabular import LimeTabularExplainer |
| import io |
|
|
| |
| st.set_page_config( |
| page_title="Malaria Cell Detection", |
| page_icon="🔬", |
| layout="wide" |
| ) |
|
|
| |
| def get_binary_file_downloader_html(bin_file, file_label): |
| with open(bin_file, 'rb') as f: |
| data = f.read() |
| bin_str = base64.b64encode(data).decode() |
| href = f'<a href="data:application/octet-stream;base64,{bin_str}" download="{os.path.basename(bin_file)}">{file_label}</a>' |
| return href |
|
|
| |
| @st.cache_resource |
| def load_model(): |
| model_path = 'malaria_svm_hog_model.joblib' |
| le_path = 'label_encoder.joblib' |
| |
| if not os.path.exists(model_path) or not os.path.exists(le_path): |
| st.error("Model files not found. Please run the notebook to export the model first.") |
| st.stop() |
| |
| model = joblib.load(model_path) |
| label_encoder = joblib.load(le_path) |
| return model, label_encoder |
|
|
| |
| def extract_hog_features(image): |
| IMG_DIMS = (128, 64) |
| image = resize(image, IMG_DIMS) |
| features, hog_image = hog(image, orientations=9, |
| pixels_per_cell=(8, 8), |
| cells_per_block=(2, 2), |
| visualize=True, |
| channel_axis=-1) |
| return features, hog_image |
|
|
| |
| @st.cache_data(show_spinner=False) |
| def process_image_bytes(file_bytes: bytes): |
| img = imread(io.BytesIO(file_bytes)) |
| feats, hog_img = extract_hog_features(img) |
| return img, feats, hog_img |
|
|
| def normalize_to_uint8(image_like: np.ndarray) -> np.ndarray: |
| """Normalize array to 0-255 uint8 for stable st.image rendering.""" |
| arr = np.asarray(image_like).astype(np.float32) |
| min_v = float(arr.min()) |
| max_v = float(arr.max()) |
| denom = (max_v - min_v) if (max_v - min_v) > 1e-8 else 1.0 |
| arr = (arr - min_v) / denom |
| return (arr * 255.0).astype(np.uint8) |
|
|
| |
| def build_background_from_features(instance_features: np.ndarray, num_samples: int = 50, noise_scale: float = 0.01) -> np.ndarray: |
| """Create a lightweight background distribution around the instance. |
| |
| Kernel SHAP and LIME both need a reference dataset. Since we don't ship the |
| full training set with the app, we synthesize a small local neighborhood |
| around the current image's HOG features by adding low-variance Gaussian noise. |
| """ |
| rng = np.random.default_rng(42) |
| tiled = np.tile(instance_features, (num_samples, 1)) |
| noise = rng.normal(loc=0.0, scale=noise_scale, size=tiled.shape) |
| background = tiled + noise |
| return background.astype(np.float64) |
|
|
| |
| def predict_proba_from_features(model, X: np.ndarray) -> np.ndarray: |
| """Return pseudo-probabilities for LIME using sigmoid(decision_function).""" |
| raw = model.decision_function(X) |
| |
| raw = np.array(raw).reshape(-1) |
| p1 = 1.0 / (1.0 + np.exp(-raw)) |
| p0 = 1.0 - p1 |
| return np.vstack([p0, p1]).T |
|
|
| |
| def main(): |
| |
| model, label_encoder = load_model() |
| |
| |
| st.markdown( |
| """ |
| <style> |
| :root { |
| --brand: #5b8def; |
| --accent: #7bdcb5; |
| --text-strong: #111827; |
| --muted: #6b7280; |
| } |
| .main > div { padding-top: 0.5rem; } |
| h1, h2, h3, h4 { color: var(--text-strong); } |
| .stAlert { border-radius: 12px; box-shadow: 0 2px 8px rgba(0,0,0,0.04); } |
| .block-container { padding-top: 1rem; padding-bottom: 2.5rem; } |
| .section-title { margin: 0 0 6px 0; font-weight: 700; } |
| .footer-card { background: linear-gradient(135deg, rgba(91,141,239,0.08), rgba(123,220,181,0.08)); border: 1px solid rgba(0,0,0,0.06); border-radius: 14px; padding: 18px 20px; text-align: center; } |
| .footer-title { font-weight: 600; font-size: 16px; margin: 0 0 8px 0; } |
| .footer-sub { font-size: 13px; color: var(--muted); margin: 0; } |
| </style> |
| """, |
| unsafe_allow_html=True, |
| ) |
|
|
| |
| with st.sidebar: |
| st.title("🔬 Malaria Cell Detection") |
| st.markdown("Model: SVM (RBF) on HOG features") |
| st.markdown("Made for education. Not for clinical use.") |
| st.divider() |
| st.markdown("### Quick Tips") |
| st.markdown("- Upload a cell image below to get a prediction\n- Open the SHAP/LIME tabs in results for explanations") |
| |
| |
| st.info(""" |
| ### Model Performance |
| - **Training Accuracy**: 92.2% |
| - **Validation Accuracy**: 80.4% |
| |
| ⚠️ **Disclaimer**: This model is for educational purposes and experimentation only. Please use with caution and do not use for actual medical diagnosis. |
| """) |
| |
| |
| with st.expander("📚 Documentation"): |
| st.markdown(""" |
| ## Problem Statement |
| |
| Malaria remains one of the world's deadliest infectious diseases, affecting millions of people worldwide. Early and accurate diagnosis is crucial for effective treatment and reducing mortality rates. Traditional diagnosis involves manual microscopic examination of blood smears by trained professionals, which is time-consuming, requires expertise, and can be subject to human error. |
| |
| ## Project Approach |
| |
| This project uses machine learning to automate the detection of malaria parasites in blood smear images: |
| |
| 1. **Data Collection**: Used a dataset of labeled blood cell images (parasitized and uninfected) |
| 2. **Feature Extraction**: Applied Histogram of Oriented Gradients (HOG) to extract meaningful features from the cell images |
| 3. **Model Training**: Trained a Support Vector Machine (SVM) classifier on the extracted features |
| 4. **Evaluation**: Validated the model on a separate test set to ensure generalizability |
| |
| ## How It Works |
| |
| The application follows these steps to analyze a blood cell image: |
| |
| 1. **Image Upload**: User uploads a microscopic blood cell image |
| 2. **Preprocessing**: The image is resized and normalized |
| 3. **Feature Extraction**: HOG features are extracted to capture the cell's morphological characteristics |
| 4. **Classification**: The SVM model predicts whether the cell is infected with malaria parasites |
| 5. **Result Display**: The prediction result and confidence score are displayed along with visual aids |
| |
| ## Technical Details |
| |
| - **Feature Extraction**: HOG with 9 orientations, 8×8 pixels per cell, and 2×2 cells per block |
| - **Classifier**: SVM with RBF kernel |
| - **Image Size**: Processed at 128×64 pixels |
| """) |
| |
| |
| st.markdown("### Example Images") |
| example_folder = "examples" |
| if os.path.exists(example_folder): |
| st.success("Example images are available in the examples folder. You can use them to test the model.") |
| col_examples1, col_examples2 = st.columns(2) |
|
|
| with col_examples1: |
| st.markdown("#### Parasitized (Malaria) Examples:") |
| malaria_files = [ |
| "C50P11thinF_IMG_20150724_114951_cell_148.png", |
| "C59P20thinF_IMG_20150803_111333_cell_144.png" |
| ] |
| for file in malaria_files: |
| file_path = os.path.join(example_folder, file) |
| if os.path.exists(file_path): |
| try: |
| img = imread(file_path) |
| st.image(img, caption=file, width=250) |
| st.markdown(get_binary_file_downloader_html(file_path, f"Download {file}"), unsafe_allow_html=True) |
| except Exception as e: |
| st.warning(f"Could not display {file}: {e}") |
|
|
| with col_examples2: |
| st.markdown("#### Uninfected (Healthy) Examples:") |
| healthy_files = [ |
| "C112P73ThinF_IMG_20150930_131659_cell_94.png", |
| "C130P91ThinF_IMG_20151004_142709_cell_110.png" |
| ] |
| for file in healthy_files: |
| file_path = os.path.join(example_folder, file) |
| if os.path.exists(file_path): |
| try: |
| img = imread(file_path) |
| st.image(img, caption=file, width=250) |
| st.markdown(get_binary_file_downloader_html(file_path, f"Download {file}"), unsafe_allow_html=True) |
| except Exception as e: |
| st.warning(f"Could not display {file}: {e}") |
| else: |
| st.warning("Examples folder not found. Add an 'examples' folder with a few sample images.") |
| |
| |
| st.markdown("### Upload Blood Cell Image") |
| st.write("Upload an image of a blood cell to detect whether it's infected with malaria parasites.") |
| uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"]) |
| |
| |
| prediction_result = None |
| confidence_score = None |
| hog_img = None |
| original_image = None |
| |
| |
| if uploaded_file is not None: |
| try: |
| |
| file_bytes = uploaded_file.getvalue() |
| original_image, features, hog_img = process_image_bytes(file_bytes) |
| |
| |
| prediction = model.predict([features])[0] |
| prediction_proba = model.decision_function([features])[0] |
| confidence_score = abs(prediction_proba) |
| prediction_result = label_encoder.inverse_transform([prediction])[0] |
| |
| |
| st.markdown("---") |
| |
| |
| st.header("Prediction Results") |
| |
| |
| res_col1, res_col2 = st.columns(2) |
| |
| with res_col1: |
| st.markdown("### Uploaded Image") |
| st.image(original_image, caption="Original Blood Cell Image", width=300) |
| |
| |
| st.markdown("### Image Analysis") |
| st.markdown(f""" |
| - **Image Size**: {original_image.shape[0]} × {original_image.shape[1]} pixels |
| - **Color Channels**: {original_image.shape[2] if len(original_image.shape) > 2 else "Grayscale"} |
| - **Processing**: HOG feature extraction with 9 orientations |
| """) |
| |
| with res_col2: |
| |
| st.markdown("### Detection Result") |
| |
| if prediction_result.lower() in ["parasitized", "malaria"]: |
| st.error(f"## Prediction: {prediction_result}") |
| st.markdown("⚠️ **Cell appears to be infected with malaria parasites**") |
| else: |
| st.success(f"## Prediction: {prediction_result}") |
| st.markdown("✅ **Cell appears to be uninfected**") |
| |
| st.metric("Confidence Score", f"{confidence_score:.2f}") |
| |
| |
| st.markdown("### HOG Feature Visualization") |
| hog_disp = normalize_to_uint8(hog_img) |
| st.session_state["hog_disp"] = hog_disp |
| st.image(st.session_state["hog_disp"], caption="HOG Features", use_column_width=True, clamp=True) |
|
|
| |
| st.markdown("---") |
| st.subheader("SHAP Explanation (Kernel SHAP on HOG features)") |
| try: |
| background = build_background_from_features(features, num_samples=80, noise_scale=0.02) |
| |
| explainer = shap.LinearExplainer(model, background) |
| shap_values = explainer.shap_values(features.reshape(1, -1)) |
| |
| if isinstance(shap_values, list): |
| shap_1d = np.array(shap_values).reshape(-1) |
| else: |
| shap_1d = np.array(shap_values).reshape(-1) |
| top_k = 20 |
| top_idx = np.argsort(np.abs(shap_1d))[-top_k:][::-1] |
| fig_shap, ax_shap = plt.subplots(figsize=(7, 4)) |
| ax_shap.bar(range(len(top_idx)), shap_1d[top_idx], color=["#d62728" if v < 0 else "#2ca02c" for v in shap_1d[top_idx]]) |
| ax_shap.set_xticks(range(len(top_idx))) |
| ax_shap.set_xticklabels([f"f{int(i)}" for i in top_idx], rotation=45, ha="right") |
| ax_shap.set_ylabel("SHAP value") |
| ax_shap.set_title("Top HOG features driving the decision") |
| st.pyplot(fig_shap) |
| plt.close(fig_shap) |
| with st.expander("What does SHAP mean?"): |
| st.markdown(""" |
| SHAP shows how each input feature pushes the model toward malaria (positive) or healthy (negative). Bars to the right (green) increase the malaria score; bars to the left (red) decrease it. Length = strength of influence. |
| """) |
| except Exception as e: |
| st.warning(f"SHAP explanation unavailable: {e}") |
|
|
| st.subheader("LIME Explanation (on HOG features)") |
| try: |
| background = build_background_from_features(features, num_samples=200, noise_scale=0.03) |
| feature_names = [f"hog_{i}" for i in range(features.shape[0])] |
| class_names = list(label_encoder.classes_) |
| explainer = LimeTabularExplainer( |
| training_data=background, |
| feature_names=feature_names, |
| class_names=class_names, |
| mode="classification", |
| discretize_continuous=True, |
| random_state=42, |
| ) |
| exp = explainer.explain_instance( |
| data_row=features, |
| predict_fn=lambda x: predict_proba_from_features(model, x), |
| num_features=15, |
| num_samples=1000, |
| ) |
| fig_lime = exp.as_pyplot_figure() |
| st.pyplot(fig_lime) |
| plt.close(fig_lime) |
| except Exception as e: |
| st.warning(f"LIME explanation unavailable: {e}") |
| |
| |
| st.markdown("---") |
| st.markdown(""" |
| ### Understanding the Results |
| |
| - **HOG Features**: The visualization shows gradient directions that help the model identify the presence of malaria parasites |
| - **Confidence Score**: Higher values indicate stronger model certainty |
| - **Explainability**: SHAP estimates feature attributions using a game-theoretic approach; LIME approximates the model locally with a simple surrogate |
| - **Limitations**: This model has approximately 80% accuracy on validation data and should not be used for clinical diagnosis |
| """) |
| |
| except Exception as e: |
| st.error(f"Error processing image: {e}") |
| |
| |
| footer = st.container() |
| with footer: |
| st.markdown("---") |
| st.markdown(""" |
| ### Educational Purpose Only |
| |
| This application is designed for educational and experimental purposes only. It demonstrates the potential of machine learning in medical image analysis but is not intended for clinical use. |
| |
| **Important Notice**: Do not use this tool for actual medical diagnosis. Always consult qualified healthcare professionals for medical advice and diagnosis. |
| """) |
| st.markdown(""" |
| <div class="footer-card"> |
| <p class="footer-title">Malaria Detection Model · Educational Use</p> |
| <p class="footer-sub">Designed & Developed by <b>Aradhya Pavan</b> · SVM with HOG Features</p> |
| </div> |
| """, unsafe_allow_html=True) |
|
|
|
|
| |
|
|
| if __name__ == "__main__": |
| main() |