# Source: neuroscreenn/app/cnn_classifier.py (Hugging Face Space, user SuvSan, commit 36c468d)
# cnn_classifier.py — NeuroScreen CNN inference + Grad-CAM
# Fixed: Grad-CAM rewritten for Keras 3 / TF 2.16+ compatibility
# Old tf.keras.Model(inputs, conv_output) approach fails in Keras 3.
# New approach uses tf.GradientTape directly on the sub-model.
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.applications import EfficientNetB0
import cv2
import json
import os
import base64
from PIL import Image
import io
import matplotlib
matplotlib.use('Agg')
# ── Config ────────────────────────────────────────────────────────
# Paths are relative to the app's working directory — TODO confirm the
# process is launched from the project root.
CLASS_NAMES_PATH = "model/class_names.json"          # JSON list: index -> class label
WEIGHTS_PATH = "model/neuroscreen_model.weights.h5"  # trained checkpoint (weights only)
IMG_SIZE = (224, 224)  # EfficientNetB0 input resolution (height, width)
# Behaviour classes grouped by the condition they indicate; used by
# predict() to set the is_asd_related / is_adhd_related / is_normal flags.
ASD_CLASSES = [
    'avoid_eye_contact', 'hand_flapping', 'finger_flapping',
    'spinning', 'stimming', 'rocking', 'lack_social_skill'
]
ADHD_CLASSES = ['continuous_moving']
NORMAL_CLASS = 'normal'
def build_model(num_classes: int) -> Model:
    """Assemble the NeuroScreen classifier: EfficientNetB0 backbone + MLP head.

    The backbone is created without pretrained weights (``weights=None``);
    the full checkpoint is loaded separately by the caller.

    Args:
        num_classes: size of the softmax output layer.

    Returns:
        An uncompiled ``tf.keras.Model`` named "NeuroScreen".
    """
    backbone = EfficientNetB0(
        weights=None,
        include_top=False,
        input_shape=(*IMG_SIZE, 3),
    )
    l2 = tf.keras.regularizers.l2

    img_in = tf.keras.Input(shape=(*IMG_SIZE, 3))
    # training=False keeps the backbone's BatchNorm layers in inference mode.
    features = backbone(img_in, training=False)
    head = layers.GlobalAveragePooling2D()(features)
    head = layers.BatchNormalization()(head)
    head = layers.Dense(256, activation="relu", kernel_regularizer=l2(1e-4))(head)
    head = layers.Dropout(0.5)(head)
    head = layers.Dense(128, activation="relu", kernel_regularizer=l2(1e-4))(head)
    head = layers.Dropout(0.4)(head)
    probs = layers.Dense(num_classes, activation="softmax")(head)
    return Model(img_in, probs, name="NeuroScreen")
class NeuroScreenClassifier:
    """NeuroScreen CNN wrapper: weight loading, inference, and Grad-CAM.

    Degrades gracefully: when the weight (or class-name) file is absent,
    ``predict()`` and ``generate_gradcam()`` return placeholder "demo"
    outputs instead of failing, so the surrounding app stays usable.
    """

    def __init__(self):
        self.model = None                   # Keras model, built lazily in load()
        self.class_names = []               # index -> label, from class_names.json
        self.loaded = False                 # True once weights are in memory
        self._last_conv_layer_name = None   # Grad-CAM target inside the backbone

    def load(self):
        """Build the network, load weights, and locate the Grad-CAM layer.

        Returns:
            bool: True on success; False when a required file is missing
            (the classifier then stays in demo mode).
        """
        if not os.path.exists(WEIGHTS_PATH):
            print(f"⚠️ Weights not found at {WEIGHTS_PATH} — running in demo mode")
            return False
        # Fixed: a missing class-name file previously raised FileNotFoundError
        # instead of falling back to demo mode like a missing weight file does.
        if not os.path.exists(CLASS_NAMES_PATH):
            print(f"⚠️ Class names not found at {CLASS_NAMES_PATH} — running in demo mode")
            return False
        with open(CLASS_NAMES_PATH) as f:
            self.class_names = json.load(f)
        self.model = build_model(len(self.class_names))
        self.model(tf.zeros((1, *IMG_SIZE, 3)))  # create variables before loading weights
        self.model.load_weights(WEIGHTS_PATH)
        self.loaded = True
        # Find the last Conv2D layer inside the EfficientNetB0 backbone;
        # its feature maps are the Grad-CAM target.
        backbone = self.model.layers[1]  # layers[0] is the InputLayer
        for layer in reversed(backbone.layers):
            if isinstance(layer, tf.keras.layers.Conv2D):
                self._last_conv_layer_name = layer.name
                break
        print(f"✅ NeuroScreen loaded — {len(self.class_names)} classes")
        print(f"   Grad-CAM target layer: {self._last_conv_layer_name}")
        return True

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        """Convert one BGR frame into a (1, 224, 224, 3) float32 batch.

        NOTE(review): Keras EfficientNet models normalize internally, so
        ``preprocess_input`` is effectively a pass-through here; kept for
        API clarity and forward compatibility.
        """
        img = cv2.resize(image, IMG_SIZE)           # IMG_SIZE is square, so no distortion concerns
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV frames are BGR; model expects RGB
        img = tf.keras.applications.efficientnet.preprocess_input(
            img.astype(np.float32)
        )
        return np.expand_dims(img, 0)               # add batch dimension

    def predict(self, image: np.ndarray) -> dict:
        """Classify one BGR image array.

        Returns:
            dict with the top label, its confidence (percent), ASD/ADHD/normal
            flags, and per-class percentages under "all_predictions".
        """
        if not self.loaded:
            return self._demo_prediction()
        inp = self.preprocess(image)
        pred = self.model.predict(inp, verbose=0)[0]
        top_idx = int(np.argmax(pred))
        top_class = self.class_names[top_idx]
        top_conf = float(pred[top_idx])
        # Percentage score per class, rounded for display.
        all_preds = {
            name: round(float(pred[i]) * 100, 2)
            for i, name in enumerate(self.class_names)
        }
        return {
            "predicted_symptom": top_class,
            "confidence": round(top_conf * 100, 2),
            "is_asd_related": top_class in ASD_CLASSES,
            "is_adhd_related": top_class in ADHD_CLASSES,
            "is_normal": top_class == NORMAL_CLASS,
            "all_predictions": all_preds,
        }

    def generate_gradcam(self, image: np.ndarray) -> str:
        """Return a base64-encoded PNG of the Grad-CAM overlay for `image`.

        Keras 3 compatible strategy: build a sub-model from the backbone
        alone (input -> last conv layer output), then use tf.GradientTape
        to watch that tensor while running the classifier head manually.
        This avoids the 'output not connected to inputs' error that occurs
        when wiring backbone internals into a new tf.keras.Model with the
        full model's input tensor.

        Returns an empty string if Grad-CAM computation fails.
        """
        if not self.loaded or self._last_conv_layer_name is None:
            return self._demo_gradcam(image)
        try:
            inp = self.preprocess(image)
            backbone = self.model.layers[1]  # EfficientNetB0 (layers[0] is the InputLayer)
            # Standalone extractor: backbone input -> last conv feature maps.
            conv_output_model = tf.keras.Model(
                inputs=backbone.input,
                outputs=backbone.get_layer(self._last_conv_layer_name).output,
                name="gradcam_conv_extractor",
            )
            inp_tensor = tf.constant(inp, dtype=tf.float32)
            with tf.GradientTape() as tape:
                # Step 1: conv feature maps from the backbone; watch them so
                # the class score can be differentiated w.r.t. this tensor.
                conv_outputs = conv_output_model(inp_tensor, training=False)
                tape.watch(conv_outputs)
                # Step 2: run the classifier head manually.
                # Fixed: iterate the layers after the backbone instead of
                # get_layer('dense'), get_layer('dense_1'), ... — Keras
                # auto-generated name counters shift when more than one
                # model is built in the same process, making the old
                # hard-coded lookups raise and silently disable Grad-CAM.
                # Dropout layers are identity with training=False, so
                # including them preserves the previous behavior.
                # NOTE(review): conv_outputs is the raw last-conv output;
                # the backbone's trailing BN+activation are skipped in this
                # manual pass, matching the original implementation.
                x = conv_outputs
                for layer in self.model.layers[2:]:
                    x = layer(x, training=False)
                predictions = x
                # Score of the most probable class.
                pred_index = tf.argmax(predictions[0])
                class_score = predictions[:, pred_index]
            # Gradients of the class score w.r.t. the conv feature maps.
            grads = tape.gradient(class_score, conv_outputs)
            # Pool gradients spatially -> one importance weight per channel.
            pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
            # Weighted sum of feature maps, then ReLU + normalize to [0, 1].
            fmap = conv_outputs[0]
            heatmap = tf.squeeze(fmap @ pooled_grads[..., tf.newaxis])
            heatmap = tf.maximum(heatmap, 0)
            heatmap = heatmap / (tf.math.reduce_max(heatmap) + 1e-8)
            heatmap = heatmap.numpy()
            # Resize to the original image dimensions and colorize.
            h, w = image.shape[:2]
            heatmap_resized = cv2.resize(heatmap, (w, h))
            heatmap_colored = cv2.applyColorMap(
                np.uint8(255 * heatmap_resized), cv2.COLORMAP_JET
            )
            # Fixed: applyColorMap returns BGR, but the base image below is
            # RGB — without this conversion the JET palette rendered with
            # red and blue swapped in the final overlay.
            heatmap_colored = cv2.cvtColor(heatmap_colored, cv2.COLOR_BGR2RGB)
            # Overlay on the original image (RGB, since PIL encodes RGB).
            original_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            superimposed = cv2.addWeighted(original_rgb, 0.6, heatmap_colored, 0.4, 0)
            # Encode to base64 PNG.
            pil_img = Image.fromarray(superimposed)
            buffer = io.BytesIO()
            pil_img.save(buffer, format="PNG")
            return base64.b64encode(buffer.getvalue()).decode()
        except Exception as e:
            # Best-effort: Grad-CAM is auxiliary — never fail the request.
            print(f"⚠️ Grad-CAM failed: {e} — returning empty")
            return ""

    def _demo_prediction(self) -> dict:
        """Placeholder prediction returned when no weights are loaded."""
        return {
            "predicted_symptom": "demo_mode",
            "confidence": 0.0,
            "is_asd_related": False,
            "is_adhd_related": False,
            "is_normal": True,
            "all_predictions": {},
        }

    def _demo_gradcam(self, image: np.ndarray) -> str:
        """Placeholder Grad-CAM: a flat colored rectangle, base64 PNG."""
        h, w = image.shape[:2]
        dummy = np.zeros((h, w, 3), dtype=np.uint8)
        dummy[h//4:3*h//4, w//4:3*w//4] = [0, 100, 200]
        pil_img = Image.fromarray(dummy)
        buffer = io.BytesIO()
        pil_img.save(buffer, format="PNG")
        return base64.b64encode(buffer.getvalue()).decode()
# Global singleton — one model instance shared by the module-level helpers below.
classifier = NeuroScreenClassifier()
def load_classifier():
    """Load the global classifier's weights.

    Fixed: the load() status was previously discarded, so callers could not
    tell whether real weights or demo mode are active. Returning it is
    backward-compatible (the old return value was None and never used).

    Returns:
        bool: True when weights loaded, False when running in demo mode.
    """
    return classifier.load()
def predict_from_image_array(image: np.ndarray) -> dict:
    """Run classification plus Grad-CAM on one BGR image array.

    Returns:
        The prediction dict from the classifier, augmented with a
        base64-encoded Grad-CAM PNG under the key "gradcam_b64".
    """
    result = dict(classifier.predict(image))
    result["gradcam_b64"] = classifier.generate_gradcam(image)
    return result