Commit ·
e916c8e
1
Parent(s): 87ffbc7
Remove async limitations
Add capability for mobilenet model
- src/classification.py +60 -0
- src/nets/Loss.py +1 -42
- src/nets/resnet50.py +0 -8
- src/nets/vgg16.py +6 -6
- src/streamlit_app.py +25 -89
- src/utils/callbacks.py +0 -1
- src/utils/dataloader.py +1 -21
- src/utils/utils.py +8 -21
src/classification.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import numpy as np
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
from huggingface_hub import hf_hub_download
|
| 5 |
+
from nets import get_model_from_name
|
| 6 |
+
from utils.utils import cvtColor, get_classes, letterbox_image, preprocess_input
|
| 7 |
+
import tempfile
|
| 8 |
+
|
| 9 |
+
class Classification:
    """Micro-expression classifier backed by Hugging Face hosted weights.

    Downloads the pretrained weight file matching the chosen backbone
    ("mobilenet" or another key of ``get_model_from_name``, e.g. "vgg16"),
    builds the network, and exposes ``detect_image`` for single-image
    prediction.
    """

    def __init__(self, model_choice):
        # Backbone key; selects both the architecture and the weight file.
        self.model_choice = model_choice
        self.classes_path = "src/model_data/cls_classes.txt"
        self.input_shape = (224, 224)
        # Width multiplier — only used by the mobilenet backbone.
        self.alpha = 0.25

        # Download the model weights from Hugging Face into a temp cache
        cache_dir = os.path.join(tempfile.gettempdir(), "hf_cache")
        os.makedirs(cache_dir, exist_ok=True)
        self.model_path = hf_hub_download(
            repo_id="sudo-paras-shah/micro-expression-casme2",
            # FIX: compare strings with ==, not "is" — identity comparison of
            # string literals is implementation-dependent (SyntaxWarning on
            # CPython 3.8+) and can silently pick the wrong weight file.
            filename="ep097.weights.h5" if self.model_choice == "mobilenet" else "ep089.weights.h5",
            cache_dir=cache_dir
        )

        # Load class names
        self.class_names, self.num_classes = get_classes(self.classes_path)

        # Build the network and load the downloaded weights
        self.load_model()

    def load_model(self):
        """Instantiate the chosen backbone and load the pretrained weights."""
        if self.model_choice == "mobilenet":
            self.model = get_model_from_name[self.model_choice](
                input_shape=[self.input_shape[0], self.input_shape[1], 3],
                classes=self.num_classes,
                alpha=self.alpha  # mobilenet-specific width multiplier
            )
        else:
            self.model = get_model_from_name[self.model_choice](
                input_shape=[self.input_shape[0], self.input_shape[1], 3],
                classes=self.num_classes
            )

        self.model.load_weights(self.model_path)
        print("Model loaded from", self.model_path)
        print("Classes:", self.class_names)

    def detect_image(self, image):
        """Classify a single image.

        :param image: input image (PIL-compatible; converted to RGB here)
        :return: tuple ``(class_name, probability)`` of the top-scoring class
        """
        image = cvtColor(image)  # ensure 3-channel RGB
        # Aspect-preserving resize with gray padding to the network input size.
        image = letterbox_image(image, [self.input_shape[1], self.input_shape[0]])
        image = np.array(image, dtype=np.float32)
        image = preprocess_input(image)  # scale pixel values
        image = np.expand_dims(image, axis=0)  # add batch dimension

        preds = self.model.predict(image)[0]
        class_index = np.argmax(preds)
        class_name = self.class_names[class_index]
        probability = preds[class_index]

        return class_name, probability
|
src/nets/Loss.py
CHANGED
|
@@ -1,19 +1,8 @@
|
|
| 1 |
import tensorflow as tf
|
|
|
|
| 2 |
from keras import backend as K
|
| 3 |
|
| 4 |
def multi_category_focal_loss2(gamma=2., alpha=1):
|
| 5 |
-
"""
|
| 6 |
-
focal loss for multi category of multi label problem
|
| 7 |
-
适用于多分类或多标签问题的focal loss
|
| 8 |
-
alpha控制真值y_true为1/0时的权重
|
| 9 |
-
1的权重为alpha, 0的权重为1-alpha
|
| 10 |
-
当你的模型欠拟合,学习存在困难时,可以尝试适用本函数作为loss
|
| 11 |
-
当模型过于激进(无论何时总是倾向于预测出1),尝试将alpha调小
|
| 12 |
-
当模型过于惰性(无论何时总是倾向于预测出0,或是某一个固定的常数,说明没有学到有效特征)
|
| 13 |
-
尝试将alpha调大,鼓励模型进行预测出1。
|
| 14 |
-
Usage:
|
| 15 |
-
model.compile(loss=[multi_category_focal_loss2(alpha=0.25, gamma=2)], metrics=["accuracy"], optimizer=adam)
|
| 16 |
-
"""
|
| 17 |
epsilon = 1.e-7
|
| 18 |
gamma = float(gamma)
|
| 19 |
alpha = tf.constant(alpha, dtype=tf.float32)
|
|
@@ -33,18 +22,8 @@ def multi_category_focal_loss2(gamma=2., alpha=1):
|
|
| 33 |
return multi_category_focal_loss2_fixed
|
| 34 |
|
| 35 |
def multi_category_focal_loss1(alpha, gamma=2.0):
|
| 36 |
-
"""
|
| 37 |
-
focal loss for multi category of multi label problem
|
| 38 |
-
适用于多分类或多标签问题的focal loss
|
| 39 |
-
alpha用于指定不同类别/标签的权重,数组大小需要与类别个数一致
|
| 40 |
-
当你的数据集不同类别/标签之间存在偏斜,可以尝试适用本函数作为loss
|
| 41 |
-
Usage:
|
| 42 |
-
model.compile(loss=[multi_category_focal_loss1(alpha=[1,2,3,2], gamma=2)], metrics=["accuracy"], optimizer=adam)
|
| 43 |
-
"""
|
| 44 |
epsilon = 1.e-7
|
| 45 |
alpha = tf.constant(alpha, dtype=tf.float32)
|
| 46 |
-
#alpha = tf.constant([[1],[1],[1],[1],[1]], dtype=tf.float32)
|
| 47 |
-
#alpha = tf.constant_initializer(alpha)
|
| 48 |
gamma = float(gamma)
|
| 49 |
def multi_category_focal_loss1_fixed(y_true, y_pred):
|
| 50 |
y_true = tf.cast(y_true, tf.float32)
|
|
@@ -59,34 +38,17 @@ def multi_category_focal_loss1(alpha, gamma=2.0):
|
|
| 59 |
|
| 60 |
|
| 61 |
def Cross_entropy_loss(y_true, y_pred):
|
| 62 |
-
'''
|
| 63 |
-
:param y_true: ont-hot encoding ,shape is [batch_size,nums_classes]
|
| 64 |
-
:param y_pred: shape is [batch_size,nums_classes],each example defined as probability for per class
|
| 65 |
-
:return:shape is [batch_size,], a list include cross_entropy for per example
|
| 66 |
-
'''
|
| 67 |
y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
|
| 68 |
crossEntropyLoss = -y_true * tf.log(y_pred)
|
| 69 |
|
| 70 |
return tf.reduce_sum(crossEntropyLoss, -1)
|
| 71 |
|
| 72 |
-
# focal loss with multi label
|
| 73 |
def focal_loss(classes_num, gamma=2., alpha=.25, e=0.1):
|
| 74 |
-
# classes_num contains sample number of each classes
|
| 75 |
def focal_loss_fixed(target_tensor, prediction_tensor):
|
| 76 |
-
'''
|
| 77 |
-
prediction_tensor is the output tensor with shape [None, 100], where 100 is the number of classes
|
| 78 |
-
target_tensor is the label tensor, same shape as predcition_tensor
|
| 79 |
-
'''
|
| 80 |
-
import tensorflow as tf
|
| 81 |
-
from tensorflow.python.ops import array_ops
|
| 82 |
-
from keras import backend as K
|
| 83 |
-
|
| 84 |
-
#1# get focal loss with no balanced weight which presented in paper function (4)
|
| 85 |
zeros = array_ops.zeros_like(prediction_tensor, dtype=prediction_tensor.dtype)
|
| 86 |
one_minus_p = array_ops.where(tf.greater(target_tensor,zeros), target_tensor - prediction_tensor, zeros)
|
| 87 |
FT = -1 * (one_minus_p ** gamma) * tf.log(tf.clip_by_value(prediction_tensor, 1e-8, 1.0))
|
| 88 |
|
| 89 |
-
#2# get balanced weight alpha
|
| 90 |
classes_weight = array_ops.zeros_like(prediction_tensor, dtype=prediction_tensor.dtype)
|
| 91 |
|
| 92 |
total_num = float(sum(classes_num))
|
|
@@ -98,12 +60,9 @@ def focal_loss(classes_num, gamma=2., alpha=.25, e=0.1):
|
|
| 98 |
|
| 99 |
alpha = array_ops.where(tf.greater(target_tensor, zeros), classes_weight, zeros)
|
| 100 |
|
| 101 |
-
#3# get balanced focal loss
|
| 102 |
balanced_fl = alpha * FT
|
| 103 |
balanced_fl = tf.reduce_mean(balanced_fl)
|
| 104 |
|
| 105 |
-
#4# add other op to prevent overfit
|
| 106 |
-
# reference : https://spaces.ac.cn/archives/4493
|
| 107 |
nb_classes = len(classes_num)
|
| 108 |
fianal_loss = (1-e) * balanced_fl + e * K.categorical_crossentropy(K.ones_like(prediction_tensor)/nb_classes, prediction_tensor)
|
| 109 |
|
|
|
|
| 1 |
import tensorflow as tf
|
| 2 |
+
from tensorflow.python.ops import array_ops
|
| 3 |
from keras import backend as K
|
| 4 |
|
| 5 |
def multi_category_focal_loss2(gamma=2., alpha=1):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
epsilon = 1.e-7
|
| 7 |
gamma = float(gamma)
|
| 8 |
alpha = tf.constant(alpha, dtype=tf.float32)
|
|
|
|
| 22 |
return multi_category_focal_loss2_fixed
|
| 23 |
|
| 24 |
def multi_category_focal_loss1(alpha, gamma=2.0):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
epsilon = 1.e-7
|
| 26 |
alpha = tf.constant(alpha, dtype=tf.float32)
|
|
|
|
|
|
|
| 27 |
gamma = float(gamma)
|
| 28 |
def multi_category_focal_loss1_fixed(y_true, y_pred):
|
| 29 |
y_true = tf.cast(y_true, tf.float32)
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
def Cross_entropy_loss(y_true, y_pred):
    """Per-example categorical cross-entropy.

    :param y_true: one-hot encoded labels, shape [batch_size, num_classes]
    :param y_pred: predicted class probabilities, same shape as y_true
    :return: tensor of shape [batch_size] with the cross-entropy of each example
    """
    # Clip predictions away from exactly 0/1 so log() stays finite.
    y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
    # NOTE(review): tf.log is the TF 1.x API (tf.math.log in TF 2.x) —
    # presumably the project pins TensorFlow 1.x; confirm before upgrading.
    crossEntropyLoss = -y_true * tf.log(y_pred)

    # Sum over the class axis to get one scalar loss per example.
    return tf.reduce_sum(crossEntropyLoss, -1)
|
| 45 |
|
|
|
|
| 46 |
def focal_loss(classes_num, gamma=2., alpha=.25, e=0.1):
|
|
|
|
| 47 |
def focal_loss_fixed(target_tensor, prediction_tensor):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
zeros = array_ops.zeros_like(prediction_tensor, dtype=prediction_tensor.dtype)
|
| 49 |
one_minus_p = array_ops.where(tf.greater(target_tensor,zeros), target_tensor - prediction_tensor, zeros)
|
| 50 |
FT = -1 * (one_minus_p ** gamma) * tf.log(tf.clip_by_value(prediction_tensor, 1e-8, 1.0))
|
| 51 |
|
|
|
|
| 52 |
classes_weight = array_ops.zeros_like(prediction_tensor, dtype=prediction_tensor.dtype)
|
| 53 |
|
| 54 |
total_num = float(sum(classes_num))
|
|
|
|
| 60 |
|
| 61 |
alpha = array_ops.where(tf.greater(target_tensor, zeros), classes_weight, zeros)
|
| 62 |
|
|
|
|
| 63 |
balanced_fl = alpha * FT
|
| 64 |
balanced_fl = tf.reduce_mean(balanced_fl)
|
| 65 |
|
|
|
|
|
|
|
| 66 |
nb_classes = len(classes_num)
|
| 67 |
fianal_loss = (1-e) * balanced_fl + e * K.categorical_crossentropy(K.ones_like(prediction_tensor)/nb_classes, prediction_tensor)
|
| 68 |
|
src/nets/resnet50.py
CHANGED
|
@@ -12,17 +12,14 @@ def identity_block(input_tensor, kernel_size, filters, stage, block):
|
|
| 12 |
conv_name_base = 'res' + str(stage) + block + '_branch'
|
| 13 |
bn_name_base = 'bn' + str(stage) + block + '_branch'
|
| 14 |
|
| 15 |
-
# 减少通道数
|
| 16 |
x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor)
|
| 17 |
x = BatchNormalization(name=bn_name_base + '2a')(x)
|
| 18 |
x = Activation('relu')(x)
|
| 19 |
|
| 20 |
-
# 3x3卷积
|
| 21 |
x = Conv2D(filters2, kernel_size,padding='same', name=conv_name_base + '2b')(x)
|
| 22 |
x = BatchNormalization(name=bn_name_base + '2b')(x)
|
| 23 |
x = Activation('relu')(x)
|
| 24 |
|
| 25 |
-
# 上升通道数
|
| 26 |
x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
|
| 27 |
x = BatchNormalization(name=bn_name_base + '2c')(x)
|
| 28 |
|
|
@@ -37,21 +34,17 @@ def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2))
|
|
| 37 |
conv_name_base = 'res' + str(stage) + block + '_branch'
|
| 38 |
bn_name_base = 'bn' + str(stage) + block + '_branch'
|
| 39 |
|
| 40 |
-
# 减少通道数
|
| 41 |
x = Conv2D(filters1, (1, 1), strides=strides, name=conv_name_base + '2a')(input_tensor)
|
| 42 |
x = BatchNormalization(name=bn_name_base + '2a')(x)
|
| 43 |
x = Activation('relu')(x)
|
| 44 |
|
| 45 |
-
# 3x3卷积
|
| 46 |
x = Conv2D(filters2, kernel_size, padding='same', name=conv_name_base + '2b')(x)
|
| 47 |
x = BatchNormalization(name=bn_name_base + '2b')(x)
|
| 48 |
x = Activation('relu')(x)
|
| 49 |
|
| 50 |
-
# 上升通道数
|
| 51 |
x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
|
| 52 |
x = BatchNormalization(name=bn_name_base + '2c')(x)
|
| 53 |
|
| 54 |
-
# 残差边
|
| 55 |
shortcut = Conv2D(filters3, (1, 1), strides=strides,
|
| 56 |
name=conv_name_base + '1')(input_tensor)
|
| 57 |
shortcut = BatchNormalization(name=bn_name_base + '1')(shortcut)
|
|
@@ -101,7 +94,6 @@ def ResNet50(input_shape=[224,224,3], classes=1000):
|
|
| 101 |
# 1,1,2048
|
| 102 |
x = AveragePooling2D((7, 7), name='avg_pool')(x)
|
| 103 |
|
| 104 |
-
# 进行预测
|
| 105 |
# 2048
|
| 106 |
x = Flatten()(x)
|
| 107 |
|
|
|
|
| 12 |
conv_name_base = 'res' + str(stage) + block + '_branch'
|
| 13 |
bn_name_base = 'bn' + str(stage) + block + '_branch'
|
| 14 |
|
|
|
|
| 15 |
x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor)
|
| 16 |
x = BatchNormalization(name=bn_name_base + '2a')(x)
|
| 17 |
x = Activation('relu')(x)
|
| 18 |
|
|
|
|
| 19 |
x = Conv2D(filters2, kernel_size,padding='same', name=conv_name_base + '2b')(x)
|
| 20 |
x = BatchNormalization(name=bn_name_base + '2b')(x)
|
| 21 |
x = Activation('relu')(x)
|
| 22 |
|
|
|
|
| 23 |
x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
|
| 24 |
x = BatchNormalization(name=bn_name_base + '2c')(x)
|
| 25 |
|
|
|
|
| 34 |
conv_name_base = 'res' + str(stage) + block + '_branch'
|
| 35 |
bn_name_base = 'bn' + str(stage) + block + '_branch'
|
| 36 |
|
|
|
|
| 37 |
x = Conv2D(filters1, (1, 1), strides=strides, name=conv_name_base + '2a')(input_tensor)
|
| 38 |
x = BatchNormalization(name=bn_name_base + '2a')(x)
|
| 39 |
x = Activation('relu')(x)
|
| 40 |
|
|
|
|
| 41 |
x = Conv2D(filters2, kernel_size, padding='same', name=conv_name_base + '2b')(x)
|
| 42 |
x = BatchNormalization(name=bn_name_base + '2b')(x)
|
| 43 |
x = Activation('relu')(x)
|
| 44 |
|
|
|
|
| 45 |
x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
|
| 46 |
x = BatchNormalization(name=bn_name_base + '2c')(x)
|
| 47 |
|
|
|
|
| 48 |
shortcut = Conv2D(filters3, (1, 1), strides=strides,
|
| 49 |
name=conv_name_base + '1')(input_tensor)
|
| 50 |
shortcut = BatchNormalization(name=bn_name_base + '1')(shortcut)
|
|
|
|
| 94 |
# 1,1,2048
|
| 95 |
x = AveragePooling2D((7, 7), name='avg_pool')(x)
|
| 96 |
|
|
|
|
| 97 |
# 2048
|
| 98 |
x = Flatten()(x)
|
| 99 |
|
src/nets/vgg16.py
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
from keras.layers import Conv2D, Dense, Flatten, Input, MaxPooling2D
|
| 2 |
-
from keras.models import Model
|
| 3 |
|
| 4 |
-
def VGG16(input_shape=None, classes=1000):
|
| 5 |
img_input = Input(shape=input_shape) # 224, 224, 3
|
| 6 |
|
| 7 |
# Block 1
|
| 8 |
# 224, 224, 3 -> 224, 224, 64
|
| 9 |
-
x = Conv2D(64, (3, 3),
|
| 10 |
-
activation='relu',
|
| 11 |
-
padding='same',
|
| 12 |
-
name='block1_conv1')(img_input)
|
| 13 |
x = Conv2D(64, (3, 3),
|
| 14 |
activation='relu',
|
| 15 |
padding='same',
|
|
|
|
| 1 |
from keras.layers import Conv2D, Dense, Flatten, Input, MaxPooling2D
|
| 2 |
+
from keras.models import Model
|
| 3 |
|
| 4 |
+
def VGG16(input_shape=None, classes=1000):
|
| 5 |
img_input = Input(shape=input_shape) # 224, 224, 3
|
| 6 |
|
| 7 |
# Block 1
|
| 8 |
# 224, 224, 3 -> 224, 224, 64
|
| 9 |
+
x = Conv2D(64, (3, 3),
|
| 10 |
+
activation='relu',
|
| 11 |
+
padding='same',
|
| 12 |
+
name='block1_conv1')(img_input)
|
| 13 |
x = Conv2D(64, (3, 3),
|
| 14 |
activation='relu',
|
| 15 |
padding='same',
|
src/streamlit_app.py
CHANGED
|
@@ -1,7 +1,4 @@
|
|
| 1 |
import os
|
| 2 |
-
import sys
|
| 3 |
-
import asyncio
|
| 4 |
-
import tempfile
|
| 5 |
import traceback
|
| 6 |
|
| 7 |
os.environ["HOME"] = "/tmp"
|
|
@@ -10,19 +7,12 @@ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
|
| 10 |
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
| 11 |
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"
|
| 12 |
|
| 13 |
-
if sys.platform.startswith('linux'):
|
| 14 |
-
try:
|
| 15 |
-
asyncio.get_event_loop()
|
| 16 |
-
except RuntimeError:
|
| 17 |
-
asyncio.set_event_loop(asyncio.new_event_loop())
|
| 18 |
-
|
| 19 |
import cv2
|
| 20 |
import numpy as np
|
| 21 |
from PIL import Image
|
| 22 |
|
| 23 |
import streamlit as st
|
| 24 |
from streamlit_webrtc import VideoProcessorBase, webrtc_streamer, RTCConfiguration
|
| 25 |
-
from huggingface_hub import hf_hub_download
|
| 26 |
from twilio.rest import Client
|
| 27 |
|
| 28 |
account_sid = os.environ.get("ACCOUNT_SID")
|
|
@@ -30,7 +20,7 @@ auth_token = os.environ.get("AUTH_TOKEN")
|
|
| 30 |
ICE_SERVERS = [{"urls": ["stun:stun.l.google.com:19302"]}]
|
| 31 |
if account_sid and auth_token:
|
| 32 |
try:
|
| 33 |
-
twilio_client = Client(account_sid, auth_token)
|
| 34 |
token = twilio_client.tokens.create()
|
| 35 |
try:
|
| 36 |
ICE_SERVERS = [
|
|
@@ -56,86 +46,36 @@ if gpus:
|
|
| 56 |
except Exception as e:
|
| 57 |
print(e)
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
from nets import get_model_from_name
|
| 62 |
-
from utils.utils import (cvtColor, get_classes, letterbox_image, preprocess_input)
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
# --- Classification class (merged from classification.py) ---
|
| 66 |
-
cache_dir = os.path.join(tempfile.gettempdir(), "hf_cache")
|
| 67 |
-
os.makedirs(cache_dir, exist_ok=True)
|
| 68 |
-
|
| 69 |
-
class Classification(object):
|
| 70 |
-
_defaults = {
|
| 71 |
-
"model_path": hf_hub_download(
|
| 72 |
-
repo_id="sudo-paras-shah/micro-expression-casme2",
|
| 73 |
-
filename="ep089.weights.h5",
|
| 74 |
-
cache_dir=cache_dir
|
| 75 |
-
),
|
| 76 |
-
"classes_path": 'src/model_data/cls_classes.txt',
|
| 77 |
-
"input_shape": [224, 224],
|
| 78 |
-
"backbone": 'vgg16',
|
| 79 |
-
"alpha": 0.25
|
| 80 |
-
}
|
| 81 |
-
|
| 82 |
-
@classmethod
|
| 83 |
-
def get_defaults(cls, n):
|
| 84 |
-
if n in cls._defaults:
|
| 85 |
-
return cls._defaults[n]
|
| 86 |
-
else:
|
| 87 |
-
return "Unrecognized attribute name '" + n + "'"
|
| 88 |
-
|
| 89 |
-
def __init__(self, **kwargs):
|
| 90 |
-
self.__dict__.update(self._defaults)
|
| 91 |
-
for name, value in kwargs.items():
|
| 92 |
-
setattr(self, name, value)
|
| 93 |
-
self.class_names, self.num_classes = get_classes(self.classes_path)
|
| 94 |
-
self.generate()
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
if self.backbone == "mobilenet":
|
| 100 |
-
self.model = get_model_from_name[self.backbone](
|
| 101 |
-
input_shape=[self.input_shape[0], self.input_shape[1], 3],
|
| 102 |
-
classes=self.num_classes,
|
| 103 |
-
alpha=self.alpha
|
| 104 |
-
)
|
| 105 |
-
else:
|
| 106 |
-
self.model = get_model_from_name[self.backbone](
|
| 107 |
-
input_shape=[self.input_shape[0], self.input_shape[1], 3],
|
| 108 |
-
classes=self.num_classes
|
| 109 |
-
)
|
| 110 |
-
self.model.load_weights(self.model_path)
|
| 111 |
-
print('{} model, and classes {} loaded.'.format(model_path, self.class_names))
|
| 112 |
|
| 113 |
-
|
| 114 |
-
image = cvtColor(image)
|
| 115 |
-
image_data = letterbox_image(image, [self.input_shape[1], self.input_shape[0]])
|
| 116 |
-
image_data = np.expand_dims(preprocess_input(np.array(image_data, np.float32)), 0)
|
| 117 |
-
preds = self.model.predict(image_data)[0]
|
| 118 |
-
class_name = self.class_names[np.argmax(preds)]
|
| 119 |
-
probability = np.max(preds)
|
| 120 |
-
return class_name, probability
|
| 121 |
|
| 122 |
# --- Main Streamlit App ---
|
| 123 |
if __name__ == '__main__':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
@st.cache_resource
|
| 125 |
-
def get_model():
|
| 126 |
-
return Classification()
|
| 127 |
|
| 128 |
-
classificator = get_model()
|
| 129 |
face_cascade = cv2.CascadeClassifier(
|
| 130 |
cv2.data.haarcascades + 'haarcascade_frontalface_alt.xml'
|
| 131 |
)
|
| 132 |
|
| 133 |
-
if face_cascade.empty():
|
| 134 |
-
st.error("Failed to load Haarcascade XML. Check the path.")
|
| 135 |
-
|
| 136 |
-
st.title("Real-Time Micro-Emotion Recognition")
|
| 137 |
-
st.write("Turn on your camera and detect emotions in real-time.")
|
| 138 |
-
|
| 139 |
def face_detect(img):
|
| 140 |
try:
|
| 141 |
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
|
@@ -184,8 +124,7 @@ if __name__ == '__main__':
|
|
| 184 |
img = frame.to_ndarray(format="bgr24")
|
| 185 |
self.frame_count += 1
|
| 186 |
|
| 187 |
-
|
| 188 |
-
if self.frame_count % 2 == 0:
|
| 189 |
img_disp, img_gray, faces = face_detect(img)
|
| 190 |
self.last_faces = faces
|
| 191 |
self.last_img_gray = img_gray
|
|
@@ -214,11 +153,8 @@ if __name__ == '__main__':
|
|
| 214 |
current_class = emotion_class
|
| 215 |
|
| 216 |
if current_class:
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
if len(history) > 10:
|
| 220 |
-
history.pop(0)
|
| 221 |
-
if len(history) >= 3 and len(set(history[-3:])) > 1:
|
| 222 |
self.rapid_change_count += 1
|
| 223 |
else:
|
| 224 |
self.rapid_change_count = 0
|
|
@@ -252,7 +188,7 @@ if __name__ == '__main__':
|
|
| 252 |
|
| 253 |
return frame.from_ndarray(img_disp, format="bgr24")
|
| 254 |
except Exception as e:
|
| 255 |
-
|
| 256 |
return frame
|
| 257 |
|
| 258 |
RTC_CONFIGURATION = RTCConfiguration({"iceServers": ICE_SERVERS})
|
|
@@ -264,7 +200,7 @@ if __name__ == '__main__':
|
|
| 264 |
media_stream_constraints={"video": True, "audio": False},
|
| 265 |
)
|
| 266 |
|
| 267 |
-
history =
|
| 268 |
if len(history) >= 3 and len(set(history[-3:])) > 1:
|
| 269 |
st.warning(
|
| 270 |
"⚠️ Rapid changes in your detected emotional state were observed. "
|
|
|
|
| 1 |
import os
|
|
|
|
|
|
|
|
|
|
| 2 |
import traceback
|
| 3 |
|
| 4 |
os.environ["HOME"] = "/tmp"
|
|
|
|
| 7 |
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
| 8 |
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
import cv2
|
| 11 |
import numpy as np
|
| 12 |
from PIL import Image
|
| 13 |
|
| 14 |
import streamlit as st
|
| 15 |
from streamlit_webrtc import VideoProcessorBase, webrtc_streamer, RTCConfiguration
|
|
|
|
| 16 |
from twilio.rest import Client
|
| 17 |
|
| 18 |
account_sid = os.environ.get("ACCOUNT_SID")
|
|
|
|
| 20 |
ICE_SERVERS = [{"urls": ["stun:stun.l.google.com:19302"]}]
|
| 21 |
if account_sid and auth_token:
|
| 22 |
try:
|
| 23 |
+
twilio_client = Client(account_sid, auth_token, region="in1")
|
| 24 |
token = twilio_client.tokens.create()
|
| 25 |
try:
|
| 26 |
ICE_SERVERS = [
|
|
|
|
| 46 |
except Exception as e:
|
| 47 |
print(e)
|
| 48 |
|
| 49 |
+
from collections import deque
|
| 50 |
+
shared_emotion_history = deque(maxlen=20)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
+
import logging
|
| 53 |
+
logging.getLogger("streamlit.runtime.scriptrunner.script_run_context").setLevel(logging.ERROR)
|
| 54 |
+
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
+
from classification import Classification
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
# --- Main Streamlit App ---
|
| 59 |
if __name__ == '__main__':
|
| 60 |
+
st.title("Personal Video Logger")
|
| 61 |
+
st.write("Turn on your camera and talk about anything that worries you or just about your day.")
|
| 62 |
+
|
| 63 |
+
model_choice = st.selectbox(
|
| 64 |
+
"Choose a model:",
|
| 65 |
+
options=["mobilenet", "vgg16"],
|
| 66 |
+
index=0,
|
| 67 |
+
help="Select the model used for emotion classification."
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
@st.cache_resource
|
| 71 |
+
def get_model(model):
|
| 72 |
+
return Classification(model)
|
| 73 |
|
| 74 |
+
classificator = get_model(model_choice)
|
| 75 |
face_cascade = cv2.CascadeClassifier(
|
| 76 |
cv2.data.haarcascades + 'haarcascade_frontalface_alt.xml'
|
| 77 |
)
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
def face_detect(img):
|
| 80 |
try:
|
| 81 |
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
|
|
|
| 124 |
img = frame.to_ndarray(format="bgr24")
|
| 125 |
self.frame_count += 1
|
| 126 |
|
| 127 |
+
if self.frame_count % 1 == 0:
|
|
|
|
| 128 |
img_disp, img_gray, faces = face_detect(img)
|
| 129 |
self.last_faces = faces
|
| 130 |
self.last_img_gray = img_gray
|
|
|
|
| 153 |
current_class = emotion_class
|
| 154 |
|
| 155 |
if current_class:
|
| 156 |
+
shared_emotion_history.append(current_class)
|
| 157 |
+
if len(shared_emotion_history) >= 3 and len(set(list(shared_emotion_history)[-3:])) > 1:
|
|
|
|
|
|
|
|
|
|
| 158 |
self.rapid_change_count += 1
|
| 159 |
else:
|
| 160 |
self.rapid_change_count = 0
|
|
|
|
| 188 |
|
| 189 |
return frame.from_ndarray(img_disp, format="bgr24")
|
| 190 |
except Exception as e:
|
| 191 |
+
logger.exception("Video processing error", e)
|
| 192 |
return frame
|
| 193 |
|
| 194 |
RTC_CONFIGURATION = RTCConfiguration({"iceServers": ICE_SERVERS})
|
|
|
|
| 200 |
media_stream_constraints={"video": True, "audio": False},
|
| 201 |
)
|
| 202 |
|
| 203 |
+
history = list(shared_emotion_history)
|
| 204 |
if len(history) >= 3 and len(set(history[-3:])) > 1:
|
| 205 |
st.warning(
|
| 206 |
"⚠️ Rapid changes in your detected emotional state were observed. "
|
src/utils/callbacks.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
import os
|
| 2 |
-
|
| 3 |
import matplotlib
|
| 4 |
matplotlib.use('Agg')
|
| 5 |
from matplotlib import pyplot as plt
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
import matplotlib
|
| 3 |
matplotlib.use('Agg')
|
| 4 |
from matplotlib import pyplot as plt
|
src/utils/dataloader.py
CHANGED
|
@@ -50,13 +50,8 @@ class ClsDatasets(keras.utils.Sequence):
|
|
| 50 |
return np.random.rand()*(b-a) + a
|
| 51 |
|
| 52 |
def get_random_data(self, image, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
|
| 53 |
-
#
|
| 54 |
-
# 读取图像并转换成RGB图像
|
| 55 |
-
#------------------------------#
|
| 56 |
image = cvtColor(image)
|
| 57 |
-
#------------------------------#
|
| 58 |
-
# 获得图像的高宽与目标高宽
|
| 59 |
-
#------------------------------#
|
| 60 |
iw, ih = image.size
|
| 61 |
h, w = input_shape
|
| 62 |
|
|
@@ -67,9 +62,6 @@ class ClsDatasets(keras.utils.Sequence):
|
|
| 67 |
dx = (w-nw)//2
|
| 68 |
dy = (h-nh)//2
|
| 69 |
|
| 70 |
-
#---------------------------------#
|
| 71 |
-
# 将图像多余的部分加上灰条
|
| 72 |
-
#---------------------------------#
|
| 73 |
image = image.resize((nw,nh), Image.BICUBIC)
|
| 74 |
new_image = Image.new('RGB', (w,h), (128,128,128))
|
| 75 |
new_image.paste(image, (dx, dy))
|
|
@@ -77,9 +69,6 @@ class ClsDatasets(keras.utils.Sequence):
|
|
| 77 |
|
| 78 |
return image_data
|
| 79 |
|
| 80 |
-
#------------------------------------------#
|
| 81 |
-
# 对图像进行缩放并且进行长和宽的扭曲
|
| 82 |
-
#------------------------------------------#
|
| 83 |
new_ar = w/h * self.rand(1-jitter,1+jitter)/self.rand(1-jitter,1+jitter)
|
| 84 |
scale = self.rand(.75, 1.25)
|
| 85 |
if new_ar < 1:
|
|
@@ -90,18 +79,12 @@ class ClsDatasets(keras.utils.Sequence):
|
|
| 90 |
nh = int(nw/new_ar)
|
| 91 |
image = image.resize((nw,nh), Image.BICUBIC)
|
| 92 |
|
| 93 |
-
#------------------------------------------#
|
| 94 |
-
# 将图像多余的部分加上灰条
|
| 95 |
-
#------------------------------------------#
|
| 96 |
dx = int(self.rand(0, w-nw))
|
| 97 |
dy = int(self.rand(0, h-nh))
|
| 98 |
new_image = Image.new('RGB', (w,h), (128,128,128))
|
| 99 |
new_image.paste(image, (dx, dy))
|
| 100 |
image = new_image
|
| 101 |
|
| 102 |
-
#------------------------------------------#
|
| 103 |
-
# 翻转图像
|
| 104 |
-
#------------------------------------------#
|
| 105 |
flip = self.rand()<.5
|
| 106 |
if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
|
| 107 |
|
|
@@ -112,9 +95,6 @@ class ClsDatasets(keras.utils.Sequence):
|
|
| 112 |
M = cv2.getRotationMatrix2D((a,b),angle,1)
|
| 113 |
image = cv2.warpAffine(np.array(image), M, (w,h), borderValue=[128,128,128])
|
| 114 |
|
| 115 |
-
#------------------------------------------#
|
| 116 |
-
# 色域扭曲
|
| 117 |
-
#------------------------------------------#
|
| 118 |
hue = self.rand(-hue, hue)
|
| 119 |
sat = self.rand(1, sat) if self.rand()<.5 else 1/self.rand(1, sat)
|
| 120 |
val = self.rand(1, val) if self.rand()<.5 else 1/self.rand(1, val)
|
|
|
|
| 50 |
return np.random.rand()*(b-a) + a
|
| 51 |
|
| 52 |
def get_random_data(self, image, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
|
| 53 |
+
# Read and convert images to RGB
|
|
|
|
|
|
|
| 54 |
image = cvtColor(image)
|
|
|
|
|
|
|
|
|
|
| 55 |
iw, ih = image.size
|
| 56 |
h, w = input_shape
|
| 57 |
|
|
|
|
| 62 |
dx = (w-nw)//2
|
| 63 |
dy = (h-nh)//2
|
| 64 |
|
|
|
|
|
|
|
|
|
|
| 65 |
image = image.resize((nw,nh), Image.BICUBIC)
|
| 66 |
new_image = Image.new('RGB', (w,h), (128,128,128))
|
| 67 |
new_image.paste(image, (dx, dy))
|
|
|
|
| 69 |
|
| 70 |
return image_data
|
| 71 |
|
|
|
|
|
|
|
|
|
|
| 72 |
new_ar = w/h * self.rand(1-jitter,1+jitter)/self.rand(1-jitter,1+jitter)
|
| 73 |
scale = self.rand(.75, 1.25)
|
| 74 |
if new_ar < 1:
|
|
|
|
| 79 |
nh = int(nw/new_ar)
|
| 80 |
image = image.resize((nw,nh), Image.BICUBIC)
|
| 81 |
|
|
|
|
|
|
|
|
|
|
| 82 |
dx = int(self.rand(0, w-nw))
|
| 83 |
dy = int(self.rand(0, h-nh))
|
| 84 |
new_image = Image.new('RGB', (w,h), (128,128,128))
|
| 85 |
new_image.paste(image, (dx, dy))
|
| 86 |
image = new_image
|
| 87 |
|
|
|
|
|
|
|
|
|
|
| 88 |
flip = self.rand()<.5
|
| 89 |
if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
|
| 90 |
|
|
|
|
| 95 |
M = cv2.getRotationMatrix2D((a,b),angle,1)
|
| 96 |
image = cv2.warpAffine(np.array(image), M, (w,h), borderValue=[128,128,128])
|
| 97 |
|
|
|
|
|
|
|
|
|
|
| 98 |
hue = self.rand(-hue, hue)
|
| 99 |
sat = self.rand(1, sat) if self.rand()<.5 else 1/self.rand(1, sat)
|
| 100 |
val = self.rand(1, val) if self.rand()<.5 else 1/self.rand(1, val)
|
src/utils/utils.py
CHANGED
|
@@ -1,36 +1,26 @@
|
|
| 1 |
import numpy as np
|
| 2 |
from PIL import Image
|
| 3 |
|
| 4 |
-
#---------------------------------------------------#
|
| 5 |
-
# 不失真的resize
|
| 6 |
-
#---------------------------------------------------#
|
| 7 |
def letterbox_image(image, size):
|
| 8 |
-
iw, ih
|
| 9 |
-
w, h
|
| 10 |
|
| 11 |
-
scale
|
| 12 |
-
nw
|
| 13 |
-
nh
|
| 14 |
|
| 15 |
-
image = image.resize((nw,nh), Image.BICUBIC)
|
| 16 |
-
new_image = Image.new('RGB', size, (128,128,128))
|
| 17 |
-
new_image.paste(image, ((w-nw)//2, (h-nh)//2))
|
| 18 |
|
| 19 |
return new_image
|
| 20 |
|
| 21 |
-
#---------------------------------------------------#
|
| 22 |
-
# 获得类
|
| 23 |
-
#---------------------------------------------------#
|
| 24 |
def get_classes(classes_path):
|
| 25 |
with open(classes_path, encoding='utf-8') as f:
|
| 26 |
class_names = f.readlines()
|
| 27 |
class_names = [c.strip() for c in class_names]
|
| 28 |
return class_names, len(class_names)
|
| 29 |
|
| 30 |
-
#---------------------------------------------------------#
|
| 31 |
-
# 将图像转换成RGB图像,防止灰度图在预测时报错。
|
| 32 |
-
# 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB
|
| 33 |
-
#---------------------------------------------------------#
|
| 34 |
def cvtColor(image):
|
| 35 |
if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
|
| 36 |
return image
|
|
@@ -38,9 +28,6 @@ def cvtColor(image):
|
|
| 38 |
image = image.convert('RGB')
|
| 39 |
return image
|
| 40 |
|
| 41 |
-
#----------------------------------------#
|
| 42 |
-
# 预处理训练图片
|
| 43 |
-
#----------------------------------------#
|
| 44 |
def preprocess_input(x):
|
| 45 |
x /= 127.5
|
| 46 |
x -= 1.
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
from PIL import Image
|
| 3 |
|
|
|
|
|
|
|
|
|
|
| 4 |
def letterbox_image(image, size):
    """Resize an image to fit inside ``size`` without distortion.

    The image is scaled to the largest size that fits while preserving its
    aspect ratio, then pasted centered on a gray (128, 128, 128) canvas of
    the requested dimensions.

    :param image: source PIL image
    :param size: target (width, height)
    :return: new RGB PIL image of exactly ``size``
    """
    src_w, src_h = image.size
    dst_w, dst_h = size

    # Largest uniform scale that keeps the whole image inside the target box.
    ratio = min(dst_w / src_w, dst_h / src_h)
    fit_w = int(src_w * ratio)
    fit_h = int(src_h * ratio)

    resized = image.resize((fit_w, fit_h), Image.BICUBIC)
    canvas = Image.new('RGB', size, (128, 128, 128))
    # Center the resized image; leftover margins stay gray.
    canvas.paste(resized, ((dst_w - fit_w) // 2, (dst_h - fit_h) // 2))

    return canvas
|
| 17 |
|
|
|
|
|
|
|
|
|
|
| 18 |
def get_classes(classes_path):
    """Read class names from a text file, one name per line.

    :param classes_path: path to a UTF-8 text file of class names
    :return: tuple ``(class_names, num_classes)``
    """
    with open(classes_path, encoding='utf-8') as f:
        # Strip surrounding whitespace (including the trailing newline).
        names = [line.strip() for line in f]
    return names, len(names)
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def cvtColor(image):
|
| 25 |
if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
|
| 26 |
return image
|
|
|
|
| 28 |
image = image.convert('RGB')
|
| 29 |
return image
|
| 30 |
|
|
|
|
|
|
|
|
|
|
| 31 |
def preprocess_input(x):
|
| 32 |
x /= 127.5
|
| 33 |
x -= 1.
|