diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..145977f3dc80fc5066a67296262909f755b60fce --- /dev/null +++ b/Dockerfile @@ -0,0 +1,38 @@ +# Base image with TensorFlow GPU support +# Use CPU version for Hugging Face Spaces free tier compatibility if needed +# But keeping GPU as the project is configured for it. +# HF Spaces offers CPU Basic (Free) and GPU upgrades. +# Using a lighter base image might be better for free tier, but TF is heavy anyway. +FROM tensorflow/tensorflow:2.13.0 + +# Set working directory +WORKDIR /app + +# Install system dependencies including libGL for OpenCV +RUN apt-get update && apt-get install -y \ + libgl1-mesa-glx \ + libglib2.0-0 \ + libsm6 \ + libxext6 \ + libxrender-dev \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements first for caching +COPY requirements.txt . + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY src/ ./src/ +COPY frontend/ ./frontend/ + +# Create a models directory +# Note: You must upload your trained models here or use Git LFS +COPY models/ ./models/ + +# Expose the port Hugging Face Spaces expects +EXPOSE 7860 + +# Default command to run Streamlit on port 7860 +CMD ["streamlit", "run", "frontend/app.py", "--server.port", "7860", "--server.address", "0.0.0.0"] diff --git a/frontend/app.py b/frontend/app.py new file mode 100644 index 0000000000000000000000000000000000000000..22aee822f681af5f1eb6d7e674e90195a55735f7 --- /dev/null +++ b/frontend/app.py @@ -0,0 +1,485 @@ +""" +Streamlit Dashboard for Emotion Recognition System. 
+""" +import io +import sys +from pathlib import Path + +import streamlit as st +import numpy as np +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +from PIL import Image + +# Add project root to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from src.config import EMOTION_CLASSES, MODELS_DIR +from src.inference.predictor import EmotionPredictor + +# Page configuration +st.set_page_config( + page_title="Emotion Recognition Dashboard", + page_icon="😊", + layout="wide", + initial_sidebar_state="expanded" +) + +# Custom CSS +st.markdown(""" + +""", unsafe_allow_html=True) + +# Emotion emoji mapping +EMOTION_EMOJIS = { + "angry": "😠", + "disgusted": "🤢", + "fearful": "😨", + "happy": "😊", + "neutral": "😐", + "sad": "😢", + "surprised": "😲" +} + +# Color palette for emotions +EMOTION_COLORS = { + "angry": "#ef4444", + "disgusted": "#84cc16", + "fearful": "#a855f7", + "happy": "#22c55e", + "neutral": "#6b7280", + "sad": "#3b82f6", + "surprised": "#f59e0b" +} + + +@st.cache_resource +def load_predictor(model_name: str): + """Load and cache the emotion predictor.""" + predictor = EmotionPredictor(model_name) + if predictor.load(): + return predictor + return None + + +def get_intensity_class(intensity: str) -> str: + """Get CSS class for intensity.""" + return f"confidence-{intensity}" + + +def create_probability_chart(probabilities: dict) -> go.Figure: + """Create a horizontal bar chart for probabilities.""" + emotions = list(probabilities.keys()) + values = list(probabilities.values()) + colors = [EMOTION_COLORS.get(e, "#6b7280") for e in emotions] + + fig = go.Figure(go.Bar( + x=values, + y=[f"{EMOTION_EMOJIS.get(e, '')} {e.capitalize()}" for e in emotions], + orientation='h', + marker_color=colors, + text=[f"{v:.1%}" for v in values], + textposition='outside' + )) + + fig.update_layout( + title="Emotion Probabilities", + xaxis_title="Probability", + yaxis_title="Emotion", + height=350, + margin=dict(l=20, r=20, t=40, 
b=20), + xaxis=dict(range=[0, 1.1]) + ) + + return fig + + +def create_emotion_distribution_pie(counts: dict) -> go.Figure: + """Create a pie chart for emotion distribution.""" + emotions = [e for e, c in counts.items() if c > 0] + values = [c for c in counts.values() if c > 0] + colors = [EMOTION_COLORS.get(e, "#6b7280") for e in emotions] + + fig = go.Figure(go.Pie( + labels=[f"{EMOTION_EMOJIS.get(e, '')} {e.capitalize()}" for e in emotions], + values=values, + marker_colors=colors, + hole=0.4, + textinfo='percent+label' + )) + + fig.update_layout( + title="Emotion Distribution", + height=400, + margin=dict(l=20, r=20, t=40, b=20) + ) + + return fig + + +def main(): + """Main dashboard application.""" + # Header + st.markdown('

🎭 Emotion Recognition Dashboard

', unsafe_allow_html=True) + st.markdown("---") + + # Sidebar + with st.sidebar: + st.image("https://img.icons8.com/clouds/200/brain.png", width=100) + st.title("⚙️ Settings") + + # Model selection + available_models = EmotionPredictor.get_available_models() + model_options = [name for name, available in available_models.items() if available] + + if not model_options: + st.error("No trained models found! Please train a model first.") + st.info("Run: `python scripts/train_models.py`") + model_name = None + else: + model_name = st.selectbox( + "🤖 Select Model", + model_options, + format_func=lambda x: { + "custom_cnn": "Custom CNN", + "mobilenet": "MobileNetV2", + "vgg19": "VGG-19" + }.get(x, x) + ) + + # Face detection toggle + detect_face = st.toggle("👤 Enable Face Detection", value=True) + + # Confidence threshold + confidence_threshold = st.slider( + "📊 Confidence Threshold", + min_value=0.0, + max_value=1.0, + value=0.5, + step=0.05 + ) + + st.markdown("---") + + # Model info + st.subheader("📋 Model Status") + for name, available in available_models.items(): + icon = "✅" if available else "❌" + display_name = { + "custom_cnn": "Custom CNN", + "mobilenet": "MobileNetV2", + "vgg19": "VGG-19" + }.get(name, name) + st.write(f"{icon} {display_name}") + + # Main content + if model_name is None: + st.warning("Please train a model before using the dashboard.") + return + + # Load predictor + predictor = load_predictor(model_name) + if predictor is None: + st.error(f"Failed to load model: {model_name}") + return + + # Tabs + tab1, tab2, tab3 = st.tabs(["📷 Single Image", "📁 Batch Processing", "📊 Model Performance"]) + + # Tab 1: Single Image Analysis + with tab1: + st.subheader("Upload an Image for Emotion Analysis") + + col1, col2 = st.columns([1, 1]) + + with col1: + uploaded_file = st.file_uploader( + "Choose an image...", + type=["jpg", "jpeg", "png", "bmp"], + key="single_upload" + ) + + if uploaded_file is not None: + image = Image.open(uploaded_file) + 
st.image(image, caption="Uploaded Image", width="stretch") + + with col2: + if uploaded_file is not None: + with st.spinner("Analyzing emotion..."): + # Convert to numpy array + image_array = np.array(image.convert("RGB")) + + # Predict + result = predictor.predict(image_array, detect_face=detect_face) + + if "error" in result: + st.error(f"❌ {result['error']}") + else: + # Display result + emotion = result["emotion"] + confidence = result["confidence"] + intensity = result["intensity"] + + # Emotion card + st.markdown(f""" +
+

{EMOTION_EMOJIS.get(emotion, '🎭')}

+

{emotion.upper()}

+

Confidence: {confidence:.1%}

+

Intensity: {intensity.capitalize()}

+
+ """, unsafe_allow_html=True) + + # Probability chart + if "all_probabilities" in result: + fig = create_probability_chart(result["all_probabilities"]) + st.plotly_chart(fig, use_container_width=True) + + # Face detection info + if result["face_detected"]: + st.success("✅ Face detected successfully") + else: + st.warning("⚠️ No face detected - using full image") + + # Tab 2: Batch Processing + with tab2: + st.subheader("Upload Multiple Images for Batch Analysis") + + uploaded_files = st.file_uploader( + "Choose images...", + type=["jpg", "jpeg", "png", "bmp"], + accept_multiple_files=True, + key="batch_upload" + ) + + if uploaded_files: + st.write(f"📁 {len(uploaded_files)} files selected") + + if st.button("🚀 Analyze All", type="primary"): + progress_bar = st.progress(0) + status_text = st.empty() + + results = [] + images = [] + + for i, file in enumerate(uploaded_files): + status_text.text(f"Processing image {i+1}/{len(uploaded_files)}...") + progress_bar.progress((i + 1) / len(uploaded_files)) + + try: + image = Image.open(file) + images.append(image) + image_array = np.array(image.convert("RGB")) + result = predictor.predict(image_array, detect_face=detect_face) + result["filename"] = file.name + results.append(result) + except Exception as e: + results.append({"error": str(e), "filename": file.name}) + + status_text.text("✅ Analysis complete!") + + # Display results + col1, col2 = st.columns([1, 1]) + + with col1: + # Summary statistics + successful = [r for r in results if "error" not in r] + + if successful: + emotion_counts = {} + for r in successful: + emotion = r["emotion"] + emotion_counts[emotion] = emotion_counts.get(emotion, 0) + 1 + + # Pie chart + fig = create_emotion_distribution_pie(emotion_counts) + st.plotly_chart(fig, use_container_width=True) + + st.metric("Total Images", len(results)) + st.metric("Successful", len(successful)) + st.metric("Failed", len(results) - len(successful)) + + with col2: + # Results table + table_data = [] + for r in 
results: + if "error" in r: + table_data.append({ + "File": r.get("filename", "Unknown"), + "Emotion": "❌ Error", + "Confidence": "-", + "Intensity": "-" + }) + else: + table_data.append({ + "File": r.get("filename", "Unknown"), + "Emotion": f"{EMOTION_EMOJIS.get(r['emotion'], '')} {r['emotion'].capitalize()}", + "Confidence": f"{r['confidence']:.1%}", + "Intensity": r["intensity"].capitalize() + }) + + df = pd.DataFrame(table_data) + st.dataframe(df, use_container_width=True, height=400) + + # Download button + csv = df.to_csv(index=False) + st.download_button( + "📥 Download Results (CSV)", + csv, + "emotion_results.csv", + "text/csv" + ) + + # Image gallery with predictions + st.subheader("📷 Analyzed Images") + cols = st.columns(4) + for i, (img, result) in enumerate(zip(images, results)): + with cols[i % 4]: + if "error" not in result: + emoji = EMOTION_EMOJIS.get(result["emotion"], "") + st.image(img, caption=f"{emoji} {result['emotion']}", width="stretch") + else: + st.image(img, caption="❌ Error", width="stretch") + + # Tab 3: Model Performance + with tab3: + st.subheader("📊 Model Performance Metrics") + + # Check for saved metrics + metrics_path = MODELS_DIR / f"{model_name}.meta.json" + history_path = MODELS_DIR / f"{model_name}.history.json" + + if metrics_path.exists(): + import json + with open(metrics_path, 'r') as f: + metadata = json.load(f) + + col1, col2, col3 = st.columns(3) + + with col1: + st.metric( + "Best Validation Accuracy", + f"{metadata.get('best_val_accuracy', 0):.1%}" + ) + + with col2: + st.metric( + "Training Duration", + f"{metadata.get('training_duration_seconds', 0)/60:.1f} min" + ) + + with col3: + st.metric( + "Epochs Completed", + metadata.get('epochs_completed', 0) + ) + + if history_path.exists(): + with open(history_path, 'r') as f: + history = json.load(f) + + # Training curves + fig = go.Figure() + + epochs = list(range(1, len(history['accuracy']) + 1)) + + fig.add_trace(go.Scatter( + x=epochs, y=history['accuracy'], + 
mode='lines', name='Training Accuracy', + line=dict(color='#3b82f6') + )) + + fig.add_trace(go.Scatter( + x=epochs, y=history['val_accuracy'], + mode='lines', name='Validation Accuracy', + line=dict(color='#ef4444') + )) + + fig.update_layout( + title="Training History", + xaxis_title="Epoch", + yaxis_title="Accuracy", + height=400 + ) + + st.plotly_chart(fig, use_container_width=True) + + # Loss curves + fig2 = go.Figure() + + fig2.add_trace(go.Scatter( + x=epochs, y=history['loss'], + mode='lines', name='Training Loss', + line=dict(color='#3b82f6') + )) + + fig2.add_trace(go.Scatter( + x=epochs, y=history['val_loss'], + mode='lines', name='Validation Loss', + line=dict(color='#ef4444') + )) + + fig2.update_layout( + title="Loss History", + xaxis_title="Epoch", + yaxis_title="Loss", + height=400 + ) + + st.plotly_chart(fig2, use_container_width=True) + else: + st.info("No training metrics found for this model. Train the model to see performance data.") + + # Show placeholder + st.markdown(""" + ### Expected Metrics After Training + + | Model | Expected Accuracy | Training Time | + |-------|------------------|---------------| + | Custom CNN | 60-68% | ~30 min | + | MobileNetV2 | 65-72% | ~45 min | + | VGG-19 | 68-75% | ~60 min | + """) + + +if __name__ == "__main__": + main() diff --git a/models/custom_cnn.h5 b/models/custom_cnn.h5 new file mode 100644 index 0000000000000000000000000000000000000000..f3cd453c089053ecdd117044793d0f0dec3acd5b --- /dev/null +++ b/models/custom_cnn.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:588b34caa2f1b8a8f7c29cdc51005ad244ffa3451dbff10de14b45a1c30f5ad6 +size 86397296 diff --git a/models/custom_cnn.history.json b/models/custom_cnn.history.json new file mode 100644 index 0000000000000000000000000000000000000000..aa9d25d0cf821d914286d027df951783e11107d6 --- /dev/null +++ b/models/custom_cnn.history.json @@ -0,0 +1,262 @@ +{ + "accuracy": [ + 0.15269069373607635, + 0.15229885280132294, + 
0.14925113320350647, + 0.16035354137420654, + 0.14650818705558777, + 0.1529519259929657, + 0.18447405099868774, + 0.20154127478599548, + 0.20162835717201233, + 0.2072448581457138, + 0.2176506370306015, + 0.23267154395580292, + 0.23820097744464874, + 0.23772205412387848, + 0.2543974220752716, + 0.2947579324245453, + 0.31957507133483887, + 0.3651602268218994, + 0.3738679885864258, + 0.3878439664840698, + 0.4040403962135315, + 0.40382272005081177, + 0.42293626070022583, + 0.4291187822818756, + 0.43521422147750854, + 0.4663880169391632, + 0.47574886679649353, + 0.480581670999527, + 0.49076977372169495, + 0.4974747598171234, + 0.5010885000228882, + 0.501349687576294, + 0.5120167136192322, + 0.5220306515693665, + 0.522944986820221, + 0.5263845324516296, + 0.5292145609855652, + 0.5357889533042908, + 0.5326105952262878, + 0.5370079874992371, + 0.5618686676025391, + 0.5710553526878357, + 0.5716649293899536, + 0.5865116715431213, + 0.5881226062774658, + 0.5922152400016785, + 0.5961337685585022, + 0.5938697457313538, + 0.6016196608543396, + 0.6010971665382385 + ], + "loss": [ + 20.70295524597168, + 6.6751275062561035, + 3.4141359329223633, + 2.6772806644439697, + 2.363776922225952, + 2.305654764175415, + 2.403165102005005, + 2.533033847808838, + 2.592125415802002, + 2.516871929168701, + 2.634507179260254, + 2.2788898944854736, + 2.248971700668335, + 2.259343147277832, + 2.2574307918548584, + 2.277164936065674, + 2.2215516567230225, + 2.0204102993011475, + 2.056856632232666, + 2.005807638168335, + 2.003291368484497, + 1.9824334383010864, + 1.954105257987976, + 1.9302726984024048, + 1.9290143251419067, + 1.7764708995819092, + 1.699206829071045, + 1.6769280433654785, + 1.6617697477340698, + 1.6487752199172974, + 1.6480680704116821, + 1.647431492805481, + 1.618944525718689, + 1.6199805736541748, + 1.6172568798065186, + 1.6095706224441528, + 1.60358726978302, + 1.5906955003738403, + 1.5961298942565918, + 1.5957502126693726, + 1.4978364706039429, + 1.4400925636291504, + 
1.4301934242248535, + 1.3841499090194702, + 1.3796941041946411, + 1.3780864477157593, + 1.367702603340149, + 1.3718606233596802, + 1.3513654470443726, + 1.335976481437683 + ], + "val_accuracy": [ + 0.019334610551595688, + 0.03274690732359886, + 0.17749521136283875, + 0.09249259531497955, + 0.16617314517498016, + 0.1546768844127655, + 0.17035359144210815, + 0.14997386932373047, + 0.18359170854091644, + 0.12088486552238464, + 0.23880857229232788, + 0.28549033403396606, + 0.2426406592130661, + 0.14178714156150818, + 0.16094757616519928, + 0.21390001475811005, + 0.2203448861837387, + 0.352726012468338, + 0.34628114104270935, + 0.42222610116004944, + 0.32328861951828003, + 0.39296290278434753, + 0.38094407320022583, + 0.3227660655975342, + 0.35046160221099854, + 0.4741334319114685, + 0.49294549226760864, + 0.4274516701698303, + 0.5124542713165283, + 0.46873366832733154, + 0.5080996155738831, + 0.5359693169593811, + 0.4953840672969818, + 0.5192475318908691, + 0.5549556016921997, + 0.5014805793762207, + 0.5380595922470093, + 0.5481623411178589, + 0.5141961574554443, + 0.55303955078125, + 0.5638390779495239, + 0.5594844222068787, + 0.6054694056510925, + 0.5887476205825806, + 0.5864831805229187, + 0.5976310968399048, + 0.5755094885826111, + 0.5990245342254639, + 0.5859606266021729, + 0.5878766775131226 + ], + "val_loss": [ + 10.306150436401367, + 4.229166507720947, + 2.9087648391723633, + 2.51203989982605, + 2.2901315689086914, + 2.252924680709839, + 2.433279275894165, + 3.151798963546753, + 2.5496649742126465, + 2.702935218811035, + 2.5200254917144775, + 2.191758155822754, + 2.398240089416504, + 2.371654987335205, + 2.4415881633758545, + 2.3200838565826416, + 2.431246519088745, + 2.050586223602295, + 2.0909876823425293, + 1.967246413230896, + 2.160478115081787, + 2.0182101726531982, + 1.986769676208496, + 2.2352793216705322, + 2.157156467437744, + 1.7153019905090332, + 1.662605881690979, + 1.8105882406234741, + 1.6218799352645874, + 1.729047417640686, + 1.681536316871643, 
+ 1.5821231603622437, + 1.6714152097702026, + 1.6351673603057861, + 1.5606050491333008, + 1.6746041774749756, + 1.6208828687667847, + 1.5802900791168213, + 1.672598958015442, + 1.58816397190094, + 1.512089729309082, + 1.5220553874969482, + 1.3933297395706177, + 1.4107120037078857, + 1.4645394086837769, + 1.416934609413147, + 1.4484670162200928, + 1.3919000625610352, + 1.4328689575195312, + 1.4462361335754395 + ], + "learning_rate": [ + 0.0010000000474974513, + 0.0010000000474974513, + 0.0010000000474974513, + 0.0010000000474974513, + 0.0010000000474974513, + 0.0010000000474974513, + 0.0010000000474974513, + 0.0010000000474974513, + 0.0010000000474974513, + 0.0010000000474974513, + 0.0010000000474974513, + 0.0005000000237487257, + 0.0005000000237487257, + 0.0005000000237487257, + 0.0005000000237487257, + 0.0005000000237487257, + 0.0005000000237487257, + 0.0002500000118743628, + 0.0002500000118743628, + 0.0002500000118743628, + 0.0002500000118743628, + 0.0002500000118743628, + 0.0002500000118743628, + 0.0002500000118743628, + 0.0002500000118743628, + 0.0001250000059371814, + 0.0001250000059371814, + 0.0001250000059371814, + 0.0001250000059371814, + 0.0001250000059371814, + 0.0001250000059371814, + 0.0001250000059371814, + 0.0001250000059371814, + 0.0001250000059371814, + 0.0001250000059371814, + 0.0001250000059371814, + 0.0001250000059371814, + 0.0001250000059371814, + 0.0001250000059371814, + 0.0001250000059371814, + 6.25000029685907e-05, + 6.25000029685907e-05, + 6.25000029685907e-05, + 6.25000029685907e-05, + 6.25000029685907e-05, + 6.25000029685907e-05, + 6.25000029685907e-05, + 6.25000029685907e-05, + 6.25000029685907e-05, + 6.25000029685907e-05 + ] +} \ No newline at end of file diff --git a/models/custom_cnn.meta.json b/models/custom_cnn.meta.json new file mode 100644 index 0000000000000000000000000000000000000000..15f9bafe66d84e43246b29ddad267adaf0130af6 --- /dev/null +++ b/models/custom_cnn.meta.json @@ -0,0 +1,15 @@ +{ + "learning_rate": 0.001, + 
"loss_function": "categorical_crossentropy", + "metrics": [ + "accuracy" + ], + "training_started": "2026-02-02T04:27:09.945021", + "epochs_requested": 50, + "training_ended": "2026-02-02T13:34:58.163201", + "training_duration_seconds": 32868.21818, + "epochs_completed": 50, + "final_accuracy": 0.6010971665382385, + "final_val_accuracy": 0.5878766775131226, + "best_val_accuracy": 0.6054694056510925 +} \ No newline at end of file diff --git a/models/logs/custom_cnn/train/events.out.tfevents.1769986631.JOSH_MARK.24880.0.v2 b/models/logs/custom_cnn/train/events.out.tfevents.1769986631.JOSH_MARK.24880.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..66652c92e150233bae4bb383c249aa3963c6e4e5 --- /dev/null +++ b/models/logs/custom_cnn/train/events.out.tfevents.1769986631.JOSH_MARK.24880.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cd35d42f7aee79730fef0a4045798070058d2917e0fe0f645ce912cd7bd6644 +size 2535576 diff --git a/models/logs/custom_cnn/validation/events.out.tfevents.1769987506.JOSH_MARK.24880.1.v2 b/models/logs/custom_cnn/validation/events.out.tfevents.1769987506.JOSH_MARK.24880.1.v2 new file mode 100644 index 0000000000000000000000000000000000000000..bbc22ad22669db87a5fce2fe98596943e41e2a16 --- /dev/null +++ b/models/logs/custom_cnn/validation/events.out.tfevents.1769987506.JOSH_MARK.24880.1.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd7d41f1dee732d8d180d5baa04ffda084995f29e0092703985c30b00648cb37 +size 16084 diff --git a/models/logs/mobilenet_v2/train/events.out.tfevents.1770019504.JOSH_MARK.24880.2.v2 b/models/logs/mobilenet_v2/train/events.out.tfevents.1770019504.JOSH_MARK.24880.2.v2 new file mode 100644 index 0000000000000000000000000000000000000000..74ee2ac930a74b9c06e27b2d3ee1b64d06f2b0e8 --- /dev/null +++ b/models/logs/mobilenet_v2/train/events.out.tfevents.1770019504.JOSH_MARK.24880.2.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4098b3cc7c927c62fb9a294b8dbaf53b44a792f37aee5ea9ea78c9cb97d00c3a +size 3921756 diff --git a/models/logs/mobilenet_v2/train/events.out.tfevents.1770020997.JOSH_MARK.24880.4.v2 b/models/logs/mobilenet_v2/train/events.out.tfevents.1770020997.JOSH_MARK.24880.4.v2 new file mode 100644 index 0000000000000000000000000000000000000000..21c199b9cc15ce53667628fa127abfbf2d1c6a6e --- /dev/null +++ b/models/logs/mobilenet_v2/train/events.out.tfevents.1770020997.JOSH_MARK.24880.4.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7845dd0eb4203fb6daff5b842829bd3deef54c6001fdcb1ad88e2f851edc6b3 +size 4585881 diff --git a/models/logs/mobilenet_v2/train/events.out.tfevents.1770060970.JOSH_MARK.1932.0.v2 b/models/logs/mobilenet_v2/train/events.out.tfevents.1770060970.JOSH_MARK.1932.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..ad07d7a7092d7551a226d96a41c428c51d3f864e --- /dev/null +++ b/models/logs/mobilenet_v2/train/events.out.tfevents.1770060970.JOSH_MARK.1932.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42f1d7dee5c5ec1a8c6f30fadbe097cda75a4fc6065260e77480aafa696004e3 +size 3036210 diff --git a/models/logs/mobilenet_v2/train/events.out.tfevents.1770062582.JOSH_MARK.1932.2.v2 b/models/logs/mobilenet_v2/train/events.out.tfevents.1770062582.JOSH_MARK.1932.2.v2 new file mode 100644 index 0000000000000000000000000000000000000000..b0a4f0a7e26e709d4ce99c21b3bfb60a89f9de60 --- /dev/null +++ b/models/logs/mobilenet_v2/train/events.out.tfevents.1770062582.JOSH_MARK.1932.2.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da8f5e7986b6ae22edf80a618a65a5ba606d9a9151597579a42802cdde4f215 +size 2593460 diff --git a/models/logs/mobilenet_v2/validation/events.out.tfevents.1770019615.JOSH_MARK.24880.3.v2 b/models/logs/mobilenet_v2/validation/events.out.tfevents.1770019615.JOSH_MARK.24880.3.v2 new file mode 100644 index 
0000000000000000000000000000000000000000..a60e8691f16890bf429657fe0154f20e74876b25 --- /dev/null +++ b/models/logs/mobilenet_v2/validation/events.out.tfevents.1770019615.JOSH_MARK.24880.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199544d9f955f5479f456aca9a2227ee975516753d990cc719285f22ea8b0b9b +size 5514 diff --git a/models/logs/mobilenet_v2/validation/events.out.tfevents.1770021071.JOSH_MARK.24880.5.v2 b/models/logs/mobilenet_v2/validation/events.out.tfevents.1770021071.JOSH_MARK.24880.5.v2 new file mode 100644 index 0000000000000000000000000000000000000000..aea001c219d3b8dd37ac36766d447a3339d30a2d --- /dev/null +++ b/models/logs/mobilenet_v2/validation/events.out.tfevents.1770021071.JOSH_MARK.24880.5.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9118aa70fcaa07e680166b4e8048b69b846b9e709e58788e295d4f9d65c7bd65 +size 6474 diff --git a/models/logs/mobilenet_v2/validation/events.out.tfevents.1770061342.JOSH_MARK.1932.1.v2 b/models/logs/mobilenet_v2/validation/events.out.tfevents.1770061342.JOSH_MARK.1932.1.v2 new file mode 100644 index 0000000000000000000000000000000000000000..074902dbf32d468d9529489489c723f382fa1512 --- /dev/null +++ b/models/logs/mobilenet_v2/validation/events.out.tfevents.1770061342.JOSH_MARK.1932.1.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:118bbf8244c6d8db7d9e0065489c9b9f1dffc03caf99b98a0f7029d63077dd60 +size 4234 diff --git a/models/logs/mobilenet_v2/validation/events.out.tfevents.1770062665.JOSH_MARK.1932.3.v2 b/models/logs/mobilenet_v2/validation/events.out.tfevents.1770062665.JOSH_MARK.1932.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..efad3faef2695f48ae0ef6936aafa78cd4c366d5 --- /dev/null +++ b/models/logs/mobilenet_v2/validation/events.out.tfevents.1770062665.JOSH_MARK.1932.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5ad6267d9dbfcc3ec01c803f8c1fa5822d259ba24659d3fe6a53cd0551c1404 +size 
3594 diff --git a/models/logs/vgg19/train/events.out.tfevents.1770023002.JOSH_MARK.24880.6.v2 b/models/logs/vgg19/train/events.out.tfevents.1770023002.JOSH_MARK.24880.6.v2 new file mode 100644 index 0000000000000000000000000000000000000000..0966d47f36e605a001007ec0c44e77d69e6a3cca --- /dev/null +++ b/models/logs/vgg19/train/events.out.tfevents.1770023002.JOSH_MARK.24880.6.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadbebc8dd36dfc128649c3175ee75300927a7f8821e6647ea47550b8c0bcc23 +size 515013 diff --git a/models/logs/vgg19/train/events.out.tfevents.1770029728.JOSH_MARK.24880.8.v2 b/models/logs/vgg19/train/events.out.tfevents.1770029728.JOSH_MARK.24880.8.v2 new file mode 100644 index 0000000000000000000000000000000000000000..aaf212dd0126894fd12fdfb29494dd082bd7fcbd --- /dev/null +++ b/models/logs/vgg19/train/events.out.tfevents.1770029728.JOSH_MARK.24880.8.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ce34f9b9aa9c8f0948b66719f0c0f9eaebb1a4106869bf30985862e82312ccc +size 775630 diff --git a/models/logs/vgg19/train/events.out.tfevents.1770063874.JOSH_MARK.14568.0.v2 b/models/logs/vgg19/train/events.out.tfevents.1770063874.JOSH_MARK.14568.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..d8b87da47baecaffc68edc3906a460ac02075f70 --- /dev/null +++ b/models/logs/vgg19/train/events.out.tfevents.1770063874.JOSH_MARK.14568.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20bdc0419e03f073770425f6dab7687c3c244febfb3256023db1b0c040fdba0 +size 290086 diff --git a/models/logs/vgg19/train/events.out.tfevents.1770068280.JOSH_MARK.14988.0.v2 b/models/logs/vgg19/train/events.out.tfevents.1770068280.JOSH_MARK.14988.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..7e0a9f46640dd696a35839d3c35e65e04c5d0628 --- /dev/null +++ b/models/logs/vgg19/train/events.out.tfevents.1770068280.JOSH_MARK.14988.0.v2 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:26e239ce23e47c7fd42eae35402c1265d50d180260eafc77011e85ff38c04b26 +size 1146399 diff --git a/models/logs/vgg19/train/events.out.tfevents.1770082770.JOSH_MARK.14988.2.v2 b/models/logs/vgg19/train/events.out.tfevents.1770082770.JOSH_MARK.14988.2.v2 new file mode 100644 index 0000000000000000000000000000000000000000..647b093ba1dc8ce6e72b4597c058b2db69657945 --- /dev/null +++ b/models/logs/vgg19/train/events.out.tfevents.1770082770.JOSH_MARK.14988.2.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6f1737790b2df32c086e0443054fa7239a4a3f59abe0938fdeaa426055c41d5 +size 774089 diff --git a/models/logs/vgg19/validation/events.out.tfevents.1770023476.JOSH_MARK.24880.7.v2 b/models/logs/vgg19/validation/events.out.tfevents.1770023476.JOSH_MARK.24880.7.v2 new file mode 100644 index 0000000000000000000000000000000000000000..ea4101dd3adea862e19cb7d50e68c2e98a9a9a99 --- /dev/null +++ b/models/logs/vgg19/validation/events.out.tfevents.1770023476.JOSH_MARK.24880.7.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c078cbf2229a8a97568c940eb3a25bbbf6ad0a91d5a5084a7acfedef1a66f5 +size 4234 diff --git a/models/logs/vgg19/validation/events.out.tfevents.1770030127.JOSH_MARK.24880.9.v2 b/models/logs/vgg19/validation/events.out.tfevents.1770030127.JOSH_MARK.24880.9.v2 new file mode 100644 index 0000000000000000000000000000000000000000..a5e200e36279c5b6b31e30571f8ce519e589aa86 --- /dev/null +++ b/models/logs/vgg19/validation/events.out.tfevents.1770030127.JOSH_MARK.24880.9.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9398a51151e81b15eb0df2a09de090d5e26731f9bdf922aeef1b518f3020226 +size 6474 diff --git a/models/logs/vgg19/validation/events.out.tfevents.1770064525.JOSH_MARK.14568.1.v2 b/models/logs/vgg19/validation/events.out.tfevents.1770064525.JOSH_MARK.14568.1.v2 new file mode 100644 index 
0000000000000000000000000000000000000000..aad99e2578ed63bf6b471c8e712db2927855a3d8 --- /dev/null +++ b/models/logs/vgg19/validation/events.out.tfevents.1770064525.JOSH_MARK.14568.1.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d1a407c4f8c12b6d320a024e9c292edf58e5ce84d91ea824584cc3240dfcbb7 +size 2314 diff --git a/models/logs/vgg19/validation/events.out.tfevents.1770068666.JOSH_MARK.14988.1.v2 b/models/logs/vgg19/validation/events.out.tfevents.1770068666.JOSH_MARK.14988.1.v2 new file mode 100644 index 0000000000000000000000000000000000000000..2d27014e533a900b95e02dd7f308a8168d7f48d1 --- /dev/null +++ b/models/logs/vgg19/validation/events.out.tfevents.1770068666.JOSH_MARK.14988.1.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f201dbfc9797a11794dbd64b866a881823ff7baa76acb47631e31ced8c2603e +size 9674 diff --git a/models/logs/vgg19/validation/events.out.tfevents.1770083165.JOSH_MARK.14988.3.v2 b/models/logs/vgg19/validation/events.out.tfevents.1770083165.JOSH_MARK.14988.3.v2 new file mode 100644 index 0000000000000000000000000000000000000000..3a58bc31651db8d556104d0cc3b8469023ef5ae5 --- /dev/null +++ b/models/logs/vgg19/validation/events.out.tfevents.1770083165.JOSH_MARK.14988.3.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed2d98df90f98d6a03e33dbc7e68563d046fa282ab371d68c574ac8d2cb82663 +size 6474 diff --git a/models/mobilenet_v2.h5 b/models/mobilenet_v2.h5 new file mode 100644 index 0000000000000000000000000000000000000000..ff150965ff05fad069f2b4172dc52c4f67bf6859 --- /dev/null +++ b/models/mobilenet_v2.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2034e436498e4e419a49981d52892e9196283744a3f64b0c0151357d462c267 +size 31157400 diff --git a/models/mobilenet_v2.history.json b/models/mobilenet_v2.history.json new file mode 100644 index 0000000000000000000000000000000000000000..811df31980cfe128ab231a2392369463c9339ee9 --- /dev/null +++ 
b/models/mobilenet_v2.history.json @@ -0,0 +1,67 @@ +{ + "accuracy": [ + 0.2706809341907501, + 0.30677464604377747, + 0.315395325422287, + 0.3210989236831665, + 0.33437827229499817, + 0.3370341360569, + 0.3439567983150482, + 0.34918147325515747, + 0.3457418978214264, + 0.3518373370170593, + 0.3530564308166504 + ], + "loss": [ + 1.8339711427688599, + 1.7757574319839478, + 1.743255615234375, + 1.720676302909851, + 1.689762830734253, + 1.6830896139144897, + 1.670864462852478, + 1.6629376411437988, + 1.6575630903244019, + 1.650472640991211, + 1.643319010734558 + ], + "val_accuracy": [ + 0.25134992599487305, + 0.25134992599487305, + 0.14439992606639862, + 0.25134992599487305, + 0.25134992599487305, + 0.25134992599487305, + 0.26127851009368896, + 0.25622713565826416, + 0.11043372005224228, + 0.11757533252239227, + 0.11896882206201553 + ], + "val_loss": [ + 6.824132442474365, + 8.780102729797363, + 8.830862998962402, + 8.673893928527832, + 10.961349487304688, + 9.59477424621582, + 7.310698986053467, + 7.944781303405762, + 10.567312240600586, + 7.704894542694092, + 6.902732849121094 + ], + "learning_rate": [ + 9.999999747378752e-05, + 9.999999747378752e-05, + 9.999999747378752e-05, + 9.999999747378752e-05, + 9.999999747378752e-05, + 9.999999747378752e-05, + 4.999999873689376e-05, + 4.999999873689376e-05, + 4.999999873689376e-05, + 4.999999873689376e-05, + 4.999999873689376e-05 + ] +} \ No newline at end of file diff --git a/models/mobilenet_v2.meta.json b/models/mobilenet_v2.meta.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa5f61954a6d992475e744a84fdf8ea0b9426dc --- /dev/null +++ b/models/mobilenet_v2.meta.json @@ -0,0 +1,15 @@ +{ + "learning_rate": 0.0001, + "loss_function": "categorical_crossentropy", + "metrics": [ + "accuracy" + ], + "training_started": "2026-02-03T01:33:02.533762", + "epochs_requested": 20, + "training_ended": "2026-02-03T01:51:17.847554", + "training_duration_seconds": 1095.313792, + "epochs_completed": 11, + 
"final_accuracy": 0.3530564308166504, + "final_val_accuracy": 0.11896882206201553, + "best_val_accuracy": 0.26127851009368896 +} \ No newline at end of file diff --git a/models/vgg19.h5 b/models/vgg19.h5 new file mode 100644 index 0000000000000000000000000000000000000000..db740e7a0e87776e1fee8c45632ebe36c1f33e5d --- /dev/null +++ b/models/vgg19.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c1022f6a4ecc46f0de58d2f82dd783c03919ec783cc619a63f5073e359f1cc +size 141626776 diff --git a/models/vgg19.history.json b/models/vgg19.history.json new file mode 100644 index 0000000000000000000000000000000000000000..0e972551d773d501d789cc22a1c64802036b7ea6 --- /dev/null +++ b/models/vgg19.history.json @@ -0,0 +1,112 @@ +{ + "accuracy": [ + 0.6472048163414001, + 0.6577411890029907, + 0.663314163684845, + 0.6661877632141113, + 0.6683211326599121, + 0.6746777892112732, + 0.678639829158783, + 0.6781609058380127, + 0.6804684996604919, + 0.6846917271614075, + 0.6875653266906738, + 0.6928770542144775, + 0.6930947303771973, + 0.6967955231666565, + 0.6986677050590515, + 0.7037182450294495, + 0.7013235688209534, + 0.705329179763794, + 0.7071577906608582, + 0.7095524072647095 + ], + "loss": [ + 0.9494282603263855, + 0.9269279837608337, + 0.9132461547851562, + 0.9060502648353577, + 0.8947601914405823, + 0.8831118941307068, + 0.8768442869186401, + 0.874686062335968, + 0.8647421002388, + 0.8572869896888733, + 0.8513924479484558, + 0.8356429934501648, + 0.8330938816070557, + 0.8248153924942017, + 0.827460765838623, + 0.8220475912094116, + 0.8190444111824036, + 0.8056929111480713, + 0.8031319379806519, + 0.7939973473548889 + ], + "val_accuracy": [ + 0.6099982857704163, + 0.6087789535522461, + 0.6148754358291626, + 0.6120885014533997, + 0.6239331364631653, + 0.6180108189582825, + 0.6162689328193665, + 0.6113917231559753, + 0.6174882650375366, + 0.6145271062850952, + 0.6225396394729614, + 0.6141787171363831, + 0.6279394030570984, + 0.6181849837303162, + 
0.6190559267997742, + 0.6277651786804199, + 0.6309005618095398, + 0.6220170855522156, + 0.6244556903839111, + 0.6169657111167908 + ], + "val_loss": [ + 1.0432090759277344, + 1.0663282871246338, + 1.2026596069335938, + 1.053371787071228, + 1.1141172647476196, + 1.0393040180206299, + 1.0537439584732056, + 1.0666412115097046, + 1.0747647285461426, + 1.0494613647460938, + 1.0501089096069336, + 1.0589252710342407, + 1.0657376050949097, + 1.0464764833450317, + 1.0556851625442505, + 1.0362597703933716, + 1.051085352897644, + 1.071323037147522, + 1.0513226985931396, + 1.1507371664047241 + ], + "learning_rate": [ + 9.999999747378752e-05, + 9.999999747378752e-05, + 9.999999747378752e-05, + 9.999999747378752e-05, + 9.999999747378752e-05, + 9.999999747378752e-05, + 9.999999747378752e-05, + 9.999999747378752e-05, + 9.999999747378752e-05, + 9.999999747378752e-05, + 9.999999747378752e-05, + 4.999999873689376e-05, + 4.999999873689376e-05, + 4.999999873689376e-05, + 4.999999873689376e-05, + 4.999999873689376e-05, + 4.999999873689376e-05, + 4.999999873689376e-05, + 4.999999873689376e-05, + 4.999999873689376e-05 + ] +} \ No newline at end of file diff --git a/models/vgg19.meta.json b/models/vgg19.meta.json new file mode 100644 index 0000000000000000000000000000000000000000..5f3df753b8f17c19f63e1dbae9dd4554b787059a --- /dev/null +++ b/models/vgg19.meta.json @@ -0,0 +1,15 @@ +{ + "learning_rate": 0.0001, + "loss_function": "categorical_crossentropy", + "metrics": [ + "accuracy" + ], + "training_started": "2026-02-03T07:09:30.363125", + "epochs_requested": 20, + "training_ended": "2026-02-03T09:49:04.904804", + "training_duration_seconds": 9574.541679, + "epochs_completed": 20, + "final_accuracy": 0.7095524072647095, + "final_val_accuracy": 0.6169657111167908, + "best_val_accuracy": 0.6309005618095398 +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 
0000000000000000000000000000000000000000..3ebbcf8bef2ef16ebe93458dd5867903248939ee --- /dev/null +++ b/requirements.txt @@ -0,0 +1,28 @@ +# Core Deep Learning +tensorflow>=2.10.0 +keras>=2.10.0 +numpy>=1.21.0 +pandas>=1.4.0 +scikit-learn>=1.0.0 + +# Image Processing +opencv-python>=4.5.0 +Pillow>=9.0.0 +mtcnn>=0.1.1 + +# API +fastapi>=0.95.0 +uvicorn>=0.21.0 +python-multipart>=0.0.6 + +# Frontend +streamlit>=1.22.0 +plotly>=5.13.0 + +# Visualization +matplotlib>=3.5.0 +seaborn>=0.12.0 + +# Development +pytest>=7.0.0 +httpx>=0.23.0 diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d49938bda192404c759dfc6c25b135a2d3cc24ef --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,2 @@ +# Emotion Recognition System +__version__ = "1.0.0" diff --git a/src/__pycache__/__init__.cpython-310.pyc b/src/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5f586abbd681f90e8882d1c1197169a7a2f38896 Binary files /dev/null and b/src/__pycache__/__init__.cpython-310.pyc differ diff --git a/src/__pycache__/config.cpython-310.pyc b/src/__pycache__/config.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34498b0a90ad8f5b2a38442772601ef894a30c38 Binary files /dev/null and b/src/__pycache__/config.cpython-310.pyc differ diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000000000000000000000000000000000000..fc479011cf5f7d748a2b9f3cdbfac5f5e537c76c --- /dev/null +++ b/src/config.py @@ -0,0 +1,76 @@ +""" +Configuration settings for the Emotion Recognition System. 
+""" +import os +from pathlib import Path + +# Project paths +PROJECT_ROOT = Path(__file__).parent.parent +DATA_DIR = PROJECT_ROOT / "data" +TRAIN_DIR = DATA_DIR / "train" +TEST_DIR = DATA_DIR / "test" +MODELS_DIR = PROJECT_ROOT / "models" + +# Create models directory if it doesn't exist +MODELS_DIR.mkdir(exist_ok=True) + +# Image settings +IMAGE_SIZE = (48, 48) +IMAGE_SIZE_TRANSFER = (96, 96) # For transfer learning models +NUM_CHANNELS = 1 # Grayscale +NUM_CHANNELS_RGB = 3 # For transfer learning + +# Emotion classes (7 classes from FER dataset) +EMOTION_CLASSES = [ + "angry", + "disgusted", + "fearful", + "happy", + "neutral", + "sad", + "surprised" +] +NUM_CLASSES = len(EMOTION_CLASSES) + +# Emotion to index mapping +EMOTION_TO_IDX = {emotion: idx for idx, emotion in enumerate(EMOTION_CLASSES)} +IDX_TO_EMOTION = {idx: emotion for idx, emotion in enumerate(EMOTION_CLASSES)} + +# Training hyperparameters +BATCH_SIZE = 64 +EPOCHS = 50 +LEARNING_RATE = 0.001 +LEARNING_RATE_FINE_TUNE = 0.0001 +VALIDATION_SPLIT = 0.2 + +# Data augmentation parameters +AUGMENTATION_CONFIG = { + "rotation_range": 15, + "width_shift_range": 0.1, + "height_shift_range": 0.1, + "horizontal_flip": True, + "zoom_range": 0.1, + "brightness_range": (0.9, 1.1), + "fill_mode": "nearest" +} + +# Model save paths +CUSTOM_CNN_PATH = MODELS_DIR / "custom_cnn.h5" +MOBILENET_PATH = MODELS_DIR / "mobilenet_v2.h5" +VGG_PATH = MODELS_DIR / "vgg19.h5" + +# Training callbacks +EARLY_STOPPING_PATIENCE = 10 +REDUCE_LR_PATIENCE = 5 +REDUCE_LR_FACTOR = 0.5 + +# Intensity thresholds +INTENSITY_HIGH_THRESHOLD = 0.8 +INTENSITY_MEDIUM_THRESHOLD = 0.5 + +# API settings +API_HOST = "0.0.0.0" +API_PORT = 8000 + +# Streamlit settings +STREAMLIT_PORT = 8501 diff --git a/src/inference/__init__.py b/src/inference/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..adddf0135cd3d618c3f4bb1ebb054c7b5c10db56 --- /dev/null +++ b/src/inference/__init__.py @@ -0,0 +1,3 @@ +from .predictor import 
EmotionPredictor + +__all__ = ["EmotionPredictor"] diff --git a/src/inference/__pycache__/__init__.cpython-310.pyc b/src/inference/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f945a76603c96b6608124f332f776008b16322ff Binary files /dev/null and b/src/inference/__pycache__/__init__.cpython-310.pyc differ diff --git a/src/inference/__pycache__/predictor.cpython-310.pyc b/src/inference/__pycache__/predictor.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a19376eea03106033441e49e8bc5393599db9548 Binary files /dev/null and b/src/inference/__pycache__/predictor.cpython-310.pyc differ diff --git a/src/inference/predictor.py b/src/inference/predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..d10301050521bf6dbe26b6b550c5a8b9b47bf4f8 --- /dev/null +++ b/src/inference/predictor.py @@ -0,0 +1,346 @@ +""" +Inference pipeline for emotion recognition. +""" +import numpy as np +from pathlib import Path +from typing import Dict, List, Optional, Tuple, Union + +import cv2 +from PIL import Image +import tensorflow as tf +from tensorflow.keras.models import Model + +import sys +sys.path.append(str(Path(__file__).parent.parent.parent)) +from src.config import ( + IMAGE_SIZE, IMAGE_SIZE_TRANSFER, EMOTION_CLASSES, IDX_TO_EMOTION, + INTENSITY_HIGH_THRESHOLD, INTENSITY_MEDIUM_THRESHOLD, + CUSTOM_CNN_PATH, MOBILENET_PATH, VGG_PATH +) +from src.preprocessing.face_detector import FaceDetector +from src.models.model_utils import load_model + + +class EmotionPredictor: + """ + Unified prediction interface for emotion recognition. + """ + + def __init__( + self, + model_name: str = "custom_cnn", + model_path: Optional[Path] = None, + use_face_detection: bool = True + ): + """ + Initialize the predictor. 
+ + Args: + model_name: Name of the model ('custom_cnn', 'mobilenet', 'vgg19') + model_path: Optional custom model path + use_face_detection: Whether to detect faces before prediction + """ + self.model_name = model_name + self.model = None + self.face_detector = FaceDetector() if use_face_detection else None + + # Determine model path + if model_path: + self.model_path = Path(model_path) + else: + paths = { + "custom_cnn": CUSTOM_CNN_PATH, + "mobilenet": MOBILENET_PATH, + "vgg19": VGG_PATH + } + self.model_path = paths.get(model_name) + + # Set preprocessing based on model type + self.is_transfer_model = model_name in ["mobilenet", "vgg19"] + self.target_size = IMAGE_SIZE_TRANSFER if self.is_transfer_model else IMAGE_SIZE + self.use_rgb = self.is_transfer_model + + def load(self) -> bool: + """ + Load the model. + + Returns: + True if model loaded successfully + """ + try: + if self.model_path and self.model_path.exists(): + self.model = load_model(self.model_path) + return True + else: + print(f"Model file not found: {self.model_path}") + return False + except Exception as e: + print(f"Error loading model: {e}") + return False + + def preprocess_image( + self, + image: np.ndarray, + detect_face: bool = True + ) -> Tuple[Optional[np.ndarray], List[dict]]: + """ + Preprocess an image for prediction. 
+ + Args: + image: Input image (BGR or RGB format) + detect_face: Whether to detect and extract face + + Returns: + Tuple of (preprocessed image, face info) + """ + faces_info = [] + + if detect_face and self.face_detector: + # Detect and extract face + face, faces_info = self.face_detector.detect_and_extract( + image, + target_size=self.target_size, + to_grayscale=not self.use_rgb + ) + + if face is None: + return None, faces_info + + processed = face + else: + # Resize directly + processed = cv2.resize(image, self.target_size) + + # Convert color if needed + if self.use_rgb: + if len(processed.shape) == 2: + processed = cv2.cvtColor(processed, cv2.COLOR_GRAY2RGB) + elif processed.shape[2] == 1: + processed = np.repeat(processed, 3, axis=2) + else: + if len(processed.shape) == 3 and processed.shape[2] == 3: + processed = cv2.cvtColor(processed, cv2.COLOR_BGR2GRAY) + + # Normalize + processed = processed.astype(np.float32) / 255.0 + + # Add channel dimension if grayscale + if len(processed.shape) == 2: + processed = np.expand_dims(processed, axis=-1) + + # Add batch dimension + processed = np.expand_dims(processed, axis=0) + + return processed, faces_info + + def predict( + self, + image: Union[np.ndarray, str, Path], + detect_face: bool = True, + return_all_scores: bool = True + ) -> Dict: + """ + Predict emotion from an image. 
+ + Args: + image: Input image (array, file path, or PIL Image) + detect_face: Whether to detect face first + return_all_scores: Whether to return all class scores + + Returns: + Prediction result dictionary + """ + if self.model is None: + success = self.load() + if not success: + return {"error": "Model not loaded"} + + # Load image if path provided + if isinstance(image, (str, Path)): + image = cv2.imread(str(image)) + if image is None: + return {"error": f"Could not load image: {image}"} + elif isinstance(image, Image.Image): + image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR) + + # Preprocess + processed, faces_info = self.preprocess_image(image, detect_face) + + if processed is None: + return { + "error": "No face detected", + "face_detected": False, + "faces_info": faces_info + } + + # Predict + predictions = self.model.predict(processed, verbose=0) + + # Get top prediction + pred_idx = int(np.argmax(predictions[0])) + confidence = float(predictions[0][pred_idx]) + emotion = IDX_TO_EMOTION[pred_idx] + + # Calculate intensity + intensity = self._calculate_intensity(confidence) + + result = { + "emotion": emotion, + "confidence": confidence, + "intensity": intensity, + "face_detected": len(faces_info) > 0, + "faces_info": faces_info, + "model_used": self.model_name + } + + if return_all_scores: + result["all_probabilities"] = { + EMOTION_CLASSES[i]: float(predictions[0][i]) + for i in range(len(EMOTION_CLASSES)) + } + + return result + + def predict_batch( + self, + images: List[Union[np.ndarray, str, Path]], + detect_face: bool = True + ) -> Dict: + """ + Predict emotions for multiple images. 
+ + Args: + images: List of images + detect_face: Whether to detect faces + + Returns: + Batch prediction results + """ + results = [] + emotion_counts = {e: 0 for e in EMOTION_CLASSES} + successful_predictions = 0 + + for i, image in enumerate(images): + result = self.predict(image, detect_face) + result["image_index"] = i + results.append(result) + + if "error" not in result: + emotion_counts[result["emotion"]] += 1 + successful_predictions += 1 + + # Calculate distribution + if successful_predictions > 0: + emotion_distribution = { + e: count / successful_predictions + for e, count in emotion_counts.items() + } + else: + emotion_distribution = {e: 0.0 for e in EMOTION_CLASSES} + + # Find dominant emotion + dominant_emotion = max(emotion_counts.items(), key=lambda x: x[1]) + + return { + "results": results, + "summary": { + "total_images": len(images), + "successful_predictions": successful_predictions, + "failed_predictions": len(images) - successful_predictions, + "emotion_counts": emotion_counts, + "emotion_distribution": emotion_distribution, + "dominant_emotion": dominant_emotion[0], + "dominant_emotion_count": dominant_emotion[1] + }, + "model_used": self.model_name + } + + def _calculate_intensity(self, confidence: float) -> str: + """ + Calculate emotion intensity based on confidence. + + Args: + confidence: Prediction confidence + + Returns: + Intensity level ('high', 'medium', 'low') + """ + if confidence >= INTENSITY_HIGH_THRESHOLD: + return "high" + elif confidence >= INTENSITY_MEDIUM_THRESHOLD: + return "medium" + else: + return "low" + + def visualize_prediction( + self, + image: np.ndarray, + prediction: Dict + ) -> np.ndarray: + """ + Visualize prediction on image. 
+ + Args: + image: Original image + prediction: Prediction result + + Returns: + Image with visualizations + """ + result = image.copy() + + if self.face_detector and prediction.get("faces_info"): + # Draw face detection and emotion label + result = self.face_detector.draw_detections( + result, + prediction["faces_info"], + emotions=[prediction.get("emotion", "Unknown")], + confidences=[prediction.get("confidence", 0)] + ) + + return result + + @staticmethod + def get_available_models() -> Dict[str, bool]: + """ + Get available trained models. + + Returns: + Dictionary of model name -> availability + """ + return { + "custom_cnn": CUSTOM_CNN_PATH.exists(), + "mobilenet": MOBILENET_PATH.exists(), + "vgg19": VGG_PATH.exists() + } + + +def create_predictor( + model_name: str = "custom_cnn", + auto_load: bool = True +) -> Optional[EmotionPredictor]: + """ + Factory function to create a predictor. + + Args: + model_name: Name of the model + auto_load: Whether to automatically load the model + + Returns: + EmotionPredictor instance or None if loading fails + """ + predictor = EmotionPredictor(model_name) + + if auto_load: + if not predictor.load(): + return None + + return predictor + + +if __name__ == "__main__": + # Show available models + print("Available models:") + for name, available in EmotionPredictor.get_available_models().items(): + status = "✓" if available else "✗" + print(f" {status} {name}") diff --git a/src/models/__init__.py b/src/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8a4957d3edadf19ec3f964ddcb4e0b00c533e6e2 --- /dev/null +++ b/src/models/__init__.py @@ -0,0 +1,13 @@ +from .custom_cnn import build_custom_cnn +from .mobilenet_model import build_mobilenet_model +from .vgg_model import build_vgg_model +from .model_utils import load_model, save_model, get_model_summary + +__all__ = [ + "build_custom_cnn", + "build_mobilenet_model", + "build_vgg_model", + "load_model", + "save_model", + "get_model_summary" +] 
diff --git a/src/models/__pycache__/__init__.cpython-310.pyc b/src/models/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08c31b4acc08f29c96a740084d554000adfea678 Binary files /dev/null and b/src/models/__pycache__/__init__.cpython-310.pyc differ diff --git a/src/models/__pycache__/custom_cnn.cpython-310.pyc b/src/models/__pycache__/custom_cnn.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a7dd42380fc9cff481db60e1e83e606c98b9f7d Binary files /dev/null and b/src/models/__pycache__/custom_cnn.cpython-310.pyc differ diff --git a/src/models/__pycache__/mobilenet_model.cpython-310.pyc b/src/models/__pycache__/mobilenet_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd8fb5544f38d05926e6c991676745ee1d9143d1 Binary files /dev/null and b/src/models/__pycache__/mobilenet_model.cpython-310.pyc differ diff --git a/src/models/__pycache__/model_utils.cpython-310.pyc b/src/models/__pycache__/model_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c1b9a606c3b5f360374947647bb405a0abaecde8 Binary files /dev/null and b/src/models/__pycache__/model_utils.cpython-310.pyc differ diff --git a/src/models/__pycache__/vgg_model.cpython-310.pyc b/src/models/__pycache__/vgg_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6696754cbe7f92925ce7290d1df0d9d9b3fcc3a9 Binary files /dev/null and b/src/models/__pycache__/vgg_model.cpython-310.pyc differ diff --git a/src/models/custom_cnn.py b/src/models/custom_cnn.py new file mode 100644 index 0000000000000000000000000000000000000000..749c7d328bc2972d293e49a0bd9c2dda9073337a --- /dev/null +++ b/src/models/custom_cnn.py @@ -0,0 +1,183 @@ +""" +Custom CNN model architecture for emotion recognition. +Optimized for 48x48 grayscale images. 
+""" +import tensorflow as tf +from tensorflow.keras.models import Sequential, Model +from tensorflow.keras.layers import ( + Conv2D, MaxPooling2D, Dense, Dropout, Flatten, + BatchNormalization, Input, GlobalAveragePooling2D +) +from tensorflow.keras.regularizers import l2 + +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent.parent.parent)) +from src.config import IMAGE_SIZE, NUM_CLASSES, NUM_CHANNELS + + +def build_custom_cnn( + input_shape: tuple = (*IMAGE_SIZE, NUM_CHANNELS), + num_classes: int = NUM_CLASSES, + dropout_rate: float = 0.25, + dense_dropout: float = 0.5, + l2_reg: float = 0.01 +) -> Model: + """ + Build a custom CNN architecture for emotion recognition. + + Architecture: + - 4 Convolutional blocks with increasing filters (64 -> 128 -> 256 -> 512) + - Each block: Conv2D -> BatchNorm -> ReLU -> MaxPool -> Dropout + - Dense layers for classification + + Args: + input_shape: Input image shape (height, width, channels) + num_classes: Number of emotion classes + dropout_rate: Dropout rate for conv blocks + dense_dropout: Dropout rate for dense layers + l2_reg: L2 regularization factor + + Returns: + Compiled Keras model + """ + model = Sequential([ + # Input layer + Input(shape=input_shape), + + # Block 1: 64 filters + Conv2D(64, (3, 3), padding='same', activation='relu', + kernel_regularizer=l2(l2_reg)), + BatchNormalization(), + Conv2D(64, (3, 3), padding='same', activation='relu', + kernel_regularizer=l2(l2_reg)), + BatchNormalization(), + MaxPooling2D(pool_size=(2, 2)), + Dropout(dropout_rate), + + # Block 2: 128 filters + Conv2D(128, (3, 3), padding='same', activation='relu', + kernel_regularizer=l2(l2_reg)), + BatchNormalization(), + Conv2D(128, (3, 3), padding='same', activation='relu', + kernel_regularizer=l2(l2_reg)), + BatchNormalization(), + MaxPooling2D(pool_size=(2, 2)), + Dropout(dropout_rate), + + # Block 3: 256 filters + Conv2D(256, (3, 3), padding='same', activation='relu', + kernel_regularizer=l2(l2_reg)), 
+ BatchNormalization(), + Conv2D(256, (3, 3), padding='same', activation='relu', + kernel_regularizer=l2(l2_reg)), + BatchNormalization(), + MaxPooling2D(pool_size=(2, 2)), + Dropout(dropout_rate), + + # Block 4: 512 filters + Conv2D(512, (3, 3), padding='same', activation='relu', + kernel_regularizer=l2(l2_reg)), + BatchNormalization(), + Conv2D(512, (3, 3), padding='same', activation='relu', + kernel_regularizer=l2(l2_reg)), + BatchNormalization(), + MaxPooling2D(pool_size=(2, 2)), + Dropout(dropout_rate), + + # Classification head + Flatten(), + Dense(512, activation='relu', kernel_regularizer=l2(l2_reg)), + BatchNormalization(), + Dropout(dense_dropout), + Dense(256, activation='relu', kernel_regularizer=l2(l2_reg)), + BatchNormalization(), + Dropout(dense_dropout), + Dense(num_classes, activation='softmax') + ], name='custom_emotion_cnn') + + return model + + +def build_custom_cnn_v2( + input_shape: tuple = (*IMAGE_SIZE, NUM_CHANNELS), + num_classes: int = NUM_CLASSES +) -> Model: + """ + Alternative CNN architecture with residual-like connections. 
+ + Args: + input_shape: Input image shape + num_classes: Number of emotion classes + + Returns: + Keras model + """ + inputs = Input(shape=input_shape) + + # Initial convolution + x = Conv2D(32, (3, 3), padding='same', activation='relu')(inputs) + x = BatchNormalization()(x) + + # Block 1 + x = Conv2D(64, (3, 3), padding='same', activation='relu')(x) + x = BatchNormalization()(x) + x = Conv2D(64, (3, 3), padding='same', activation='relu')(x) + x = BatchNormalization()(x) + x = MaxPooling2D(pool_size=(2, 2))(x) + x = Dropout(0.25)(x) + + # Block 2 + x = Conv2D(128, (3, 3), padding='same', activation='relu')(x) + x = BatchNormalization()(x) + x = Conv2D(128, (3, 3), padding='same', activation='relu')(x) + x = BatchNormalization()(x) + x = MaxPooling2D(pool_size=(2, 2))(x) + x = Dropout(0.25)(x) + + # Block 3 + x = Conv2D(256, (3, 3), padding='same', activation='relu')(x) + x = BatchNormalization()(x) + x = Conv2D(256, (3, 3), padding='same', activation='relu')(x) + x = BatchNormalization()(x) + x = MaxPooling2D(pool_size=(2, 2))(x) + x = Dropout(0.25)(x) + + # Global pooling and classification + x = GlobalAveragePooling2D()(x) + x = Dense(256, activation='relu')(x) + x = BatchNormalization()(x) + x = Dropout(0.5)(x) + outputs = Dense(num_classes, activation='softmax')(x) + + model = Model(inputs=inputs, outputs=outputs, name='custom_emotion_cnn_v2') + + return model + + +def get_model_config() -> dict: + """ + Get the default model configuration. 
+ + Returns: + Dictionary with model configuration + """ + return { + "name": "Custom CNN", + "input_shape": (*IMAGE_SIZE, NUM_CHANNELS), + "num_classes": NUM_CLASSES, + "expected_accuracy": "60-68%", + "training_time": "~30 minutes (GPU)", + "parameters": "~5M" + } + + +if __name__ == "__main__": + # Build and display model summary + model = build_custom_cnn() + model.summary() + + print("\nModel configuration:") + config = get_model_config() + for key, value in config.items(): + print(f" {key}: {value}") diff --git a/src/models/mobilenet_model.py b/src/models/mobilenet_model.py new file mode 100644 index 0000000000000000000000000000000000000000..42f7f8f5f020f05570ed8e476262a077486fa010 --- /dev/null +++ b/src/models/mobilenet_model.py @@ -0,0 +1,203 @@ +""" +MobileNetV2 transfer learning model for emotion recognition. +""" +import tensorflow as tf +from tensorflow.keras.models import Model +from tensorflow.keras.layers import ( + Dense, Dropout, GlobalAveragePooling2D, + BatchNormalization, Input, Lambda +) +from tensorflow.keras.applications import MobileNetV2 + +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent.parent.parent)) +from src.config import IMAGE_SIZE_TRANSFER, NUM_CLASSES, NUM_CHANNELS_RGB + + +def build_mobilenet_model( + input_shape: tuple = (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB), + num_classes: int = NUM_CLASSES, + trainable_layers: int = 30, + dropout_rate: float = 0.5 +) -> Model: + """ + Build MobileNetV2 transfer learning model for emotion recognition. 
+ + Args: + input_shape: Input image shape (height, width, channels) + num_classes: Number of emotion classes + trainable_layers: Number of top layers to make trainable + dropout_rate: Dropout rate for dense layers + + Returns: + Keras model + """ + # Load pre-trained MobileNetV2 + base_model = MobileNetV2( + weights='imagenet', + include_top=False, + input_shape=input_shape + ) + + # Freeze base layers + for layer in base_model.layers[:-trainable_layers]: + layer.trainable = False + + # Make top layers trainable + for layer in base_model.layers[-trainable_layers:]: + layer.trainable = True + + # Build the model + inputs = Input(shape=input_shape) + + # Preprocess input for MobileNetV2 using Rescaling layer + # MobileNetV2 expects inputs in [-1, 1] range + x = tf.keras.layers.Rescaling(scale=1./127.5, offset=-1.0)(inputs) + + # Pass through base model + x = base_model(x, training=True) + + # Classification head + x = GlobalAveragePooling2D()(x) + x = Dense(512, activation='relu')(x) + x = BatchNormalization()(x) + x = Dropout(dropout_rate)(x) + x = Dense(256, activation='relu')(x) + x = BatchNormalization()(x) + x = Dropout(dropout_rate)(x) + outputs = Dense(num_classes, activation='softmax')(x) + + model = Model(inputs=inputs, outputs=outputs, name='mobilenet_emotion') + + return model + + +def build_mobilenet_from_grayscale( + input_shape: tuple = (*IMAGE_SIZE_TRANSFER, 1), + num_classes: int = NUM_CLASSES, + trainable_layers: int = 30, + dropout_rate: float = 0.5 +) -> Model: + """ + Build MobileNetV2 model that accepts grayscale input. + Converts grayscale to RGB internally. 
+ + Args: + input_shape: Input shape for grayscale images + num_classes: Number of emotion classes + trainable_layers: Number of top layers to make trainable + dropout_rate: Dropout rate + + Returns: + Keras model + """ + # Load pre-trained MobileNetV2 + base_model = MobileNetV2( + weights='imagenet', + include_top=False, + input_shape=(*IMAGE_SIZE_TRANSFER, 3) + ) + + # Freeze base layers + for layer in base_model.layers[:-trainable_layers]: + layer.trainable = False + + # Input for grayscale image + inputs = Input(shape=input_shape) + + # Convert grayscale to RGB by repeating channels + x = tf.keras.layers.Concatenate()([inputs, inputs, inputs]) + + # Preprocess for MobileNetV2 using Rescaling layer + x = tf.keras.layers.Rescaling(scale=1./127.5, offset=-1.0)(x) + + # Base model + x = base_model(x, training=True) + + # Classification head + x = GlobalAveragePooling2D()(x) + x = Dense(512, activation='relu')(x) + x = BatchNormalization()(x) + x = Dropout(dropout_rate)(x) + x = Dense(256, activation='relu')(x) + x = BatchNormalization()(x) + x = Dropout(dropout_rate)(x) + outputs = Dense(num_classes, activation='softmax')(x) + + model = Model(inputs=inputs, outputs=outputs, name='mobilenet_emotion_grayscale') + + return model + + +def freeze_base_model(model: Model) -> Model: + """ + Freeze all layers in the base MobileNetV2 model. + Useful for initial training with frozen weights. + + Args: + model: MobileNet emotion model + + Returns: + Model with frozen base + """ + for layer in model.layers: + if 'mobilenet' in layer.name.lower(): + layer.trainable = False + return model + + +def unfreeze_top_layers(model: Model, num_layers: int = 30) -> Model: + """ + Unfreeze top layers of the base model for fine-tuning. 
+ + Args: + model: MobileNet emotion model + num_layers: Number of top layers to unfreeze + + Returns: + Model with partially unfrozen base + """ + for layer in model.layers: + if 'mobilenet' in layer.name.lower(): + # Get base model and unfreeze top layers + for base_layer in layer.layers[-num_layers:]: + base_layer.trainable = True + return model + + +def get_model_config() -> dict: + """ + Get the default model configuration. + + Returns: + Dictionary with model configuration + """ + return { + "name": "MobileNetV2", + "input_shape": (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB), + "num_classes": NUM_CLASSES, + "expected_accuracy": "65-72%", + "training_time": "~45 minutes (GPU)", + "parameters": "~3.5M", + "base_model": "MobileNetV2 (ImageNet)" + } + + +if __name__ == "__main__": + # Build and display model summary + print("Building MobileNetV2 model...") + model = build_mobilenet_model() + + # Count trainable parameters + trainable = sum([tf.keras.backend.count_params(w) for w in model.trainable_weights]) + non_trainable = sum([tf.keras.backend.count_params(w) for w in model.non_trainable_weights]) + + print(f"\nTotal parameters: {trainable + non_trainable:,}") + print(f"Trainable parameters: {trainable:,}") + print(f"Non-trainable parameters: {non_trainable:,}") + + print("\nModel configuration:") + config = get_model_config() + for key, value in config.items(): + print(f" {key}: {value}") diff --git a/src/models/model_utils.py b/src/models/model_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e2050b3472e30c39de226852f244fae7cd5e4ab6 --- /dev/null +++ b/src/models/model_utils.py @@ -0,0 +1,491 @@ +# """ +# Model utility functions for saving, loading, and inspecting models. 
+# """ +# import os +# import json +# from pathlib import Path +# from typing import Dict, Optional, Union + +# import tensorflow as tf +# from tensorflow.keras.models import Model, load_model as keras_load_model + +# import sys +# sys.path.append(str(Path(__file__).parent.parent.parent)) +# from src.config import MODELS_DIR, CUSTOM_CNN_PATH, MOBILENET_PATH, VGG_PATH + + +# def save_model( +# model: Model, +# save_path: Union[str, Path], +# save_format: str = 'h5', +# include_optimizer: bool = True, +# save_metadata: bool = True, +# metadata: Optional[Dict] = None +# ) -> None: +# """ +# Save a trained model to disk. + +# Args: +# model: Keras model to save +# save_path: Path to save the model +# save_format: Format to save ('h5' or 'tf') +# include_optimizer: Whether to include optimizer state +# save_metadata: Whether to save training metadata +# metadata: Optional metadata dictionary +# """ +# save_path = Path(save_path) + +# # Create directory if needed +# save_path.parent.mkdir(parents=True, exist_ok=True) + +# if save_format == 'h5': +# model.save(str(save_path), include_optimizer=include_optimizer) +# else: +# # SavedModel format +# model.save(str(save_path.with_suffix('')), save_format='tf') + +# # Save metadata if requested +# if save_metadata and metadata: +# metadata_path = save_path.with_suffix('.json') +# with open(metadata_path, 'w') as f: +# json.dump(metadata, f, indent=2) + +# print(f"Model saved to: {save_path}") + + +# def load_model( +# model_path: Union[str, Path], +# custom_objects: Optional[Dict] = None, +# compile_model: bool = True +# ) -> Model: +# """ +# Load a saved model from disk. 
+ +# Args: +# model_path: Path to the saved model +# custom_objects: Optional custom objects for loading +# compile_model: Whether to compile the model + +# Returns: +# Loaded Keras model +# """ +# model_path = Path(model_path) + +# if not model_path.exists(): +# # Check if it's a SavedModel directory +# if model_path.with_suffix('').exists(): +# model_path = model_path.with_suffix('') +# else: +# raise FileNotFoundError(f"Model not found: {model_path}") + +# model = keras_load_model( +# str(model_path), +# custom_objects=custom_objects, +# compile=compile_model +# ) + +# print(f"Model loaded from: {model_path}") +# return model + + +# def load_model_metadata(model_path: Union[str, Path]) -> Optional[Dict]: +# """ +# Load metadata for a saved model. + +# Args: +# model_path: Path to the saved model + +# Returns: +# Metadata dictionary or None +# """ +# metadata_path = Path(model_path).with_suffix('.json') + +# if metadata_path.exists(): +# with open(metadata_path, 'r') as f: +# return json.load(f) +# return None + + +# def get_model_summary(model: Model, print_summary: bool = True) -> Dict: +# """ +# Get a summary of the model architecture. + +# Args: +# model: Keras model +# print_summary: Whether to print the summary + +# Returns: +# Dictionary with model statistics +# """ +# if print_summary: +# model.summary() + +# # Calculate parameters +# trainable = sum([tf.keras.backend.count_params(w) for w in model.trainable_weights]) +# non_trainable = sum([tf.keras.backend.count_params(w) for w in model.non_trainable_weights]) + +# summary = { +# "name": model.name, +# "total_params": trainable + non_trainable, +# "trainable_params": trainable, +# "non_trainable_params": non_trainable, +# "num_layers": len(model.layers), +# "input_shape": model.input_shape, +# "output_shape": model.output_shape +# } + +# return summary + + +# def get_available_models() -> Dict[str, Dict]: +# """ +# Get information about available pre-trained models. 
+ +# Returns: +# Dictionary with model information +# """ +# models = {} + +# model_paths = { +# "custom_cnn": CUSTOM_CNN_PATH, +# "mobilenet": MOBILENET_PATH, +# "vgg19": VGG_PATH +# } + +# for name, path in model_paths.items(): +# if Path(path).exists(): +# metadata = load_model_metadata(path) +# models[name] = { +# "path": str(path), +# "exists": True, +# "metadata": metadata +# } +# else: +# models[name] = { +# "path": str(path), +# "exists": False, +# "metadata": None +# } + +# return models + + +# def compare_models(models: Dict[str, Model]) -> Dict: +# """ +# Compare multiple models. + +# Args: +# models: Dictionary of model name -> model + +# Returns: +# Comparison dictionary +# """ +# comparison = {} + +# for name, model in models.items(): +# summary = get_model_summary(model, print_summary=False) +# comparison[name] = { +# "params": summary["total_params"], +# "trainable_params": summary["trainable_params"], +# "layers": summary["num_layers"] +# } + +# return comparison + + +# def export_to_tflite( +# model: Model, +# save_path: Union[str, Path], +# quantize: bool = False +# ) -> None: +# """ +# Export model to TensorFlow Lite format. + +# Args: +# model: Keras model to export +# save_path: Path to save the TFLite model +# quantize: Whether to apply quantization +# """ +# converter = tf.lite.TFLiteConverter.from_keras_model(model) + +# if quantize: +# converter.optimizations = [tf.lite.Optimize.DEFAULT] + +# tflite_model = converter.convert() + +# save_path = Path(save_path) +# save_path.parent.mkdir(parents=True, exist_ok=True) + +# with open(save_path, 'wb') as f: +# f.write(tflite_model) + +# print(f"TFLite model saved to: {save_path}") + + +# if __name__ == "__main__": +# print("Available models:") +# models = get_available_models() +# for name, info in models.items(): +# status = "✓ Trained" if info["exists"] else "✗ Not trained" +# print(f" {name}: {status}") + +""" +Model utility functions for saving, loading, and inspecting models. 
+""" +import os +import json +from pathlib import Path +from typing import Dict, Optional, Union + +import tensorflow as tf +from tensorflow.keras.models import Model, load_model as keras_load_model + +import sys +sys.path.append(str(Path(__file__).parent.parent.parent)) +from src.config import MODELS_DIR, CUSTOM_CNN_PATH, MOBILENET_PATH, VGG_PATH + + +# --------------------------------------------------------------------------- +# Legacy preprocessing functions +# --------------------------------------------------------------------------- +# Older saved .h5 models used Lambda layers that baked these functions in. +# Current model code uses Rescaling layers instead, but these definitions +# must remain so keras_load_model() can deserialise the old .h5 files. +# --------------------------------------------------------------------------- + +def preprocess_mobilenet(x): + """Legacy MobileNetV2 preprocessor — scales pixels to [-1, 1].""" + return x / 127.5 - 1.0 + + +def preprocess_vgg(x): + """Legacy VGG-19 preprocessor — mean-subtracted scaling.""" + return x * 255.0 - 127.5 + + +_LEGACY_CUSTOM_OBJECTS: Dict = { + "preprocess_mobilenet": preprocess_mobilenet, + "preprocess_vgg": preprocess_vgg, +} + + +def save_model( + model: Model, + save_path: Union[str, Path], + save_format: str = 'h5', + include_optimizer: bool = True, + save_metadata: bool = True, + metadata: Optional[Dict] = None +) -> None: + """ + Save a trained model to disk. 
def load_model(
    model_path: Union[str, Path],
    custom_objects: Optional[Dict] = None,
    compile_model: bool = True
) -> Model:
    """
    Load a saved model from disk.

    The legacy preprocessing functions are always injected as custom
    objects, so older .h5 checkpoints saved with Lambda layers load
    without the caller having to pass anything extra.

    Args:
        model_path: Path to the saved model (.h5 file or SavedModel dir)
        custom_objects: Extra custom objects; on a name collision these
            override the legacy defaults
        compile_model: Whether to compile the model after loading

    Returns:
        Loaded Keras model

    Raises:
        FileNotFoundError: If neither the path nor its suffix-stripped
            SavedModel variant exists
    """
    resolved = Path(model_path)

    # An .h5 path that is missing may still exist as a SavedModel
    # directory (same stem, no suffix) — fall back before giving up.
    if not resolved.exists():
        as_dir = resolved.with_suffix('')
        if not as_dir.exists():
            raise FileNotFoundError(f"Model not found: {resolved}")
        resolved = as_dir

    # Legacy objects first; caller-supplied ones win on conflict.
    merged_objects = {**_LEGACY_CUSTOM_OBJECTS, **(custom_objects or {})}

    model = keras_load_model(
        str(resolved),
        custom_objects=merged_objects,
        compile=compile_model
    )

    print(f"Model loaded from: {resolved}")
    return model
def get_model_summary(model: Model, print_summary: bool = True) -> Dict:
    """
    Summarise a model's architecture and parameter counts.

    Args:
        model: Keras model to inspect
        print_summary: If True, also print Keras' layer-by-layer summary

    Returns:
        Dictionary with name, parameter counts (total / trainable /
        non-trainable), layer count, and input/output shapes
    """
    if print_summary:
        model.summary()

    # Hoist the counting helper, then total each trainability bucket.
    count = tf.keras.backend.count_params
    trainable = sum(count(w) for w in model.trainable_weights)
    non_trainable = sum(count(w) for w in model.non_trainable_weights)

    return {
        "name": model.name,
        "total_params": trainable + non_trainable,
        "trainable_params": trainable,
        "non_trainable_params": non_trainable,
        "num_layers": len(model.layers),
        "input_shape": model.input_shape,
        "output_shape": model.output_shape
    }
def export_to_tflite(
    model: Model,
    save_path: Union[str, Path],
    quantize: bool = False
) -> None:
    """
    Export a Keras model to TensorFlow Lite format.

    Args:
        model: Keras model to export
        save_path: Destination file for the .tflite flatbuffer
        quantize: If True, apply default post-training quantization
    """
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    if quantize:
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

    # Convert before touching the filesystem so a failed conversion
    # leaves no empty directories behind.
    tflite_model = converter.convert()

    destination = Path(save_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_bytes(tflite_model)

    print(f"TFLite model saved to: {destination}")
+""" +import tensorflow as tf +from tensorflow.keras.models import Model +from tensorflow.keras.layers import ( + Dense, Dropout, GlobalAveragePooling2D, Flatten, + BatchNormalization, Input, Lambda +) +from tensorflow.keras.applications import VGG19 + +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent.parent.parent)) +from src.config import IMAGE_SIZE_TRANSFER, NUM_CLASSES, NUM_CHANNELS_RGB + + +def build_vgg_model( + input_shape: tuple = (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB), + num_classes: int = NUM_CLASSES, + trainable_layers: int = 4, + dropout_rate: float = 0.5 +) -> Model: + """ + Build VGG-19 transfer learning model for emotion recognition. + + Args: + input_shape: Input image shape (height, width, channels) + num_classes: Number of emotion classes + trainable_layers: Number of top convolutional layers to make trainable + dropout_rate: Dropout rate for dense layers + + Returns: + Keras model + """ + # Load pre-trained VGG19 + base_model = VGG19( + weights='imagenet', + include_top=False, + input_shape=input_shape + ) + + # Freeze all layers initially + for layer in base_model.layers: + layer.trainable = False + + # Unfreeze top convolutional layers for fine-tuning + for layer in base_model.layers[-trainable_layers:]: + layer.trainable = True + + # Build the model + inputs = Input(shape=input_shape) + + # Preprocess input for VGG19 using Rescaling layer + # VGG19 expects inputs scaled to 0-255 range with mean subtraction + x = tf.keras.layers.Rescaling(scale=255.0, offset=-127.5)(inputs) + + # Pass through base model + x = base_model(x, training=True) + + # Classification head + x = GlobalAveragePooling2D()(x) + x = Dense(512, activation='relu')(x) + x = BatchNormalization()(x) + x = Dropout(dropout_rate)(x) + x = Dense(256, activation='relu')(x) + x = BatchNormalization()(x) + x = Dropout(dropout_rate)(x) + outputs = Dense(num_classes, activation='softmax')(x) + + model = Model(inputs=inputs, outputs=outputs, 
def build_vgg_from_grayscale(
    input_shape: tuple = (*IMAGE_SIZE_TRANSFER, 1),
    num_classes: int = NUM_CLASSES,
    trainable_layers: int = 4,
    dropout_rate: float = 0.5
) -> Model:
    """
    Build VGG-19 model that accepts grayscale input.
    Converts grayscale to RGB internally by channel replication.

    Args:
        input_shape: Input shape for grayscale images (height, width, 1)
        num_classes: Number of emotion classes
        trainable_layers: Number of top base-model layers to make trainable
        dropout_rate: Dropout rate for the dense classification head

    Returns:
        Keras model
    """
    # Load pre-trained VGG19 with an RGB input shape; the grayscale
    # input is expanded to three channels before reaching it (below).
    base_model = VGG19(
        weights='imagenet',
        include_top=False,
        input_shape=(*IMAGE_SIZE_TRANSFER, 3)
    )

    # Freeze base layers
    for layer in base_model.layers:
        layer.trainable = False

    # Unfreeze top layers
    # NOTE(review): this positional slice can land on pooling layers (they
    # hold no weights, so it is harmless, but fewer conv layers than
    # `trainable_layers` may actually unfreeze) — confirm intended coverage.
    for layer in base_model.layers[-trainable_layers:]:
        layer.trainable = True

    # Input for grayscale image
    inputs = Input(shape=input_shape)

    # Convert grayscale to RGB by repeating channels
    x = tf.keras.layers.Concatenate()([inputs, inputs, inputs])

    # Preprocess for VGG19 using Rescaling layer
    # NOTE(review): this maps [0, 1] inputs to [-127.5, 127.5]; it is NOT the
    # canonical keras.applications.vgg19.preprocess_input (caffe-style BGR
    # per-channel mean subtraction). Presumably intentional for this
    # project's pipeline — confirm against how the checkpoints were trained.
    x = tf.keras.layers.Rescaling(scale=255.0, offset=-127.5)(x)

    # Base model
    # NOTE(review): training=True keeps the base in training mode even though
    # its layers are frozen — confirm this is the intended behaviour.
    x = base_model(x, training=True)

    # Classification head
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs, name='vgg19_emotion_grayscale')

    return model
def unfreeze_top_blocks(model: Model, num_blocks: int = 1) -> Model:
    """
    Unfreeze the convolutional layers of the top `num_blocks` VGG blocks.

    VGG-19 has 5 convolutional blocks and its layers follow the
    'block{N}_conv{M}' / 'block{N}_pool' naming convention, so layers are
    selected by block-name prefix.

    Bug fix: the previous implementation sliced `layer.layers[-k:]` with
    `k` equal to the *conv* layer count, but pooling layers are interleaved
    in the layer list — e.g. for num_blocks=1 the slice grabbed
    block5_conv2..4 plus block5_pool and silently missed block5_conv1.

    Args:
        model: VGG emotion model; the base VGG is a nested sub-model whose
            layer name contains 'vgg'
        num_blocks: Number of blocks to unfreeze, counted from the top
            (num_blocks=1 unfreezes block5's conv layers)

    Returns:
        The same model, mutated in place.
    """
    # Target block name prefixes: block5_, block4_, ... down to
    # block(6 - num_blocks)_.
    target_prefixes = tuple(f"block{i}_" for i in range(6 - num_blocks, 6))

    for layer in model.layers:
        if 'vgg' in layer.name.lower():
            for vgg_layer in layer.layers:
                # Only conv layers carry weights worth fine-tuning;
                # pooling layers are skipped by the 'conv' check.
                if vgg_layer.name.startswith(target_prefixes) and 'conv' in vgg_layer.name:
                    vgg_layer.trainable = True

    return model
+ + Returns: + Dictionary with model configuration + """ + return { + "name": "VGG-19", + "input_shape": (*IMAGE_SIZE_TRANSFER, NUM_CHANNELS_RGB), + "num_classes": NUM_CLASSES, + "expected_accuracy": "68-75%", + "training_time": "~60 minutes (GPU)", + "parameters": "~20M", + "base_model": "VGG-19 (ImageNet)" + } + + +if __name__ == "__main__": + # Build and display model summary + print("Building VGG-19 model...") + model = build_vgg_model() + + # Count trainable parameters + trainable = sum([tf.keras.backend.count_params(w) for w in model.trainable_weights]) + non_trainable = sum([tf.keras.backend.count_params(w) for w in model.non_trainable_weights]) + + print(f"\nTotal parameters: {trainable + non_trainable:,}") + print(f"Trainable parameters: {trainable:,}") + print(f"Non-trainable parameters: {non_trainable:,}") + + print("\nModel configuration:") + config = get_model_config() + for key, value in config.items(): + print(f" {key}: {value}") diff --git a/src/preprocessing/__init__.py b/src/preprocessing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..abe449db734dcfad94ee81daa25b6730caba89d3 --- /dev/null +++ b/src/preprocessing/__init__.py @@ -0,0 +1,12 @@ +from .data_loader import create_data_generators, load_dataset, get_class_weights +from .augmentation import get_augmentation_generator, augment_image +from .face_detector import FaceDetector + +__all__ = [ + "create_data_generators", + "load_dataset", + "get_class_weights", + "get_augmentation_generator", + "augment_image", + "FaceDetector" +] diff --git a/src/preprocessing/__pycache__/__init__.cpython-310.pyc b/src/preprocessing/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58c8b86ff6361566f1cf7832c0402ef194626743 Binary files /dev/null and b/src/preprocessing/__pycache__/__init__.cpython-310.pyc differ diff --git a/src/preprocessing/__pycache__/augmentation.cpython-310.pyc 
b/src/preprocessing/__pycache__/augmentation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d882ffac1d13d40c9b422287a07abff8c70629b Binary files /dev/null and b/src/preprocessing/__pycache__/augmentation.cpython-310.pyc differ diff --git a/src/preprocessing/__pycache__/data_loader.cpython-310.pyc b/src/preprocessing/__pycache__/data_loader.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7eddd821431c0cdc5758e05b3b442f762699ee9a Binary files /dev/null and b/src/preprocessing/__pycache__/data_loader.cpython-310.pyc differ diff --git a/src/preprocessing/__pycache__/face_detector.cpython-310.pyc b/src/preprocessing/__pycache__/face_detector.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aec22bc31c46e84267f20dccfd6bb9582c1ed242 Binary files /dev/null and b/src/preprocessing/__pycache__/face_detector.cpython-310.pyc differ diff --git a/src/preprocessing/augmentation.py b/src/preprocessing/augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..a4959d2cc025a7f5d8cbb507b9c0b34c551522ee --- /dev/null +++ b/src/preprocessing/augmentation.py @@ -0,0 +1,157 @@ +""" +Data augmentation utilities for the Emotion Recognition System. 
+""" +import numpy as np +from typing import Tuple, Optional +from tensorflow.keras.preprocessing.image import ImageDataGenerator + +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent.parent.parent)) +from src.config import AUGMENTATION_CONFIG + + +def get_augmentation_generator( + rotation_range: int = AUGMENTATION_CONFIG["rotation_range"], + width_shift_range: float = AUGMENTATION_CONFIG["width_shift_range"], + height_shift_range: float = AUGMENTATION_CONFIG["height_shift_range"], + horizontal_flip: bool = AUGMENTATION_CONFIG["horizontal_flip"], + zoom_range: float = AUGMENTATION_CONFIG["zoom_range"], + brightness_range: Tuple[float, float] = AUGMENTATION_CONFIG["brightness_range"], + fill_mode: str = AUGMENTATION_CONFIG["fill_mode"], + rescale: float = 1./255 +) -> ImageDataGenerator: + """ + Create an ImageDataGenerator with augmentation settings. + + Args: + rotation_range: Degree range for random rotations + width_shift_range: Fraction for horizontal shifts + height_shift_range: Fraction for vertical shifts + horizontal_flip: Whether to randomly flip horizontally + zoom_range: Range for random zoom + brightness_range: Range for brightness adjustment + fill_mode: Points outside boundaries fill method + rescale: Rescaling factor + + Returns: + Configured ImageDataGenerator + """ + return ImageDataGenerator( + rescale=rescale, + rotation_range=rotation_range, + width_shift_range=width_shift_range, + height_shift_range=height_shift_range, + horizontal_flip=horizontal_flip, + zoom_range=zoom_range, + brightness_range=brightness_range, + fill_mode=fill_mode + ) + + +def augment_image( + image: np.ndarray, + num_augmentations: int = 5, + generator: Optional[ImageDataGenerator] = None +) -> np.ndarray: + """ + Generate augmented versions of a single image. 
def create_balanced_augmentation(
    images: np.ndarray,
    labels: np.ndarray,
    target_samples_per_class: int
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Create a balanced dataset through augmentation of minority classes.

    All original samples of every class are kept; classes already at or
    above the target are left untouched (never downsampled), so the output
    is only balanced up to `target_samples_per_class` for minority classes.

    Args:
        images: Array of images
        labels: Array of one-hot encoded labels
        target_samples_per_class: Target number of samples per class

    Returns:
        Tuple of (augmented_images, augmented_labels)
    """
    # rescale=1.0: presumably the incoming images are already normalised
    # upstream, so no additional scaling is applied here — TODO confirm.
    generator = get_augmentation_generator(rescale=1.0)

    # Convert one-hot to class indices
    class_indices = np.argmax(labels, axis=1)
    unique_classes = np.unique(class_indices)

    augmented_images = []
    augmented_labels = []

    for class_idx in unique_classes:
        # Get images of this class
        class_mask = class_indices == class_idx
        class_images = images[class_mask]
        class_labels = labels[class_mask]

        current_count = len(class_images)

        # Add original images
        augmented_images.extend(class_images)
        augmented_labels.extend(class_labels)

        # Generate more if needed
        if current_count < target_samples_per_class:
            needed = target_samples_per_class - current_count

            for i in range(needed):
                # Select random image from class.  Augmented samples are
                # always derived from an *original* image, never from a
                # previously augmented one, so distortions don't compound.
                idx = np.random.randint(0, current_count)
                original = class_images[idx]

                # Generate one augmented version
                aug = augment_image(original, num_augmentations=1, generator=generator)[0]
                augmented_images.append(aug)
                augmented_labels.append(class_labels[idx])

    return np.array(augmented_images), np.array(augmented_labels)
+""" +import os +import numpy as np +from pathlib import Path +from typing import Tuple, Dict, Optional +from collections import Counter + +import tensorflow as tf +from tensorflow.keras.preprocessing.image import ImageDataGenerator + +import sys +sys.path.append(str(Path(__file__).parent.parent.parent)) +from src.config import ( + TRAIN_DIR, TEST_DIR, IMAGE_SIZE, IMAGE_SIZE_TRANSFER, + BATCH_SIZE, VALIDATION_SPLIT, EMOTION_CLASSES, NUM_CLASSES, + AUGMENTATION_CONFIG +) + + +def create_data_generators( + use_augmentation: bool = True, + for_transfer_learning: bool = False, + batch_size: int = BATCH_SIZE, + validation_split: float = VALIDATION_SPLIT +) -> Tuple[tf.keras.preprocessing.image.DirectoryIterator, + tf.keras.preprocessing.image.DirectoryIterator, + tf.keras.preprocessing.image.DirectoryIterator]: + """ + Create data generators for training, validation, and testing. + + Args: + use_augmentation: Whether to apply data augmentation for training + for_transfer_learning: If True, resize images for transfer learning models + batch_size: Batch size for generators + validation_split: Fraction of training data to use for validation + + Returns: + Tuple of (train_generator, val_generator, test_generator) + """ + target_size = IMAGE_SIZE_TRANSFER if for_transfer_learning else IMAGE_SIZE + color_mode = 'rgb' if for_transfer_learning else 'grayscale' + + # Training data generator with augmentation + if use_augmentation: + train_datagen = ImageDataGenerator( + rescale=1./255, + rotation_range=AUGMENTATION_CONFIG["rotation_range"], + width_shift_range=AUGMENTATION_CONFIG["width_shift_range"], + height_shift_range=AUGMENTATION_CONFIG["height_shift_range"], + horizontal_flip=AUGMENTATION_CONFIG["horizontal_flip"], + zoom_range=AUGMENTATION_CONFIG["zoom_range"], + brightness_range=AUGMENTATION_CONFIG["brightness_range"], + fill_mode=AUGMENTATION_CONFIG["fill_mode"], + validation_split=validation_split + ) + else: + train_datagen = ImageDataGenerator( + rescale=1./255, + 
validation_split=validation_split + ) + + # Test data generator (no augmentation) + test_datagen = ImageDataGenerator(rescale=1./255) + + # Create generators + train_generator = train_datagen.flow_from_directory( + str(TRAIN_DIR), + target_size=target_size, + color_mode=color_mode, + batch_size=batch_size, + class_mode='categorical', + classes=EMOTION_CLASSES, + subset='training', + shuffle=True + ) + + val_generator = train_datagen.flow_from_directory( + str(TRAIN_DIR), + target_size=target_size, + color_mode=color_mode, + batch_size=batch_size, + class_mode='categorical', + classes=EMOTION_CLASSES, + subset='validation', + shuffle=False + ) + + test_generator = test_datagen.flow_from_directory( + str(TEST_DIR), + target_size=target_size, + color_mode=color_mode, + batch_size=batch_size, + class_mode='categorical', + classes=EMOTION_CLASSES, + shuffle=False + ) + + return train_generator, val_generator, test_generator + + +def load_dataset( + for_transfer_learning: bool = False +) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """ + Load the entire dataset into memory as numpy arrays. 
def get_class_weights(train_generator) -> Dict[int, float]:
    """
    Compute inverse-frequency class weights for imbalanced training data.

    Args:
        train_generator: Any object exposing a `classes` sequence of
            integer class indices (e.g. a Keras DirectoryIterator)

    Returns:
        Mapping of class index -> weight, where rarer classes receive
        larger weights: total / (num_classes * class_count)
    """
    counts = Counter(train_generator.classes)
    total = sum(counts.values())
    num_classes = len(counts)

    return {
        class_idx: total / (num_classes * count)
        for class_idx, count in counts.items()
    }
+ + Returns: + Dictionary with dataset statistics + """ + info = { + "train": {}, + "test": {}, + "emotion_classes": EMOTION_CLASSES, + "num_classes": NUM_CLASSES + } + + # Count training samples per class + for emotion in EMOTION_CLASSES: + train_path = TRAIN_DIR / emotion + test_path = TEST_DIR / emotion + + if train_path.exists(): + info["train"][emotion] = len(list(train_path.glob("*.png"))) + len(list(train_path.glob("*.jpg"))) + else: + info["train"][emotion] = 0 + + if test_path.exists(): + info["test"][emotion] = len(list(test_path.glob("*.png"))) + len(list(test_path.glob("*.jpg"))) + else: + info["test"][emotion] = 0 + + info["total_train"] = sum(info["train"].values()) + info["total_test"] = sum(info["test"].values()) + + return info + + +if __name__ == "__main__": + # Test data loading + print("Dataset Information:") + info = get_dataset_info() + print(f"Total training samples: {info['total_train']}") + print(f"Total test samples: {info['total_test']}") + print("\nSamples per class (training):") + for emotion, count in info["train"].items(): + print(f" {emotion}: {count}") diff --git a/src/preprocessing/face_detector.py b/src/preprocessing/face_detector.py new file mode 100644 index 0000000000000000000000000000000000000000..276faf71d2ba96895e3adbd9859a922f7d972bde --- /dev/null +++ b/src/preprocessing/face_detector.py @@ -0,0 +1,311 @@ +""" +Face detection using MTCNN for the Emotion Recognition System. +""" +import cv2 +import numpy as np +from typing import List, Tuple, Optional +from pathlib import Path +from PIL import Image + +try: + from mtcnn import MTCNN + MTCNN_AVAILABLE = True +except ImportError: + MTCNN_AVAILABLE = False + print("Warning: MTCNN not installed. Install with: pip install mtcnn") + +import sys +sys.path.append(str(Path(__file__).parent.parent.parent)) +from src.config import IMAGE_SIZE, IMAGE_SIZE_TRANSFER + + +class FaceDetector: + """ + Face detection and extraction using MTCNN. 
+ """ + + def __init__(self, min_face_size: int = 20, confidence_threshold: float = 0.9): + """ + Initialize the face detector. + + Args: + min_face_size: Minimum face size to detect + confidence_threshold: Minimum confidence for face detection + """ + self.min_face_size = min_face_size + self.confidence_threshold = confidence_threshold + + if MTCNN_AVAILABLE: + try: + # Try newer MTCNN API + self.detector = MTCNN(min_face_size=min_face_size) + except TypeError: + try: + # Try older MTCNN API without parameters + self.detector = MTCNN() + except Exception: + self.detector = None + else: + self.detector = None + # Fallback to OpenCV Haar Cascade + cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml' + self.cascade = cv2.CascadeClassifier(cascade_path) + + def detect_faces(self, image: np.ndarray) -> List[dict]: + """ + Detect faces in an image. + + Args: + image: Input image (BGR or RGB format) + + Returns: + List of dictionaries with 'box' (x, y, w, h) and 'confidence' + """ + # Convert BGR to RGB if needed + if len(image.shape) == 3 and image.shape[2] == 3: + rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + else: + rgb_image = image + + faces = [] + + if self.detector is not None: + # Use MTCNN + detections = self.detector.detect_faces(rgb_image) + for detection in detections: + if detection['confidence'] >= self.confidence_threshold: + faces.append({ + 'box': detection['box'], # [x, y, width, height] + 'confidence': detection['confidence'], + 'keypoints': detection.get('keypoints', {}) + }) + else: + # Fallback to Haar Cascade + gray = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY) if len(rgb_image.shape) == 3 else rgb_image + detected = self.cascade.detectMultiScale( + gray, + scaleFactor=1.1, + minNeighbors=5, + minSize=(self.min_face_size, self.min_face_size) + ) + for (x, y, w, h) in detected: + faces.append({ + 'box': [x, y, w, h], + 'confidence': 1.0, # Haar doesn't provide confidence + 'keypoints': {} + }) + + return faces + + def 
def extract_face(
    self,
    image: np.ndarray,
    box: List[int],
    target_size: Tuple[int, int] = IMAGE_SIZE,
    margin: float = 0.2,
    to_grayscale: bool = True
) -> np.ndarray:
    """Crop a face region (plus margin) out of *image* and size it for the model.

    Args:
        image: Input image
        box: Face bounding box [x, y, width, height]
        target_size: Output size for the extracted face
        margin: Extra border around the face, as a fraction of its size
        to_grayscale: Convert a color crop to grayscale

    Returns:
        The preprocessed face image.
    """
    x, y, w, h = box
    pad_w, pad_h = int(w * margin), int(h * margin)

    # Clamp the padded box to the image bounds.
    left = max(0, x - pad_w)
    top = max(0, y - pad_h)
    right = min(image.shape[1], x + w + pad_w)
    bottom = min(image.shape[0], y + h + pad_h)

    crop = image[top:bottom, left:right]
    if to_grayscale and crop.ndim == 3:
        crop = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
    return cv2.resize(crop, target_size)


def detect_and_extract(
    self,
    image: np.ndarray,
    target_size: Tuple[int, int] = IMAGE_SIZE,
    to_grayscale: bool = True,
    return_all: bool = False
) -> Tuple[Optional[np.ndarray], List[dict]]:
    """Detect faces in *image* and return the extracted crop(s).

    Args:
        image: Input image
        target_size: Output size for each extracted face
        to_grayscale: Convert crops to grayscale
        return_all: Return every face; otherwise only the largest one

    Returns:
        (extracted_face(s), face_info) — (None, []) when nothing is found.
    """
    faces = self.detect_faces(image)
    if not faces:
        return None, []

    def _crop(info):
        # Shared extraction call so both branches stay in sync.
        return self.extract_face(
            image, info['box'],
            target_size=target_size,
            to_grayscale=to_grayscale
        )

    if return_all:
        return [_crop(info) for info in faces], faces

    # Pick the face covering the largest area.
    biggest = max(faces, key=lambda f: f['box'][2] * f['box'][3])
    return _crop(biggest), [biggest]


def preprocess_for_model(
    self,
    face: np.ndarray,
    for_transfer_learning: bool = False
) -> np.ndarray:
    """Turn an extracted face into a normalized, batched model input.

    Args:
        face: Extracted face image
        for_transfer_learning: Prepare for transfer-learning models
            (larger input size, 3 channels)

    Returns:
        Array of shape (1, H, W, C), values scaled to [0, 1].
    """
    size = IMAGE_SIZE_TRANSFER if for_transfer_learning else IMAGE_SIZE
    if face.shape[:2] != size:
        face = cv2.resize(face, size)

    face = face.astype(np.float32) / 255.0

    if face.ndim == 2:
        if for_transfer_learning:
            # Fake RGB by repeating the grayscale plane.
            face = np.stack([face] * 3, axis=-1)
        else:
            face = face[..., np.newaxis]

    # Leading batch dimension.
    return face[np.newaxis, ...]
def draw_detections(
    self,
    image: np.ndarray,
    faces: List[dict],
    emotions: Optional[List[str]] = None,
    confidences: Optional[List[float]] = None
) -> np.ndarray:
    """Overlay face boxes (and optional emotion labels) on a copy of *image*.

    Args:
        image: Input image
        faces: Face detection results, each with a 'box' of [x, y, w, h]
        emotions: Optional emotion label per face
        confidences: Optional confidence score per face

    Returns:
        A new image with the detections drawn; the input is not modified.
    """
    annotated = image.copy()
    box_color = (0, 255, 0)

    for idx, info in enumerate(faces):
        x, y, w, h = info['box']
        cv2.rectangle(annotated, (x, y), (x + w, y + h), box_color, 2)

        # Skip the label when none was supplied for this face.
        if not emotions or idx >= len(emotions):
            continue

        caption = emotions[idx]
        if confidences and idx < len(confidences):
            caption = f"{caption}: {confidences[idx]:.2f}"

        # Filled background behind the text keeps it readable.
        (text_w, text_h), _ = cv2.getTextSize(
            caption, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2
        )
        cv2.rectangle(
            annotated, (x, y - text_h - 10), (x + text_w, y), box_color, -1
        )
        cv2.putText(
            annotated, caption, (x, y - 5),
            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2
        )

    return annotated


def load_image(image_path: str) -> np.ndarray:
    """Read an image from disk in OpenCV BGR order.

    Args:
        image_path: Path to the image file

    Returns:
        The decoded image as a numpy array.

    Raises:
        ValueError: If the file cannot be read or decoded.
    """
    loaded = cv2.imread(str(image_path))
    if loaded is None:
        raise ValueError(f"Could not load image: {image_path}")
    return loaded
def load_image_pil(image_path: str) -> Image.Image:
    """
    Load an image using PIL.

    Args:
        image_path: Path to the image file

    Returns:
        PIL Image object (decoded lazily by PIL)
    """
    return Image.open(image_path)


if __name__ == "__main__":
    # Smoke test: report which detection backend this environment uses.
    # (Removed an unused `import sys` that the original did here.)
    detector = FaceDetector()
    print(f"MTCNN available: {MTCNN_AVAILABLE}")
    print(f"Using: {'MTCNN' if detector.detector else 'Haar Cascade'}")
+""" +Model evaluation utilities for emotion recognition. +""" +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +from sklearn.metrics import ( + classification_report, confusion_matrix, + accuracy_score, precision_recall_fscore_support, + roc_curve, auc +) +from tensorflow.keras.models import Model + +import sys +sys.path.append(str(Path(__file__).parent.parent.parent)) +from src.config import EMOTION_CLASSES, NUM_CLASSES, MODELS_DIR + + +def evaluate_model( + model: Model, + test_generator, + class_names: List[str] = EMOTION_CLASSES +) -> Dict: + """ + Evaluate a trained model on test data. + + Args: + model: Trained Keras model + test_generator: Test data generator + class_names: List of class names + + Returns: + Dictionary with evaluation metrics + """ + # Reset generator to start + test_generator.reset() + + # Get predictions + predictions = model.predict(test_generator, verbose=1) + y_pred = np.argmax(predictions, axis=1) + y_true = test_generator.classes + + # Calculate metrics + accuracy = accuracy_score(y_true, y_pred) + precision, recall, f1, support = precision_recall_fscore_support( + y_true, y_pred, average=None + ) + + # Per-class metrics + per_class_metrics = {} + for i, class_name in enumerate(class_names): + per_class_metrics[class_name] = { + "precision": float(precision[i]), + "recall": float(recall[i]), + "f1_score": float(f1[i]), + "support": int(support[i]) + } + + # Overall metrics + precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support( + y_true, y_pred, average='macro' + ) + precision_weighted, recall_weighted, f1_weighted, _ = precision_recall_fscore_support( + y_true, y_pred, average='weighted' + ) + + results = { + "accuracy": float(accuracy), + "macro_precision": float(precision_macro), + "macro_recall": float(recall_macro), + "macro_f1": float(f1_macro), + "weighted_precision": float(precision_weighted), + 
"weighted_recall": float(recall_weighted), + "weighted_f1": float(f1_weighted), + "per_class": per_class_metrics, + "predictions": y_pred.tolist(), + "true_labels": y_true.tolist(), + "probabilities": predictions.tolist() + } + + return results + + +def generate_classification_report( + y_true: np.ndarray, + y_pred: np.ndarray, + class_names: List[str] = EMOTION_CLASSES, + output_dict: bool = True +) -> Dict: + """ + Generate a classification report. + + Args: + y_true: True labels + y_pred: Predicted labels + class_names: List of class names + output_dict: Whether to return as dictionary + + Returns: + Classification report + """ + report = classification_report( + y_true, y_pred, + target_names=class_names, + output_dict=output_dict + ) + + if not output_dict: + print(report) + + return report + + +def compute_confusion_matrix( + y_true: np.ndarray, + y_pred: np.ndarray, + normalize: bool = True +) -> np.ndarray: + """ + Compute confusion matrix. + + Args: + y_true: True labels + y_pred: Predicted labels + normalize: Whether to normalize the matrix + + Returns: + Confusion matrix + """ + cm = confusion_matrix(y_true, y_pred) + + if normalize: + cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] + + return cm + + +def plot_confusion_matrix( + y_true: np.ndarray, + y_pred: np.ndarray, + class_names: List[str] = EMOTION_CLASSES, + normalize: bool = True, + figsize: Tuple[int, int] = (12, 10), + cmap: str = 'Blues', + save_path: Optional[Path] = None, + title: str = "Confusion Matrix" +) -> plt.Figure: + """ + Plot confusion matrix as a heatmap. 
def plot_confusion_matrix(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    class_names: List[str] = EMOTION_CLASSES,
    normalize: bool = True,
    figsize: Tuple[int, int] = (12, 10),
    cmap: str = 'Blues',
    save_path: Optional[Path] = None,
    title: str = "Confusion Matrix"
) -> plt.Figure:
    """Render the confusion matrix as an annotated heatmap.

    Args:
        y_true: True labels
        y_pred: Predicted labels
        class_names: List of class names
        normalize: Whether to row-normalize the matrix
        figsize: Figure size
        cmap: Colormap
        save_path: Optional path to save the figure
        title: Plot title

    Returns:
        Matplotlib figure
    """
    matrix = compute_confusion_matrix(y_true, y_pred, normalize=normalize)
    # Fractions get two decimals; raw counts stay integers.
    cell_fmt = '.2f' if normalize else 'd'

    fig, ax = plt.subplots(figsize=figsize)
    sns.heatmap(
        matrix,
        annot=True,
        fmt=cell_fmt,
        cmap=cmap,
        ax=ax,
        xticklabels=class_names,
        yticklabels=class_names,
        square=True,
        cbar_kws={'shrink': 0.8},
    )

    ax.set_xlabel('Predicted Label', fontsize=12)
    ax.set_ylabel('True Label', fontsize=12)
    ax.set_title(title, fontsize=14)
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=150, bbox_inches='tight')
        print(f"Confusion matrix saved to: {save_path}")

    return fig
def plot_training_history(
    history: Dict,
    metrics: Optional[List[str]] = None,
    figsize: Tuple[int, int] = (14, 5),
    save_path: Optional[Path] = None
) -> plt.Figure:
    """
    Plot training/validation curves for the requested metrics.

    Args:
        history: Training history dictionary (e.g. keras History.history)
        metrics: Metrics to plot (defaults to ['accuracy', 'loss'])
        figsize: Figure size
        save_path: Optional path to save the figure

    Returns:
        Matplotlib figure
    """
    # BUG FIX: the original used a mutable default (metrics=['accuracy',
    # 'loss']); any caller mutating that list would corrupt later calls.
    # None with an in-body default is behavior-identical for all callers.
    if metrics is None:
        metrics = ['accuracy', 'loss']

    num_metrics = len(metrics)
    fig, axes = plt.subplots(1, num_metrics, figsize=figsize)

    # plt.subplots returns a bare Axes (not an array) for a single panel.
    if num_metrics == 1:
        axes = [axes]

    for ax, metric in zip(axes, metrics):
        if metric in history:
            epochs = range(1, len(history[metric]) + 1)
            ax.plot(epochs, history[metric], 'b-', label=f'Training {metric.capitalize()}')

            val_metric = f'val_{metric}'
            if val_metric in history:
                ax.plot(epochs, history[val_metric], 'r-', label=f'Validation {metric.capitalize()}')

        ax.set_xlabel('Epoch')
        ax.set_ylabel(metric.capitalize())
        ax.set_title(f'{metric.capitalize()} over Epochs')
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=150, bbox_inches='tight')
        print(f"Training history plot saved to: {save_path}")

    return fig
def plot_per_class_metrics(
    results: Dict,
    figsize: Tuple[int, int] = (14, 6),
    save_path: Optional[Path] = None
) -> plt.Figure:
    """Draw grouped bars of precision/recall/F1 for every emotion class.

    Args:
        results: Evaluation results dictionary (needs a 'per_class' entry)
        figsize: Figure size
        save_path: Optional path to save

    Returns:
        Matplotlib figure
    """
    per_class = results['per_class']
    classes = list(per_class.keys())
    positions = np.arange(len(classes))
    width = 0.25

    # (legend label, metric key, bar color, horizontal offset)
    series = [
        ('Precision', 'precision', '#3498db', -width),
        ('Recall', 'recall', '#2ecc71', 0.0),
        ('F1-Score', 'f1_score', '#e74c3c', width),
    ]

    fig, ax = plt.subplots(figsize=figsize)

    bar_groups = []
    for label, key, color, offset in series:
        values = [per_class[c][key] for c in classes]
        bar_groups.append(
            ax.bar(positions + offset, values, width, label=label, color=color)
        )

    ax.set_xlabel('Emotion Class')
    ax.set_ylabel('Score')
    ax.set_title('Per-Class Performance Metrics')
    ax.set_xticks(positions)
    ax.set_xticklabels(classes, rotation=45, ha='right')
    ax.legend()
    ax.set_ylim(0, 1.0)
    ax.grid(True, alpha=0.3, axis='y')

    # Print each bar's value just above it.
    for bars in bar_groups:
        for bar in bars:
            height = bar.get_height()
            ax.annotate(f'{height:.2f}',
                        xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(0, 3),
                        textcoords="offset points",
                        ha='center', va='bottom', fontsize=8)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=150, bbox_inches='tight')
        print(f"Per-class metrics plot saved to: {save_path}")

    return fig
def compute_roc_curves(
    y_true: np.ndarray,
    y_proba: np.ndarray,
    class_names: List[str] = EMOTION_CLASSES
) -> Dict:
    """Compute per-class ROC curves and AUC scores.

    Args:
        y_true: True labels — class indices or one-hot encoded
        y_proba: Prediction probabilities, shape (n_samples, n_classes)
        class_names: List of class names

    Returns:
        Mapping of class name -> {'fpr', 'tpr', 'auc'}.
    """
    # Accept integer labels: turn them into one-hot rows.
    if y_true.ndim == 1:
        y_true = np.eye(len(class_names))[y_true]

    curves = {}
    for idx, name in enumerate(class_names):
        fpr, tpr, _ = roc_curve(y_true[:, idx], y_proba[:, idx])
        curves[name] = {
            'fpr': fpr.tolist(),
            'tpr': tpr.tolist(),
            'auc': float(auc(fpr, tpr)),
        }
    return curves


def plot_roc_curves(
    roc_data: Dict,
    figsize: Tuple[int, int] = (10, 8),
    save_path: Optional[Path] = None
) -> plt.Figure:
    """Plot one ROC curve per class plus the chance diagonal.

    Args:
        roc_data: ROC curve data from compute_roc_curves
        figsize: Figure size
        save_path: Optional save path

    Returns:
        Matplotlib figure
    """
    fig, ax = plt.subplots(figsize=figsize)
    palette = plt.cm.Set2(np.linspace(0, 1, len(roc_data)))

    for (class_name, curve), shade in zip(roc_data.items(), palette):
        ax.plot(
            curve['fpr'], curve['tpr'],
            color=shade, lw=2,
            label=f"{class_name} (AUC = {curve['auc']:.2f})"
        )

    # Chance-level diagonal for reference.
    ax.plot([0, 1], [0, 1], 'k--', lw=2, label='Random')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curves by Emotion Class')
    ax.legend(loc='lower right')
    ax.grid(True, alpha=0.3)
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=150, bbox_inches='tight')
        print(f"ROC curves saved to: {save_path}")

    return fig
def compare_models(
    model_results: Dict[str, Dict],
    save_path: Optional[Path] = None
) -> plt.Figure:
    """Grouped-bar comparison of several models on the headline metrics.

    Args:
        model_results: Mapping of model_name -> evaluation results
        save_path: Optional save path

    Returns:
        Matplotlib figure
    """
    models = list(model_results.keys())
    metrics = ['accuracy', 'macro_precision', 'macro_recall', 'macro_f1']

    fig, ax = plt.subplots(figsize=(12, 6))
    positions = np.arange(len(models))
    width = 0.2

    for rank, metric in enumerate(metrics):
        scores = [model_results[name].get(metric, 0) for name in models]
        # Center the group of bars around each model's tick.
        shift = (rank - len(metrics) / 2 + 0.5) * width
        ax.bar(positions + shift, scores, width,
               label=metric.replace('_', ' ').title())

    ax.set_xlabel('Model')
    ax.set_ylabel('Score')
    ax.set_title('Model Comparison')
    ax.set_xticks(positions)
    ax.set_xticklabels(models)
    ax.legend()
    ax.set_ylim(0, 1.0)
    ax.grid(True, alpha=0.3, axis='y')
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=150, bbox_inches='tight')
        print(f"Model comparison saved to: {save_path}")

    return fig


if __name__ == "__main__":
    # Import smoke test.
    print("Evaluation module loaded successfully.")
    print(f"Emotion classes: {EMOTION_CLASSES}")
+""" +import os +import json +from pathlib import Path +from datetime import datetime +from typing import Dict, Optional, Tuple, Callable + +import numpy as np +import tensorflow as tf +from tensorflow.keras.models import Model +from tensorflow.keras.optimizers import Adam +from tensorflow.keras.callbacks import ( + EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, + TensorBoard, Callback +) + +import sys +sys.path.append(str(Path(__file__).parent.parent.parent)) +from src.config import ( + EPOCHS, LEARNING_RATE, LEARNING_RATE_FINE_TUNE, + EARLY_STOPPING_PATIENCE, REDUCE_LR_PATIENCE, REDUCE_LR_FACTOR, + MODELS_DIR, CUSTOM_CNN_PATH, MOBILENET_PATH, VGG_PATH +) + + +class TrainingProgressCallback(Callback): + """Custom callback to track and display training progress.""" + + def __init__(self, total_epochs: int): + super().__init__() + self.total_epochs = total_epochs + + def on_epoch_end(self, epoch, logs=None): + logs = logs or {} + print(f"\nEpoch {epoch + 1}/{self.total_epochs}") + print(f" Loss: {logs.get('loss', 0):.4f} - Accuracy: {logs.get('accuracy', 0):.4f}") + print(f" Val Loss: {logs.get('val_loss', 0):.4f} - Val Accuracy: {logs.get('val_accuracy', 0):.4f}") + + +class EmotionModelTrainer: + """ + Trainer class for emotion recognition models. + """ + + def __init__( + self, + model: Model, + model_name: str = "model", + save_path: Optional[Path] = None, + logs_dir: Optional[Path] = None + ): + """ + Initialize the trainer. 
+ + Args: + model: Keras model to train + model_name: Name for the model (used for saving) + save_path: Path to save the trained model + logs_dir: Directory for TensorBoard logs + """ + self.model = model + self.model_name = model_name + self.save_path = save_path or MODELS_DIR / f"{model_name}.h5" + self.logs_dir = logs_dir or MODELS_DIR / "logs" / model_name + + self.history = None + self.training_metadata = {} + + # Create directories + Path(self.save_path).parent.mkdir(parents=True, exist_ok=True) + Path(self.logs_dir).mkdir(parents=True, exist_ok=True) + + def compile( + self, + learning_rate: float = LEARNING_RATE, + optimizer: Optional[tf.keras.optimizers.Optimizer] = None, + loss: str = 'categorical_crossentropy', + metrics: list = ['accuracy'] + ) -> None: + """ + Compile the model. + + Args: + learning_rate: Learning rate for optimizer + optimizer: Custom optimizer (uses Adam if None) + loss: Loss function + metrics: Metrics to track + """ + if optimizer is None: + optimizer = Adam(learning_rate=learning_rate) + + self.model.compile( + optimizer=optimizer, + loss=loss, + metrics=metrics + ) + + self.training_metadata['learning_rate'] = learning_rate + self.training_metadata['loss_function'] = loss + self.training_metadata['metrics'] = metrics + + def get_callbacks( + self, + use_early_stopping: bool = True, + use_reduce_lr: bool = True, + use_tensorboard: bool = True, + use_checkpoint: bool = True, + custom_callbacks: Optional[list] = None + ) -> list: + """ + Get training callbacks. 
def get_callbacks(
    self,
    use_early_stopping: bool = True,
    use_reduce_lr: bool = True,
    use_tensorboard: bool = True,
    use_checkpoint: bool = True,
    custom_callbacks: Optional[list] = None
) -> list:
    """Assemble the list of Keras callbacks for a training run.

    Args:
        use_early_stopping: Stop once val_loss stops improving
        use_reduce_lr: Lower the learning rate on a val_loss plateau
        use_tensorboard: Log to TensorBoard under self.logs_dir
        use_checkpoint: Save the best model (by val_accuracy) to self.save_path
        custom_callbacks: Extra callbacks appended after the standard ones

    Returns:
        List of configured callbacks.
    """
    # Factories are lazy so disabled callbacks are never instantiated.
    recipe = [
        (use_early_stopping, lambda: EarlyStopping(
            monitor='val_loss',
            patience=EARLY_STOPPING_PATIENCE,
            restore_best_weights=True,
            verbose=1
        )),
        (use_reduce_lr, lambda: ReduceLROnPlateau(
            monitor='val_loss',
            factor=REDUCE_LR_FACTOR,
            patience=REDUCE_LR_PATIENCE,
            min_lr=1e-7,
            verbose=1
        )),
        (use_tensorboard, lambda: TensorBoard(
            log_dir=str(self.logs_dir),
            histogram_freq=1,
            write_graph=True
        )),
        (use_checkpoint, lambda: ModelCheckpoint(
            filepath=str(self.save_path),
            monitor='val_accuracy',
            save_best_only=True,
            mode='max',
            verbose=1
        )),
    ]

    callbacks = [build() for enabled, build in recipe if enabled]
    if custom_callbacks:
        callbacks.extend(custom_callbacks)
    return callbacks
def train(
    self,
    train_generator,
    val_generator,
    epochs: int = EPOCHS,
    class_weights: Optional[Dict] = None,
    callbacks: Optional[list] = None,
    verbose: int = 1
) -> Dict:
    """
    Train the model.

    Args:
        train_generator: Training data generator
        val_generator: Validation data generator
        epochs: Number of epochs
        class_weights: Optional class weights for imbalanced data
        callbacks: Optional custom callbacks (uses defaults if None)
        verbose: Verbosity mode

    Returns:
        Training history dictionary
    """
    if callbacks is None:
        callbacks = self.get_callbacks()
    else:
        # BUG FIX: the original appended to the caller's list in place,
        # so repeated train() calls accumulated progress callbacks in a
        # list the caller still owned. Work on a copy instead.
        callbacks = list(callbacks)

    # Add progress callback
    callbacks.append(TrainingProgressCallback(epochs))

    # Record training start
    start_time = datetime.now()
    self.training_metadata['training_started'] = start_time.isoformat()
    self.training_metadata['epochs_requested'] = epochs

    print(f"\n{'='*60}")
    print(f"Training {self.model_name}")
    print(f"{'='*60}")
    print(f"Epochs: {epochs}")
    print(f"Training samples: {train_generator.samples}")
    print(f"Validation samples: {val_generator.samples}")
    print(f"{'='*60}\n")

    # Train
    self.history = self.model.fit(
        train_generator,
        epochs=epochs,
        validation_data=val_generator,
        class_weight=class_weights,
        callbacks=callbacks,
        verbose=verbose
    )

    # Record training end
    end_time = datetime.now()
    duration = (end_time - start_time).total_seconds()

    self.training_metadata['training_ended'] = end_time.isoformat()
    self.training_metadata['training_duration_seconds'] = duration
    self.training_metadata['epochs_completed'] = len(self.history.history['loss'])
    self.training_metadata['final_accuracy'] = float(self.history.history['accuracy'][-1])
    self.training_metadata['final_val_accuracy'] = float(self.history.history['val_accuracy'][-1])
    self.training_metadata['best_val_accuracy'] = float(max(self.history.history['val_accuracy']))

    print(f"\n{'='*60}")
    print("Training Complete!")
    print(f"Duration: {duration/60:.2f} minutes")
    print(f"Best Validation Accuracy: {self.training_metadata['best_val_accuracy']:.4f}")
    print(f"{'='*60}\n")

    return self.history.history
def fine_tune(
    self,
    train_generator,
    val_generator,
    epochs: int = 20,
    learning_rate: float = LEARNING_RATE_FINE_TUNE,
    unfreeze_layers: int = 30
) -> Dict:
    """
    Fine-tune a transfer learning model by unfreezing its top layers.

    Args:
        train_generator: Training data generator
        val_generator: Validation data generator
        epochs: Number of fine-tuning epochs
        learning_rate: Learning rate for fine-tuning
        unfreeze_layers: Number of top layers to unfreeze

    Returns:
        Fine-tuning history dictionary
    """
    # Unfreeze the top `unfreeze_layers` layers.
    for layer in self.model.layers[-unfreeze_layers:]:
        layer.trainable = True

    # Recompile with a lower learning rate so the newly unfrozen weights
    # move gently instead of destroying the pretrained features.
    self.compile(learning_rate=learning_rate)

    print(f"\nFine-tuning with learning rate: {learning_rate}")
    print(f"Unfrozen {unfreeze_layers} top layers")

    # Continue training with the standard pipeline.
    return self.train(train_generator, val_generator, epochs=epochs)


def save_training_history(self) -> None:
    """Persist training history and metadata as JSON next to the model file.

    Writes `<model>.history.json` and `<model>.meta.json`; no-op (with a
    message) when the model has not been trained yet.
    """
    if self.history is None:
        print("No training history to save.")
        return

    # CONSISTENCY FIX: __init__ tolerates save_path given as a str (it
    # wraps it in Path() before mkdir), but this method called
    # .with_suffix directly and would crash on a str. Coerce here too.
    save_path = Path(self.save_path)

    # Save history as JSON; default=float guards against numpy scalar
    # values that json cannot serialize natively.
    history_path = save_path.with_suffix('.history.json')
    with open(history_path, 'w') as f:
        json.dump(self.history.history, f, indent=2, default=float)

    # Save metadata
    metadata_path = save_path.with_suffix('.meta.json')
    with open(metadata_path, 'w') as f:
        json.dump(self.training_metadata, f, indent=2, default=float)

    print(f"Training history saved to: {history_path}")
    print(f"Training metadata saved to: {metadata_path}")
def get_training_summary(self) -> Dict:
    """Summarize the most recent training run.

    Returns:
        Dictionary of key metrics, or {"status": "Not trained"} when the
        model has not been trained yet.
    """
    if self.history is None:
        return {"status": "Not trained"}

    record = self.history.history
    return {
        "model_name": self.model_name,
        "epochs_completed": len(record['loss']),
        "final_accuracy": record['accuracy'][-1],
        "final_val_accuracy": record['val_accuracy'][-1],
        "best_val_accuracy": max(record['val_accuracy']),
        "final_loss": record['loss'][-1],
        "final_val_loss": record['val_loss'][-1],
        "training_duration": self.training_metadata.get('training_duration_seconds', 0)
    }


def train_custom_cnn(
    train_generator,
    val_generator,
    epochs: int = EPOCHS,
    class_weights: Optional[Dict] = None
) -> Tuple[Model, Dict]:
    """Build, train and persist the custom CNN model.

    Args:
        train_generator: Training data generator
        val_generator: Validation data generator
        epochs: Number of epochs
        class_weights: Optional class weights

    Returns:
        Tuple of (trained model, training history)
    """
    # Imported lazily to avoid a module-level dependency cycle.
    from src.models.custom_cnn import build_custom_cnn

    network = build_custom_cnn()
    runner = EmotionModelTrainer(network, "custom_cnn", CUSTOM_CNN_PATH)
    runner.compile()
    run_history = runner.train(train_generator, val_generator, epochs, class_weights)
    runner.save_training_history()

    return network, run_history
def _run_transfer_training(
    build_fn,
    model_name: str,
    save_path,
    train_generator,
    val_generator,
    epochs: int,
    fine_tune_epochs: int,
    class_weights: Optional[Dict]
) -> Tuple[Model, Dict]:
    """Shared driver: train a transfer-learning model, then optionally fine-tune.

    Phase 1 trains with the pretrained base frozen; phase 2 unfreezes the
    top layers and continues at a lower learning rate. The per-epoch lists
    of the fine-tuning history are appended to the initial history so the
    caller sees one continuous record.
    """
    network = build_fn()
    runner = EmotionModelTrainer(network, model_name, save_path)

    # Phase 1: frozen base.
    runner.compile()
    merged_history = runner.train(train_generator, val_generator, epochs, class_weights)

    # Phase 2: optional fine-tuning.
    if fine_tune_epochs > 0:
        extra = runner.fine_tune(train_generator, val_generator, fine_tune_epochs)
        for key in merged_history:
            merged_history[key].extend(extra[key])

    runner.save_training_history()
    return network, merged_history


def train_mobilenet(
    train_generator,
    val_generator,
    epochs: int = EPOCHS,
    fine_tune_epochs: int = 20,
    class_weights: Optional[Dict] = None
) -> Tuple[Model, Dict]:
    """Train the MobileNetV2 model with fine-tuning.

    Args:
        train_generator: Training data generator (RGB, 96x96)
        val_generator: Validation data generator
        epochs: Initial training epochs
        fine_tune_epochs: Fine-tuning epochs
        class_weights: Optional class weights

    Returns:
        Tuple of (trained model, training history)
    """
    from src.models.mobilenet_model import build_mobilenet_model

    return _run_transfer_training(
        build_mobilenet_model, "mobilenet_v2", MOBILENET_PATH,
        train_generator, val_generator, epochs, fine_tune_epochs, class_weights
    )


def train_vgg(
    train_generator,
    val_generator,
    epochs: int = EPOCHS,
    fine_tune_epochs: int = 15,
    class_weights: Optional[Dict] = None
) -> Tuple[Model, Dict]:
    """Train the VGG-19 model with fine-tuning.

    Args:
        train_generator: Training data generator (RGB, 96x96)
        val_generator: Validation data generator
        epochs: Initial training epochs
        fine_tune_epochs: Fine-tuning epochs
        class_weights: Optional class weights

    Returns:
        Tuple of (trained model, training history)
    """
    from src.models.vgg_model import build_vgg_model

    return _run_transfer_training(
        build_vgg_model, "vgg19", VGG_PATH,
        train_generator, val_generator, epochs, fine_tune_epochs, class_weights
    )