# sseo325's picture
# Update app.py
# bc8f518 verified
import os
import time
import cv2
import numpy as np
import tensorflow as tf
import gradio as gr
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from fpdf import FPDF
from PIL import Image
# ===============================
# 1. Load Model
# ===============================
# Path of the saved Keras model file, relative to the working directory.
MODEL_PATH = "fer_surprise_softmax.h5"
# compile=False: the model is not compiled after loading; this file only calls predict() on it.
model = tf.keras.models.load_model(MODEL_PATH, compile=False)
# (width, height) every face crop is resized to before being fed to the model.
IMG_SIZE = (96, 96)
# Labels paired positionally with the model's output vector.
# NOTE(review): presumably matches the label order used at training time - confirm.
CLASS_NAMES = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]
# Position of "surprise" in CLASS_NAMES; used to read P(surprise) from the output vector.
SURPRISE_IDX = CLASS_NAMES.index("surprise")
# ===============================
# 2. Face Detector
# ===============================
# Frontal-face Haar cascade, loaded from the cascade directory exposed by cv2.data.
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
)
# ===============================
# 3. State Storage
# ===============================
# Module-level session state: written by detect_surprise, cleared by summarize_results.
# Surprise events, each a dict with "time", "score" and "frame" keys.
events = []
# One {"time", "score"} dict per frame in which a face was found.
surprise_history = []
# Timestamp at which the current session started; None while no session is active.
start_time = None
# Seconds. An above-threshold reading within this gap of the latest event is
# merged into that event instead of starting a new one.
MIN_EVENT_GAP = 1.0
# Session stats
# Number of frames in which at least one face was detected.
frames_with_face = 0
# Highest P(surprise) seen so far in the session.
max_p_surprise = 0.0
# ===============================
# 4. Utility: Time Formatting
# ===============================
def format_time(seconds: float) -> str:
    """Format a duration in seconds as zero-padded ``MM:SS``.

    Fractional seconds are truncated. Minutes are not wrapped at 60, so one
    hour is rendered as ``60:00``. Negative durations are clamped to zero
    rather than being rendered as nonsense such as ``-1:59``.

    Args:
        seconds: Duration in seconds.

    Returns:
        The duration as an ``MM:SS`` string.
    """
    minutes, sec = divmod(max(0.0, seconds), 60)
    return f"{int(minutes):02d}:{int(sec):02d}"
# ===============================
# 5. Real-time Frame Processing
# ===============================
def _session_stats_markdown(duration, threshold, face_frames, event_count, peak):
    """Render the Markdown text shown in the "Session Stats" panel."""
    return (
        "### Session Stats\n"
        f"- Session duration: {duration}\n"
        f"- Current threshold: {threshold:.2f}\n"
        f"- Frames with face detected: {face_frames}\n"
        f"- Surprise events detected: {event_count}\n"
        f"- Max P(surprise): {peak:.2f}\n"
    )


def _record_surprise_event(timestamp, score, frame):
    """Store an above-threshold reading in ``events``.

    A reading more than MIN_EVENT_GAP seconds after the latest event starts a
    new event. A closer reading is folded into the latest event, which keeps
    only its strongest frame.
    """
    latest = events[-1] if events else None
    if latest is None or timestamp - latest["time"] > MIN_EVENT_GAP:
        events.append({
            "time": timestamp,
            "score": score,
            "frame": frame.copy(),
        })
    elif score > latest["score"]:
        latest["time"] = timestamp
        latest["score"] = score
        latest["frame"] = frame.copy()


def _draw_label(image_bgr, text, color):
    """Draw ``text`` in the bottom-left corner, shrinking it to fit the width."""
    font = cv2.FONT_HERSHEY_SIMPLEX
    base_scale, base_thickness, margin = 1.6, 3, 10
    scale, thickness = base_scale, base_thickness
    height, width = image_bgr.shape[:2]
    (text_width, _), _ = cv2.getTextSize(text, font, scale, thickness)
    available = max(1, width - 2 * margin)
    if text_width > available:
        # Keep the large font when it fits; otherwise scale down so the whole
        # message (e.g. the "NO FACE" hint) stays inside the frame.
        scale *= available / text_width
        thickness = max(1, int(round(base_thickness * scale / base_scale)))
    cv2.putText(
        image_bgr,
        text,
        (margin, height - margin),
        font,
        scale,
        color,
        thickness,
    )


def detect_surprise(frame, threshold):
    """Process one streamed frame and update the running session state.

    The largest detected face is cropped, classified, and its surprise
    probability is appended to ``surprise_history``. Readings at or above
    ``threshold`` are recorded as events. The first frame after a reset starts
    a new session.

    Args:
        frame: Current frame as an RGB image array, or None when no frame is
            available.
        threshold: P(surprise) at or above which a frame counts as surprise.

    Returns:
        A 4-tuple ``(annotated_rgb, probs_dict, fig, stats_text)``: the frame
        with box and label drawn, a ``{class name: probability}`` dict (empty
        when no face was found), a Plotly bar chart of those probabilities and
        the Markdown session stats. For a None frame the tuple is
        ``(None, {"Error": 1.0}, None, zeroed stats)``.
    """
    global events, start_time, surprise_history
    global frames_with_face, max_p_surprise

    if frame is None:
        stats_text = _session_stats_markdown("00:00", threshold, 0, 0, 0.0)
        return None, {"Error": 1.0}, None, stats_text

    # Monotonic clock: elapsed time must not jump if the wall clock is adjusted.
    if start_time is None:
        start_time = time.monotonic()
        surprise_history = []
        events = []
        frames_with_face = 0
        max_p_surprise = 0.0
    current_time = time.monotonic() - start_time

    frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)

    # Default label when no face is detected: hint about lighting / angle.
    # Overlay text is ASCII only: the Hershey fonts used by cv2.putText draw
    # unsupported symbols (such as emoji) as question marks.
    label = "NO FACE - Try brighter lighting or adjust angle"
    color = (0, 255, 255)  # BGR yellow
    probs_dict = {}

    # len() rather than truthiness: the detector result may be an array.
    if len(faces) > 0:
        frames_with_face += 1
        # Only the largest face (by box area) is analysed.
        x, y, w, h = max(faces, key=lambda r: r[2] * r[3])
        roi = frame_bgr[y:y + h, x:x + w]
        rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(rgb, IMG_SIZE)
        # Scale pixels to [0, 1] and add the batch axis.
        inp = np.expand_dims(resized.astype("float32") / 255.0, axis=0)
        probs = model.predict(inp, verbose=0)[0]
        p_surprise = float(probs[SURPRISE_IDX])
        max_p_surprise = max(max_p_surprise, p_surprise)
        probs_dict = {cls: float(p) for cls, p in zip(CLASS_NAMES, probs)}
        surprise_history.append({
            "time": current_time,
            "score": p_surprise,
        })

        # -------- Top3 detection logic --------
        if p_surprise >= threshold:
            _record_surprise_event(current_time, p_surprise, frame)
            label = f"SURPRISE (p={p_surprise:.2f})"
            color = (0, 255, 0)  # BGR green
        else:
            label = f"Not Surprise (p={p_surprise:.2f})"
            color = (0, 0, 255)  # BGR red

        # Draw bounding box
        cv2.rectangle(frame_bgr, (x, y), (x + w, y + h), color, 3)

    # -------- Label position: bottom-left, large text --------
    _draw_label(frame_bgr, label, color)
    out_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

    # Per-frame bar chart
    fig = go.Figure()
    if probs_dict:
        fig.add_trace(go.Bar(
            x=list(probs_dict.keys()),
            y=list(probs_dict.values()),
            marker_color="lightskyblue",
        ))
    fig.update_layout(
        title="Emotion Probability Distribution",
        yaxis=dict(range=[0, 1]),
    )

    stats_text = _session_stats_markdown(
        format_time(current_time),
        threshold,
        frames_with_face,
        len(events),
        max_p_surprise,
    )
    return out_rgb, probs_dict, fig, stats_text
# ===============================
# 6. PDF Generation
# ===============================
def create_pdf(summary_text, top_images, timeline_fig):
    """Write the session report PDF into ``reports/`` and return its path.

    The report contains the summary text, the timeline plot and every
    non-None top frame. The PNG files rendered as intermediates are deleted
    once the PDF has been written, so only the PDF remains in ``reports/``.

    Args:
        summary_text: Text placed under the report title.
        top_images: Image arrays for the top frames; None entries are skipped
            but still count towards the "Top N" numbering.
        timeline_fig: Figure providing ``savefig`` for the timeline plot.

    Returns:
        Path of the generated PDF file.
    """
    os.makedirs("reports", exist_ok=True)
    timestamp = int(time.time())
    pdf_path = os.path.join("reports", f"surprise_report_{timestamp}.pdf")
    timeline_path = os.path.join("reports", f"timeline_{timestamp}.png")

    # Registered before being written so a half-written file is removed too.
    scratch_files = [timeline_path]
    try:
        timeline_fig.savefig(timeline_path, bbox_inches="tight")

        img_paths = []
        for rank, img in enumerate(top_images, start=1):
            if img is None:
                img_paths.append(None)
                continue
            img_path = os.path.join("reports", f"top{rank}_{timestamp}.png")
            scratch_files.append(img_path)
            Image.fromarray(img).save(img_path)
            img_paths.append(img_path)

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", "B", 16)
        pdf.cell(0, 10, "Real-Time Surprise Detector Report", ln=1)
        pdf.set_font("Arial", "", 11)
        pdf.multi_cell(0, 6, summary_text)
        pdf.ln(4)

        pdf.set_font("Arial", "B", 12)
        pdf.cell(0, 8, "Surprise Probability Timeline", ln=1)
        pdf.image(timeline_path, w=170)
        pdf.ln(4)

        pdf.set_font("Arial", "B", 12)
        pdf.cell(0, 8, "Top Surprise Frames", ln=1)
        pdf.set_font("Arial", "", 11)
        for rank, path in enumerate(img_paths, start=1):
            if path is not None:
                pdf.cell(0, 6, f"Top {rank}", ln=1)
                pdf.image(path, w=80)
                pdf.ln(2)

        pdf.output(pdf_path)
    finally:
        for path in scratch_files:
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup: the file may never have been created.
                pass
    return pdf_path
# ===============================
# 7. Summarize Results
# ===============================
def _reset_session_state():
    """Clear all module-level session state so the next frame starts fresh."""
    global events, start_time, surprise_history
    global frames_with_face, max_p_surprise
    events = []
    start_time = None
    surprise_history = []
    frames_with_face = 0
    max_p_surprise = 0.0


def summarize_results():
    """Summarize the finished session and reset the session state.

    Plots P(surprise) over time, marks up to three highest-scoring events on
    the plot, builds the PDF report and then clears the session state. The
    state is also cleared when nothing was recorded, so a session in which no
    face was ever detected does not leave its start time behind.

    Returns:
        A 6-tuple ``(summary_text, img1, img2, img3, fig, pdf_path)``. The
        image slots hold the top event frames in descending score order and
        are None where fewer than three events exist. When no history was
        recorded the tuple is ``("No data recorded.", None, None, None, None,
        None)``.
    """
    if not surprise_history:
        _reset_session_state()
        return "No data recorded.", None, None, None, None, None

    times = [h["time"] for h in surprise_history]
    scores = [h["score"] for h in surprise_history]

    fig, ax = plt.subplots()
    ax.plot(times, scores, marker="o", linewidth=1)
    ax.set_title("Surprise Probability Timeline")
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("P(surprise)")
    ax.set_ylim(0, 1)
    ax.grid(True)

    top_images = [None, None, None]
    if not events:
        summary_text = (
            "No surprise events detected above the current threshold.\n\n"
            "The timeline shows overall surprise probability over time."
        )
    else:
        top3 = sorted(events, key=lambda e: e["score"], reverse=True)[:3]
        captions = [
            f"#{rank} Time = {format_time(e['time'])} Score = {e['score']:.2f}"
            for rank, e in enumerate(top3, start=1)
        ]
        summary_text = "Top 3 surprise moments:\n" + "\n".join(captions)

        # Highlight each top event on the timeline with its own marker.
        markers = ["*", "^", "s"]
        colors = ["red", "darkorange", "gold"]
        for e, color, marker in zip(top3, colors, markers):
            ax.scatter(
                e["time"], e["score"],
                color=color, marker=marker, s=80, zorder=5,
            )
        # Fill the leading slots; the remaining ones stay None.
        top_images[:len(top3)] = [e["frame"] for e in top3]

    img1, img2, img3 = top_images
    try:
        pdf_path = create_pdf(summary_text, top_images, fig)
    finally:
        # Unregister the figure from pyplot so figures do not accumulate over
        # repeated summaries; the Figure object itself remains usable.
        plt.close(fig)

    _reset_session_state()
    return summary_text, img1, img2, img3, fig, pdf_path
# ===============================
# 8. UI
# ===============================
# Theming is best-effort: if the theme object cannot be built, fall back to the
# theme name string. Only Exception is caught so KeyboardInterrupt and
# SystemExit still propagate.
try:
    custom_theme = gr.themes.Soft(primary_hue="indigo", neutral_hue="slate")
except Exception:
    custom_theme = "soft"

demo = gr.Blocks(theme=custom_theme)

# NOTE(review): the nesting of rows/columns below is reconstructed from the
# order of the components - confirm against the deployed layout.
with demo:
    # Page header and usage instructions.
    gr.Markdown(
        """
# 🎭 Real-Time Surprise Detector
### A real-time facial reaction analysis system
##### Detects surprise reactions using facial emotion recognition and summarizes top 3 peak surprise moments.
**How to use:**
1. Enable your webcam by clicking the feed area.
2. Watch your chosen video while keeping your face visible.
3. If many frames show **"NO FACE"**, try brighter lighting or adjust your face angle.
4. Click **“Show Top 3 Surprise Moments”** after stopping the stream.
5. Download the generated PDF if needed.
---
"""
    )

    with gr.Row():
        # Left: live webcam input and the annotated result.
        with gr.Column(scale=2):
            webcam = gr.Image(
                sources=["webcam"],
                type="numpy",
                label="Webcam Feed"
            )
            output_img = gr.Image(label="Detection Result")

        # Right: threshold control and per-frame outputs.
        with gr.Column(scale=1):
            threshold = gr.Slider(
                minimum=0.0, maximum=1.0, value=0.1,
                step=0.01, label="Surprise Threshold"
            )
            gr.Markdown(
                """
### What is the Surprise Threshold?
- Lower threshold → detects smaller reactions
- Higher threshold → detects only strong surprise
- **Default = 0.1**
👉 Try making a surprised face to adjust sensitivity.
"""
            )
            output_label = gr.Label(label="Softmax Probabilities")
            plot = gr.Plot(label="Emotion Probability (per frame)")
            stats_md = gr.Markdown("### Session Stats\nWaiting for stream...")

    # Each streamed frame is passed to detect_surprise together with the
    # current slider value; its four return values feed the four outputs.
    webcam.stream(
        fn=detect_surprise,
        inputs=[webcam, threshold],
        outputs=[output_img, output_label, plot, stats_md],
        stream_every=0.1
    )

    gr.Markdown("---")
    gr.Markdown("## 🔍 Summary & Report")
    summarize_button = gr.Button("🎯 Show Top 3 Surprise Moments")
    summary_text = gr.Textbox(
        label="Top 3 Summary",
        lines=6,
        max_lines=10
    )
    with gr.Row():
        img1 = gr.Image(label="Top 1")
        img2 = gr.Image(label="Top 2")
        img3 = gr.Image(label="Top 3")
    timeline_plot = gr.Plot(label="Surprise Timeline")
    pdf_file = gr.File(label="Download PDF Report")

    # summarize_results returns six values, matched positionally to outputs.
    summarize_button.click(
        fn=summarize_results,
        inputs=[],
        outputs=[summary_text, img1, img2, img3, timeline_plot, pdf_file]
    )

if __name__ == "__main__":
    demo.launch()