Spaces:

sseo325
/

Real-Time_Surprise_Detector

Sleeping

App Files Files Community

Real-Time_Surprise_Detector / app.py

sseo325

Update app.py

bc8f518 verified 5 months ago

raw

history blame contribute delete

11.8 kB

	import os
	import time
	import cv2
	import numpy as np
	import tensorflow as tf
	import gradio as gr
	import plotly.graph_objects as go
	import matplotlib.pyplot as plt
	from fpdf import FPDF
	from PIL import Image

	# ===============================
	# 1. Load Model
	# ===============================
	# Path of the Keras model file loaded below.
	MODEL_PATH = "fer_surprise_softmax.h5"
	# Loaded once at import time. This file only calls model.predict(), and the
	# model is loaded with compile=False.
	model = tf.keras.models.load_model(MODEL_PATH, compile=False)

	# Size each face crop is resized to before it is passed to the model.
	IMG_SIZE = (96, 96)
	# Labels paired positionally with the model's output vector.
	# NOTE(review): presumably the label order used at training time - confirm.
	CLASS_NAMES = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]
	# Position of the "surprise" probability inside a prediction vector.
	SURPRISE_IDX = CLASS_NAMES.index("surprise")

	# ===============================
	# 2. Face Detector
	# ===============================
	# Frontal-face Haar cascade loaded from the cascade directory that ships
	# with OpenCV (cv2.data.haarcascades).
	face_cascade = cv2.CascadeClassifier(
	cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
	)

	# ===============================
	# 3. State Storage
	# ===============================
	# Module-level session state: filled by detect_surprise() and cleared again
	# by summarize_results().
	# NOTE(review): these are plain module globals, so they are not separated
	# per browser session - confirm that is acceptable for this app.
	# events: dicts with "time", "score" and "frame" for each surprise event.
	events = []
	# surprise_history: dicts with "time" and "score" for every frame with a face.
	surprise_history = []
	# time.time() of the first processed frame; None while no session is running.
	start_time = None
	# Detections no more than this many seconds after the last event are merged
	# into that event instead of creating a new one.
	MIN_EVENT_GAP = 1.0

	# Session stats
	# Number of processed frames in which at least one face was found.
	frames_with_face = 0
	# Highest surprise probability seen in the current session.
	max_p_surprise = 0.0

	# ===============================
	# 4. Utility: Time Formatting
	# ===============================
	def format_time(seconds: float) -> str:
	    """Render a duration given in seconds as a zero-padded ``MM:SS`` string.

	    Fractional seconds are truncated. Minutes are not wrapped at 60, so a
	    duration of one hour is rendered as ``60:00``.
	    """
	    whole_minutes, leftover = divmod(seconds, 60)
	    return f"{int(whole_minutes):02d}:{int(leftover):02d}"


	# ===============================
	# 5. Real-time Frame Processing
	# ===============================
	def _session_stats_markdown(duration_str, threshold, face_frames,
	                            event_count, peak):
	    """Build the Markdown text shown in the "Session Stats" panel."""
	    return (
	        "### Session Stats\n"
	        f"- Session duration: {duration_str}\n"
	        f"- Current threshold: {threshold:.2f}\n"
	        f"- Frames with face detected: {face_frames}\n"
	        f"- Surprise events detected: {event_count}\n"
	        f"- Max P(surprise): {peak:.2f}\n"
	    )


	def detect_surprise(frame, threshold):
	    """Process one webcam frame and update the running session state.

	    The largest detected face is cropped, classified by ``model`` and its
	    surprise probability is appended to ``surprise_history``. Probabilities
	    at or above ``threshold`` create a new entry in ``events`` or, when they
	    fall within ``MIN_EVENT_GAP`` seconds of the last event, replace that
	    event if the new score is higher.

	    Args:
	        frame: Image from the webcam component, or ``None``. It is treated
	            as RGB (it is converted with ``cv2.COLOR_RGB2BGR``).
	        threshold: Minimum surprise probability that counts as an event.

	    Returns:
	        A 4-tuple ``(annotated_rgb_frame, probabilities, figure, stats)``:
	        the frame with the face box and label drawn on it, a dict mapping
	        class name to probability (empty when no face was found), a Plotly
	        bar chart of those probabilities and the session stats as Markdown.
	        For a ``None`` frame the result is
	        ``(None, {"Error": 1.0}, None, <zeroed stats>)``.
	    """
	    global events, start_time, surprise_history
	    global frames_with_face, max_p_surprise

	    if frame is None:
	        empty_stats = _session_stats_markdown("00:00", threshold, 0, 0, 0.0)
	        return None, {"Error": 1.0}, None, empty_stats

	    # The first frame after a reset starts a fresh session.
	    if start_time is None:
	        start_time = time.time()
	        surprise_history = []
	        events = []
	        frames_with_face = 0
	        max_p_surprise = 0.0

	    current_time = time.time() - start_time

	    frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
	    gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
	    faces = face_cascade.detectMultiScale(gray, 1.1, 4)

	    # Default label when no face is found: hint about lighting / angle.
	    label = "NO FACE - Try brighter lighting or adjust angle"
	    color = (0, 255, 255)
	    probs_dict = {}

	    # len() instead of truthiness: the detector result may be a NumPy array.
	    if len(faces) > 0:
	        frames_with_face += 1
	        # Only the face with the largest bounding-box area is classified.
	        x, y, w, h = max(faces, key=lambda r: r[2] * r[3])
	        roi = frame_bgr[y:y + h, x:x + w]

	        rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
	        resized = cv2.resize(rgb, IMG_SIZE)
	        inp = np.expand_dims(resized.astype("float32") / 255.0, axis=0)

	        probs = model.predict(inp, verbose=0)[0]
	        p_surprise = float(probs[SURPRISE_IDX])
	        max_p_surprise = max(max_p_surprise, p_surprise)

	        probs_dict = {cls: float(p) for cls, p in zip(CLASS_NAMES, probs)}
	        surprise_history.append({"time": current_time, "score": p_surprise})

	        # -------- Top3 detection logic --------
	        if p_surprise >= threshold:
	            last_event = events[-1] if events else None
	            if (last_event is None
	                    or current_time - last_event["time"] > MIN_EVENT_GAP):
	                events.append({
	                    "time": current_time,
	                    "score": p_surprise,
	                    "frame": frame.copy(),
	                })
	            elif p_surprise > last_event["score"]:
	                # Same reaction as the last event: keep only its peak frame.
	                last_event.update(
	                    time=current_time,
	                    score=p_surprise,
	                    frame=frame.copy(),
	                )

	            # ASCII only: cv2.putText with a Hershey font cannot draw emoji
	            # and would render them as question marks.
	            label = f"SURPRISE (p={p_surprise:.2f})"
	            color = (0, 255, 0)
	        else:
	            label = f"Not Surprise (p={p_surprise:.2f})"
	            color = (0, 0, 255)

	        # Draw bounding box
	        cv2.rectangle(frame_bgr, (x, y), (x + w, y + h), color, 3)

	    # -------- Label position: bottom-left, large font --------
	    h_img = frame_bgr.shape[0]
	    cv2.putText(
	        frame_bgr,
	        label,
	        (10, h_img - 10),
	        cv2.FONT_HERSHEY_SIMPLEX,
	        1.6,
	        color,
	        3,
	    )

	    out_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

	    # Per-frame bar chart (left empty when no face was classified).
	    fig = go.Figure()
	    if probs_dict:
	        fig.add_trace(go.Bar(
	            x=list(probs_dict.keys()),
	            y=list(probs_dict.values()),
	            marker_color="lightskyblue",
	        ))
	    fig.update_layout(
	        title="Emotion Probability Distribution",
	        yaxis=dict(range=[0, 1]),
	    )

	    stats_text = _session_stats_markdown(
	        format_time(current_time),
	        threshold,
	        frames_with_face,
	        len(events),
	        max_p_surprise,
	    )

	    return out_rgb, probs_dict, fig, stats_text


	# ===============================
	# 6. PDF Generation
	# ===============================
	def create_pdf(summary_text, top_images, timeline_fig):
	    """Write the session report as a PDF under ``reports/`` and return its path.

	    The report contains the summary text, the timeline figure and every
	    non-``None`` entry of ``top_images``. The PNG files needed to embed the
	    figure and the frames are written next to the PDF and removed again once
	    the PDF has been produced, so they do not accumulate between reports.

	    Args:
	        summary_text: Text printed below the report title.
	        top_images: Sequence of image arrays (anything accepted by
	            ``PIL.Image.fromarray``) or ``None`` placeholders, best first.
	        timeline_fig: Figure object providing ``savefig``.

	    Returns:
	        Path of the generated PDF file.
	    """
	    frame_width_mm = 80
	    caption_height_mm = 6

	    os.makedirs("reports", exist_ok=True)
	    timestamp = int(time.time())
	    pdf_path = os.path.join("reports", f"surprise_report_{timestamp}.pdf")

	    scratch_paths = []  # intermediate PNGs, deleted in the finally block
	    try:
	        timeline_path = os.path.join("reports", f"timeline_{timestamp}.png")
	        scratch_paths.append(timeline_path)
	        timeline_fig.savefig(timeline_path, bbox_inches="tight")

	        # (rank, png path, rendered height in mm) for every available frame.
	        frames = []
	        for rank, img in enumerate(top_images, start=1):
	            if img is None:
	                continue
	            img_pil = Image.fromarray(img)
	            img_path = os.path.join("reports", f"top{rank}_{timestamp}.png")
	            scratch_paths.append(img_path)
	            img_pil.save(img_path)
	            width_px, height_px = img_pil.size
	            frames.append(
	                (rank, img_path, frame_width_mm * height_px / width_px)
	            )

	        pdf = FPDF()
	        pdf.add_page()

	        pdf.set_font("Arial", "B", 16)
	        pdf.cell(0, 10, "Real-Time Surprise Detector Report", ln=1)

	        pdf.set_font("Arial", "", 11)
	        pdf.multi_cell(0, 6, summary_text)
	        pdf.ln(4)

	        pdf.set_font("Arial", "B", 12)
	        pdf.cell(0, 8, "Surprise Probability Timeline", ln=1)
	        pdf.image(timeline_path, w=170)
	        pdf.ln(4)

	        pdf.set_font("Arial", "B", 12)
	        pdf.cell(0, 8, "Top Surprise Frames", ln=1)
	        pdf.set_font("Arial", "", 11)

	        for rank, img_path, height_mm in frames:
	            # Start a new page when caption + image no longer fit, so the
	            # caption is never left behind on the previous page.
	            bottom_limit = pdf.h - pdf.b_margin
	            if pdf.get_y() + caption_height_mm + height_mm > bottom_limit:
	                pdf.add_page()
	            pdf.cell(0, caption_height_mm, f"Top {rank}", ln=1)
	            pdf.image(img_path, w=frame_width_mm)
	            pdf.ln(2)

	        pdf.output(pdf_path)
	    finally:
	        for path in scratch_paths:
	            try:
	                os.remove(path)
	            except OSError:
	                # Best-effort cleanup: a file that was never written or is
	                # already gone must not mask the real outcome.
	                pass

	    return pdf_path


	# ===============================
	# 7. Summarize Results
	# ===============================
	def summarize_results():
	    """Summarize the finished session, build the PDF report and reset state.

	    Plots the recorded surprise probabilities over time, marks the (up to)
	    three highest-scoring events on the plot, writes the report through
	    ``create_pdf`` and finally clears the module-level session state so the
	    next streamed frame starts a new session.

	    Returns:
	        A 6-tuple ``(summary_text, img1, img2, img3, figure, pdf_path)``.
	        Missing top frames are ``None``. When nothing was recorded the
	        result is ``("No data recorded.", None, None, None, None, None)``
	        and the state is left untouched.
	    """
	    global events, start_time, surprise_history
	    global frames_with_face, max_p_surprise

	    if not surprise_history:
	        return "No data recorded.", None, None, None, None, None

	    times = [entry["time"] for entry in surprise_history]
	    scores = [entry["score"] for entry in surprise_history]

	    fig, ax = plt.subplots()
	    try:
	        ax.plot(times, scores, marker="o", linewidth=1)
	        ax.set_title("Surprise Probability Timeline")
	        ax.set_xlabel("Time (s)")
	        ax.set_ylabel("P(surprise)")
	        ax.set_ylim(0, 1)
	        ax.grid(True)

	        top_images = [None, None, None]
	        if not events:
	            summary_text = (
	                "No surprise events detected above the current threshold.\n\n"
	                "The timeline shows overall surprise probability over time."
	            )
	        else:
	            top3 = sorted(events, key=lambda e: e["score"], reverse=True)[:3]

	            captions = [
	                f"#{rank} Time = {format_time(e['time'])} "
	                f"Score = {e['score']:.2f}"
	                for rank, e in enumerate(top3, start=1)
	            ]
	            summary_text = "Top 3 surprise moments:\n" + "\n".join(captions)

	            # One distinct marker/colour per rank, best event first.
	            markers = ["*", "^", "s"]
	            colors = ["red", "darkorange", "gold"]
	            for event, marker, marker_color in zip(top3, markers, colors):
	                ax.scatter(
	                    event["time"], event["score"],
	                    color=marker_color, marker=marker, s=80, zorder=5,
	                )

	            # Pad with None so there are always exactly three image slots.
	            top_images[:len(top3)] = [e["frame"] for e in top3]

	        img1, img2, img3 = top_images
	        pdf_path = create_pdf(summary_text, top_images, fig)
	    finally:
	        # Drop the figure from pyplot's registry; otherwise every click
	        # keeps one more figure alive for the lifetime of the process.
	        # The Figure object itself remains usable for the caller.
	        plt.close(fig)

	    # Reset the session only after the report was written successfully.
	    events = []
	    start_time = None
	    surprise_history = []
	    frames_with_face = 0
	    max_p_surprise = 0.0

	    return summary_text, img1, img2, img3, fig, pdf_path


	# ===============================
	# 8. UI
	# ===============================
	# Prefer the customised Soft theme; fall back to the built-in theme name
	# if the theme object cannot be created.
	try:
	    custom_theme = gr.themes.Soft(primary_hue="indigo", neutral_hue="slate")
	except Exception:
	    # Deliberately best-effort, but no longer a bare ``except`` so that
	    # KeyboardInterrupt / SystemExit are not swallowed.
	    custom_theme = "soft"

	demo = gr.Blocks(theme=custom_theme)

	# Page layout and event wiring.
	# NOTE(review): the nesting of the layout blocks below was reconstructed
	# from a source whose indentation had been flattened - confirm that the
	# components sit in the intended Row/Column.
	with demo:

	    # ---- Header and usage instructions ----
	    gr.Markdown(
	        """
	# 🎭 Real-Time Surprise Detector
	### A real-time facial reaction analysis system
	##### Detects surprise reactions using facial emotion recognition and summarizes top 3 peak surprise moments.

	How to use:
	1. Enable your webcam by clicking the feed area.
	2. Watch your chosen video while keeping your face visible.
	3. If many frames show "NO FACE", try brighter lighting or adjust your face angle.
	4. Click “Show Top 3 Surprise Moments” after stopping the stream.
	5. Download the generated PDF if needed.
	---
	"""
	    )

	    with gr.Row():
	        # Left: live webcam input and the annotated result frame.
	        with gr.Column(scale=2):
	            webcam = gr.Image(sources=["webcam"], type="numpy", label="Webcam Feed")
	            output_img = gr.Image(label="Detection Result")

	        # Right: sensitivity control and per-frame outputs.
	        with gr.Column(scale=1):
	            threshold = gr.Slider(
	                label="Surprise Threshold",
	                minimum=0.0,
	                maximum=1.0,
	                step=0.01,
	                value=0.1,
	            )

	            gr.Markdown(
	                """
	### What is the Surprise Threshold?

	- Lower threshold → detects smaller reactions
	- Higher threshold → detects only strong surprise
	- Default = 0.1

	👉 Try making a surprised face to adjust sensitivity.
	"""
	            )

	            output_label = gr.Label(label="Softmax Probabilities")
	            plot = gr.Plot(label="Emotion Probability (per frame)")
	            stats_md = gr.Markdown("### Session Stats\nWaiting for stream...")

	    # Every streamed frame is passed through detect_surprise.
	    webcam.stream(
	        fn=detect_surprise,
	        inputs=[webcam, threshold],
	        outputs=[output_img, output_label, plot, stats_md],
	        stream_every=0.1,
	    )

	    # ---- Session summary and report download ----
	    gr.Markdown("---")
	    gr.Markdown("## 🔍 Summary & Report")

	    summarize_button = gr.Button("🎯 Show Top 3 Surprise Moments")

	    summary_text = gr.Textbox(label="Top 3 Summary", lines=6, max_lines=10)

	    with gr.Row():
	        img1 = gr.Image(label="Top 1")
	        img2 = gr.Image(label="Top 2")
	        img3 = gr.Image(label="Top 3")

	    timeline_plot = gr.Plot(label="Surprise Timeline")
	    pdf_file = gr.File(label="Download PDF Report")

	    # The button takes no inputs; summarize_results reads the session state.
	    summarize_button.click(
	        fn=summarize_results,
	        inputs=[],
	        outputs=[summary_text, img1, img2, img3, timeline_plot, pdf_file],
	    )

	# Launch the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()