bimo177x commited on
Commit
77cd16c
·
verified ·
1 Parent(s): 57bf6bc

Upload web_app.py

Browse files
Files changed (1) hide show
  1. src/web_app.py +176 -0
src/web_app.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import streamlit as st
3
+ import cv2
4
+ import numpy as np
5
+ import tensorflow as tf
6
+ import mediapipe as mp
7
+ import tempfile
8
+ import os
9
+ import time
10
+ from tensorflow.keras.models import load_model # type: ignore
11
+ from gtts import gTTS
12
+ from playsound import playsound
13
+
14
# ==============================
# BASIC CONFIGURATION
# ==============================
# All paths are resolved relative to the process working directory, so the
# app must be launched from the project root.
BASE_DIR = os.getcwd()
_MODELS_DIR = os.path.join(BASE_DIR, "models")
MODEL_PATH = os.path.join(_MODELS_DIR, "sign_model.h5")
LABEL_CLASSES_PATH = os.path.join(_MODELS_DIR, "label_classes.npy")
# NOTE(review): this temp file is defined but never written anywhere visible
# in this file — confirm whether it is still needed.
TEMP_AUDIO_FILE = os.path.join(tempfile.gettempdir(), "temp_prediction.mp3")
21
+
22
# ==============================
# MODEL LOADING
# ==============================
@st.cache_resource
def load_all_models():
    """Load the trained classifier, the MobileNetV2 feature extractor and the
    label names.

    Cached by Streamlit so the heavyweight loads happen only once per server
    process, not on every script rerun.

    Returns:
        (classifier, feature_extractor, label_names) — the Keras sequence
        classifier, a headless average-pooled MobileNetV2, and the array of
        class labels loaded from disk.
    """
    classifier = load_model(MODEL_PATH)
    feature_extractor = tf.keras.applications.MobileNetV2(
        weights='imagenet',
        include_top=False,
        pooling='avg',
        input_shape=(224, 224, 3),
    )
    label_names = np.load(LABEL_CLASSES_PATH)
    return classifier, feature_extractor, label_names
36
+
37
# Load all heavyweight resources once at import time; st.cache_resource keeps
# them alive across Streamlit script reruns.
model, mobilenet_model, actions = load_all_models()

# ==============================
# MEDIAPIPE SETUP
# ==============================
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
# One Holistic instance is created at module level and reused for every frame.
holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)
45
+
46
# ==============================
# SUPPORT FUNCTIONS
# ==============================
def mediapipe_detection(image, model):
    """Run a MediaPipe model on a BGR frame.

    Returns:
        (bgr_image, results) — the frame converted back to BGR and the raw
        MediaPipe results object.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Marking the buffer read-only lets MediaPipe process it without copying.
    rgb.flags.writeable = False
    results = model.process(rgb)
    rgb.flags.writeable = True
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    return bgr, results
55
+
56
def draw_styled_landmarks(image, results):
    """Overlay pose and hand landmarks on the frame, in place."""
    overlays = (
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    )
    for landmarks, connections in overlays:
        # Each landmark set is None when that body part was not detected.
        if landmarks:
            mp_drawing.draw_landmarks(image, landmarks, connections)
63
+
64
def extract_landmarks(results):
    """Flatten MediaPipe landmarks into a single 1-D feature vector.

    Pose contributes 33 points x (x, y, z, visibility); each hand contributes
    21 points x (x, y, z). Missing parts are zero-filled, so the output length
    is always 33*4 + 21*3 + 21*3 = 258.
    """
    def _coords(landmark_set, count, fields):
        # Zero-fill when this body part was not detected in the frame.
        if not landmark_set:
            return np.zeros(count * len(fields))
        rows = [[getattr(p, f) for f in fields] for p in landmark_set.landmark]
        return np.array(rows).flatten()

    pose = _coords(results.pose_landmarks, 33, ("x", "y", "z", "visibility"))
    left = _coords(results.left_hand_landmarks, 21, ("x", "y", "z"))
    right = _coords(results.right_hand_landmarks, 21, ("x", "y", "z"))
    return np.concatenate([pose, left, right])
69
+
70
def get_bbox(results, shape):
    """Compute a pixel bounding box covering every detected landmark.

    Args:
        results: MediaPipe results with optional pose / hand landmark sets.
        shape: the frame's array shape, i.e. (height, width, ...).

    Returns:
        (x1, y1, x2, y2) clamped to the frame, or None when nothing was
        detected.
    """
    height, width = shape[0], shape[1]
    xs, ys = [], []
    groups = (results.pose_landmarks, results.left_hand_landmarks, results.right_hand_landmarks)
    for group in groups:
        if not group:
            continue
        for point in group.landmark:
            # Landmark coordinates are normalized to [0, 1]; scale to pixels.
            xs.append(int(point.x * width))
            ys.append(int(point.y * height))
    if not xs:
        return None
    return max(0, min(xs)), max(0, min(ys)), min(width, max(xs)), min(height, max(ys))
80
+
81
def create_canvas_crop(img, bbox):
    """Crop `img` to `bbox` and center it, scaled, on a square white canvas.

    The crop is resized (aspect ratio preserved) to fill 90% of a 600x600
    canvas. A blank white canvas is returned when `bbox` is None or the crop
    is empty.
    """
    CANVAS_SIZE = 600
    blank = np.full((CANVAS_SIZE, CANVAS_SIZE, 3), 255, dtype=np.uint8)
    if bbox is None:
        return blank
    x1, y1, x2, y2 = bbox
    region = img[y1:y2, x1:x2]
    if region.size == 0:
        return blank
    h, w = region.shape[:2]
    # Fit the longer side into 90% of the canvas, preserving aspect ratio.
    fit = CANVAS_SIZE * 0.9
    scale = min(fit / w, fit / h)
    new_w, new_h = int(w * scale), int(h * scale)
    scaled = cv2.resize(region, (new_w, new_h))
    canvas = np.full((CANVAS_SIZE, CANVAS_SIZE, 3), 255, dtype=np.uint8)
    left = (CANVAS_SIZE - new_w) // 2
    top = (CANVAS_SIZE - new_h) // 2
    canvas[top:top + new_h, left:left + new_w] = scaled
    return canvas
97
+
98
def text_to_speech(text, lang='id'):
    """Synthesize `text` with Google TTS and return the MP3 audio as bytes.

    Args:
        text: the phrase to speak.
        lang: gTTS language code (default Indonesian).

    Note: performs a network request to the Google TTS service.
    """
    buffer = io.BytesIO()
    gTTS(text=text, lang=lang).write_to_fp(buffer)
    return buffer.getvalue()
104
# ==============================
# STREAMLIT UI
# ==============================
st.title("🤟 Real-time Sign Language Translator")
st.markdown("Aplikasi ini menerjemahkan bahasa isyarat ke teks dan suara secara real-time menggunakan kamera.")

# Start/stop controls side by side. NOTE(review): st.button is momentary —
# each variable holds only the value from the current script rerun.
col1, col2 = st.columns(2)
with col1:
    start_button = st.button("Mulai Deteksi")
with col2:
    stop_button = st.button("Hentikan")

FRAME_WINDOW = st.image([])        # placeholder updated with each camera frame
sentence_placeholder = st.empty()  # placeholder for the rolling prediction text
118
+
119
# ==============================
# STREAMING LOOP
# ==============================
sequence = []              # sliding window of the last 30 fused feature vectors
sentence = []              # accepted predictions, most recent last (max 5 kept)
threshold = 0.9            # minimum softmax confidence to accept a prediction
last_prediction_time = 0   # timestamp of the last accepted prediction
COOLDOWN = 2               # minimum seconds between accepted predictions

if start_button:
    cap = cv2.VideoCapture(0)
    st.info("Kamera aktif. Tekan 'Hentikan' untuk berhenti.")
    # NOTE(review): `stop_button` is evaluated once per script run and never
    # changes inside this loop; stopping relies on Streamlit rerunning the
    # script when the button is pressed. Consider st.session_state for a more
    # robust stop signal.
    while cap.isOpened() and not stop_button:
        ret, frame = cap.read()
        if not ret:
            st.warning("Tidak dapat membaca frame dari kamera.")
            break

        image, results = mediapipe_detection(frame, holistic)
        draw_styled_landmarks(image, results)

        keypoints = extract_landmarks(results)
        # Only accumulate features for frames where something was detected.
        if np.any(keypoints != 0):
            bbox = get_bbox(results, frame.shape)
            canvas_crop = create_canvas_crop(frame, bbox)

            # Appearance features: MobileNetV2 embedding of the cropped canvas.
            img_rgb = cv2.cvtColor(canvas_crop, cv2.COLOR_BGR2RGB)
            resized_img = cv2.resize(img_rgb, (224, 224))
            preprocessed_img = tf.keras.applications.mobilenet_v2.preprocess_input(resized_img)
            mobilenet_features = mobilenet_model.predict(np.expand_dims(preprocessed_img, axis=0), verbose=0).flatten()

            # Fuse appearance + landmark features; keep a 30-frame window.
            fused_features = np.concatenate([mobilenet_features, keypoints])
            sequence.append(fused_features)
            sequence = sequence[-30:]

            if len(sequence) == 30:
                current_time = time.time()
                if current_time - last_prediction_time > COOLDOWN:
                    res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                    if res[np.argmax(res)] > threshold:
                        predicted_label = actions[np.argmax(res)]
                        # Skip consecutive duplicate predictions.
                        if len(sentence) == 0 or predicted_label != sentence[-1]:
                            sentence.append(predicted_label)
                            last_prediction_time = current_time
                            try:
                                # BUG FIX: the synthesized MP3 bytes returned by
                                # text_to_speech() were previously discarded, so
                                # no audio was ever produced. Render them with an
                                # in-page audio player instead.
                                audio_bytes = text_to_speech(predicted_label)
                                st.audio(audio_bytes, format="audio/mp3")
                            except Exception as e:
                                # Best-effort voice output: never kill the video
                                # loop over a TTS/network failure.
                                st.warning(f"Voice output error: {e}")

            # Keep only the five most recent words on screen.
            if len(sentence) > 5:
                sentence = sentence[-5:]

        # Show the annotated frame and the current sentence.
        FRAME_WINDOW.image(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        sentence_placeholder.markdown(f"### 🗣️ Prediksi: {' '.join(sentence)}")

    cap.release()
    st.success("Deteksi dihentikan.")