omm7 committed on
Commit
ab5dbbd
·
verified ·
1 Parent(s): 5428408

Upload app/app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app/app.py +19 -30
app/app.py CHANGED
@@ -3,6 +3,7 @@ from pathlib import Path
3
  import subprocess
4
  import tempfile
5
  import imageio
 
6
  import streamlit as st
7
  import tensorflow as tf
8
  from modelutil import load_model
@@ -19,22 +20,17 @@ st.set_page_config(
19
  st.markdown("""
20
  <style>
21
  @import url('https://fonts.googleapis.com/css2?family=Syne:wght@400;700;800&family=Space+Mono&display=swap');
22
-
23
  html, body, [class*="css"] {
24
  font-family: 'Syne', sans-serif;
25
  background-color: #07070f;
26
  color: #e2e2f0;
27
  }
28
  .stApp { background-color: #07070f; }
29
-
30
- /* Sidebar */
31
  [data-testid="stSidebar"] {
32
  background-color: #0f0f1c !important;
33
  border-right: 1px solid #1e1e32;
34
  }
35
  [data-testid="stSidebar"] * { color: #9ca3af !important; }
36
-
37
- /* Headers */
38
  h1 {
39
  font-weight: 800 !important;
40
  background: linear-gradient(135deg, #f0f0ff, #c084fc, #818cf8);
@@ -43,8 +39,6 @@ h1 {
43
  letter-spacing: -0.03em;
44
  }
45
  h2, h3 { color: #c084fc !important; font-weight: 700 !important; }
46
-
47
- /* Info / success boxes */
48
  .stAlert { border-radius: 10px !important; }
49
  [data-testid="stInfo"] {
50
  background: #0f0f1c !important;
@@ -60,8 +54,6 @@ h2, h3 { color: #c084fc !important; font-weight: 700 !important; }
60
  font-family: 'Space Mono', monospace;
61
  font-size: 1.1rem;
62
  }
63
-
64
- /* Code / preformatted */
65
  code, pre {
66
  font-family: 'Space Mono', monospace !important;
67
  background: #0a0a16 !important;
@@ -69,11 +61,7 @@ code, pre {
69
  border-radius: 8px !important;
70
  font-size: 0.8rem !important;
71
  }
72
-
73
- /* Selectbox */
74
  [data-testid="stSelectbox"] label { color: #6b7280 !important; font-size: 0.8rem; letter-spacing: 0.1em; text-transform: uppercase; }
75
-
76
- /* Divider */
77
  hr { border-color: #1a1a2e !important; }
78
  </style>
79
  """, unsafe_allow_html=True)
@@ -130,7 +118,6 @@ if not options:
130
 
131
  selected_video = st.selectbox("**Choose a video**", options)
132
  file_path = DATA_DIR / selected_video
133
-
134
  st.divider()
135
 
136
  # ── Load model (cached) ───────────────────────────────────────────────────────
@@ -140,10 +127,21 @@ def get_model():
140
 
141
  model = get_model()
142
 
 
 
 
 
 
 
 
 
 
 
 
143
  # ── Two-column layout ─────────────────────────────────────────────────────────
144
  col1, col2 = st.columns(2, gap="large")
145
 
146
- # ── Column 1: Video preview ───────────────────────────────────────────────────
147
  with col1:
148
  st.markdown("### 📹 Original Video")
149
  st.info("Video converted to mp4 for browser playback")
@@ -165,13 +163,15 @@ with col1:
165
  if output_path and output_path.exists():
166
  output_path.unlink()
167
 
 
 
 
 
 
168
  # ── Column 2: Model inference ─────────────────────────────────────────────────
169
  with col2:
170
  st.markdown("### 🧠 Model Inference")
171
 
172
- # Load frames + alignment
173
- video_tensor, annotations = load_data(tf.convert_to_tensor(str(file_path)))
174
-
175
  # ── Mouth crop GIF ────────────────────────────────────────────────────────
176
  st.info("Mouth crop - what the model actually sees (grayscale · normalized)")
177
  gif_path = None
@@ -193,15 +193,6 @@ with col2:
193
 
194
  st.divider()
195
 
196
- # ── Ground truth ──────────────────────────────────────────────────────────
197
- st.info("Ground truth label (from `.align` file)")
198
- ground_truth = tf.strings.reduce_join(
199
- num_to_char(annotations)
200
- ).numpy().decode('utf-8')
201
- st.code(ground_truth, language=None)
202
-
203
- st.divider()
204
-
205
  # ── Raw tokens ────────────────────────────────────────────────────────────
206
  st.info("Raw CTC token indices from model output")
207
  yhat = model.predict(tf.expand_dims(video_tensor, axis=0), verbose=0)
@@ -214,14 +205,12 @@ with col2:
214
  prediction = tf.strings.reduce_join(
215
  num_to_char(decoded[0])
216
  ).numpy().decode('utf-8').strip()
217
-
218
  st.success(f"**Prediction:** {prediction}")
219
 
220
  # ── Confidence ────────────────────────────────────────────────────────────
221
- import numpy as np
222
  confidence = float(np.mean(np.max(yhat[0], axis=-1)) * 100)
223
  st.markdown(
224
  f"<p style='font-family:Space Mono,monospace;font-size:0.78rem;color:#4b5563;'>"
225
  f"AVG CONFIDENCE · <span style='color:#34d399'>{confidence:.1f}%</span></p>",
226
  unsafe_allow_html=True,
227
- )
 
3
  import subprocess
4
  import tempfile
5
  import imageio
6
+ import numpy as np
7
  import streamlit as st
8
  import tensorflow as tf
9
  from modelutil import load_model
 
20
  st.markdown("""
21
  <style>
22
  @import url('https://fonts.googleapis.com/css2?family=Syne:wght@400;700;800&family=Space+Mono&display=swap');
 
23
  html, body, [class*="css"] {
24
  font-family: 'Syne', sans-serif;
25
  background-color: #07070f;
26
  color: #e2e2f0;
27
  }
28
  .stApp { background-color: #07070f; }
 
 
29
  [data-testid="stSidebar"] {
30
  background-color: #0f0f1c !important;
31
  border-right: 1px solid #1e1e32;
32
  }
33
  [data-testid="stSidebar"] * { color: #9ca3af !important; }
 
 
34
  h1 {
35
  font-weight: 800 !important;
36
  background: linear-gradient(135deg, #f0f0ff, #c084fc, #818cf8);
 
39
  letter-spacing: -0.03em;
40
  }
41
  h2, h3 { color: #c084fc !important; font-weight: 700 !important; }
 
 
42
  .stAlert { border-radius: 10px !important; }
43
  [data-testid="stInfo"] {
44
  background: #0f0f1c !important;
 
54
  font-family: 'Space Mono', monospace;
55
  font-size: 1.1rem;
56
  }
 
 
57
  code, pre {
58
  font-family: 'Space Mono', monospace !important;
59
  background: #0a0a16 !important;
 
61
  border-radius: 8px !important;
62
  font-size: 0.8rem !important;
63
  }
 
 
64
  [data-testid="stSelectbox"] label { color: #6b7280 !important; font-size: 0.8rem; letter-spacing: 0.1em; text-transform: uppercase; }
 
 
65
  hr { border-color: #1a1a2e !important; }
66
  </style>
67
  """, unsafe_allow_html=True)
 
118
 
119
  selected_video = st.selectbox("**Choose a video**", options)
120
  file_path = DATA_DIR / selected_video
 
121
  st.divider()
122
 
123
  # ── Load model (cached) ───────────────────────────────────────────────────────
 
127
 
128
  model = get_model()
129
 
130
+ # ── Load frames + alignment (cached per video) ────────────────────────────────
131
+ @st.cache_data(show_spinner="Processing video...")
132
+ def get_video_data(path: str):
133
+ video_tensor, annotations = load_data(tf.convert_to_tensor(path))
134
+ ground_truth = tf.strings.reduce_join(
135
+ num_to_char(annotations)
136
+ ).numpy().decode('utf-8')
137
+ return video_tensor, annotations, ground_truth
138
+
139
+ video_tensor, annotations, ground_truth = get_video_data(str(file_path))
140
+
141
  # ── Two-column layout ─────────────────────────────────────────────────────────
142
  col1, col2 = st.columns(2, gap="large")
143
 
144
+ # ── Column 1: Video preview + Ground truth ────────────────────────────────────
145
  with col1:
146
  st.markdown("### 📹 Original Video")
147
  st.info("Video converted to mp4 for browser playback")
 
163
  if output_path and output_path.exists():
164
  output_path.unlink()
165
 
166
+ # ── Ground truth (moved here) ─────────────────────────────────────────────
167
+ st.divider()
168
+ st.info("Ground truth label (from `.align` file)")
169
+ st.code(ground_truth, language=None)
170
+
171
  # ── Column 2: Model inference ─────────────────────────────────────────────────
172
  with col2:
173
  st.markdown("### 🧠 Model Inference")
174
 
 
 
 
175
  # ── Mouth crop GIF ────────────────────────────────────────────────────────
176
  st.info("Mouth crop - what the model actually sees (grayscale · normalized)")
177
  gif_path = None
 
193
 
194
  st.divider()
195
 
 
 
 
 
 
 
 
 
 
196
  # ── Raw tokens ────────────────────────────────────────────────────────────
197
  st.info("Raw CTC token indices from model output")
198
  yhat = model.predict(tf.expand_dims(video_tensor, axis=0), verbose=0)
 
205
  prediction = tf.strings.reduce_join(
206
  num_to_char(decoded[0])
207
  ).numpy().decode('utf-8').strip()
 
208
  st.success(f"**Prediction:** {prediction}")
209
 
210
  # ── Confidence ────────────────────────────────────────────────────────────
 
211
  confidence = float(np.mean(np.max(yhat[0], axis=-1)) * 100)
212
  st.markdown(
213
  f"<p style='font-family:Space Mono,monospace;font-size:0.78rem;color:#4b5563;'>"
214
  f"AVG CONFIDENCE · <span style='color:#34d399'>{confidence:.1f}%</span></p>",
215
  unsafe_allow_html=True,
216
+ )