Daksh0505 committed on
Commit
1bb02a6
·
verified ·
1 Parent(s): bc44d3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -2
app.py CHANGED
@@ -221,7 +221,7 @@ if st.session_state.translation:
221
  # ------------------------------------------------
222
  # Learning Header
223
  # ------------------------------------------------
224
- st.subheader("Leaning how it works")
225
 
226
  # ------------------------------------------------
227
  # Self Attention Section
@@ -312,7 +312,70 @@ with st.expander("🔹 Fixed-Length vs Variable-Length Tasks"):
312
  - Example: Machine translation, summarization, speech recognition.
313
  - Seq2Seq models are designed to handle this flexibility.
314
  """)
315
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
  # ------------------------------------------------
317
  # Show model architecture
318
  # ------------------------------------------------
 
221
  # ------------------------------------------------
222
  # Learning Header
223
  # ------------------------------------------------
224
+ st.subheader(" Understanding the Model")
225
 
226
  # ------------------------------------------------
227
  # Self Attention Section
 
312
  - Example: Machine translation, summarization, speech recognition.
313
  - Seq2Seq models are designed to handle this flexibility.
314
  """)
315
+
316
+ # ------------------------------------------------
317
+ # Mathematics Expanders (Advanced / Optional)
318
+ # ------------------------------------------------
319
+ st.subheader("🧮 Mathematics Behind the Model")
320
+
321
+ with st.expander("🔹 Self-Attention Equations", expanded=False):
322
+ st.markdown(r"""
323
+ The attention function is computed as:
324
+
325
+ \[
326
+ \text{Attention}(Q,K,V) = \text{softmax}\left(\frac{Q K^T}{\sqrt{d_k}}\right) V
327
+ \]
328
+
329
+ Where:
330
+ - \(Q\) = Query matrix
331
+ - \(K\) = Key matrix
332
+ - \(V\) = Value matrix
333
+ - \(d_k\) = Dimension of key vectors
334
+
335
+ This allows the model to compute a weighted sum of values based on relevance.
336
+ """)
337
+
338
+ with st.expander("🔹 Multi-Head Attention Equations", expanded=False):
339
+ st.markdown(r"""
340
+ Multi-Head Attention combines multiple self-attention heads:
341
+
342
+ \[
343
+ \text{MultiHead}(Q,K,V) = \text{Concat}(\text{head}_1, ..., \text{head}_h) W^O
344
+ \]
345
+
346
+ Each head:
347
+ \[
348
+ \text{head}_i = \text{Attention}(Q W_i^Q, K W_i^K, V W_i^V)
349
+ \]
350
+
351
+ Where \(W_i^Q, W_i^K, W_i^V, W^O\) are learnable projection matrices.
352
+ """)
353
+
354
+ with st.expander("🔹 Cross-Attention / Encoder-Decoder Attention", expanded=False):
355
+ st.markdown(r"""
356
+ Cross-Attention computes attention using decoder queries and encoder outputs:
357
+
358
+ \[
359
+ \text{Context}_t = \text{Attention}(Q_t, K_{enc}, V_{enc})
360
+ \]
361
+
362
+ - \(Q_t\) = decoder hidden state at timestep \(t\)
363
+ - \(K_{enc}, V_{enc}\) = encoder outputs
364
+ """)
365
+
366
+ with st.expander("🔹 Seq2Seq Decoder Step", expanded=False):
367
+ st.markdown(r"""
368
+ At each decoder timestep:
369
+
370
+ \[
371
+ s_t, c_t = \text{LSTM}(y_{t-1}, s_{t-1}, c_{t-1})
372
+ \]
373
+
374
+ \[
375
+ \text{Output}_t = \text{Dense}(\text{Concat}(s_t, \text{Context}_t))
376
+ \]
377
+ """)
378
+
379
  # ------------------------------------------------
380
  # Show model architecture
381
  # ------------------------------------------------