Spaces:
Running
Running
Commit ·
8c8be69
1
Parent(s): b97efb5
feat: completed all pedagogical diagrams for CME 295 including Data Quality and Parallelism
Browse files
CME295-Transformers/index.html
CHANGED
|
@@ -962,15 +962,72 @@
|
|
| 962 |
</div>
|
| 963 |
<div class="list-item">
|
| 964 |
<div class="list-num">02</div>
|
| 965 |
-
<div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 966 |
</div>
|
| 967 |
<div class="list-item">
|
| 968 |
<div class="list-num">03</div>
|
| 969 |
-
<div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 970 |
</div>
|
| 971 |
<div class="list-item">
|
| 972 |
<div class="list-num">04</div>
|
| 973 |
-
<div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 974 |
</div>
|
| 975 |
`
|
| 976 |
},
|
|
|
|
| 962 |
</div>
|
| 963 |
<div class="list-item">
|
| 964 |
<div class="list-num">02</div>
|
| 965 |
+
<div>
|
| 966 |
+
<strong>Data Mixing &amp; Quality:</strong> The "garbage in, garbage out" rule.
|
| 967 |
+
<!-- Diagram: a left-to-right pipeline of three stages (raw web box, dedupe box, quality-classifier box) joined by arrows. The arrowheads reference markers #arrow-dim and #arrow-green, which are not defined in this fragment; presumably they live in a shared defs block elsewhere on the page (TODO confirm). -->
|
| 968 |
+
<div class="visual-demo" style="margin-top: 15px; background: rgba(0,0,0,0.2); padding: 15px; border-radius: 8px; border: 1px solid var(--green);">
|
| 969 |
+
<h4 style="color: var(--green); margin-bottom: 10px; font-size: 0.85em;">Data Hygiene Pipeline</h4>
|
| 970 |
+
<svg viewBox="0 0 400 120" style="width: 100%; height: auto;" role="img" aria-label="Data hygiene pipeline: raw web data flows into MinHash deduplication and then into a quality classifier">
|
| 971 |
+
<text x="20" y="20" fill="var(--text-dim)" font-size="10">Raw Web (Petabytes)</text>
|
| 972 |
+
<rect x="20" y="30" width="60" height="40" fill="var(--surface)" stroke="var(--text-dim)" />
|
| 973 |
+
<path d="M 80 50 L 120 50" stroke="var(--text-dim)" marker-end="url(#arrow-dim)" />
|
| 974 |
+
<!-- Stage 2: deduplication box and the arrow leaving it -->
|
| 975 |
+
<rect x="120" y="30" width="100" height="40" rx="4" fill="var(--surface)" stroke="var(--cyan)" />
|
| 976 |
+
<text x="125" y="55" fill="var(--cyan)" font-size="10">Dedupe (MinHash)</text>
|
| 977 |
+
<path d="M 220 50 L 260 50" stroke="var(--text-dim)" marker-end="url(#arrow-dim)" />
|
| 978 |
+
<!-- Stage 3: quality classifier box and the green output arrow -->
|
| 979 |
+
<rect x="260" y="30" width="100" height="40" rx="4" fill="rgba(46, 204, 113, 0.2)" stroke="var(--green)" />
|
| 980 |
+
<text x="268" y="55" fill="var(--text)" font-size="10">Quality Classifier</text>
|
| 981 |
+
<path d="M 360 50 L 380 50" stroke="var(--green)" marker-end="url(#arrow-green)" />
|
| 982 |
+
</svg>
|
| 983 |
+
<p style="font-size: 0.75em; color: var(--text-dim);">Filtering removes 90% of raw data to ensure only high-quality information reaches the model.</p>
|
| 984 |
+
</div>
|
| 985 |
+
</div>
|
| 986 |
</div>
|
| 987 |
<div class="list-item">
|
| 988 |
<div class="list-num">03</div>
|
| 989 |
+
<div>
|
| 990 |
+
<strong>Learning Rate Schedules:</strong> Maintaining training stability.
|
| 991 |
+
<!-- Diagram: two axis lines plus one orange curve. The curve rises linearly from (40,100) to the peak at (80,30), then decays to (360,100). The decay is a cubic Bezier with control points at 37% and 63% of the horizontal span (the ease-in-out-sine approximation), so the tangent is flat at both the peak and the end of training, as a half-cosine is. -->
|
| 992 |
+
<div class="visual-demo" style="margin-top: 15px; background: rgba(0,0,0,0.2); padding: 15px; border-radius: 8px; border: 1px solid var(--orange);">
|
| 993 |
+
<h4 style="color: var(--orange); margin-bottom: 10px; font-size: 0.85em;">Cosine Decay with Warmup</h4>
|
| 994 |
+
<svg viewBox="0 0 400 120" style="width: 100%; height: auto;" role="img" aria-label="Learning rate over training steps: a short linear warmup to the peak followed by a cosine-shaped decay">
|
| 995 |
+
<line x1="40" y1="100" x2="360" y2="100" stroke="var(--text-dim)" />
|
| 996 |
+
<line x1="40" y1="20" x2="40" y2="100" stroke="var(--text-dim)" />
|
| 997 |
+
<path d="M 40 100 L 80 30 C 183.6 30 256.4 100 360 100" fill="none" stroke="var(--orange)" stroke-width="2" />
|
| 998 |
+
<text x="45" y="45" fill="var(--orange)" font-size="8">Warmup</text>
|
| 999 |
+
<text x="180" y="45" fill="var(--orange)" font-size="8">Cosine Decay</text>
|
| 1000 |
+
<text x="300" y="115" fill="var(--text-dim)" font-size="9">Training Steps</text>
|
| 1001 |
+
</svg>
|
| 1002 |
+
</div>
|
| 1003 |
+
</div>
|
| 1004 |
</div>
|
| 1005 |
<div class="list-item">
|
| 1006 |
<div class="list-num">04</div>
|
| 1007 |
+
<div>
|
| 1008 |
+
<strong>3D Parallelism:</strong> Splitting the model across huge H100 clusters.
|
| 1009 |
+
<!-- Diagram: three groups side by side, one per parallelism strategy. Each group uses its own colour (cyan, orange, purple) for shapes and label so the strategies stay distinguishable. The pipeline arrow references marker #arrow-orange, which is not defined in this fragment; presumably it is declared elsewhere on the page (TODO confirm). -->
|
| 1010 |
+
<div class="visual-demo" style="margin-top: 15px; background: rgba(0,0,0,0.2); padding: 15px; border-radius: 8px; border: 1px solid var(--cyan);">
|
| 1011 |
+
<h4 style="color: var(--cyan); margin-bottom: 10px; font-size: 0.85em;">Tensor vs. Pipeline vs. Data Parallelism</h4>
|
| 1012 |
+
<svg viewBox="0 0 400 120" style="width: 100%; height: auto;" role="img" aria-label="Three parallelism strategies side by side: data parallelism over the batch, pipeline parallelism over layers, and tensor parallelism over weights">
|
| 1013 |
+
<!-- Data Parallelism: two identical stacked boxes -->
|
| 1014 |
+
<rect x="20" y="20" width="30" height="30" fill="var(--cyan)" opacity="0.3" stroke="var(--cyan)" />
|
| 1015 |
+
<rect x="20" y="60" width="30" height="30" fill="var(--cyan)" opacity="0.3" stroke="var(--cyan)" />
|
| 1016 |
+
<text x="20" y="105" fill="var(--cyan)" font-size="8">Data Par (Batch)</text>
|
| 1017 |
+
|
| 1018 |
+
<!-- Pipeline Parallelism: two boxes joined by a left-to-right arrow -->
|
| 1019 |
+
<rect x="150" y="40" width="30" height="30" fill="var(--orange)" opacity="0.4" />
|
| 1020 |
+
<path d="M 180 55 L 210 55" stroke="var(--orange)" marker-end="url(#arrow-orange)" />
|
| 1021 |
+
<rect x="210" y="40" width="30" height="30" fill="var(--orange)" opacity="0.6" />
|
| 1022 |
+
<text x="160" y="105" fill="var(--orange)" font-size="8">Pipeline Par (Layers)</text>
|
| 1023 |
+
|
| 1024 |
+
<!-- Tensor Parallelism: two wide slices; stroke and label reuse the purple of the fill instead of the cyan already used by the data-parallel group -->
|
| 1025 |
+
<rect x="300" y="40" width="60" height="15" fill="rgba(163, 113, 247, 0.5)" stroke="rgb(163, 113, 247)" />
|
| 1026 |
+
<rect x="300" y="60" width="60" height="15" fill="rgba(163, 113, 247, 0.5)" stroke="rgb(163, 113, 247)" />
|
| 1027 |
+
<text x="300" y="105" fill="rgb(163, 113, 247)" font-size="8">Tensor Par (Weights)</text>
|
| 1028 |
+
</svg>
|
| 1029 |
+
</div>
|
| 1030 |
+
</div>
|
| 1031 |
</div>
|
| 1032 |
`
|
| 1033 |
},
|