raj5517 committed on
Commit
124970a
Β·
verified Β·
1 Parent(s): 12c01e9

Upload modeling_multimodal.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. modeling_multimodal.py +119 -0
modeling_multimodal.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Multi-Modal Representation Learning Framework
3
+ Wraps the custom PyTorch architecture in a HuggingFace-compatible class.
4
+ """
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+ from huggingface_hub import PyTorchModelHubMixin
10
+
11
+
12
# ── Encoder ────────────────────────────────────────────────────
class TabularEncoder(nn.Module):
    """MLP encoder mapping a tabular feature vector to a normalized embedding.

    Stack: Linear -> BatchNorm1d -> ReLU -> Dropout(0.2)
           -> Linear -> ReLU -> Linear -> LayerNorm.
    """

    def __init__(self, input_dim, embedding_dim=64, hidden_dim=128):
        super().__init__()
        half_hidden = hidden_dim // 2
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, half_hidden),
            nn.ReLU(),
            nn.Linear(half_hidden, embedding_dim),
            nn.LayerNorm(embedding_dim),
        ]
        self.encoder = nn.Sequential(*layers)

    def forward(self, x):
        """Encode a (batch, input_dim) tensor into a (batch, embedding_dim) tensor."""
        return self.encoder(x)
28
+
29
class AcademicEncoder(TabularEncoder):
    """Encoder for the 5 academic features (gpa, attendance_pct,
    assignment_completion, exam_avg, late_submissions)."""

    def __init__(self, embedding_dim=64):
        # Academic modality has a fixed 5-dimensional input.
        super().__init__(5, embedding_dim=embedding_dim)
32
+
33
class BehavioralEncoder(TabularEncoder):
    """Encoder for the 5 behavioral features (library_visits, session_duration,
    peer_interaction, forum_posts, login_variance)."""

    def __init__(self, embedding_dim=64):
        # Behavioral modality has a fixed 5-dimensional input.
        super().__init__(5, embedding_dim=embedding_dim)
36
+
37
class ActivityEncoder(TabularEncoder):
    """Encoder for the 5 activity features (steps_per_day, sleep_hours,
    active_minutes, sedentary_hours, resting_hr)."""

    def __init__(self, embedding_dim=64):
        # Activity modality has a fixed 5-dimensional input.
        super().__init__(5, embedding_dim=embedding_dim)
40
+
41
+
42
# ── Fusion ─────────────────────────────────────────────────────
class CrossModalAttentionFusion(nn.Module):
    """Fuse per-modality embeddings into one unified vector via attention.

    Each modality gets a scalar score from its own linear head over the
    concatenated embeddings; softmax normalizes the scores across modalities,
    and each embedding is scaled by its weight before the final projection.

    Fix vs. original: the attention weights were computed but never applied —
    the projection consumed the raw concatenation, so ``attn_weights`` had no
    effect on ``unified``. The weights are now multiplied into the modality
    embeddings. Submodule names and parameter shapes (``attention_heads``,
    ``projection``) are unchanged, so existing state dicts still load.

    Args:
        embedding_dim: size of each per-modality embedding.
        num_modalities: number of input embeddings expected in ``forward``.
        unified_dim: size of the fused output embedding.
    """

    def __init__(self, embedding_dim=64, num_modalities=3, unified_dim=128):
        super().__init__()
        self.num_modalities = num_modalities
        # One scalar scoring head per modality, each reading the full concat.
        self.attention_heads = nn.ModuleList([
            nn.Linear(embedding_dim * num_modalities, 1)
            for _ in range(num_modalities)
        ])
        self.projection = nn.Sequential(
            nn.Linear(embedding_dim * num_modalities, unified_dim),
            nn.ReLU(),
            nn.LayerNorm(unified_dim)
        )

    def forward(self, embeddings):
        """Fuse a list of modality embeddings.

        Args:
            embeddings: list of ``num_modalities`` tensors, each
                (B, embedding_dim).

        Returns:
            unified (B, unified_dim): attention-weighted fused embedding.
            attn_weights (B, num_modalities): softmax weights per modality.
        """
        concat = torch.cat(embeddings, dim=-1)
        # (B, num_modalities): one score per modality head.
        scores = torch.stack([
            head(concat) for head in self.attention_heads
        ], dim=1).squeeze(-1)
        attn_weights = F.softmax(scores, dim=-1)
        # Apply the weights: scale each modality embedding by its weight,
        # then project the weighted concatenation.
        weighted = torch.cat(
            [attn_weights[:, i:i + 1] * emb for i, emb in enumerate(embeddings)],
            dim=-1,
        )
        unified = self.projection(weighted)
        return unified, attn_weights
65
+
66
+
67
# ── Full Model (HuggingFace compatible) ───────────────────────
class MultiModalFramework(nn.Module, PyTorchModelHubMixin):
    """Multi-Modal Representation Learning Framework.

    Fuses heterogeneous tabular signals into unified embeddings
    using cross-modal attention and SimCLR contrastive training.

    Inputs:
        academic (B, 5): gpa, attendance_pct, assignment_completion,
                         exam_avg, late_submissions
        behavioral (B, 5): library_visits, session_duration,
                           peer_interaction, forum_posts, login_variance
        activity (B, 5): steps_per_day, sleep_hours, active_minutes,
                         sedentary_hours, resting_hr

    Outputs:
        unified (B, 128): fused embedding vector
        attn_weights (B, 3): modality attention [academic, behavioral, activity]
    """

    def __init__(self, embedding_dim=64, unified_dim=128):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.unified_dim = unified_dim

        # One encoder per modality, registered by name.
        self.encoders = nn.ModuleDict({
            'academic': AcademicEncoder(embedding_dim),
            'behavioral': BehavioralEncoder(embedding_dim),
            'activity': ActivityEncoder(embedding_dim),
        })
        self.fusion = CrossModalAttentionFusion(
            embedding_dim=embedding_dim,
            num_modalities=3,
            unified_dim=unified_dim,
        )

    def forward(self, academic, behavioral, activity):
        """Encode each modality and fuse; returns (unified, attn_weights)."""
        modality_inputs = (
            ('academic', academic),
            ('behavioral', behavioral),
            ('activity', activity),
        )
        per_modality = [self.encoders[name](x) for name, x in modality_inputs]
        return self.fusion(per_modality)

    def encode(self, academic, behavioral, activity):
        """Returns only the unified embedding. Use this for downstream tasks."""
        return self.forward(academic, behavioral, activity)[0]

    def get_attention(self, academic, behavioral, activity):
        """Returns only attention weights. Use this for explainability."""
        return self.forward(academic, behavioral, activity)[1]