Upload folder using huggingface_hub

Files changed (4)

README.md CHANGED Viewed

@@ -9,7 +9,7 @@ datasets:
 metrics:
 - bleu
 model-index:
-- name: Yujivus/PRISM-Protomolecule
   results:
   - task:
       type: translation

 metrics:
 - bleu
 model-index:
+- name: Yujivus/PRISM-Molecule
   results:
   - task:
       type: translation

__pycache__/modeling_prism_gated.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/modeling_prism_gated.cpython-312.pyc and b/__pycache__/modeling_prism_gated.cpython-312.pyc differ

config.json CHANGED Viewed

@@ -8,5 +8,5 @@
     "num_encoder_layers": 6,
     "num_refining_layers": 0,
     "num_decoder_layers": 6,
-    "architecture": "PRISM_Protomolecule"
 }

     "num_encoder_layers": 6,
     "num_refining_layers": 0,
     "num_decoder_layers": 6,
+    "architecture": "PRISM_Molecule"
 }

modeling_prism_gated.py CHANGED Viewed

@@ -81,7 +81,7 @@ class PRISMLayer(nn.Module):
         self.filter_len = max_len
         # 1. THE GATE (Data Dependency)
-        self.gate_proj = nn.Linear(d_model * 2, d_model * 2)
         # 2. THE FILTER (Global Pattern)
         self.global_filter = nn.Parameter(torch.randn(d_model, max_len, dtype=torch.cfloat) * 0.02)
@@ -89,7 +89,7 @@ class PRISMLayer(nn.Module):
         # 3. INPUT MIXING
         self.mix_real = nn.Linear(d_model, d_model)
         self.mix_imag = nn.Linear(d_model, d_model)
         # 4. OUTPUT PROJECTION
         self.out_real = nn.Linear(d_model, d_model)
         self.out_imag = nn.Linear(d_model, d_model)
@@ -116,7 +116,7 @@ class PRISMLayer(nn.Module):
         x_cat = torch.cat([x_norm.real, x_norm.imag], dim=-1)
         gates = torch.sigmoid(self.gate_proj(x_cat))
         gate_r, gate_i = gates.chunk(2, dim=-1)
         # B. FILTER
         B, L, D = x_norm.shape
         x_freq = torch.fft.fft(x_norm, n=self.filter_len, dim=1)
@@ -235,7 +235,7 @@ class PRISMHybrid_RoPE(nn.Module):
             output = self.decoder(tgt_emb, context=refined_memory, mask=decoder_mask, context_mask=context_mask)
         return self.final_linear(output)
     # ... (generate function remains the same) ...
     @torch.no_grad()
     def generate(self, src, max_length, num_beams=5):

         self.filter_len = max_len
         # 1. THE GATE (Data Dependency)
+        self.gate_proj = nn.Linear(d_model * 2, d_model * 2)
         # 2. THE FILTER (Global Pattern)
         self.global_filter = nn.Parameter(torch.randn(d_model, max_len, dtype=torch.cfloat) * 0.02)
         # 3. INPUT MIXING
         self.mix_real = nn.Linear(d_model, d_model)
         self.mix_imag = nn.Linear(d_model, d_model)
         # 4. OUTPUT PROJECTION
         self.out_real = nn.Linear(d_model, d_model)
         self.out_imag = nn.Linear(d_model, d_model)
         x_cat = torch.cat([x_norm.real, x_norm.imag], dim=-1)
         gates = torch.sigmoid(self.gate_proj(x_cat))
         gate_r, gate_i = gates.chunk(2, dim=-1)
         # B. FILTER
         B, L, D = x_norm.shape
         x_freq = torch.fft.fft(x_norm, n=self.filter_len, dim=1)
             output = self.decoder(tgt_emb, context=refined_memory, mask=decoder_mask, context_mask=context_mask)
         return self.final_linear(output)
     # ... (generate function remains the same) ...
     @torch.no_grad()
     def generate(self, src, max_length, num_beams=5):