Spaces:

accelera-ai
/

open_mp_generator

Sleeping

App Files Files Community

mohamedahraf273 committed 24 days ago

Commit

35b065e

1 Parent(s): e8aab00

update notebook

Browse files

Files changed (1) hide show

generator.ipynb +48 -170

generator.ipynb CHANGED Viewed

@@ -645,201 +645,79 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
-   "id": "a49bb85f",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n",
-      "IMPORTANT: If you haven't re-run the TRAINING loop (Cell 9)\n",
-      "after applying the Transpose fix, the results below will likely\n",
-      "be poor/incomplete because the model hasn't updated its weights\n",
-      "correctly yet.\n",
-      "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n",
-      "\n",
-      "Running generation tests on validation set (True Greedy Decoding):\n",
-      "\n",
-      "Example 0:\n",
-      "Input:      [CLS:parallel_for] for (i = 0; i < 16; ++i)\n",
       "  ;\n",
       "\n",
-      "Target:     omp target parallel for simd simdlen(4 4)\n",
-      "Prediction: omp parallel for shared(,k,,,,,,,,,,,,,pr) shared(L,,,,,,,,,,,,,,,,\n",
-      "------------------------------------------------------------\n",
-      "Example 10:\n",
-      "Input:      [CLS:reduction] for (i = 1; i < (500 - 1); i++)\n",
-      "{\n",
-      "  iIndex = i * dim2;\n",
-      "  jIndex = 0;\n",
-      "  for (j = 1; j < (500 - 1); j++)\n",
-      "  {\n",
-      "    jIndex += 500;\n",
-      "    for (k = 1; k < (500 - 1); k++)\n",
-      "    {\n",
-      "      index = (iIndex + jIndex) + k;\n",
-      "      compute_it = old[index] * need;\n",
-      "      aggregate += compute_it / gimmie;\n",
-      "      accumulator = 0;\n",
-      "      long subsum1 = 0;\n",
-      "      long subsum2 = 0;\n",
-      "      long subsum3 = 0;\n",
-      "      for (z = 0; z < 27; z += 3)\n",
-      "      {\n",
-      "        subsum1 += old[index + arr[z]];\n",
-      "        subsum2 += old[index + arr[z + 1]];\n",
-      "        subsum3 += old[index + arr[z + 2]];\n",
-      "      }\n",
-      "\n",
-      "      accumulator += (subsum1 + subsum2) + subsum3;\n",
-      "      long value = accumulator / 27;\n",
-      "      int par = value / 100;\n",
-      "      a0 += ((unsigned) par) >> 31;\n",
-      "      a0 += !(par ^ 0);\n",
-      "      a1 += !(par ^ 1);\n",
-      "      a2 += !(par ^ 2);\n",
-      "      a3 += !(par ^ 3);\n",
-      "      a4 += !(par ^ 4);\n",
-      "      a5 += !(par ^ 5);\n",
-      "      a6 += !(par ^ 6);\n",
-      "      a7 += !(par ^ 7);\n",
-      "      a8 += !(par ^ 8);\n",
-      "      int64_t tmp = ((int64_t) par) - 9;\n",
-      "      a9 += (tmp >> 63) + 1;\n",
-      "      new[index] = value;\n",
-      "    }\n",
-      "\n",
-      "  }\n",
-      "\n",
-      "}\n",
-      "\n",
-      "Target:     omp parallel for private(j, k, z, accumulator, jIndex, index, iIndex, compute_it) reduction(+: aggregate, a0,a1,a2,a3,a4,a5,a6,a7,a8,a9)\n",
-      "Prediction: omp parallel for reduction(+:data,,,,,,,,,,,,,,\n",
-      "------------------------------------------------------------\n",
-      "Example 20:\n",
-      "Input:      [CLS:parallel_for] for (i = 0; i < 16; ++i)\n",
-      "  ;\n",
-      "\n",
-      "Target:     omp parallel for simd firstprivate(, )\n",
-      "Prediction: omp parallel for shared(,k,,,,,,,,,,,,,pr) shared(L,,,,,,,,,,,,,,,,\n",
-      "------------------------------------------------------------\n",
-      "Example 30:\n",
-      "Input:      [CLS:parallel_for] for (i = 0; i < n; i++)\n",
-      "{\n",
-      "  x[i] = 1.0;\n",
-      "  y[i] = 2.0;\n",
-      "}\n",
-      "\n",
-      "Target:     omp parallel for private(i)\n",
-      "Prediction: omp parallel for shared(gen,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n",
-      "------------------------------------------------------------\n"
      ]
     }
    ],
    "source": [
     "model.eval()\n",
     "\n",
-    "def generate_sentence(model, input_text, tokenizer, max_len=150, device='cuda'):\n",
-    "    \"\"\"\n",
-    "    Greedy decoding function that generates tokens until <EOS> or max_len.\n",
-    "    This mimics the model's forward pass but allows dynamic length generation.\n",
-    "    \"\"\"\n",
     "    model.eval()\n",
-    "    \n",
-    "    # Tokenize input\n",
-    "    input_ids = tokenizer.encode(input_text, max_length=500, add_special_tokens=True)\n",
-    "    src_tensor = torch.LongTensor(input_ids).unsqueeze(0).to(device) # [1, src_len]\n",
-    "    src_len = torch.LongTensor([len(input_ids)]).to(device)          # [1]\n",
-    "    \n",
     "    with torch.no_grad():\n",
-    "        # Encode\n",
-    "        encoder_outputs, hidden, cell = model.encoder(src_tensor, src_len)\n",
-    "        \n",
-    "        # Create mask (same logic as in Generator.forward)\n",
-    "        max_src_len = encoder_outputs.shape[1]\n",
-    "        mask = torch.arange(max_src_len, device=device).unsqueeze(0) < src_len.unsqueeze(1)\n",
-    "        mask = mask.float()\n",
-    "        \n",
-    "        # Project hidden/cell states from Encoder to Decoder size\n",
-    "        # Reshape to [num_layers, 2, batch, hidden] to combine bidirectional states\n",
     "        hidden = hidden.view(model.encoder.num_layers, 2, 1, model.encoder.hidden_size)\n",
     "        hidden = torch.cat((hidden[:, 0], hidden[:, 1]), dim=2)\n",
     "        hidden = model.hidden_projection(hidden)\n",
-    "        \n",
     "        cell = cell.view(model.encoder.num_layers, 2, 1, model.encoder.hidden_size)\n",
     "        cell = torch.cat((cell[:, 0], cell[:, 1]), dim=2)\n",
     "        cell = model.cell_projection(cell)\n",
-    "        \n",
-    "        # Start with <SOS>\n",
-    "        trg_indexes = [tokenizer.char2idx['<SOS>']]\n",
-    "        \n",
-    "        for i in range(max_len):\n",
-    "            trg_tensor = torch.LongTensor([trg_indexes[-1]]).to(device) # [1]\n",
-    "            \n",
-    "            output, hidden, cell, _ = model.decoder(\n",
-    "                trg_tensor, hidden, cell, encoder_outputs, mask\n",
-    "            )\n",
-    "            \n",
-    "            # Greedy prediction: take token with highest probability\n",
-    "            pred_token = output.argmax(1).item()\n",
-    "            trg_indexes.append(pred_token)\n",
-    "            \n",
-    "            if pred_token == tokenizer.char2idx['<EOS>']:\n",
-    "                break\n",
-    "                \n",
-    "    # Decode integers back to string\n",
-    "    return tokenizer.decode(trg_indexes)\n",
-    "\n",
-    "# ---------------------------------------------------------\n",
-    "print(\"!\"*60)\n",
-    "print(\"IMPORTANT: If you haven't re-run the TRAINING loop (Cell 9)\")\n",
-    "print(\"after applying the Transpose fix, the results below will likely\")\n",
-    "print(\"be poor/incomplete because the model hasn't updated its weights\")\n",
-    "print(\"correctly yet.\")\n",
-    "print(\"!\"*60 + \"\\n\")\n",
     "\n",
-    "print(\"Running generation tests on validation set (True Greedy Decoding):\\n\")\n",
-    "test_indices = [0, 10, 20, 30]\n",
-    "# Ensure indices are within bounds\n",
-    "test_indices = [i for i in test_indices if i < len(val_inputs)]\n",
-    "\n",
-    "for i in test_indices:\n",
-    "    input_text = val_inputs[i]\n",
-    "    target_text = val_outputs[i]\n",
-    "    \n",
-    "    prediction = generate_sentence(model, input_text, tokenizer, device=device)\n",
-    "    \n",
-    "    print(f\"Example {i}:\")\n",
-    "    print(f\"Input:      {input_text}\")\n",
-    "    print(f\"Target:     {target_text}\")\n",
-    "    print(f\"Prediction: {prediction}\")\n",
-    "    print(\"-\" * 60)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "85bd9571",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# ---------------------------------------------------------\n",
-    "# RUN THIS CELL ONLY IF YOU WANT TO RESET TRAINING\n",
-    "# This initializes the model weights from scratch. \n",
-    "# Run this, and then run the TRAINING LOOP (Cell 9) again.\n",
-    "# ---------------------------------------------------------\n",
-    "\n",
-    "print(\"↺ RESETTING MODEL & OPTIMIZER...\")\n",
-    "model = Generator(encoder, decoder, device).to(device)\n",
-    "model.apply(model._init_weights)\n",
     "\n",
-    "optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
-    "training_history = {'train_loss': [], 'valid_loss': []}\n",
-    "best_valid_loss = float('inf')\n",
     "\n",
-    "print(\"✓ Model reset. Now scroll up and run the TRAINING LOOP again.\")"
    ]
   }
  ],

   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "id": "6d9a8e25",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Loaded checkpoint from best_model.pth (epoch 14)\n",
+      "Sample input (truncated): [CLS:parallel_for] for (i = 0; i < 16; ++i)\n",
       "  ;\n",
       "\n",
+      "Reference pragma: omp target parallel for simd simdlen(4 4)\n",
+      "Model prediction: omp parallel for simd lastprivate(\n"
      ]
     }
    ],
    "source": [
+    "import os\n",
+    "\n",
+    "checkpoint_path = \"best_model.pth\"\n",
+    "if not os.path.exists(checkpoint_path):\n",
+    "    raise FileNotFoundError(\"Run training first so 'best_model.pth' exists.\")\n",
+    "\n",
+    "checkpoint = torch.load(checkpoint_path, map_location=device)\n",
+    "model.load_state_dict(checkpoint['model_state_dict'])\n",
     "model.eval()\n",
+    "print(f\"Loaded checkpoint from {checkpoint_path} (epoch {checkpoint.get('epoch', '?')})\")\n",
     "\n",
+    "SOS_IDX = tokenizer.char2idx['<SOS>']\n",
+    "EOS_IDX = tokenizer.char2idx['<EOS>']\n",
+    "\n",
+    "def greedy_generate(code_snippet: str, cls: str = \"parallel\", max_len: int = 80) -> str:\n",
+    "    \"\"\"Greedy decode a pragma for a single code snippet.\"\"\"\n",
     "    model.eval()\n",
+    "    text = code_snippet if code_snippet.startswith(\"[CLS:\") else f\"[CLS:{cls}] {code_snippet}\"\n",
+    "    input_ids = tokenizer.encode(text, max_length=500, add_special_tokens=True)\n",
+    "    input_len = next((i for i, tok in enumerate(input_ids) if tok == PAD_IDX), len(input_ids))\n",
+    "    input_tensor = torch.tensor([input_ids], device=device)\n",
+    "    input_len_tensor = torch.tensor([input_len], device=device)\n",
+    "\n",
     "    with torch.no_grad():\n",
+    "        enc_outs, hidden, cell = model.encoder(input_tensor, input_len_tensor)\n",
+    "        mask = (torch.arange(enc_outs.size(1), device=device).unsqueeze(0) < input_len_tensor.unsqueeze(1)).float()\n",
+    "\n",
     "        hidden = hidden.view(model.encoder.num_layers, 2, 1, model.encoder.hidden_size)\n",
     "        hidden = torch.cat((hidden[:, 0], hidden[:, 1]), dim=2)\n",
     "        hidden = model.hidden_projection(hidden)\n",
+    "\n",
     "        cell = cell.view(model.encoder.num_layers, 2, 1, model.encoder.hidden_size)\n",
     "        cell = torch.cat((cell[:, 0], cell[:, 1]), dim=2)\n",
     "        cell = model.cell_projection(cell)\n",
     "\n",
+    "        input_token = torch.tensor([SOS_IDX], device=device)\n",
+    "        generated = []\n",
+    "        for _ in range(max_len):\n",
+    "            output, hidden, cell, _ = model.decoder(input_token, hidden, cell, enc_outs, mask)\n",
+    "            top1 = output.argmax(1)\n",
+    "            token_id = top1.item()\n",
+    "            if token_id == EOS_IDX:\n",
+    "                break\n",
+    "            generated.append(token_id)\n",
+    "            input_token = top1\n",
     "\n",
+    "    return tokenizer.decode(generated)\n",
     "\n",
+    "# Quick sanity check on a validation example\n",
+    "sample_input = val_inputs[0]\n",
+    "reference = val_outputs[0]\n",
+    "prediction = greedy_generate(sample_input)\n",
+    "print(\"Sample input (truncated):\", sample_input[:140] + \"...\" if len(sample_input) > 140 else sample_input)\n",
+    "print(\"Reference pragma:\", reference)\n",
+    "print(\"Model prediction:\", prediction)\n"
    ]
   }
  ],