Reynier committed on
Commit
3cf1666
·
verified ·
1 Parent(s): 04bdac2

Add Labin_wl.ipynb

Browse files
Files changed (1) hide show
  1. notebooks/Labin_wl.ipynb +1076 -0
notebooks/Labin_wl.ipynb ADDED
@@ -0,0 +1,1076 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4"
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "accelerator": "GPU"
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "code",
21
+ "source": [
22
+ "from google.colab import drive # Importing the library to mount Google Drive\n",
23
+ "drive.mount('/content/drive') # Mounting Google Drive in Colab environment"
24
+ ],
25
+ "metadata": {
26
+ "colab": {
27
+ "base_uri": "https://localhost:8080/"
28
+ },
29
+ "id": "71FJxLKc1343",
30
+ "outputId": "656465ff-fbd4-42d4-ebfe-bba89e051db8"
31
+ },
32
+ "execution_count": null,
33
+ "outputs": [
34
+ {
35
+ "output_type": "stream",
36
+ "name": "stdout",
37
+ "text": [
38
+ "Mounted at /content/drive\n"
39
+ ]
40
+ }
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "source": [
46
+ "%%capture\n",
47
+ "!pip install keras_self_attention"
48
+ ],
49
+ "metadata": {
50
+ "id": "p6_ioHiTyN37"
51
+ },
52
+ "execution_count": null,
53
+ "outputs": []
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": null,
58
+ "metadata": {
59
+ "colab": {
60
+ "base_uri": "https://localhost:8080/"
61
+ },
62
+ "id": "YXAm488r1DJw",
63
+ "outputId": "14641690-2b73-48ac-90b3-3417d02571f3"
64
+ },
65
+ "outputs": [
66
+ {
67
+ "output_type": "stream",
68
+ "name": "stdout",
69
+ "text": [
70
+ " domain family label\n",
71
+ "0 nailconsiderable.ru suppobox dga\n",
72
+ "1 stilldelight.net suppobox dga\n",
73
+ "2 kimberleekatheryn.net suppobox dga\n",
74
+ "3 soilbeen.net suppobox dga\n",
75
+ "4 visitform.net suppobox dga\n",
76
+ "... ... ... ...\n",
77
+ "159995 dhuhaa.com legit notdga\n",
78
+ "159996 sdmetalcrew.org legit notdga\n",
79
+ "159997 melbcampcontuligol.ga legit notdga\n",
80
+ "159998 pl-enthusiast.net legit notdga\n",
81
+ "159999 rd-forum.ru legit notdga\n",
82
+ "\n",
83
+ "[160000 rows x 3 columns]\n"
84
+ ]
85
+ }
86
+ ],
87
+ "source": [
88
+ "import pandas as pd\n",
89
+ "\n",
90
+ "# File paths\n",
91
+ "train_df_file = \"/content/drive/My Drive/MOE_DGA/train_wl.csv\"\n",
92
+ "\n",
93
+ "train_df = pd.read_csv(train_df_file)\n",
94
+ "\n",
95
+ "#train_df = train_df.rename(columns={\"label\": \"Label\"})\n",
96
+ "\n",
97
+ "\n",
98
+ "print(train_df)"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "source": [
104
+ "import datetime\n",
105
+ "import numpy as np\n",
106
+ "import pandas as pd\n",
107
+ "\n",
108
+ "from keras.callbacks import ModelCheckpoint, History\n",
109
+ "from keras.models import Sequential\n",
110
+ "from keras.layers import Bidirectional, LSTM, Dense, Dropout, Embedding\n",
111
+ "from keras_self_attention import SeqSelfAttention, SeqWeightedAttention\n",
112
+ "\n",
113
+ "## Charset and encoding/decoding functions\n",
114
+ "def encode(domain):\n",
115
+ " # Lowercase the domain; characters outside the charset are mapped to '*' below (not dropped)\n",
116
+ " domain = domain.lower()\n",
117
+ " encoded = []\n",
118
+ " for d in domain:\n",
119
+ " if d in stoi:\n",
120
+ " encoded.append(stoi[d])\n",
121
+ " else:\n",
122
+ " # If the character is not in the charset, use '*' as the unknown character\n",
123
+ " encoded.append(stoi['*'])\n",
124
+ " return encoded\n",
125
+ "\n",
126
+ "def pad(l, amount=0, where='right', value=0):\n",
127
+ " llen = len(l)\n",
128
+ " if where == 'left':\n",
129
+ " padded = [value]*(amount - llen) + l[:amount]\n",
130
+ " if where == 'right':\n",
131
+ " padded = l[:amount] + [value]*(amount - llen)\n",
132
+ " return padded\n",
133
+ "\n",
134
+ "# Expanded charset: digits, lowercase letters and characters common in domain names; index 0 ('*') marks unknown characters and is also the default padding value\n",
135
+ "charset = ['*'] + [chr(x) for x in range(0x30, 0x30+10)] + [chr(x) for x in range(0x61, 0x61+26)] + ['-', '_' ,'.']\n",
136
+ "stoi = {k:charset.index(k) for k in charset}\n",
137
+ "itos = {charset.index(k):k for k in charset}\n",
138
+ "\n",
139
+ "print(f\"Charset disponible: {''.join(charset)}\")\n",
140
+ "print(f\"Tamaño del vocabulario: {len(charset)}\")\n",
141
+ "\n",
142
+ "## Main parameters of the model\n",
143
+ "vocab_size = len(charset)\n",
144
+ "batch_size = 64\n",
145
+ "max_len = 64 # Maximum length for the domain names\n",
146
+ "embd_size = 128\n",
147
+ "lstm_size = 128\n",
148
+ "dense_size = 64\n",
149
+ "dropout = 0.5\n",
150
+ "\n",
151
+ "## Data preparation function\n",
152
+ "def prepare_data(train_df):\n",
153
+ " \"\"\"\n",
154
+ " Prepare the dataframe's data for training\n",
155
+ " train_df must have 'domain' and 'label' columns (with values 'dga' and 'notdga')\n",
156
+ " \"\"\"\n",
157
+ " # Create binary labels (1 for dga, 0 for anything else)\n",
158
+ " df = train_df.copy()\n",
159
+ " df['y'] = (df.label == 'dga').astype(int)\n",
160
+ "\n",
161
+ " # Encode domains, then left-pad or truncate them to max_len\n",
162
+ " df['encoded'] = df.domain.apply(encode)\n",
163
+ " df['padded'] = df.encoded.apply(lambda x: pad(x, max_len, 'left'))\n",
164
+ "\n",
165
+ " # Convert to numpy arrays\n",
166
+ " X = np.array(list(df.padded.values))\n",
167
+ " y = df['y'].values\n",
168
+ "\n",
169
+ " return X, y\n",
170
+ "\n",
171
+ "## Callbacks that checkpoint the best model and record the training history\n",
172
+ "def build_callbacks(save_path, monitor):\n",
173
+ " checkpoint = ModelCheckpoint(filepath=save_path, monitor=monitor, verbose=1, save_best_only=True)\n",
174
+ " history = History()\n",
175
+ " callbacks = [checkpoint, history]\n",
176
+ " return callbacks\n",
177
+ "\n",
178
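+ "# Create callbacks; the checkpoint filename embeds a timestamp such as 2025-05-30_15:26:47 (the ':' characters are not valid in Windows filenames)\n",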
179
+ "timestamp = str(datetime.datetime.now()).split(\".\")[0].replace(\" \", \"_\")\n",
180
+ "labin_callbacks = build_callbacks(f'LABin_best_model_{timestamp}.keras', 'val_loss')\n",
181
+ "\n",
182
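+ "## LABin model definition - Binary classifier (NOTE: Embedding's input_length argument is deprecated in this Keras version, see the warning in this cell's output, and can be removed)\n",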
183
+ "LABin = Sequential()\n",
184
+ "LABin.add(Embedding(input_dim=vocab_size, output_dim=embd_size, input_length=max_len))\n",
185
+ "LABin.add(Bidirectional(LSTM(lstm_size, return_sequences=True), name=\"bilstm1\"))\n",
186
+ "LABin.add(SeqSelfAttention(name=\"seqselfatt\"))\n",
187
+ "LABin.add(Dropout(rate=dropout, name=\"drop1\"))\n",
188
+ "LABin.add(Bidirectional(LSTM(lstm_size, return_sequences=True), name=\"bilstm2\"))\n",
189
+ "LABin.add(SeqWeightedAttention(name=\"seqweigatt\"))\n",
190
+ "LABin.add(Dropout(rate=dropout, name=\"drop2\"))\n",
191
+ "LABin.add(Dense(dense_size, activation='relu', name=\"linear\"))\n",
192
+ "LABin.add(Dropout(rate=dropout, name=\"drop3\"))\n",
193
+ "LABin.add(Dense(1, activation='sigmoid', name=\"sigmoid\"))\n",
194
+ "LABin.compile(optimizer=\"adam\", loss=\"binary_crossentropy\", metrics=['accuracy'])\n",
195
+ "\n",
196
+ "# Show the model summary\n",
197
+ "LABin.summary()\n",
198
+ "\n",
199
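+ "## Training function. NOTE(review): Keras takes validation_split from the last rows before shuffling, and train_df looks ordered dga-then-notdga, so the validation set may be all notdga; shuffle the rows first.\n",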
200
+ "def train_labin(train_df, epochs=50, validation_split=0.2):\n",
201
+ " \"\"\"\n",
202
+ " Train the LABin model on the given dataframe\n",
203
+ " \"\"\"\n",
204
+ " print(\"Preparando datos...\")\n",
205
+ " X, y = prepare_data(train_df)\n",
206
+ "\n",
207
+ " print(f\"Datos preparados: {X.shape[0]} muestras\")\n",
208
+ " print(f\"Distribución de clases: DGA={np.sum(y)}, NotDGA={len(y)-np.sum(y)}\")\n",
209
+ "\n",
210
+ " print(\"Iniciando entrenamiento...\")\n",
211
+ " history = LABin.fit(\n",
212
+ " X, y,\n",
213
+ " batch_size=batch_size,\n",
214
+ " epochs=epochs,\n",
215
+ " callbacks=labin_callbacks,\n",
216
+ " validation_split=validation_split,\n",
217
+ " verbose=1\n",
218
+ " )\n",
219
+ "\n",
220
+ " return history\n",
221
+ "\n",
222
+ "# Example usage:\n",
223
+ "# Assuming you have a dataframe 'train_df' with columns 'domain' and 'label'\n",
224
+ "# history = train_labin(train_df, epochs=50)\n",
225
+ "\n",
226
+ "## Function to plot the training results (optional)\n",
227
+ "def plot_training_history(history):\n",
228
+ " import matplotlib.pyplot as plt\n",
229
+ "\n",
230
+ " plt.figure(figsize=(12, 4))\n",
231
+ "\n",
232
+ " plt.subplot(1, 2, 1)\n",
233
+ " plt.plot(history.history['accuracy'], label='Training Accuracy')\n",
234
+ " plt.plot(history.history['val_accuracy'], label='Validation Accuracy')\n",
235
+ " plt.title('LABin Accuracy')\n",
236
+ " plt.xlabel('Epoch')\n",
237
+ " plt.ylabel('Accuracy')\n",
238
+ " plt.legend()\n",
239
+ "\n",
240
+ " plt.subplot(1, 2, 2)\n",
241
+ " plt.plot(history.history['loss'], label='Training Loss')\n",
242
+ " plt.plot(history.history['val_loss'], label='Validation Loss')\n",
243
+ " plt.title('LABin Loss')\n",
244
+ " plt.xlabel('Epoch')\n",
245
+ " plt.ylabel('Loss')\n",
246
+ " plt.legend()\n",
247
+ "\n",
248
+ " plt.tight_layout()\n",
249
+ " plt.savefig(f'LABin_training_history_{timestamp}.png')\n",
250
+ " plt.show()\n",
251
+ "\n",
252
+ "# To use after training:\n",
253
+ "# plot_training_history(history)"
254
+ ],
255
+ "metadata": {
256
+ "colab": {
257
+ "base_uri": "https://localhost:8080/",
258
+ "height": 554
259
+ },
260
+ "id": "kEk4Sbxf1_8n",
261
+ "outputId": "25aa17af-d000-4228-e371-ce4528daecaf"
262
+ },
263
+ "execution_count": null,
264
+ "outputs": [
265
+ {
266
+ "output_type": "stream",
267
+ "name": "stdout",
268
+ "text": [
269
+ "Charset disponible: *0123456789abcdefghijklmnopqrstuvwxyz-_.\n",
270
+ "Tamaño del vocabulario: 40\n"
271
+ ]
272
+ },
273
+ {
274
+ "output_type": "stream",
275
+ "name": "stderr",
276
+ "text": [
277
+ "/usr/local/lib/python3.11/dist-packages/keras/src/layers/core/embedding.py:90: UserWarning: Argument `input_length` is deprecated. Just remove it.\n",
278
+ " warnings.warn(\n"
279
+ ]
280
+ },
281
+ {
282
+ "output_type": "display_data",
283
+ "data": {
284
+ "text/plain": [
285
+ "\u001b[1mModel: \"sequential_1\"\u001b[0m\n"
286
+ ],
287
+ "text/html": [
288
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"sequential_1\"</span>\n",
289
+ "</pre>\n"
290
+ ]
291
+ },
292
+ "metadata": {}
293
+ },
294
+ {
295
+ "output_type": "display_data",
296
+ "data": {
297
+ "text/plain": [
298
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
299
+ "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
300
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
301
+ "│ embedding_1 (\u001b[38;5;33mEmbedding\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
302
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
303
+ "│ bilstm1 (\u001b[38;5;33mBidirectional\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
304
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
305
+ "│ seqselfatt (\u001b[38;5;33mSeqSelfAttention\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
306
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
307
+ "│ drop1 (\u001b[38;5;33mDropout\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m │\n",
308
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
309
+ "│ bilstm2 (\u001b[38;5;33mBidirectional\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
310
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
311
+ "│ seqweigatt │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
312
+ "│ (\u001b[38;5;33mSeqWeightedAttention\u001b[0m) │ │ │\n",
313
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
314
+ "│ drop2 (\u001b[38;5;33mDropout\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m │\n",
315
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
316
+ "│ linear (\u001b[38;5;33mDense\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
317
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
318
+ "│ drop3 (\u001b[38;5;33mDropout\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m │\n",
319
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
320
+ "│ sigmoid (\u001b[38;5;33mDense\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n",
321
+ "└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
322
+ ],
323
+ "text/html": [
324
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
325
+ "┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃\n",
326
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
327
+ "│ embedding_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Embedding</span>) │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (unbuilt) │\n",
328
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
329
+ "│ bilstm1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Bidirectional</span>) │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (unbuilt) │\n",
330
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
331
+ "│ seqselfatt (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">SeqSelfAttention</span>) │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (unbuilt) │\n",
332
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
333
+ "│ drop1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dropout</span>) │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │\n",
334
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
335
+ "│ bilstm2 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Bidirectional</span>) │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (unbuilt) │\n",
336
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
337
+ "│ seqweigatt │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (unbuilt) │\n",
338
+ "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">SeqWeightedAttention</span>) │ │ │\n",
339
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
340
+ "│ drop2 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dropout</span>) │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │\n",
341
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
342
+ "│ linear (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (unbuilt) │\n",
343
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
344
+ "│ drop3 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dropout</span>) │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │\n",
345
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
346
+ "│ sigmoid (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ ? │ <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (unbuilt) │\n",
347
+ "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
348
+ "</pre>\n"
349
+ ]
350
+ },
351
+ "metadata": {}
352
+ },
353
+ {
354
+ "output_type": "display_data",
355
+ "data": {
356
+ "text/plain": [
357
+ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
358
+ ],
359
+ "text/html": [
360
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
361
+ "</pre>\n"
362
+ ]
363
+ },
364
+ "metadata": {}
365
+ },
366
+ {
367
+ "output_type": "display_data",
368
+ "data": {
369
+ "text/plain": [
370
+ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
371
+ ],
372
+ "text/html": [
373
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
374
+ "</pre>\n"
375
+ ]
376
+ },
377
+ "metadata": {}
378
+ },
379
+ {
380
+ "output_type": "display_data",
381
+ "data": {
382
+ "text/plain": [
383
+ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
384
+ ],
385
+ "text/html": [
386
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
387
+ "</pre>\n"
388
+ ]
389
+ },
390
+ "metadata": {}
391
+ }
392
+ ]
393
+ },
394
+ {
395
+ "cell_type": "code",
396
+ "source": [
397
+ "# Example usage:\n",
398
+ "# Assuming you have a dataframe 'train_df' with columns 'domain' and 'label'\n",
399
+ "history = train_labin(train_df, epochs=50)\n"
400
+ ],
401
+ "metadata": {
402
+ "colab": {
403
+ "base_uri": "https://localhost:8080/"
404
+ },
405
+ "id": "7jpJtyL9x_Va",
406
+ "outputId": "96cf6c7b-e4b2-4939-dddb-acd83977b6fd"
407
+ },
408
+ "execution_count": null,
409
+ "outputs": [
410
+ {
411
+ "output_type": "stream",
412
+ "name": "stdout",
413
+ "text": [
414
+ "Preparando datos...\n",
415
+ "Datos preparados: 160000 muestras\n",
416
+ "Distribución de clases: DGA=80000, NotDGA=80000\n",
417
+ "Iniciando entrenamiento...\n",
418
+ "Epoch 1/50\n",
419
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.7264 - loss: 0.5377\n",
420
+ "Epoch 1: val_loss improved from inf to 0.59806, saving model to LABin_best_model_2025-05-30_15:26:47.keras\n",
421
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m60s\u001b[0m 25ms/step - accuracy: 0.7264 - loss: 0.5376 - val_accuracy: 0.7864 - val_loss: 0.5981\n",
422
+ "Epoch 2/50\n",
423
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.7948 - loss: 0.4331\n",
424
+ "Epoch 2: val_loss improved from 0.59806 to 0.53677, saving model to LABin_best_model_2025-05-30_15:26:47.keras\n",
425
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m49s\u001b[0m 24ms/step - accuracy: 0.7948 - loss: 0.4331 - val_accuracy: 0.7870 - val_loss: 0.5368\n",
426
+ "Epoch 3/50\n",
427
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.8145 - loss: 0.4002\n",
428
+ "Epoch 3: val_loss did not improve from 0.53677\n",
429
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 25ms/step - accuracy: 0.8145 - loss: 0.4002 - val_accuracy: 0.7098 - val_loss: 0.6577\n",
430
+ "Epoch 4/50\n",
431
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.8285 - loss: 0.3788\n",
432
+ "Epoch 4: val_loss did not improve from 0.53677\n",
433
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m83s\u001b[0m 25ms/step - accuracy: 0.8285 - loss: 0.3788 - val_accuracy: 0.7905 - val_loss: 0.5763\n",
434
+ "Epoch 5/50\n",
435
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.8370 - loss: 0.3608\n",
436
+ "Epoch 5: val_loss improved from 0.53677 to 0.47977, saving model to LABin_best_model_2025-05-30_15:26:47.keras\n",
437
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m49s\u001b[0m 25ms/step - accuracy: 0.8370 - loss: 0.3608 - val_accuracy: 0.8449 - val_loss: 0.4798\n",
438
+ "Epoch 6/50\n",
439
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.8460 - loss: 0.3459\n",
440
+ "Epoch 6: val_loss did not improve from 0.47977\n",
441
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m50s\u001b[0m 25ms/step - accuracy: 0.8460 - loss: 0.3459 - val_accuracy: 0.8472 - val_loss: 0.5106\n",
442
+ "Epoch 7/50\n",
443
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.8547 - loss: 0.3301\n",
444
+ "Epoch 7: val_loss improved from 0.47977 to 0.45215, saving model to LABin_best_model_2025-05-30_15:26:47.keras\n",
445
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 25ms/step - accuracy: 0.8547 - loss: 0.3301 - val_accuracy: 0.8692 - val_loss: 0.4522\n",
446
+ "Epoch 8/50\n",
447
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.8657 - loss: 0.3114\n",
448
+ "Epoch 8: val_loss improved from 0.45215 to 0.40643, saving model to LABin_best_model_2025-05-30_15:26:47.keras\n",
449
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m49s\u001b[0m 24ms/step - accuracy: 0.8657 - loss: 0.3114 - val_accuracy: 0.8890 - val_loss: 0.4064\n",
450
+ "Epoch 9/50\n",
451
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.8737 - loss: 0.2941\n",
452
+ "Epoch 9: val_loss did not improve from 0.40643\n",
453
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m49s\u001b[0m 25ms/step - accuracy: 0.8737 - loss: 0.2941 - val_accuracy: 0.8893 - val_loss: 0.4190\n",
454
+ "Epoch 10/50\n",
455
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.8819 - loss: 0.2749\n",
456
+ "Epoch 10: val_loss improved from 0.40643 to 0.39025, saving model to LABin_best_model_2025-05-30_15:26:47.keras\n",
457
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m50s\u001b[0m 25ms/step - accuracy: 0.8819 - loss: 0.2749 - val_accuracy: 0.8953 - val_loss: 0.3903\n",
458
+ "Epoch 11/50\n",
459
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.8927 - loss: 0.2557\n",
460
+ "Epoch 11: val_loss did not improve from 0.39025\n",
461
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 25ms/step - accuracy: 0.8926 - loss: 0.2557 - val_accuracy: 0.8741 - val_loss: 0.4587\n",
462
+ "Epoch 12/50\n",
463
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.8987 - loss: 0.2432\n",
464
+ "Epoch 12: val_loss did not improve from 0.39025\n",
465
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m80s\u001b[0m 24ms/step - accuracy: 0.8987 - loss: 0.2432 - val_accuracy: 0.8831 - val_loss: 0.4264\n",
466
+ "Epoch 13/50\n",
467
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.9071 - loss: 0.2258\n",
468
+ "Epoch 13: val_loss did not improve from 0.39025\n",
469
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 24ms/step - accuracy: 0.9071 - loss: 0.2258 - val_accuracy: 0.8494 - val_loss: 0.6522\n",
470
+ "Epoch 14/50\n",
471
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.9128 - loss: 0.2150\n",
472
+ "Epoch 14: val_loss did not improve from 0.39025\n",
473
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 24ms/step - accuracy: 0.9128 - loss: 0.2150 - val_accuracy: 0.8538 - val_loss: 0.5587\n",
474
+ "Epoch 15/50\n",
475
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.9164 - loss: 0.2053\n",
476
+ "Epoch 15: val_loss did not improve from 0.39025\n",
477
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m83s\u001b[0m 25ms/step - accuracy: 0.9164 - loss: 0.2053 - val_accuracy: 0.8673 - val_loss: 0.5919\n",
478
+ "Epoch 16/50\n",
479
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.9216 - loss: 0.1902\n",
480
+ "Epoch 16: val_loss did not improve from 0.39025\n",
481
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m81s\u001b[0m 24ms/step - accuracy: 0.9216 - loss: 0.1902 - val_accuracy: 0.8490 - val_loss: 0.6060\n",
482
+ "Epoch 17/50\n",
483
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.9284 - loss: 0.1769\n",
484
+ "Epoch 17: val_loss did not improve from 0.39025\n",
485
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 24ms/step - accuracy: 0.9284 - loss: 0.1769 - val_accuracy: 0.8280 - val_loss: 0.7875\n",
486
+ "Epoch 18/50\n",
487
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.9321 - loss: 0.1672\n",
488
+ "Epoch 18: val_loss did not improve from 0.39025\n",
489
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 24ms/step - accuracy: 0.9321 - loss: 0.1672 - val_accuracy: 0.8326 - val_loss: 0.8456\n",
490
+ "Epoch 19/50\n",
491
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 24ms/step - accuracy: 0.9351 - loss: 0.1574\n",
492
+ "Epoch 19: val_loss did not improve from 0.39025\n",
493
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m88s\u001b[0m 27ms/step - accuracy: 0.9351 - loss: 0.1574 - val_accuracy: 0.7985 - val_loss: 0.8885\n",
494
+ "Epoch 20/50\n",
495
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 25ms/step - accuracy: 0.9393 - loss: 0.1488\n",
496
+ "Epoch 20: val_loss did not improve from 0.39025\n",
497
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m54s\u001b[0m 27ms/step - accuracy: 0.9393 - loss: 0.1488 - val_accuracy: 0.8516 - val_loss: 0.6725\n",
498
+ "Epoch 21/50\n",
499
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.9417 - loss: 0.1415\n",
500
+ "Epoch 21: val_loss did not improve from 0.39025\n",
501
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m78s\u001b[0m 25ms/step - accuracy: 0.9417 - loss: 0.1415 - val_accuracy: 0.8191 - val_loss: 0.8281\n",
502
+ "Epoch 22/50\n",
503
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9450 - loss: 0.1340\n",
504
+ "Epoch 22: val_loss did not improve from 0.39025\n",
505
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m49s\u001b[0m 25ms/step - accuracy: 0.9450 - loss: 0.1340 - val_accuracy: 0.8337 - val_loss: 0.7363\n",
506
+ "Epoch 23/50\n",
507
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.9476 - loss: 0.1274\n",
508
+ "Epoch 23: val_loss did not improve from 0.39025\n",
509
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m81s\u001b[0m 24ms/step - accuracy: 0.9476 - loss: 0.1274 - val_accuracy: 0.7985 - val_loss: 1.1026\n",
510
+ "Epoch 24/50\n",
511
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.9506 - loss: 0.1206\n",
512
+ "Epoch 24: val_loss did not improve from 0.39025\n",
513
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m83s\u001b[0m 25ms/step - accuracy: 0.9506 - loss: 0.1206 - val_accuracy: 0.8056 - val_loss: 0.8602\n",
514
+ "Epoch 25/50\n",
515
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 24ms/step - accuracy: 0.9530 - loss: 0.1130\n",
516
+ "Epoch 25: val_loss did not improve from 0.39025\n",
517
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m85s\u001b[0m 26ms/step - accuracy: 0.9530 - loss: 0.1130 - val_accuracy: 0.8264 - val_loss: 0.8605\n",
518
+ "Epoch 26/50\n",
519
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 24ms/step - accuracy: 0.9531 - loss: 0.1139\n",
520
+ "Epoch 26: val_loss did not improve from 0.39025\n",
521
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 26ms/step - accuracy: 0.9531 - loss: 0.1139 - val_accuracy: 0.8080 - val_loss: 0.8498\n",
522
+ "Epoch 27/50\n",
523
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 24ms/step - accuracy: 0.9574 - loss: 0.1024\n",
524
+ "Epoch 27: val_loss did not improve from 0.39025\n",
525
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 26ms/step - accuracy: 0.9574 - loss: 0.1024 - val_accuracy: 0.8100 - val_loss: 0.8863\n",
526
+ "Epoch 28/50\n",
527
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 24ms/step - accuracy: 0.9588 - loss: 0.0972\n",
528
+ "Epoch 28: val_loss did not improve from 0.39025\n",
529
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m53s\u001b[0m 27ms/step - accuracy: 0.9588 - loss: 0.0972 - val_accuracy: 0.7785 - val_loss: 0.9076\n",
530
+ "Epoch 29/50\n",
531
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 24ms/step - accuracy: 0.9622 - loss: 0.0938\n",
532
+ "Epoch 29: val_loss did not improve from 0.39025\n",
533
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 27ms/step - accuracy: 0.9622 - loss: 0.0938 - val_accuracy: 0.7402 - val_loss: 1.1227\n",
534
+ "Epoch 30/50\n",
535
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 24ms/step - accuracy: 0.9634 - loss: 0.0901\n",
536
+ "Epoch 30: val_loss did not improve from 0.39025\n",
537
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m81s\u001b[0m 26ms/step - accuracy: 0.9634 - loss: 0.0901 - val_accuracy: 0.7887 - val_loss: 0.9364\n",
538
+ "Epoch 31/50\n",
539
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 24ms/step - accuracy: 0.9662 - loss: 0.0838\n",
540
+ "Epoch 31: val_loss did not improve from 0.39025\n",
541
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m53s\u001b[0m 27ms/step - accuracy: 0.9662 - loss: 0.0838 - val_accuracy: 0.7540 - val_loss: 1.1325\n",
542
+ "Epoch 32/50\n",
543
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 24ms/step - accuracy: 0.9675 - loss: 0.0821\n",
544
+ "Epoch 32: val_loss did not improve from 0.39025\n",
545
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m81s\u001b[0m 26ms/step - accuracy: 0.9675 - loss: 0.0821 - val_accuracy: 0.7806 - val_loss: 0.8558\n",
546
+ "Epoch 33/50\n",
547
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9690 - loss: 0.0801\n",
548
+ "Epoch 33: val_loss did not improve from 0.39025\n",
549
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m81s\u001b[0m 26ms/step - accuracy: 0.9690 - loss: 0.0801 - val_accuracy: 0.7749 - val_loss: 0.9175\n",
550
+ "Epoch 34/50\n",
551
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.9720 - loss: 0.0710\n",
552
+ "Epoch 34: val_loss did not improve from 0.39025\n",
553
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m50s\u001b[0m 25ms/step - accuracy: 0.9720 - loss: 0.0710 - val_accuracy: 0.7702 - val_loss: 0.9847\n",
554
+ "Epoch 35/50\n",
555
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9722 - loss: 0.0718\n",
556
+ "Epoch 35: val_loss did not improve from 0.39025\n",
557
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 25ms/step - accuracy: 0.9722 - loss: 0.0718 - val_accuracy: 0.7346 - val_loss: 1.0676\n",
558
+ "Epoch 36/50\n",
559
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9726 - loss: 0.0715\n",
560
+ "Epoch 36: val_loss did not improve from 0.39025\n",
561
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m83s\u001b[0m 25ms/step - accuracy: 0.9725 - loss: 0.0716 - val_accuracy: 0.7491 - val_loss: 0.9360\n",
562
+ "Epoch 37/50\n",
563
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9767 - loss: 0.0600\n",
564
+ "Epoch 37: val_loss did not improve from 0.39025\n",
565
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m50s\u001b[0m 25ms/step - accuracy: 0.9767 - loss: 0.0600 - val_accuracy: 0.7389 - val_loss: 1.1465\n",
566
+ "Epoch 38/50\n",
567
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9782 - loss: 0.0596\n",
568
+ "Epoch 38: val_loss did not improve from 0.39025\n",
569
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m49s\u001b[0m 25ms/step - accuracy: 0.9782 - loss: 0.0596 - val_accuracy: 0.7706 - val_loss: 0.9810\n",
570
+ "Epoch 39/50\n",
571
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9781 - loss: 0.0583\n",
572
+ "Epoch 39: val_loss did not improve from 0.39025\n",
573
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 25ms/step - accuracy: 0.9781 - loss: 0.0584 - val_accuracy: 0.7287 - val_loss: 1.0654\n",
574
+ "Epoch 40/50\n",
575
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9787 - loss: 0.0576\n",
576
+ "Epoch 40: val_loss did not improve from 0.39025\n",
577
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 25ms/step - accuracy: 0.9787 - loss: 0.0576 - val_accuracy: 0.7186 - val_loss: 1.1190\n",
578
+ "Epoch 41/50\n",
579
+ "\u001b[1m1999/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9792 - loss: 0.0576\n",
580
+ "Epoch 41: val_loss did not improve from 0.39025\n",
581
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 25ms/step - accuracy: 0.9791 - loss: 0.0576 - val_accuracy: 0.7548 - val_loss: 1.0554\n",
582
+ "Epoch 42/50\n",
583
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9804 - loss: 0.0537\n",
584
+ "Epoch 42: val_loss did not improve from 0.39025\n",
585
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m83s\u001b[0m 25ms/step - accuracy: 0.9804 - loss: 0.0537 - val_accuracy: 0.7732 - val_loss: 0.9237\n",
586
+ "Epoch 43/50\n",
587
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9801 - loss: 0.0550\n",
588
+ "Epoch 43: val_loss did not improve from 0.39025\n",
589
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m81s\u001b[0m 25ms/step - accuracy: 0.9801 - loss: 0.0550 - val_accuracy: 0.7821 - val_loss: 0.9783\n",
590
+ "Epoch 44/50\n",
591
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9813 - loss: 0.0522\n",
592
+ "Epoch 44: val_loss did not improve from 0.39025\n",
593
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 25ms/step - accuracy: 0.9813 - loss: 0.0522 - val_accuracy: 0.7368 - val_loss: 1.3491\n",
594
+ "Epoch 45/50\n",
595
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9827 - loss: 0.0495\n",
596
+ "Epoch 45: val_loss did not improve from 0.39025\n",
597
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m81s\u001b[0m 25ms/step - accuracy: 0.9827 - loss: 0.0495 - val_accuracy: 0.7447 - val_loss: 1.2059\n",
598
+ "Epoch 46/50\n",
599
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9818 - loss: 0.0506\n",
600
+ "Epoch 46: val_loss did not improve from 0.39025\n",
601
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m83s\u001b[0m 26ms/step - accuracy: 0.9818 - loss: 0.0506 - val_accuracy: 0.7544 - val_loss: 1.1131\n",
602
+ "Epoch 47/50\n",
603
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9820 - loss: 0.0505\n",
604
+ "Epoch 47: val_loss did not improve from 0.39025\n",
605
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m81s\u001b[0m 25ms/step - accuracy: 0.9820 - loss: 0.0505 - val_accuracy: 0.7353 - val_loss: 1.0976\n",
606
+ "Epoch 48/50\n",
607
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9843 - loss: 0.0461\n",
608
+ "Epoch 48: val_loss did not improve from 0.39025\n",
609
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 25ms/step - accuracy: 0.9843 - loss: 0.0461 - val_accuracy: 0.7104 - val_loss: 1.2959\n",
610
+ "Epoch 49/50\n",
611
+ "\u001b[1m1998/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m━\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9829 - loss: 0.0493\n",
612
+ "Epoch 49: val_loss did not improve from 0.39025\n",
613
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m50s\u001b[0m 25ms/step - accuracy: 0.9829 - loss: 0.0493 - val_accuracy: 0.7662 - val_loss: 0.8865\n",
614
+ "Epoch 50/50\n",
615
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.9842 - loss: 0.0441\n",
616
+ "Epoch 50: val_loss did not improve from 0.39025\n",
617
+ "\u001b[1m2000/2000\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m83s\u001b[0m 26ms/step - accuracy: 0.9842 - loss: 0.0441 - val_accuracy: 0.7740 - val_loss: 1.0445\n"
618
+ ]
619
+ }
620
+ ]
621
+ },
622
+ {
623
+ "cell_type": "code",
624
+ "source": [],
625
+ "metadata": {
626
+ "id": "-flqDZcRx_Yj"
627
+ },
628
+ "execution_count": null,
629
+ "outputs": []
630
+ },
631
+ {
632
+ "cell_type": "code",
633
+ "source": [
634
+ "## FUNCIONES PARA CARGAR EL MODELO Y HACER PREDICCIONES\n",
635
+ "\n",
636
+ "def load_trained_model(model_path):\n",
637
+ " \"\"\"\n",
638
+ " Carga el modelo entrenado desde un archivo\n",
639
+ " \"\"\"\n",
640
+ " from keras.models import load_model\n",
641
+ " from keras_self_attention import SeqSelfAttention, SeqWeightedAttention\n",
642
+ "\n",
643
+ " # Cargar el modelo con las capas personalizadas\n",
644
+ " custom_objects = {\n",
645
+ " 'SeqSelfAttention': SeqSelfAttention,\n",
646
+ " 'SeqWeightedAttention': SeqWeightedAttention\n",
647
+ " }\n",
648
+ "\n",
649
+ " model = load_model(model_path, custom_objects=custom_objects)\n",
650
+ " print(f\"Modelo cargado desde: {model_path}\")\n",
651
+ " return model\n",
652
+ "\n",
653
+ "def predict_single_domain(model, domain):\n",
654
+ " \"\"\"\n",
655
+ " Predice si un dominio individual es DGA o no\n",
656
+ " \"\"\"\n",
657
+ " # Preparar el dominio\n",
658
+ " encoded = encode(domain)\n",
659
+ " padded = pad(encoded, max_len, 'left')\n",
660
+ " X = np.array([padded]) # Agregar dimensión batch\n",
661
+ "\n",
662
+ " # Hacer predicción\n",
663
+ " prediction = model.predict(X, verbose=0)[0][0]\n",
664
+ "\n",
665
+ " # Interpretar resultado\n",
666
+ " is_dga = prediction > 0.5\n",
667
+ " confidence = prediction if is_dga else (1 - prediction)\n",
668
+ "\n",
669
+ " result = {\n",
670
+ " 'domain': domain,\n",
671
+ " 'prediction': 'DGA' if is_dga else 'LEGIT',\n",
672
+ " 'confidence': confidence,\n",
673
+ " 'raw_score': prediction\n",
674
+ " }\n",
675
+ "\n",
676
+ " return result\n",
677
+ "\n",
678
+ "def predict_domains_batch(model, domains_list):\n",
679
+ " \"\"\"\n",
680
+ " Predice múltiples dominios a la vez\n",
681
+ " \"\"\"\n",
682
+ " results = []\n",
683
+ "\n",
684
+ " # Preparar todos los dominios\n",
685
+ " encoded_domains = [pad(encode(domain), max_len, 'left') for domain in domains_list]\n",
686
+ " X = np.array(encoded_domains)\n",
687
+ "\n",
688
+ " # Hacer predicciones en lote\n",
689
+ " predictions = model.predict(X, verbose=0)\n",
690
+ "\n",
691
+ " # Procesar resultados\n",
692
+ " for i, domain in enumerate(domains_list):\n",
693
+ " pred_score = predictions[i][0]\n",
694
+ " is_dga = pred_score > 0.5\n",
695
+ " confidence = pred_score if is_dga else (1 - pred_score)\n",
696
+ "\n",
697
+ " result = {\n",
698
+ " 'domain': domain,\n",
699
+ " 'prediction': 'DGA' if is_dga else 'LEGIT',\n",
700
+ " 'confidence': confidence,\n",
701
+ " 'raw_score': pred_score\n",
702
+ " }\n",
703
+ " results.append(result)\n",
704
+ "\n",
705
+ " return results\n",
706
+ "\n",
707
+ "def evaluate_model_on_test(model, test_df):\n",
708
+ " \"\"\"\n",
709
+ " Evalúa el modelo en un conjunto de test\n",
710
+ " test_df debe tener columnas 'domain' y 'label'\n",
711
+ " \"\"\"\n",
712
+ " print(\"Evaluando modelo en datos de test...\")\n",
713
+ "\n",
714
+ " # Preparar datos de test\n",
715
+ " X_test, y_test = prepare_data(test_df)\n",
716
+ "\n",
717
+ " # Hacer predicciones\n",
718
+ " predictions = model.predict(X_test, verbose=0)\n",
719
+ " y_pred = (predictions > 0.5).astype(int).flatten()\n",
720
+ "\n",
721
+ " # Calcular métricas\n",
722
+ " from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix\n",
723
+ "\n",
724
+ " accuracy = accuracy_score(y_test, y_pred)\n",
725
+ " precision = precision_score(y_test, y_pred)\n",
726
+ " recall = recall_score(y_test, y_pred)\n",
727
+ " f1 = f1_score(y_test, y_pred)\n",
728
+ " cm = confusion_matrix(y_test, y_pred)\n",
729
+ "\n",
730
+ " print(f\"Accuracy: {accuracy:.4f}\")\n",
731
+ " print(f\"Precision: {precision:.4f}\")\n",
732
+ " print(f\"Recall: {recall:.4f}\")\n",
733
+ " print(f\"F1-Score: {f1:.4f}\")\n",
734
+ " print(f\"Confusion Matrix:\\n{cm}\")\n",
735
+ "\n",
736
+ " return {\n",
737
+ " 'accuracy': accuracy,\n",
738
+ " 'precision': precision,\n",
739
+ " 'recall': recall,\n",
740
+ " 'f1': f1,\n",
741
+ " 'confusion_matrix': cm\n",
742
+ " }\n",
743
+ "\n",
744
+ "## EJEMPLOS DE USO:\n",
745
+ "\n",
746
+ "\"\"\"\n",
747
+ "# 1. ENTRENAR EL MODELO\n",
748
+ "history = train_labin(train_df, epochs=50)\n",
749
+ "\n",
750
+ "# 2. CARGAR UN MODELO YA ENTRENADO\n",
751
+ "# Cambia 'ruta_del_modelo.keras' por la ruta real donde guardaste tu modelo\n",
752
+ "loaded_model = load_trained_model('LABin_best_model_2025-05-30_15:22:09.keras')\n",
753
+ "\n",
754
+ "# 3. PROBAR UN DOMINIO INDIVIDUAL\n",
755
+ "result = predict_single_domain(loaded_model, 'google.com')\n",
756
+ "print(f\"Dominio: {result['domain']}\")\n",
757
+ "print(f\"Predicción: {result['prediction']}\")\n",
758
+ "print(f\"Confianza: {result['confidence']:.4f}\")\n",
759
+ "\n",
760
+ "# 4. PROBAR MÚLTIPLES DOMINIOS\n",
761
+ "test_domains = [\n",
762
+ " 'google.com',\n",
763
+ " 'facebook.com',\n",
764
+ " 'xkjhsdkjfhlksdjf.com',\n",
765
+ " 'qwerty123456.net',\n",
766
+ " 'amazon.com'\n",
767
+ "]\n",
768
+ "\n",
769
+ "results = predict_domains_batch(loaded_model, test_domains)\n",
770
+ "for result in results:\n",
771
+ " print(f\"{result['domain']:<30} -> {result['prediction']:<5} (confianza: {result['confidence']:.4f})\")\n",
772
+ "\n",
773
+ "# 5. EVALUAR EN CONJUNTO DE TEST (si tienes un test_df)\n",
774
+ "# metrics = evaluate_model_on_test(loaded_model, test_df)\n",
775
+ "\"\"\""
776
+ ],
777
+ "metadata": {
778
+ "colab": {
779
+ "base_uri": "https://localhost:8080/",
780
+ "height": 243
781
+ },
782
+ "id": "18quATrOx_bi",
783
+ "outputId": "bb272cd6-0d89-4de1-e131-e67b4cc69ce3"
784
+ },
785
+ "execution_count": null,
786
+ "outputs": [
787
+ {
788
+ "output_type": "execute_result",
789
+ "data": {
790
+ "text/plain": [
791
+ "'\\n# 1. ENTRENAR EL MODELO\\nhistory = train_labin(train_df, epochs=50)\\n\\n# 2. CARGAR UN MODELO YA ENTRENADO\\n# Cambia \\'ruta_del_modelo.keras\\' por la ruta real donde guardaste tu modelo\\nloaded_model = load_trained_model(\\'LABin_best_model_2025-05-30_15:22:09.keras\\')\\n\\n# 3. PROBAR UN DOMINIO INDIVIDUAL\\nresult = predict_single_domain(loaded_model, \\'google.com\\')\\nprint(f\"Dominio: {result[\\'domain\\']}\")\\nprint(f\"Predicción: {result[\\'prediction\\']}\")\\nprint(f\"Confianza: {result[\\'confidence\\']:.4f}\")\\n\\n# 4. PROBAR MÚLTIPLES DOMINIOS\\ntest_domains = [\\n \\'google.com\\',\\n \\'facebook.com\\', \\n \\'xkjhsdkjfhlksdjf.com\\',\\n \\'qwerty123456.net\\',\\n \\'amazon.com\\'\\n]\\n\\nresults = predict_domains_batch(loaded_model, test_domains)\\nfor result in results:\\n print(f\"{result[\\'domain\\']:<30} -> {result[\\'prediction\\']:<5} (confianza: {result[\\'confidence\\']:.4f})\")\\n\\n# 5. EVALUAR EN CONJUNTO DE TEST (si tienes un test_df)\\n# metrics = evaluate_model_on_test(loaded_model, test_df)\\n'"
792
+ ],
793
+ "application/vnd.google.colaboratory.intrinsic+json": {
794
+ "type": "string"
795
+ }
796
+ },
797
+ "metadata": {},
798
+ "execution_count": 11
799
+ }
800
+ ]
801
+ },
802
+ {
803
+ "cell_type": "code",
804
+ "source": [
805
+ "# 2. CARGAR UN MODELO YA ENTRENADO\n",
806
+ "# Cambia la ruta de abajo por la ruta real donde guardaste tu modelo\n",
807
+ "loaded_model = load_trained_model('/content/LABin_best_model_2025-05-30_15:26:47.keras')\n",
808
+ "\n",
809
+ "# 3. PROBAR UN DOMINIO INDIVIDUAL\n",
810
+ "result = predict_single_domain(loaded_model, 'sadfdfdsfasds.com')\n",
811
+ "print(f\"Dominio: {result['domain']}\")\n",
812
+ "print(f\"Predicción: {result['prediction']}\")\n",
813
+ "print(f\"Confianza: {result['confidence']:.4f}\")"
814
+ ],
815
+ "metadata": {
816
+ "colab": {
817
+ "base_uri": "https://localhost:8080/"
818
+ },
819
+ "id": "NpXsx39qx_ef",
820
+ "outputId": "fcecb761-1bbe-498b-ddd5-e67ef3bb6e2a"
821
+ },
822
+ "execution_count": 17,
823
+ "outputs": [
824
+ {
825
+ "output_type": "stream",
826
+ "name": "stdout",
827
+ "text": [
828
+ "Modelo cargado desde: /content/LABin_best_model_2025-05-30_15:26:47.keras\n",
829
+ "Dominio: sadfdfdsfasds.com\n",
830
+ "Predicción: DGA\n",
831
+ "Confianza: 0.9111\n"
832
+ ]
833
+ }
834
+ ]
835
+ },
836
+ {
837
+ "cell_type": "code",
838
+ "source": [
839
+ "import requests\n",
840
+ "import pandas as pd\n",
841
+ "import numpy as np\n",
842
+ "import time\n",
843
+ "from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score\n",
844
+ "import sys\n",
845
+ "from sklearn.metrics import classification_report, confusion_matrix\n",
846
+ "import seaborn as sns\n",
847
+ "import matplotlib.pyplot as plt\n",
848
+ "from google.colab import drive\n",
849
+ "import re\n",
850
+ "\n",
851
+ "families = [\n",
852
+ " 'matsnu.gz',\n",
853
+ " 'suppobox.gz',\n",
854
+ " 'charbot.gz',\n",
855
+ " 'gozi.gz',\n",
856
+ " 'manuelita.gz',\n",
857
+ " 'rovnix.gz',\n",
858
+ " 'deception.gz',\n",
859
+ " 'nymaim.gz'\n",
860
+ "]\n",
861
+ "\n",
862
+ "runs = 30\n",
863
+ "\n",
864
+ "for family in families:\n",
865
+ " print(family)\n",
866
+ " dga = pd.read_csv(f'/content/drive/My Drive/Familias_Test/{family}', chunksize=50)\n",
867
+ " legit = pd.read_csv('/content/drive/My Drive/Familias_Test/legit.gz', chunksize=50)\n",
868
+ " dfs = []\n",
869
+ " for run in range(runs):\n",
870
+ " print(f'{run:2}/{runs}', end='\\r')\n",
871
+ " dfw = pd.concat([dga.get_chunk(), legit.get_chunk()])\n",
872
+ " pred = []\n",
873
+ " prob = []\n",
874
+ " query_time = []\n",
875
+ " results = []\n",
876
+ "\n",
877
+ " for domain_to_check in dfw.domain.values:\n",
878
+ " st = time.time()\n",
879
+ "\n",
880
+ " result = predict_single_domain(loaded_model, domain_to_check)\n",
881
+ " if result['prediction'] == \"DGA\":\n",
882
+ " label_value = 1\n",
883
+ " else:\n",
884
+ " label_value = 0\n",
885
+ "\n",
886
+ " pred.append(label_value)\n",
887
+ " query_time.append(time.time() - st)\n",
888
+ "\n",
889
+ " dfw['pred'] = pred\n",
890
+ " # dfw['prob'] = prob # Descomenta esta línea solo si antes llenas prob en el bucle (p. ej. con result['raw_score']); tal como está, prob queda vacía\n",
891
+ " dfw['query_time'] = query_time\n",
892
+ " dfw.to_csv(f'/content/drive/My Drive/results/results_Labin_{family}_{run}.csv.gz', index=False)\n"
893
+ ],
894
+ "metadata": {
895
+ "id": "Gg50xzhLIx85",
896
+ "colab": {
897
+ "base_uri": "https://localhost:8080/"
898
+ },
899
+ "outputId": "39467198-b736-4d6a-a303-3bc09077d35e"
900
+ },
901
+ "execution_count": 18,
902
+ "outputs": [
903
+ {
904
+ "output_type": "stream",
905
+ "name": "stdout",
906
+ "text": [
907
+ "matsnu.gz\n",
908
+ "suppobox.gz\n",
909
+ "charbot.gz\n",
910
+ "gozi.gz\n",
911
+ "manuelita.gz\n",
912
+ "rovnix.gz\n",
913
+ "deception.gz\n",
914
+ "nymaim.gz\n"
915
+ ]
916
+ }
917
+ ]
918
+ },
919
+ {
920
+ "cell_type": "code",
921
+ "source": [
922
+ "import requests\n",
923
+ "import pandas as pd\n",
924
+ "import numpy as np\n",
925
+ "import time\n",
926
+ "from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score\n",
927
+ "import sys\n",
928
+ "from sklearn.metrics import classification_report, confusion_matrix\n",
929
+ "import seaborn as sns\n",
930
+ "import matplotlib.pyplot as plt\n",
931
+ "from google.colab import drive\n",
932
+ "import re\n",
933
+ "\n",
934
+ "families = ['bigviktor.gz',\n",
935
+ " 'pizd.gz',\n",
936
+ " 'ngioweb.gz'\n",
937
+ "\n",
938
+ " ]\n",
939
+ "\n",
940
+ "runs = 30\n",
941
+ "\n",
942
+ "for family in families:\n",
943
+ " print(family)\n",
944
+ " dga = pd.read_csv(f'/content/drive/My Drive/New_Families/{family}', chunksize=50)\n",
945
+ " legit = pd.read_csv('/content/drive/My Drive/Familias_Test/legit.gz', chunksize=50)\n",
946
+ " dfs = []\n",
947
+ "\n",
948
+ " # Saltar los primeros 30 chunks de legit (los mismos 30 chunks de 50 dominios que ya consume la evaluación de Familias_Test)\n",
949
+ " for _ in range(30):\n",
950
+ " legit.get_chunk()\n",
951
+ "\n",
952
+ "\n",
953
+ "\n",
954
+ " for run in range(runs):\n",
955
+ " print(f'{run:2}/{runs}', end='\\r')\n",
956
+ " dfw = pd.concat([dga.get_chunk(), legit.get_chunk()])\n",
957
+ " pred = []\n",
958
+ " prob = []\n",
959
+ " query_time = []\n",
960
+ " results = []\n",
961
+ "\n",
962
+ " for domain_to_check in dfw.domain.values:\n",
963
+ " st = time.time()\n",
964
+ " result = predict_single_domain(loaded_model, domain_to_check)\n",
965
+ " if result['prediction'] == \"DGA\":\n",
966
+ " label_value = 1\n",
967
+ " else:\n",
968
+ " label_value = 0\n",
969
+ "\n",
970
+ " pred.append(label_value)\n",
971
+ " query_time.append(time.time() - st)\n",
972
+ "\n",
973
+ " dfw['pred'] = pred\n",
974
+ " # dfw['prob'] = prob # Descomenta esta línea solo si antes llenas prob en el bucle (p. ej. con result['raw_score']); tal como está, prob queda vacía\n",
975
+ " dfw['query_time'] = query_time\n",
976
+ " dfw.to_csv(f'/content/drive/My Drive/results/results_Labin_{family}_{run}.csv.gz', index=False)\n"
977
+ ],
978
+ "metadata": {
979
+ "colab": {
980
+ "base_uri": "https://localhost:8080/"
981
+ },
982
+ "id": "Q1g1s0WHKi5X",
983
+ "outputId": "c2d6a2fe-728c-42cc-b5ee-925c1242e7c1"
984
+ },
985
+ "execution_count": 19,
986
+ "outputs": [
987
+ {
988
+ "output_type": "stream",
989
+ "name": "stdout",
990
+ "text": [
991
+ "bigviktor.gz\n",
992
+ "pizd.gz\n",
993
+ "ngioweb.gz\n"
994
+ ]
995
+ }
996
+ ]
997
+ },
998
+ {
999
+ "cell_type": "code",
1000
+ "source": [
1001
+ "#\"\"\"\n",
1002
+ "families = [\n",
1003
+ " 'matsnu.gz',\n",
1004
+ " 'suppobox.gz',\n",
1005
+ " 'charbot.gz',\n",
1006
+ " 'gozi.gz',\n",
1007
+ " 'manuelita.gz',\n",
1008
+ " 'rovnix.gz',\n",
1009
+ " 'deception.gz',\n",
1010
+ " 'nymaim.gz',\n",
1011
+ " 'bigviktor.gz',\n",
1012
+ " 'pizd.gz',\n",
1013
+ " 'ngioweb.gz'\n",
1014
+ "]\n",
1015
+ "#\"\"\"\n",
1016
+ "def fpr_tpr(y, ypred):\n",
1017
+ " tn, fp, fn, tp = confusion_matrix(y, ypred).ravel()\n",
1018
+ " fpr = fp / (fp + tn) # False Positive Rate\n",
1019
+ " tpr = tp / (tp + fn) # True Positive Rate (Recall)\n",
1020
+ " return fpr, tpr\n",
1021
+ "\n",
1022
+ "for family in families:\n",
1023
+ " acc = []\n",
1024
+ " pre = []\n",
1025
+ " rec = []\n",
1026
+ " f1 = []\n",
1027
+ " fpr = []\n",
1028
+ " tpr = []\n",
1029
+ " qt = []\n",
1030
+ " qts = []\n",
1031
+ " for run in range(runs):\n",
1032
+ " df = pd.read_csv(f'/content/drive/My Drive/results/results_Labin_{family}_{run}.csv.gz')\n",
1033
+ " y = (df.label == 'dga').astype(int)\n",
1034
+ " ypred = df.pred\n",
1035
+ " acc.append(accuracy_score(y, ypred))\n",
1036
+ " pre.append(precision_score(y, ypred))\n",
1037
+ " rec.append(recall_score(y, ypred))\n",
1038
+ " f1.append(f1_score(y, ypred))\n",
1039
+ " fpr_value, tpr_value = fpr_tpr(y, ypred)\n",
1040
+ " fpr.append(fpr_value)\n",
1041
+ " tpr.append(tpr_value)\n",
1042
+ " qt.append(df.query_time.mean())\n",
1043
+ " qts.append(df.query_time.std())\n",
1044
+ "# print(f'Query time: {np.mean(qt):0.5f}+/-{np.mean(qts):0.5f}')\n",
1045
+ " print(f'{family.split(\".\")[0]:15}: acc:{np.mean(acc):0.2f}±{np.std(acc):.3f} f1:{np.mean(f1):0.2f}±{np.std(f1):.3f} pre:{np.mean(pre):0.2f}±{np.std(pre):.3f} rec:{np.mean(rec):0.2f}±{np.std(rec):.3f} FPR:{np.mean(fpr):0.2f}±{np.std(fpr):.3f} TPR:{np.mean(tpr):0.2f}±{np.std(tpr):.3f} Query time: {np.mean(qt):0.5f}±{np.mean(qts):0.5f}')\n"
1046
+ ],
1047
+ "metadata": {
1048
+ "colab": {
1049
+ "base_uri": "https://localhost:8080/"
1050
+ },
1051
+ "id": "aaj2PD9NLLAn",
1052
+ "outputId": "6aca9e04-06c0-4a50-cd65-7a902b463ad7"
1053
+ },
1054
+ "execution_count": 20,
1055
+ "outputs": [
1056
+ {
1057
+ "output_type": "stream",
1058
+ "name": "stdout",
1059
+ "text": [
1060
+ "matsnu : acc:0.93±0.032 f1:0.93±0.028 pre:0.89±0.046 rec:0.97±0.018 FPR:0.12±0.059 TPR:0.97±0.018 Query time: 0.08699±0.03077\n",
1061
+ "suppobox : acc:0.94±0.031 f1:0.94±0.027 pre:0.89±0.045 rec:1.00±0.012 FPR:0.12±0.059 TPR:1.00±0.012 Query time: 0.07804±0.02411\n",
1062
+ "charbot : acc:0.84±0.044 f1:0.83±0.046 pre:0.87±0.055 rec:0.79±0.051 FPR:0.12±0.059 TPR:0.79±0.051 Query time: 0.07832±0.02187\n",
1063
+ "gozi : acc:0.85±0.054 f1:0.84±0.056 pre:0.87±0.054 rec:0.81±0.080 FPR:0.12±0.059 TPR:0.81±0.080 Query time: 0.07945±0.02210\n",
1064
+ "manuelita : acc:0.52±0.036 f1:0.24±0.064 pre:0.57±0.131 rec:0.15±0.047 FPR:0.12±0.059 TPR:0.15±0.047 Query time: 0.07936±0.02168\n",
1065
+ "rovnix : acc:0.93±0.029 f1:0.94±0.025 pre:0.89±0.045 rec:0.98±0.017 FPR:0.12±0.059 TPR:0.98±0.017 Query time: 0.07933±0.02181\n",
1066
+ "deception : acc:0.94±0.030 f1:0.94±0.026 pre:0.90±0.045 rec:1.00±0.000 FPR:0.12±0.059 TPR:1.00±0.000 Query time: 0.08005±0.02218\n",
1067
+ "nymaim : acc:0.88±0.036 f1:0.88±0.034 pre:0.88±0.049 rec:0.87±0.040 FPR:0.12±0.059 TPR:0.87±0.040 Query time: 0.08008±0.02256\n",
1068
+ "bigviktor : acc:0.55±0.031 f1:0.36±0.048 pre:0.65±0.101 rec:0.25±0.042 FPR:0.14±0.056 TPR:0.25±0.042 Query time: 0.08002±0.02226\n",
1069
+ "pizd : acc:0.84±0.031 f1:0.83±0.030 pre:0.86±0.051 rec:0.82±0.038 FPR:0.14±0.056 TPR:0.82±0.038 Query time: 0.08062±0.02229\n",
1070
+ "ngioweb : acc:0.58±0.055 f1:0.42±0.094 pre:0.68±0.120 rec:0.31±0.078 FPR:0.14±0.056 TPR:0.31±0.078 Query time: 0.08096±0.02214\n"
1071
+ ]
1072
+ }
1073
+ ]
1074
+ }
1075
+ ]
1076
+ }