Rajan committed on
Commit
c03de54
·
verified ·
1 Parent(s): 2f34c40

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +1412 -0
app.py ADDED
@@ -0,0 +1,1412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import json
4
+ import re
5
+ import random
6
+ from typing import List, Dict, Tuple, Optional
7
+ import os
8
+ import time
9
+ import matplotlib.pyplot as plt
10
+ from io import BytesIO
11
+ import base64
12
+ from datetime import datetime
13
+
14
+ # The supporting classes (ActivationFunctions, LossFunctions, Layer, DenseLayer,
15
+ # DropoutLayer, NeuralNetwork, TextProcessor, Chatbot) are defined in full below,
16
+ # followed by the module-level chatbot setup and its helper functions.
17
+
18
+
19
class ActivationFunctions:
    """Activation functions and their derivatives.

    Every function is element-wise except ``softmax``, which normalises
    along axis 0 so that each column sums to one.
    """

    @staticmethod
    def sigmoid(z: np.ndarray) -> np.ndarray:
        """Return the logistic sigmoid ``1 / (1 + exp(-z))``."""
        # Clip first so np.exp cannot overflow for large negative inputs.
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    @staticmethod
    def sigmoid_derivative(z: np.ndarray) -> np.ndarray:
        """Return the derivative of the sigmoid evaluated at ``z``."""
        s = ActivationFunctions.sigmoid(z)
        return s * (1 - s)

    @staticmethod
    def relu(z: np.ndarray) -> np.ndarray:
        """Return ``max(0, z)`` element-wise."""
        return np.maximum(0, z)

    @staticmethod
    def relu_derivative(z: np.ndarray) -> np.ndarray:
        """Return 1 where ``z > 0`` and 0 elsewhere."""
        return np.where(z > 0, 1, 0)

    @staticmethod
    def softmax(z: np.ndarray) -> np.ndarray:
        """Return the softmax of ``z`` taken along axis 0.

        The maximum is subtracted per column rather than over the whole
        array. Softmax is invariant to a per-column shift, so the result is
        mathematically unchanged, but a column whose values lie far below
        the global maximum no longer underflows to an all-zero numerator
        (which would produce 0/0 = NaN).
        """
        shifted = z - np.max(z, axis=0, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / exp_z.sum(axis=0, keepdims=True)
49
+
50
+
51
class LossFunctions:
    """Loss functions paired with their gradients w.r.t. the network output."""

    @staticmethod
    def mse(output: np.ndarray, target: np.ndarray) -> float:
        """Return the mean of the squared element-wise differences."""
        residual = output - target
        return np.mean(residual ** 2)

    @staticmethod
    def mse_derivative(output: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return the gradient of ``mse`` with respect to ``output``."""
        residual = output - target
        return 2 * residual / output.size

    @staticmethod
    def cross_entropy(output: np.ndarray, target: np.ndarray) -> float:
        """Return the cross-entropy summed over all entries and divided by
        the number of columns (``output.shape[1]``)."""
        epsilon = 1e-15
        # Keep probabilities strictly inside (0, 1) so the log stays finite.
        clipped = np.clip(output, epsilon, 1 - epsilon)
        n_columns = output.shape[1]
        return -np.sum(target * np.log(clipped)) / n_columns

    @staticmethod
    def cross_entropy_derivative(output: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return the gradient of ``cross_entropy`` with respect to ``output``."""
        epsilon = 1e-15
        clipped = np.clip(output, epsilon, 1 - epsilon)
        n_columns = output.shape[1]
        return -target / clipped / n_columns
77
+
78
+
79
class Layer:
    """Common interface shared by all layers of the network.

    Subclasses must implement ``forward`` and ``backward``; ``update`` and
    ``get_parameters`` default to "nothing to do" for parameter-free layers.
    """

    def forward(self, inputs: np.ndarray) -> np.ndarray:
        """Compute the layer output for ``inputs`` (abstract)."""
        raise NotImplementedError

    def backward(self, grad: np.ndarray) -> np.ndarray:
        """Propagate ``grad`` back through the layer (abstract)."""
        raise NotImplementedError

    def update(self, learning_rate: float) -> None:
        """Apply a parameter update; a no-op in the base class."""
        return None

    def get_parameters(self) -> List:
        """Return the layer's parameters; the base class has none."""
        return list()
97
+
98
+
99
class DenseLayer(Layer):
    """Fully connected layer computing ``activation(W @ inputs + b)``.

    ``weights`` has shape ``(output_size, input_size)`` and ``biases`` has
    shape ``(output_size, 1)``; the bias is broadcast across the columns of
    the input, so each column of ``inputs`` is processed independently.
    """

    def __init__(self, input_size: int, output_size: int, activation: str = "sigmoid"):
        """Create the layer with randomly initialised weights and zero biases.

        Args:
            input_size: Number of rows expected in the input.
            output_size: Number of rows produced by the layer.
            activation: One of ``"sigmoid"``, ``"relu"`` or ``"softmax"``.

        Raises:
            ValueError: If ``activation`` is not one of the supported names.
        """
        self.input_size = input_size
        self.output_size = output_size

        # Gaussian weights scaled by sqrt(1 / (fan_in + fan_out)) to keep the
        # initial pre-activations small.
        self.weights = np.random.randn(output_size, input_size) * np.sqrt(
            1 / (input_size + output_size)
        )
        self.biases = np.zeros((output_size, 1))

        if activation == "sigmoid":
            self.activation_fn = ActivationFunctions.sigmoid
            self.activation_derivative = ActivationFunctions.sigmoid_derivative
        elif activation == "relu":
            self.activation_fn = ActivationFunctions.relu
            self.activation_derivative = ActivationFunctions.relu_derivative
        elif activation == "softmax":
            # Softmax is not element-wise; its Jacobian is applied directly
            # in ``backward`` instead of through a derivative function.
            self.activation_fn = ActivationFunctions.softmax
            self.activation_derivative = None
        else:
            raise ValueError(f"Unsupported activation function: {activation}")

        self.activation_name = activation

        # Values cached by ``forward`` for use in ``backward``.
        self.inputs = None
        self.z = None
        self.output = None

        # Parameter gradients produced by ``backward``.
        self.dW = None
        self.db = None

    def forward(self, inputs: np.ndarray) -> np.ndarray:
        """Compute and cache the layer output for ``inputs``."""
        self.inputs = inputs
        self.z = np.dot(self.weights, inputs) + self.biases

        if self.activation_name == "sigmoid":
            # Bound the pre-activation so the sigmoid never saturates to
            # exactly 0 or 1.
            self.z = np.clip(self.z, -15, 15)

        self.output = self.activation_fn(self.z)

        if self.activation_name == "softmax":
            # Keep probabilities strictly inside (0, 1) so a later log or
            # division by the output stays finite.
            epsilon = 1e-10
            self.output = np.clip(self.output, epsilon, 1.0 - epsilon)

        return self.output

    def backward(self, grad: np.ndarray) -> np.ndarray:
        """Back-propagate ``grad`` (dL/d output) through the layer.

        Stores the clipped parameter gradients in ``dW`` and ``db`` and
        returns the gradient with respect to the layer input.

        For softmax the incoming gradient is multiplied by the softmax
        Jacobian, column by column:
        ``delta = y * (grad - sum(grad * y))``. The previous code used
        ``grad`` itself as the pre-activation gradient, which is only valid
        when the caller passes ``output - target``; the loss derivatives in
        this file pass dL/d output instead. With the cross-entropy derivative
        ``-target / output / n`` and one-hot targets, the Jacobian product
        reduces to ``(output - target) / n``.
        """
        if self.activation_name == "softmax":
            weighted_sum = np.sum(grad * self.output, axis=0, keepdims=True)
            delta = self.output * (grad - weighted_sum)
        else:
            delta = grad * self.activation_derivative(self.z)

        self.dW = np.dot(delta, self.inputs.T)
        self.db = np.sum(delta, axis=1, keepdims=True)

        # NOTE: despite the name this bounds each gradient entry to
        # [-5, 5]; it is value clipping, not norm clipping.
        max_grad_norm = 5.0
        self.dW = np.clip(self.dW, -max_grad_norm, max_grad_norm)
        self.db = np.clip(self.db, -max_grad_norm, max_grad_norm)

        # Gradient with respect to the layer input, for the previous layer.
        return np.dot(self.weights.T, delta)

    def update(self, learning_rate: float) -> None:
        """Take one gradient-descent step using the gradients from ``backward``.

        A small L2 weight-decay term is added to the weight gradient; the
        biases are not decayed.
        """
        weight_decay = 1e-4
        weight_decay_term = weight_decay * self.weights

        self.weights -= learning_rate * (self.dW + weight_decay_term)
        self.biases -= learning_rate * self.db
186
+
187
+
188
class DropoutLayer(Layer):
    """Inverted-dropout layer: zeroes random entries during training and
    rescales the survivors by ``1 / (1 - dropout_rate)``."""

    def __init__(self, dropout_rate: float = 0.5):
        """Create the layer.

        Args:
            dropout_rate: Probability of zeroing each entry; must satisfy
                ``0 <= dropout_rate < 1``.

        Raises:
            ValueError: If ``dropout_rate`` is outside ``[0, 1)``. A rate of
                1 would divide by zero when the mask is rescaled.
        """
        if not 0.0 <= dropout_rate < 1.0:
            raise ValueError(f"dropout_rate must be in [0, 1), got {dropout_rate}")
        self.dropout_rate = dropout_rate
        self.mask = None

    def forward(self, inputs: np.ndarray, training: bool = True) -> np.ndarray:
        """Apply dropout when ``training`` is true; otherwise pass through.

        In inference mode the cached mask is cleared so that a later
        ``backward`` call cannot reuse a mask from an earlier training pass.
        """
        if not training:
            self.mask = None
            return inputs

        keep_probability = 1 - self.dropout_rate
        # Bernoulli keep-mask, pre-divided so the expected activation is
        # unchanged.
        self.mask = (
            np.random.binomial(1, keep_probability, size=inputs.shape)
            / keep_probability
        )
        return inputs * self.mask

    def backward(self, grad: np.ndarray) -> np.ndarray:
        """Route ``grad`` through the mask used by the last training forward.

        When no mask is cached (no forward pass yet, or the last one ran in
        inference mode) the layer acted as the identity, so the gradient is
        returned unchanged.
        """
        if self.mask is None:
            return grad
        return grad * self.mask
210
+
211
+
212
class NeuralNetwork:
    """Sequential container of layers with a configurable loss function."""

    def __init__(self):
        """Create an empty network with no loss function selected."""
        self.layers = []
        self.loss_fn = None
        self.loss_derivative = None
        # Name passed to ``set_loss``; persisted by ``save`` so that ``load``
        # can restore the same loss.
        self.loss_type = None

    def add(self, layer: "Layer") -> None:
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def set_loss(self, loss_type: str) -> None:
        """Select the loss function by name.

        Args:
            loss_type: ``"mse"`` or ``"cross_entropy"``.

        Raises:
            ValueError: If ``loss_type`` is not supported.
        """
        if loss_type == "mse":
            self.loss_fn = LossFunctions.mse
            self.loss_derivative = LossFunctions.mse_derivative
        elif loss_type == "cross_entropy":
            self.loss_fn = LossFunctions.cross_entropy
            self.loss_derivative = LossFunctions.cross_entropy_derivative
        else:
            raise ValueError(f"Unsupported loss function: {loss_type}")
        self.loss_type = loss_type

    def forward(self, x: np.ndarray, training: bool = True) -> np.ndarray:
        """Run ``x`` through every layer in order and return the final output.

        ``training`` is forwarded only to dropout layers, the one layer type
        whose ``forward`` accepts it.
        """
        output = x
        for layer in self.layers:
            if isinstance(layer, DropoutLayer):
                output = layer.forward(output, training)
            else:
                output = layer.forward(output)
        return output

    def compute_loss(self, y_pred: np.ndarray, y_true: np.ndarray) -> float:
        """Return the configured loss for ``y_pred`` against ``y_true``."""
        return self.loss_fn(y_pred, y_true)

    def backward(self, y_pred: np.ndarray, y_true: np.ndarray) -> None:
        """Back-propagate the loss gradient through the layers in reverse."""
        grad = self.loss_derivative(y_pred, y_true)
        for layer in reversed(self.layers):
            grad = layer.backward(grad)

    def update(self, learning_rate: float) -> None:
        """Ask every layer to apply its parameter update."""
        for layer in self.layers:
            layer.update(learning_rate)

    def predict(self, x: np.ndarray) -> np.ndarray:
        """Run a forward pass in inference mode (dropout disabled)."""
        return self.forward(x, training=False)

    @classmethod
    def load(cls, filename: str) -> "NeuralNetwork":
        """Rebuild a network from a JSON file written by ``save``.

        Files without a ``"loss_type"`` entry (written before the loss was
        persisted) fall back to cross-entropy, as before.
        """
        with open(filename, "r", encoding="utf-8") as f:
            model_data = json.load(f)

        network = cls()
        network.set_loss(model_data.get("loss_type", "cross_entropy"))

        for layer_data in model_data["layers"]:
            if layer_data["type"] == "dense":
                layer = DenseLayer(
                    layer_data["input_size"],
                    layer_data["output_size"],
                    layer_data["activation"],
                )
                # Replace the random initialisation with the stored values.
                layer.weights = np.array(layer_data["weights"])
                layer.biases = np.array(layer_data["biases"])
                network.add(layer)
            elif layer_data["type"] == "dropout":
                network.add(DropoutLayer(layer_data["dropout_rate"]))

        return network

    def save(self, filename: str) -> None:
        """Write the architecture, parameters and loss name to a JSON file.

        The loss name is stored under ``"loss_type"`` (the key ``load``
        reads) whenever a loss has been selected. Previously it was never
        written, so a model using MSE came back as cross-entropy after a
        save/load round trip.
        """
        model_data = {"layers": []}

        loss_type = getattr(self, "loss_type", None)
        if loss_type is not None:
            model_data["loss_type"] = loss_type

        for layer in self.layers:
            if isinstance(layer, DenseLayer):
                model_data["layers"].append(
                    {
                        "type": "dense",
                        "input_size": layer.input_size,
                        "output_size": layer.output_size,
                        "activation": layer.activation_name,
                        "weights": layer.weights.tolist(),
                        "biases": layer.biases.tolist(),
                    }
                )
            elif isinstance(layer, DropoutLayer):
                model_data["layers"].append(
                    {"type": "dropout", "dropout_rate": layer.dropout_rate}
                )

        with open(filename, "w", encoding="utf-8") as f:
            json.dump(model_data, f)
314
+
315
+
316
class TextProcessor:
    """Tokenises sentences and encodes them as binary bag-of-words vectors."""

    def __init__(self):
        """Start with an empty vocabulary."""
        self.vocabulary = []
        self.vocabulary_size = 0

    def tokenize(self, sentence: str) -> List[str]:
        """Return the lower-cased runs of word characters in ``sentence``."""
        return re.findall(r"\w+", sentence.lower())

    def build_vocabulary(self, sentences: List[str]) -> None:
        """Replace the vocabulary with the sorted unique tokens of ``sentences``."""
        unique_tokens = set()
        for sentence in sentences:
            unique_tokens.update(self.tokenize(sentence))

        self.vocabulary = sorted(unique_tokens)
        self.vocabulary_size = len(self.vocabulary)

    def sentence_to_bow(self, sentence: str) -> np.ndarray:
        """Encode ``sentence`` as a ``(vocabulary_size, 1)`` column of 0/1 flags.

        Entry ``i`` is 1 when vocabulary word ``i`` occurs in the sentence;
        tokens outside the vocabulary are ignored.
        """
        vector = np.zeros((self.vocabulary_size, 1))

        # One pass over the vocabulary builds a word -> row map, replacing
        # two linear list scans per token. ``setdefault`` keeps the first
        # position of a repeated word, matching ``list.index``. The map is
        # rebuilt on every call because ``vocabulary`` is a public attribute
        # that callers assign directly.
        row_of = {}
        for row, word in enumerate(self.vocabulary):
            row_of.setdefault(word, row)

        for token in self.tokenize(sentence):
            row = row_of.get(token)
            if row is not None:
                vector[row, 0] = 1

        return vector

    def save(self, filename: str) -> None:
        """Write the vocabulary and its size to ``filename`` as JSON."""
        processor_data = {
            "vocabulary": self.vocabulary,
            "vocabulary_size": self.vocabulary_size,
        }

        with open(filename, "w", encoding="utf-8") as f:
            json.dump(processor_data, f)

    @classmethod
    def load(cls, filename: str) -> "TextProcessor":
        """Create a processor from a JSON file written by ``save``.

        If the file has no ``"vocabulary_size"`` entry the size is taken
        from the length of the stored vocabulary.
        """
        with open(filename, "r", encoding="utf-8") as f:
            processor_data = json.load(f)

        processor = cls()
        processor.vocabulary = processor_data["vocabulary"]
        processor.vocabulary_size = processor_data.get(
            "vocabulary_size", len(processor.vocabulary)
        )

        return processor
371
+
372
+
373
class Chatbot:
    """Intent-classification chatbot built on the feed-forward network above.

    A sentence is encoded as a bag-of-words vector, classified into one of
    the loaded intents, and answered with a random response of that intent
    (or a default response when the confidence is too low).
    """

    def __init__(self):
        """Create a chatbot with no intents and an empty model."""
        self.intents = {}
        self.text_processor = TextProcessor()
        self.model = NeuralNetwork()
        self.intent_names = []
        self.confidence_threshold = 0.5
        self.default_response = "I'm not sure I understand. Could you rephrase that?"
        self.training_history = None

    @staticmethod
    def _sidecar_path(filename: str, suffix: str) -> str:
        """Return ``<filename without extension><suffix>.json``."""
        base, _extension = os.path.splitext(filename)
        return f"{base}{suffix}.json"

    def load_intents(self, intents_data: Dict) -> None:
        """Install ``intents_data`` and rebuild intent names and vocabulary.

        Each value of ``intents_data`` must provide a ``"patterns"`` list.
        """
        self.intents = intents_data
        self.intent_names = list(self.intents.keys())

        all_patterns = []
        for intent in self.intents.values():
            all_patterns.extend(intent["patterns"])

        self.text_processor.build_vocabulary(all_patterns)

    def load_intents_from_file(self, filename: str) -> None:
        """Read intents from a JSON file and install them via ``load_intents``."""
        with open(filename, "r", encoding="utf-8") as f:
            intents_data = json.load(f)

        self.load_intents(intents_data)

    def save_intents(self, filename: str) -> None:
        """Write the current intents to ``filename`` as indented JSON."""
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(self.intents, f, indent=4)

    def load_model(self, filename: str) -> None:
        """Replace the network with the one stored in ``filename``.

        Only the network is loaded; the text processor and intent names
        written by ``save_model`` are not read here.
        """
        self.model = NeuralNetwork.load(filename)

    def save_model(self, filename: str) -> None:
        """Save the network plus two sidecar files next to it.

        For ``model.json`` the sidecars are ``model_processor.json`` (text
        processor) and ``model_intents.json`` (intent names and response
        settings). The names are derived by splitting off the extension.
        The previous ``str.replace(".json", ...)`` left a filename without
        ``.json`` unchanged, so both sidecars were written over the model
        file itself.
        """
        self.model.save(filename)
        self.text_processor.save(self._sidecar_path(filename, "_processor"))

        settings = {
            "intent_names": self.intent_names,
            "confidence_threshold": self.confidence_threshold,
            "default_response": self.default_response,
        }
        with open(self._sidecar_path(filename, "_intents"), "w", encoding="utf-8") as f:
            json.dump(settings, f)

    def build_model(
        self, hidden_layers: Optional[List[int]] = None, dropout_rate: float = 0.0
    ) -> None:
        """Create a fresh, untrained network sized for the loaded intents.

        Args:
            hidden_layers: Widths of the ReLU hidden layers, in order.
                Defaults to a single hidden layer of 8 units.
            dropout_rate: When positive, a dropout layer with this rate
                follows every hidden layer.

        Raises:
            ValueError: If no intents are loaded or ``hidden_layers`` is empty.
        """
        output_size = len(self.intent_names)
        if output_size == 0:
            raise ValueError("No intents loaded. Please load intents first.")

        if hidden_layers is None:
            hidden_layers = [8]
        if len(hidden_layers) == 0:
            raise ValueError("At least one hidden layer size is required.")

        # The input width is the vocabulary size; the output width is the
        # number of intents.
        input_size = self.text_processor.vocabulary_size

        self.model = NeuralNetwork()

        previous_size = input_size
        for layer_size in hidden_layers:
            self.model.add(DenseLayer(previous_size, layer_size, "relu"))
            if dropout_rate > 0:
                self.model.add(DropoutLayer(dropout_rate))
            previous_size = layer_size

        # Softmax output with cross-entropy loss for classification.
        self.model.add(DenseLayer(previous_size, output_size, "softmax"))
        self.model.set_loss("cross_entropy")

    def train(
        self,
        epochs: int = 1000,
        learning_rate: float = 0.01,
        batch_size: int = None,
        verbose: bool = True,
    ) -> Dict:
        """Train the model on every pattern of every loaded intent.

        Each epoch is one full-batch gradient step over all patterns.

        Args:
            epochs: Number of full-batch passes.
            learning_rate: Step size passed to the model update.
            batch_size: Accepted for compatibility; currently unused.
            verbose: Print progress every 100 epochs and on NaN loss.

        Returns:
            ``{"loss": [...], "accuracy": [...]}`` with one entry per
            completed epoch. If the loss is NaN in the very first epoch,
            ``{"loss": [0], "accuracy": [0]}`` is returned and
            ``training_history`` is left untouched.

        Raises:
            ValueError: If the loaded intents contain no patterns.
        """
        n_intents = len(self.intent_names)
        inputs = []
        targets = []

        for idx, intent in enumerate(self.intent_names):
            for pattern in self.intents[intent]["patterns"]:
                inputs.append(self.text_processor.sentence_to_bow(pattern))

                one_hot = np.zeros((n_intents, 1))
                one_hot[idx, 0] = 1
                targets.append(one_hot)

        if not inputs:
            # np.hstack would fail on an empty list with a far less helpful
            # message; the exception type is the same.
            raise ValueError(
                "No training patterns available. Please add patterns to your intents."
            )

        # Stack the column vectors side by side: one column per pattern.
        X_train = np.hstack(inputs)
        y_train = np.hstack(targets)

        history = {"loss": [], "accuracy": []}

        # Element-wise bound applied to every layer's gradients after the
        # backward pass (tighter than the layer's own bound).
        max_grad_norm = 1.0
        epsilon = 1e-10
        true_labels = np.argmax(y_train, axis=0)

        for epoch in range(epochs):
            outputs = self.model.forward(X_train)

            # Keep probabilities away from 0 and 1 so the log stays finite.
            outputs = np.clip(outputs, epsilon, 1.0 - epsilon)

            loss = self.model.compute_loss(outputs, y_train)

            if np.isnan(loss):
                if verbose:
                    print(f"NaN loss detected at epoch {epoch+1}. Stopping training.")

                if epoch > 0:
                    # Keep the history gathered so far.
                    break
                return {"loss": [0], "accuracy": [0]}

            self.model.backward(outputs, y_train)

            for layer in self.model.layers:
                if getattr(layer, "dW", None) is not None:
                    layer.dW = np.clip(layer.dW, -max_grad_norm, max_grad_norm)
                if getattr(layer, "db", None) is not None:
                    layer.db = np.clip(layer.db, -max_grad_norm, max_grad_norm)

            self.model.update(learning_rate)

            # Accuracy of the outputs computed before this epoch's update.
            accuracy = np.mean(np.argmax(outputs, axis=0) == true_labels)

            # Plain Python floats keep the history JSON-serialisable.
            history["loss"].append(float(loss))
            history["accuracy"].append(float(accuracy))

            if verbose and (epoch + 1) % 100 == 0:
                print(
                    f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}, Accuracy: {accuracy:.4f}"
                )

        self.training_history = history
        return history

    def predict(self, sentence: str) -> Tuple[str, float]:
        """Return ``(intent_name, confidence)`` for ``sentence``.

        The confidence is the model output for the winning intent, returned
        as a plain Python float.
        """
        bow = self.text_processor.sentence_to_bow(sentence)
        prediction = self.model.predict(bow)

        intent_idx = int(np.argmax(prediction))
        confidence = float(prediction[intent_idx, 0])

        return self.intent_names[intent_idx], confidence

    def get_response(self, sentence: str) -> Tuple[str, str, float]:
        """Return ``(intent, response, confidence)`` for a user sentence.

        Below the confidence threshold the intent is reported as
        ``"unknown"`` with the default response. If the predicted intent has
        no responses to choose from, the default response is returned with
        the predicted intent instead of raising.
        """
        intent, confidence = self.predict(sentence)

        if confidence < self.confidence_threshold:
            return "unknown", self.default_response, confidence

        responses = self.intents.get(intent, {}).get("responses")
        if not responses:
            return intent, self.default_response, confidence

        return intent, random.choice(responses), confidence

    def plot_training_history(self, history: Dict = None) -> None:
        """Show loss and accuracy curves for ``history`` in a matplotlib window.

        Falls back to the history of the last ``train`` call; prints a
        message and returns when neither is available.
        """
        if history is None:
            history = self.training_history

        if history is None:
            print("No training history available.")
            return

        plt.figure(figsize=(12, 5))

        plt.subplot(1, 2, 1)
        plt.plot(history["loss"])
        plt.title("Model Loss")
        plt.xlabel("Epoch")
        plt.ylabel("Loss")

        plt.subplot(1, 2, 2)
        plt.plot(history["accuracy"])
        plt.title("Model Accuracy")
        plt.xlabel("Epoch")
        plt.ylabel("Accuracy")

        plt.tight_layout()
        plt.show()

    def get_training_plot_as_base64(self, history: Dict = None) -> Optional[str]:
        """Render the training curves to ``training_plot.png``.

        Despite its name, this method returns the path of the written PNG
        file, not a base64 string. It returns ``None`` when there is nothing
        to plot or rendering fails (the error is printed). The figure is
        always closed, including on the early-return and error paths that
        previously leaked it.
        """
        if history is None:
            history = self.training_history

        if history is None or "loss" not in history or len(history["loss"]) == 0:
            return None

        figure = None
        try:
            # Drop NaN entries so they do not break the plot.
            loss_values = [x for x in history["loss"] if not np.isnan(x)]
            acc_values = [x for x in history["accuracy"] if not np.isnan(x)]

            if len(loss_values) == 0 or len(acc_values) == 0:
                return None

            figure = plt.figure(figsize=(12, 5))

            plt.subplot(1, 2, 1)
            plt.plot(loss_values)
            plt.title("Model Loss")
            plt.xlabel("Epoch")
            plt.ylabel("Loss")

            plt.subplot(1, 2, 2)
            plt.plot(acc_values)
            plt.title("Model Accuracy")
            plt.xlabel("Epoch")
            plt.ylabel("Accuracy")

            plt.tight_layout()

            # Write the PNG directly; the earlier base64 encode/decode round
            # trip produced exactly these bytes.
            img_path = "training_plot.png"
            figure.savefig(img_path, format="png")
            return img_path
        except Exception as e:
            # Plotting is best-effort: report the problem and carry on.
            print(f"Error generating training plot: {str(e)}")
            return None
        finally:
            if figure is not None:
                plt.close(figure)

    def chat(self):
        """Run an interactive console chat until the user types quit/exit/bye."""
        print("Chatbot: Hello! Type 'quit' to exit.")

        while True:
            user_input = input("You: ")

            if user_input.lower() in ["quit", "exit", "bye"]:
                print("Chatbot: Goodbye!")
                break

            intent, response, confidence = self.get_response(user_input)
            print(f"Chatbot ({intent}, {confidence:.2f}): {response}")
681
+
682
+
683
# Single shared chatbot instance used by every helper function below.
chatbot = Chatbot()

# Built-in intents used when no usable intents file is available.
# Each entry maps an intent name to its example patterns and canned responses.
default_intents = {
    "greeting": dict(
        patterns=["Hi", "Hello", "Hey", "Good morning", "What's up"],
        responses=["Hello!", "Hi there!", "Greetings!", "Hey! How can I help you?"],
    ),
    "farewell": dict(
        patterns=["Bye", "See you", "Goodbye", "Later", "I'm leaving"],
        responses=["Goodbye!", "See you later!", "Farewell!", "Take care!"],
    ),
    "thanks": dict(
        patterns=["Thanks", "Thank you", "Much appreciated", "Appreciate it"],
        responses=["You're welcome!", "No problem!", "Anytime!", "Glad to help!"],
    ),
    "help": dict(
        patterns=["Help", "I need help", "Can you help me", "Support"],
        responses=[
            "How can I help you?",
            "I'm here to assist you.",
            "What do you need help with?",
        ],
    ),
}
709
+
710
+
711
+ # Function to initialize the chatbot
712
def initialize_chatbot():
    """Prepare the module-level ``chatbot``: load intents, then a model.

    Intents come from ``intents.json`` when it exists and can be read;
    otherwise the built-in defaults are used (and written to that file when
    it was missing). A saved model is restored only when the model file and
    both sidecar files exist and all of them load successfully; in every
    other case a fresh, untrained ``[32, 16]`` model is built.

    The sidecar data is loaded into local variables and committed to the
    chatbot only once every file has been read. A failure part-way through
    previously left the saved text processor or intent names installed, so
    the fallback model was built against a vocabulary that did not match the
    loaded intents.
    """
    global chatbot

    model_path = "chatbot_model.json"
    processor_path = "chatbot_model_processor.json"
    intents_names_path = "chatbot_model_intents.json"
    intents_path = "intents.json"

    if os.path.exists(intents_path):
        try:
            chatbot.load_intents_from_file(intents_path)
            print(f"Loaded intents from {intents_path}")
        except Exception as e:
            # An unreadable file must not prevent start-up.
            print(f"Error loading intents: {e}")
            print("Loading default intents")
            chatbot.load_intents(default_intents)
    else:
        print("No intents file found. Loading default intents")
        chatbot.load_intents(default_intents)
        chatbot.save_intents(intents_path)

    required_files = (model_path, processor_path, intents_names_path)
    if not all(os.path.exists(path) for path in required_files):
        print(
            "No model found or incomplete model files. A new model will be built and trained"
        )
        chatbot.build_model(hidden_layers=[32, 16])
        return

    try:
        chatbot.load_model(model_path)

        # Read everything else into locals first; nothing below touches the
        # chatbot until all files have been parsed.
        text_processor = TextProcessor.load(processor_path)
        with open(intents_names_path, "r") as f:
            intents_data = json.load(f)
        intent_names = intents_data["intent_names"]
        confidence_threshold = intents_data.get("confidence_threshold", 0.5)
        default_response = intents_data.get(
            "default_response",
            "I'm not sure I understand. Could you rephrase that?",
        )
    except Exception as e:
        print(f"Error loading model: {e}")
        print("A new model will be built and trained")
        # build_model replaces any model loaded above and uses the
        # vocabulary and intent names derived from the loaded intents.
        chatbot.build_model(hidden_layers=[32, 16])
        return

    chatbot.text_processor = text_processor
    chatbot.intent_names = intent_names
    chatbot.confidence_threshold = confidence_threshold
    chatbot.default_response = default_response

    print(f"Loaded existing model from {model_path}")
771
+
772
+
773
# Run the start-up routine at import time so the module-level chatbot has
# its intents and a model before any helper below is called.
initialize_chatbot()

# Chat history for the interface. None of the functions visible in this part
# of the file read or write it; `respond` receives its history as an argument.
chat_history = []
778
+
779
+
780
+ # Function to respond to user messages
781
def respond(message, history):
    """Return the chatbot's reply to ``message``.

    ``history`` is accepted but not used. An empty message yields a prompt
    to type something; otherwise the reply is returned after a half-second
    pause that simulates processing time.
    """
    if message:
        _intent, reply, _confidence = chatbot.get_response(message)

        # Deliberate delay so the interface shows a short "thinking" phase.
        time.sleep(0.5)

        return reply

    return "Please type a message."
793
+
794
+
795
+ # Function to get intent and confidence
796
def get_intent_info(message):
    """Return ``(intent, confidence)`` for ``message``.

    An empty message yields ``("N/A", 0.0)``; otherwise the chatbot's
    prediction is returned with the confidence converted to a Python float.
    """
    if message:
        label, score = chatbot.predict(message)
        return label, float(score)

    return "N/A", 0.0
803
+
804
+
805
+ # Function to add a new intent
806
def add_intent(intent_name, patterns, responses):
    """Add a new intent, or extend an existing one, and save ``intents.json``.

    Args:
        intent_name: Name of the intent; surrounding whitespace is ignored.
        patterns: Example user phrases, one per line.
        responses: Candidate replies, one per line.

    Returns:
        A status message for the interface.

    The new patterns only influence predictions after the model has been
    retrained.
    """
    if not intent_name or not patterns or not responses:
        return "Please fill all fields"

    # A name made only of whitespace counts as missing.
    intent_name = intent_name.strip()
    if not intent_name:
        return "Please fill all fields"

    # One entry per non-blank line.
    pattern_list = [p.strip() for p in patterns.split("\n") if p.strip()]
    response_list = [r.strip() for r in responses.split("\n") if r.strip()]

    if not pattern_list or not response_list:
        return "Please provide at least one pattern and one response"

    entry = chatbot.intents.setdefault(intent_name, {})
    for key, new_items in (("patterns", pattern_list), ("responses", response_list)):
        existing = entry.setdefault(key, [])
        for item in new_items:
            # Skip duplicates so submitting the same form twice does not
            # over-weight a pattern or response.
            if item not in existing:
                existing.append(item)

    # Register the name even when the intent already existed in ``intents``
    # but was missing from ``intent_names``.
    if intent_name not in chatbot.intent_names:
        chatbot.intent_names.append(intent_name)

    chatbot.save_intents("intents.json")

    return f"Intent '{intent_name}' added/updated successfully"
834
+
835
+
836
+ # Fixed train_model function with corrected format string
837
def train_model(epochs, learning_rate, hidden_layers_str, dropout_rate):
    """Rebuild, train and save the chatbot model from interface inputs.

    Args:
        epochs: Number of training epochs (converted with ``int``).
        learning_rate: Requested learning rate; capped at 0.005 for stability.
        hidden_layers_str: Comma-separated hidden layer sizes, e.g. ``"32,16"``.
        dropout_rate: Dropout rate; must be at least 0 and below 1.

    Returns:
        ``(message, plot_path)``. ``message`` is a training summary or an
        error description; ``plot_path`` is the value returned by
        ``chatbot.get_training_plot_as_base64`` on success and ``None`` on
        any error. Exceptions are reported in the message, never raised.
    """
    try:
        hidden_layers = [
            int(x.strip()) for x in hidden_layers_str.split(",") if x.strip()
        ]

        if not hidden_layers:
            return (
                "Error: Invalid hidden layer format. Use comma-separated numbers, e.g. '32,16'",
                None,
            )

        if any(size <= 0 for size in hidden_layers):
            return ("Error: Hidden layer sizes must be positive integers.", None)

        epochs = int(epochs)
        # Cap the learning rate for stability.
        learning_rate = min(float(learning_rate), 0.005)
        dropout_rate = float(dropout_rate)

        if not 0.0 <= dropout_rate < 1.0:
            # A rate of 1 would divide by zero in the dropout mask.
            return ("Error: Dropout rate must be at least 0 and below 1.", None)

        # Re-derive intent names and vocabulary from the current intents.
        # Intents added at runtime do not update the vocabulary, so without
        # this step their new words would be invisible to the new model.
        chatbot.load_intents(chatbot.intents)

        if len(chatbot.intent_names) < 2:
            return (
                "Error: Need at least 2 intents for training. Please add more intents.",
                None,
            )

        if chatbot.text_processor.vocabulary_size == 0:
            return (
                "Error: No vocabulary built. Please add more patterns to your intents.",
                None,
            )

        chatbot.build_model(hidden_layers=hidden_layers, dropout_rate=dropout_rate)

        history = chatbot.train(
            epochs=epochs, learning_rate=learning_rate, verbose=True
        )

        if not history or "loss" not in history or not history["loss"]:
            return "Training failed - no history data returned", None

        final_loss = history["loss"][-1] if history["loss"] else 0
        final_accuracy = history["accuracy"][-1] if history["accuracy"] else 0

        # NaN cannot be rendered with a numeric format spec in a useful way,
        # so spell it out.
        final_loss_str = "NaN" if np.isnan(final_loss) else f"{final_loss:.4f}"
        final_accuracy_str = (
            "NaN" if np.isnan(final_accuracy) else f"{final_accuracy:.4f}"
        )

        chatbot.save_model("chatbot_model.json")

        # Path of the rendered plot, or None when plotting failed.
        img_str = chatbot.get_training_plot_as_base64(history)

        return (
            f"Model trained successfully with:\n"
            f"- Epochs: {epochs}\n"
            f"- Learning Rate: {learning_rate}\n"
            f"- Hidden Layers: {hidden_layers}\n"
            f"- Dropout Rate: {dropout_rate}\n"
            f"- Final Loss: {final_loss_str}\n"
            f"- Final Accuracy: {final_accuracy_str}"
        ), img_str
    except Exception as e:
        # Top-level boundary for the interface: report the failure, with the
        # traceback, instead of crashing the callback.
        import traceback

        error_details = traceback.format_exc()
        return f"Error training model: {str(e)}\n\nDetails:\n{error_details}", None
916
+
917
+
918
+ # Function to load an existing model
919
def load_model_from_file(file_obj):
    """Load a model, plus any sibling processor/intents files, from an upload.

    Args:
        file_obj: The uploaded file, either as a path (``str`` or
            ``os.PathLike``) or as an object exposing the path in ``.name``.

    Returns:
        A status message describing success or the reason for failure.
    """
    if not file_obj:
        return "No file uploaded"

    try:
        # The upload may arrive as a plain path or as a temp-file wrapper,
        # so accept both instead of assuming a ``.name`` attribute.
        if isinstance(file_obj, (str, os.PathLike)):
            file_path = os.fspath(file_obj)
        else:
            file_path = file_obj.name

        # Check file extension (case-insensitively, so "MODEL.JSON" is accepted)
        if not file_path.lower().endswith(".json"):
            return "Please upload a JSON model file"

        # Load the model
        chatbot.load_model(file_path)

        # Related files share the model's base name with a fixed suffix.
        base_name = os.path.splitext(file_path)[0]
        processor_path = f"{base_name}_processor.json"
        intents_names_path = f"{base_name}_intents.json"

        # Related files are optional; load each only when it exists.
        if os.path.exists(processor_path):
            chatbot.text_processor = TextProcessor.load(processor_path)

        if os.path.exists(intents_names_path):
            with open(intents_names_path, "r", encoding="utf-8") as f:
                intents_data = json.load(f)
            chatbot.intent_names = intents_data["intent_names"]
            chatbot.confidence_threshold = intents_data.get(
                "confidence_threshold", 0.5
            )
            chatbot.default_response = intents_data.get(
                "default_response",
                "I'm not sure I understand. Could you rephrase that?",
            )

        return f"Model loaded successfully from {file_path}"
    except Exception as e:
        return f"Error loading model: {str(e)}"
957
+
958
+
959
+ # Function to save the current model
960
def save_model():
    """Save the current model under a timestamped file name.

    Returns:
        A status message naming the written file, or describing the error.
    """
    try:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        target = "chatbot_model_" + stamp + ".json"
        chatbot.save_model(target)
    except Exception as err:
        return f"Error saving model: {err}"
    return f"Model saved as {target}"
972
+
973
+
974
+ # Function to update settings
975
def update_settings(threshold, default_response):
    """Apply new chat settings and persist them next to the model files.

    Args:
        threshold: Confidence threshold; coerced with ``float``.
        default_response: Reply stored as the chatbot's default response.

    Returns:
        A status message describing success or the error encountered.
    """
    try:
        # Coerce first so a bad value leaves the chatbot untouched.
        parsed_threshold = float(threshold)
        chatbot.confidence_threshold = parsed_threshold
        chatbot.default_response = default_response

        # Persist the settings together with the current intent names.
        with open("chatbot_model_intents.json", "w") as out:
            snapshot = {
                "intent_names": chatbot.intent_names,
                "confidence_threshold": chatbot.confidence_threshold,
                "default_response": chatbot.default_response,
            }
            json.dump(snapshot, out)
    except Exception as err:
        return f"Error updating settings: {err}"
    return "Settings updated successfully"
995
+
996
+
997
+ # Function to list intents
998
def list_intents():
    """Build a Markdown summary of every intent.

    Each intent shows at most its first three patterns and responses, with a
    trailing "..." when more exist.

    Returns:
        The Markdown text, or "No intents available" when there are none.
    """
    if not chatbot.intents:
        return "No intents available"

    def preview(items):
        # First three entries, marked with an ellipsis when truncated.
        shown = ", ".join(items[:3])
        return shown + "..." if len(items) > 3 else shown

    sections = [
        f"**Intent**: {name}\n"
        f"**Patterns**: {preview(data['patterns'])}\n"
        f"**Responses**: {preview(data['responses'])}\n\n"
        for name, data in chatbot.intents.items()
    ]
    return "".join(sections)
1017
+
1018
+
1019
+ # Function to edit an intent
1020
def edit_intent(intent_name, new_patterns, new_responses):
    """Replace the patterns and/or responses of an existing intent.

    Args:
        intent_name: Name of the intent to modify.
        new_patterns: Newline-separated patterns; ignored when empty or blank.
        new_responses: Newline-separated responses; ignored when empty or blank.

    Returns:
        A status message; "not found" when the intent does not exist.
    """
    if not intent_name or intent_name not in chatbot.intents:
        return f"Intent '{intent_name}' not found"

    def non_blank_lines(text):
        # One entry per line, trimmed, with blank lines dropped.
        return [line.strip() for line in text.split("\n") if line.strip()]

    entry = chatbot.intents[intent_name]
    for field, raw_text in (("patterns", new_patterns), ("responses", new_responses)):
        if not raw_text:
            continue
        cleaned = non_blank_lines(raw_text)
        # Only overwrite when something usable was entered.
        if cleaned:
            entry[field] = cleaned

    # Save intents
    chatbot.save_intents("intents.json")

    return f"Intent '{intent_name}' updated successfully"
1039
+
1040
+
1041
+ # Function to delete an intent
1042
def delete_intent(intent_name):
    """Delete an intent and persist the remaining intents.

    Args:
        intent_name: Name of the intent to remove.

    Returns:
        A status message; "not found" when the intent does not exist.
    """
    if not intent_name or intent_name not in chatbot.intents:
        return f"Intent '{intent_name}' not found"

    # Delete intent
    del chatbot.intents[intent_name]

    # The name list is maintained separately from the intents dict and can
    # drift from it, so a missing name must not abort the deletion before the
    # change is saved.
    if intent_name in chatbot.intent_names:
        chatbot.intent_names.remove(intent_name)

    # Save intents
    chatbot.save_intents("intents.json")

    return f"Intent '{intent_name}' deleted successfully"
1054
+
1055
+
1056
+ # Get the list of intents for dropdown
1057
def get_intent_list():
    """Return the chatbot's intent names (the chatbot's own list, not a copy)."""
    names = chatbot.intent_names
    return names
1059
+
1060
+
1061
+ # Function to export intents
1062
def export_intents():
    """Write all intents to a timestamped JSON file in the working directory.

    Returns:
        A status message naming the written file, or describing the error.
    """
    try:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        out_path = "intents_{}.json".format(stamp)

        with open(out_path, "w") as handle:
            json.dump(chatbot.intents, handle, indent=4)
    except Exception as err:
        return f"Error exporting intents: {err}"
    return f"Intents exported as {out_path}"
1075
+
1076
+
1077
+ # Function to import intents
1078
def import_intents_from_file(file_obj):
    """Load intents from an uploaded JSON file and persist them.

    The file must contain a JSON object mapping each intent name to an object
    with "patterns" and "responses" keys.

    Args:
        file_obj: The uploaded file, either as a path (``str`` or
            ``os.PathLike``) or as an object exposing the path in ``.name``.

    Returns:
        A status message describing success or the reason for failure.
    """
    if not file_obj:
        return "No file uploaded"

    try:
        # The upload may arrive as a plain path or as a temp-file wrapper,
        # so accept both instead of assuming a ``.name`` attribute.
        if isinstance(file_obj, (str, os.PathLike)):
            file_path = os.fspath(file_obj)
        else:
            file_path = file_obj.name

        # Check file extension (case-insensitively)
        if not file_path.lower().endswith(".json"):
            return "Please upload a JSON intents file"

        # Load intents
        with open(file_path, "r", encoding="utf-8") as f:
            intents_data = json.load(f)

        # A list or scalar at the root cannot be an intents mapping; say so
        # explicitly instead of surfacing an attribute error.
        if not isinstance(intents_data, dict):
            return "Invalid intents file: expected a JSON object of intents"

        # Validate intents format
        for intent_name, intent_data in intents_data.items():
            if (
                not isinstance(intent_data, dict)
                or "patterns" not in intent_data
                or "responses" not in intent_data
            ):
                return f"Invalid intent format for '{intent_name}'"

        # Update chatbot intents
        chatbot.load_intents(intents_data)

        # Save intents
        chatbot.save_intents("intents.json")

        return f"Imported {len(intents_data)} intents from {file_path}"
    except Exception as e:
        return f"Error importing intents: {str(e)}"
1111
+
1112
+
1113
+ # Function to get intent details
1114
def get_intent_details(intent_name):
    """Return an intent's patterns and responses as newline-joined text.

    Args:
        intent_name: Name of the intent to look up.

    Returns:
        A ``(patterns, responses)`` tuple of strings; two empty strings when
        the name is empty or unknown.
    """
    is_known = bool(intent_name) and intent_name in chatbot.intents
    if not is_known:
        return "", ""

    entry = chatbot.intents[intent_name]
    pattern_text = "\n".join(entry["patterns"])
    response_text = "\n".join(entry["responses"])
    return pattern_text, response_text
1122
+
1123
+
1124
+ # Create the Gradio interface with multiple tabs
1125
# Gradio interface: Chat, Intents Management, Training and About tabs.
# Multi-line Markdown literals are kept flush-left so their rendering does not
# depend on the component de-indenting them.
with gr.Blocks(title="Neural Network Chatbot", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Neural Network Chatbot")
    gr.Markdown(
        """ This chatbot uses a neural network to understand and respond to your messages.

This chatbot application was developed by:

| **Name** | **Student ID** | **Email** |
|----------|----------------|-----------|
| AARJEYAN SHRESTHA | C0927422 | C0927422@mylambton.ca |
| PRAJWAL LUITEL | C0927658 | C0927658@mylambton.ca |
| RAJAN GHIMIRE | C0924991 | C0924991@mylambton.ca |
| RISHABH JHA | C0923563 | C0923563@mylambton.ca |
| SUDIP CHAUDHARY | C0922310 | C0922310@mylambton.ca |


- **Course**: Software Tools and Emerging Technologies for AI and ML
- **Term**: 3rd
- **Instructor**: [Peter Sigurdson](https://www.linkedin.com/in/petersigurdson/)

"""
    )

    with gr.Tabs():
        # Chat tab
        with gr.Tab("Chat"):
            with gr.Row():
                with gr.Column(scale=3):
                    chatbot_interface = gr.Chatbot(label="Conversation", height=400)

                    with gr.Row():
                        msg = gr.Textbox(
                            placeholder="Type your message here...",
                            label="Your message",
                            lines=2,
                            show_label=False,
                        )
                        send_btn = gr.Button("Send", variant="primary")

                    with gr.Accordion("Examples", open=False):
                        gr.Examples(
                            examples=[
                                "Hello!",
                                "How are you?",
                                "What can you help me with?",
                                "Thank you",
                                "Goodbye",
                            ],
                            inputs=msg,
                        )

                with gr.Column(scale=1):
                    gr.Markdown("### Analysis")
                    intent_label = gr.Label(label="Predicted Intent")
                    confidence_score = gr.Number(label="Confidence Score")

                    gr.Markdown("### Settings")
                    confidence_slider = gr.Slider(
                        label="Confidence Threshold",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.05,
                        value=chatbot.confidence_threshold,
                    )
                    default_resp = gr.Textbox(
                        label="Default Response",
                        value=chatbot.default_response,
                        lines=2,
                    )
                    update_settings_btn = gr.Button("Update Settings")

            # Event handlers for chat
            def user_message(text, history):
                """Clear the input box and append the user's turn to the history."""
                return "", (history or []) + [[text, None]]

            def bot_message(history):
                """Fill in the bot's reply for the most recent user turn."""
                if history:
                    latest = history[-1][0]
                    intent, response, confidence = chatbot.get_response(latest)
                    history[-1][1] = response
                    return history, intent, confidence
                return history, "N/A", 0.0

            # Pressing Enter and clicking "Send" run the same two-step chain:
            # record the user turn immediately, then compute the reply.
            for trigger in (msg.submit, send_btn.click):
                trigger(
                    user_message,
                    [msg, chatbot_interface],
                    [msg, chatbot_interface],
                    queue=False,
                ).then(
                    bot_message,
                    chatbot_interface,
                    [chatbot_interface, intent_label, confidence_score],
                )

            # Named status box for the settings update, rendered below the row.
            settings_status = gr.Textbox(label="Status")
            update_settings_btn.click(
                update_settings,
                [confidence_slider, default_resp],
                settings_status,
            )

        # Intents Management tab
        with gr.Tab("Intents Management"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Add New Intent")
                    new_intent_name = gr.Textbox(label="Intent Name")
                    new_patterns = gr.Textbox(label="Patterns (one per line)", lines=5)
                    new_responses = gr.Textbox(
                        label="Responses (one per line)", lines=5
                    )
                    add_intent_btn = gr.Button("Add Intent", variant="primary")
                    add_intent_status = gr.Textbox(label="Status")

                with gr.Column():
                    gr.Markdown("### Edit Intent")
                    edit_intent_dropdown = gr.Dropdown(
                        label="Select Intent to Edit",
                        choices=get_intent_list(),
                        interactive=True,
                    )
                    edit_patterns = gr.Textbox(label="Patterns (one per line)", lines=5)
                    edit_responses = gr.Textbox(
                        label="Responses (one per line)", lines=5
                    )

                    with gr.Row():
                        update_intent_btn = gr.Button("Update Intent")
                        delete_intent_btn = gr.Button("Delete Intent", variant="stop")

                    edit_intent_status = gr.Textbox(label="Status")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Import/Export Intents")
                    with gr.Row():
                        export_intents_btn = gr.Button("Export Intents")
                        import_intents_file = gr.File(
                            label="Import Intents (JSON file)"
                        )
                    import_export_status = gr.Textbox(label="Status")

                with gr.Column():
                    gr.Markdown("### Current Intents")
                    refresh_intents_btn = gr.Button("Refresh Intents List")
                    intents_list = gr.Markdown()

            def refresh_intent_dropdown(selected):
                """Reload the dropdown options, keeping the selection if it still exists.

                Returning a bare list to a Dropdown output would set its
                *value*; ``gr.update(choices=...)`` replaces the options.
                """
                names = list(get_intent_list())
                return gr.update(
                    choices=names, value=selected if selected in names else None
                )

            # Event handlers for intents management.
            # The dropdown refresh is chained with .then() so it runs after the
            # intent list has actually been modified.
            add_intent_btn.click(
                add_intent,
                [new_intent_name, new_patterns, new_responses],
                add_intent_status,
            ).then(refresh_intent_dropdown, edit_intent_dropdown, edit_intent_dropdown)

            edit_intent_dropdown.change(
                get_intent_details,
                edit_intent_dropdown,
                [edit_patterns, edit_responses],
            )

            update_intent_btn.click(
                edit_intent,
                [edit_intent_dropdown, edit_patterns, edit_responses],
                edit_intent_status,
            )

            delete_intent_btn.click(
                delete_intent, edit_intent_dropdown, edit_intent_status
            ).then(refresh_intent_dropdown, edit_intent_dropdown, edit_intent_dropdown)

            export_intents_btn.click(export_intents, [], import_export_status)

            import_intents_file.change(
                import_intents_from_file, import_intents_file, import_export_status
            ).then(refresh_intent_dropdown, edit_intent_dropdown, edit_intent_dropdown)

            refresh_intents_btn.click(list_intents, [], intents_list)

        # Training tab
        with gr.Tab("Training"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Train Model")
                    epochs_input = gr.Number(
                        label="Epochs", value=500, minimum=100, maximum=5000, step=100
                    )
                    # NOTE: train_model caps the effective learning rate at
                    # 0.005, so larger values entered here are reduced.
                    learning_rate_input = gr.Number(
                        label="Learning Rate",
                        value=0.01,
                        minimum=0.0001,
                        maximum=0.1,
                        step=0.001,
                    )
                    hidden_layers_input = gr.Textbox(
                        label="Hidden Layers (comma-separated)", value="32, 16"
                    )
                    dropout_rate_input = gr.Number(
                        label="Dropout Rate",
                        value=0.2,
                        minimum=0.0,
                        maximum=0.5,
                        step=0.05,
                    )
                    train_btn = gr.Button("Train Model", variant="primary")

                with gr.Column():
                    training_status = gr.Textbox(label="Training Status", lines=6)
                    training_plot = gr.Image(label="Training History")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Model Management")
                    save_model_btn = gr.Button("Save Current Model")
                    load_model_file = gr.File(label="Load Model (JSON file)")
                    model_status = gr.Textbox(label="Status")

            # Event handlers for training
            train_btn.click(
                train_model,
                [
                    epochs_input,
                    learning_rate_input,
                    hidden_layers_input,
                    dropout_rate_input,
                ],
                [training_status, training_plot],
            )

            save_model_btn.click(save_model, [], model_status)

            load_model_file.change(load_model_from_file, load_model_file, model_status)

        # About tab
        with gr.Tab("About"):
            gr.Markdown(
                """
## Neural Network Chatbot

This chatbot uses a neural network to understand and respond to user messages.
The model is trained on a set of intents, each with patterns and responses.

### Features:

- **Neural Network Backend**: The chatbot uses a fully-connected neural network with configurable layers.
- **Intent Recognition**: Recognizes user intents based on trained patterns.
- **Customizable Responses**: Each intent has multiple possible responses for variety.
- **Training Interface**: Train the model directly from the web interface.
- **Intent Management**: Add, edit, delete, import, and export intents.
- **Model Management**: Save and load models for future use.

### How to Use:

1. **Chat Tab**: Interact with the chatbot.
2. **Intents Management Tab**: Manage the chatbot's knowledge.
3. **Training Tab**: Train the neural network model.
4. **About Tab**: Learn about the chatbot and its features.

### Technical Details:

- Built with Python, NumPy, and Gradio.
- Uses a bag-of-words approach for text representation.
- Neural network with configurable hidden layers and activation functions.
- Cross-entropy loss for multi-class classification.

Created for deployment on Hugging Face Spaces.
"""
            )
1405
+
1406
# Read the intent names once more now that the UI has been defined.
# NOTE(review): this only stores the list in chat_intents; it does not call an
# initializer or update the dropdown, and chat_intents is not referenced in
# the visible part of the file. Confirm whether it is still needed.
chat_intents = get_intent_list()

# Launch the app only when this file is run as a script (not on import).
if __name__ == "__main__":
    demo.launch()