Spaces:
Build error
Build error
Preechanon Chatthai committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -28,17 +28,14 @@ class TimestepDropout(Dropout):
|
|
| 28 |
noise_shape = (input_shape[0], input_shape[1], 1)
|
| 29 |
return noise_shape
|
| 30 |
|
| 31 |
-
|
| 32 |
def model_(n_gram = 21):
|
| 33 |
|
| 34 |
input1 = Input(shape=(21,),dtype='float32',name = 'char_input')
|
| 35 |
input2 = Input(shape=(21,),dtype='float32',name = 'type_input')
|
| 36 |
-
|
| 37 |
a = Embedding(178, 32,input_length=21)(input1)
|
| 38 |
a = SpatialDropout1D(0.15)(a)
|
| 39 |
#a = TimestepDropout(0.05)(a)
|
| 40 |
char_input = BatchNormalization()(a)
|
| 41 |
-
|
| 42 |
a_concat = []
|
| 43 |
filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[8,200],[11,150],[12,100]]
|
| 44 |
#filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[7,200],[8,200],[9,150],[10,150],[11,150],[12,100]]
|
|
@@ -58,18 +55,13 @@ def model_(n_gram = 21):
|
|
| 58 |
b = Embedding(12, 12, input_length=21)(input2)
|
| 59 |
type_inputs = SpatialDropout1D(0.15)(b)
|
| 60 |
#type_inputs = TimestepDropout(0.05)(b)
|
| 61 |
-
|
| 62 |
x = Concatenate()([type_inputs, char_input, lstm_char, token_max])
|
| 63 |
x = BatchNormalization()(x)
|
| 64 |
-
|
| 65 |
x = Flatten()(x)
|
| 66 |
x = Dense(100, activation='elu')(x)
|
| 67 |
x = Dropout(0.2)(x)
|
| 68 |
out = Dense(1, activation='sigmoid',dtype = 'float32',kernel_regularizer=regularizers.L2(0.01),bias_regularizer=regularizers.L2(0.01))(x)
|
| 69 |
-
|
| 70 |
-
|
| 71 |
model = Model(inputs=[input1, input2], outputs=out)
|
| 72 |
-
|
| 73 |
return model
|
| 74 |
|
| 75 |
|
|
@@ -91,23 +83,18 @@ def create_feature_array(text, n_pad=21):
|
|
| 91 |
x_char = np.array(x_char).astype(float)
|
| 92 |
x_type = np.array(x_type).astype(float)
|
| 93 |
return x_char, x_type
|
| 94 |
-
|
| 95 |
def tokenize(text):
|
|
|
|
| 96 |
n_pad = 21
|
| 97 |
-
|
| 98 |
if not text:
|
| 99 |
return ['']
|
| 100 |
-
|
| 101 |
if isinstance(text, str) and sys.version_info.major == 2:
|
| 102 |
text = text.decode('utf-8')
|
| 103 |
-
|
| 104 |
x_char, x_type = create_feature_array(text, n_pad=n_pad)
|
| 105 |
word_end = []
|
| 106 |
-
|
| 107 |
y_predict = model.predict([x_char, x_type], batch_size = 512)
|
| 108 |
y_predict = (y_predict.ravel() > 0.46542968749999997).astype(int)
|
| 109 |
word_end = y_predict[1:].tolist() + [1]
|
| 110 |
-
|
| 111 |
tokens = []
|
| 112 |
word = ''
|
| 113 |
for char, w_e in zip(text, word_end):
|
|
@@ -117,7 +104,6 @@ def tokenize(text):
|
|
| 117 |
word = ''
|
| 118 |
return tokens
|
| 119 |
|
| 120 |
-
|
| 121 |
model = model_()
|
| 122 |
model.load_weights("cutto_tf2.h5")
|
| 123 |
|
|
|
|
| 28 |
noise_shape = (input_shape[0], input_shape[1], 1)
|
| 29 |
return noise_shape
|
| 30 |
|
|
|
|
| 31 |
def model_(n_gram = 21):
|
| 32 |
|
| 33 |
input1 = Input(shape=(21,),dtype='float32',name = 'char_input')
|
| 34 |
input2 = Input(shape=(21,),dtype='float32',name = 'type_input')
|
|
|
|
| 35 |
a = Embedding(178, 32,input_length=21)(input1)
|
| 36 |
a = SpatialDropout1D(0.15)(a)
|
| 37 |
#a = TimestepDropout(0.05)(a)
|
| 38 |
char_input = BatchNormalization()(a)
|
|
|
|
| 39 |
a_concat = []
|
| 40 |
filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[8,200],[11,150],[12,100]]
|
| 41 |
#filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[7,200],[8,200],[9,150],[10,150],[11,150],[12,100]]
|
|
|
|
| 55 |
b = Embedding(12, 12, input_length=21)(input2)
|
| 56 |
type_inputs = SpatialDropout1D(0.15)(b)
|
| 57 |
#type_inputs = TimestepDropout(0.05)(b)
|
|
|
|
| 58 |
x = Concatenate()([type_inputs, char_input, lstm_char, token_max])
|
| 59 |
x = BatchNormalization()(x)
|
|
|
|
| 60 |
x = Flatten()(x)
|
| 61 |
x = Dense(100, activation='elu')(x)
|
| 62 |
x = Dropout(0.2)(x)
|
| 63 |
out = Dense(1, activation='sigmoid',dtype = 'float32',kernel_regularizer=regularizers.L2(0.01),bias_regularizer=regularizers.L2(0.01))(x)
|
|
|
|
|
|
|
| 64 |
model = Model(inputs=[input1, input2], outputs=out)
|
|
|
|
| 65 |
return model
|
| 66 |
|
| 67 |
|
|
|
|
| 83 |
x_char = np.array(x_char).astype(float)
|
| 84 |
x_type = np.array(x_type).astype(float)
|
| 85 |
return x_char, x_type
|
|
|
|
| 86 |
def tokenize(text):
|
| 87 |
+
|
| 88 |
n_pad = 21
|
|
|
|
| 89 |
if not text:
|
| 90 |
return ['']
|
|
|
|
| 91 |
if isinstance(text, str) and sys.version_info.major == 2:
|
| 92 |
text = text.decode('utf-8')
|
|
|
|
| 93 |
x_char, x_type = create_feature_array(text, n_pad=n_pad)
|
| 94 |
word_end = []
|
|
|
|
| 95 |
y_predict = model.predict([x_char, x_type], batch_size = 512)
|
| 96 |
y_predict = (y_predict.ravel() > 0.46542968749999997).astype(int)
|
| 97 |
word_end = y_predict[1:].tolist() + [1]
|
|
|
|
| 98 |
tokens = []
|
| 99 |
word = ''
|
| 100 |
for char, w_e in zip(text, word_end):
|
|
|
|
| 104 |
word = ''
|
| 105 |
return tokens
|
| 106 |
|
|
|
|
| 107 |
model = model_()
|
| 108 |
model.load_weights("cutto_tf2.h5")
|
| 109 |
|