Spaces:

StelleX
/

Cutto

Build error

App Files Community

Preechanon Chatthai committed on Mar 30, 2024

Commit

78a3063

verified ·

1 Parent(s): 8af9c20

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -15

app.py CHANGED Viewed

@@ -28,17 +28,14 @@ class TimestepDropout(Dropout):
         noise_shape = (input_shape[0], input_shape[1], 1)
         return noise_shape
 def model_(n_gram = 21):
     input1 = Input(shape=(21,),dtype='float32',name = 'char_input')
     input2 = Input(shape=(21,),dtype='float32',name = 'type_input')
     a = Embedding(178, 32,input_length=21)(input1)
     a = SpatialDropout1D(0.15)(a)
     #a = TimestepDropout(0.05)(a)
     char_input = BatchNormalization()(a)
     a_concat = []
     filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[8,200],[11,150],[12,100]]
     #filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[7,200],[8,200],[9,150],[10,150],[11,150],[12,100]]
@@ -58,18 +55,13 @@ def model_(n_gram = 21):
     b = Embedding(12, 12, input_length=21)(input2)
     type_inputs = SpatialDropout1D(0.15)(b)
     #type_inputs = TimestepDropout(0.05)(b)
     x = Concatenate()([type_inputs, char_input, lstm_char, token_max])
     x = BatchNormalization()(x)
     x = Flatten()(x)
     x = Dense(100, activation='elu')(x)
     x = Dropout(0.2)(x)
     out = Dense(1, activation='sigmoid',dtype = 'float32',kernel_regularizer=regularizers.L2(0.01),bias_regularizer=regularizers.L2(0.01))(x)
     model = Model(inputs=[input1, input2], outputs=out)
     return model
@@ -91,23 +83,18 @@ def create_feature_array(text, n_pad=21):
     x_char = np.array(x_char).astype(float)
     x_type = np.array(x_type).astype(float)
     return x_char, x_type
 def tokenize(text):
         n_pad = 21
         if not text:
             return ['']
         if isinstance(text, str) and sys.version_info.major == 2:
             text = text.decode('utf-8')
         x_char, x_type = create_feature_array(text, n_pad=n_pad)
         word_end = []
         y_predict = model.predict([x_char, x_type], batch_size = 512)
         y_predict = (y_predict.ravel() > 0.46542968749999997).astype(int)
         word_end = y_predict[1:].tolist() + [1]
         tokens = []
         word = ''
         for char, w_e in zip(text, word_end):
@@ -117,7 +104,6 @@ def tokenize(text):
                 word = ''
         return tokens
 # Build the network once at module level and load its weights from the
 # relative path "cutto_tf2.h5"; `tokenize` reads this global `model`.
 model = model_()
 model.load_weights("cutto_tf2.h5")

         noise_shape = (input_shape[0], input_shape[1], 1)
         return noise_shape
 def model_(n_gram = 21):
     """Build and return the two-input, single-output tokenizer network.

     The model takes two inputs named 'char_input' and 'type_input', each of
     shape (21,), and produces one sigmoid output per sample.

     NOTE(review): this excerpt is taken from a diff and omits lines in the
     middle of the function. `lstm_char` and `token_max` are used below but
     are not defined in the visible lines; presumably they are built from
     `char_input`, `a_concat` and `filters` in the elided section -- confirm
     against the full file.

     NOTE(review): `n_gram` is not referenced in the visible lines; the window
     length 21 is hard-coded in the input shapes and `input_length` arguments.
     """
     input1 = Input(shape=(21,),dtype='float32',name = 'char_input')
     input2 = Input(shape=(21,),dtype='float32',name = 'type_input')
     # Character branch: embedding (178 entries, 32 wide), spatial dropout,
     # then batch normalization.
     a = Embedding(178, 32,input_length=21)(input1)
     a = SpatialDropout1D(0.15)(a)
     #a = TimestepDropout(0.05)(a)
     char_input = BatchNormalization()(a)
     a_concat = []
     # Pairs of integers consumed by code not shown in this excerpt;
     # presumably [kernel width, number of filters] -- TODO confirm.
     filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[8,200],[11,150],[12,100]]
     #filters = [[1,200],[2,200],[3,200],[4,200],[5,200],[6,200],[7,200],[8,200],[9,150],[10,150],[11,150],[12,100]]
     # Type branch: embedding (12 entries, 12 wide) followed by spatial dropout.
     b = Embedding(12, 12, input_length=21)(input2)
     type_inputs = SpatialDropout1D(0.15)(b)
     #type_inputs = TimestepDropout(0.05)(b)
     # Merge both branches with `lstm_char` and `token_max` (defined in the
     # elided lines), normalize, flatten, and pass through the dense head.
     x = Concatenate()([type_inputs, char_input, lstm_char, token_max])
     x = BatchNormalization()(x)
     x = Flatten()(x)
     x = Dense(100, activation='elu')(x)
     x = Dropout(0.2)(x)
     # Single sigmoid unit, forced to float32, with L2(0.01) regularization on
     # both kernel and bias.
     out = Dense(1, activation='sigmoid',dtype = 'float32',kernel_regularizer=regularizers.L2(0.01),bias_regularizer=regularizers.L2(0.01))(x)
     model = Model(inputs=[input1, input2], outputs=out)
     return model
     x_char = np.array(x_char).astype(float)
     x_type = np.array(x_type).astype(float)
     return x_char, x_type
 def tokenize(text):
         """Split `text` into tokens using the module-level `model`.

         Returns `['']` when `text` is empty or otherwise falsy. Otherwise the
         text is converted to feature arrays with `create_feature_array`, the
         model scores them, and the thresholded scores drive the token loop.

         NOTE(review): this excerpt is taken from a diff; most of the loop body
         that accumulates `word` and appends to `tokens` is not visible here.
         """
         n_pad = 21
         if not text:
             return ['']
         # Python 2 only: decode byte strings to unicode before featurizing.
         if isinstance(text, str) and sys.version_info.major == 2:
             text = text.decode('utf-8')
         x_char, x_type = create_feature_array(text, n_pad=n_pad)
         word_end = []
         y_predict = model.predict([x_char, x_type], batch_size = 512)
         # Flatten the scores and binarize them at a fixed decision threshold.
         y_predict = (y_predict.ravel() > 0.46542968749999997).astype(int)
         # Shift the flags left by one and force the last entry to 1, so entry i
         # holds the flag predicted for position i + 1 (presumably: a character
         # ends a word when the next one is flagged -- TODO confirm).
         word_end = y_predict[1:].tolist() + [1]
         tokens = []
         word = ''
         for char, w_e in zip(text, word_end):
                 # Only the reset of `word` is visible in this excerpt.
                 word = ''
         return tokens
 # Build the network once at module level and load its weights from the
 # relative path "cutto_tf2.h5"; `tokenize` reads this global `model`.
 model = model_()
 model.load_weights("cutto_tf2.h5")