Spaces:
Sleeping
Sleeping
Gaetano Parente committed on
Commit ·
b1db827
1
Parent(s): 03f9175
fix multiclassification
Browse files
data/tokenizer/multi-classification-tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4289059
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9cd74a01da6f2a4bd88fa485e6830c9ce113f23e6abf7263aec333f958dea02c
|
| 3 |
size 4289059
|
modules/multilabel_classification.py
CHANGED
|
@@ -7,18 +7,19 @@ BASE_PATH = './data/'
|
|
| 7 |
MODEL = BASE_PATH + 'model/'
|
| 8 |
TOKEN = BASE_PATH + 'tokenizer/'
|
| 9 |
|
| 10 |
-
class_names = np.array(['
|
| 11 |
-
'comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware', 'comp.windows.x',
|
| 12 |
-
'misc.forsale', 'rec.autos', 'rec.motorcycles', 'rec.sport.baseball',
|
| 13 |
-
'rec.sport.hockey', 'sci.crypt', 'sci.electronics', 'sci.med', 'sci.space',
|
| 14 |
-
'soc.religion.christian', 'talk.politics.guns', 'talk.politics.mideast',
|
| 15 |
-
'talk.politics.misc', 'talk.religion.misc'])
|
| 16 |
|
| 17 |
def predict(model_path, tokenizer_path, sentence):
|
| 18 |
tokenizer = utils.load_tokenizer(tokenizer_path)
|
| 19 |
x_data = []
|
| 20 |
x_data.append(sentence)
|
| 21 |
x_tokenized = tokenizer.texts_to_sequences(x_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
x_pad = pad_sequences(x_tokenized, maxlen=200)
|
| 23 |
x_t = x_pad[0]
|
| 24 |
model = models.load_model(model_path, compile=False)
|
|
@@ -28,7 +29,7 @@ def predict(model_path, tokenizer_path, sentence):
|
|
| 28 |
|
| 29 |
def multi_classification(text):
|
| 30 |
model = MODEL + 'multi-classification.h5'
|
| 31 |
-
tokenizer = TOKEN + '
|
| 32 |
labels = predict(model, tokenizer, text)
|
| 33 |
response = {}
|
| 34 |
for i, label in enumerate(labels[0]):
|
|
|
|
| 7 |
MODEL = BASE_PATH + 'model/'
|
| 8 |
TOKEN = BASE_PATH + 'tokenizer/'
|
| 9 |
|
| 10 |
+
class_names = np.array(['Economia', 'Politica', 'Scienza_e_tecnica', 'Sport', 'Storia'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
def predict(model_path, tokenizer_path, sentence):
|
| 13 |
tokenizer = utils.load_tokenizer(tokenizer_path)
|
| 14 |
x_data = []
|
| 15 |
x_data.append(sentence)
|
| 16 |
x_tokenized = tokenizer.texts_to_sequences(x_data)
|
| 17 |
+
new_x_tokenized = []
|
| 18 |
+
for x_token in x_tokenized[0]:
|
| 19 |
+
if(x_token is None):
|
| 20 |
+
x_token = 1
|
| 21 |
+
new_x_tokenized.append(x_token)
|
| 22 |
+
x_tokenized = [new_x_tokenized]
|
| 23 |
x_pad = pad_sequences(x_tokenized, maxlen=200)
|
| 24 |
x_t = x_pad[0]
|
| 25 |
model = models.load_model(model_path, compile=False)
|
|
|
|
| 29 |
|
| 30 |
def multi_classification(text):
|
| 31 |
model = MODEL + 'multi-classification.h5'
|
| 32 |
+
tokenizer = TOKEN + 'tokenizer.json'
|
| 33 |
labels = predict(model, tokenizer, text)
|
| 34 |
response = {}
|
| 35 |
for i, label in enumerate(labels[0]):
|