Spaces:
Sleeping
Sleeping
Gaetano Parente committed on
Commit ·
b1db827
1
Parent(s): 03f9175
fix multiclassification
Browse files
data/tokenizer/multi-classification-tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4289059
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9cd74a01da6f2a4bd88fa485e6830c9ce113f23e6abf7263aec333f958dea02c
|
| 3 |
size 4289059
|
modules/multilabel_classification.py
CHANGED
|
@@ -7,18 +7,19 @@ BASE_PATH = './data/'
|
|
| 7 |
MODEL = BASE_PATH + 'model/'
|
| 8 |
TOKEN = BASE_PATH + 'tokenizer/'
|
| 9 |
|
| 10 |
-
class_names = np.array(['
|
| 11 |
-
'comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware', 'comp.windows.x',
|
| 12 |
-
'misc.forsale', 'rec.autos', 'rec.motorcycles', 'rec.sport.baseball',
|
| 13 |
-
'rec.sport.hockey', 'sci.crypt', 'sci.electronics', 'sci.med', 'sci.space',
|
| 14 |
-
'soc.religion.christian', 'talk.politics.guns', 'talk.politics.mideast',
|
| 15 |
-
'talk.politics.misc', 'talk.religion.misc'])
|
| 16 |
|
| 17 |
def predict(model_path, tokenizer_path, sentence):
|
| 18 |
tokenizer = utils.load_tokenizer(tokenizer_path)
|
| 19 |
x_data = []
|
| 20 |
x_data.append(sentence)
|
| 21 |
x_tokenized = tokenizer.texts_to_sequences(x_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
x_pad = pad_sequences(x_tokenized, maxlen=200)
|
| 23 |
x_t = x_pad[0]
|
| 24 |
model = models.load_model(model_path, compile=False)
|
|
@@ -28,7 +29,7 @@ def predict(model_path, tokenizer_path, sentence):
|
|
| 28 |
|
| 29 |
def multi_classification(text):
|
| 30 |
model = MODEL + 'multi-classification.h5'
|
| 31 |
-
tokenizer = TOKEN + '
|
| 32 |
labels = predict(model, tokenizer, text)
|
| 33 |
response = {}
|
| 34 |
for i, label in enumerate(labels[0]):
|
|
|
|
| 7 |
MODEL = BASE_PATH + 'model/'
|
| 8 |
TOKEN = BASE_PATH + 'tokenizer/'
|
| 9 |
|
| 10 |
+
class_names = np.array(['Economia', 'Politica', 'Scienza_e_tecnica', 'Sport', 'Storia'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
def predict(model_path, tokenizer_path, sentence):
|
| 13 |
tokenizer = utils.load_tokenizer(tokenizer_path)
|
| 14 |
x_data = []
|
| 15 |
x_data.append(sentence)
|
| 16 |
x_tokenized = tokenizer.texts_to_sequences(x_data)
|
| 17 |
+
new_x_tokenized = []
|
| 18 |
+
for x_token in x_tokenized[0]:
|
| 19 |
+
if(x_token is None):
|
| 20 |
+
x_token = 1
|
| 21 |
+
new_x_tokenized.append(x_token)
|
| 22 |
+
x_tokenized = [new_x_tokenized]
|
| 23 |
x_pad = pad_sequences(x_tokenized, maxlen=200)
|
| 24 |
x_t = x_pad[0]
|
| 25 |
model = models.load_model(model_path, compile=False)
|
|
|
|
| 29 |
|
| 30 |
def multi_classification(text):
|
| 31 |
model = MODEL + 'multi-classification.h5'
|
| 32 |
+
tokenizer = TOKEN + 'tokenizer.json'
|
| 33 |
labels = predict(model, tokenizer, text)
|
| 34 |
response = {}
|
| 35 |
for i, label in enumerate(labels[0]):
|