Gaetano Parente committed
Commit b1db827 · 1 Parent(s): 03f9175

fix multiclassification

data/tokenizer/multi-classification-tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e8d7cbb6eeb9b529b0eb53e230d42356eae15cd3c07ee9f9f5c32352d59034e
+oid sha256:9cd74a01da6f2a4bd88fa485e6830c9ce113f23e6abf7263aec333f958dea02c
 size 4289059
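
The file is a Git LFS pointer, so only the sha256 oid changes here; the tokenizer JSON it points to was regenerated in this commit. As a minimal sketch (not part of this commit, and assuming the standard keras.preprocessing.text API rather than the repo's utils.load_tokenizer), a tokenizer JSON like this is typically produced and reloaded as follows:

# Sketch only: regenerate and reload a tokenizer JSON such as the one tracked by this pointer.
# The corpus and oov_token are placeholders, not values taken from the repo.
from keras.preprocessing.text import Tokenizer, tokenizer_from_json

corpus = ["testo di esempio", "un altro documento"]   # placeholder training texts

tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(corpus)

# Rewriting the JSON changes its content, hence the new LFS oid in this diff.
with open('data/tokenizer/multi-classification-tokenizer.json', 'w', encoding='utf-8') as f:
    f.write(tokenizer.to_json())

# Reading it back (roughly what a helper like utils.load_tokenizer is expected to do).
with open('data/tokenizer/multi-classification-tokenizer.json', encoding='utf-8') as f:
    tokenizer = tokenizer_from_json(f.read())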
modules/multilabel_classification.py CHANGED
@@ -7,18 +7,19 @@ BASE_PATH = './data/'
 MODEL = BASE_PATH + 'model/'
 TOKEN = BASE_PATH + 'tokenizer/'
 
-class_names = np.array(['alt.atheism', 'comp.graphics', 'comp.os.ms-windows.misc',
-                        'comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware', 'comp.windows.x',
-                        'misc.forsale', 'rec.autos', 'rec.motorcycles', 'rec.sport.baseball',
-                        'rec.sport.hockey', 'sci.crypt', 'sci.electronics', 'sci.med', 'sci.space',
-                        'soc.religion.christian', 'talk.politics.guns', 'talk.politics.mideast',
-                        'talk.politics.misc', 'talk.religion.misc'])
+class_names = np.array(['Economia', 'Politica', 'Scienza_e_tecnica', 'Sport', 'Storia'])
 
 def predict(model_path, tokenizer_path, sentence):
     tokenizer = utils.load_tokenizer(tokenizer_path)
     x_data = []
     x_data.append(sentence)
     x_tokenized = tokenizer.texts_to_sequences(x_data)
+    new_x_tokenized = []
+    for x_token in x_tokenized[0]:
+        if(x_token is None):
+            x_token = 1
+        new_x_tokenized.append(x_token)
+    x_tokenized = [new_x_tokenized]
     x_pad = pad_sequences(x_tokenized, maxlen=200)
     x_t = x_pad[0]
     model = models.load_model(model_path, compile=False)
@@ -28,7 +29,7 @@ def predict(model_path, tokenizer_path, sentence):
 
 def multi_classification(text):
     model = MODEL + 'multi-classification.h5'
-    tokenizer = TOKEN + 'multi-classification-tokenizer.json'
+    tokenizer = TOKEN + 'tokenizer.json'
     labels = predict(model, tokenizer, text)
     response = {}
     for i, label in enumerate(labels[0]):
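
Taken together, the two hunks switch the label set to the five Italian classes, replace any None token id with 1 before padding, and load tokenizer.json instead of multi-classification-tokenizer.json. A self-contained sketch of that patched flow, assuming tensorflow.keras in place of the repo's utils/models imports and assuming predict returns the model's output scores (the return statement and the response loop body are not shown in this diff):

# Sketch of the patched flow; paths, maxlen=200 and class_names are taken from the diff,
# the Keras imports and the return/response shapes are assumptions.
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json

BASE_PATH = './data/'
MODEL = BASE_PATH + 'model/'
TOKEN = BASE_PATH + 'tokenizer/'

class_names = np.array(['Economia', 'Politica', 'Scienza_e_tecnica', 'Sport', 'Storia'])


def predict(model_path, tokenizer_path, sentence):
    # Stand-in for utils.load_tokenizer.
    with open(tokenizer_path, encoding='utf-8') as f:
        tokenizer = tokenizer_from_json(f.read())
    x_tokenized = tokenizer.texts_to_sequences([sentence])
    # The fix in this commit: map any None token id to 1 so pad_sequences never sees None.
    x_tokenized = [[1 if token is None else token for token in x_tokenized[0]]]
    x_pad = pad_sequences(x_tokenized, maxlen=200)
    model = load_model(model_path, compile=False)
    return model.predict(x_pad)   # assumed shape: (1, len(class_names))


def multi_classification(text):
    model = MODEL + 'multi-classification.h5'
    tokenizer = TOKEN + 'tokenizer.json'   # path changed by this commit
    labels = predict(model, tokenizer, text)
    # The diff shows a response dict built from labels[0]; mapping each score to its
    # class name is an assumption about the omitted loop body.
    return {str(class_names[i]): float(score) for i, score in enumerate(labels[0])}


# Example call (hypothetical input):
# multi_classification('La nazionale ha vinto la partita')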