gonzalocordova committed on
Commit
2a72c85
·
1 Parent(s): 405ea99

fix: topic ids bug

Browse files
Files changed (2) hide show
  1. NLPutils.py +3 -0
  2. app.py +5 -4
NLPutils.py CHANGED
@@ -15,7 +15,10 @@ def load_model():
15
  token = "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"
16
  REPO_ID = "gonzalocordova/word2vec_fastText"
17
  FILENAME = "wiki.en.vec"
 
18
  model_pth = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, repo_type="model", use_auth_token=token)
 
 
19
  return gensim.models.KeyedVectors.load_word2vec_format(model_pth, binary=False)
20
 
21
  # Keywords for each defined topic
 
15
  token = "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"
16
  REPO_ID = "gonzalocordova/word2vec_fastText"
17
  FILENAME = "wiki.en.vec"
18
+ print("DOWNLOADING MODEL...")
19
  model_pth = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, repo_type="model", use_auth_token=token)
20
+ print("MODEL DOWNLOADED.")
21
+ print("LOADING MODEL...")
22
  return gensim.models.KeyedVectors.load_word2vec_format(model_pth, binary=False)
23
 
24
  # Keywords for each defined topic
app.py CHANGED
@@ -2,9 +2,8 @@ import gradio as gr
2
  import NLPutils as nlp
3
  import numpy as np
4
 
5
- print("Loading model...")
6
  model = nlp.load_model()
7
- print("Model loaded.")
8
  topic_keywords_embed = nlp.get_topic_keywords_embed(model)
9
  topic_centroids = nlp.get_topic_centroids(topic_keywords_embed)
10
  topic_to_id = {'ComputerScience': 0,
@@ -55,8 +54,10 @@ def predict_fn(text):
55
  # apply softmax to top 3 topics
56
  probs = softmax(probs)
57
  # update top 3 topics dict with softmax probabilities
 
 
58
  top3 = dict(zip(ids, probs))
59
- # return dictionary whose keys are labels and values are confidences
60
- return {id_to_topic[id] : round(top3[id], 2) for id in top3}
61
 
62
  gr.Interface(predict_fn, gr.inputs.Textbox(label="Input Text"), outputs="label").launch()
 
2
  import NLPutils as nlp
3
  import numpy as np
4
 
 
5
  model = nlp.load_model()
6
+ print("MODEL LOADED")
7
  topic_keywords_embed = nlp.get_topic_keywords_embed(model)
8
  topic_centroids = nlp.get_topic_centroids(topic_keywords_embed)
9
  topic_to_id = {'ComputerScience': 0,
 
54
  # apply softmax to top 3 topics
55
  probs = softmax(probs)
56
  # update top 3 topics dict with softmax probabilities
57
+ print(probs)
58
+ print(ids)
59
  top3 = dict(zip(ids, probs))
60
+ print(top3)
61
+ return {id : round(top3[id], 2) for id in top3}
62
 
63
  gr.Interface(predict_fn, gr.inputs.Textbox(label="Input Text"), outputs="label").launch()