Spaces:

mladhd
/

TopicDeTextor

Runtime error

gonzalocordova committed on May 12, 2023

Commit

2a72c85

1 Parent(s): 405ea99

fix: topic ids bug

Files changed (2) hide show

NLPutils.py CHANGED Viewed

@@ -15,7 +15,10 @@ def load_model():
     token = "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"
     REPO_ID = "gonzalocordova/word2vec_fastText"
     FILENAME = "wiki.en.vec"
     model_pth = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, repo_type="model", use_auth_token=token)
     return gensim.models.KeyedVectors.load_word2vec_format(model_pth, binary=False)
 # Keywords for each defined topic

     token = "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"
     REPO_ID = "gonzalocordova/word2vec_fastText"
     FILENAME = "wiki.en.vec"
+    print("DOWNLOADING MODEL...")
     model_pth = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, repo_type="model", use_auth_token=token)
+    print("MODEL DOWNLOADED.")
+    print("LOADING MODEL...")
     return gensim.models.KeyedVectors.load_word2vec_format(model_pth, binary=False)
 # Keywords for each defined topic

app.py CHANGED Viewed

@@ -2,9 +2,8 @@ import gradio as gr
 import NLPutils as nlp
 import numpy as np
-print("Loading model...")
 model = nlp.load_model()
-print("Model loaded.")
 topic_keywords_embed = nlp.get_topic_keywords_embed(model)
 topic_centroids = nlp.get_topic_centroids(topic_keywords_embed)
 topic_to_id = {'ComputerScience': 0,
@@ -55,8 +54,10 @@ def predict_fn(text):
         # apply softmax to top 3 topics
         probs = softmax(probs)
         # update top 3 topics dict with softmax probabilities
         top3 = dict(zip(ids, probs))
-        # return dictionary whose keys are labels and values are confidences
-        return {id_to_topic[id] : round(top3[id], 2) for id in top3}
 gr.Interface(predict_fn, gr.inputs.Textbox(label="Input Text"), outputs="label").launch()

 import NLPutils as nlp
 import numpy as np
 model = nlp.load_model()
+print("MODEL LOADED")
 topic_keywords_embed = nlp.get_topic_keywords_embed(model)
 topic_centroids = nlp.get_topic_centroids(topic_keywords_embed)
 topic_to_id = {'ComputerScience': 0,
         # apply softmax to top 3 topics
         probs = softmax(probs)
         # update top 3 topics dict with softmax probabilities
+        print(probs)
+        print(ids)
         top3 = dict(zip(ids, probs))
+        print(top3)
+        return {id : round(top3[id], 2) for id in top3}
 gr.Interface(predict_fn, gr.inputs.Textbox(label="Input Text"), outputs="label").launch()