Merge branch 'main' of https://huggingface.co/spaces/gabrielanicole/literacy
- app.py +29 -21
- pre-requirements.txt +1 -0
- requirements.txt +4 -4

app.py CHANGED
@@ -71,9 +71,11 @@ dict_tokenizer_tr = {
     'en-sw': tokenizer_sw,
 }
 
-dict_reference_faiss = {
-
-}
+# dict_reference_faiss = {
+#     'en-es': load_index('en-es'),
+# }
+
+# print("dict", dict_reference_faiss['en-es']['input']['tokens'][1])
 
 saliency_examples = [
     "Peace of Mind: Protection for consumers.",
@@ -753,7 +755,7 @@ def first_function(w1, model):
         # 'texts' : dict_tokenizer_tr[model].decode(params[1].sequences.tolist())
         })
 
-    ## load_reference;
+    ## load_reference; ERROR
     ## Build FAISS index
     # ---> preload faiss using the respective model with a initial dataset.
     ## dict_reference_faiss[model] = metadata_all [per language]
@@ -763,22 +765,25 @@ def first_function(w1, model):
 
     ## Build FAISS index
     # ---> preload faiss using the respective model with a initial dataset.
-    result_search = {}
-    result_search['input'] = build_search(input_embeddings, model, type='input')
-    result_search['output'] = build_search(output_embeddings, model, type='output')
-
-    json_out = {'input': {'tokens': {}, 'words': {}}, 'output': {'tokens': {}, 'words': {}}}
-    dict_projected = {}
-    for type in ['input', 'output']:
-        dict_projected[type] = {}
-        for key in ['tokens', 'words']:
-            similar_key = result_search[type][key]['similar']
-            vocab = result_search[type][key]['vocab_queries']
-            dict_projected[type][key] = filtered_projection(similar_key, vocab, model, type=type, key=key)
-            json_out[type][key]['similar_queries'] = similar_key
-            json_out[type][key]['tnse'] = dict_projected[type][key]
-            json_out[type][key]['key_text_list'] = result_search[type][key]['sentence_key_list']
+
+    ### to uncomment gg1 ###
+    # result_search = {}
+    # result_search['input'] = build_search(input_embeddings, model, type='input')
+    # result_search['output'] = build_search(output_embeddings, model, type='output')
+
+    # json_out = {'input': {'tokens': {}, 'words': {}}, 'output': {'tokens': {}, 'words': {}}}
+    # dict_projected = {}
+    # for type in ['input', 'output']:
+    #     dict_projected[type] = {}
+    #     for key in ['tokens', 'words']:
+    #         similar_key = result_search[type][key]['similar']
+    #         vocab = result_search[type][key]['vocab_queries']
+    #         dict_projected[type][key] = filtered_projection(similar_key, vocab, model, type=type, key=key)
+    #         json_out[type][key]['similar_queries'] = similar_key
+    #         json_out[type][key]['tnse'] = dict_projected[type][key]
+    #         json_out[type][key]['key_text_list'] = result_search[type][key]['sentence_key_list']
+    ### to uncomment gg1 ###
 
     ## bertviz
     # paramsbv, tgtbv = get_bertvis_data(w1, model)
@@ -787,8 +792,11 @@ def first_function(w1, model):
     html_att_dec = params[3][1]
    html_att_cross = params[4][1]
 
-
-    params = [params[0], params[1], json_out, params[2][0], params[3][0], params[4][0]]
+    ### to uncomment gg1 ###
+    # params = [params[0], params[1], json_out, params[2][0], params[3][0], params[4][0]]
+    ### to uncomment gg1 ###
+
+    params = [params[0], params[1], [], params[2][0], params[3][0], params[4][0]]
     # params.append([tgt, params['params'], params['html2'].data]
 
     return [translated_text, params, html_att_enc, html_att_dec, html_att_cross]
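The block fenced by the `### to uncomment gg1 ###` markers is the FAISS-backed similarity pipeline this commit disables; `params` now carries an empty list where `json_out` used to go, so the rest of `first_function` keeps its shape while the index code is stabilized. The internals of `build_search` and `filtered_projection` are not part of this diff; as a rough sketch of the kind of lookup `build_search` presumably wraps (exact L2 search, with random float32 vectors standing in for the real model embeddings, and `build_search_sketch` a hypothetical name, not the app's function):

import numpy as np
import faiss  # faiss-cpu, pinned below in pre-requirements.txt / requirements.txt

def build_search_sketch(reference_embeddings, query_embeddings, k=5):
    # Exact L2 index over the reference set; IndexFlatL2 needs no training step.
    index = faiss.IndexFlatL2(reference_embeddings.shape[1])
    index.add(reference_embeddings)
    # For each query row, return distances and ids of the k nearest reference rows.
    return index.search(query_embeddings, k)

# Toy usage: random vectors in place of the encoder/decoder embeddings.
refs = np.random.rand(1000, 512).astype(np.float32)
queries = np.random.rand(3, 512).astype(np.float32)
distances, ids = build_search_sketch(refs, queries)
print(ids.shape)  # (3, 5): five nearest reference rows per query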
pre-requirements.txt ADDED

@@ -0,0 +1 @@
+faiss-cpu==1.8.0
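On Hugging Face Spaces, `pre-requirements.txt` is installed before `requirements.txt`, so this new file puts a known faiss-cpu wheel in place before the remaining dependencies resolve. A minimal startup smoke test (an assumption for illustration, not part of the commit):

import faiss  # fails fast at startup if the pinned wheel did not install cleanly
print("faiss loaded:", faiss.__version__)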
requirements.txt CHANGED

@@ -1,4 +1,4 @@
-bertviz
-jupyter
-scikit-learn
-faiss-cpu
+bertviz
+jupyter
+scikit-learn
+faiss-cpu==1.8.0.post1
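Note that the two files pin different versions: pre-requirements.txt asks for 1.8.0 while requirements.txt asks for 1.8.0.post1. Since requirements.txt installs afterwards, pip would presumably end up with 1.8.0.post1; if a single version is intended, aligning the two pins would make the build reproducible at a glance. A quick runtime check of that assumption (hypothetical, not part of the commit):

import importlib.metadata
# requirements.txt installs after pre-requirements.txt, so its pin should win.
assert importlib.metadata.version("faiss-cpu") == "1.8.0.post1"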