Fixed typo, added English BERT comparison, and updated the description.
Browse files
app.py
CHANGED
|
@@ -22,28 +22,45 @@ txt="a polynomial [MASK] from 3-SAT." #reduction
|
|
| 22 |
#print(res["sequence"])
|
| 23 |
#print(res["score"])
|
| 24 |
|
| 25 |
-
#
|
|
|
|
|
|
|
|
|
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
def unmask_words(txt_with_mask,k_suggestions=5):
|
| 28 |
-
|
|
|
|
| 29 |
labels={}
|
| 30 |
-
for res in
|
| 31 |
labels["".join(res["token_str"].split(" "))]=res["score"]
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
#trying our function
|
| 35 |
#val=unmask_words(txt)
|
| 36 |
|
| 37 |
import gradio as gr
|
| 38 |
-
description="""CC bert is a MLM model pretrained on data collected from ~200k papers
|
| 39 |
-
|
| 40 |
or contact [mishra@di.ens.fr](mishra@di.ens.fr).
|
| 41 |
-
|
| 42 |
"""
|
|
|
|
| 43 |
examples=[["as pspace is [MASK] under complement."],
|
| 44 |
["n!-(n-1)[MASK]"],
|
| 45 |
["[MASK] these two classes is a major problem."],
|
| 46 |
-
["This would show that the polynomial
|
| 47 |
["""we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data,
|
| 48 |
and their combined [MASK]"""]
|
| 49 |
]
|
|
@@ -53,7 +70,7 @@ examples=[["as pspace is [MASK] under complement."],
|
|
| 53 |
input_box=gr.inputs.Textbox(lines=20,placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",label="Enter the masked text:")
|
| 54 |
interface=gr.Interface(fn=unmask_words,inputs=[input_box,
|
| 55 |
gr.inputs.Slider(1,10,1,5,label="No of Suggestions:")],
|
| 56 |
-
outputs=gr.outputs.Label(label="top words:"),
|
| 57 |
examples=examples,
|
| 58 |
theme="darkhuggingface",
|
| 59 |
title="CC-Bert MLM",description=description,allow_flagging=True)
|
|
|
|
| 22 |
#print(res["sequence"])
|
| 23 |
#print(res["score"])
|
| 24 |
|
| 25 |
+
# Baseline pipeline: plain English BERT, to contrast with the CC-BERT model above.
default_name = "bert-base-uncased"

# Load the matching tokenizer and TF masked-LM head for the baseline checkpoint.
tokenizer = AutoTokenizer.from_pretrained(default_name)
model = TFAutoModelForMaskedLM.from_pretrained(default_name)

# Fill-mask pipeline for the English baseline; the scientific-English
# pipeline (`unmasker`) is built earlier in the file.
unmasker_bert = FillMaskPipeline(model=model, tokenizer=tokenizer)
|
| 32 |
+
|
| 33 |
+
def unmask_words(txt_with_mask, k_suggestions=5):
    """Fill the [MASK] token with both models and return their suggestions.

    Parameters
    ----------
    txt_with_mask : str
        Input text containing a ``[MASK]`` token to be filled in.
    k_suggestions : int, optional
        Number of top suggestions requested from each model (default 5).

    Returns
    -------
    tuple[dict, dict]
        ``(labels, labels_bert)`` — two mappings from suggested token to
        model score: the first from the CC-BERT pipeline (``unmasker``),
        the second from the plain English BERT pipeline (``unmasker_bert``).
    """
    def _to_label_scores(results):
        # Strip internal spaces from each suggested token and map it to its
        # score; previously this loop was duplicated for both pipelines.
        return {"".join(res["token_str"].split(" ")): res["score"]
                for res in results}

    # CC-BERT (scientific-English pretraining) suggestions.
    labels = _to_label_scores(unmasker(txt_with_mask, top_k=k_suggestions))
    # Plain English BERT suggestions, for side-by-side comparison.
    labels_bert = _to_label_scores(unmasker_bert(txt_with_mask, top_k=k_suggestions))
    return labels, labels_bert
|
| 48 |
+
|
| 49 |
+
|
| 50 |
|
| 51 |
# Manual smoke-test of the unmasking helper (kept disabled).
# val = unmask_words(txt)

import gradio as gr

# Markdown description rendered above the demo UI.
description = """CC bert is a MLM model pretrained on data collected from ~200k papers on arXiv comprising of mathematical proofs and theorems. The aim of this interface is to show the difference between english and scientific english pretraining.
For more information visit [Theoremkb Project](https://github.com/PierreSenellart/theoremkb)
or contact [mishra@di.ens.fr](mishra@di.ens.fr).
"""

# One masked sentence per example; Gradio expects one inner list per input row.
_example_sentences = [
    "as pspace is [MASK] under complement.",
    "n!-(n-1)[MASK]",
    "[MASK] these two classes is a major problem.",
    "This would show that the polynomial hierarchy at the second [MASK], which is considered only",
    """we consider two ways of measuring complexity, data complexity, which is with respect to the size of the data,
and their combined [MASK]""",
]
examples = [[sentence] for sentence in _example_sentences]
|
|
|
|
| 70 |
# Text area where the user types a sentence containing [MASK].
input_box = gr.inputs.Textbox(
    lines=20,
    placeholder="Unifying computational entropies via Kullback–Leibler [MASK]",
    label="Enter the masked text:",
)

# Slider choosing how many suggestions each model should return (1-10, default 5).
suggestion_slider = gr.inputs.Slider(1, 10, 1, 5, label="No of Suggestions:")

# One label panel per model: CC-BERT first, English BERT second — matching
# the (labels, labels_bert) tuple returned by unmask_words.
cc_output = gr.outputs.Label(label="top words:")
eng_bert_output = gr.outputs.Label(label="top words eng-bert:")

interface = gr.Interface(
    fn=unmask_words,
    inputs=[input_box, suggestion_slider],
    outputs=[cc_output, eng_bert_output],
    examples=examples,
    theme="darkhuggingface",
    title="CC-Bert MLM",
    description=description,
    allow_flagging=True,
)
|