Spaces:
Runtime error
Runtime error
hm-auch
committed on
Commit
·
b642a67
1
Parent(s):
86814fc
update classifier and demonstrator-code
Browse files
- README.md +1 -1
- app.py +44 -31
- hscommon.py +0 -13
- {save/modelV1 → result/model}/keras_metadata.pb +2 -2
- {save/modelV1 → result/model}/saved_model.pb +2 -2
- {save/modelV1 → result/model}/variables/variables.data-00000-of-00001 +2 -2
- result/model/variables/variables.index +0 -0
- save/modelV1/variables/variables.index +0 -0
README.md
CHANGED
|
@@ -10,4 +10,4 @@ pinned: false
|
|
| 10 |
license: afl-3.0
|
| 11 |
---
|
| 12 |
|
| 13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
|
|
|
|
| 10 |
license: afl-3.0
|
| 11 |
---
|
| 12 |
|
| 13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
|
app.py
CHANGED
|
@@ -1,44 +1,57 @@
|
|
| 1 |
import transformers
|
| 2 |
-
import hscommon
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
import tensorflow as tf
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
MODEL_DIRECTORY = 'save/modelV1'
|
| 10 |
PRETRAINED_MODEL_NAME = 'dbmdz/bert-base-german-cased'
|
| 11 |
TOKENIZER = transformers.BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
|
| 12 |
-
MAX_SEQUENCE_LENGTH =
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
optimizer = optimization.create_optimizer(
|
| 25 |
-
init_lr=INIT_LR,
|
| 26 |
-
num_train_steps=steps_per_epoch,
|
| 27 |
-
num_warmup_steps=num_warmup_steps,
|
| 28 |
-
optimizer_type=OPTIMIZER
|
| 29 |
)
|
| 30 |
|
| 31 |
-
|
| 32 |
-
return model
|
| 33 |
-
|
| 34 |
-
hs_detection_model = tf.keras.models.load_model(MODEL_DIRECTORY, compile=False)
|
| 35 |
-
compile_model(hs_detection_model)
|
| 36 |
|
| 37 |
def inference(sentence):
|
| 38 |
-
encoded_sentence =
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
input_sentence_text = gr.inputs.Textbox(placeholder="Hier den Satz eingeben, der Hassrede enthalten kann.")
|
| 43 |
-
|
| 44 |
-
|
|
|
|
| 1 |
import transformers
|
|
|
|
| 2 |
|
| 3 |
import gradio as gr
|
| 4 |
import tensorflow as tf
|
| 5 |
|
| 6 |
+
MODEL_DIRECTORY = './result/model'
|
|
|
|
|
|
|
| 7 |
PRETRAINED_MODEL_NAME = 'dbmdz/bert-base-german-cased'
|
| 8 |
TOKENIZER = transformers.BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
|
| 9 |
+
MAX_SEQUENCE_LENGTH = 300
|
| 10 |
+
|
| 11 |
+
def encode(sentences, tokenizer, sequence_length):
|
| 12 |
+
return tokenizer.batch_encode_plus(
|
| 13 |
+
sentences,
|
| 14 |
+
max_length=sequence_length, # set the length of the sequences
|
| 15 |
+
add_special_tokens=True, # add [CLS] and [SEP] tokens
|
| 16 |
+
return_attention_mask=True,
|
| 17 |
+
return_token_type_ids=False, # not needed for this type of ML task
|
| 18 |
+
pad_to_max_length=True, # add 0 pad tokens to the sequences less than max_length
|
| 19 |
+
return_tensors='tf'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
)
|
| 21 |
|
| 22 |
+
hs_detection_model = tf.keras.models.load_model(MODEL_DIRECTORY, compile=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
def inference(sentence):
|
| 25 |
+
encoded_sentence = encode([sentence], TOKENIZER, MAX_SEQUENCE_LENGTH)
|
| 26 |
+
return hs_detection_model.predict(encoded_sentence.values())
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
title = "HS-Detector Demonstrator"
|
| 30 |
+
description = """
|
| 31 |
+
<center>
|
| 32 |
+
<p>Dataset: germeval18_hasoc19_rp21_combi_dataset (17,7% HS)</p>
|
| 33 |
+
<p>Das bisher beste Modell basierend auf Bert nach 2 Epochen und max. 300 Token pro Eintrag fine-tuning mit folgenden Evaluationsergebnissen:</p>
|
| 34 |
+
|
| 35 |
+
Accuracy: 0.8794712286158631<br/>
|
| 36 |
+
Balanced Accuracy: 0.7561891312100413<br/>
|
| 37 |
+
Binary F1-Score: 0.6249999999999999<br/>
|
| 38 |
+
Binary Precision: 0.6994584837545126<br/>
|
| 39 |
+
Binary Recall: 0.564868804664723<br/>
|
| 40 |
+
Weighted F1-Score: 0.8742843536656945<br/>
|
| 41 |
+
Weighted Precision: 0.8722794361456155<br/>
|
| 42 |
+
Weighted Recall: 0.8794712286158631<br/>
|
| 43 |
+
Macro F1-Score: 0.7765982087708463<br/>
|
| 44 |
+
Macro Precision: 0.80455672371745<br/>
|
| 45 |
+
Macro Recall: 0.7561891312100413<br/>
|
| 46 |
+
MCC score: 0.558655967312084<br/>
|
| 47 |
+
AUROC score: 0.7561891312100413<br/>
|
| 48 |
+
|
| 49 |
+
<img src="https://huggingface.co/spaces/course-demos/Rick_and_Morty_QA/resolve/main/rick.png" width=200px>
|
| 50 |
+
</center>
|
| 51 |
+
"""
|
| 52 |
+
|
| 53 |
+
article = "Die Eingaben werden nicht geloggt. Klassifikator einfach ausprobieren."
|
| 54 |
|
| 55 |
input_sentence_text = gr.inputs.Textbox(placeholder="Hier den Satz eingeben, der Hassrede enthalten kann.")
|
| 56 |
+
ui = gr.Interface(fn=inference, inputs=input_sentence_text, outputs="text", title = title, description = description, article = article)
|
| 57 |
+
ui.launch()
|
hscommon.py
DELETED
|
@@ -1,13 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
def encode(sentences, tokenizer, sequence_length):
|
| 5 |
-
return tokenizer.batch_encode_plus(
|
| 6 |
-
sentences,
|
| 7 |
-
max_length=sequence_length, # set the length of the sequences
|
| 8 |
-
add_special_tokens=True, # add [CLS] and [SEP] tokens
|
| 9 |
-
return_attention_mask=True,
|
| 10 |
-
return_token_type_ids=False, # not needed for this type of ML task
|
| 11 |
-
pad_to_max_length=True, # add 0 pad tokens to the sequences less than max_length
|
| 12 |
-
return_tensors='tf'
|
| 13 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{save/modelV1 → result/model}/keras_metadata.pb
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:805488d800d068dbc81f561789b35c1fe524012434890af914e94166ac17497d
|
| 3 |
+
size 154871
|
{save/modelV1 → result/model}/saved_model.pb
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01b6489c30792f60f29d622bd7b0e3985fa863165907c8fded7fb2b1029cc421
|
| 3 |
+
size 6564579
|
{save/modelV1 → result/model}/variables/variables.data-00000-of-00001
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:879c45b816164b7f5919fd5c42ec35d34f1eb266ee3b28a7c210bcb23f5a6d86
|
| 3 |
+
size 1319386304
|
result/model/variables/variables.index
ADDED
|
Binary file (40.7 kB). View file
|
|
|
save/modelV1/variables/variables.index
DELETED
|
Binary file (40.6 kB)
|
|
|