import os
# Hide all CUDA devices so TensorFlow runs CPU-only. NOTE: this must be set
# before `import tensorflow` (below) — TF reads the variable at import time.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
|
|
| import gradio as gr |
| import tensorflow as tf |
| from huggingface_hub import hf_hub_download |
| from transformers import AutoTokenizer, TFAutoModel |
|
|
|
|
@tf.keras.utils.register_keras_serializable()
class DistilBertLayer(tf.keras.layers.Layer):
    """Keras layer wrapping a pretrained Hugging Face encoder.

    Despite the class name, the default backbone is ``vinai/bertweet-base``;
    the name is kept as-is because the serialized ``.keras`` model refers to
    this class via ``custom_objects`` when it is deserialized.
    """

    def __init__(self, model_name="vinai/bertweet-base", **kwargs):
        super().__init__(**kwargs)
        self.model_name = model_name
        # Convert weights from the PyTorch checkpoint (`from_pt=True`).
        self.bert = TFAutoModel.from_pretrained(model_name, from_pt=True)

    def call(self, inputs):
        """Run the encoder on (input_ids, attention_mask) and return the
        last hidden state (token-level embeddings)."""
        token_ids, mask = inputs
        encoder_out = self.bert(
            input_ids=token_ids,
            attention_mask=mask,
            training=False,
        )
        return encoder_out.last_hidden_state

    def get_config(self):
        """Include `model_name` in the config so Keras can rebuild the
        layer when loading the serialized model."""
        base = super().get_config()
        return {**base, "model_name": self.model_name}
|
|
|
|
| |
# Hugging Face Hub location of the trained classifier artifact.
MODEL_REPO = "tomy07417/disaster-tweets-bertweet-gru"
MODEL_FILE = "bertweet_gru_model.keras"


# Download the serialized Keras model (or reuse the local Hub cache).
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    repo_type="model"
)


# Rebuild the model. `custom_objects` is required so Keras can deserialize
# the custom DistilBertLayer; compile=False because this app only runs
# inference (no optimizer/loss state needed).
model = tf.keras.models.load_model(
    model_path,
    custom_objects={"DistilBertLayer": DistilBertLayer},
    compile=False
)

# Tokenizer matching the backbone wrapped by DistilBertLayer.
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base")
|
|
|
|
def predict(text, threshold=0.5):
    """Classify a single tweet as disaster-related or not.

    Args:
        text: Raw tweet text (English).
        threshold: Probability cutoff for the positive class. Defaults to
            0.5, preserving the original behavior.

    Returns:
        dict with:
            "prob": float model output for the positive class
                    (presumably a sigmoid probability — TODO confirm
                    against the model's final activation).
            "pred": bool, True when prob > threshold.
    """
    encoded = tokenizer(
        [text],
        max_length=50,  # assumes 50 matches the training sequence length — TODO confirm
        truncation=True,
        padding="max_length",
        return_tensors="tf"
    )

    # Call the model directly rather than using model.predict(): for
    # single-sample serving, Keras recommends __call__, which avoids
    # predict()'s per-call dataset/progress-bar overhead and logging.
    scores = model([encoded["input_ids"], encoded["attention_mask"]], training=False)
    prob = float(scores[0][0])

    return {"prob": prob, "pred": prob > threshold}
|
|
|
|
# Minimal Gradio UI: one free-text box in, JSON (probability + label) out.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=3, label="Tweet"),
    outputs=gr.JSON(label="Result"),
    title="Tweet classifier",
    description="Paste a tweet in English"
)


if __name__ == "__main__":
    # ssr_mode=False disables Gradio's server-side rendering — presumably a
    # workaround for the hosting environment; TODO confirm it is still needed.
    demo.launch(ssr_mode=False)
|
|
|
|