Spaces:

sunwaee
/

Perceiver-Multiclass-Emotion-Classification

Running

App Files Files Community

sunwaee committed on Dec 18, 2021

Commit

b438028

1 Parent(s): 3b45242

added scripts

Browse files

Files changed (2) hide show

app.py +120 -0
source/pipeline.py +138 -0

app.py ADDED Viewed

	@@ -0,0 +1,120 @@

+# coding:utf-8
+"""
+Filename: app.py
+Author: @DvdNss
+Created on 12/18/2021
+"""
+import os
+import gdown as gdown
+import nltk
+import streamlit as st
+from nltk.tokenize import sent_tokenize
+from source.pipeline import MultiLabelPipeline, inputs_to_dataset
+def download_models(ids):
+    """
+    Download all models.
+    :param ids: name and links of models
+    :return:
+    """
+    # Download sentence tokenizer
+    nltk.download('punkt')
+    # Download model from drive if not stored locally
+    for key in ids:
+        if not os.path.isfile(f"model/{key}.pt"):
+            url = f"https://drive.google.com/uc?id={ids[key]}"
+            gdown.download(url=url, output=f"model/{key}.pt")
+@st.cache
+def load_labels():
+    """
+    Load model labels.
+    :return:
+    """
+    return [
+        "admiration",
+        "amusement",
+        "anger",
+        "annoyance",
+        "approval",
+        "caring",
+        "confusion",
+        "curiosity",
+        "desire",
+        "disappointment",
+        "disapproval",
+        "disgust",
+        "embarrassment",
+        "excitement",
+        "fear",
+        "gratitude",
+        "grief",
+        "joy",
+        "love",
+        "nervousness",
+        "optimism",
+        "pride",
+        "realization",
+        "relief",
+        "remorse",
+        "sadness",
+        "surprise",
+        "neutral"
+    ]
+@st.cache(allow_output_mutation=True)
+def load_model(model_path):
+    """
+    Load model and cache it.
+    :param model_path: path to model
+    :return:
+    """
+    model = MultiLabelPipeline(model_path=model_path)
+    return model
+# Page config
+st.set_page_config(layout="centered")
+st.title("Multiclass Emotion Classification")
+st.write("DeepMind Language Perceiver for Multiclass Emotion Classification (Eng). ")
+# Variables
+ids = {'perceiver-go-emotions': '15m-p0Pwwnh3STi7zXHkKr9HFxliGJikU'}
+labels = load_labels()
+# Download all models from drive
+download_models(ids)
+# Display labels
+st.markdown(f"__Labels:__ {', '.join(labels)}")
+# Model selection
+left, right = st.columns([4, 2])
+inputs = left.text_area('', max_chars=2048, placeholder='Write something here to see what happens! ')
+model_path = right.selectbox('', options=[k for k in ids], index=0, help='Model to use. ')
+split = right.checkbox('Split into sentences')
+model = load_model(model_path=f"model/{model_path}.pt")
+right.write(model.device)
+if split:
+    if not inputs.isspace() and inputs != "":
+        with st.spinner('Processing text... This may take a while.'):
+            left.write(model(inputs_to_dataset(sent_tokenize(inputs)), batch_size=1))
+else:
+    if not inputs.isspace() and inputs != "":
+        with st.spinner('Processing text... This may take a while.'):
+            left.write(model(inputs_to_dataset([inputs]), batch_size=1))

source/pipeline.py ADDED Viewed

	@@ -0,0 +1,138 @@

+# coding:utf-8
+"""
+Filename: pipeline.py
+Author: @DvdNss
+Created on 12/17/2021
+"""
+from typing import List
+import torch
+from datasets import Dataset
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+from transformers import PerceiverTokenizer
+def _map_outputs(predictions):
+    """
+    Map model outputs to classes.
+    :param predictions: model ouptut batch
+    :return:
+    """
+    labels = [
+        "admiration",
+        "amusement",
+        "anger",
+        "annoyance",
+        "approval",
+        "caring",
+        "confusion",
+        "curiosity",
+        "desire",
+        "disappointment",
+        "disapproval",
+        "disgust",
+        "embarrassment",
+        "excitement",
+        "fear",
+        "gratitude",
+        "grief",
+        "joy",
+        "love",
+        "nervousness",
+        "optimism",
+        "pride",
+        "realization",
+        "relief",
+        "remorse",
+        "sadness",
+        "surprise",
+        "neutral"
+    ]
+    classes = []
+    for i, example in enumerate(predictions):
+        out_batch = []
+        for j, category in enumerate(example):
+            out_batch.append(labels[j]) if category > 0.5 else None
+        classes.append(out_batch)
+    return classes
+class MultiLabelPipeline:
+    """
+    Multi label classification pipeline.
+    """
+    def __init__(self, model_path):
+        """
+        Init MLC pipeline.
+        :param model_path: model to use
+        """
+        # Init attributes
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        if self.device == 'cuda':
+            self.model = torch.load(model_path).eval().to(self.device)
+        else:
+            self.model = torch.load(model_path, map_location=torch.device('cpu')).eval().to(self.device)
+        self.tokenizer = PerceiverTokenizer.from_pretrained('deepmind/language-perceiver')
+    def __call__(self, dataset, batch_size: int = 4):
+        """
+        Processing pipeline.
+        :param dataset: dataset
+        :return:
+        """
+        # Tokenize inputs
+        dataset = dataset.map(lambda row: self.tokenizer(row['text'], padding="max_length", truncation=True),
+                              batched=True, remove_columns=['text'], desc='Tokenizing')
+        dataset.set_format('torch', columns=['input_ids', 'attention_mask'])
+        dataloader = DataLoader(dataset, batch_size=batch_size)
+        # Define output classes
+        classes = []
+        mem_logs = []
+        with tqdm(dataloader, unit='batches') as progression:
+            for batch in progression:
+                progression.set_description('Inference')
+                # Forward
+                outputs = self.model(inputs=batch['input_ids'].to(self.device),
+                                     attention_mask=batch['attention_mask'].to(self.device), )
+                # Outputs
+                predictions = outputs.logits.cpu().detach().numpy()
+                # Map predictions to classes
+                batch_classes = _map_outputs(predictions)
+                for row in batch_classes:
+                    classes.append(row)
+                # Retrieve memory usage
+                memory = round(torch.cuda.memory_reserved(self.device) / 1e9, 2)
+                mem_logs.append(memory)
+                # Update pbar
+                progression.set_postfix(memory=f"{round(sum(mem_logs) / len(mem_logs), 2)}Go")
+        return classes
+def inputs_to_dataset(inputs: List[str]):
+    """
+    Convert a list of strings to a dataset object.
+    :param inputs: list of strings
+    :return:
+    """
+    inputs = {'text': [input for input in inputs]}
+    return Dataset.from_dict(inputs)