software-maintenance-classification

Runtime error

File size: 3,798 Bytes

5ca6171

# NOTE(review): an import failure here is only printed, not re-raised. The
# failing import and every import listed after it leave their names
# undefined, so the problem resurfaces later as a NameError at first use.
try:
    import torch
    import pandas as pd
    import streamlit as st
    import re
    from transformers import AutoTokenizer, AutoModelForSequenceClassification
    from stqdm import stqdm
    from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
except Exception as e:
    print(e)
    
# Config
# Identifier handed to PeftConfig.from_pretrained / PeftModel.from_pretrained.
MODELS_PATH = "kadabengaran/distilbert-base-uncased-lora-text-classification"

# Class index -> label name.
id2label = {
    0: 'Other',
    1: 'Problem Discovery',
    2: 'Information Seeking',
    3: 'Feature Request',
}
# Label name -> class index, built from id2label so the two stay in sync.
label2id = {label: index for index, label in id2label.items()}
numLabels = len(id2label)

def get_device():
    """Return the torch device to run on: CUDA when available, else CPU."""
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)

# Resolve the device once at import time; USE_CUDA is True only for a CUDA device.
device = get_device()
USE_CUDA = device.type == 'cuda'

# Get the Keys
def get_key(val, my_dict):
    """Return the first key of ``my_dict`` whose value equals ``val``.

    Keys are checked in the dictionary's iteration order. Returns ``None``
    when no value matches.
    """
    matching_keys = (key for key, value in my_dict.items() if val == value)
    return next(matching_keys, None)

def load_tokenizer(model_path):
    """Create and return the tokenizer for ``model_path``.

    ``add_prefix_space=True`` is forwarded to ``AutoTokenizer.from_pretrained``.
    """
    return AutoTokenizer.from_pretrained(model_path, add_prefix_space=True)

def remove_special_characters(text):
    """Normalize ``text`` to lowercase ASCII letters separated by single spaces.

    The text is lowercased, every character that is not an ASCII letter,
    digit or whitespace becomes a space, digits become spaces, and each run
    of whitespace is collapsed to one space. Leading and trailing whitespace
    is collapsed but not stripped.
    """
    # Applied in order: special characters, then digits, then whitespace runs.
    substitutions = (
        (r'[^a-zA-Z0-9\s]', ' '),
        (r'[0-9]', ' '),
        (r"\s+", " "),
    )

    # Case folding.
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
    
@st.cache_resource
def load_model():
    """Load the LoRA-adapted sequence classifier and its tokenizer.

    The base model named in the adapter's PEFT config is instantiated with
    the label mappings of this module, then wrapped with the adapter weights
    from ``MODELS_PATH``. The tokenizer is the base model's tokenizer.

    The result is cached with ``st.cache_resource``: Streamlit re-executes
    the script on every widget interaction, and without caching the model
    and tokenizer would be rebuilt on each rerun.

    Returns:
        tuple: ``(model, tokenizer)``.
    """
    config = PeftConfig.from_pretrained(MODELS_PATH)
    inference_model = AutoModelForSequenceClassification.from_pretrained(
        config.base_model_name_or_path,
        num_labels=numLabels,
        id2label=id2label,
        label2id=label2id,
    )
    tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    model = PeftModel.from_pretrained(inference_model, MODELS_PATH)
    # The model is only used for prediction; make evaluation mode explicit.
    model.eval()
    return model, tokenizer

def classify_single(text, model, tokenizer, device):
    """Classify one piece of text and return its label name.

    Args:
        text: Input string to classify.
        model: Sequence-classification model whose output exposes ``.logits``.
        tokenizer: Tokenizer providing ``encode``.
        device: ``torch.device`` on which the model and inputs are placed.

    Returns:
        The ``id2label`` entry for the highest-scoring class.
    """
    # Keep the model on the same device as the inputs (no-op if already there).
    model.to(device)

    # truncation=True caps the sequence at the tokenizer's maximum length so
    # long pasted text cannot exceed what the model accepts.
    inputs = tokenizer.encode(text, return_tensors="pt", truncation=True).to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(inputs).logits

    # One input sequence -> one row of logits -> one class index.
    predicted_id = logits.argmax(dim=-1).item()
    return id2label[predicted_id]


tab_labels = ["Single Input", "Multiple Input"]


class App:
    """Streamlit page for classifying a single user question."""

    def __init__(self):
        # Text typed by the user; filled in by render_single_input.
        self.input_text = None
        # CSV/tab related state; not read by any method of this class.
        self.csv_input = None
        self.csv_process = None
        self.fileTypes = ["csv"]
        self.default_tab_selected = tab_labels[0]

    def run(self):
        """Render the page: title, optional CUDA badge, text box and button."""
        model, tokenizer = load_model()

        title_html = """
        <div style="padding:10px">
        <h1 style="color:white;text-align:center;">User Question Classification</h1>
        </div>
        """
        st.markdown(title_html, unsafe_allow_html=True)
        st.markdown("")

        # Show the badge in the sidebar only when running on a GPU.
        if USE_CUDA:
            st.sidebar.markdown(footer, unsafe_allow_html=True)

        self.render_single_input()
        st.divider()
        self.render_process_button(model, tokenizer, device)

    def render_single_input(self):
        """Draw the text area and store its current content on the instance."""
        self.input_text = st.text_area("Enter Text Here", placeholder="Type Here")

    def render_process_button(self, model, tokenizer, device):
        """Draw the Process button and, when clicked, classify the entered text.

        Shows a warning instead of a result when the text box is empty.
        """
        if not st.button("Process"):
            return

        entered_text = self.input_text
        if not entered_text:
            st.warning('Please enter text to process', icon="⚠️")
            return

        label = classify_single(entered_text, model, tokenizer, device)
        st.write("Classification result:", label)
    
# Fixed-position "CUDA enabled" badge; App.run renders it in the sidebar when
# USE_CUDA is set. `left` carries an explicit px unit because CSS discards a
# declaration whose non-zero length has no unit.
footer="""<style>
.footer {
position: fixed;
left: 10px;
bottom: 0;
width: 100%;
color: #ffa9365e;
}
</style>
<div class="footer">
<p>CUDA enabled</p>
</div>
"""

if __name__ == "__main__":
    # Build the page object and render it when the file is executed as a script.
    app = App()
    app.run()