# File size: 3,798 Bytes
# 5ca6171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
try:
    import torch
    import pandas as pd
    import streamlit as st
    import re
    from transformers import AutoTokenizer, AutoModelForSequenceClassification
    from stqdm import stqdm
    from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
except Exception as e:
    print(e)
    
# Config
# Adapter identifier handed to PeftConfig / PeftModel.from_pretrained in load_model().
MODELS_PATH = "kadabengaran/distilbert-base-uncased-lora-text-classification"

# Class index -> label name. The reverse mapping and the label count are
# derived from this table so the three can never drift apart.
id2label = {
    0: 'Other',
    1: 'Problem Discovery',
    2: 'Information Seeking',
    3: 'Feature Request',
}
label2id = {label: index for index, label in id2label.items()}
numLabels = len(id2label)

def get_device():
    """Return the torch device to run on: CUDA if available, otherwise CPU."""
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)

# Resolve the device once at import time; USE_CUDA records whether it is a GPU.
device = get_device()
USE_CUDA = device.type == 'cuda'

# Reverse lookup: value -> key
def get_key(val, my_dict):
    """Return the first key of ``my_dict`` whose value equals ``val``.

    Keys are scanned in the dictionary's iteration order. ``None`` is
    returned when no value matches.
    """
    return next((key for key, value in my_dict.items() if val == value), None)

def load_tokenizer(model_path):
    """Create and return the tokenizer for ``model_path``.

    The tokenizer is built with ``AutoTokenizer.from_pretrained`` and
    ``add_prefix_space=True``.
    """
    return AutoTokenizer.from_pretrained(model_path, add_prefix_space=True)

def remove_special_characters(text):
    """Normalise ``text`` to lowercase letters separated by single spaces.

    The text is lower-cased, every character that is not an ASCII letter,
    digit or whitespace becomes a space, digits become spaces, and each run
    of whitespace is collapsed to one space. Leading and trailing whitespace
    is collapsed as well, not stripped.
    """
    # Applied in order; each step feeds the next.
    substitutions = (
        (r'[^a-zA-Z0-9\s]', ' '),  # remove special characters
        (r'[0-9]', ' '),           # remove digits
        (r"\s+", " "),             # collapse repeated whitespace
    )

    cleaned = text.lower()  # case folding
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
    
def load_model():
    """Load the classifier with its PEFT adapter, plus the base tokenizer.

    The adapter config at ``MODELS_PATH`` names the base checkpoint. That
    checkpoint is loaded as a sequence classifier using the module-level
    label mappings, and the adapter from ``MODELS_PATH`` is then applied
    on top of it.

    Returns:
        A ``(model, tokenizer)`` tuple. The tokenizer comes from the base
        checkpoint, not from ``MODELS_PATH``.
    """
    peft_config = PeftConfig.from_pretrained(MODELS_PATH)
    base_checkpoint = peft_config.base_model_name_or_path

    base_model = AutoModelForSequenceClassification.from_pretrained(
        base_checkpoint,
        num_labels=numLabels,
        id2label=id2label,
        label2id=label2id,
    )
    tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
    return PeftModel.from_pretrained(base_model, MODELS_PATH), tokenizer

def classify_single(text, model, tokenizer, device):
    """Classify a single text and return its label name.

    Args:
        text: The input string to classify.
        model: Sequence-classification model. It is called with the token
            ids and must return an object exposing ``.logits``.
        tokenizer: Tokenizer providing ``encode(..., return_tensors="pt")``.
        device: ``torch.device`` on which inference runs.

    Returns:
        The ``id2label`` name of the highest-scoring class.

    Note:
        The model is moved to ``device`` and switched to eval mode as a
        side effect.
    """
    # Keep model and inputs on the same device in both directions; moving
    # only when the target is CUDA would leave a GPU-resident model
    # mismatched with CPU inputs.
    model.to(device)
    # Make sure dropout is disabled so repeated calls give the same answer.
    model.eval()

    # Truncate to the tokenizer's configured maximum length so over-long
    # input cannot exceed the model's position limit.
    input_ids = tokenizer.encode(
        text, return_tensors="pt", truncation=True
    ).to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_ids).logits

    # One input row -> one predicted class index.
    predicted_id = logits.argmax(dim=-1).item()
    return id2label[predicted_id]


tab_labels = ["Single Input", "Multiple Input"]


class App:
    """Streamlit page that classifies a single user-entered text."""

    def __init__(self):
        # Only input_text is read by the methods below; the other
        # attributes are kept so existing users of the class still see them.
        self.fileTypes = ["csv"]
        self.default_tab_selected = tab_labels[0]
        self.input_text = None
        self.csv_input = None
        self.csv_process = None

    def run(self):
        """Render the page: title, text input, divider and Process button."""
        # run() is invoked on every script execution; caching the loader
        # means the model and tokenizer are built once and then reused
        # instead of being reloaded each time.
        model, tokenizer = st.cache_resource(load_model)()

        html_temp = """
        <div style="padding:10px">
        <h1 style="color:white;text-align:center;">User Question Classification</h1>
        </div>
        """
        st.markdown(html_temp, unsafe_allow_html=True)
        st.markdown("")
        if USE_CUDA:
            # Show the "CUDA enabled" badge in the sidebar.
            st.sidebar.markdown(footer, unsafe_allow_html=True)
        self.render_single_input()
        st.divider()
        self.render_process_button(model, tokenizer, device)

    def render_single_input(self):
        """Show the text area and store its current content on the instance."""
        self.input_text = st.text_area("Enter Text Here", placeholder="Type Here")

    def render_process_button(self, model, tokenizer, device):
        """Show the Process button and, when clicked, classify the input.

        Args:
            model: Classifier passed through to ``classify_single``.
            tokenizer: Tokenizer passed through to ``classify_single``.
            device: Device passed through to ``classify_single``.
        """
        if not st.button("Process"):
            return

        input_text = self.input_text
        # Whitespace-only input carries nothing to classify; treat it
        # the same as an empty box.
        if input_text and input_text.strip():
            classification_result = classify_single(input_text, model, tokenizer, device)
            st.write("Classification result:", classification_result)
        else:
            st.warning('Please enter text to process', icon="⚠️")
            
    
# HTML/CSS snippet that App.run() renders in the sidebar when USE_CUDA is true.
# It is assigned at module level above the __main__ guard, so the name exists
# by the time run() reads it.
# NOTE(review): `left: 10;` has no unit, so browsers will likely ignore that
# declaration - confirm whether `10px` was intended.
footer="""<style>
.footer {
position: fixed;
left: 10;
bottom: 0;
width: 100%;
color: #ffa9365e;
}
</style>
<div class="footer">
<p>CUDA enabled</p>
</div>
"""

# Entry point: build the app and render the page when this file is executed
# as the main module.
if __name__ == "__main__":
    app = App()
    app.run()