# Page header captured together with this file (not part of the program):
#   Spaces: Runtime error
#   File size: 3,798 Bytes
#   5ca6171
try:
import torch
import pandas as pd
import streamlit as st
import re
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from stqdm import stqdm
from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
except Exception as e:
print(e)
# Configuration: identifier handed to PeftConfig / PeftModel.from_pretrained.
MODELS_PATH = "kadabengaran/distilbert-base-uncased-lora-text-classification"

# Class index -> label name. The reverse mapping and the label count are
# derived from this one table so the three values always agree.
id2label = {
    0: 'Other',
    1: 'Problem Discovery',
    2: 'Information Seeking',
    3: 'Feature Request',
}
label2id = {name: index for index, name in id2label.items()}
numLabels = len(id2label)
def get_device():
    """Return the torch device to use: CUDA when available, otherwise CPU."""
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)
# Resolve the device once at import time; USE_CUDA records whether it is a GPU.
device = get_device()
USE_CUDA = device.type == 'cuda'
def get_key(val, my_dict):
    """Return the first key of ``my_dict`` whose value equals ``val``.

    Items are examined in the dictionary's iteration order. ``None`` is
    returned when no value matches.
    """
    matching_keys = (key for key, value in my_dict.items() if val == value)
    return next(matching_keys, None)
def load_tokenizer(model_path):
    """Create the tokenizer for ``model_path``.

    ``add_prefix_space=True`` is forwarded to
    ``AutoTokenizer.from_pretrained``.
    """
    return AutoTokenizer.from_pretrained(model_path, add_prefix_space=True)
def remove_special_characters(text):
    """Lowercase ``text`` and reduce it to letters separated by single spaces.

    Every character that is not an ASCII letter, ASCII digit or whitespace is
    replaced by a space, digits are then replaced by spaces as well, and each
    run of whitespace is squeezed into one space. The result is not stripped,
    so leading or trailing whitespace survives as a single space.
    """
    cleaned = text.lower()  # case folding
    substitutions = (
        (r'[^a-zA-Z0-9\s]', ' '),  # remove special characters
        (r'[0-9]', ' '),           # remove digits
        (r"\s+", " "),             # collapse whitespace runs
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
@st.cache_resource
def load_model():
    """Load the LoRA-adapted classifier and its tokenizer.

    The adapter configuration at ``MODELS_PATH`` names the base checkpoint.
    That base model is instantiated as a sequence classifier with the label
    mappings defined in this file, and the adapter weights from
    ``MODELS_PATH`` are then attached to it.

    The result is cached with ``st.cache_resource``: Streamlit re-executes
    the script on every widget interaction, and without the cache each click
    would reload the model.

    Returns:
        tuple: ``(model, tokenizer)``, where ``model`` is the ``PeftModel``
        and ``tokenizer`` is the tokenizer of the base checkpoint.
    """
    adapter_config = PeftConfig.from_pretrained(MODELS_PATH)
    base_checkpoint = adapter_config.base_model_name_or_path
    base_model = AutoModelForSequenceClassification.from_pretrained(
        base_checkpoint,
        num_labels=numLabels,
        id2label=id2label,
        label2id=label2id,
    )
    tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
    model = PeftModel.from_pretrained(base_model, MODELS_PATH)
    return model, tokenizer
def classify_single(text, model, tokenizer, device):
    """Classify one text and return the name of the predicted label.

    Args:
        text: The raw string to classify.
        model: Classifier whose output exposes ``.logits``.
        tokenizer: Tokenizer providing ``encode(..., return_tensors="pt")``.
        device: ``torch.device`` on which inference runs.

    Returns:
        The ``id2label`` entry for the highest-scoring class.
    """
    # Keep the model on the same device as the inputs (no-op once it is there).
    model.to(device)
    # truncation=True caps the sequence at the tokenizer's maximum length, so
    # a very long text cannot exceed what the model accepts.
    input_ids = tokenizer.encode(
        text, return_tensors="pt", truncation=True
    ).to(device)
    # Inference only: do not record the forward pass for autograd.
    with torch.no_grad():
        logits = model(input_ids).logits
    # A single sequence was encoded, so pick the best class of the first row
    # rather than taking an argmax over the flattened tensor.
    predicted_id = logits[0].argmax(dim=-1).item()
    return id2label[predicted_id]
# Names of the input modes; App reads only the first entry (as default tab).
tab_labels = ["Single Input", "Multiple Input"]


class App:
    """Streamlit page that classifies a single piece of user-entered text."""

    def __init__(self):
        # The CSV-related attributes are initialised here but are not read by
        # any method of this class.
        self.fileTypes = ["csv"]
        self.default_tab_selected = tab_labels[0]
        self.input_text = None
        self.csv_input = None
        self.csv_process = None

    def run(self):
        """Load the model, render the page and wire up the Process button."""
        model, tokenizer = load_model()
        html_temp = (
            "\n"
            '<div style="padding:10px">\n'
            '<h1 style="color:white;text-align:center;">User Question Classification</h1>\n'
            "</div>\n"
        )
        st.markdown(html_temp, unsafe_allow_html=True)
        st.markdown("")
        if USE_CUDA:
            # Show the "CUDA enabled" badge only when running on a GPU.
            st.sidebar.markdown(footer, unsafe_allow_html=True)
        self.render_single_input()
        st.divider()
        self.render_process_button(model, tokenizer, device)

    def render_single_input(self):
        """Render the text area and remember its current content."""
        self.input_text = st.text_area("Enter Text Here", placeholder="Type Here")

    def render_process_button(self, model, tokenizer, device):
        """Render the Process button and classify the text when it is clicked.

        A warning is shown instead of a result when the text area is empty or
        holds nothing but whitespace.
        """
        if not st.button("Process"):
            return
        input_text = self.input_text
        # Whitespace-only input carries nothing to classify, so treat it as
        # empty; real input is passed to the classifier unmodified.
        if input_text and input_text.strip():
            classification_result = classify_single(input_text, model, tokenizer, device)
            st.write("Classification result:", classification_result)
        else:
            st.warning('Please enter text to process', icon="⚠️")
# HTML/CSS badge rendered in the sidebar when CUDA is in use.
# `left` needs a unit: a bare non-zero number is not a valid CSS length.
footer = """<style>
.footer {
position: fixed;
left: 10px;
bottom: 0;
width: 100%;
color: #ffa9365e;
}
</style>
<div class="footer">
<p>CUDA enabled</p>
</div>
"""
# Script entry point: build the app and render the page.
if __name__ == "__main__":
    app = App()
    app.run()