software-maintenance-classification

Runtime error

App Files Files Community

software-maintenance-classification / main.py

kadabengaran

Create main.py

5ca6171 about 2 years ago

raw

history blame contribute delete

3.8 kB

	try:
	import torch
	import pandas as pd
	import streamlit as st
	import re
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	from stqdm import stqdm
	from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
	except Exception as e:
	print(e)

	# Config
	# Identifier handed to PeftConfig.from_pretrained / PeftModel.from_pretrained.
	MODELS_PATH = "kadabengaran/distilbert-base-uncased-lora-text-classification"

	# Class index -> label name, as passed to the base classifier.
	id2label = {
	    0: 'Other',
	    1: 'Problem Discovery',
	    2: 'Information Seeking',
	    3: 'Feature Request',
	}
	# Inverse mapping and class count are derived from id2label so the three
	# values always agree with each other.
	label2id = {label: index for index, label in id2label.items()}
	numLabels = len(id2label)

	def get_device():
	    """Return the CUDA device when torch reports one available, else the CPU device."""
	    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
	    return torch.device(device_name)

	# Resolve the inference device once at import time; USE_CUDA records whether
	# that device is a CUDA device.
	device = get_device()
	USE_CUDA = device.type == 'cuda'

	def get_key(val, my_dict):
	    """Return the first key of ``my_dict`` whose value equals ``val``.

	    Keys are examined in the dictionary's iteration order. ``None`` is
	    returned when no value matches.
	    """
	    matching_keys = (key for key, value in my_dict.items() if val == value)
	    return next(matching_keys, None)

	def load_tokenizer(model_path):
	    """Build and return the tokenizer for ``model_path``.

	    ``model_path`` is forwarded to ``AutoTokenizer.from_pretrained`` together
	    with ``add_prefix_space=True``.
	    """
	    return AutoTokenizer.from_pretrained(model_path, add_prefix_space=True)

	def remove_special_characters(text):
	    """Normalise ``text`` to lower-case ASCII letters separated by single spaces.

	    The text is lower-cased, then every character that is not an ASCII
	    letter, digit or whitespace becomes a space, then every digit becomes a
	    space, and finally each run of whitespace collapses to one space.
	    Leading or trailing whitespace is collapsed too, but not stripped.
	    """
	    # Applied in order; each entry is (pattern, replacement).
	    substitutions = (
	        (r'[^a-zA-Z0-9\s]', ' '),  # remove special characters
	        (r'[0-9]', ' '),           # remove digits
	        (r"\s+", " "),             # squeeze repeated whitespace
	    )

	    cleaned = text.lower()  # case folding
	    for pattern, replacement in substitutions:
	        cleaned = re.sub(pattern, replacement, cleaned)
	    return cleaned

	@st.cache_resource
	def load_model():
	    """Load the LoRA-adapted sequence classifier and its tokenizer.

	    The adapter configuration at ``MODELS_PATH`` names the base model. The
	    base model is loaded with the label mappings defined in this file, then
	    wrapped with the adapter weights from ``MODELS_PATH``.

	    The result is cached with ``st.cache_resource``. Streamlit re-executes
	    the script on every widget interaction, so without the cache each button
	    click would load the model again.

	    Returns:
	        A ``(model, tokenizer)`` tuple. The tokenizer is the one belonging
	        to the base model.
	    """
	    config = PeftConfig.from_pretrained(MODELS_PATH)
	    base_model_name = config.base_model_name_or_path

	    base_model = AutoModelForSequenceClassification.from_pretrained(
	        base_model_name,
	        num_labels=numLabels,
	        id2label=id2label,
	        label2id=label2id,
	    )
	    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
	    model = PeftModel.from_pretrained(base_model, MODELS_PATH)
	    return model, tokenizer

	def classify_single(text, model, tokenizer, device):
	    """Classify one text and return the name of the predicted label.

	    Args:
	        text: The string to classify.
	        model: Classifier called with the encoded token ids; its output must
	            expose a ``logits`` attribute.
	        tokenizer: Tokenizer whose ``encode`` supports ``return_tensors`` and
	            ``truncation``.
	        device: ``torch.device`` on which inference runs.

	    Returns:
	        The ``id2label`` entry for the highest-scoring class.
	    """
	    # Keep the model on the same device as the inputs (no-op when it is
	    # already there); this also covers devices other than CUDA.
	    model.to(device)

	    # truncation=True caps the sequence at the tokenizer's maximum model
	    # length, so very long inputs no longer overflow the position embeddings.
	    input_ids = tokenizer.encode(
	        text, return_tensors="pt", truncation=True
	    ).to(device)

	    # Inference only: skip autograd bookkeeping to save memory and time.
	    with torch.no_grad():
	        logits = model(input_ids).logits

	    # A single text is encoded, so the last axis holds the per-class scores.
	    predicted_id = torch.argmax(logits, dim=-1).item()
	    return id2label[predicted_id]


	tab_labels = ["Single Input", "Multiple Input"]


	class App:
	    """Streamlit page that classifies one piece of user-entered text."""

	    def __init__(self):
	        # Of these attributes, only input_text is read by the methods of this
	        # class; the others are initialised here and not used further in it.
	        self.fileTypes = ["csv"]
	        self.default_tab_selected = tab_labels[0]
	        self.input_text = None
	        self.csv_input = None
	        self.csv_process = None

	    def run(self):
	        """Render the whole page: title, optional CUDA badge, input, button."""
	        model, tokenizer = load_model()
	        page_header = """
	<div style="padding:10px">
	<h1 style="color:white;text-align:center;">User Question Classification</h1>
	</div>
	"""
	        st.markdown(page_header, unsafe_allow_html=True)
	        st.markdown("")
	        # The sidebar badge is only shown when inference runs on CUDA.
	        if USE_CUDA:
	            st.sidebar.markdown(footer, unsafe_allow_html=True)
	        self.render_single_input()
	        st.divider()
	        self.render_process_button(model, tokenizer, device)

	    def render_single_input(self):
	        """Show the text area and remember its current content."""
	        self.input_text = st.text_area("Enter Text Here", placeholder="Type Here")

	    def render_process_button(self, model, tokenizer, device):
	        """Show the "Process" button and, when clicked, classify the entered text.

	        A warning is displayed instead of a result when the text area is empty.
	        """
	        if not st.button("Process"):
	            return

	        entered_text = self.input_text
	        if not entered_text:
	            st.warning('Please enter text to process', icon="⚠️")
	            return

	        predicted_label = classify_single(entered_text, model, tokenizer, device)
	        st.write("Classification result:", predicted_label)


	# HTML/CSS snippet rendered in the sidebar by App.run when USE_CUDA is set.
	# `left` needs a unit: a bare non-zero number is not a valid CSS length, so
	# the original `left: 10;` declaration was ignored by browsers.
	footer = """<style>
	.footer {
	position: fixed;
	left: 10px;
	bottom: 0;
	width: 100%;
	color: #ffa9365e;
	}
	</style>
	<div class="footer">
	<p>CUDA enabled</p>
	</div>
	"""

	# Script entry point: build the page object and render it. Nothing runs
	# when this file is imported as a module.
	if __name__ == "__main__":
	    app = App()
	    app.run()