Spaces:
Runtime error
Runtime error
| try: | |
| import torch | |
| import pandas as pd | |
| import streamlit as st | |
| import re | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| from stqdm import stqdm | |
| from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig | |
| except Exception as e: | |
| print(e) | |
# Config
# Hub path of the LoRA adapter; load_model reads both the adapter config and
# the adapter weights from it.
MODELS_PATH = "kadabengaran/distilbert-base-uncased-lora-text-classification"

# Integer class id -> label name. The inverse mapping and the class count are
# derived from this single table so the three values cannot drift apart.
id2label = {
    0: 'Other',
    1: 'Problem Discovery',
    2: 'Information Seeking',
    3: 'Feature Request',
}
label2id = {label: class_id for class_id, label in id2label.items()}
numLabels = len(id2label)
def get_device():
    """Return the torch device to use: CUDA when available, otherwise CPU."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)
# Resolve the device once at import time; USE_CUDA records whether it is a GPU.
device = get_device()
USE_CUDA = device.type == 'cuda'
# Get the Keys
def get_key(val, my_dict):
    """Return the first key of ``my_dict`` whose value equals ``val``.

    Entries are examined in the dictionary's iteration order. ``None`` is
    returned when no value matches.
    """
    matching_keys = (key for key, value in my_dict.items() if val == value)
    return next(matching_keys, None)
def load_tokenizer(model_path):
    """Create and return the tokenizer for ``model_path``.

    The tokenizer is built with ``AutoTokenizer.from_pretrained`` and
    ``add_prefix_space=True``.
    """
    return AutoTokenizer.from_pretrained(model_path, add_prefix_space=True)
def remove_special_characters(text):
    """Lower-case ``text`` and reduce it to letters separated by single spaces.

    The substitutions run in order: every character that is not an ASCII
    letter, ASCII digit or whitespace becomes a space; every ASCII digit
    becomes a space; each run of whitespace collapses to one space. The
    result is not stripped, so a leading or trailing space may remain.
    """
    substitutions = (
        # remove special characters
        (r'[^a-zA-Z0-9\s]', ' '),
        # remove digits
        (r'[0-9]', ' '),
        # replace multiple whitespace characters with a single space
        (r"\s+", " "),
    )
    # case folding
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
@st.cache_resource
def load_model():
    """Load the LoRA-adapted sequence classifier and its tokenizer.

    Reads the adapter configuration from ``MODELS_PATH``, instantiates the
    base model named in that configuration with this file's label mappings,
    loads the tokenizer from the same base checkpoint, and wraps the base
    model with the adapter weights stored at ``MODELS_PATH``.

    The function is wrapped in ``st.cache_resource`` because ``App.run``
    calls it on every Streamlit script rerun (i.e. on every widget
    interaction); without the cache the model would be reloaded each time.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    peft_config = PeftConfig.from_pretrained(MODELS_PATH)
    base_model_name = peft_config.base_model_name_or_path
    base_model = AutoModelForSequenceClassification.from_pretrained(
        base_model_name, num_labels=numLabels, id2label=id2label, label2id=label2id
    )
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    model = PeftModel.from_pretrained(base_model, MODELS_PATH)
    return model, tokenizer
def classify_single(text, model, tokenizer, device):
    """Classify one piece of text and return its label name.

    Args:
        text: The raw string to classify.
        model: Classification model; it is called with the encoded ids and
            must return an object exposing ``.logits``.
        tokenizer: Tokenizer providing ``encode(text, return_tensors="pt")``.
        device: ``torch.device`` on which the forward pass runs.

    Returns:
        The ``id2label`` entry for the highest-scoring class.
    """
    # Keep the model on the same device as the inputs, and in eval mode so
    # dropout layers cannot make the prediction non-deterministic.
    model.to(device)
    model.eval()
    # Truncate over-long input to the tokenizer's configured maximum length;
    # without this, text longer than the model's position limit makes the
    # forward pass fail.
    input_ids = tokenizer.encode(text, return_tensors="pt", truncation=True).to(device)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(input_ids).logits
    # Reduce over the class dimension explicitly rather than the flattened tensor.
    predicted_id = torch.argmax(logits, dim=-1).item()
    return id2label[predicted_id]
tab_labels = ["Single Input", "Multiple Input"]


class App:
    """Streamlit page that classifies a single piece of user-entered text."""

    def __init__(self):
        # Only input_text is read by the methods of this class; the remaining
        # attributes are initialised here and not used elsewhere in it.
        self.input_text = None
        self.csv_input = None
        self.csv_process = None
        self.fileTypes = ["csv"]
        self.default_tab_selected = tab_labels[0]

    def run(self):
        """Load the model, render the page and handle the Process button."""
        model, tokenizer = load_model()

        title_html = """
<div style="padding:10px">
<h1 style="color:white;text-align:center;">User Question Classification</h1>
</div>
"""
        st.markdown(title_html, unsafe_allow_html=True)
        st.markdown("")

        # Show the sidebar badge only when inference runs on a GPU.
        if USE_CUDA:
            st.sidebar.markdown(footer, unsafe_allow_html=True)

        self.render_single_input()
        st.divider()
        self.render_process_button(model, tokenizer, device)

    def render_single_input(self):
        """Draw the text area and keep its current value on ``self.input_text``."""
        self.input_text = st.text_area("Enter Text Here", placeholder="Type Here")

    def render_process_button(self, model, tokenizer, device):
        """Draw the Process button; on click, classify the entered text.

        A warning is shown instead of a result when no text was entered.
        """
        if not st.button("Process"):
            return

        entered_text = self.input_text
        if not entered_text:
            st.warning('Please enter text to process', icon="⚠️")
            return

        predicted_label = classify_single(entered_text, model, tokenizer, device)
        st.write("Classification result:", predicted_label)
# HTML/CSS badge that App.run renders in the sidebar when USE_CUDA is true.
# The `left` offset carries an explicit `px` unit: a unitless non-zero length
# is not valid CSS, so the previous `left: 10;` declaration had no effect.
footer = """<style>
.footer {
position: fixed;
left: 10px;
bottom: 0;
width: 100%;
color: #ffa9365e;
}
</style>
<div class="footer">
<p>CUDA enabled</p>
</div>
"""
# Script entry point: build the app and render the page.
if __name__ == "__main__":
    App().run()