Spaces: Runtime error
Commit 5ca6171 · Parent(s): 2d1aa85
Create main.py
main.py ADDED
@@ -0,0 +1,133 @@
try:
    import torch
    import pandas as pd
    import streamlit as st
    import re
    from transformers import AutoTokenizer, AutoModelForSequenceClassification
    from stqdm import stqdm
    from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
except Exception as e:
    print(e)

# Config
MODELS_PATH = "kadabengaran/distilbert-base-uncased-lora-text-classification"

id2label = {0: 'Other', 1: 'Problem Discovery', 2: 'Information Seeking', 3: 'Feature Request'}
label2id = {'Other': 0, 'Problem Discovery': 1, 'Information Seeking': 2, 'Feature Request': 3}
numLabels = 4

def get_device():
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')

USE_CUDA = False
device = get_device()
if device.type == 'cuda':
    USE_CUDA = True

# Get the key that maps to a given value in a dictionary
def get_key(val, my_dict):
    for key, value in my_dict.items():
        if val == value:
            return key

def load_tokenizer(model_path):
    # create tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path, add_prefix_space=True)
    return tokenizer

def remove_special_characters(text):
    # case folding
    text = text.lower()

    # remove special characters and digits
    text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text)
    text = re.sub(r'[0-9]', ' ', text)

    # replace multiple whitespace characters with a single space
    text = re.sub(r"\s+", " ", text)

    return text

def load_model():
    # load the LoRA adapter config, its base model, and the matching tokenizer
    config = PeftConfig.from_pretrained(MODELS_PATH)
    inference_model = AutoModelForSequenceClassification.from_pretrained(
        config.base_model_name_or_path, num_labels=numLabels, id2label=id2label, label2id=label2id
    )
    tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    model = PeftModel.from_pretrained(inference_model, MODELS_PATH)
    return model, tokenizer

def classify_single(text, model, tokenizer, device):

    if device.type == 'cuda':
        model.cuda()

    # tokenize text
    inputs = tokenizer.encode(text, return_tensors="pt").to(device)

    # compute logits
    logits = model(inputs).logits
    # convert logits to label
    predictions = torch.argmax(logits)
    return id2label[predictions.tolist()]


tab_labels = ["Single Input", "Multiple Input"]

class App:
    def __init__(self):
        self.fileTypes = ["csv"]
        self.default_tab_selected = tab_labels[0]
        self.input_text = None
        self.csv_input = None
        self.csv_process = None

    def run(self):
        model, tokenizer = load_model()
        html_temp = """
        <div style="padding:10px">
            <h1 style="color:white;text-align:center;">User Question Classification</h1>
        </div>
        """
        st.markdown(html_temp, unsafe_allow_html=True)
        st.markdown("")
        if USE_CUDA:
            st.sidebar.markdown(footer, unsafe_allow_html=True)
        self.render_single_input()
        st.divider()
        self.render_process_button(model, tokenizer, device)

    def render_single_input(self):
        self.input_text = st.text_area("Enter Text Here", placeholder="Type Here")

    def render_process_button(self, model, tokenizer, device):
        if st.button("Process"):
            input_text = self.input_text
            if input_text:
                classification_result = classify_single(input_text, model, tokenizer, device)
                st.write("Classification result:", classification_result)
            else:
                st.warning('Please enter text to process', icon="⚠️")


footer = """<style>
.footer {
    position: fixed;
    left: 10px;
    bottom: 0;
    width: 100%;
    color: #ffa9365e;
}
</style>
<div class="footer">
<p>CUDA enabled</p>
</div>
"""

if __name__ == "__main__":
    app = App()
    app.run()
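
For reference, a minimal sketch of how the functions above could be exercised outside the Streamlit UI. It assumes the file is saved as main.py, the imported dependencies (torch, transformers, peft, streamlit, stqdm) are installed, and the sample sentence is purely illustrative:

from main import load_model, classify_single, get_device

model, tokenizer = load_model()  # fetches the base model and the LoRA adapter from the Hub
device = get_device()            # cuda if available, otherwise cpu
label = classify_single("The app keeps crashing when I open the camera", model, tokenizer, device)
print(label)  # one of: 'Other', 'Problem Discovery', 'Information Seeking', 'Feature Request'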