infinityy committed on
Commit
949f9a6
·
verified ·
1 Parent(s): 2efc965

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import joblib
import numpy as np
import pandas as pd
import torch
import xgboost as xgb
from transformers import AutoModelForSequenceClassification, AutoTokenizer
7
+
8
# ---------------------------------------------------------------------------
# Artifacts loaded once at import time and shared by every request.
# ---------------------------------------------------------------------------
from safetensors.torch import load_file

# Base checkpoint used for both the tokenizer and the classifier skeleton.
_BASE_CHECKPOINT = "distilbert-base-cased"

tokenizer = AutoTokenizer.from_pretrained(_BASE_CHECKPOINT)

# Hidden states are requested because infer() reads the final layer's
# first-token vector instead of the classification logits.
model = AutoModelForSequenceClassification.from_pretrained(
    _BASE_CHECKPOINT,
    num_labels=5,
    output_hidden_states=True,
    trust_remote_code=True,
)

# Overlay the locally stored weights on top of the base checkpoint.
# NOTE(review): strict=False means missing or unexpected keys are not
# reported as errors -- confirm the checkpoint actually matches the model.
state_dict = load_file("model (4).safetensors")
model.load_state_dict(state_dict, strict=False)
model.eval()  # inference mode

# Fitted preprocessing / clustering objects.
# NOTE(review): joblib.load unpickles its input; only load trusted files.
pca = joblib.load("pca.pkl")
scaler = joblib.load("scaler.pkl")
kmean = joblib.load("kmeans_model.pkl")

# Gradient-boosted classifier restored from its JSON dump.
xgb_model = xgb.XGBClassifier()
xgb_model.load_model("xgb_model.json")

# Category set applied to the "kmeans_labels" feature column in infer().
_KMEANS_LABEL_VALUES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
category_mappings = {
    "kmeans_labels": pd.Index(_KMEANS_LABEL_VALUES, dtype='int32'),
}
30
+
31
def infer(component, title, description):
    """Predict a priority label for one issue.

    The three text fields are joined with " [SEP] " separators, encoded by
    the module-level ``model``, and the final hidden layer's first-token
    vector is passed through ``scaler`` and ``pca``. A cluster label from
    the module-level k-means model is appended as a categorical column and
    the resulting row is classified by ``xgb_model``.

    Args:
        component: Component name text.
        title: Issue title text.
        description: Issue description text.

    Returns:
        A string of the form ``"Predicted Priority: <int>"``.
    """
    # BERT embedding
    combined_text = f"{component} [SEP] {title} [SEP] {description}"
    inputs = tokenizer(
        combined_text, return_tensors="pt", truncation=True, max_length=512
    )
    with torch.no_grad():
        outputs = model(**inputs)
        # Last hidden layer, first token position, for every row in the batch.
        cls_embedding = outputs.hidden_states[-1][:, 0, :].numpy()
    embedding_df = pd.DataFrame(cls_embedding)

    # Preprocessing
    scaled = scaler.transform(embedding_df)
    projected = pca.transform(scaled)
    # The column count comes from the PCA output itself; the previous code
    # referenced an undefined name `n` here and raised NameError.
    pca_columns = [f"PCA{i + 1}" for i in range(projected.shape[1])]
    test_df = pd.DataFrame(projected, columns=pca_columns, index=embedding_df.index)

    # The clustering model is bound at module level as `kmean`; the previous
    # code called `kmeans.predict`, a name that was never defined.
    test_df["kmeans_labels"] = kmean.predict(test_df)
    test_df["kmeans_labels"] = pd.Categorical(
        test_df["kmeans_labels"],
        categories=category_mappings["kmeans_labels"],
    )

    # Predict
    prediction = xgb_model.predict(test_df, iteration_range=(0, 130))
    return f"Predicted Priority: {int(prediction[0])}"
52
+
53
# Gradio UI: three free-text boxes are passed positionally to
# infer(component, title, description); the returned string is shown as text.
_input_boxes = [
    gr.Textbox(label=field_label)
    for field_label in ("Component", "Title", "Description")
]
iface = gr.Interface(fn=infer, inputs=_input_boxes, outputs="text")
iface.launch()