File size: 1,570 Bytes
661107f 6cc6801 681ecc1 d31dcb9 8e687b3 efda81b 681ecc1 efda81b 681ecc1 efda81b 661107f 681ecc1 661107f 681ecc1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import os
import numpy as np
import joblib
import scipy.sparse
from textblob import TextBlob
import nltk
# Make sure the NLTK "punkt" tokenizer data is available locally; the
# download is attempted only when the lookup below fails.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
from src.preprocessing import clean_text
from src.features import create_features
from src.model import load_model
from src.entity_extraction import extract_entities
# Absolute path of the "models" directory that sits one level above the
# directory containing this file.
BASE_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "models")
)

# Load both classifiers (issue type first, then urgency level) once at
# import time so predict_ticket() can reuse them on every call.
model_issue, model_urgency = (
    load_model(os.path.join(BASE_PATH, artifact_name))
    for artifact_name in ("model_issue_type.pkl", "model_urgency_level.pkl")
)

# Vectorizer used by predict_ticket() to turn cleaned text into TF-IDF
# features.
# NOTE(review): joblib.load unpickles the file, so these artifacts should only
# ever come from a trusted location.
tfidf = joblib.load(os.path.join(BASE_PATH, "tfidf.pkl"))
def predict_ticket(ticket_text):
    """Predict issue type and urgency for a ticket and extract its entities.

    The text is cleaned with ``clean_text`` and vectorized with the
    module-level ``tfidf`` vectorizer. Two extra columns are appended to the
    TF-IDF row: the whitespace-separated word count of the cleaned text and
    its TextBlob sentiment polarity.

    Args:
        ticket_text: Raw ticket text (presumably a ``str`` -- confirm against
            ``clean_text`` and ``extract_entities``).

    Returns:
        dict: ``"issue_type"`` and ``"urgency_level"`` hold the first
        prediction returned by the respective model; ``"entities"`` holds the
        result of ``extract_entities`` applied to the raw (uncleaned) text.
    """
    cleaned = clean_text(ticket_text)

    # One-row TF-IDF representation of the cleaned ticket.
    tfidf_block = tfidf.transform([cleaned])

    # Hand-crafted extras, each wrapped as a 1x1 block so it stacks as a
    # single additional column.
    length_block = np.array([[len(cleaned.split())]])
    polarity_block = np.array([[TextBlob(cleaned).sentiment.polarity]])

    # Column layout: TF-IDF terms, then ticket length, then sentiment.
    # NOTE(review): presumably this must match the layout the models were
    # trained on -- confirm against the training code.
    features = scipy.sparse.hstack([tfidf_block, length_block, polarity_block])

    # Dict values are evaluated in order: issue model, urgency model, then
    # entity extraction on the original text.
    return {
        "issue_type": model_issue.predict(features)[0],
        "urgency_level": model_urgency.predict(features)[0],
        "entities": extract_entities(ticket_text),
    }
|