import os

import joblib
import nltk
import numpy as np
import scipy.sparse
from textblob import TextBlob

# Download NLTK punkt tokenizer if not already present
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

from src.preprocessing import clean_text
from src.features import create_features
from src.model import load_model
from src.entity_extraction import extract_entities

# Define the path to the models directory (one level above this file)
BASE_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "models"))

# Load the trained models and the fitted TF-IDF vectorizer
model_issue = load_model(os.path.join(BASE_PATH, "model_issue_type.pkl"))
model_urgency = load_model(os.path.join(BASE_PATH, "model_urgency_level.pkl"))
tfidf = joblib.load(os.path.join(BASE_PATH, "tfidf.pkl"))


def predict_ticket(ticket_text):
    # Preprocess the input ticket text
    clean = clean_text(ticket_text)

    # TF-IDF transformation
    X_tfidf = tfidf.transform([clean])

    # Additional dense features: token count and sentiment polarity
    ticket_length = len(clean.split())
    sentiment = TextBlob(clean).sentiment.polarity

    # Combine sparse TF-IDF matrix with the dense features
    X_features = scipy.sparse.hstack([
        X_tfidf,
        np.array([[ticket_length]]),
        np.array([[sentiment]])
    ])

    # Make predictions for issue type and urgency level
    issue_pred = model_issue.predict(X_features)[0]
    urgency_pred = model_urgency.predict(X_features)[0]

    # Extract entities from the raw (uncleaned) text
    entities = extract_entities(ticket_text)

    return {
        "issue_type": issue_pred,
        "urgency_level": urgency_pred,
        "entities": entities
    }
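

# A minimal usage sketch. The sample ticket text below is hypothetical, and the
# exact label values in the output depend on how the pickled models were trained;
# this only illustrates the shape of the dict returned by predict_ticket.
if __name__ == "__main__":
    sample_ticket = (
        "I was charged twice for my subscription this month. "
        "Please refund the duplicate payment as soon as possible."
    )
    result = predict_ticket(sample_ticket)
    # Expected structure: {"issue_type": ..., "urgency_level": ..., "entities": ...}
    print(result)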