import numpy as np
from sklearn.metrics.pairwise import cosine_distances

from app.model_loader import embedding_model, core_samples, labels, eps
from app.cluster_metadata import cluster_info


def predict_cluster(log_text):
    """Match one log message against the known clusters.

    The text is embedded with ``embedding_model``, compared to every row of
    ``core_samples`` by cosine distance, and the closest sample is selected.
    If that distance is at most ``eps`` the sample's entry in ``labels`` is
    used as the cluster id and enriched from ``cluster_info``; otherwise the
    log is reported as an anomaly with cluster id ``-1``.

    NOTE(review): ``core_samples``/``labels``/``eps`` look like artefacts of a
    density-based clustering fit (e.g. DBSCAN) -- confirm in
    ``app.model_loader``, in particular that ``labels`` is indexed in the
    same order as ``core_samples``.

    Args:
        log_text: The raw log message to classify.

    Returns:
        A dict with the keys ``cluster_id``, ``cluster_name``, ``subsystem``,
        ``description``, ``similarity_score`` (``1 - cosine distance`` to the
        nearest sample, as a float) and ``anomaly`` (bool).
    """
    query_embedding = embedding_model.encode([log_text])
    distance_matrix = cosine_distances(query_embedding, core_samples)

    # argmin works on the flattened matrix; assuming encode() yields a single
    # row for the one-element list, that flat index is also the column index.
    closest = np.argmin(distance_matrix)
    closest_distance = distance_matrix[0][closest]
    score = float(1 - closest_distance)

    # Written as "not (d <= eps)" so that a NaN distance is treated as an
    # anomaly, exactly like the fall-through branch of a plain "d <= eps" test.
    matches_known_cluster = closest_distance <= eps
    if not matches_known_cluster:
        return {
            "cluster_id": -1,
            "cluster_name": "Unknown Bug Pattern",
            "subsystem": "unknown",
            "description": "Log does not match known clusters",
            "similarity_score": score,
            "anomaly": True,
        }

    cluster_id = int(labels[closest])
    # Clusters without metadata fall back to generic placeholder values.
    metadata = cluster_info.get(cluster_id, {})
    return {
        "cluster_id": cluster_id,
        "cluster_name": metadata.get("name", "Unknown Cluster"),
        "subsystem": metadata.get("subsystem", "unknown"),
        "description": metadata.get("description", "No description"),
        "similarity_score": score,
        "anomaly": False,
    }