| import numpy as np |
| from sklearn.metrics.pairwise import cosine_distances |
|
|
| from app.model_loader import embedding_model, core_samples, labels, eps |
| from app.cluster_metadata import cluster_info |
|
|
|
|
def predict_cluster(log_text):
    """Match a log line against the known clusters or flag it as an anomaly.

    The text is embedded with ``embedding_model``, compared by cosine
    distance against every row of ``core_samples``, and the closest core
    sample decides the outcome: if its distance is within ``eps`` the log
    inherits that sample's cluster label, otherwise it is reported as an
    unknown pattern.

    Args:
        log_text: The log message to classify. It is passed to the
            embedding model wrapped in a single-element list.

    Returns:
        A dict with the keys ``cluster_id``, ``cluster_name``,
        ``subsystem``, ``description``, ``similarity_score`` and
        ``anomaly``. For a match, the name/subsystem/description come from
        ``cluster_info`` (with fallback strings when the cluster has no
        entry) and ``anomaly`` is ``False``. Otherwise ``cluster_id`` is
        ``-1`` and ``anomaly`` is ``True``. ``similarity_score`` is always
        ``1 - distance`` to the nearest core sample, as a plain float.
    """
    query_embedding = embedding_model.encode([log_text])
    distance_matrix = cosine_distances(query_embedding, core_samples)

    # argmin runs over the flattened matrix; the result is then used as a
    # column index into row 0, which is the only row for a single query.
    closest_idx = np.argmin(distance_matrix)
    closest_distance = distance_matrix[0][closest_idx]
    score = float(1 - closest_distance)

    within_eps = closest_distance <= eps
    if not within_eps:
        # Nothing known is close enough: report an unknown pattern.
        return {
            "cluster_id": -1,
            "cluster_name": "Unknown Bug Pattern",
            "subsystem": "unknown",
            "description": "Log does not match known clusters",
            "similarity_score": score,
            "anomaly": True,
        }

    # NOTE(review): assumes labels[i] is the cluster label of
    # core_samples[i] (same ordering and length) -- confirm in model_loader.
    matched_id = int(labels[closest_idx])
    metadata = cluster_info.get(matched_id, {})
    return {
        "cluster_id": matched_id,
        "cluster_name": metadata.get("name", "Unknown Cluster"),
        "subsystem": metadata.get("subsystem", "unknown"),
        "description": metadata.get("description", "No description"),
        "similarity_score": score,
        "anomaly": False,
    }