Spaces:
Sleeping
Sleeping
Metrics Fix and Embedding Enhancement for GFG_FINAL dataset - testing for GFG_FINAL_Trimmed_v2
Browse files
- .env.example +0 -1
- app/routes/metrics_routes.py +72 -38
- app/routes/metrics_routes.py.bak +79 -0
- app/services/data_ingestion.py +3 -1
.env.example
CHANGED
|
@@ -18,7 +18,6 @@ HF_TOKEN=your_huggingface_token_here
|
|
| 18 |
EMBEDDING_MODEL=intfloat/e5-large-v2
|
| 19 |
|
| 20 |
|
| 21 |
-
|
| 22 |
# Server Configuration
|
| 23 |
HOST=0.0.0.0
|
| 24 |
PORT=7860
|
|
|
|
| 18 |
EMBEDDING_MODEL=intfloat/e5-large-v2
|
| 19 |
|
| 20 |
|
|
|
|
| 21 |
# Server Configuration
|
| 22 |
HOST=0.0.0.0
|
| 23 |
PORT=7860
|
app/routes/metrics_routes.py
CHANGED
|
@@ -12,63 +12,97 @@ router = APIRouter()
|
|
| 12 |
@router.get("/metrics", response_model=MetricsResponse)
|
| 13 |
async def get_metrics():
|
| 14 |
"""
|
| 15 |
-
|
| 16 |
-
|
|
|
|
| 17 |
- Average resolution time
|
| 18 |
-
-
|
| 19 |
-
-
|
| 20 |
"""
|
| 21 |
try:
|
| 22 |
logger.info("Calculating metrics...")
|
| 23 |
|
| 24 |
info = vector_store.get_collection_info()
|
| 25 |
-
total_tickets = info.get(
|
| 26 |
if total_tickets == 0:
|
| 27 |
raise HTTPException(status_code=404, detail="No data available. Please ingest data first.")
|
| 28 |
|
| 29 |
-
#
|
| 30 |
-
payloads = vector_store.
|
| 31 |
if not payloads:
|
| 32 |
-
raise HTTPException(status_code=404, detail="
|
| 33 |
|
| 34 |
-
#
|
| 35 |
-
|
| 36 |
-
|
|
|
|
| 37 |
|
| 38 |
-
|
| 39 |
-
closed_tickets = sum(1 for p in payloads if (p.get('status') or '') in closed_statuses)
|
| 40 |
|
| 41 |
-
#
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
avg_resolution = (sum(resolution_times) / len(resolution_times)) if resolution_times else 0.0
|
| 57 |
-
avg_resolution_str = f"{avg_resolution:.1f} days"
|
| 58 |
|
| 59 |
-
# SLA
|
| 60 |
sla_threshold = 5
|
| 61 |
sla_compliant = sum(1 for t in resolution_times if t <= sla_threshold)
|
| 62 |
sla_pct = (sla_compliant / len(resolution_times) * 100) if resolution_times else 0.0
|
| 63 |
-
sla_compliance_str = f"{sla_pct:.0f}%"
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
except HTTPException:
|
| 74 |
raise
|
|
|
|
| 12 |
@router.get("/metrics", response_model=MetricsResponse)
async def get_metrics():
    """
    Compute key metrics from Jira data:
    - Total tickets
    - Open vs Closed
    - Average resolution time
    - SLA compliance
    - Priority and Issue Type distribution

    Payload keys are lower-cased before use. A metric whose source column is
    missing falls back to 0, "N/A" or an empty distribution instead of failing.

    Raises:
        HTTPException: 404 when the collection reports no vectors or the
            store returns no payloads.
    """
    try:
        logger.info("Calculating metrics...")

        info = vector_store.get_collection_info()
        total_tickets = info.get("vectors_count", 0)
        if total_tickets == 0:
            raise HTTPException(status_code=404, detail="No data available. Please ingest data first.")

        # Load every payload rather than a sample so the counts cover all tickets.
        payloads = vector_store.get_all_payloads()
        if not payloads:
            raise HTTPException(status_code=404, detail="No payloads found for metrics.")

        # Lower-case the payload keys so column lookups are case-insensitive.
        df = pd.DataFrame([{k.lower(): v for k, v in p.items()} for p in payloads])

        # --- Handle Missing Core Fields Gracefully ---
        def get_col(options):
            """Find the first available column among the options."""
            return next((o for o in options if o in df.columns), None)

        status_col = get_col(["status"])
        created_col = get_col(["created", "created_date"])
        resolved_col = get_col(["resolved", "resolved_date"])
        priority_col = get_col(["priority"])
        # "issue_type" is tried last so existing matches keep their precedence.
        # NOTE(review): the ingestion code appears to store this field as
        # "issue_type" - confirm against the payload schema.
        issue_type_col = get_col(["issue type", "issuetype", "issue_type"])

        # --- Compute Open/Closed Ticket Counts ---
        # Statuses are stripped and lower-cased before matching, so both sets
        # must be lower-case too; mixed-case entries would never match.
        open_statuses = {
            "needs triage",
            "in progress",
            "short term backlog",
            "gathering interest",
            "gathering impact",
        }
        closed_statuses = {"closed", "done", "resolved"}

        if status_col:
            status_norm = df[status_col].astype(str).str.strip().str.lower()
            open_tickets = status_norm.isin(open_statuses).sum()
            closed_tickets = status_norm.isin(closed_statuses).sum()
        else:
            open_tickets = closed_tickets = 0

        # --- Average Resolution Time ---
        # Dates are parsed one value at a time so a single unparseable or
        # oddly formatted entry only drops that ticket from the average.
        resolution_times = []
        if created_col and resolved_col:
            for created_raw, resolved_raw in zip(df[created_col], df[resolved_col]):
                c = pd.to_datetime(created_raw, errors="coerce")
                r = pd.to_datetime(resolved_raw, errors="coerce")
                if pd.notnull(c) and pd.notnull(r) and r >= c:
                    resolution_times.append((r - c).days)

        avg_resolution = (sum(resolution_times) / len(resolution_times)) if resolution_times else 0.0
        # Test for data, not for a truthy average: tickets resolved on the day
        # they were created give a real 0.0-day average, not "N/A".
        avg_resolution_str = f"{avg_resolution:.1f} days" if resolution_times else "N/A"

        # --- SLA Compliance (Resolved <= 5 days) ---
        sla_threshold = 5
        sla_compliant = sum(1 for t in resolution_times if t <= sla_threshold)
        sla_pct = (sla_compliant / len(resolution_times) * 100) if resolution_times else 0.0
        sla_compliance_str = f"{sla_pct:.0f}%" if resolution_times else "N/A"

        # --- Priority Distribution ---
        priority_counts = df[priority_col].value_counts().to_dict() if priority_col else {}

        # --- Issue Type Distribution ---
        issue_type_counts = df[issue_type_col].value_counts().to_dict() if issue_type_col else {}

        # --- Prepare Response ---
        return {
            "avg_resolution_time": avg_resolution_str,
            "open_tickets": int(open_tickets),
            "closed_tickets": int(closed_tickets),
            "sla_compliance": sla_compliance_str,
            "total_tickets": int(total_tickets),
            "priority_distribution": priority_counts,
            "issue_type_distribution": issue_type_counts,
        }

    except HTTPException:
        # Propagate the deliberate 404 responses raised above unchanged.
        raise
|
app/routes/metrics_routes.py.bak
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Routes for aggregate metrics"""
|
| 2 |
+
import spaces
|
| 3 |
+
from fastapi import APIRouter, HTTPException
|
| 4 |
+
from app.models.jira_schema import MetricsResponse
|
| 5 |
+
from app.services.vector_store import vector_store
|
| 6 |
+
from app.utils.logger import setup_logger
|
| 7 |
+
import pandas as pd
|
| 8 |
+
|
| 9 |
+
logger = setup_logger(__name__)
|
| 10 |
+
router = APIRouter()
|
| 11 |
+
|
| 12 |
+
@router.get("/metrics", response_model=MetricsResponse)
async def get_metrics():
    """
    Report aggregate ticket metrics computed from the stored Jira payloads.

    The response carries the average resolution time in days, the open and
    closed ticket counts, the percentage of resolved tickets closed within
    the 5-day SLA, and the total ticket count reported by the vector store.

    Raises:
        HTTPException: 404 when the collection reports no vectors or the
            store returns no payloads; 500 for any other failure.
    """
    try:
        logger.info("Calculating metrics...")

        collection_info = vector_store.get_collection_info()
        total_tickets = collection_info.get('vectors_count', 0)
        if total_tickets == 0:
            raise HTTPException(status_code=404, detail="No data available. Please ingest data first.")

        # Use every stored payload, not a sample.
        payloads = vector_store.get_all_payloads()
        if not payloads:
            raise HTTPException(status_code=404, detail="Unable to retrieve metrics data")

        # Status names are matched exactly as stored (case-sensitive).
        open_statuses = {
            'Needs Triage',
            'In Progress',
            'Short Term Backlog',
            'Gathering Interest',
            'Gathering Impact',
        }
        closed_statuses = {'Closed', 'Done', 'Resolved'}

        # A missing or empty status counts as neither open nor closed.
        statuses = [(payload.get('status') or '') for payload in payloads]
        open_tickets = len([s for s in statuses if s in open_statuses])
        closed_tickets = len([s for s in statuses if s in closed_statuses])

        # Whole days between creation and resolution, one entry per ticket
        # that has both dates and a non-negative interval.
        resolution_times = []
        for payload in payloads:
            created_raw = payload.get('created_date')
            resolved_raw = payload.get('resolved_date')
            if not (created_raw and resolved_raw):
                continue
            try:
                created_at = pd.to_datetime(created_raw)
                resolved_at = pd.to_datetime(resolved_raw)
                elapsed_days = (resolved_at - created_at).days
                if elapsed_days >= 0:
                    resolution_times.append(elapsed_days)
            except Exception:
                # Best effort: a ticket with unusable dates is left out.
                continue

        # SLA compliance: resolved within 5 days.
        sla_threshold = 5
        resolved_count = len(resolution_times)
        if resolved_count:
            avg_resolution = sum(resolution_times) / resolved_count
            within_sla = len([days for days in resolution_times if days <= sla_threshold])
            sla_pct = within_sla / resolved_count * 100
        else:
            avg_resolution = 0.0
            sla_pct = 0.0

        return MetricsResponse(
            avg_resolution_time=f"{avg_resolution:.1f} days",
            open_tickets=open_tickets,
            closed_tickets=closed_tickets,
            sla_compliance=f"{sla_pct:.0f}%",
            total_tickets=total_tickets,
        )

    except HTTPException:
        # Deliberate HTTP errors pass through with their own status codes.
        raise
    except Exception as exc:
        message = str(exc)
        logger.error(f"Metrics calculation failed: {message}")
        raise HTTPException(status_code=500, detail=message)
|
app/services/data_ingestion.py
CHANGED
|
@@ -65,7 +65,9 @@ class DataIngestionService:
|
|
| 65 |
record[key] = None
|
| 66 |
|
| 67 |
# Create searchable text representation
|
| 68 |
-
text_fields = ['summary', 'description', 'status', 'priority', 'project']
|
|
|
|
|
|
|
| 69 |
text_parts = []
|
| 70 |
|
| 71 |
for field in text_fields:
|
|
|
|
| 65 |
record[key] = None
|
| 66 |
|
| 67 |
# Create searchable text representation
|
| 68 |
+
#text_fields = ['summary', 'description', 'status', 'priority', 'project']
|
| 69 |
+
text_fields = ['summary', 'description', 'status', 'priority', 'project','issue_type', 'component', 'module', 'symptom_severity','assignee', 'reporter']
|
| 70 |
+
|
| 71 |
text_parts = []
|
| 72 |
|
| 73 |
for field in text_fields:
|