VcRlAgent commited on
Commit
d0f182c
·
1 Parent(s): a123e22

Metrics Fix and Embedding Enhancement for GFG_FINAL dataset - testing for GFG_FINAL_Trimmed_v2

Browse files
.env.example CHANGED
@@ -18,7 +18,6 @@ HF_TOKEN=your_huggingface_token_here
18
  EMBEDDING_MODEL=intfloat/e5-large-v2
19
 
20
 
21
-
22
  # Server Configuration
23
  HOST=0.0.0.0
24
  PORT=7860
 
18
  EMBEDDING_MODEL=intfloat/e5-large-v2
19
 
20
 
 
21
  # Server Configuration
22
  HOST=0.0.0.0
23
  PORT=7860
app/routes/metrics_routes.py CHANGED
@@ -12,63 +12,97 @@ router = APIRouter()
12
  @router.get("/metrics", response_model=MetricsResponse)
13
  async def get_metrics():
14
  """
15
- Get aggregate metrics from Jira data
16
-
 
17
  - Average resolution time
18
- - Open/closed ticket counts
19
- - SLA compliance percentage
20
  """
21
  try:
22
  logger.info("Calculating metrics...")
23
 
24
  info = vector_store.get_collection_info()
25
- total_tickets = info.get('vectors_count', 0)
26
  if total_tickets == 0:
27
  raise HTTPException(status_code=404, detail="No data available. Please ingest data first.")
28
 
29
- # Pull a sample or all payloads from the sidecar store
30
- payloads = vector_store.get_payloads_sample(limit=100)
31
  if not payloads:
32
- raise HTTPException(status_code=404, detail="Unable to retrieve metrics data")
33
 
34
- # Calculate metrics
35
- open_statuses = {'Open', 'In Progress', 'To Do'}
36
- closed_statuses = {'Closed', 'Done', 'Resolved'}
 
37
 
38
- open_tickets = sum(1 for p in payloads if (p.get('status') or '') in open_statuses)
39
- closed_tickets = sum(1 for p in payloads if (p.get('status') or '') in closed_statuses)
40
 
41
- # Average resolution time (days)
42
- resolution_times = []
43
- for p in payloads:
44
- created = p.get('created_date')
45
- resolved = p.get('resolved_date')
46
- if created and resolved:
47
- try:
48
- c = pd.to_datetime(created)
49
- r = pd.to_datetime(resolved)
50
- delta = (r - c).days
51
- if delta >= 0:
52
- resolution_times.append(delta)
53
- except Exception:
54
- pass
 
 
 
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  avg_resolution = (sum(resolution_times) / len(resolution_times)) if resolution_times else 0.0
57
- avg_resolution_str = f"{avg_resolution:.1f} days"
58
 
59
- # SLA compliance: resolved within 5 days
60
  sla_threshold = 5
61
  sla_compliant = sum(1 for t in resolution_times if t <= sla_threshold)
62
  sla_pct = (sla_compliant / len(resolution_times) * 100) if resolution_times else 0.0
63
- sla_compliance_str = f"{sla_pct:.0f}%"
64
-
65
- return MetricsResponse(
66
- avg_resolution_time=avg_resolution_str,
67
- open_tickets=open_tickets,
68
- closed_tickets=closed_tickets,
69
- sla_compliance=sla_compliance_str,
70
- total_tickets=total_tickets
71
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  except HTTPException:
74
  raise
 
12
@router.get("/metrics", response_model=MetricsResponse)
async def get_metrics():
    """
    Compute key metrics from Jira data:
    - Total tickets
    - Open vs Closed
    - Average resolution time
    - SLA compliance
    - Priority and Issue Type distribution
    """
    try:
        logger.info("Calculating metrics...")

        info = vector_store.get_collection_info()
        total_tickets = info.get("vectors_count", 0)
        if total_tickets == 0:
            raise HTTPException(status_code=404, detail="No data available. Please ingest data first.")

        # Load all payloads instead of a sample so counts reflect the full dataset.
        payloads = vector_store.get_all_payloads()
        if not payloads:
            raise HTTPException(status_code=404, detail="No payloads found for metrics.")

        # Normalize payload keys to lowercase so column lookups are case-insensitive.
        df = pd.DataFrame([{k.lower(): v for k, v in p.items()} for p in payloads])

        # --- Handle Missing Core Fields Gracefully ---
        def get_col(options):
            """Return the first column name from `options` present in df, else None."""
            for o in options:
                if o in df.columns:
                    return o
            return None

        status_col = get_col(["status"])
        created_col = get_col(["created", "created_date"])
        resolved_col = get_col(["resolved", "resolved_date"])
        priority_col = get_col(["priority"])
        issue_type_col = get_col(["issue type", "issuetype"])

        # --- Compute Open/Closed Ticket Counts ---
        # BUG FIX: statuses are matched against the lowercased `status_norm`
        # column, so both sets must be lowercase too. The open set previously
        # kept its source casing ('Needs Triage', ...) and could never match,
        # making open_tickets always 0 (closed_statuses was already lowercase).
        open_statuses = {'needs triage', 'in progress', 'short term backlog', 'gathering interest', 'gathering impact'}
        closed_statuses = {"closed", "done", "resolved"}

        if status_col:
            df["status_norm"] = df[status_col].astype(str).str.strip().str.lower()
            open_tickets = df["status_norm"].isin(open_statuses).sum()
            closed_tickets = df["status_norm"].isin(closed_statuses).sum()
        else:
            open_tickets = closed_tickets = 0

        # --- Average Resolution Time (days) ---
        # Vectorized instead of iterrows(); errors="coerce" turns unparseable
        # dates into NaT, which dropna() removes. Negative deltas (resolved
        # before created) are discarded as data errors, matching the old
        # `r >= c` check.
        resolution_times = []
        if created_col and resolved_col:
            created = pd.to_datetime(df[created_col], errors="coerce")
            resolved = pd.to_datetime(df[resolved_col], errors="coerce")
            deltas = (resolved - created).dt.days.dropna()
            resolution_times = [int(d) for d in deltas if d >= 0]
        avg_resolution = (sum(resolution_times) / len(resolution_times)) if resolution_times else 0.0
        # BUG FIX: gate on resolution_times, not on the average itself — a
        # legitimate 0.0-day average (same-day fixes) previously showed "N/A".
        avg_resolution_str = f"{avg_resolution:.1f} days" if resolution_times else "N/A"

        # --- SLA Compliance (resolved within 5 days) ---
        sla_threshold = 5
        sla_compliant = sum(1 for t in resolution_times if t <= sla_threshold)
        sla_pct = (sla_compliant / len(resolution_times) * 100) if resolution_times else 0.0
        sla_compliance_str = f"{sla_pct:.0f}%" if resolution_times else "N/A"

        # --- Priority Distribution ---
        priority_counts = df[priority_col].value_counts().to_dict() if priority_col else {}

        # --- Issue Type Distribution ---
        issue_type_counts = df[issue_type_col].value_counts().to_dict() if issue_type_col else {}

        # --- Prepare Response ---
        # NOTE(review): the two *_distribution keys are new relative to the old
        # MetricsResponse(...) construction — confirm the schema declares them,
        # otherwise FastAPI will drop (or reject) them at serialization time.
        return {
            "avg_resolution_time": avg_resolution_str,
            "open_tickets": int(open_tickets),
            "closed_tickets": int(closed_tickets),
            "sla_compliance": sla_compliance_str,
            "total_tickets": int(total_tickets),
            "priority_distribution": priority_counts,
            "issue_type_distribution": issue_type_counts,
        }

    except HTTPException:
        raise
app/routes/metrics_routes.py.bak ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Routes for aggregate metrics"""
import spaces
from fastapi import APIRouter, HTTPException
from app.models.jira_schema import MetricsResponse
from app.services.vector_store import vector_store
from app.utils.logger import setup_logger
import pandas as pd

logger = setup_logger(__name__)
router = APIRouter()


@router.get("/metrics", response_model=MetricsResponse)
async def get_metrics():
    """
    Get aggregate metrics from Jira data

    - Average resolution time
    - Open/closed ticket counts
    - SLA compliance percentage
    """
    try:
        logger.info("Calculating metrics...")

        # Total ticket count comes from the vector store's collection metadata.
        info = vector_store.get_collection_info()
        total_tickets = info.get('vectors_count', 0)
        if total_tickets == 0:
            raise HTTPException(status_code=404, detail="No data available. Please ingest data first.")

        # Pull every payload from the sidecar store (not just a sample).
        payloads = vector_store.get_all_payloads()
        if not payloads:
            raise HTTPException(status_code=404, detail="Unable to retrieve metrics data")

        # Open/closed buckets use the exact status strings from this Jira
        # project; the two sets are disjoint, so a single pass can classify.
        open_statuses = {'Needs Triage', 'In Progress', 'Short Term Backlog', 'Gathering Interest', 'Gathering Impact'}
        closed_statuses = {'Closed', 'Done', 'Resolved'}

        open_tickets = 0
        closed_tickets = 0
        for payload in payloads:
            status = payload.get('status') or ''
            if status in open_statuses:
                open_tickets += 1
            elif status in closed_statuses:
                closed_tickets += 1

        # Resolution time in whole days, skipping records with missing,
        # unparseable, or negative (resolved-before-created) spans.
        resolution_times = []
        for payload in payloads:
            created_raw = payload.get('created_date')
            resolved_raw = payload.get('resolved_date')
            if not (created_raw and resolved_raw):
                continue
            try:
                elapsed_days = (pd.to_datetime(resolved_raw) - pd.to_datetime(created_raw)).days
            except Exception:
                continue
            if elapsed_days >= 0:
                resolution_times.append(elapsed_days)

        avg_resolution = sum(resolution_times) / len(resolution_times) if resolution_times else 0.0
        avg_resolution_str = f"{avg_resolution:.1f} days"

        # SLA: a ticket is compliant when resolved within 5 days.
        sla_threshold = 5
        sla_compliant = len([t for t in resolution_times if t <= sla_threshold])
        sla_pct = sla_compliant / len(resolution_times) * 100 if resolution_times else 0.0
        sla_compliance_str = f"{sla_pct:.0f}%"

        return MetricsResponse(
            avg_resolution_time=avg_resolution_str,
            open_tickets=open_tickets,
            closed_tickets=closed_tickets,
            sla_compliance=sla_compliance_str,
            total_tickets=total_tickets
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Metrics calculation failed: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
app/services/data_ingestion.py CHANGED
@@ -65,7 +65,9 @@ class DataIngestionService:
65
  record[key] = None
66
 
67
  # Create searchable text representation
68
- text_fields = ['summary', 'description', 'status', 'priority', 'project']
 
 
69
  text_parts = []
70
 
71
  for field in text_fields:
 
65
  record[key] = None
66
 
67
  # Create searchable text representation
68
+ #text_fields = ['summary', 'description', 'status', 'priority', 'project']
69
+ text_fields = ['summary', 'description', 'status', 'priority', 'project','issue_type', 'component', 'module', 'symptom_severity','assignee', 'reporter']
70
+
71
  text_parts = []
72
 
73
  for field in text_fields: