RishiXD committed on
Commit
b23ff00
·
verified ·
1 Parent(s): 493f2cf

Upload 67 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. backend/Dockerfile +28 -0
  2. backend/README.md +64 -0
  3. backend/backend_app/__init__.py +0 -0
  4. backend/backend_app/__pycache__/__init__.cpython-312.pyc +0 -0
  5. backend/backend_app/__pycache__/main.cpython-311.pyc +0 -0
  6. backend/backend_app/__pycache__/main.cpython-312.pyc +0 -0
  7. backend/backend_app/api/__pycache__/planning_routes.cpython-311.pyc +0 -0
  8. backend/backend_app/api/__pycache__/planning_routes.cpython-312.pyc +0 -0
  9. backend/backend_app/api/__pycache__/proxy_routes.cpython-312.pyc +0 -0
  10. backend/backend_app/api/__pycache__/risk_analysis.cpython-311.pyc +0 -0
  11. backend/backend_app/api/__pycache__/risk_analysis.cpython-312.pyc +0 -0
  12. backend/backend_app/api/__pycache__/routes.cpython-311.pyc +0 -0
  13. backend/backend_app/api/__pycache__/routes.cpython-312.pyc +0 -0
  14. backend/backend_app/api/__pycache__/strategic_routes.cpython-311.pyc +0 -0
  15. backend/backend_app/api/__pycache__/strategic_routes.cpython-312.pyc +0 -0
  16. backend/backend_app/api/planning_routes.py +48 -0
  17. backend/backend_app/api/risk_analysis.py +118 -0
  18. backend/backend_app/api/routes.py +147 -0
  19. backend/backend_app/api/strategic_routes.py +32 -0
  20. backend/backend_app/core/__pycache__/config.cpython-311.pyc +0 -0
  21. backend/backend_app/core/__pycache__/config.cpython-312.pyc +0 -0
  22. backend/backend_app/core/__pycache__/explain.cpython-311.pyc +0 -0
  23. backend/backend_app/core/__pycache__/explain.cpython-312.pyc +0 -0
  24. backend/backend_app/core/__pycache__/github_client.cpython-311.pyc +0 -0
  25. backend/backend_app/core/__pycache__/metrics.cpython-311.pyc +0 -0
  26. backend/backend_app/core/__pycache__/metrics.cpython-312.pyc +0 -0
  27. backend/backend_app/core/__pycache__/models.cpython-311.pyc +0 -0
  28. backend/backend_app/core/__pycache__/models.cpython-312.pyc +0 -0
  29. backend/backend_app/core/__pycache__/planning_engine.cpython-311.pyc +0 -0
  30. backend/backend_app/core/__pycache__/planning_engine.cpython-312.pyc +0 -0
  31. backend/backend_app/core/__pycache__/planning_loader.cpython-311.pyc +0 -0
  32. backend/backend_app/core/__pycache__/planning_loader.cpython-312.pyc +0 -0
  33. backend/backend_app/core/__pycache__/planning_models.cpython-311.pyc +0 -0
  34. backend/backend_app/core/__pycache__/planning_models.cpython-312.pyc +0 -0
  35. backend/backend_app/core/__pycache__/signals.cpython-311.pyc +0 -0
  36. backend/backend_app/core/__pycache__/signals.cpython-312.pyc +0 -0
  37. backend/backend_app/core/__pycache__/strategic_controller.cpython-311.pyc +0 -0
  38. backend/backend_app/core/__pycache__/strategic_controller.cpython-312.pyc +0 -0
  39. backend/backend_app/core/config.py +13 -0
  40. backend/backend_app/core/explain.py +40 -0
  41. backend/backend_app/core/github_client.py +148 -0
  42. backend/backend_app/core/metrics.py +140 -0
  43. backend/backend_app/core/models.py +67 -0
  44. backend/backend_app/core/planning_engine.py +312 -0
  45. backend/backend_app/core/planning_loader.py +19 -0
  46. backend/backend_app/core/planning_models.py +66 -0
  47. backend/backend_app/core/signals.py +146 -0
  48. backend/backend_app/core/strategic_controller.py +256 -0
  49. backend/backend_app/integrations/__pycache__/repo_api.cpython-311.pyc +0 -0
  50. backend/backend_app/integrations/__pycache__/repo_api.cpython-312.pyc +0 -0
backend/Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official lightweight Python image.
# https://hub.docker.com/_/python
FROM python:3.9-slim

# Don't write .pyc files; don't buffer stdout/stderr so logs appear immediately.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Set work directory
WORKDIR /code

# Install dependencies first so this layer stays cached unless requirements change.
COPY requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy project including `backend_app` directory
COPY . /code/

# Expose the API port (Hugging Face Spaces defaults to 7860)
EXPOSE 7860

# Add both /code and /code/backend to PYTHONPATH to ensure backend_app can be imported
# regardless of whether the build context was root or the backend folder.
ENV PYTHONPATH="/code:/code/backend:$PYTHONPATH"

# Command to run the application using uvicorn.
# NOTE(review): the previous CMD ran `ls -R /code` before startup as a
# ModuleNotFoundError debugging aid; removed to keep startup fast and logs clean.
CMD ["uvicorn", "backend_app.main:app", "--host", "0.0.0.0", "--port", "7860"]
backend/README.md ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Tribal Knowledge Risk Index & Auto-Correct Planning Engine
2
+
3
+ A FastAPI service to analyze knowledge concentration (bus factor) and auto-correct sprint plans based on reality gaps.
4
+
5
+ ## Setup
6
+
7
+ 1. **Install dependencies**:
8
+ ```bash
9
+ pip install -r requirements.txt
10
+ ```
11
+
12
+ 2. **Ensure Data is present**:
13
+ Place JSON files in `data/`.
14
+ - GitHub Dummy Data: `prs.json`, `reviews.json`, `commits.json`, `modules.json`
15
+ - Jira Dummy Data: `jira_sprints.json`, `jira_issues.json`, `jira_issue_events.json`
16
+
17
+ ## Running the Service
18
+
19
+ Start the server:
20
+ ```bash
21
+ python backend_app/main.py
22
+ ```
23
+ Or:
24
+ ```bash
25
+ uvicorn backend_app.main:app --reload
26
+ ```
27
+ API: `http://127.0.0.1:8000`
28
+
29
+ ## API Endpoints
30
+
31
+ ### 1. Source System Loading (Run First)
32
+ - `POST /load_data`: Load GitHub data.
33
+ - `POST /planning/load_jira_dummy`: Load Jira data.
34
+
35
+ ### 2. Computation
36
+ - `POST /compute`: Compute Tribal Knowledge Risks.
37
+ - `POST /planning/compute_autocorrect`: Compute Reality Gaps & Plan Corrections.
38
+
39
+ ### 3. Features
40
+
41
+ **Tribal Knowledge**:
42
+ - `GET /modules`: List modules by risk.
43
+ - `GET /modules/{id}`: Detailed knowledge metrics.
44
+
45
+ **Auto-Correct Planning**:
46
+ - `GET /planning/sprints`: List sprints with reality gaps and predictions.
47
+ - `GET /planning/sprints/{id}`: Detailed sprint metrics.
48
+ - `GET /planning/autocorrect/rules`: Learned historical correction rules.
49
+
50
+ ## Example Flow
51
+
52
+ ```bash
53
+ # 1. Load All Data
54
+ curl -X POST http://127.0.0.1:8000/load_data
55
+ curl -X POST http://127.0.0.1:8000/planning/load_jira_dummy
56
+
57
+ # 2. Compute Insights
58
+ curl -X POST http://127.0.0.1:8000/compute
59
+ curl -X POST http://127.0.0.1:8000/planning/compute_autocorrect
60
+
61
+ # 3. Check "Auto-Correct" Insights
62
+ # See the reality gap for the current sprint
63
+ curl http://127.0.0.1:8000/planning/sprints
64
+ ```
backend/backend_app/__init__.py ADDED
File without changes
backend/backend_app/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (152 Bytes). View file
 
backend/backend_app/__pycache__/main.cpython-311.pyc ADDED
Binary file (1.72 kB). View file
 
backend/backend_app/__pycache__/main.cpython-312.pyc ADDED
Binary file (1.78 kB). View file
 
backend/backend_app/api/__pycache__/planning_routes.cpython-311.pyc ADDED
Binary file (3.58 kB). View file
 
backend/backend_app/api/__pycache__/planning_routes.cpython-312.pyc ADDED
Binary file (3.15 kB). View file
 
backend/backend_app/api/__pycache__/proxy_routes.cpython-312.pyc ADDED
Binary file (2.36 kB). View file
 
backend/backend_app/api/__pycache__/risk_analysis.cpython-311.pyc ADDED
Binary file (5.07 kB). View file
 
backend/backend_app/api/__pycache__/risk_analysis.cpython-312.pyc ADDED
Binary file (4.84 kB). View file
 
backend/backend_app/api/__pycache__/routes.cpython-311.pyc ADDED
Binary file (8.52 kB). View file
 
backend/backend_app/api/__pycache__/routes.cpython-312.pyc ADDED
Binary file (7.67 kB). View file
 
backend/backend_app/api/__pycache__/strategic_routes.cpython-311.pyc ADDED
Binary file (1.41 kB). View file
 
backend/backend_app/api/__pycache__/strategic_routes.cpython-312.pyc ADDED
Binary file (1.72 kB). View file
 
backend/backend_app/api/planning_routes.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import APIRouter, HTTPException, Path
from typing import List, Dict
from backend_app.state.store import store
from backend_app.core.planning_models import AutoCorrectHeadline, SprintMetrics, CorrectionRule

router = APIRouter(prefix="/planning", tags=["planning"])


@router.post("/load_jira_dummy")
def load_jira_dummy():
    """Load the dummy Jira dataset into the in-memory store and report counts."""
    try:
        loaded_counts = store.load_jira_data()
    except FileNotFoundError as exc:
        raise HTTPException(status_code=404, detail=str(exc))
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {"status": "loaded", "counts": loaded_counts}


@router.post("/compute_autocorrect", response_model=AutoCorrectHeadline)
def compute_autocorrect():
    """Run the auto-correct planning computation and return its headline."""
    try:
        store.compute_planning()
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return AutoCorrectHeadline(headline=store.planning_headline)


@router.get("/sprints", response_model=List[SprintMetrics])
def list_sprints():
    """List all sprints with their computed metrics; requires Jira data loaded."""
    if not store.jira_loaded:
        raise HTTPException(status_code=400, detail="Jira data not loaded. Call /planning/load_jira_dummy first.")
    return store.get_sprints()


@router.get("/sprints/{sprint_id}", response_model=SprintMetrics)
def get_sprint(sprint_id: str):
    """Return metrics for one sprint, or 404 if unknown / not yet computed."""
    found = store.get_sprint(sprint_id)
    if found:
        return found
    # Distinguish "nothing loaded yet" from "loaded but this id is missing".
    if not store.jira_loaded:
        raise HTTPException(status_code=400, detail="Jira data not loaded.")
    raise HTTPException(status_code=404, detail="Sprint not found or metrics not computed.")


@router.get("/autocorrect/rules", response_model=List[CorrectionRule])
def list_rules():
    """Return the learned historical correction rules."""
    return store.get_corrections()
backend/backend_app/api/risk_analysis.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import APIRouter, HTTPException
from typing import Dict, Any, List
from pydantic import BaseModel
from backend_app.state.store import store

router = APIRouter()


class RiskAnalysisRequest(BaseModel):
    org: str
    repo: str


def _collect_user_stats() -> List[Dict[str, Any]]:
    """Aggregate per-user commit / PR / merge / review counts from the store.

    Returns a list of {"user", "commits", "prs_opened", "prs_merged", "reviews"}
    dicts, sorted by commit count descending.
    """
    stats: Dict[str, Dict[str, int]] = {}

    def bucket(user: str) -> Dict[str, int]:
        # Lazily create the per-user counter row.
        return stats.setdefault(user, {"commits": 0, "prs_opened": 0, "prs_merged": 0, "reviews": 0})

    for c in store.commits:
        bucket(c.author or "unknown")["commits"] += 1

    for p in store.prs:
        row = bucket(p.author or "unknown")
        row["prs_opened"] += 1
        if p.merged_at:
            row["prs_merged"] += 1

    for r in store.reviews:
        bucket(r.reviewer or "unknown")["reviews"] += 1

    detailed = [{"user": user, **data} for user, data in stats.items()]
    detailed.sort(key=lambda x: x["commits"], reverse=True)
    return detailed


@router.post("/analyze/risk", response_model=Dict[str, Any])
def analyze_risk(req: RiskAnalysisRequest):
    """
    One-shot API to:
    1. Load Live Data
    2. Compute Metrics
    3. Return 'Bus Factor' Risk Analysis (Feature #1)
    PLUS Detailed raw stats: commits, PRs, merges per user.
    """
    try:
        # 1. Load Data
        print(f"Loading data for {req.org}/{req.repo}...")
        store.load_live_data(req.org, req.repo)

        # 2. Compute
        print("Computing metrics...")
        store.compute()

        # 3. Collect detailed per-user stats (commits, PRs, merges, reviews).
        detailed_stats = _collect_user_stats()

        modules = store.get_modules()
        if not modules:
            return {
                "headline": "No activity found.",
                "overall_repo_risk": 0,
                "user_stats": detailed_stats,
                "modules_analysis": []
            }

        # get_modules() is assumed sorted by risk desc, so [0] is the top risk.
        top_risk_module = modules[0]

        results = []
        for mod in modules:
            if not mod.people:
                continue
            top_person = mod.people[0]
            share = top_person.share_pct * 100

            # Bus Factor Check
            bus_factor = mod.bus_factor
            insight = "Healthy distribution."
            if bus_factor == 1:
                insight = f"CRITICAL: {top_person.person_id} is a single point of failure (Bus Factor 1). If they leave, {share:.1f}% of module logic is orphaned."
            elif share > 50:
                insight = f"HIGH RISK: {top_person.person_id} dominates ({share:.1f}%)."

            results.append({
                "module": mod.module_id,
                "risk_score": mod.risk_index,
                "severity": mod.severity,
                "bus_factor": bus_factor,
                "key_person": top_person.person_id,
                "knowledge_share_pct": round(share, 1),
                "insight": insight,
                "evidence": mod.evidence
            })

        headline = f"Repo Analysis: {top_risk_module.module_id} is at {top_risk_module.severity} risk."
        if top_risk_module.bus_factor == 1:
            headline += f" {top_risk_module.people[0].person_id} is a Single Point of Failure."

        return {
            "headline": headline,
            "overall_repo_risk": top_risk_module.risk_index,
            "user_stats": detailed_stats,
            "modules_analysis": results
        }

    except Exception as e:
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
backend/backend_app/api/routes.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import APIRouter, HTTPException, Path, Body
from typing import List, Dict, Optional
from pydantic import BaseModel
from backend_app.state.store import store
from backend_app.core.models import LoadStatus, ComputeHeadline, ModuleMetric
from backend_app.integrations.supabase_client import supabase
import requests
import uuid
import os

router = APIRouter()


class LiveDataRequest(BaseModel):
    org: str
    repo: str


@router.get("/health")
def health_check():
    """Liveness probe."""
    return {"status": "ok"}


@router.get("/test-supabase")
def test_supabase_connection():
    """Run a lightweight query to verify the Supabase connection works."""
    if not supabase:
        return {"status": "error", "message": "Supabase client is None (failed to init)"}

    try:
        # Try a lightweight query
        print("Testing Supabase connection...")
        response = supabase.table("pull_requests").select("count", count="exact").limit(1).execute()
        print(f"Supabase Test Result: {response}")
        return {
            "status": "ok",
            "data": response.data,
            "message": "Connection successful"
        }
    except Exception as e:
        print(f"Supabase Test Failed: {e}")
        return {"status": "error", "message": str(e)}


@router.post("/load_data", response_model=Dict)
def load_data(req: Optional[LiveDataRequest] = None):
    """Load GitHub data: live data when org/repo are supplied, else local dummy files."""
    try:
        if req and req.org and req.repo:
            return store.load_live_data(req.org, req.repo)
        counts = store.load_data()
        return {
            "prs": counts.get("prs", 0),
            "reviews": counts.get("reviews", 0),
            "commits": counts.get("commits", 0),
            "modules": counts.get("modules", 0),
            "source": "Dummy Data"
        }
    except Exception as e:
        # Map known failure modes (by message) to specific status codes.
        msg = str(e)
        if "Integration failed" in msg:
            raise HTTPException(status_code=502, detail=msg)
        if "missing" in msg.lower():  # File missing
            raise HTTPException(status_code=404, detail=msg)
        raise HTTPException(status_code=500, detail=f"Error loading data: {msg}")


@router.post("/compute", response_model=ComputeHeadline)
def compute():
    """Compute tribal-knowledge metrics and return a headline for the riskiest module."""
    try:
        store.compute()
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

    # Generate headline from the highest risk module
    modules = store.get_modules()
    if not modules:
        return ComputeHeadline(headline="No modules found or computed.")

    # Pick top risk (get_modules() is assumed sorted by risk desc).
    top_mod = modules[0]
    risk_level = top_mod.severity

    # Extract top person
    top_person_name = "No one"
    if top_mod.people:
        top_person_name = top_mod.people[0].person_id

    headline = f"{top_mod.module_id} module is at {risk_level} risk because {top_person_name} owns most of the knowledge signals."

    return ComputeHeadline(headline=headline)


@router.get("/modules", response_model=List[ModuleMetric], response_model_exclude={"people", "evidence", "plain_explanation"})
def list_modules():
    """
    List modules sorted by risk_index desc.
    Detailed fields (people, evidence, plain_explanation) are hidden via
    response_model_exclude to keep the list view lightweight.
    If nothing has been loaded/computed yet this simply returns an empty list.
    """
    return store.get_modules()


@router.get("/modules/{module_id}", response_model=ModuleMetric)
def get_module(module_id: str = Path(..., description="The ID of the module")):
    """Return the full metric record for one module, or 404 if unknown."""
    metric = store.get_module(module_id)
    if not metric:
        raise HTTPException(status_code=404, detail=f"Module '{module_id}' not found. Ensure signals are computed.")
    return metric


@router.get("/commits")
def get_commits_list():
    """
    Returns the list of loaded commits, newest first.
    """
    total_count = len(store.commits)
    # Sort by timestamp desc
    sorted_commits = sorted(store.commits, key=lambda c: c.timestamp, reverse=True)

    return {
        "count": total_count,
        "commits": sorted_commits
    }


@router.post("/run-workflow")
def run_workflow_endpoint(input_text: str = Body(default="hello world!", embed=True)):
    """Forward input_text to the local Langflow workflow and return its raw output."""
    # SECURITY FIX: an API key was previously hard-coded here (a leaked secret).
    # It must now be provided via the LANGFLOW_API_KEY environment variable.
    api_key = os.environ.get("LANGFLOW_API_KEY")
    if not api_key:
        raise HTTPException(status_code=500, detail="Workflow Error: LANGFLOW_API_KEY is not configured")

    url = "http://localhost:7860/api/v1/run/7e37cb01-7c44-44df-be5e-9969091a5ffe"

    payload = {
        "output_type": "chat",
        "input_type": "text",
        "input_value": input_text
    }
    payload["session_id"] = str(uuid.uuid4())
    headers = {"x-api-key": api_key}

    try:
        # timeout added so a hung workflow cannot block this worker forever
        response = requests.post(url, json=payload, headers=headers, timeout=60)
        response.raise_for_status()
        return {"output": response.text}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Workflow Error: {str(e)}")
+
backend/backend_app/api/strategic_routes.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import APIRouter, HTTPException
from backend_app.state.store import store
from backend_app.core.strategic_controller import get_strategic_audit, analyze_jira_from_db
from pydantic import BaseModel


class StrategicAuditResponse(BaseModel):
    """Response wrapper holding the generated briefing text."""

    briefing: str


router = APIRouter(prefix="/strategic", tags=["strategic"])


@router.post("/audit", response_model=StrategicAuditResponse)
def compute_strategic_audit():
    """Produce a strategic audit briefing from the currently loaded GitHub data."""
    # GitHub data is a hard prerequisite. Jira data is optional: without it the
    # audit falls back to an empty plan, so we don't reject on it here.
    if not store.loaded:
        raise HTTPException(status_code=400, detail="GitHub data not loaded. Call /load_data first.")
    return StrategicAuditResponse(briefing=get_strategic_audit())


@router.post("/jira-audit", response_model=StrategicAuditResponse)
def compute_jira_audit():
    """
    Compares the latest Jira data from DB with active GitHub data.
    """
    if not store.loaded:
        raise HTTPException(status_code=400, detail="GitHub data not loaded. Call /load_data first.")
    return StrategicAuditResponse(briefing=analyze_jira_from_db())
backend/backend_app/core/__pycache__/config.cpython-311.pyc ADDED
Binary file (778 Bytes). View file
 
backend/backend_app/core/__pycache__/config.cpython-312.pyc ADDED
Binary file (773 Bytes). View file
 
backend/backend_app/core/__pycache__/explain.cpython-311.pyc ADDED
Binary file (1.79 kB). View file
 
backend/backend_app/core/__pycache__/explain.cpython-312.pyc ADDED
Binary file (1.84 kB). View file
 
backend/backend_app/core/__pycache__/github_client.cpython-311.pyc ADDED
Binary file (8.05 kB). View file
 
backend/backend_app/core/__pycache__/metrics.cpython-311.pyc ADDED
Binary file (5.29 kB). View file
 
backend/backend_app/core/__pycache__/metrics.cpython-312.pyc ADDED
Binary file (4.83 kB). View file
 
backend/backend_app/core/__pycache__/models.cpython-311.pyc ADDED
Binary file (3.84 kB). View file
 
backend/backend_app/core/__pycache__/models.cpython-312.pyc ADDED
Binary file (3.06 kB). View file
 
backend/backend_app/core/__pycache__/planning_engine.cpython-311.pyc ADDED
Binary file (11.3 kB). View file
 
backend/backend_app/core/__pycache__/planning_engine.cpython-312.pyc ADDED
Binary file (10.5 kB). View file
 
backend/backend_app/core/__pycache__/planning_loader.cpython-311.pyc ADDED
Binary file (2.6 kB). View file
 
backend/backend_app/core/__pycache__/planning_loader.cpython-312.pyc ADDED
Binary file (1.85 kB). View file
 
backend/backend_app/core/__pycache__/planning_models.cpython-311.pyc ADDED
Binary file (3.27 kB). View file
 
backend/backend_app/core/__pycache__/planning_models.cpython-312.pyc ADDED
Binary file (2.64 kB). View file
 
backend/backend_app/core/__pycache__/signals.cpython-311.pyc ADDED
Binary file (4.6 kB). View file
 
backend/backend_app/core/__pycache__/signals.cpython-312.pyc ADDED
Binary file (4.15 kB). View file
 
backend/backend_app/core/__pycache__/strategic_controller.cpython-311.pyc ADDED
Binary file (7.1 kB). View file
 
backend/backend_app/core/__pycache__/strategic_controller.cpython-312.pyc ADDED
Binary file (11.1 kB). View file
 
backend/backend_app/core/config.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

# Project root: three levels up from this file
# (backend_app/core/config.py -> backend/).
BASE_DIR = Path(__file__).resolve().parents[2]
DATA_DIR = BASE_DIR / "data"

# GitHub-style dummy data files.
PRS_FILE = DATA_DIR.joinpath("prs.json")
REVIEWS_FILE = DATA_DIR.joinpath("reviews.json")
COMMITS_FILE = DATA_DIR.joinpath("commits.json")
MODULES_FILE = DATA_DIR.joinpath("modules.json")

# Jira-style dummy data files.
JIRA_SPRINTS_FILE = DATA_DIR.joinpath("jira_sprints.json")
JIRA_ISSUES_FILE = DATA_DIR.joinpath("jira_issues.json")
JIRA_EVENTS_FILE = DATA_DIR.joinpath("jira_issue_events.json")
backend/backend_app/core/explain.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from backend_app.core.models import ModuleMetric


def generate_explanation(metric: 'ModuleMetric') -> str:
    """
    Build a deterministic explanation for a module's risk metric that mentions:
    - risk score
    - top1 share %
    - bus factor interpretation
    - 1-2 evidence lines
    """
    # Headline
    text = f"Risk Score: {metric.risk_index} ({metric.severity}). "

    # Top Share. With no signals there is nothing further to explain, so we
    # return early *only* in that branch (the early return previously sat where
    # it could make the bus-factor and evidence sections unreachable).
    if not metric.people:
        text += "No knowledge signals recorded. "
        return text
    top_person = metric.people[0]
    text += f"Top contributor {top_person.person_id} holds {top_person.share_pct*100:.1f}% of the knowledge. "

    # Bus Factor interpretation.
    if metric.bus_factor == 0:
        text += "Bus factor is 0 (CRITICAL: No one has >10% share? Check data). "
    elif metric.bus_factor == 1:
        text += "Bus factor is 1 (Single point of failure). "
    elif metric.bus_factor < 3:
        text += f"Bus factor is {metric.bus_factor} (Low redundancy). "
    else:
        text += f"Bus factor is {metric.bus_factor} (Good redundancy). "

    # Evidence (1-2 lines)
    if metric.evidence:
        text += "Key evidence: "
        text += "; ".join(metric.evidence[:2]) + "."

    return text
backend/backend_app/core/github_client.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from datetime import datetime, timezone
from typing import List, Dict, Any, Optional
import requests
import json
import random # Fallback for story points
from backend_app.core.models import RawCommit, RawPR, RawReview
from backend_app.core.planning_models import RawIssue, RawIssueEvent, RawSprint

# Base URL for the custom GitHub App/API
BASE_URL = "https://samyak000-github-app.hf.space/insights"


class GitHubClient:
    """Thin client for the custom GitHub insights API at BASE_URL.

    All fetch_* methods are deliberately best-effort: any network or parse
    failure yields an empty list (or skips the item) so callers can proceed
    with partial data instead of crashing.
    """

    def __init__(self, org: str, repo: str):
        self.org = org
        self.repo = repo

    def _parse_ts(self, ts_str: Optional[str]) -> datetime:
        """Parse an ISO-8601 timestamp (tolerating a trailing 'Z'); fall back to now (UTC)."""
        if not ts_str:
            return datetime.now(timezone.utc)
        try:
            return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
        except (ValueError, AttributeError):  # was a bare `except:` — narrowed
            return datetime.now(timezone.utc)

    def fetch_commits(self) -> List[RawCommit]:
        """Fetch commits for org/repo; returns [] on any failure."""
        url = f"{BASE_URL}/commits"
        payload = {"org": self.org, "repo": self.repo}
        try:
            resp = requests.post(url, json=payload, timeout=10)
            if resp.status_code != 200:
                return []
            data = resp.json()
            commits = []
            for item in data.get("commits", []):
                try:
                    c = item.get("commit", {})
                    author_info = c.get("author", {})
                    ts = self._parse_ts(author_info.get("date"))
                    # Prefer the GitHub login over the git author name when present.
                    author_name = author_info.get("name", "Unknown")
                    if item.get("author") and "login" in item["author"]:
                        author_name = item["author"]["login"]

                    files = []
                    if "files" in item:
                        files = [f.get("filename") for f in item["files"] if "filename" in f]

                    commits.append(RawCommit(
                        commit_id=item.get("sha", ""),
                        author=author_name,
                        timestamp=ts,
                        files_changed=files
                    ))
                except Exception:
                    # Skip malformed items; keep the rest.
                    continue
            return commits
        except Exception:
            return []

    def fetch_prs(self) -> List[RawPR]:
        """Fetch pull requests for org/repo; returns [] on any failure."""
        url = f"{BASE_URL}/pull-requests"
        payload = {"org": self.org, "repo": self.repo}
        try:
            resp = requests.post(url, json=payload, timeout=15)
            if resp.status_code != 200:
                return []
            data = resp.json()

            # Adjust based on actual key.
            # If endpoint is /pull-requests, maybe key is "pull_requests" or "prs"?
            # Check generic keys if the specific one fails.
            raw_list = data.get("pull_requests", data.get("prs", []))

            prs = []
            for item in raw_list:
                try:
                    # Generic structure mapping
                    pid = str(item.get("number", item.get("id", "unknown")))
                    user = item.get("user", {})
                    author = user.get("login", "unknown")
                    created = self._parse_ts(item.get("created_at"))
                    merged = self._parse_ts(item.get("merged_at")) if item.get("merged_at") else None

                    # Files are usually not present in the list view; default to empty.
                    files = []  # item.get("files", []) if we're lucky

                    prs.append(RawPR(
                        pr_id=pid,
                        author=author,
                        created_at=created,
                        merged_at=merged,
                        files_changed=files
                    ))
                except Exception:  # was a bare `except:` — narrowed
                    continue
            return prs
        except Exception:
            return []

    def fetch_issues(self) -> List[RawIssue]:
        """Fetch issues and map them to Jira-style RawIssue records; returns [] on failure."""
        url = f"{BASE_URL}/pull-issues"
        payload = {"org": self.org, "repo": self.repo}
        try:
            resp = requests.post(url, json=payload, timeout=15)
            if resp.status_code != 200:
                return []
            data = resp.json()
            raw_list = data.get("issues", [])

            issues = []
            for item in raw_list:
                try:
                    # Skip PRs if they come through this endpoint
                    if "pull_request" in item and item["pull_request"]:
                        continue

                    iid = f"GH-{item.get('number')}"
                    title = item.get("title", "")

                    # Map to Planning Model (Jira-style).
                    # Some fields are fabricated so the Planning Engine can run.
                    assignees = item.get("assignees", [])
                    assignee = assignees[0].get("login") if assignees else "unassigned"

                    # Module is derived from a "module:<name>" label convention.
                    labels = [l.get("name") for l in item.get("labels", [])]
                    module_id = "general"
                    for l in labels:
                        if "module:" in l:  # Convention?
                            module_id = l.replace("module:", "")
                            break

                    # Sprint is derived from the milestone title when present.
                    sprint_id = "SPR-LIVE"  # Default bucket
                    if item.get("milestone"):
                        sprint_id = f"SPR-{item['milestone'].get('title')}"

                    issues.append(RawIssue(
                        issue_id=iid,
                        sprint_id=sprint_id,
                        title=title,
                        issue_type="Story",  # Default
                        story_points=1,  # Default
                        assignee=assignee,
                        module_id=module_id,
                        created_at=self._parse_ts(item.get("created_at"))
                    ))
                except Exception:  # was a bare `except:` — narrowed
                    continue
            return issues
        except Exception:
            return []

    def fetch_activity(self) -> List[RawIssueEvent]:
        # Maps activity timeline to issue events (transitions).
        # Placeholder: reliably mapping a generic activity stream to "status
        # changes" needs more information about the upstream API.
        return []
backend/backend_app/core/metrics.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict
2
+ import math
3
+ from backend_app.core.models import Signal, ModuleMetric, PersonMetric
4
+ from backend_app.core.explain import generate_explanation
5
+
6
def compute_metrics(module_id: str, signals: List[Signal], max_total_score_global: float) -> ModuleMetric:
    """Aggregate per-person knowledge signals for one module into a ModuleMetric.

    Args:
        module_id: Identifier of the module being scored.
        signals: All knowledge signals (commits, reviews, ...) attributed to
            this module.
        max_total_score_global: Highest total signal weight across all modules;
            used to normalize this module's criticality to 0..1.

    Returns:
        ModuleMetric with risk index (0-100), severity bucket, concentration
        metrics (top-1/top-2 share, bus factor), per-person breakdown,
        human-readable evidence lines and a plain-language explanation.
    """
    # No signals means no data: short-circuit before doing any aggregation
    # (previously this check ran only after the per-person work).
    if not signals:
        return ModuleMetric(
            module_id=module_id,
            risk_index=0.0,
            severity="HEALTHY",
            top1_share_pct=0.0,
            top2_share_pct=0.0,
            bus_factor=0,
            total_knowledge_weight=0.0,
            signals_count=0,
            people=[],
            evidence=[],
            plain_explanation="No activity detected."
        )

    # 1. Aggregate weighted score and per-signal-type counts per person.
    person_scores: Dict[str, float] = {}
    person_signal_counts: Dict[str, Dict[str, int]] = {}  # person -> type -> count
    total_score = 0.0
    for s in signals:
        total_score += s.weight
        person_scores[s.person_id] = person_scores.get(s.person_id, 0.0) + s.weight
        counts = person_signal_counts.setdefault(s.person_id, {})
        counts[s.signal_type] = counts.get(s.signal_type, 0) + 1

    # 2. Person metrics, ordered by descending knowledge score.
    people_metrics: List[PersonMetric] = []
    for person_id, score in sorted(person_scores.items(), key=lambda x: x[1], reverse=True):
        share = score / total_score if total_score > 0 else 0.0
        people_metrics.append(PersonMetric(
            person_id=person_id,
            knowledge_score=score,
            share_pct=share,  # 0-1 fraction; formatted as a percentage only for display
            type_counts=person_signal_counts.get(person_id, {})
        ))

    # 3. Module-level concentration metrics.
    top1_share = people_metrics[0].share_pct if len(people_metrics) > 0 else 0.0
    top2_share = people_metrics[1].share_pct if len(people_metrics) > 1 else 0.0
    # Bus factor: number of people holding at least 10% of module knowledge.
    bus_factor = sum(1 for p in people_metrics if p.share_pct >= 0.10)

    # Risk Index Formula (weights: 0.6 silo, 0.25 bus, 0.15 criticality):
    #   silo        = max((top1_share - 0.4)/0.6, 0)      -> 0 below 40% share, 1 at 100%
    #   bus         = max((2 - bus_factor)/2, 0)          -> 1 with nobody, 0 with 2+ holders
    #   criticality = total_score / max_total_score_global (relative module weight)
    silo_factor = max((top1_share - 0.4) / 0.6, 0.0)
    bus_risk_factor = max((2 - bus_factor) / 2.0, 0.0)
    criticality_factor = max(
        total_score / max_total_score_global if max_total_score_global > 0 else 0.0,
        0.0,
    )

    risk_index_raw = 100.0 * (0.6 * silo_factor + 0.25 * bus_risk_factor + 0.15 * criticality_factor)

    # NOTE: small-sample dampening (scaling by len(signals)/10) was removed
    # deliberately because it suppressed real risk on small repos.
    risk_index = round(min(risk_index_raw, 100.0), 2)

    # Severity buckets (thresholds deliberately low to surface more risk).
    if risk_index >= 60:
        severity = "SEVERE"
    elif risk_index >= 30:
        severity = "MODERATE"
    else:
        severity = "HEALTHY"

    # 4. Evidence: one readable line per top contributor, e.g.
    #    "dev_a: share 84.0% | commits=2, approvals=2"
    # Commit count is always shown; review counts only when non-zero.
    evidence_lines = []
    for p in people_metrics[:5]:
        counts = p.type_counts
        parts = [f"commits={counts.get('commit', 0)}"]
        approvals = counts.get('review_approval', 0)
        if approvals > 0:
            parts.append(f"approvals={approvals}")
        comments = counts.get('review_comment', 0)
        if comments > 0:
            parts.append(f"comments={comments}")
        changes = counts.get('review_changes_requested', 0)
        if changes > 0:
            parts.append(f"changes_requested={changes}")
        evidence_lines.append(f"{p.person_id}: share {p.share_pct*100:.1f}% | {', '.join(parts)}")

    mod_metric = ModuleMetric(
        module_id=module_id,
        risk_index=risk_index,
        severity=severity,
        top1_share_pct=top1_share,
        top2_share_pct=top2_share,
        bus_factor=bus_factor,
        total_knowledge_weight=total_score,
        signals_count=len(signals),
        people=people_metrics,
        evidence=evidence_lines,
        plain_explanation=""
    )

    # Fill in the plain-language explanation from the finished metric.
    mod_metric.plain_explanation = generate_explanation(mod_metric)

    return mod_metric
backend/backend_app/core/models.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional, Dict
3
+ from datetime import datetime
4
+
5
+ # --- Input Models ---
6
+
7
class RawPR(BaseModel):
    """A pull request as ingested from the data source, before signal processing."""
    pr_id: str
    author: str
    created_at: datetime
    merged_at: Optional[datetime] = None  # None while the PR is still open/unmerged
    files_changed: List[str]  # repo-relative paths touched by the PR
13
+
14
class RawReview(BaseModel):
    """A single review action on a pull request."""
    pr_id: str
    reviewer: str
    state: str  # APPROVED, CHANGES_REQUESTED, COMMENTED
    timestamp: datetime
19
+
20
class RawCommit(BaseModel):
    """A single commit as ingested from the data source."""
    commit_id: str
    author: str
    timestamp: datetime
    message: Optional[str] = ""
    files_changed: List[str]  # repo-relative paths touched by the commit
26
+
27
# Dictionary mapping module_id -> list of path prefixes owned by that module.
# An empty-string prefix matches every path (used for a catch-all "root" module).
ModulesConfig = Dict[str, List[str]]
29
+
30
+
31
+ # --- Output / Internal Models ---
32
+
33
class Signal(BaseModel):
    """A weighted knowledge signal: one person interacting with one module once."""
    person_id: str
    module_id: str
    signal_type: str  # e.g. "commit", "pr_created", "review_approval", ...
    weight: float  # contribution to the person's knowledge score
    timestamp: datetime
    source_id: str  # pr_id or commit_id the signal was derived from
40
+
41
class PersonMetric(BaseModel):
    """Per-person aggregate of knowledge signals within one module."""
    person_id: str
    knowledge_score: float  # sum of signal weights for this person in the module
    share_pct: float  # fraction (0-1) of the module's total knowledge weight
    type_counts: Dict[str, int] = Field(default_factory=dict)  # signal_type -> count
46
+
47
class ModuleMetric(BaseModel):
    """Knowledge-concentration risk assessment for a single module."""
    module_id: str
    risk_index: float  # 0-100 composite risk score
    severity: str  # SEVERE, MODERATE, HEALTHY
    top1_share_pct: float  # knowledge share (0-1) of the single biggest holder
    top2_share_pct: float  # knowledge share (0-1) of the second-biggest holder
    bus_factor: int  # number of people holding a meaningful share
    total_knowledge_weight: float  # sum of all signal weights in the module
    signals_count: int
    people: List[PersonMetric]  # ordered by descending knowledge score
    evidence: List[str]  # human-readable per-contributor summary lines
    plain_explanation: str  # generated plain-language explanation of the risk
59
+
60
class ComputeHeadline(BaseModel):
    """One-line summary of a risk computation, for dashboard display."""
    headline: str
62
+
63
class LoadStatus(BaseModel):
    """Counts of ingested records, returned after a data-load operation."""
    prs_count: int
    reviews_count: int
    commits_count: int
    modules_count: int
backend/backend_app/core/planning_engine.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timezone, timedelta
2
+ from typing import List, Dict, Optional, Tuple
3
+ import math
4
+
5
+ from backend_app.core.planning_models import (
6
+ RawSprint, RawIssue, RawIssueEvent,
7
+ SprintMetrics, CorrectionRule, AutoCorrectHeadline
8
+ )
9
+ from backend_app.core.models import Signal, RawPR, RawReview
10
+ # We need access to GitHub data (processed signals or raw)
11
+
12
# Heuristic Constants
DEFAULT_POINTS_PER_DAY_DEV = 1.0  # Fallback per-developer velocity when none can be derived
# NOTE(review): compute_autocorrect currently hard-codes 0.7/0.3 for the
# reality-gap blend instead of reading these two weights — confirm intent.
REALITY_GAP_WEIGHT_POINTS = 0.6
REALITY_GAP_WEIGHT_REVIEW = 0.4
16
+
17
def compute_autocorrect(
    sprints: List[RawSprint],
    issues: List[RawIssue],
    events: List[RawIssueEvent],
    github_prs: List[RawPR],
    github_reviews: List[RawReview],
    modules_config: Dict[str, List[str]]
) -> Tuple[List[SprintMetrics], List[CorrectionRule], str]:
    """Compare each sprint's plan against observed delivery and review reality.

    For every sprint this computes completed vs expected story points (linear
    burn), a 0-100 "reality gap" score blended from point slippage and PR
    review delays, a predicted slip based on the actual pace so far, and
    human-readable drivers/recommendations. Past completed sprints also feed
    _learn_correction_rules to derive per-(team, module, type) multipliers.

    NOTE(review): "now" is hard-coded to 2026-02-07 UTC for the demo dataset.
    NOTE(review): comparisons like `sprint.start_date <= NOW` assume all
    datetimes are timezone-aware UTC — a naive datetime here raises TypeError;
    confirm against the loaders.
    NOTE(review): `modules_config` is currently unused in this function.

    Returns:
        (per-sprint metrics, learned correction rules, headline string for the
        currently-active sprint).
    """

    # 1. Organize Data
    # Issues per sprint (issues referencing unknown sprints are dropped).
    issues_by_sprint = {s.sprint_id: [] for s in sprints}
    for i in issues:
        if i.sprint_id in issues_by_sprint:
            issues_by_sprint[i.sprint_id].append(i)

    # Events by issue (events for unknown issues are dropped).
    events_by_issue = {i.issue_id: [] for i in issues}
    for e in events:
        if e.issue_id in events_by_issue:
            events_by_issue[e.issue_id].append(e)

    # Sort each issue's events chronologically.
    for iid in events_by_issue:
        events_by_issue[iid].sort(key=lambda x: x.timestamp)

    # 2. Historical Analysis: learn correction multipliers from completed work.
    correction_rules = _learn_correction_rules(sprints, issues, events_by_issue)

    # 3. Compute metrics for every sprint (past, current and future).
    sprint_metrics_list = []

    # Simulated "current" time for the demo dataset (2026-02-07 UTC):
    # Sprint 1 (Jan 15-29) is done, Sprint 2 (Feb 1-14) is in progress.
    NOW = datetime(2026, 2, 7, 14, 0, 0, tzinfo=timezone.utc)

    headline = "No active sprint analysis."

    for sprint in sprints:
        # Classify the sprint relative to NOW.
        is_current = sprint.start_date <= NOW <= sprint.end_date
        is_past = sprint.end_date < NOW

        # Planned linear burn rate.
        total_points = sprint.planned_story_points
        days_duration = (sprint.end_date - sprint.start_date).days + 1
        # NOTE(review): points_per_day_planned is computed but never used below.
        points_per_day_planned = total_points / days_duration if days_duration > 0 else 0

        # Points completed within the sprint window (past sprints) or up to
        # NOW (current sprint).
        completed_points = 0

        sprint_issues = issues_by_sprint[sprint.sprint_id]

        # Per-module breakdown: mod_id -> {planned, completed} story points.
        mod_stats = {}

        for issue in sprint_issues:
            mid = issue.module_id
            if mid not in mod_stats: mod_stats[mid] = {"planned": 0, "completed": 0}
            mod_stats[mid]["planned"] += issue.story_points

            # An issue counts as done if its first transition to DONE falls
            # inside [sprint start, cutoff].
            cutoff = NOW if is_current else sprint.end_date

            done_time = None
            evt_list = events_by_issue.get(issue.issue_id, [])
            for evt in evt_list:
                if evt.to_status == "DONE":
                    done_time = evt.timestamp
                    break  # assume once done, stays done

            if done_time and done_time <= cutoff and done_time >= sprint.start_date:
                completed_points += issue.story_points
                mod_stats[mid]["completed"] += issue.story_points

        # --- Gap Analysis ---
        # Expected completion under a linear burn: 100% at sprint end for
        # past sprints, proportional to elapsed days for the current one.
        if is_past:
            time_progress_pct = 1.0
        else:
            days_passed = (NOW - sprint.start_date).days
            if days_passed < 0: days_passed = 0
            time_progress_pct = days_passed / days_duration

        expected_points = total_points * time_progress_pct
        points_gap = expected_points - completed_points  # may be negative if ahead

        # Review-delay signal from GitHub: PRs created inside the sprint window.
        sprint_prs = []
        for pr in github_prs:
            # NOTE(review): same aware-datetime assumption as above.
            if sprint.start_date <= pr.created_at <= sprint.end_date:
                sprint_prs.append(pr)

        # Days from PR creation to first approval; for the current sprint,
        # still-unapproved PRs waiting more than a day count at their current
        # wait time. Small dataset, so the linear scan is fine.
        review_delays = []
        for pr in sprint_prs:
            approval_ts = None
            for rev in github_reviews:
                if rev.pr_id == pr.pr_id and rev.state == "APPROVED":
                    approval_ts = rev.timestamp
                    break

            if approval_ts:
                delay = (approval_ts - pr.created_at).total_seconds() / 86400.0  # days
                review_delays.append(delay)
            elif is_current:
                current_wait = (NOW - pr.created_at).total_seconds() / 86400.0
                if current_wait > 1.0:  # only count waits longer than a day
                    review_delays.append(current_wait)

        # 0.5 days is the assumed-neutral default when there is no review data.
        avg_review_delay = sum(review_delays)/len(review_delays) if review_delays else 0.5

        # 0.6 days is treated as a healthy baseline review turnaround.
        review_gap = max(0, avg_review_delay - 0.6)

        # Reality Gap Score (0-100): 2x multiplier means being 50% behind the
        # linear burn maxes out the points component.
        pct_behind = points_gap / total_points if total_points > 0 else 0
        score_points = min(100, max(0, pct_behind * 100 * 2))

        score_review = min(100, review_gap * 20)  # 1 extra day late = 20 pts, 5 days = 100

        reality_gap_score = int(score_points * 0.7 + score_review * 0.3)

        # Prediction: extrapolate the observed pace to estimate slip.
        predicted_slip = 0
        predicted_finish = sprint.end_date

        if is_current and completed_points < total_points and time_progress_pct > 0.1:
            days_spent = (NOW - sprint.start_date).days
            if days_spent < 1: days_spent = 1
            avg_pace = completed_points / days_spent  # points per day, actual

            remaining = total_points - completed_points
            if avg_pace > 0:
                days_needed = remaining / avg_pace
                finish_date = NOW + timedelta(days=days_needed)
                slip = (finish_date - sprint.end_date).days
                if slip > 0:
                    predicted_slip = int(slip)
                    predicted_finish = finish_date
            else:
                # Zero completed points so far: treat as stalled.
                predicted_slip = 99
                predicted_finish = NOW + timedelta(days=30)

        # Explainability: modules completing less than 70% of the expected
        # pace are flagged as behind.
        top_drivers = []
        bad_modules = []
        for m, stats in mod_stats.items():
            if stats["planned"] > 0:
                p = stats["completed"] / stats["planned"]
                if p < (time_progress_pct * 0.7):  # 30% buffer
                    bad_modules.append(m)

        if bad_modules:
            top_drivers.append(f"Modules behind schedule: {', '.join(bad_modules)}")

        if review_gap > 1.0:
            top_drivers.append(f"High review delays (avg {avg_review_delay:.1f}d)")

        if points_gap > 5:
            top_drivers.append(f"Point completion gap: {points_gap} pts behind plan")

        # Recommendations (simple heuristic rules).
        actions = []
        if is_current and "payments" in bad_modules and review_gap > 1.0:
            actions.append("Payments module is bottlenecked by reviews. Assign 1 extra reviewer.")
        if predicted_slip > 2:
            actions.append(f"Predicted slip {predicted_slip} days. Reduce scope by {int(points_gap)} pts.")

        metric = SprintMetrics(
            sprint_id=sprint.sprint_id,
            name=sprint.name,
            start_date=sprint.start_date,
            end_date=sprint.end_date,
            planned_story_points=total_points,
            completed_story_points=completed_points,
            completion_pct=round(completed_points / total_points * 100, 1) if total_points else 0,
            reality_gap_score=reality_gap_score,
            points_completion_gap=round(points_gap, 1),
            predicted_slip_days=predicted_slip,
            predicted_finish_date=predicted_finish.strftime("%Y-%m-%d"),
            module_breakdown=mod_stats,
            top_drivers=top_drivers,
            recommended_actions=actions
        )
        sprint_metrics_list.append(metric)

        # The headline reflects the currently-active sprint only.
        if is_current:
            drivers_short = "; ".join(top_drivers[:1]) if top_drivers else "on track"
            headline = f"{sprint.name} is trending {predicted_slip} days late: {drivers_short}."

    return sprint_metrics_list, correction_rules, headline
244
+
245
+
246
def _learn_correction_rules(sprints: List[RawSprint], issues: List[RawIssue], events_by_issue: Dict[str, List[RawIssueEvent]]) -> List[CorrectionRule]:
    """Learn per-(team, module, issue-type) correction multipliers from history.

    For each issue that has both an IN_PROGRESS and a DONE transition we
    compare actual elapsed days against a planned duration derived from the
    sprint's average velocity:

        planned_days = story_points / (sprint planned points per day)
        ratio        = actual_days / planned_days

    Ratios are averaged per (team, module, issue_type) and clamped to
    [1.0, 2.5] (we only ever correct estimates upward).

    Args:
        sprints: All known sprints (used to derive velocities).
        issues: All issues; only fully-completed ones contribute.
        events_by_issue: Chronologically sorted status events per issue id.

    Returns:
        One CorrectionRule per observed (team, module, issue_type) group.
    """
    rules = []

    # (team, module, type) -> list of actual/planned ratios
    history: Dict[Tuple[str, str, str], List[float]] = {}

    # Pre-compute each sprint's planned velocity in points per day.
    sprint_velocities = {}  # sprint_id -> points/day
    for s in sprints:
        duration = (s.end_date - s.start_date).days + 1
        vel = s.planned_story_points / duration if duration > 0 else 1.0
        sprint_velocities[s.sprint_id] = vel

    for issue in issues:
        # Only fully completed issues carry a learnable signal. If an issue
        # bounced between states, the LAST IN_PROGRESS/DONE timestamps win.
        evts = events_by_issue.get(issue.issue_id, [])
        start_ts = None
        end_ts = None
        for e in evts:
            if e.to_status == "IN_PROGRESS": start_ts = e.timestamp
            if e.to_status == "DONE": end_ts = e.timestamp

        if start_ts and end_ts:
            actual_days = (end_ts - start_ts).total_seconds() / 86400.0
            if actual_days < 0.1: actual_days = 0.1  # floor sub-2.4h completions

            # Planned duration from the sprint's velocity. Guard against a
            # zero velocity (sprint planned with 0 points), which previously
            # caused a ZeroDivisionError here.
            vel = sprint_velocities.get(issue.sprint_id, 1.0)
            if vel <= 0:
                vel = 1.0
            planned_days = issue.story_points / vel
            if planned_days <= 0:
                # Zero-point issues have no plan to compare against (and would
                # previously crash with a ZeroDivisionError); skip them.
                continue

            ratio = actual_days / planned_days

            # The dummy dataset has a single team; keep the historical key
            # shape so real team ids can slot in later.
            key = ("team_alpha", issue.module_id, issue.issue_type)
            if key not in history: history[key] = []
            history[key].append(ratio)

    # Compile one rule per group from the averaged ratios.
    for key, ratios in history.items():
        team, mod, itype = key
        avg_ratio = sum(ratios) / len(ratios)
        # Never correct downward; cap the upward correction at 2.5x.
        multiplier = max(1.0, min(avg_ratio, 2.5))

        expl = f"Historically {mod}/{itype} tasks take {multiplier:.1f}x longer than planned."

        rules.append(CorrectionRule(
            team_id=team,
            module_id=mod,
            issue_type=itype,
            multiplier=round(multiplier, 2),
            samples_count=len(ratios),
            explanation=expl
        ))

    return rules
backend/backend_app/core/planning_loader.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import List, Dict, Tuple
3
+ from backend_app.core.config import JIRA_SPRINTS_FILE, JIRA_ISSUES_FILE, JIRA_EVENTS_FILE
4
+ from backend_app.core.planning_models import RawSprint, RawIssue, RawIssueEvent
5
+
6
def load_jira_files() -> Tuple[List[RawSprint], List[RawIssue], List[RawIssueEvent]]:
    """Load and validate the three Jira dump files configured in core.config.

    Returns:
        (sprints, issues, events) parsed into their pydantic models.

    Raises:
        FileNotFoundError: if any of the three files is missing; the message
            names the missing path(s) instead of a generic complaint.
        pydantic.ValidationError: if a record does not match its model.
    """
    missing = [p for p in (JIRA_SPRINTS_FILE, JIRA_ISSUES_FILE, JIRA_EVENTS_FILE) if not p.exists()]
    if missing:
        names = ", ".join(str(p) for p in missing)
        raise FileNotFoundError(f"One or more Jira data files are missing: {names}")

    def _load(path, model):
        # Explicit encoding: JSON dumps are UTF-8 regardless of the platform default.
        with open(path, 'r', encoding='utf-8') as f:
            return [model(**item) for item in json.load(f)]

    sprints = _load(JIRA_SPRINTS_FILE, RawSprint)
    issues = _load(JIRA_ISSUES_FILE, RawIssue)
    events = _load(JIRA_EVENTS_FILE, RawIssueEvent)

    return sprints, issues, events
backend/backend_app/core/planning_models.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional, Dict
3
+ from datetime import datetime
4
+
5
+ # --- Jira Data Models ---
6
+
7
class RawSprint(BaseModel):
    """A sprint as loaded from the Jira-style data dump."""
    sprint_id: str
    name: str
    start_date: datetime
    end_date: datetime
    team_id: str
    planned_story_points: int  # total points committed at sprint planning
14
+
15
class RawIssue(BaseModel):
    """A Jira-style issue assigned to a sprint."""
    issue_id: str
    sprint_id: str
    title: str
    issue_type: str  # Story|Bug|Task
    story_points: int
    assignee: str
    module_id: str  # module the work belongs to (used for breakdowns)
    created_at: datetime
24
+
25
class RawIssueEvent(BaseModel):
    """A single status transition on an issue (e.g. TODO -> IN_PROGRESS)."""
    issue_id: str
    timestamp: datetime
    from_status: str
    to_status: str
30
+
31
+ # --- Planning Output Models ---
32
+
33
class SprintMetrics(BaseModel):
    """Computed plan-vs-reality assessment for one sprint."""
    sprint_id: str
    name: str
    start_date: datetime
    end_date: datetime
    planned_story_points: int
    completed_story_points: int
    completion_pct: float  # 0-100

    # Gap Metrics
    reality_gap_score: int  # 0-100 composite of point slippage and review delays
    points_completion_gap: float  # expected-minus-completed points (negative = ahead)

    # Prediction
    predicted_slip_days: int
    predicted_finish_date: str  # "YYYY-MM-DD"; kept as a string for display simplicity

    # Breakdown by module for detailed views: mod -> {planned, completed}
    module_breakdown: Dict[str, Dict[str, float]] = Field(default_factory=dict)

    # Evidence & Recommendations (human-readable)
    top_drivers: List[str]
    recommended_actions: List[str]
56
+
57
class CorrectionRule(BaseModel):
    """A learned estimate-correction multiplier for a (team, module, type) group."""
    team_id: str
    module_id: str
    issue_type: str
    multiplier: float  # actual/planned duration ratio, clamped to [1.0, 2.5]
    samples_count: int  # number of completed issues the rule was learned from
    explanation: str  # human-readable justification
64
+
65
class AutoCorrectHeadline(BaseModel):
    """One-line summary of the auto-correct analysis, for dashboard display."""
    headline: str
backend/backend_app/core/signals.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Set
2
+ from datetime import datetime
3
+ from backend_app.core.models import RawPR, RawReview, RawCommit, Signal
4
+
5
# Signal weights: reviewing code counts as stronger evidence of module
# knowledge than authoring a single commit.
WEIGHT_COMMIT = 1.0
WEIGHT_REVIEW_APPROVED = 3.0
WEIGHT_REVIEW_COMMENTED = 2.0
WEIGHT_REVIEW_CHANGES_REQUESTED = 2.5
9
+
10
def get_modules_for_paths(paths: List[str], modules_config: Dict[str, List[str]]) -> Set[str]:
    """Resolve the set of module ids that own any of the given file paths.

    A path belongs to a module when it starts with one of the module's
    configured prefixes; an empty-string prefix matches every path. Paths
    matching no module fall back to "root" when a root module is configured.
    An empty path list maps to {"root"} if available, otherwise to nothing.
    """
    if not paths:
        # No changed-path info (e.g. API limitation): fall back to root if configured.
        return {"root"} if "root" in modules_config else set()

    owners: Set[str] = set()
    has_root = "root" in modules_config

    for raw_path in paths:
        path_text = str(raw_path)
        hits = {
            module
            for module, prefixes in modules_config.items()
            if any(pfx == "" or path_text.startswith(pfx) for pfx in prefixes)
        }
        if hits:
            owners |= hits
        elif has_root:
            # Unmapped path: attribute it to the catch-all root module.
            owners.add("root")

    return owners
39
+
40
def process_signals(
    prs: List[RawPR],
    reviews: List[RawReview],
    commits: List[RawCommit],
    modules_config: Dict[str, List[str]]
) -> Dict[str, List[Signal]]:
    """Convert raw GitHub events into knowledge signals grouped by module_id.

    Every configured module gets an entry (possibly empty) so downstream
    metrics can report on zero-activity modules. Three signal sources:
    commits (author), PR creation (author — kept so risk data exists even
    when no reviews were recorded), and reviews (reviewer, weighted by state).

    Args:
        prs: Raw pull requests.
        reviews: Raw review actions; reviews referencing unknown PRs are skipped.
        commits: Raw commits.
        modules_config: module_id -> path prefixes, as used by get_modules_for_paths.

    Returns:
        Mapping of module_id -> list of Signal objects.
    """
    # Seed every configured module with an empty list.
    signals_by_module: Dict[str, List[Signal]] = {mod_id: [] for mod_id in modules_config}

    def add_signal(mod_id: str, sig: Signal):
        # setdefault covers modules discovered outside the configured set.
        signals_by_module.setdefault(mod_id, []).append(sig)

    # 1. Commits -> "commit" signal for the author in each affected module.
    for commit in commits:
        for mod_id in get_modules_for_paths(commit.files_changed, modules_config):
            add_signal(mod_id, Signal(
                person_id=commit.author,
                module_id=mod_id,
                signal_type="commit",
                weight=WEIGHT_COMMIT,  # was a hard-coded 1.0 duplicating this constant
                timestamp=commit.timestamp,
                source_id=commit.commit_id
            ))

    # 2. PR authorship -> "pr_created" signal. If reviews are missing in the
    # data source, this still yields risk data for the author.
    for pr in prs:
        for mod_id in get_modules_for_paths(pr.files_changed or [], modules_config):
            add_signal(mod_id, Signal(
                person_id=pr.author,
                module_id=mod_id,
                signal_type="pr_created",
                weight=1.5,
                timestamp=pr.created_at,
                source_id=pr.pr_id
            ))

    # 3. Reviews -> weighted signals for the reviewer, keyed off the PR's files.
    pr_map = {pr.pr_id: pr for pr in prs}
    state_weights = {
        "APPROVED": (WEIGHT_REVIEW_APPROVED, "review_approval"),
        "COMMENTED": (WEIGHT_REVIEW_COMMENTED, "review_comment"),
        "CHANGES_REQUESTED": (WEIGHT_REVIEW_CHANGES_REQUESTED, "review_changes_requested"),
    }

    for review in reviews:
        pr = pr_map.get(review.pr_id)
        if pr is None:
            continue  # review for an unknown PR

        mapped = state_weights.get(review.state)
        if mapped is None:
            continue  # unknown review state

        w, s_type = mapped
        for mod_id in get_modules_for_paths(pr.files_changed, modules_config):
            add_signal(mod_id, Signal(
                person_id=review.reviewer,
                module_id=mod_id,
                signal_type=s_type,
                weight=w,
                timestamp=review.timestamp,
                source_id=review.pr_id
            ))

    return signals_by_module
backend/backend_app/core/strategic_controller.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
from datetime import datetime, timezone

from openai import OpenAI

from backend_app.core.planning_engine import compute_autocorrect  # Re-use metrics if needed
from backend_app.integrations.supabase_client import supabase
from backend_app.state.store import store
7
+
8
# --- Configuration ---
# SECURITY: this API key was previously hardcoded and committed to the
# repository; it must be treated as leaked and rotated. Read it from the
# environment instead of source control.
FEATHERLESS_API_KEY = os.getenv("FEATHERLESS_API_KEY", "")
FEATHERLESS_BASE_URL = "https://api.featherless.ai/v1"
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
12
+
13
def get_strategic_audit():
    """
    Build an executive briefing reconciling the Jira plan with GitHub activity.

    Finds the sprint active at a fixed demo "now", collects its issues plus the
    GitHub commits/PRs inside the sprint window, and asks the LLM for a
    "Strategic Drift" analysis.

    Returns:
        str: The LLM-generated briefing, or an error message on failure.
    """

    def _as_utc(dt):
        # Store timestamps may be naive; treat naive values as UTC so that
        # comparisons against aware datetimes never raise TypeError.
        # (Replaces five copy-pasted inline ternaries from the original.)
        return dt.replace(tzinfo=timezone.utc) if dt.tzinfo is None else dt

    # Fixed demo clock matching the dummy-data timeline (2026-02-07), not wall
    # time — keeps sprint selection deterministic for the demo dataset.
    NOW = datetime(2026, 2, 7, 15, 0, 0, tzinfo=timezone.utc)

    # 1. Jira: find the sprint whose window contains NOW; fall back to the
    # last known sprint when none is currently active.
    current_sprint = None
    if store.sprints:
        for s in store.sprints:
            if _as_utc(s.start_date) <= NOW <= _as_utc(s.end_date):
                current_sprint = s
                break
        if not current_sprint:
            current_sprint = store.sprints[-1]

    jira_summary = {
        "sprint": current_sprint.name if current_sprint else "Unknown",
        "planned_points": current_sprint.planned_story_points if current_sprint else 0,
        "team": current_sprint.team_id if current_sprint else "Unknown",
        "active_issues": [],
    }

    # Filter issues for this sprint (plain string match on sprint_id).
    issues_list = []
    if current_sprint:
        issues_list = [i for i in store.issues if i.sprint_id == current_sprint.sprint_id]

    for i in issues_list:
        jira_summary["active_issues"].append({
            "id": i.issue_id,
            "title": i.title,
            "points": i.story_points,
            "module": i.module_id,
            "assignee": i.assignee,
            "type": i.issue_type,  # Assuming stored as issue_type
        })

    # GitHub activity window = the sprint window (or NOW when no sprint data).
    sprint_start = _as_utc(current_sprint.start_date) if current_sprint else NOW
    sprint_end = _as_utc(current_sprint.end_date) if current_sprint else NOW

    github_summary = {
        "recent_commits_count": 0,
        "recent_prs": [],
        "active_contributors": set(),
    }

    # Scan commits that fall inside the sprint window.
    for c in store.commits:
        if sprint_start <= _as_utc(c.timestamp) <= sprint_end:
            github_summary["recent_commits_count"] += 1
            github_summary["active_contributors"].add(c.author)

    # Scan PRs: relevant if created OR merged inside the sprint window.
    for p in store.prs:
        relevant = sprint_start <= _as_utc(p.created_at) <= sprint_end
        if p.merged_at and sprint_start <= _as_utc(p.merged_at) <= sprint_end:
            relevant = True

        if relevant:
            github_summary["recent_prs"].append({
                "id": p.pr_id,
                "author": p.author,
                "files": p.files_changed[:2],  # truncate to keep the prompt small
                "merged": bool(p.merged_at),
            })
            github_summary["active_contributors"].add(p.author)

    # Sets are not JSON-serializable; convert before json.dumps below.
    github_summary["active_contributors"] = list(github_summary["active_contributors"])

    # 2. Construct Prompt for LLM
    system_prompt = (
        "You are a 'Strategic Engineering Controller.' Your job is to reconcile two conflicting data sources: "
        "Jira (The Plan) and GitHub (The Technical Reality). "
        "You must identify 'Strategic Drift'—the gap between what the company thinks it's doing and what is actually happening. "
        "Output your analysis in a concise, high-impact 'Executive Briefing' format."
    )

    user_prompt = f"""
    DATA INPUTS:

    Jira Sprint Data:
    {json.dumps(jira_summary, indent=2, default=str)}

    GitHub Activity:
    {json.dumps(github_summary, indent=2, default=str)}

    TASK: Analyze these inputs and provide:

    1. The Reality Score: A percentage (0-100%) of how "on track" the project truly is compared to the Jira board.
    2. The Shadow Work Audit: Identify what percentage of time is being spent on tasks NOT in Jira (e.g., maintenance, mentoring, or technical debt) based on GitHub activity vs Jira tickets.
    3. The Tribal Knowledge Hero: Identify the developer who is providing the most "unseen" value through mentoring and code reviews (infer from PRs/commits).
    4. Financial Risk Alert: Estimate the dollar cost of current delays (assume $100/hr avg cost) and suggest one specific resource reallocation to fix it.
    5. Executive Summary: A 3-sentence briefing for the CEO.

    Format the output clearly with headers. Be direct and concise.
    """

    # 3. Call LLM (Featherless exposes an OpenAI-compatible endpoint).
    try:
        client = OpenAI(
            base_url=FEATHERLESS_BASE_URL,
            api_key=FEATHERLESS_API_KEY,
        )

        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.7,
            max_tokens=600,
        )

        return response.choices[0].message.content

    except Exception as e:
        # Best-effort endpoint: surface the failure as text rather than raising.
        return f"Error generating strategic audit: {str(e)}"
149
+
150
def get_latest_jira_payload():
    """
    Return the most recent 'jira_payload' from the Supabase 'jira_data' table.

    Rows are ordered by 'synced_at' descending and only the newest one is
    fetched. Returns None when the client is unavailable, the table is empty,
    or the query fails.
    """
    if not supabase:
        print("Warning: Supabase client not initialized.")
        return None

    try:
        result = (
            supabase.table("jira_data")
            .select("jira_payload")
            .order("synced_at", desc=True)
            .limit(1)
            .execute()
        )
        # Empty result set -> no payload yet.
        return result.data[0]['jira_payload'] if result.data else None
    except Exception as exc:
        print(f"Error fetching Jira payload: {exc}")
        return None
167
+
168
def analyze_jira_from_db():
    """
    Compare the latest Jira payload stored in Supabase with GitHub reality.

    Fetches the newest 'jira_payload' row, summarizes the in-memory GitHub
    store (commit sample plus open/recently-merged PRs), and asks the
    Featherless-hosted LLM for an alignment analysis.

    Returns:
        str: The LLM analysis, or a human-readable error string on failure.
    """
    # 1. The Plan: newest Jira payload from the database.
    jira_payload = get_latest_jira_payload()
    if not jira_payload:
        return "No Jira data found in database."

    # 2. The Reality: summarize store data (populated via /load_data or
    # /load_live_data). The raw Jira payload is passed straight to the LLM to
    # interpret, alongside a structured GitHub summary.
    now_utc = datetime.now(timezone.utc)

    def _as_utc(dt):
        # Store timestamps may be naive; assume UTC so the aware-vs-naive
        # subtraction below cannot raise TypeError. (Bug fix: the original
        # subtracted p.merged_at from an aware datetime without normalizing.)
        return dt.replace(tzinfo=timezone.utc) if dt.tzinfo is None else dt

    def _is_active_pr(p):
        # "Active" = still open, or merged within the last 14 days.
        if not p.merged_at:
            return True
        return (now_utc - _as_utc(p.merged_at)).days < 14

    github_summary = {
        "total_commits": len(store.commits),
        # First 20 commits as stored — presumably the most recent, but the
        # ordering depends on how the store was loaded (TODO confirm).
        "recent_commits": [
            {
                "author": c.author,
                "message": c.message if hasattr(c, 'message') else "",
                "timestamp": str(c.timestamp),
            }
            for c in store.commits[:20]
        ],
        "active_prs": [
            {
                "id": str(p.pr_id),
                "author": p.author,
                "status": "merged" if p.merged_at else "open",
                "created_at": str(p.created_at),
            }
            for p in store.prs
            if _is_active_pr(p)
        ],
    }

    # 3. Construct Prompt
    system_prompt = (
        "You are an expert Engineering Analyst. Your goal is to compare the planned work (Jira) "
        "against the actual engineering activity (GitHub) to identify discrepancies, risks, and "
        "undocumented work."
    )

    user_prompt = f"""
    JIRA DATA (The Plan):
    {json.dumps(jira_payload, indent=2, default=str)}

    GITHUB DATA (The Reality):
    {json.dumps(github_summary, indent=2, default=str)}

    TASK:
    Analyze the alignment between the Jira plan and GitHub activity.
    1. Identify any work in GitHub that is not tracked in Jira (Shadow Work).
    2. Identify any Jira items that show no corresponding GitHub activity (Stalled Work).
    3. Provide a 'Reality Score' (0-100%) indicating how well the plan matches reality.
    4. Highlight top risks.

    Output in a clear, executive summary format.
    """

    # 4. Call Featherless API (OpenAI-compatible endpoint).
    try:
        client = OpenAI(
            base_url=FEATHERLESS_BASE_URL,
            api_key=FEATHERLESS_API_KEY,
        )

        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.7,
            max_tokens=800,
        )

        return response.choices[0].message.content

    except Exception as e:
        # Best-effort endpoint: surface the failure as text rather than raising.
        return f"Error generating analysis: {str(e)}"
backend/backend_app/integrations/__pycache__/repo_api.cpython-311.pyc ADDED
Binary file (6.34 kB). View file
 
backend/backend_app/integrations/__pycache__/repo_api.cpython-312.pyc ADDED
Binary file (5.56 kB). View file