# Source path: Datathon/backend/backend_app/core/planning_engine.py
# Hosting-page residue: uploaded by RishiXD ("Upload 67 files"), commit b23ff00 (verified).
from datetime import datetime, timezone, timedelta
from typing import List, Dict, Optional, Tuple
import math
from backend_app.core.planning_models import (
RawSprint, RawIssue, RawIssueEvent,
SprintMetrics, CorrectionRule, AutoCorrectHeadline
)
from backend_app.core.models import Signal, RawPR, RawReview
# We need access to GitHub data (processed signals or raw)
# Heuristic constants.
# NOTE(review): none of these three names is referenced by the functions
# visible in this file. compute_autocorrect blends its reality-gap score with
# the literal weights 0.7 (points) / 0.3 (review) rather than the 0.6 / 0.4
# declared here -- confirm which pair is intended before wiring them in.
DEFAULT_POINTS_PER_DAY_DEV = 1.0 # Fallback (presumably story points per developer per day -- TODO confirm)
REALITY_GAP_WEIGHT_POINTS = 0.6  # presumably the weight of the points-completion component of the score
REALITY_GAP_WEIGHT_REVIEW = 0.4  # presumably the weight of the review-delay component of the score
def compute_autocorrect(
    sprints: List[RawSprint],
    issues: List[RawIssue],
    events: List[RawIssueEvent],
    github_prs: List[RawPR],
    github_reviews: List[RawReview],
    modules_config: Dict[str, List[str]],
    now: Optional[datetime] = None,
) -> Tuple[List[SprintMetrics], List[CorrectionRule], str]:
    """Compute per-sprint "reality gap" metrics, correction rules and a headline.

    For every sprint the function compares the story points completed inside
    the sprint window against a linear burn expectation, mixes in a review
    delay signal derived from GitHub PRs/reviews, projects a finish date for
    the sprint that contains ``now``, and collects explanatory drivers and
    recommended actions.

    Args:
        sprints: Sprints to analyse; each needs ``sprint_id``, ``name``,
            ``start_date``, ``end_date`` and ``planned_story_points``.
        issues: Issues; those whose ``sprint_id`` matches no sprint are ignored.
        events: Issue status transitions; events for unknown issues are ignored.
        github_prs: Pull requests; a PR belongs to a sprint when its
            ``created_at`` lies inside the sprint window (inclusive).
        github_reviews: Reviews; the first ``"APPROVED"`` review in list order
            for a PR supplies its approval timestamp.
        modules_config: Currently unused by this function.
        now: Reference "current time". Defaults to the simulated clock
            2026-02-07 14:00 UTC that this module was written against.

    Returns:
        A tuple ``(sprint_metrics, correction_rules, headline)``. The headline
        describes the last sprint in ``sprints`` whose window contains ``now``,
        or ``"No active sprint analysis."`` when there is none.

    Note:
        Naive datetimes are interpreted as UTC so they can be compared with
        the timezone-aware reference time instead of raising ``TypeError``.
    """
    if now is None:
        # Simulated "current time" of the demo dataset.
        now = datetime(2026, 2, 7, 14, 0, 0, tzinfo=timezone.utc)
    now = _ensure_aware(now)

    # 1. Organize data: issues per sprint, time-ordered events per issue.
    issues_by_sprint = {s.sprint_id: [] for s in sprints}
    for issue in issues:
        if issue.sprint_id in issues_by_sprint:
            issues_by_sprint[issue.sprint_id].append(issue)

    events_by_issue = {i.issue_id: [] for i in issues}
    for evt in events:
        if evt.issue_id in events_by_issue:
            events_by_issue[evt.issue_id].append(evt)
    for evt_list in events_by_issue.values():
        evt_list.sort(key=lambda e: _ensure_aware(e.timestamp))

    # 2. Historical analysis (correction multipliers).
    correction_rules = _learn_correction_rules(sprints, issues, events_by_issue)

    # Index approvals once instead of rescanning all reviews for every PR.
    first_approval = _first_approval_by_pr(github_reviews)

    # 3. Per-sprint metrics.
    sprint_metrics_list = []
    headline = "No active sprint analysis."

    for sprint in sprints:
        start = _ensure_aware(sprint.start_date)
        end = _ensure_aware(sprint.end_date)
        is_current = start <= now <= end
        is_past = end < now

        total_points = sprint.planned_story_points
        days_duration = (end - start).days + 1

        # Completed work counts only if the first DONE transition happened
        # inside the window: start -> now (current sprint) or start -> end.
        cutoff = now if is_current else end
        completed_points = 0
        mod_stats = {}  # module_id -> {"planned": pts, "completed": pts}
        for issue in issues_by_sprint[sprint.sprint_id]:
            mid = issue.module_id
            stats = mod_stats.setdefault(mid, {"planned": 0, "completed": 0})
            stats["planned"] += issue.story_points

            # Events are time-sorted; the first DONE is taken as final.
            done_time = next(
                (
                    e.timestamp
                    for e in events_by_issue.get(issue.issue_id, [])
                    if e.to_status == "DONE"
                ),
                None,
            )
            if done_time and start <= _ensure_aware(done_time) <= cutoff:
                completed_points += issue.story_points
                stats["completed"] += issue.story_points

        # --- Gap analysis: linear burn expectation ---
        if is_past:
            time_progress_pct = 1.0
        elif days_duration > 0:
            days_passed = max(0, (now - start).days)
            time_progress_pct = days_passed / days_duration
        else:
            # Degenerate window (end before start): nothing can be expected
            # yet, and dividing by the duration would raise ZeroDivisionError.
            time_progress_pct = 0.0

        expected_points = total_points * time_progress_pct
        points_gap = expected_points - completed_points

        # --- Review delay signal (in days) from PRs created in the window ---
        review_delays = []
        for pr in github_prs:
            created = _ensure_aware(pr.created_at)
            if not (start <= created <= end):
                continue
            approval_ts = first_approval.get(pr.pr_id)
            if approval_ts:
                waited = _ensure_aware(approval_ts) - created
                review_delays.append(waited.total_seconds() / 86400.0)
            elif is_current:
                # Still unapproved: count the wait so far, but only when it
                # already exceeds one day.
                current_wait = (now - created).total_seconds() / 86400.0
                if current_wait > 1.0:
                    review_delays.append(current_wait)

        # No samples -> assume a 0.5 day delay; 0.6 days is the "good" baseline.
        avg_review_delay = (
            sum(review_delays) / len(review_delays) if review_delays else 0.5
        )
        review_gap = max(0, avg_review_delay - 0.6)

        # --- Reality gap score (0-100) ---
        pct_behind = points_gap / total_points if total_points > 0 else 0
        # 2x multiplier: being 50% behind saturates the points component.
        score_points = min(100, max(0, pct_behind * 100 * 2))
        # 20 points per day of excess review delay, saturating at 5 days.
        score_review = min(100, review_gap * 20)
        # NOTE(review): literal 0.7 / 0.3 weights are kept as-is; the module
        # constants REALITY_GAP_WEIGHT_POINTS / _REVIEW (0.6 / 0.4) are not used.
        reality_gap_score = int(score_points * 0.7 + score_review * 0.3)

        # --- Prediction from the observed pace (current sprint only) ---
        predicted_slip = 0
        predicted_finish = sprint.end_date
        if is_current and completed_points < total_points and time_progress_pct > 0.1:
            days_spent = max(1, (now - start).days)
            avg_pace = completed_points / days_spent
            remaining = total_points - completed_points
            if avg_pace > 0:
                finish_date = now + timedelta(days=remaining / avg_pace)
                slip = (finish_date - end).days
                if slip > 0:
                    predicted_slip = int(slip)
                    predicted_finish = finish_date
            else:
                # Nothing completed so far: report a stall sentinel.
                predicted_slip = 99
                predicted_finish = now + timedelta(days=30)

        # --- Explainability ---
        # A module is "behind" when its completion ratio is below 70% of the
        # elapsed-time ratio (30% buffer).
        bad_modules = [
            m
            for m, stats in mod_stats.items()
            if stats["planned"] > 0
            and stats["completed"] / stats["planned"] < time_progress_pct * 0.7
        ]

        top_drivers = []
        if bad_modules:
            top_drivers.append(f"Modules behind schedule: {', '.join(bad_modules)}")
        if review_gap > 1.0:
            top_drivers.append(f"High review delays (avg {avg_review_delay:.1f}d)")
        if points_gap > 5:
            # One decimal, matching points_completion_gap below, instead of
            # dumping the raw float (e.g. 8.571428571428571).
            top_drivers.append(
                f"Point completion gap: {points_gap:.1f} pts behind plan"
            )

        # --- Recommendations ---
        actions = []
        if is_current and "payments" in bad_modules and review_gap > 1.0:
            actions.append(
                "Payments module is bottlenecked by reviews. Assign 1 extra reviewer."
            )
        if predicted_slip > 2:
            actions.append(
                f"Predicted slip {predicted_slip} days. "
                f"Reduce scope by {int(points_gap)} pts."
            )

        sprint_metrics_list.append(
            SprintMetrics(
                sprint_id=sprint.sprint_id,
                name=sprint.name,
                start_date=sprint.start_date,
                end_date=sprint.end_date,
                planned_story_points=total_points,
                completed_story_points=completed_points,
                completion_pct=(
                    round(completed_points / total_points * 100, 1)
                    if total_points
                    else 0
                ),
                reality_gap_score=reality_gap_score,
                points_completion_gap=round(points_gap, 1),
                predicted_slip_days=predicted_slip,
                predicted_finish_date=predicted_finish.strftime("%Y-%m-%d"),
                module_breakdown=mod_stats,
                top_drivers=top_drivers,
                recommended_actions=actions,
            )
        )

        if is_current:
            drivers_short = "; ".join(top_drivers[:1]) if top_drivers else "on track"
            headline = (
                f"{sprint.name} is trending {predicted_slip} days late: "
                f"{drivers_short}."
            )

    return sprint_metrics_list, correction_rules, headline


def _ensure_aware(dt):
    """Return ``dt`` tagged as UTC when it is a naive datetime, else unchanged."""
    if isinstance(dt, datetime) and dt.utcoffset() is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt


def _first_approval_by_pr(github_reviews):
    """Map each ``pr_id`` to the timestamp of its first APPROVED review in list order."""
    approvals = {}
    for rev in github_reviews:
        if rev.state == "APPROVED" and rev.pr_id not in approvals:
            approvals[rev.pr_id] = rev.timestamp
    return approvals
def _learn_correction_rules(
    sprints: "List[RawSprint]",
    issues: "List[RawIssue]",
    events_by_issue: "Dict[str, List[RawIssueEvent]]",
) -> "List[CorrectionRule]":
    """Derive effort-correction multipliers from issues that reached DONE.

    Issues carry story points rather than a planned duration, so the plan is
    approximated per issue as::

        planned_days = story_points / sprint_velocity   # velocity in points/day
        actual_days  = DONE timestamp - IN_PROGRESS timestamp
        ratio        = actual_days / planned_days

    Ratios are grouped by ``(team, module_id, issue_type)`` and averaged; the
    average is clamped to ``[1.0, 2.5]`` to form the multiplier.

    Args:
        sprints: Sprints used to derive a planned velocity
            (``planned_story_points`` divided by the inclusive day count).
        issues: Candidate issues. Every issue with both an IN_PROGRESS and a
            DONE transition is used.
            NOTE(review): no filter on whether the sprint is completed is
            applied here, although that appears to be the intent -- confirm.
        events_by_issue: ``issue_id`` -> events for that issue. The *last*
            IN_PROGRESS and the *last* DONE event in list order are used.

    Returns:
        One ``CorrectionRule`` per observed ``(team, module, issue_type)`` key.
        Issues whose planned duration cannot be computed (sprint velocity or
        story points not positive) are skipped rather than raising
        ``ZeroDivisionError``.
    """
    # Planned velocity per sprint (points/day); 1.0 for a degenerate window.
    sprint_velocities = {}
    for sprint in sprints:
        duration = (sprint.end_date - sprint.start_date).days + 1
        sprint_velocities[sprint.sprint_id] = (
            sprint.planned_story_points / duration if duration > 0 else 1.0
        )

    # (team, module, issue_type) -> observed actual/planned ratios.
    history: Dict[Tuple[str, str, str], List[float]] = {}
    for issue in issues:
        start_ts = None
        end_ts = None
        for evt in events_by_issue.get(issue.issue_id, []):
            if evt.to_status == "IN_PROGRESS":
                start_ts = evt.timestamp
            if evt.to_status == "DONE":
                end_ts = evt.timestamp
        if not (start_ts and end_ts):
            continue  # never started or never finished

        # Floor at 0.1 day so instant or out-of-order transitions stay positive.
        actual_days = max(0.1, (end_ts - start_ts).total_seconds() / 86400.0)

        velocity = sprint_velocities.get(issue.sprint_id, 1.0)
        if velocity <= 0 or issue.story_points <= 0:
            # No meaningful plan (zero-point sprint or unestimated issue):
            # dividing would raise ZeroDivisionError, so skip the sample.
            continue
        planned_days = issue.story_points / velocity
        ratio = actual_days / planned_days

        # All issues are attributed to "team_alpha", as in the dummy data.
        key = ("team_alpha", issue.module_id, issue.issue_type)
        history.setdefault(key, []).append(ratio)

    rules = []
    for (team, mod, itype), ratios in history.items():
        avg_ratio = sum(ratios) / len(ratios)
        # Never speed estimates up (floor 1.0) and cap the inflation at 2.5x.
        multiplier = max(1.0, min(avg_ratio, 2.5))
        expl = (
            f"Historically {mod}/{itype} tasks take "
            f"{multiplier:.1f}x longer than planned."
        )
        rules.append(
            CorrectionRule(
                team_id=team,
                module_id=mod,
                issue_type=itype,
                multiplier=round(multiplier, 2),
                samples_count=len(ratios),
                explanation=expl,
            )
        )
    return rules