|
|
import gradio as gr |
|
|
import joblib |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
from sklearn.preprocessing import StandardScaler |
|
|
from fastapi import FastAPI, HTTPException |
|
|
from pydantic import BaseModel |
|
|
import uvicorn |
|
|
import os |
|
|
import requests |
|
|
import json |
|
|
from datetime import datetime, timedelta, timezone |
|
|
from typing import Dict, List, Optional |
|
|
from urllib.parse import urlparse |
|
|
import time |
|
|
|
|
|
|
|
|
app = FastAPI(title="Developer Productivity Prediction API", version="1.0.0")

# Model artifacts are loaded eagerly at import time; missing .joblib files
# will raise FileNotFoundError on startup — presumably intentional fail-fast,
# TODO confirm. `scaler` must be the same StandardScaler the model was
# trained with (see predict_productivity_core).
model = joblib.load('dev_productivity_model.joblib')
scaler = joblib.load('scaler.joblib')
|
|
|
|
|
|
|
|
class ProductivityRequest(BaseModel):
    """Input payload for POST /predict: raw developer-activity metrics."""

    daily_coding_hours: float
    commits_per_day: int
    pull_requests_per_week: int
    issues_closed_per_week: int
    # NOTE(review): active_repos and issues_closed_per_week are accepted but
    # never used by predict_productivity_core's feature mapping.
    active_repos: int
    code_reviews_per_week: int
|
|
|
|
|
class ProductivityResponse(BaseModel):
    """Response payload for POST /predict."""

    predicted_score: float  # raw model output
    status: str  # "success" on the happy path; errors are raised as HTTP 500
|
|
|
|
|
class GitHubAnalysisRequest(BaseModel):
    """Input payload for POST /analyze-github."""

    repo_url: str  # e.g. https://github.com/owner/repo
    github_token: str  # personal access token; sent as an Authorization header
|
|
|
|
|
class GitHubAnalysisResponse(BaseModel):
    """Response payload for POST /analyze-github."""

    repo_metrics: dict  # raw metrics from RepoProductivityAnalyzer.get_metrics
    ml_features: dict  # repo metrics mapped onto the model's input features
    predicted_score: float
    productivity_indicators: dict  # boolean heuristics derived from the rates
    status: str
|
|
|
|
|
|
|
|
class RepoProductivityAnalyzer:
    """Collects recent activity metrics for one GitHub repository via the REST API.

    All network access funnels through :meth:`safe_request`, which retries on
    transient failures and degrades to an empty result instead of raising, so
    :meth:`get_metrics` never propagates network errors to callers.
    """

    def __init__(self, github_token: str):
        """Store the token and build the request headers.

        Raises:
            ValueError: if the token is empty or still the placeholder value.
        """
        if not github_token or github_token == "YOUR_TOKEN_HERE":
            raise ValueError("Please provide a valid GitHub token")

        self.token = github_token
        self.headers = {
            'Authorization': f'token {github_token}',
            'Accept': 'application/vnd.github.v3+json'
        }
        # Look-back window (days) applied by get_metrics when filtering activity.
        self.days_back = 90
        self.max_retries = 3

    def safe_request(self, url: str, retries: int = None) -> Optional[List]:
        """GET *url*, returning parsed JSON on success or [] on any failure.

        NOTE(review): single-object endpoints (e.g. repo info) return a dict
        here despite the List annotation; callers only rely on truthiness.
        """
        if retries is None:
            retries = self.max_retries

        for attempt in range(retries):
            try:
                response = requests.get(url, headers=self.headers, timeout=30)

                if response.status_code == 200:
                    return response.json()
                elif response.status_code == 403:
                    # Most likely a rate limit; back off a minute, then retry.
                    time.sleep(60)
                    continue
                elif response.status_code == 404:
                    return []
                else:
                    # Any other HTTP error: treat as "no data".
                    return []

            except requests.exceptions.RequestException:
                if attempt < retries - 1:
                    # Exponential backoff: 1s, 2s, 4s, ...
                    time.sleep(2 ** attempt)
                else:
                    return []
        return []

    def parse_repo_url(self, repo_url: str) -> tuple:
        """Extract ``(owner, repo)`` from a GitHub URL.

        Accepts web URLs with trailing path segments and clone-style URLs
        ending in ``.git``.

        Raises:
            ValueError: if the URL does not contain owner and repo segments.
        """
        try:
            parsed = urlparse(repo_url)
            path = parsed.path.strip('/').split('/')
            if len(path) < 2:
                raise ValueError("Invalid GitHub URL format")
            owner, repo = path[0], path[1]
            # Clone URLs look like https://github.com/owner/repo.git; the API
            # expects the bare repo name.
            if repo.endswith('.git'):
                repo = repo[:-4]
            return owner, repo
        except Exception as e:
            raise ValueError(f"Invalid repo URL: {str(e)}")

    def safe_parse_datetime(self, date_str: str) -> Optional[datetime]:
        """Parse a GitHub ISO-8601 timestamp ('...Z') into an aware datetime.

        Returns None for missing or unparseable input.
        """
        if not date_str:
            return None
        try:
            dt = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
            if dt.tzinfo is None:
                dt = dt.replace(tzinfo=timezone.utc)
            return dt
        # Was a bare `except:`, which also swallowed KeyboardInterrupt /
        # SystemExit; only parse errors should map to None.
        except (ValueError, TypeError):
            return None

    def get_metrics(self, repo_url: str) -> Dict:
        """Collect commit/PR/issue activity for the last ``days_back`` days.

        Returns a metrics dict on success, or ``{"error": ...}`` on failure
        (this method never raises), so callers can surface the message as-is.
        """
        try:
            owner, repo = self.parse_repo_url(repo_url)
        except ValueError as e:
            return {"error": str(e)}

        now = datetime.now(timezone.utc)
        since_dt = now - timedelta(days=self.days_back)
        since = since_dt.isoformat()

        metrics = {
            'repo': f"{owner}/{repo}",
            'period_days': self.days_back,
            'analyzed_at': now.isoformat(),
            'status': 'success'
        }

        try:
            # Bail out early if the repository is missing or inaccessible.
            repo_info = self.safe_request(f"https://api.github.com/repos/{owner}/{repo}")
            if not repo_info:
                return {"error": "Repository not found or inaccessible"}

            # Commits: filtered server-side via `since`. Only the first page
            # (100) is fetched, so very active repos are undercounted.
            commits_url = f"https://api.github.com/repos/{owner}/{repo}/commits?per_page=100&since={since}"
            commits = self.safe_request(commits_url) or []
            metrics['total_commits'] = len(commits)

            # Pull requests: the PR endpoint has no `since` parameter, so we
            # fetch one page and filter by creation date client-side.
            prs_url = f"https://api.github.com/repos/{owner}/{repo}/pulls?state=all&per_page=100"
            prs = self.safe_request(prs_url) or []

            recent_prs = []
            for pr in prs:
                created_at = self.safe_parse_datetime(pr.get('created_at'))
                if created_at and created_at >= since_dt:
                    recent_prs.append(pr)

            metrics['prs_total'] = len(recent_prs)
            metrics['prs_merged'] = len([p for p in recent_prs if p.get('merged_at')])

            # Closed issues. NOTE(review): the GitHub issues endpoint also
            # returns pull requests, so this count can overlap with prs_total
            # — confirm whether that is intended.
            issues_url = f"https://api.github.com/repos/{owner}/{repo}/issues?state=closed&per_page=100"
            issues = self.safe_request(issues_url) or []

            recent_issues = []
            for issue in issues:
                closed_at = self.safe_parse_datetime(issue.get('closed_at'))
                if closed_at and closed_at >= since_dt:
                    recent_issues.append(issue)

            metrics['issues_total'] = len(recent_issues)

            # Normalized rates consumed downstream as ML features.
            metrics['commits_per_day'] = metrics['total_commits'] / max(self.days_back, 1)
            metrics['prs_per_week'] = metrics['prs_total'] / max((self.days_back / 7), 1)
            metrics['issues_per_week'] = metrics['issues_total'] / max((self.days_back / 7), 1)

            return metrics

        except Exception as e:
            # Defensive catch-all: surface the failure as data, not a raise.
            return {
                "error": f"Analysis failed: {str(e)}",
                "repo": f"{owner}/{repo}",
                "analyzed_at": now.isoformat()
            }
|
|
|
|
|
def predict_productivity_core(daily_coding_hours, commits_per_day, pull_requests_per_week,
                              issues_closed_per_week, active_repos, code_reviews_per_week):
    """Map raw activity numbers onto the model's engineered features and predict.

    The model was trained on seven derived features (cycle time, PR size,
    satisfaction, deployment frequency, change-failure rate, cognitive load,
    test coverage); the expressions below are the heuristic mapping from the
    observable inputs. `active_repos` and `issues_closed_per_week` are accepted
    for interface compatibility but do not enter the feature vector.

    Returns:
        float: the model's predicted productivity score.

    Raises:
        HTTPException: 500 with the underlying error if the pipeline fails.
    """
    try:
        engineered = [
            max(1, 7 - commits_per_day),                     # cycle_time
            max(100, 500 - (pull_requests_per_week * 50)),   # pr_size
            min(10, 5 + (daily_coding_hours * 0.5)),         # dev_satisfaction
            max(1, 7 - (pull_requests_per_week * 0.5)),      # deployment_frequency
            max(0.1, 0.5 - (code_reviews_per_week * 0.05)),  # change_failure_rate
            max(1, 8 - daily_coding_hours),                  # cognitive_load
            min(1.0, 0.6 + (code_reviews_per_week * 0.05)),  # test_coverage
        ]
        # Scale with the scaler fitted at training time, then predict.
        scaled = scaler.transform(np.array([engineered]))
        return float(model.predict(scaled)[0])
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(exc)}")
|
|
|
|
|
|
|
|
@app.get("/") |
|
|
async def root(): |
|
|
return {"message": "Developer Productivity Prediction API", "status": "online"} |
|
|
|
|
|
@app.post("/predict", response_model=ProductivityResponse) |
|
|
async def predict_productivity(request: ProductivityRequest): |
|
|
try: |
|
|
prediction = predict_productivity_core( |
|
|
request.daily_coding_hours, request.commits_per_day, request.pull_requests_per_week, |
|
|
request.issues_closed_per_week, request.active_repos, request.code_reviews_per_week |
|
|
) |
|
|
return ProductivityResponse(predicted_score=prediction, status="success") |
|
|
except Exception as e: |
|
|
raise HTTPException(status_code=500, detail=str(e)) |
|
|
|
|
|
@app.post("/analyze-github", response_model=GitHubAnalysisResponse) |
|
|
async def analyze_github_repo(request: GitHubAnalysisRequest): |
|
|
try: |
|
|
|
|
|
if not request.repo_url or not request.github_token: |
|
|
raise HTTPException(status_code=422, detail="repo_url and github_token are required") |
|
|
|
|
|
analyzer = RepoProductivityAnalyzer(request.github_token) |
|
|
metrics = analyzer.get_metrics(request.repo_url) |
|
|
|
|
|
if "error" in metrics: |
|
|
raise HTTPException(status_code=400, detail=metrics["error"]) |
|
|
|
|
|
|
|
|
commits_per_day = float(metrics.get('commits_per_day', 0)) |
|
|
prs_per_week = float(metrics.get('prs_per_week', 0)) |
|
|
issues_per_week = float(metrics.get('issues_per_week', 0)) |
|
|
|
|
|
|
|
|
ml_features = { |
|
|
'daily_coding_hours': min(commits_per_day * 2, 8), |
|
|
'commits_per_day': max(int(commits_per_day), 0), |
|
|
'pull_requests_per_week': max(int(prs_per_week), 0), |
|
|
'issues_closed_per_week': max(int(issues_per_week), 0), |
|
|
'active_repos': 1, |
|
|
'code_reviews_per_week': max(int(prs_per_week), 0) |
|
|
} |
|
|
|
|
|
prediction = predict_productivity_core(**ml_features) |
|
|
|
|
|
productivity_indicators = { |
|
|
'high_commit_frequency': commits_per_day > 1, |
|
|
'active_pr_process': prs_per_week > 2, |
|
|
'good_issue_resolution': issues_per_week > 1, |
|
|
'overall_productivity': prediction > 0.7 |
|
|
} |
|
|
|
|
|
return GitHubAnalysisResponse( |
|
|
repo_metrics=metrics, |
|
|
ml_features=ml_features, |
|
|
predicted_score=float(prediction), |
|
|
productivity_indicators=productivity_indicators, |
|
|
status="success" |
|
|
) |
|
|
except HTTPException: |
|
|
raise |
|
|
except Exception as e: |
|
|
raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}") |
|
|
|
|
|
|
|
|
def gradio_predict(daily_coding_hours, commits_per_day, pull_requests_per_week,
                   issues_closed_per_week, active_repos, code_reviews_per_week):
    """Gradio callback: run the core prediction and render the result as text.

    Errors are returned as a string (Gradio displays them in the textbox)
    rather than raised.
    """
    try:
        score = predict_productivity_core(
            daily_coding_hours, commits_per_day, pull_requests_per_week,
            issues_closed_per_week, active_repos, code_reviews_per_week,
        )
    except Exception as exc:
        return f"Error: {str(exc)}"
    return f"Predicted Score: {score:.3f}"
|
|
|
|
|
def gradio_github_analysis(repo_url, github_token):
    """Gradio callback: fetch repo metrics, predict, and render a text report.

    Mirrors the /analyze-github endpoint's feature mapping; errors come back
    as an "Error: ..." string for display rather than being raised.
    """
    try:
        analyzer = RepoProductivityAnalyzer(github_token)
        metrics = analyzer.get_metrics(repo_url)

        # get_metrics reports failures as data rather than raising.
        if "error" in metrics:
            return f"Error: {metrics['error']}"

        # Same mapping as analyze_github_repo — keep the two in sync.
        ml_features = {
            'daily_coding_hours': min(metrics['commits_per_day'] * 2, 8),
            'commits_per_day': max(int(metrics['commits_per_day']), 0),
            'pull_requests_per_week': max(int(metrics['prs_per_week']), 0),
            'issues_closed_per_week': max(int(metrics['issues_per_week']), 0),
            'active_repos': 1,
            'code_reviews_per_week': max(int(metrics['prs_per_week']), 0)
        }

        prediction = predict_productivity_core(**ml_features)

        return f"""π PRODUCTIVITY ANALYSIS
π Repository: {metrics['repo']}
β±οΈ Period: {metrics['period_days']} days

π KEY METRICS:
β’ Commits/day: {metrics['commits_per_day']:.1f}
β’ PRs/week: {metrics['prs_per_week']:.1f}
β’ Issues/week: {metrics['issues_per_week']:.1f}

π€ ML PREDICTION: {prediction:.3f}
{'π High Productivity!' if prediction > 0.7 else 'β οΈ Room for improvement'}

π‘ FEATURES:
β’ Daily coding hours: {ml_features['daily_coding_hours']}
β’ Commits/day: {ml_features['commits_per_day']}
β’ PRs/week: {ml_features['pull_requests_per_week']}
β’ Issues/week: {ml_features['issues_closed_per_week']}
β’ Active repos: {ml_features['active_repos']}
β’ Reviews/week: {ml_features['code_reviews_per_week']}"""

    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
# Gradio UI: two tabs — manual slider input, and live GitHub repo analysis.
with gr.Blocks(title="Developer Productivity Predictor") as demo:
    gr.Markdown("# π Developer Productivity Predictor")
    gr.Markdown("Predict productivity scores and analyze GitHub repositories using ML")

    with gr.Tab("Manual Prediction"):
        gr.Markdown("### Enter your development metrics:")
        # Slider defaults mirror a "typical" developer profile.
        with gr.Row():
            daily_hours = gr.Slider(1, 12, value=6, label="Daily Coding Hours")
            commits = gr.Slider(0, 20, value=3, label="Commits per Day")
            prs = gr.Slider(0, 10, value=2, label="Pull Requests per Week")
        with gr.Row():
            issues = gr.Slider(0, 15, value=3, label="Issues Closed per Week")
            repos = gr.Slider(1, 10, value=2, label="Active Repositories")
            reviews = gr.Slider(0, 20, value=5, label="Code Reviews per Week")

        predict_btn = gr.Button("π Predict Productivity", variant="primary")
        prediction_output = gr.Textbox(label="Prediction Result", lines=2)

        predict_btn.click(
            gradio_predict,
            inputs=[daily_hours, commits, prs, issues, repos, reviews],
            outputs=prediction_output
        )

    with gr.Tab("GitHub Analysis"):
        gr.Markdown("### Analyze any GitHub repository:")

        repo_url_input = gr.Textbox(
            label="GitHub Repository URL",
            placeholder="https://github.com/owner/repo",
            value="https://github.com/microsoft/vscode"
        )
        # type="password" keeps the token masked in the browser.
        token_input = gr.Textbox(
            label="GitHub Token",
            type="password",
            placeholder="ghp_xxxxxxxxxxxx"
        )

        analyze_btn = gr.Button("π Analyze Repository", variant="primary")
        analysis_output = gr.Textbox(label="Analysis Result", lines=15)

        analyze_btn.click(
            gradio_github_analysis,
            inputs=[repo_url_input, token_input],
            outputs=analysis_output
        )
|
|
|
|
|
|
|
|
# Serve the Gradio UI from the FastAPI app.
# NOTE(review): mounting at "/" shadows the GET / JSON endpoint defined above
# — confirm whether the JSON root is still reachable.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # Port 7860 is the Hugging Face Spaces convention.
    uvicorn.run(app, host="0.0.0.0", port=7860)