"""Developer productivity prediction service.

Serves a FastAPI JSON API (``/predict``, ``/analyze-github``) and a Gradio UI
from the same process. Predictions come from a pre-trained model and scaler
loaded from ``dev_productivity_model.joblib`` and ``scaler.joblib`` at import
time.
"""
import json
import os
import time
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Optional, Union
from urllib.parse import urlparse

import gradio as gr
import joblib
import numpy as np
import pandas as pd
import requests
import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sklearn.preprocessing import StandardScaler

# Create FastAPI app
app = FastAPI(title="Developer Productivity Prediction API", version="1.0.0")

# Load the trained model and scaler
model = joblib.load('dev_productivity_model.joblib')
scaler = joblib.load('scaler.joblib')

# Scores strictly above this value are reported as "high productivity".
HIGH_PRODUCTIVITY_THRESHOLD = 0.7


# Pydantic models
class ProductivityRequest(BaseModel):
    """Request body for ``POST /predict``."""

    daily_coding_hours: float
    commits_per_day: int
    pull_requests_per_week: int
    issues_closed_per_week: int
    active_repos: int
    code_reviews_per_week: int


class ProductivityResponse(BaseModel):
    """Response body for ``POST /predict``."""

    predicted_score: float
    status: str


class GitHubAnalysisRequest(BaseModel):
    """Request body for ``POST /analyze-github``."""

    repo_url: str
    github_token: str


class GitHubAnalysisResponse(BaseModel):
    """Response body for ``POST /analyze-github``."""

    repo_metrics: dict
    ml_features: dict
    predicted_score: float
    productivity_indicators: dict
    status: str


# GitHub Repository Analyzer
class RepoProductivityAnalyzer:
    """Collect commit, pull-request and issue activity for one GitHub repo.

    Activity is measured over the last ``days_back`` days (90) using the
    GitHub REST API with the supplied token.
    """

    # Upper bound, in seconds, on any single sleep while rate limited.
    MAX_RATE_LIMIT_WAIT = 60

    def __init__(self, github_token: str):
        """Store the token and build the request headers.

        Raises:
            ValueError: if the token is empty or the ``YOUR_TOKEN_HERE``
                placeholder.
        """
        if not github_token or github_token == "YOUR_TOKEN_HERE":
            raise ValueError("Please provide a valid GitHub token")

        self.token = github_token
        self.headers = {
            'Authorization': f'token {github_token}',
            'Accept': 'application/vnd.github.v3+json'
        }
        self.days_back = 90
        self.max_retries = 3

    @staticmethod
    def _rate_limit_delay(response) -> Optional[float]:
        """Return seconds to wait if *response* signals a rate limit, else None.

        A ``Retry-After`` header, or ``X-RateLimit-Remaining: 0`` (optionally
        with ``X-RateLimit-Reset``), is taken as a rate-limit signal. The
        returned delay is clamped to ``[0, MAX_RATE_LIMIT_WAIT]``.
        """
        cap = RepoProductivityAnalyzer.MAX_RATE_LIMIT_WAIT

        retry_after = response.headers.get('Retry-After')
        if retry_after is not None:
            try:
                return min(max(float(retry_after), 0.0), cap)
            except ValueError:
                return cap

        if response.headers.get('X-RateLimit-Remaining') == '0':
            try:
                wait = float(response.headers.get('X-RateLimit-Reset')) - time.time()
            except (TypeError, ValueError):
                return cap
            return min(max(wait, 0.0), cap)

        # A 403 without rate-limit headers is a permission problem; retrying
        # will not help.
        return None

    def safe_request(self, url: str, retries: Optional[int] = None) -> Union[Dict, List]:
        """GET *url* and return the decoded JSON body, or ``[]`` on any failure.

        Network errors are retried with exponential backoff (1s, 2s, ...).
        Rate-limited responses (403/429 carrying rate-limit headers) are
        retried after a bounded wait. Every other non-200 status, and a 200
        whose body is not valid JSON, yields ``[]`` immediately.

        Args:
            url: Fully-formed request URL.
            retries: Maximum number of attempts; defaults to
                ``self.max_retries``.
        """
        if retries is None:
            retries = self.max_retries

        for attempt in range(retries):
            is_last_attempt = attempt >= retries - 1

            try:
                response = requests.get(url, headers=self.headers, timeout=30)
            except requests.exceptions.RequestException:
                if is_last_attempt:
                    return []
                time.sleep(2 ** attempt)
                continue

            if response.status_code == 200:
                try:
                    return response.json()
                except ValueError:
                    return []

            if response.status_code in (403, 429):
                delay = self._rate_limit_delay(response)
                # Only sleep when another attempt will actually follow.
                if delay is None or is_last_attempt:
                    return []
                time.sleep(delay)
                continue

            return []

        return []

    def _fetch_list(self, url: str) -> List:
        """Return the JSON list at *url*, or ``[]`` if the result is not a list."""
        payload = self.safe_request(url)
        return payload if isinstance(payload, list) else []

    def parse_repo_url(self, repo_url: str) -> tuple:
        """Extract ``(owner, repo)`` from a GitHub repository URL.

        Accepts ``https://github.com/owner/repo`` (extra path segments are
        ignored), the same with a trailing ``.git``, a schemeless
        ``github.com/owner/repo``, and a bare ``owner/repo``.

        Raises:
            ValueError: if no owner/repo pair can be extracted.
        """
        if not isinstance(repo_url, str):
            raise ValueError("Invalid repo URL: expected a string")

        parsed = urlparse(repo_url.strip())
        segments = [part for part in parsed.path.split('/') if part]

        # Without a scheme urlparse puts the host into the path.
        if not parsed.netloc and segments and segments[0].lower() in ('github.com', 'www.github.com'):
            segments = segments[1:]

        if len(segments) < 2:
            raise ValueError("Invalid repo URL: Invalid GitHub URL format")

        owner, repo = segments[0], segments[1]
        if repo.endswith('.git'):
            repo = repo[:-len('.git')]
        if not repo:
            raise ValueError("Invalid repo URL: Invalid GitHub URL format")
        return owner, repo

    def safe_parse_datetime(self, date_str: str) -> Optional[datetime]:
        """Parse an ISO-8601 timestamp into an aware datetime, or return None.

        A trailing ``Z`` is accepted as UTC; naive results are assumed to be
        UTC. Empty or unparseable input returns ``None``.
        """
        if not date_str:
            return None
        try:
            dt = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
        except (ValueError, TypeError, AttributeError):
            return None
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt

    def _recent_items(self, items: List, date_key: str, since_dt: datetime) -> List:
        """Return the items whose *date_key* timestamp is at or after *since_dt*."""
        recent = []
        for item in items:
            stamp = self.safe_parse_datetime(item.get(date_key))
            if stamp is not None and stamp >= since_dt:
                recent.append(item)
        return recent

    def get_metrics(self, repo_url: str) -> Dict:
        """Compute activity metrics for *repo_url* over the last ``days_back`` days.

        Returns:
            On success, a dict with ``repo``, ``period_days``, ``analyzed_at``,
            ``status``, ``total_commits``, ``prs_total``, ``prs_merged``,
            ``issues_total``, ``commits_per_day``, ``prs_per_week`` and
            ``issues_per_week``. On failure, a dict containing an ``error``
            key; this method does not raise.

        NOTE(review): each listing reads only the first page (``per_page=100``),
        so every count is capped at 100 for busy repositories.
        """
        try:
            owner, repo = self.parse_repo_url(repo_url)
        except ValueError as e:
            return {"error": str(e)}

        now = datetime.now(timezone.utc)
        since_dt = now - timedelta(days=self.days_back)
        # "Z" form rather than isoformat(): a literal "+00:00" in a query
        # string would be decoded as a space by the server.
        since = since_dt.strftime('%Y-%m-%dT%H:%M:%SZ')
        base_url = f"https://api.github.com/repos/{owner}/{repo}"

        metrics = {
            'repo': f"{owner}/{repo}",
            'period_days': self.days_back,
            'analyzed_at': now.isoformat(),
            'status': 'success'
        }

        try:
            # Check repo exists
            repo_info = self.safe_request(base_url)
            if not repo_info:
                return {"error": "Repository not found or inaccessible"}

            # Get commits
            commits = self._fetch_list(f"{base_url}/commits?per_page=100&since={since}")
            metrics['total_commits'] = len(commits)

            # Get PRs
            prs = self._fetch_list(f"{base_url}/pulls?state=all&per_page=100")
            recent_prs = self._recent_items(prs, 'created_at', since_dt)
            metrics['prs_total'] = len(recent_prs)
            metrics['prs_merged'] = len([p for p in recent_prs if p.get('merged_at')])

            # Get issues. The issues endpoint also lists pull requests (they
            # carry a "pull_request" key); drop them so they are not counted
            # twice.
            issues = self._fetch_list(
                f"{base_url}/issues?state=closed&per_page=100&since={since}"
            )
            real_issues = [item for item in issues if 'pull_request' not in item]
            recent_issues = self._recent_items(real_issues, 'closed_at', since_dt)
            metrics['issues_total'] = len(recent_issues)

            # Calculate rates
            weeks = max(self.days_back / 7, 1)
            metrics['commits_per_day'] = metrics['total_commits'] / max(self.days_back, 1)
            metrics['prs_per_week'] = metrics['prs_total'] / weeks
            metrics['issues_per_week'] = metrics['issues_total'] / weeks

            return metrics

        except Exception as e:
            # Boundary handler: callers expect an error dict, never an exception.
            return {
                "error": f"Analysis failed: {str(e)}",
                "repo": f"{owner}/{repo}",
                "analyzed_at": now.isoformat()
            }


def predict_productivity_core(daily_coding_hours, commits_per_day, pull_requests_per_week,
                              issues_closed_per_week, active_repos, code_reviews_per_week):
    """Return the model's productivity score for the six input metrics.

    The six inputs are mapped heuristically onto the seven features passed to
    the scaler and model. ``issues_closed_per_week`` and ``active_repos`` are
    accepted for interface symmetry but do not take part in the mapping.

    Raises:
        HTTPException: status 500 if feature scaling or prediction fails.
    """
    try:
        # Map the 6 input features to the 7 features the model expects:
        # ['cycle_time', 'pr_size', 'dev_satisfaction', 'deployment_frequency',
        #  'change_failure_rate', 'cognitive_load', 'test_coverage']

        # Create mappings with reasonable defaults
        cycle_time = max(1, 7 - commits_per_day)  # Inverse relationship with commits
        pr_size = max(100, 500 - (pull_requests_per_week * 50))  # Smaller if more PRs
        dev_satisfaction = min(10, 5 + (daily_coding_hours * 0.5))  # Based on coding hours
        deployment_frequency = max(1, 7 - (pull_requests_per_week * 0.5))  # Related to PRs
        change_failure_rate = max(0.1, 0.5 - (code_reviews_per_week * 0.05))  # Lower with more reviews
        cognitive_load = max(1, 8 - daily_coding_hours)  # Inverse of coding hours
        test_coverage = min(1.0, 0.6 + (code_reviews_per_week * 0.05))  # Higher with reviews

        features = np.array([[
            cycle_time,
            pr_size,
            dev_satisfaction,
            deployment_frequency,
            change_failure_rate,
            cognitive_load,
            test_coverage
        ]])

        features_scaled = scaler.transform(features)
        prediction = model.predict(features_scaled)[0]
        return float(prediction)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}") from e


def _metrics_to_ml_features(metrics: Dict) -> Dict:
    """Convert repository metrics into ``predict_productivity_core`` keyword args.

    Coding hours are approximated as twice the daily commit rate, capped at 8;
    code reviews are approximated by the weekly PR rate; ``active_repos`` is 1.
    Missing metrics default to 0.
    """
    commits_per_day = float(metrics.get('commits_per_day', 0))
    prs_per_week = float(metrics.get('prs_per_week', 0))
    issues_per_week = float(metrics.get('issues_per_week', 0))

    return {
        'daily_coding_hours': min(commits_per_day * 2, 8),
        'commits_per_day': max(int(commits_per_day), 0),
        'pull_requests_per_week': max(int(prs_per_week), 0),
        'issues_closed_per_week': max(int(issues_per_week), 0),
        'active_repos': 1,
        'code_reviews_per_week': max(int(prs_per_week), 0)
    }


# FastAPI Endpoints
@app.get("/")
async def root():
    """Return a small liveness payload."""
    return {"message": "Developer Productivity Prediction API", "status": "online"}


@app.post("/predict", response_model=ProductivityResponse)
async def predict_productivity(request: ProductivityRequest):
    """Predict a productivity score from manually supplied metrics."""
    try:
        prediction = predict_productivity_core(
            request.daily_coding_hours,
            request.commits_per_day,
            request.pull_requests_per_week,
            request.issues_closed_per_week,
            request.active_repos,
            request.code_reviews_per_week
        )
        return ProductivityResponse(predicted_score=prediction, status="success")
    except HTTPException:
        # Already carries the right status/detail; do not wrap it again.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/analyze-github", response_model=GitHubAnalysisResponse)
def analyze_github_repo(request: GitHubAnalysisRequest):
    """Analyze a GitHub repository and predict a productivity score from it.

    Declared as a plain ``def`` because the analysis performs blocking HTTP
    calls and sleeps; FastAPI runs non-async handlers in a worker thread, so
    the event loop is not stalled.

    Raises:
        HTTPException: 422 for missing inputs, 400 for an invalid token or a
            failed repository analysis, 500 for unexpected errors.
    """
    try:
        # Validate inputs
        if not request.repo_url or not request.github_token:
            raise HTTPException(status_code=422, detail="repo_url and github_token are required")

        try:
            analyzer = RepoProductivityAnalyzer(request.github_token)
        except ValueError as e:
            # A rejected token is a client error, not a server fault.
            raise HTTPException(status_code=400, detail=str(e)) from e

        metrics = analyzer.get_metrics(request.repo_url)
        if "error" in metrics:
            raise HTTPException(status_code=400, detail=metrics["error"])

        # Ensure all required metrics exist with defaults
        commits_per_day = float(metrics.get('commits_per_day', 0))
        prs_per_week = float(metrics.get('prs_per_week', 0))
        issues_per_week = float(metrics.get('issues_per_week', 0))

        # Transform to ML features
        ml_features = _metrics_to_ml_features(metrics)
        prediction = predict_productivity_core(**ml_features)

        productivity_indicators = {
            'high_commit_frequency': commits_per_day > 1,
            'active_pr_process': prs_per_week > 2,
            'good_issue_resolution': issues_per_week > 1,
            'overall_productivity': prediction > HIGH_PRODUCTIVITY_THRESHOLD
        }

        return GitHubAnalysisResponse(
            repo_metrics=metrics,
            ml_features=ml_features,
            predicted_score=float(prediction),
            productivity_indicators=productivity_indicators,
            status="success"
        )

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")


# Gradio Interface Functions
def gradio_predict(daily_coding_hours, commits_per_day, pull_requests_per_week,
                   issues_closed_per_week, active_repos, code_reviews_per_week):
    """Gradio callback: return the predicted score, or an error, as text."""
    try:
        prediction = predict_productivity_core(
            daily_coding_hours, commits_per_day, pull_requests_per_week,
            issues_closed_per_week, active_repos, code_reviews_per_week
        )
        return f"Predicted Score: {prediction:.3f}"
    except Exception as e:
        return f"Error: {str(e)}"


def gradio_github_analysis(repo_url, github_token):
    """Gradio callback: analyze a repository and return a text report.

    Any failure is returned as an ``Error: ...`` string instead of raising.
    """
    try:
        analyzer = RepoProductivityAnalyzer(github_token)
        metrics = analyzer.get_metrics(repo_url)

        if "error" in metrics:
            return f"Error: {metrics['error']}"

        ml_features = _metrics_to_ml_features(metrics)
        prediction = predict_productivity_core(**ml_features)

        if prediction > HIGH_PRODUCTIVITY_THRESHOLD:
            verdict = '🚀 High Productivity!'
        else:
            verdict = '⚠️ Room for improvement'

        report_lines = [
            "🏆 PRODUCTIVITY ANALYSIS",
            f"📊 Repository: {metrics['repo']}",
            f"⏱️ Period: {metrics['period_days']} days",
            "📈 KEY METRICS:",
            f"• Commits/day: {metrics['commits_per_day']:.1f}",
            f"• PRs/week: {metrics['prs_per_week']:.1f}",
            f"• Issues/week: {metrics['issues_per_week']:.1f}",
            f"🤖 ML PREDICTION: {prediction:.3f}",
            verdict,
            "💡 FEATURES:",
            f"• Daily coding hours: {ml_features['daily_coding_hours']}",
            f"• Commits/day: {ml_features['commits_per_day']}",
            f"• PRs/week: {ml_features['pull_requests_per_week']}",
            f"• Issues/week: {ml_features['issues_closed_per_week']}",
            f"• Active repos: {ml_features['active_repos']}",
            f"• Reviews/week: {ml_features['code_reviews_per_week']}",
        ]
        return "\n".join(report_lines)

    except Exception as e:
        return f"Error: {str(e)}"


# Create Gradio Interface
with gr.Blocks(title="Developer Productivity Predictor") as demo:
    gr.Markdown("# 🏆 Developer Productivity Predictor")
    gr.Markdown("Predict productivity scores and analyze GitHub repositories using ML")

    with gr.Tab("Manual Prediction"):
        gr.Markdown("### Enter your development metrics:")

        with gr.Row():
            daily_hours = gr.Slider(1, 12, value=6, label="Daily Coding Hours")
            commits = gr.Slider(0, 20, value=3, label="Commits per Day")
            prs = gr.Slider(0, 10, value=2, label="Pull Requests per Week")

        with gr.Row():
            issues = gr.Slider(0, 15, value=3, label="Issues Closed per Week")
            repos = gr.Slider(1, 10, value=2, label="Active Repositories")
            reviews = gr.Slider(0, 20, value=5, label="Code Reviews per Week")

        predict_btn = gr.Button("🚀 Predict Productivity", variant="primary")
        prediction_output = gr.Textbox(label="Prediction Result", lines=2)

        predict_btn.click(
            gradio_predict,
            inputs=[daily_hours, commits, prs, issues, repos, reviews],
            outputs=prediction_output
        )

    with gr.Tab("GitHub Analysis"):
        gr.Markdown("### Analyze any GitHub repository:")

        repo_url_input = gr.Textbox(
            label="GitHub Repository URL",
            placeholder="https://github.com/owner/repo",
            value="https://github.com/microsoft/vscode"
        )
        token_input = gr.Textbox(
            label="GitHub Token",
            type="password",
            placeholder="ghp_xxxxxxxxxxxx"
        )

        analyze_btn = gr.Button("🔍 Analyze Repository", variant="primary")
        analysis_output = gr.Textbox(label="Analysis Result", lines=15)

        analyze_btn.click(
            gradio_github_analysis,
            inputs=[repo_url_input, token_input],
            outputs=analysis_output
        )

# Mount Gradio app to FastAPI
# NOTE(review): the JSON route registered for GET "/" above shares this mount
# path; confirm which of the two is meant to answer requests for "/".
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)