github_recap / functions.py
za3karia's picture
Update functions.py
72f0bad verified
raw
history blame
9.94 kB
import requests
from datetime import datetime, timedelta
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict
import pathlib
import textwrap
import json
from tqdm import tqdm
import openai
# Assuming secrets is a module or a dictionary containing secrets
from streamlit import secrets
# Constants
# Base URL that every GitHub REST request in this module is built on.
GITHUB_API = "https://api.github.com"
# The GitHub token is not bound to a module-level name; get_headers() picks
# between a caller-supplied token and secrets['GITHUB_API_TOKEN'] on each call.
# The OpenAI API key is read from the 'OPEN_AI_TOKEN' secrets entry at import time.
openai.api_key = secrets['OPEN_AI_TOKEN']
# Function to build request headers, optionally with a caller-supplied token
def get_headers(updated_token=None):
    """
    Build the HTTP headers sent with GitHub REST API requests.
    Parameters:
    - updated_token: str - Optional. A GitHub token to authenticate with.
      When it is None or an empty string, secrets['GITHUB_API_TOKEN'] is used.
    Returns:
    - A dict containing the "Authorization" and "Accept" headers.
    """
    # Truthiness treats both None (the default) and "" as "no token supplied",
    # so an omitted argument can never produce the header "token None".
    token = updated_token if updated_token else secrets['GITHUB_API_TOKEN']
    return {
        "Authorization": f"token {token}",
        "Accept": "application/vnd.github.v3+json"
    }
# Function to get repository contributors
def get_contributors(repo, updated_token=None):
    """
    Fetch and return the login names of a GitHub repository's contributors.
    Parameters:
    - repo: str - The full name of the repository (e.g., "owner/repo").
    - updated_token: str - Optional. A GitHub token for authentication.
    Returns:
    - A list of contributor logins (str). An empty list is returned when the
      request fails or the repository has no content.
    """
    headers = get_headers(updated_token)
    url = f"{GITHUB_API}/repos/{repo}/contributors"
    params = {"per_page": 100}
    contributors = []
    try:
        # Follow the "next" link so contributors beyond the first page are included.
        while url:
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()  # Raises an HTTPError if the response status code is 4XX or 5XX
            if response.status_code == 204:
                # GitHub answers 204 (no body) for an empty repository.
                break
            contributors.extend(contributor['login'] for contributor in response.json())
            url = response.links.get("next", {}).get("url")
            params = None  # The "next" URL already carries its own query string
    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch contributors: {e}")
        return []  # Return an empty list in case of failure
    return contributors
# Function to get repository branches
def get_repo_branches(repo_name, updated_token=None):
    """
    Fetch and return a list of branches from a GitHub repository.
    Parameters:
    - repo_name: str - The full name of the repository (e.g., "owner/repo").
    - updated_token: str - Optional. A GitHub token for authentication.
    Returns:
    - A list of branch names (str) of the specified repository, or an empty
      list if any request fails.
    """
    headers = get_headers(updated_token)
    url = f"{GITHUB_API}/repos/{repo_name}/branches"
    params = {"per_page": 100}
    branches = []
    try:
        # The endpoint is paginated; keep following the "next" link so that
        # repositories with many branches are not silently truncated.
        while url:
            response = requests.get(url, headers=headers, params=params)
            response.raise_for_status()  # Raises an HTTPError if the response status code is 4XX or 5XX
            branches.extend(branch['name'] for branch in response.json())
            url = response.links.get("next", {}).get("url")
            params = None  # The "next" URL already carries its own query string
    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch branches: {e}")
        return []  # Return an empty list in case of failure
    return branches
# Function to calculate the start date based on the period
def get_start_date(period):
    """
    Return the datetime at which the requested look-back period begins.
    Parameters:
    - period: str - One of "month", "3 months", "6 months", "year".
    Returns:
    - A naive datetime equal to datetime.now() minus the period length
      (30, 90, 180 or 365 days respectively).
    Raises:
    - ValueError: if period is not one of the supported labels.
    """
    lookback_days = (
        ("month", 30),
        ("3 months", 30 * 3),
        ("6 months", 30 * 6),
        ("year", 365),
    )
    for label, days in lookback_days:
        if period == label:
            return datetime.now() - timedelta(days=days)
    raise ValueError("Invalid period specified")
def extract_commit_summary(commit_data):
    """
    Reduce a commit dict to its message, author name and author date.
    Parameters:
    - commit_data: dict - Commit information with a nested 'commit' entry that
      holds 'message' and an 'author' dict ('name', 'date').
    Returns:
    - A dict with the keys 'message', 'author' and 'date'. Any value that is
      missing from commit_data is returned as an empty string.
    """
    # `or {}` also covers entries that are present but null, which a plain
    # .get(key, {}) default would pass through as None.
    commit_details = commit_data.get('commit') or {}
    author_info = commit_details.get('author') or {}
    # Only these three fields are part of the summary; file patches and
    # stats are not included.
    return {
        'message': commit_details.get('message', ''),
        'author': author_info.get('name', ''),
        'date': author_info.get('date', ''),  # Include commit date
    }
# Adjusted function to get commit files and stats
def get_commit_files_and_stats(commit_url, updated_token=None):
    """
    Fetch a single commit and return its author, message, per-file changes and stats.
    Parameters:
    - commit_url: str - The API URL of the commit to fetch.
    - updated_token: str - Optional. A GitHub token for authentication.
    Returns:
    - A dict with the keys 'author_name', 'commit_message', 'files_info'
      (a list of per-file dicts) and 'stats_info' (the 'stats' entry of the
      response, or an empty dict when absent).
    Raises:
    - requests.exceptions.HTTPError: if the response status code is 4XX or 5XX.
    - KeyError: if the response lacks the expected commit or file fields.
    """
    headers = get_headers(updated_token)
    response = requests.get(commit_url, headers=headers)
    # Fail with the real HTTP error instead of a later KeyError on an error payload.
    response.raise_for_status()
    commit_data = response.json()
    files_info = [
        {
            'filename': file['filename'],
            'status': file['status'],
            'additions': file['additions'],
            'deletions': file['deletions'],
            'changes': file['changes'],
            'patch': file.get('patch')  # Use get to avoid KeyError if 'patch' does not exist
        }
        for file in commit_data.get('files', [])
    ]
    commit_section = commit_data['commit']
    return {
        'author_name': commit_section['author']['name'],
        'commit_message': commit_section['message'],
        'files_info': files_info,
        'stats_info': commit_data.get('stats', {})
    }
# Function to fetch commits by a contributor in a given period
def get_contributor_commits(repo_name, contributor, period, branches, updated_token=None):
    """
    Fetch all commits made by a specific contributor within a specified period for a given repository.
    Parameters:
    - repo_name: str - The full name of the repository (e.g., "owner/repo").
    - contributor: str - The GitHub username of the contributor.
    - period: str - The period for which to fetch commits ("month", "3 months", "6 months", "year").
    - branches: iterable of str - The branch names to search for commits.
    - updated_token: str - Optional. A GitHub token for authentication.
    Returns:
    - A list of commit summaries (as produced by extract_commit_summary), one
      per distinct commit made by the contributor in the given period.
    Raises:
    - ValueError: if period is not supported (raised by get_start_date).
    """
    headers = get_headers(updated_token)
    start_date = get_start_date(period)
    commits_url = f"{GITHUB_API}/repos/{repo_name}/commits"
    commits = []
    seen_shas = set()
    try:
        # Fetch commits for each branch
        for branch in branches:
            url = commits_url
            # NOTE(review): get_start_date() returns a naive datetime, so
            # "since" is sent without a UTC offset - confirm this matches
            # what the API expects.
            params = {
                "sha": branch,
                "since": start_date.isoformat(),
                "author": contributor,
                "per_page": 100,
            }
            # The endpoint is paginated; follow the "next" link until exhausted.
            while url:
                commits_response = requests.get(url, headers=headers, params=params)
                commits_response.raise_for_status()
                for commit in commits_response.json():
                    sha = commit.get('sha')
                    # The same commit is reachable from several branches;
                    # keep only its first occurrence.
                    if sha is not None and sha in seen_shas:
                        continue
                    seen_shas.add(sha)
                    commits.append(commit)
                url = commits_response.links.get("next", {}).get("url")
                params = None  # The "next" URL already carries its own query string
    except requests.exceptions.RequestException as e:
        # Best effort: keep whatever was collected before the failure.
        print(f"Failed to fetch commits: {e}")
    for commit in commits:
        # NOTE(review): these details are attached to the commit, but
        # extract_commit_summary() does not read 'commit_files_stats'.
        commit['commit_files_stats'] = get_commit_files_and_stats(commit['url'], updated_token)
    return [extract_commit_summary(commit) for commit in commits]
def chat_complete(message, model="gpt-3.5-turbo-0125"):
    """
    Send a single user message to an OpenAI chat model and return the reply text.
    Parameters:
    - message: The content to send; it is converted with str() first.
    - model: str - The name of the chat model to use.
    Returns:
    - The content of the first choice, or "response not available" when the
      response has no choices.
    """
    user_turn = {"role": "user", "content": str(message)}
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[user_turn],
    )
    if not completion.choices:
        return "response not available"
    return completion.choices[0].message['content']
def openai_get_AI_summary(commit_info):
    """
    Ask the chat model for a summary of one commit.
    Parameters:
    - commit_info: A JSON-serializable value describing the commit; it is
      embedded in the prompt via json.dumps.
    Returns:
    - The text returned by chat_complete for the constructed prompt.
    """
    # Prompt layout: instruction, the serialized commit, then the schema request.
    prompt_parts = (
        "Summarize this git commit given the following context in JSON:\n",
        json.dumps(commit_info, indent=2),
        "\n---\nPut the summary in the following JSON schema: {...}",
    )
    return chat_complete("".join(prompt_parts))
def process_commits_with_openai_summaries(commits):
    """
    Generate one AI summary per commit, showing a progress bar while doing so.
    Parameters:
    - commits: iterable - The commits to summarize.
    Returns:
    - A list with the summary returned by openai_get_AI_summary for each
      commit, in input order.
    """
    summaries = []
    for commit in tqdm(commits, desc='Generating AI summaries'):
        # openai_get_AI_summary() JSON-encodes its argument, so pass the
        # commit itself; stringifying first would put a quoted Python repr
        # into the prompt instead of JSON.
        try:
            json.dumps(commit)
            commit_info = commit
        except (TypeError, ValueError):
            # Not JSON-serializable: fall back to its string form.
            commit_info = str(commit)
        summaries.append(openai_get_AI_summary(commit_info))
    return summaries
def generate_contributor_report_prompt(summaries):
    """
    Build a report prompt from commit summaries, send it to the chat model and return the reply.
    Parameters:
    - summaries: list of str - Summaries of the contributor's commits.
    Returns:
    - str: The text returned by chat_complete for the report prompt (the
      prompt asks for a markdown report).
    """
    # One "- " bullet per summary, capped at 40000 characters.
    bullet_list = "\n".join(f"- {summary}" for summary in summaries)[:40000]
    # The prompt text below is kept flush-left because it is sent verbatim.
    prompt = f"""
Given the following list of commit summaries by a software developer, create a comprehensive report in markdown format outlining the developer's achievements, skills demonstrated through these commits, and areas for improvement. Use headings for each section.
## Commit Summaries
{bullet_list}
## Report structer
### Achievements
- Analyze the summaries to highlight significant contributions and achievements.(1)
---
### Achievements
- Analyze the summaries to highlight significant contributions and achievements.(2)
---
### Achievements
- Analyze problems overcomed.(3)
---
### Skills Demonstrated
- Based on the commit summaries, list the technical skills demonstrated by the developer.
---
### Skills Demonstrated
- Based on the commit summaries, list the soft skills demonstrated by the developer.
---
### Skills Demonstrated
- Based on the commit summaries, list the technical and soft skills demonstrated by the developer.
### Areas for Improvement
/- Suggest areas where the developer could improve based on patterns or gaps identified in the commit summaries.(1)
/- Suggest areas where the developer could improve based on patterns or gaps identified in the commit summaries.(2)
/- Suggest areas where the developer could improve based on patterns or gaps identified in the commit summaries.(3)
Please ensure the report is well-organized, respets the structure above, and provides clear insights into the developer's contributions and growth areas.
use a lot of --- seperators to seperate sections and bullet points to have each section short
and / to show continued informations
"""
    return chat_complete(prompt)