# rbbist's picture
# Update app.py
# 3f10fa8 verified
import gradio as gr
import requests
from datetime import datetime, timezone, timedelta
import os
import google.generativeai as genai
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
def get_all_commits_in_range(owner, repo_name, start_date, end_date, token):
    """
    Fetch all unique commits across all branches within a date range.

    Lists every branch of the repository, requests the commits of each
    branch between the two dates from the GitHub REST API (following
    pagination), and de-duplicates the results by commit SHA.

    Args:
        owner: GitHub account or organisation that owns the repository.
        repo_name: Repository name.
        start_date: First day of the range as ``YYYY-MM-DD``; the range
            starts at 00:00:00 UTC on that day.
        end_date: Last day of the range as ``YYYY-MM-DD``; the range ends
            at 23:59:59 UTC on that day.
        token: GitHub token sent in the ``Authorization`` header.

    Returns:
        ``(commits, None)`` on success, where ``commits`` is a list of
        commit objects as returned by the API, sorted by author date
        (oldest first); or ``(None, error_message)`` when the branch list
        cannot be fetched or when the request failed for every branch.
    """
    headers = {"Authorization": f"token {token}"}
    base_url = f"https://api.github.com/repos/{owner}/{repo_name}"
    request_timeout = 30  # seconds; keeps a stalled connection from hanging the UI

    def iter_pages(url, params):
        """Yield every item of a paginated list endpoint, page by page."""
        while url:
            resp = requests.get(
                url, headers=headers, params=params, timeout=request_timeout
            )
            resp.raise_for_status()
            yield from resp.json()
            # The "next" link already carries the full query string, so the
            # original params must not be sent again.
            url = resp.links.get("next", {}).get("url")
            params = None

    # Get all branches
    try:
        branches = [
            b["name"]
            for b in iter_pages(f"{base_url}/branches", {"per_page": 100})
        ]
    except requests.exceptions.RequestException as e:
        return None, f"Error fetching branches: {str(e)}"

    # Define date range (interpreted as UTC days)
    since = f"{start_date}T00:00:00Z"
    until = f"{end_date}T23:59:59Z"

    all_commits = {}
    failed_branches = 0
    last_error = None

    # Fetch commits from all branches
    for branch in branches:
        params = {"sha": branch, "since": since, "until": until, "per_page": 100}
        try:
            for commit in iter_pages(f"{base_url}/commits", params):
                # Keyed by SHA so a commit reachable from several branches
                # is only kept once (first occurrence wins).
                all_commits.setdefault(commit["sha"], commit)
        except requests.exceptions.RequestException as e:
            # Best effort: a single broken branch must not abort the run.
            failed_branches += 1
            last_error = e

    # If nothing could be fetched at all, report it instead of pretending
    # that the range simply contains no commits.
    if branches and failed_branches == len(branches):
        return None, (
            f"Error fetching commits: requests failed for all "
            f"{len(branches)} branch(es) (last error: {last_error})"
        )

    # Sort by commit date
    sorted_commits = sorted(
        all_commits.values(),
        key=lambda c: datetime.strptime(
            c["commit"]["author"]["date"], "%Y-%m-%dT%H:%M:%SZ"
        ),
    )
    return sorted_commits, None
def format_commits(commits, timezone_offset_hours=5, timezone_offset_minutes=45):
    """
    Render commits as one text line each, with times shifted to a fixed offset.

    Each line has the form ``<sha8> | <author, padded to 20> | <local time> |
    <first line of message>``. Author dates are parsed as UTC and converted
    to the zone given by the two offset arguments. Lines are joined with
    newlines; an empty input produces an empty string.
    """
    def render(entry):
        # Pull the fields in a fixed order, then convert the timestamp.
        short_sha = entry["sha"][:8]
        subject = entry["commit"]["message"].partition("\n")[0]
        who = entry["commit"]["author"]["name"]
        stamp = entry["commit"]["author"]["date"]

        moment = datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ").replace(
            tzinfo=timezone.utc
        )
        target_zone = timezone(
            timedelta(hours=timezone_offset_hours, minutes=timezone_offset_minutes)
        )
        shifted = moment.astimezone(target_zone)

        return f"{short_sha} | {who:20s} | {shifted:%Y-%m-%d %H:%M:%S} | {subject}"

    return "\n".join(render(entry) for entry in commits)
def summarize_with_gemini(repo_name, owner, commit_lines, start_date, end_date, api_key):
    """
    Generate a structured commit-history report with Gemini.

    Builds a fixed-format prompt from the repository identity, the date
    range and the pre-formatted commit lines, then asks the
    ``gemini-2.5-flash`` model to write the report.

    Args:
        repo_name: Repository name.
        owner: Repository owner.
        commit_lines: Commit history as text, one commit per line.
        start_date: First day of the analysed range.
        end_date: Last day of the analysed range.
        api_key: Gemini API key.

    Returns:
        The generated report text, or a string starting with
        ``"Error generating summary: "`` if configuring the client or
        calling the model fails.
    """
    # A single-day range reads "2024-01-01" rather than "2024-01-01 to 2024-01-01".
    date_range = f"{start_date}" if start_date == end_date else f"{start_date} to {end_date}"
    # One commit per line; an empty history counts as zero commits.
    total_commits = commit_lines.count("\n") + 1 if commit_lines else 0

    prompt = f"""You are a technical project analyst reviewing Git commit history.
Write the report in the following format EXACTLY:
1. Begin with: "Here’s a comprehensive analysis of the Git commit history for the {owner}/{repo_name} repository on {date_range}:"
2. Use numbered section headers WITHOUT colons, each on its own line, with a blank line after each header.
Example:
1. Executive Summary
<content>
3. Required sections and formatting:
1. Executive Summary
Write 2–3 sentences summarizing the overall work done during this period.
2. Key Developments
Provide 3–5 bullet points describing the most important changes, features, or fixes.
3. Development Timeline
Provide a chronological narrative of the day’s work with time ranges.
Use bullet points and sub-bullets in the exact style shown:
* Morning (HH:MM - HH:MM):
* Description...
* Afternoon (HH:MM - HH:MM):
* Description...
4. Author Contributions
For EACH author, format exactly like this:
* Author: <name>
* Total number of commits: <number>
* Time range of their work: <start> to <end>
* Main focus areas and specific accomplishments: <text>
* Notable commits with their purpose:
* `<hash>`: <description>
5. Technical Insights
Use multiple bullet groups exactly like the example:
* Types of work:
* Feature Development (...)
* Refactoring (...)
* etc.
* Areas of the codebase that received attention:
* <file or module>
* Potential concerns or areas needing follow-up:
* <text>
6. Impact Assessment
Write a detailed, narrative paragraph evaluating the overall progress.
Use the following data to generate the report:
Repository: {owner}/{repo_name}
Date: {date_range}
Total Commits: {total_commits}
Commit History:
{commit_lines}
Write the full analysis following the format EXACTLY as described above."""

    try:
        # Client setup is inside the try so configuration problems are
        # reported the same way as generation failures.
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel("gemini-2.5-flash")
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        return f"Error generating summary: {str(e)}"
def analyze_commits(repo_url, start_date, end_date):
    """
    Main function to orchestrate the commit analysis.

    Reads the API credentials from the environment, validates the
    repository reference and the dates, fetches and formats the commits,
    and appends an AI-generated summary.

    Args:
        repo_url: Repository as ``owner/repo`` or as a GitHub URL.
        start_date: First day of the range, ``YYYY-MM-DD``.
        end_date: Last day of the range, ``YYYY-MM-DD``.

    Returns:
        A single text report. Every failure (missing tokens, invalid
        repository or dates, fetch errors, no commits) is reported as a
        message in the returned string rather than raised.
    """
    # Get tokens from environment variables
    github_token = os.getenv("GITHUB_TOKEN")
    gemini_key = os.getenv("GEMINI_API_KEY")

    # Validate tokens
    if not github_token or not gemini_key:
        return "❌ API tokens not configured. Please set GITHUB_TOKEN and GEMINI_API_KEY in Hugging Face Spaces secrets."

    # Parse repository URL
    invalid_repo_msg = "❌ Invalid repository format. Use 'owner/repo' or full GitHub URL."
    repo_ref = (repo_url or "").strip().rstrip('/')
    if "github.com/" in repo_ref:
        # Extra path segments after owner/repo (e.g. /tree/main) are ignored.
        parts = repo_ref.split('github.com/')[-1].split('/')
        if len(parts) < 2:
            return invalid_repo_msg
    else:
        # Assume format: owner/repo
        parts = repo_ref.split('/')
        if len(parts) != 2:
            return invalid_repo_msg
    owner, repo_name = parts[0], parts[1]
    # Strip only a trailing ".git"; names such as "user.github.io" contain
    # ".git" in the middle and must be left intact.
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]
    if not owner or not repo_name:
        return invalid_repo_msg

    # Validate and normalise the dates before they are sent to the API
    try:
        start_day = datetime.strptime(start_date.strip(), "%Y-%m-%d").date()
        end_day = datetime.strptime(end_date.strip(), "%Y-%m-%d").date()
    except (AttributeError, TypeError, ValueError):
        return "❌ Invalid date format. Use YYYY-MM-DD for both start and end dates."
    if start_day > end_day:
        return "❌ Start date must not be after end date."
    start_date, end_date = start_day.isoformat(), end_day.isoformat()

    # Fetch commits
    status_msg = f"🔍 Fetching commits from {owner}/{repo_name}...\n"
    commits, error = get_all_commits_in_range(owner, repo_name, start_date, end_date, github_token)
    if error:
        return f"{status_msg}❌ {error}"
    if not commits:
        return f"{status_msg}ℹ️ No commits found in the specified date range."
    status_msg += f"✅ Found {len(commits)} unique commits across all branches.\n\n"

    # Format commits
    formatted_commits = format_commits(commits)
    status_msg += "📝 Commit History:\n" + "="*80 + "\n"
    status_msg += formatted_commits + "\n\n"

    # Generate summary
    status_msg += "🤖 Generating AI summary...\n" + "="*80 + "\n"
    summary = summarize_with_gemini(repo_name, owner, formatted_commits, start_date, end_date, gemini_key)
    status_msg += summary
    return status_msg
# Gradio Interface
# The Markdown texts are kept at module level so they stay free of code
# indentation.
_INTRO_MD = """
# 📚 Git Commit Story Analyzer
Transform your Git commit history into comprehensive narratives using AI.
This tool fetches commits from **all branches** and generates detailed insights.
"""

_TIPS_MD = """
### 💡 Tips
- **Single Day**: Set both dates to the same day
- **Date Range**: Use different start and end dates
- **All Branches**: Automatically analyzes commits from all branches
- **Timezone**: Displays times in Nepal timezone (UTC+5:45)
### 🔐 Setup Required
Configure these secrets in Hugging Face Spaces:
- `GITHUB_TOKEN`: Your GitHub Personal Access Token
- `GEMINI_API_KEY`: Your Google Gemini API Key
"""

_USAGE_MD = """
---
### 📖 How to Use
1. **Enter Repository**: Provide the GitHub repository (e.g., `torvalds/linux` or full URL)
2. **Select Date Range**: Choose single day or date range for analysis
3. **Analyze**: Click the button to generate your commit story
### 🔧 Setup Instructions for Hugging Face Spaces
Before deploying, add these secrets in your Space settings:
1. Go to Settings → Repository secrets
2. Add `GITHUB_TOKEN` - Get from GitHub: Settings → Developer settings → Personal access tokens
3. Add `GEMINI_API_KEY` - Get from [Google AI Studio](https://makersuite.google.com/app/apikey)
**Note**: The tool fetches commits from all branches and removes duplicates automatically.
"""

# Computed once so both date fields always start with the same value.
_today = datetime.now().strftime("%Y-%m-%d")

with gr.Blocks(title="Git Commit Story Analyzer", theme=gr.themes.Soft()) as app:
    gr.Markdown(_INTRO_MD)

    with gr.Row():
        # Left column: inputs and the action button.
        with gr.Column(scale=2):
            repo_input = gr.Textbox(
                label="Repository",
                placeholder="owner/repo or https://github.com/owner/repo",
                info="Enter GitHub repository in format 'owner/repo' or paste the full URL"
            )
            with gr.Row():
                start_date_input = gr.Textbox(
                    label="Start Date",
                    value=_today,
                    placeholder="YYYY-MM-DD"
                )
                end_date_input = gr.Textbox(
                    label="End Date",
                    value=_today,
                    placeholder="YYYY-MM-DD"
                )
            analyze_btn = gr.Button("🚀 Analyze Commits", variant="primary", size="lg")

        # Right column: usage tips and required secrets.
        with gr.Column(scale=1):
            gr.Markdown(_TIPS_MD)

    output = gr.Textbox(
        label="Analysis Results",
        lines=25,
        max_lines=50,
        show_copy_button=True
    )

    # Run the full analysis when the button is clicked.
    analyze_btn.click(
        fn=analyze_commits,
        inputs=[repo_input, start_date_input, end_date_input],
        outputs=output
    )

    gr.Markdown(_USAGE_MD)

if __name__ == "__main__":
    app.launch()