# githubComplete / app.py
# (Hugging Face Space page header preserved as a comment: uploaded by
#  nihalaninihal — "Update app.py", commit d431f8e, verified)
import os
import json
import time
import re
import logging
import datetime
import concurrent.futures
import sys
import base64
import tempfile
from pathlib import Path
from typing import Dict, List, Union, Any, Optional, Tuple, Set
from collections import Counter, defaultdict
from dataclasses import dataclass, field, asdict
from io import BytesIO, StringIO
import urllib.request
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tqdm.notebook import tqdm
from dateutil.relativedelta import relativedelta
from github import Github, GithubException, RateLimitExceededException
import gradio as gr
# For PDF Generation
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle, PageBreak
from reportlab.lib.units import inch
from reportlab.pdfgen import canvas
from reportlab.lib.enums import TA_CENTER, TA_LEFT
# Configure logging
# Root logging configuration: INFO level, timestamped records to stderr.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()],
)

# Module-level logger shared by every class in this file.
logger = logging.getLogger("github_analyzer")
@dataclass
class GitHubAPIConfig:
    """Configuration for the GitHub API client with sensible defaults.

    Groups together API credentials, retry behaviour, file-type
    classification lists, per-category collection limits, and output
    settings consumed by GithubClient and GitHubRepoAnalyzer.
    """
    # API access configuration
    token: Optional[str] = None  # Personal access token; None = unauthenticated (lower rate limit)
    max_retries: int = 5
    backoff_factor: int = 2  # exponential backoff base: sleep = backoff_factor ** retry_count
    per_page: int = 100  # Max allowed by GitHub
    timeout: int = 30
    # Retry status codes (secondary rate limits and transient server errors)
    retry_status_codes: Set[int] = field(default_factory=lambda: {
        403, 429, 500, 502, 503, 504
    })
    # Permission types
    collaborator_permission_types: List[str] = field(default_factory=lambda: [
        "admin", "push", "pull", "maintain", "triage"
    ])
    # File classification: extension lists used to bucket repository files.
    # NOTE: some extensions appear in several lists (e.g. ".json", ".txt");
    # classification order is up to the caller.
    code_extensions: List[str] = field(default_factory=lambda: [
        ".py", ".js", ".java", ".c", ".cpp", ".cs", ".go", ".php", ".rb",
        ".swift", ".kt", ".ts", ".rs", ".scala", ".lua", ".m", ".mm",
        ".h", ".hpp", ".cc", ".hh", ".f", ".f90", ".f95", ".f03", ".f08",
        ".for", ".f77", ".jl", ".pl", ".pm", ".t", ".r", ".dart", ".groovy",
        ".v", ".vhd", ".vhdl", ".erl", ".hrl", ".hs", ".lhs", ".ex", ".exs", ".hx"
    ])
    markup_extensions: List[str] = field(default_factory=lambda: [
        ".md", ".html", ".htm", ".xml", ".json", ".yaml", ".yml", ".txt",
        ".rst", ".tex", ".adoc", ".csv", ".tsv", ".toml", ".ini", ".cfg"
    ])
    script_extensions: List[str] = field(default_factory=lambda: [
        ".sh", ".bash", ".zsh", ".ps1", ".bat", ".cmd"
    ])
    notebook_extensions: List[str] = field(default_factory=lambda: [
        ".ipynb"
    ])
    data_extensions: List[str] = field(default_factory=lambda: [
        ".csv", ".tsv", ".json", ".xml", ".xls", ".xlsx", ".hdf5",
        ".parquet", ".feather", ".pkl", ".sav", ".dta", ".arff"
    ])
    config_extensions: List[str] = field(default_factory=lambda: [
        ".yml", ".yaml", ".json", ".toml", ".ini", ".cfg", ".conf"
    ])
    other_extensions: List[str] = field(default_factory=lambda: [
        ".txt", ".log", ".svg", ".png", ".jpg", ".jpeg"
    ])
    # Data collection limits (set to None for no limit)
    max_contributors: Optional[int] = 50
    max_issues: Optional[int] = 100
    max_commits: Optional[int] = 200
    max_search_results: Optional[int] = 50
    max_pull_requests: Optional[int] = 100
    max_collaborators: Optional[int] = 30
    # Output configuration
    output_dir: str = "/tmp/github_data"
    generate_visualizations: bool = True

    def __post_init__(self):
        """Ensure output directory exists"""
        os.makedirs(self.output_dir, exist_ok=True)

    def all_code_extensions(self) -> List[str]:
        """Return all code-related file extensions (code + script + config, deduplicated)."""
        return list(set(
            self.code_extensions +
            self.script_extensions +
            self.config_extensions
        ))
class GithubClient:
    """
    A robust GitHub client that handles rate limiting, retries, and provides
    consistent error handling on top of PyGithub.
    """

    def __init__(self, config: GitHubAPIConfig):
        """Initialize the GitHub client with configuration."""
        self.config = config
        self.github = Github(
            config.token,
            per_page=config.per_page,
            timeout=config.timeout,
            retry=config.max_retries,
        )
        self.cache = {}  # Simple in-memory cache; never invalidated (fine for one-shot analyses)

    def get_repo(self, repo_path: str):
        """Get a repository by "owner/name" path, with caching."""
        cache_key = f"repo:{repo_path}"
        if cache_key in self.cache:
            return self.cache[cache_key]
        repo = self.github.get_repo(repo_path)
        self.cache[cache_key] = repo
        return repo

    def _handle_exception(self, e: GithubException, retry_count: int) -> bool:
        """
        Handle GitHub exceptions with proper retries and backoff strategy.

        Args:
            e: The exception to handle
            retry_count: Current retry count

        Returns:
            bool: True if retry should be attempted, False otherwise
        """
        if retry_count >= self.config.max_retries:
            logger.error(f"Max retries ({self.config.max_retries}) exceeded.")
            return False
        if isinstance(e, RateLimitExceededException):
            # Primary rate limit: sleep until the reset timestamp reported by the API
            # (falls back to a one-hour wait if the response lacks core rate info).
            rate_limit = self.github.get_rate_limit()
            reset_time = rate_limit.core.reset.timestamp() if hasattr(rate_limit, 'core') else time.time() + 3600
            sleep_time = max(0, int(reset_time - time.time())) + 1
            logger.warning(f"Rate limit exceeded. Waiting for {sleep_time} seconds...")
            time.sleep(sleep_time)
            return True
        if e.status in self.config.retry_status_codes:
            # Secondary rate limits / transient server errors: exponential backoff.
            sleep_time = self.config.backoff_factor ** retry_count
            logger.warning(
                f"Temporary error (status {e.status}). Retrying in {sleep_time} seconds. "
                f"Attempt {retry_count+1}/{self.config.max_retries}."
            )
            time.sleep(sleep_time)
            return True
        # Non-recoverable error
        logger.error(f"Non-recoverable GitHub API error: {e}")
        return False

    def _paginated_request(self, method, *args, **kwargs):
        """
        Execute a paginated GitHub API request with retry logic.

        Args:
            method: The PyGithub method to call
            max_results: optional keyword cap on the number of items returned

        Returns:
            List of results, or None on non-recoverable error

        BUGFIX: the previous implementation accumulated into a single list
        across retries and then re-entered the pagination loop based on
        ``totalCount``, which duplicated items after a mid-iteration failure.
        Each attempt now builds a fresh list.
        """
        retry_count = 0
        max_results = kwargs.pop('max_results', None)
        while retry_count <= self.config.max_retries:
            try:
                results = []
                # Iterating a PyGithub PaginatedList transparently walks all pages.
                for item in method(*args, **kwargs):
                    results.append(item)
                    if max_results and len(results) >= max_results:
                        return results
                return results
            except GithubException as e:
                if self._handle_exception(e, retry_count):
                    retry_count += 1
                else:
                    return None
        return None

    def _execute_request(self, method, *args, **kwargs):
        """
        Execute a single GitHub API request with retry logic.

        Args:
            method: The PyGithub method to call

        Returns:
            Result of the API call, or None on 404 or non-recoverable error
        """
        retry_count = 0
        while retry_count <= self.config.max_retries:
            try:
                return method(*args, **kwargs)
            except GithubException as e:
                # 404 means "resource absent" (e.g. no README) — not worth retrying.
                if e.status == 404:
                    logger.info(f"Resource not found: {e}")
                    return None
                if self._handle_exception(e, retry_count):
                    retry_count += 1
                else:
                    return None
        return None
class GitHubRepoAnalyzer:
"""
Main class for analyzing GitHub repositories and generating insights.
"""
def __init__(self, config: GitHubAPIConfig):
    """Initialize the analyzer.

    Args:
        config: API credentials, limits and output settings shared by all fetches.
    """
    self.config = config
    # All API traffic goes through this retry/rate-limit-aware client.
    self.client = GithubClient(config)
def get_repo_details(self, repo) -> Dict[str, Any]:
    """Get comprehensive repository metadata as a JSON-serialisable dict.

    Args:
        repo: a PyGithub Repository object.

    Returns:
        Flat dict of repository attributes; datetimes are ISO strings,
        missing/optional attributes are None.
    """
    logger.info(f"Fetching repository details for {repo.full_name}")
    return {
        "name": repo.name,
        "full_name": repo.full_name,
        "description": repo.description,
        "html_url": repo.html_url,
        "stargazers_count": repo.stargazers_count,
        "watchers_count": repo.watchers_count,
        "forks_count": repo.forks_count,
        "open_issues_count": repo.open_issues_count,
        "language": repo.language,
        "default_branch": repo.default_branch,
        # Datetimes serialised as ISO-8601 strings so the dict can be dumped to JSON.
        "created_at": repo.created_at.isoformat() if repo.created_at else None,
        "updated_at": repo.updated_at.isoformat() if repo.updated_at else None,
        "pushed_at": repo.pushed_at.isoformat() if repo.pushed_at else None,
        "license": repo.license.name if repo.license else None,
        "topics": list(repo.get_topics()),  # extra API call
        "archived": repo.archived,
        "disabled": repo.disabled,
        "visibility": repo.visibility,
        "has_wiki": repo.has_wiki,
        "has_pages": repo.has_pages,
        "has_projects": repo.has_projects,
        "has_issues": repo.has_issues,
        # hasattr guards: attribute availability varies across PyGithub versions.
        "has_discussions": repo.has_discussions if hasattr(repo, 'has_discussions') else None,
        "size": repo.size,  # Size in KB
        "network_count": repo.network_count,
        "subscribers_count": repo.subscribers_count,
        "organization": repo.organization.login if repo.organization else None,
        "parent": repo.parent.full_name if hasattr(repo, 'parent') and repo.parent else None,
        "fork": repo.fork,
    }
def get_contributors(self, repo) -> List[Dict[str, Any]]:
    """Get repository contributors with detailed profile information.

    Returns:
        List of contributor dicts (capped at config.max_contributors);
        empty list when the request fails non-recoverably.

    NOTE(review): accessing profile fields such as ``followers`` or ``bio``
    may trigger an extra lazy API request per contributor in PyGithub —
    confirm against the installed PyGithub version if rate limits bite.
    """
    logger.info(f"Fetching contributors for {repo.full_name}")
    contributors = self.client._paginated_request(
        repo.get_contributors,
        max_results=self.config.max_contributors
    )
    if contributors is None:
        return []
    return [
        {
            "login": c.login,
            "id": c.id,
            "contributions": c.contributions,
            "type": c.type,
            "html_url": c.html_url,
            "followers": c.followers,
            "following": c.following,
            "public_repos": c.public_repos if hasattr(c, 'public_repos') else None,
            "bio": c.bio if hasattr(c, 'bio') else None,
            "location": c.location if hasattr(c, 'location') else None,
            "company": c.company if hasattr(c, 'company') else None,
            "email": c.email if hasattr(c, 'email') else None,
            "avatar_url": c.avatar_url if hasattr(c, 'avatar_url') else None,
        }
        for c in contributors
    ]
def get_languages(self, repo) -> Dict[str, int]:
    """Return a mapping of language name -> bytes of code ({} on failure)."""
    logger.info(f"Fetching languages for {repo.full_name}")
    language_bytes = self.client._execute_request(repo.get_languages)
    if not language_bytes:
        return {}
    return language_bytes
def get_issues(self, repo, state: str = "all") -> List[Dict[str, Any]]:
    """Get repository issues as JSON-serialisable dicts.

    Args:
        repo: a PyGithub Repository object.
        state: "open", "closed" or "all" (GitHub API filter).

    Returns:
        List of issue dicts (capped at config.max_issues). Note that the
        GitHub issues endpoint also returns pull requests; the
        "pull_request" flag distinguishes them.
    """
    logger.info(f"Fetching issues for {repo.full_name} with state={state}")
    issues = self.client._paginated_request(
        repo.get_issues,
        state=state,
        max_results=self.config.max_issues
    )
    if issues is None:
        return []
    return [
        {
            "id": issue.id,
            "number": issue.number,
            "title": issue.title,
            "body": issue.body,
            "state": issue.state,
            "user_login": issue.user.login if issue.user else None,
            "labels": [label.name for label in issue.labels],
            "comments": issue.comments,
            "created_at": issue.created_at.isoformat() if issue.created_at else None,
            "updated_at": issue.updated_at.isoformat() if issue.updated_at else None,
            "closed_at": issue.closed_at.isoformat() if issue.closed_at else None,
            # True when the "issue" is actually a pull request.
            "pull_request": issue.pull_request is not None,
            "milestone": issue.milestone.title if issue.milestone else None,
            "assignees": [user.login for user in issue.assignees] if issue.assignees else [],
        }
        for issue in issues
    ]
def get_commits(self, repo) -> List[Dict[str, Any]]:
    """Get repository commits as JSON-serialisable dicts.

    Returns:
        List of commit dicts (capped at config.max_commits), each with
        author/committer identity, message, stats and changed files.

    NOTE(review): reading ``commit.stats`` and ``commit.files`` may issue
    an extra API request per commit in PyGithub — confirm; this can be
    expensive for large max_commits values.
    """
    logger.info(f"Fetching commits for {repo.full_name}")
    commits = self.client._paginated_request(
        repo.get_commits,
        max_results=self.config.max_commits
    )
    if commits is None:
        return []
    return [
        {
            "sha": commit.sha,
            "commit_message": commit.commit.message,
            # "author" is the GitHub account; "commit.author" is the git metadata —
            # they can differ (e.g. unlinked email addresses).
            "author_login": commit.author.login if commit.author else None,
            "author_name": commit.commit.author.name if commit.commit and commit.commit.author else None,
            "author_email": commit.commit.author.email if commit.commit and commit.commit.author else None,
            "committer_login": commit.committer.login if commit.committer else None,
            "committer_name": commit.commit.committer.name if commit.commit and commit.commit.committer else None,
            "date": commit.commit.author.date.isoformat() if commit.commit and commit.commit.author else None,
            "html_url": commit.html_url,
            "stats": {
                "additions": commit.stats.additions if hasattr(commit, 'stats') else None,
                "deletions": commit.stats.deletions if hasattr(commit, 'stats') else None,
                "total": commit.stats.total if hasattr(commit, 'stats') else None,
            },
            "files_changed": [
                {"filename": f.filename, "additions": f.additions, "deletions": f.deletions, "status": f.status}
                for f in commit.files
            ] if hasattr(commit, 'files') else [],
        }
        for commit in commits
    ]
def get_readme(self, repo) -> str:
    """Return the repository README decoded as UTF-8 ("" when missing or undecodable)."""
    logger.info(f"Fetching README for {repo.full_name}")
    readme = self.client._execute_request(repo.get_readme)
    if readme is not None:
        try:
            return readme.decoded_content.decode('utf-8')
        except UnicodeDecodeError:
            logger.warning(f"Could not decode README content for {repo.full_name}")
    return ""
def get_pull_requests(self, repo, state: str = "all") -> List[Dict[str, Any]]:
    """Get repository pull requests as JSON-serialisable dicts.

    Args:
        repo: a PyGithub Repository object.
        state: "open", "closed" or "all" (GitHub API filter).

    Returns:
        List of PR dicts (capped at config.max_pull_requests).
        hasattr guards default missing numeric fields to 0 and others to
        None for cross-version PyGithub compatibility.
    """
    logger.info(f"Fetching pull requests for {repo.full_name} with state={state}")
    pulls = self.client._paginated_request(
        repo.get_pulls,
        state=state,
        max_results=self.config.max_pull_requests
    )
    if pulls is None:
        return []
    return [
        {
            "id": pull.id,
            "number": pull.number,
            "title": pull.title,
            "body": pull.body,
            "state": pull.state,
            "user_login": pull.user.login if pull.user else None,
            "created_at": pull.created_at.isoformat() if pull.created_at else None,
            "updated_at": pull.updated_at.isoformat() if pull.updated_at else None,
            "closed_at": pull.closed_at.isoformat() if pull.closed_at else None,
            "merged_at": pull.merged_at.isoformat() if pull.merged_at else None,
            "draft": pull.draft if hasattr(pull, 'draft') else None,
            "mergeable": pull.mergeable if hasattr(pull, 'mergeable') else None,
            "mergeable_state": pull.mergeable_state if hasattr(pull, 'mergeable_state') else None,
            "merged": pull.merged if hasattr(pull, 'merged') else None,
            "merge_commit_sha": pull.merge_commit_sha if hasattr(pull, 'merge_commit_sha') else None,
            "comments": pull.comments if hasattr(pull, 'comments') else 0,
            "review_comments": pull.review_comments if hasattr(pull, 'review_comments') else 0,
            "commits": pull.commits if hasattr(pull, 'commits') else 0,
            "additions": pull.additions if hasattr(pull, 'additions') else 0,
            "deletions": pull.deletions if hasattr(pull, 'deletions') else 0,
            "changed_files": pull.changed_files if hasattr(pull, 'changed_files') else 0,
            "head_ref": pull.head.ref if hasattr(pull, 'head') and pull.head else None,
            "base_ref": pull.base.ref if hasattr(pull, 'base') and pull.base else None,
            "labels": [label.name for label in pull.labels] if hasattr(pull, 'labels') else [],
            "assignees": [user.login for user in pull.assignees] if hasattr(pull, 'assignees') else [],
            "requested_reviewers": [user.login for user in pull.requested_reviewers] if hasattr(pull, 'requested_reviewers') else [],
        }
        for pull in pulls
    ]
def get_collaborators(self, repo, affiliation: str = "all") -> List[Dict[str, Any]]:
    """Get repository collaborators (requires push access on the repo).

    Args:
        repo: a PyGithub Repository object.
        affiliation: "outside", "direct" or "all" (GitHub API filter).

    Returns:
        List of collaborator dicts (capped at config.max_collaborators).

    NOTE: _get_permission_level issues one extra API call per collaborator.
    """
    logger.info(f"Fetching collaborators for {repo.full_name} with affiliation={affiliation}")
    collaborators = self.client._paginated_request(
        repo.get_collaborators,
        affiliation=affiliation,
        max_results=self.config.max_collaborators
    )
    if collaborators is None:
        return []
    return [
        {
            "login": c.login,
            "id": c.id,
            "type": c.type,
            "url": c.url,
            "site_admin": c.site_admin if hasattr(c, 'site_admin') else None,
            "role_name": self._get_permission_level(repo, c.login),
            "avatar_url": c.avatar_url if hasattr(c, 'avatar_url') else None,
        }
        for c in collaborators
    ]
def _get_permission_level(self, repo, username: str) -> str:
"""Get permission level for a collaborator."""
try:
return repo.get_collaborator_permission(username)
except GithubException:
return "unknown"
def get_file_distribution(self, repo) -> Dict[str, int]:
    """Analyze file type distribution via a breadth-first contents walk.

    Returns:
        Dict mapping lowercase extension (e.g. ".py") or "no_extension"
        to the number of files seen. The walk is bounded: at most 3
        directory levels deep and at most 100 directories per level, to
        keep API usage reasonable. {} on failure.
    """
    logger.info(f"Analyzing file distribution for {repo.full_name}")
    # Get all files in the repo (only feasible for smaller repos)
    try:
        contents = self.client._execute_request(repo.get_contents, "")
        if not contents:
            return {}
        file_types = defaultdict(int)
        directories = []
        # Process initial (root) contents: count files, queue directories.
        for item in contents:
            if item.type == "dir":
                directories.append(item.path)
            elif item.type == "file":
                ext = os.path.splitext(item.name)[1].lower()
                file_types[ext if ext else "no_extension"] += 1
        # Process directories level by level (bounded depth to avoid API rate limits).
        max_depth = 3
        for depth in range(max_depth):
            if not directories:
                break
            next_level = []
            for directory in directories[:100]:  # Limit to avoid excessive API calls
                dir_contents = self.client._execute_request(repo.get_contents, directory)
                if not dir_contents:
                    continue
                for item in dir_contents:
                    if item.type == "dir":
                        next_level.append(item.path)
                    elif item.type == "file":
                        ext = os.path.splitext(item.name)[1].lower()
                        file_types[ext if ext else "no_extension"] += 1
            directories = next_level
        return dict(file_types)
    except GithubException:
        logger.warning(f"Could not get file distribution for {repo.full_name}")
        return {}
def search_code(self, repo, query_terms: List[str]) -> List[Dict[str, Any]]:
    """Search for specific terms in the repository code via the code-search API.

    Args:
        repo: a PyGithub Repository object.
        query_terms: terms searched one at a time, each scoped with
            ``repo:owner/name`` and capped at config.max_search_results.

    Returns:
        Flat list of match dicts; each carries the originating "term".
        Results are re-filtered by repository full name as a safety net.
    """
    logger.info(f"Searching code in {repo.full_name} for terms: {query_terms}")
    results = []
    for term in query_terms:
        query = f"repo:{repo.full_name} {term}"
        search_results = self.client._paginated_request(
            self.client.github.search_code,
            query,
            max_results=self.config.max_search_results
        )
        if search_results:
            results.extend([
                {
                    "term": term,
                    "name": result.name,
                    "path": result.path,
                    "sha": result.sha,
                    "url": result.html_url,
                    "repository": result.repository.full_name,
                }
                for result in search_results
                if result.repository.full_name == repo.full_name
            ])
    return results
def get_branches(self, repo) -> List[Dict[str, Any]]:
    """Return one summary dict (name, protection flag, head commit SHA) per branch."""
    logger.info(f"Fetching branches for {repo.full_name}")
    branches = self.client._paginated_request(repo.get_branches)
    if branches is None:
        return []
    summaries = []
    for branch in branches:
        summaries.append({
            "name": branch.name,
            "protected": branch.protected,
            "commit_sha": branch.commit.sha if branch.commit else None,
        })
    return summaries
def get_releases(self, repo) -> List[Dict[str, Any]]:
    """Get repository releases, including their downloadable assets.

    Returns:
        List of release dicts (no explicit cap; bounded only by the API).
        Each release's assets trigger an extra ``get_assets()`` call.
    """
    logger.info(f"Fetching releases for {repo.full_name}")
    releases = self.client._paginated_request(repo.get_releases)
    if releases is None:
        return []
    return [
        {
            "id": release.id,
            "tag_name": release.tag_name,
            "name": release.title,
            "body": release.body,
            "draft": release.draft,
            "prerelease": release.prerelease,
            "created_at": release.created_at.isoformat() if release.created_at else None,
            "published_at": release.published_at.isoformat() if release.published_at else None,
            "author_login": release.author.login if release.author else None,
            "html_url": release.html_url,
            "assets": [
                {
                    "name": asset.name,
                    "label": asset.label,
                    "content_type": asset.content_type,
                    "size": asset.size,
                    "download_count": asset.download_count,
                    "browser_download_url": asset.browser_download_url,
                }
                for asset in release.get_assets()
            ],
        }
        for release in releases
    ]
def get_workflows(self, repo) -> List[Dict[str, Any]]:
    """Get the repository's GitHub Actions workflow definitions.

    Returns:
        List of workflow dicts, or [] when the API/PyGithub version does
        not expose workflows (AttributeError) or the request fails.
    """
    logger.info(f"Fetching workflows for {repo.full_name}")
    try:
        workflows = self.client._paginated_request(repo.get_workflows)
        if workflows is None:
            return []
        return [
            {
                "id": workflow.id,
                "name": workflow.name,
                "path": workflow.path,
                "state": workflow.state,
                "created_at": workflow.created_at.isoformat() if workflow.created_at else None,
                "updated_at": workflow.updated_at.isoformat() if workflow.updated_at else None,
            }
            for workflow in workflows
        ]
    except (GithubException, AttributeError):
        # Older PyGithub versions or repositories without workflows
        return []
def analyze_commit_activity(self, repo) -> Dict[str, Any]:
    """Analyze commit activity patterns from the GitHub statistics endpoints.

    Returns:
        Dict with "weekly_commits" (week date, total, per-day counts) and
        "code_frequency" (week date, additions, deletions). {} when the
        stats endpoint returns nothing (GitHub computes stats lazily and
        may return empty on the first request).
    """
    logger.info(f"Analyzing commit activity for {repo.full_name}")
    # Get stats commit activity
    stats = self.client._execute_request(repo.get_stats_commit_activity)
    if not stats:
        return {}
    weekly_commits = []
    for week in stats:
        if hasattr(week, 'week') and hasattr(week, 'total'):
            # week.week is a Unix timestamp for the start of the week.
            date = datetime.datetime.fromtimestamp(week.week).strftime('%Y-%m-%d')
            weekly_commits.append({
                "week": date,
                "total": week.total,
                "days": week.days if hasattr(week, 'days') else [],
            })
    # Get code frequency (weekly additions/deletions)
    code_freq = self.client._execute_request(repo.get_stats_code_frequency)
    if not code_freq:
        code_frequency = []
    else:
        code_frequency = []
        for item in code_freq:
            # item is (timestamp, additions, deletions-as-negative).
            date = datetime.datetime.fromtimestamp(item[0]).strftime('%Y-%m-%d')
            code_frequency.append({
                "week": date,
                "additions": item[1],
                "deletions": -item[2],  # Convert to positive for better readability
            })
    return {
        "weekly_commits": weekly_commits,
        "code_frequency": code_frequency,
    }
def analyze_contributor_activity(self, repo) -> Dict[str, Any]:
    """Analyze per-contributor weekly activity from the statistics endpoint.

    Returns:
        {"contributor_stats": [...]} where each entry has the author login,
        total commit count, and a list of weekly additions/deletions/commits.
        {} when the stats endpoint returns nothing.
    """
    logger.info(f"Analyzing contributor activity for {repo.full_name}")
    # Get contributor stats
    stats = self.client._execute_request(repo.get_stats_contributors)
    if not stats:
        return {}
    contributor_stats = []
    for stat in stats:
        # Skip entries whose author account no longer resolves.
        if not hasattr(stat, 'author') or not stat.author:
            continue
        weeks_data = []
        for week in stat.weeks:
            if hasattr(week, 'w'):
                # w = week start timestamp, a = additions, d = deletions, c = commits
                # (GitHub stats API field names).
                date = datetime.datetime.fromtimestamp(week.w).strftime('%Y-%m-%d')
                weeks_data.append({
                    "week": date,
                    "additions": week.a,
                    "deletions": week.d,
                    "commits": week.c,
                })
        contributor_stats.append({
            "author": stat.author.login,
            "total_commits": stat.total,
            "weeks": weeks_data,
        })
    return {
        "contributor_stats": contributor_stats,
    }
def analyze_issue_distribution(self, issues: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Analyze distribution of issues by various metrics."""
if not issues:
return {}
# Convert to DataFrame for easier analysis
df = pd.DataFrame(issues)
# Issues by state
state_counts = df['state'].value_counts().to_dict() if 'state' in df else {}
# Issues by user
user_counts = df['user_login'].value_counts().head(10).to_dict() if 'user_login' in df else {}
# Pull requests vs regular issues
is_pr_counts = df['pull_request'].value_counts().to_dict() if 'pull_request' in df else {}
# Issues by labels (flattening the labels list)
labels = []
if 'labels' in df:
for label_list in df['labels']:
if label_list:
labels.extend(label_list)
label_counts = Counter(labels)
top_labels = dict(label_counts.most_common(10))
# Time analysis
if 'created_at' in df:
df['created_date'] = pd.to_datetime(df['created_at'])
df['month_year'] = df['created_date'].dt.strftime('%Y-%m')
issues_by_month = df.groupby('month_year').size().to_dict()
else:
issues_by_month = {}
# Calculate resolution time for closed issues
resolution_times = []
if 'created_at' in df and 'closed_at' in df:
for _, issue in df.iterrows():
if pd.notna(issue.get('closed_at')) and pd.notna(issue.get('created_at')):
created = pd.to_datetime(issue['created_at'])
closed = pd.to_datetime(issue['closed_at'])
resolution_time = (closed - created).total_seconds() / 3600 # hours
resolution_times.append(resolution_time)
resolution_stats = {}
if resolution_times:
resolution_stats = {
"mean_hours": sum(resolution_times) / len(resolution_times),
"median_hours": sorted(resolution_times)[len(resolution_times) // 2],
"min_hours": min(resolution_times),
"max_hours": max(resolution_times),
}
return {
"by_state": state_counts,
"by_user": user_counts,
"pr_vs_issue": is_pr_counts,
"by_label": top_labels,
"by_month": issues_by_month,
"resolution_time": resolution_stats,
}
def generate_insights(self, repo_data: Dict[str, Any]) -> Dict[str, Any]:
    """Generate higher-level insights from the collected repository data.

    Args:
        repo_data: aggregate dict whose optional keys ("repo_details",
            "languages", "contributors", "issues", "pull_requests",
            "commits", "workflows", "file_distribution", "readme") are the
            outputs of the corresponding fetch methods on this class.

    Returns:
        Dict of derived metrics; each section is present only when its
        input data is available.
    """
    insights = {}
    # Repository activity and health
    if "repo_details" in repo_data:
        repo_details = repo_data["repo_details"]
        insights["repository_age_days"] = self._calculate_age_days(repo_details.get("created_at"))
        insights["freshness_days"] = self._calculate_freshness_days(repo_details.get("pushed_at"))
        # Popularity metrics
        insights["popularity"] = {
            "stars": repo_details.get("stargazers_count", 0),
            "forks": repo_details.get("forks_count", 0),
            "watchers": repo_details.get("watchers_count", 0),
            "star_fork_ratio": self._calculate_ratio(
                repo_details.get("stargazers_count", 0),
                repo_details.get("forks_count", 0)
            ),
        }
    # Language distribution (byte counts -> percentages)
    if "languages" in repo_data:
        languages = repo_data["languages"]
        total_bytes = sum(languages.values()) if languages else 0
        if total_bytes > 0:
            language_percentages = {
                lang: (bytes_count / total_bytes) * 100
                for lang, bytes_count in languages.items()
            }
            insights["language_distribution"] = {
                "primary_language": max(languages.items(), key=lambda x: x[1])[0] if languages else None,
                "language_count": len(languages),
                "percentages": language_percentages,
            }
    # Contributor insights
    if "contributors" in repo_data:
        contributors = repo_data["contributors"]
        if contributors:
            total_contributions = sum(c.get("contributions", 0) for c in contributors)
            insights["contributor_insights"] = {
                "contributor_count": len(contributors),
                "total_contributions": total_contributions,
                "avg_contributions_per_contributor": total_contributions / len(contributors) if len(contributors) > 0 else 0,
                "contribution_distribution": self._analyze_contribution_distribution(contributors),
            }
    # Issue and PR dynamics
    if "issues" in repo_data:
        issues = repo_data["issues"]
        insights["issue_insights"] = self.analyze_issue_distribution(issues)
    if "pull_requests" in repo_data:
        prs = repo_data["pull_requests"]
        insights["pr_insights"] = self.analyze_issue_distribution(prs)  # Reuse the same analysis
        # Additional PR-specific metrics
        if prs:
            insights["pr_code_change_stats"] = self._analyze_pr_code_changes(prs)
    # Commit patterns
    if "commits" in repo_data:
        commits = repo_data["commits"]
        insights["commit_insights"] = self._analyze_commit_patterns(commits)
    # Check for CI/CD presence
    insights["ci_cd_presence"] = self._detect_ci_cd(repo_data)
    # Documentation quality
    if "readme" in repo_data:
        readme = repo_data["readme"]
        insights["documentation_quality"] = self._assess_documentation_quality(readme)
    # Project Activity Level
    insights["activity_level"] = self._calculate_activity_level(repo_data)
    # Code complexity analysis
    insights["code_complexity"] = self._analyze_code_complexity(repo_data)
    # Community health analysis
    insights["community_health"] = self._analyze_community_health(repo_data)
    return insights
def _calculate_age_days(self, created_at_iso: str) -> float:
"""Calculate repository age in days."""
if not created_at_iso:
return 0
try:
created_at = datetime.datetime.fromisoformat(created_at_iso.replace('Z', '+00:00'))
now = datetime.datetime.now(datetime.timezone.utc)
return (now - created_at).total_seconds() / (24 * 3600)
except ValueError:
return 0
def _calculate_freshness_days(self, pushed_at_iso: str) -> float:
"""Calculate days since last push."""
if not pushed_at_iso:
return float('inf')
try:
pushed_at = datetime.datetime.fromisoformat(pushed_at_iso.replace('Z', '+00:00'))
now = datetime.datetime.now(datetime.timezone.utc)
return (now - pushed_at).total_seconds() / (24 * 3600)
except ValueError:
return float('inf')
def _calculate_ratio(self, numerator: int, denominator: int) -> float:
"""Calculate ratio with handling for zero denominator."""
return numerator / denominator if denominator and denominator > 0 else float('inf')
def _analyze_contribution_distribution(self, contributors: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Analyze the distribution of contributions among contributors."""
if not contributors:
return {}
# Sort contributors by number of contributions
sorted_contributors = sorted(contributors, key=lambda c: c.get("contributions", 0), reverse=True)
# Calculate percentiles
total_contributions = sum(c.get("contributions", 0) for c in contributors)
cumulative_contributions = 0
percentile_20 = 0
percentile_50 = 0
percentile_80 = 0
for i, contributor in enumerate(sorted_contributors):
contributions = contributor.get("contributions", 0)
cumulative_contributions += contributions
percentage = (cumulative_contributions / total_contributions) * 100
if percentage >= 20 and percentile_20 == 0:
percentile_20 = i + 1
if percentage >= 50 and percentile_50 == 0:
percentile_50 = i + 1
if percentage >= 80 and percentile_80 == 0:
percentile_80 = i + 1
# Calculate Gini coefficient to measure inequality
gini = self._calculate_gini([c.get("contributions", 0) for c in contributors])
return {
"contributors_for_20_percent": percentile_20,
"contributors_for_50_percent": percentile_50,
"contributors_for_80_percent": percentile_80,
"gini_coefficient": gini,
"top_contributor_percentage": (sorted_contributors[0].get("contributions", 0) / total_contributions) * 100 if sorted_contributors else 0,
}
def _calculate_gini(self, values: List[int]) -> float:
"""Calculate the Gini coefficient of a distribution."""
if not values or sum(values) == 0:
return 0
values = sorted(values)
n = len(values)
cumsum = 0
for i, value in enumerate(values):
cumsum += value
values[i] = cumsum
return (2 * sum(values) / (n * sum(values[-1]))) - (n + 1) / n
def _analyze_pr_code_changes(self, prs: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Analyze code changes across pull requests."""
if not prs:
return {}
# Extract metrics
additions = [pr.get("additions", 0) for pr in prs if pr.get("additions") is not None]
deletions = [pr.get("deletions", 0) for pr in prs if pr.get("deletions") is not None]
changed_files = [pr.get("changed_files", 0) for pr in prs if pr.get("changed_files") is not None]
# Calculate stats
stats = {}
if additions:
stats["additions"] = {
"mean": sum(additions) / len(additions),
"median": sorted(additions)[len(additions) // 2],
"max": max(additions),
"total": sum(additions),
}
if deletions:
stats["deletions"] = {
"mean": sum(deletions) / len(deletions),
"median": sorted(deletions)[len(deletions) // 2],
"max": max(deletions),
"total": sum(deletions),
}
if changed_files:
stats["changed_files"] = {
"mean": sum(changed_files) / len(changed_files),
"median": sorted(changed_files)[len(changed_files) // 2],
"max": max(changed_files),
"total": sum(changed_files),
}
return stats
def _analyze_commit_patterns(self, commits: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Analyze patterns in commit data.

    Args:
        commits: commit dicts as produced by get_commits.

    Returns:
        Dict with top contributors, message-length stats, hour/weekday
        activity distributions, and monthly commit frequency. {} for an
        empty input.
    """
    if not commits:
        return {}
    # Count by author (falsy logins are filtered out by the trailing `if`).
    commit_counts = Counter(
        commit.get("author_login", "Unknown")
        for commit in commits
        if commit.get("author_login")
    )
    # Analyze message patterns
    message_lengths = [
        len(commit.get("commit_message", ""))
        for commit in commits
        if commit.get("commit_message")
    ]
    # Extract parseable ISO dates for time-based analysis; silently skip the rest.
    dates = []
    for commit in commits:
        date_str = commit.get("date")
        if date_str:
            try:
                date = datetime.datetime.fromisoformat(date_str.replace('Z', '+00:00'))
                dates.append(date)
            except ValueError:
                pass
    # Analyze times of day
    hours = [date.hour for date in dates]
    hour_counts = Counter(hours)
    # Analyze days of week (weekday(): Monday == 0)
    weekdays = [date.weekday() for date in dates]
    weekday_counts = Counter(weekdays)
    weekday_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    weekday_data = {weekday_names[day]: count for day, count in weekday_counts.items()}
    # Analyze frequency of commits over time
    commit_frequency = {}
    if dates:
        dates_sorted = sorted(dates)
        first_date = dates_sorted[0]
        last_date = dates_sorted[-1]
        # Walk month by month from first to last commit; the day=28 + 4 days
        # trick always lands in the next month regardless of month length.
        current_date = first_date.replace(day=1)
        while current_date <= last_date:
            next_month = current_date.replace(day=28) + datetime.timedelta(days=4)
            next_month = next_month.replace(day=1)
            month_key = current_date.strftime('%Y-%m')
            commit_frequency[month_key] = sum(
                1 for date in dates
                if date.year == current_date.year and date.month == current_date.month
            )
            current_date = next_month
    return {
        "top_contributors": dict(commit_counts.most_common(5)),
        "message_length": {
            "mean": sum(message_lengths) / len(message_lengths) if message_lengths else 0,
            "max": max(message_lengths) if message_lengths else 0,
            "min": min(message_lengths) if message_lengths else 0,
        },
        "commit_time_patterns": {
            "by_hour": dict(sorted(hour_counts.items())),
            "by_weekday": weekday_data,
        },
        "commit_frequency": commit_frequency,
    }
def _detect_ci_cd(self, repo_data: Dict[str, Any]) -> Dict[str, Any]:
"""Detect CI/CD presence and configuration in the repository."""
ci_cd_indicators = {
"github_actions": False,
"travis": False,
"circle_ci": False,
"jenkins": False,
"gitlab_ci": False,
"azure_pipelines": False,
}
# Check workflows
if "workflows" in repo_data and repo_data["workflows"]:
ci_cd_indicators["github_actions"] = True
# Check for CI configuration files
if "file_distribution" in repo_data:
files = repo_data.get("file_distribution", {})
if ".travis.yml" in files:
ci_cd_indicators["travis"] = True
if ".circleci/config.yml" in files or "circle.yml" in files:
ci_cd_indicators["circle_ci"] = True
if "Jenkinsfile" in files:
ci_cd_indicators["jenkins"] = True
if ".gitlab-ci.yml" in files:
ci_cd_indicators["gitlab_ci"] = True
if "azure-pipelines.yml" in files:
ci_cd_indicators["azure_pipelines"] = True
return {
"has_ci_cd": any(ci_cd_indicators.values()),
"ci_cd_systems": ci_cd_indicators,
}
def _assess_documentation_quality(self, readme: str) -> Dict[str, Any]:
"""Assess the quality of documentation based on the README."""
if not readme:
return {
"has_readme": False,
"readme_length": 0,
"score": 0,
"sections": {},
}
# Analyze the README content
lines = readme.strip().split('\n')
word_count = len(readme.split())
sections = {}
# Check for common README sections
section_keywords = {
"introduction": ["introduction", "overview", "about"],
"installation": ["installation", "install", "setup", "getting started"],
"usage": ["usage", "using", "example", "examples"],
"api": ["api", "reference", "documentation"],
"contributing": ["contributing", "contribute", "development"],
"license": ["license", "licensing"],
"code_of_conduct": ["code of conduct"],
}
for section, keywords in section_keywords.items():
sections[section] = any(
any(keyword.lower() in line.lower() for keyword in keywords)
for line in lines
)
# Count images/diagrams (markdown format)
image_count = readme.count("![")
# Count code examples
code_block_count = readme.count("```")
# Calculate a simple score
section_score = sum(1 for present in sections.values() if present) / len(sections)
has_images = image_count > 0
has_code = code_block_count > 0
length_score = min(1.0, word_count / 1000) # Normalize to 0-1, with 1000+ words being "complete"
score = (section_score * 0.5) + (has_images * 0.2) + (has_code * 0.2) + (length_score * 0.1)
return {
"has_readme": True,
"readme_length": word_count,
"score": score,
"sections": sections,
"has_images": has_images,
"image_count": image_count,
"has_code_examples": has_code,
"code_block_count": code_block_count // 2, # Each block has opening and closing ```
}
def _calculate_activity_level(self, repo_data: Dict[str, Any]) -> Dict[str, Any]:
    """Calculate repository activity level based on commits, PRs, and issues.

    Args:
        repo_data: Collected repository data; may contain "repo_details",
            "commits", "pull_requests", "issues" and "releases".

    Returns:
        Dict with a numeric "score", a qualitative "level"
        (None / Very Low / Low / Medium / High / Very High), and a
        "details" breakdown of the inputs.
    """
    activity_score = 0
    activity_details = {}

    def count_recent(items, date_key, cutoff):
        # Count items whose timestamp under date_key is on/after cutoff.
        # Malformed or missing dates are skipped instead of aborting the
        # whole analysis (consistent with _analyze_commit_patterns).
        count = 0
        for item in items:
            date_str = item.get(date_key)
            if not date_str:
                continue
            try:
                parsed = datetime.datetime.fromisoformat(date_str.replace('Z', '+00:00'))
            except ValueError:
                continue
            if parsed >= cutoff:
                count += 1
        return count

    # Repository age in months, clamped to >= 1 to avoid division by zero
    if "repo_details" in repo_data:
        age_days = self._calculate_age_days(repo_data["repo_details"].get("created_at"))
        age_months = max(1, age_days / 30.5)  # 30.5 ~ average month length
        activity_details["age_months"] = age_months
    else:
        age_months = 1
    # Single cutoff for all "recent" checks (was recomputed per section)
    three_months_ago = datetime.datetime.now(datetime.timezone.utc) - relativedelta(months=3)
    # Recent commits: up to 10 points
    if "commits" in repo_data:
        recent_commits = count_recent(repo_data["commits"], "date", three_months_ago)
        activity_details["recent_commits"] = recent_commits
        activity_score += min(10, recent_commits / 10)
    # Recent PRs: up to 5 points
    if "pull_requests" in repo_data:
        recent_prs = count_recent(repo_data["pull_requests"], "created_at", three_months_ago)
        activity_details["recent_prs"] = recent_prs
        activity_score += min(5, recent_prs / 5)
    # Recent issues (excluding PRs, which the issues API also returns): up to 5 points
    if "issues" in repo_data:
        true_issues = [issue for issue in repo_data["issues"] if not issue.get("pull_request")]
        recent_issues = count_recent(true_issues, "created_at", three_months_ago)
        activity_details["recent_issues"] = recent_issues
        activity_score += min(5, recent_issues / 5)
    # Release cadence: up to 5 points
    if "releases" in repo_data:
        releases_per_month = len(repo_data["releases"]) / max(1, age_months)
        activity_details["releases_per_month"] = releases_per_month
        activity_score += min(5, releases_per_month * 2.5)
    # Map the numeric score onto a qualitative level
    activity_level = "None"
    if activity_score >= 20:
        activity_level = "Very High"
    elif activity_score >= 15:
        activity_level = "High"
    elif activity_score >= 10:
        activity_level = "Medium"
    elif activity_score >= 5:
        activity_level = "Low"
    elif activity_score > 0:
        activity_level = "Very Low"
    return {
        "score": activity_score,
        "level": activity_level,
        "details": activity_details,
    }
def _analyze_code_complexity(self, repo_data: Dict[str, Any]) -> Dict[str, Any]:
    """Estimate code complexity based on available metrics.

    Combines file counts, PR change sizes, dependency-churn signals,
    contributor count and codebase size into per-dimension metrics and
    an "overall" score/level summary.

    Args:
        repo_data: Collected repository data.

    Returns:
        Dict of complexity metrics; always contains "overall" with
        "score" and "level" keys.
    """
    complexity = {}
    # Analyze file distribution
    if "file_distribution" in repo_data:
        file_types = repo_data["file_distribution"]
        total_files = sum(file_types.values())
        # NOTE(review): keys are treated as file *extensions* here, but
        # _analyze_community_health scans the same dict as *paths* —
        # confirm which shape get_file_distribution() actually returns.
        code_files = sum(
            count for ext, count in file_types.items()
            if ext in self.config.all_code_extensions()
        )
        complexity["file_counts"] = {
            "total_files": total_files,
            "code_files": code_files,
        }
    # Analyze PR complexity: average churn per pull request
    if "pull_requests" in repo_data:
        prs = repo_data["pull_requests"]
        # Get average changes per PR (None values are excluded)
        additions = [pr.get("additions", 0) for pr in prs if pr.get("additions") is not None]
        deletions = [pr.get("deletions", 0) for pr in prs if pr.get("deletions") is not None]
        changed_files = [pr.get("changed_files", 0) for pr in prs if pr.get("changed_files") is not None]
        if additions and deletions and changed_files:
            avg_additions = sum(additions) / len(additions)
            avg_deletions = sum(deletions) / len(deletions)
            avg_changed_files = sum(changed_files) / len(changed_files)
            complexity["pr_complexity"] = {
                "avg_additions": avg_additions,
                "avg_deletions": avg_deletions,
                "avg_changed_files": avg_changed_files,
            }
            # Up to 10 points: ~1000 changed lines per PR saturates the score
            pr_complexity_score = min(10, (avg_additions + avg_deletions) / 100)
            complexity["pr_complexity_score"] = pr_complexity_score
    # Check dependency complexity.
    # dependency_complexity_score is defined here (not inside the if) because
    # the overall-score accumulation below adds it unconditionally.
    dependency_complexity_score = 0
    # NOTE(review): this reads repo_data["insights"], which analyze_repo only
    # attaches *after* insight generation — during the first pass this key is
    # typically absent and the branch is skipped; confirm the intended order.
    if "commit_insights" in repo_data.get("insights", {}):
        commit_messages = [
            commit.get("commit_message", "").lower()
            for commit in repo_data.get("commits", [])
        ]
        # Check for dependency-related keywords
        dependency_keywords = ["dependency", "dependencies", "upgrade", "update", "version", "package"]
        dependency_commits = sum(
            1 for message in commit_messages
            if any(keyword in message for keyword in dependency_keywords)
        )
        dependency_ratio = dependency_commits / len(commit_messages) if commit_messages else 0
        dependency_complexity_score = min(5, dependency_ratio * 20)  # Up to 5 points
        complexity["dependency_complexity"] = {
            "dependency_commits": dependency_commits,
            "dependency_ratio": dependency_ratio,
            "score": dependency_complexity_score,
        }
    # Overall complexity score: contributors + PR churn + dependency churn + size
    overall_score = 0
    contributors = len(repo_data.get("contributors", []))
    if contributors > 0:
        contributor_score = min(5, contributors / 10)  # Up to 5 points
        overall_score += contributor_score
    if "pr_complexity_score" in complexity:
        overall_score += complexity["pr_complexity_score"]
    overall_score += dependency_complexity_score
    # Code size complexity
    if "languages" in repo_data:
        languages = repo_data["languages"]
        total_bytes = sum(languages.values()) if languages else 0
        # Size points based on code size in MB (50 MB saturates the score)
        size_mb = total_bytes / (1024 * 1024)
        size_score = min(10, size_mb / 5)  # Up to 10 points for large codebases
        overall_score += size_score
        complexity["code_size"] = {
            "total_bytes": total_bytes,
            "size_mb": size_mb,
            "score": size_score,
        }
    # Determine complexity level from the accumulated score
    complexity_level = "Low"
    if overall_score >= 25:
        complexity_level = "Very High"
    elif overall_score >= 20:
        complexity_level = "High"
    elif overall_score >= 15:
        complexity_level = "Medium-High"
    elif overall_score >= 10:
        complexity_level = "Medium"
    elif overall_score >= 5:
        complexity_level = "Low-Medium"
    complexity["overall"] = {
        "score": overall_score,
        "level": complexity_level,
    }
    return complexity
def _analyze_community_health(self, repo_data: Dict[str, Any]) -> Dict[str, Any]:
"""Analyze the community health of the repository."""
health = {}
# Calculate issue responsiveness
if "issues" in repo_data:
issues = repo_data["issues"]
closed_issues = [issue for issue in issues if issue.get("state") == "closed"]
if issues:
closure_rate = len(closed_issues) / len(issues)
health["issue_closure_rate"] = closure_rate
# Calculate average time to close
resolution_times = []
for issue in closed_issues:
if issue.get("created_at") and issue.get("closed_at"):
created = datetime.datetime.fromisoformat(issue["created_at"].replace('Z', '+00:00'))
closed = datetime.datetime.fromisoformat(issue["closed_at"].replace('Z', '+00:00'))
resolution_time = (closed - created).total_seconds() / 3600 # hours
resolution_times.append(resolution_time)
if resolution_times:
avg_resolution_time = sum(resolution_times) / len(resolution_times)
health["avg_issue_resolution_time_hours"] = avg_resolution_time
# Calculate PR review responsiveness
if "pull_requests" in repo_data:
prs = repo_data["pull_requests"]
merged_prs = [pr for pr in prs if pr.get("merged")]
if prs:
merge_rate = len(merged_prs) / len(prs)
health["pr_merge_rate"] = merge_rate
# Calculate average time to merge
merge_times = []
for pr in merged_prs:
if pr.get("created_at") and pr.get("merged_at"):
created = datetime.datetime.fromisoformat(pr["created_at"].replace('Z', '+00:00'))
merged = datetime.datetime.fromisoformat(pr["merged_at"].replace('Z', '+00:00'))
merge_time = (merged - created).total_seconds() / 3600 # hours
merge_times.append(merge_time)
if merge_times:
avg_merge_time = sum(merge_times) / len(merge_times)
health["avg_pr_merge_time_hours"] = avg_merge_time
# Check for community guidelines
community_files = [
"CONTRIBUTING.md",
"CODE_OF_CONDUCT.md",
"SECURITY.md",
"SUPPORT.md",
"GOVERNANCE.md",
]
community_file_presence = {}
if "file_distribution" in repo_data:
file_paths = []
for item in repo_data.get("file_distribution", {}):
file_paths.append(item)
for community_file in community_files:
present = any(community_file.lower() in path.lower() for path in file_paths)
community_file_presence[community_file] = present
health["community_guidelines"] = community_file_presence
# Calculate contributor diversity
if "contributors" in repo_data:
contributors = repo_data["contributors"]
if contributors:
# Calculate Gini coefficient for contribution distribution
gini = self._calculate_gini([c.get("contributions", 0) for c in contributors])
health["contributor_gini"] = gini
# Interpret Gini coefficient
if gini < 0.4:
diversity_level = "High"
elif gini < 0.6:
diversity_level = "Medium"
else:
diversity_level = "Low"
health["contributor_diversity"] = diversity_level
# Calculate overall health score
health_score = 0
# Points for issue responsiveness
if "issue_closure_rate" in health:
health_score += health["issue_closure_rate"] * 10 # Up to 10 points
# Points for PR responsiveness
if "pr_merge_rate" in health:
health_score += health["pr_merge_rate"] * 10 # Up to 10 points
# Points for community guidelines
guideline_count = sum(1 for present in community_file_presence.values() if present)
health_score += guideline_count * 2 # Up to 10 points
# Points for contributor diversity
if "contributor_gini" in health:
diversity_score = 10 * (1 - health["contributor_gini"]) # Up to 10 points
health_score += diversity_score
# Determine health level
health_level = "Poor"
if health_score >= 30:
health_level = "Excellent"
elif health_score >= 25:
health_level = "Very Good"
elif health_score >= 20:
health_level = "Good"
elif health_score >= 15:
health_level = "Fair"
elif health_score >= 10:
health_level = "Needs Improvement"
health["overall"] = {
"score": health_score,
"level": health_level,
}
return health
def generate_visualizations(self, repo_data: Dict[str, Any], insights: Dict[str, Any]) -> Dict[str, plt.Figure]:
    """
    Generate visualizations of repository data.

    Returns:
        Dict of visualization figures (empty when visualization generation
        is disabled in the config).
    """
    if not self.config.generate_visualizations:
        return {}
    figures = {}
    # Language pie chart is optional: only added when data was available.
    language_fig = self._visualize_language_distribution(repo_data)
    if language_fig:
        figures["language_distribution"] = language_fig
    # Multi-figure builders each return a dict of named figures.
    for build in (
        self._visualize_commit_activity,
        self._visualize_contributor_activity,
        self._visualize_issues_and_prs,
        self._generate_plotly_visualizations,
    ):
        figures.update(build(repo_data, insights))
    # Collaboration network is optional as well.
    network_fig = self._visualize_collaboration_network(repo_data, insights)
    if network_fig:
        figures["collaboration_network"] = network_fig
    return figures
def _visualize_language_distribution(self, repo_data: Dict[str, Any]) -> Optional[plt.Figure]:
    """Create a pie chart of the repository's language byte distribution."""
    languages = repo_data.get("languages", {})
    if not languages:
        return None
    fig, ax = plt.subplots(figsize=(10, 6))
    total_bytes = sum(languages.values())
    # Group languages under 1% of the codebase into a single "Other" slice
    # so the chart stays readable.
    cutoff = total_bytes * 0.01
    minor_total = sum(size for lang, size in languages.items() if size < cutoff)
    major = {lang: size for lang, size in languages.items() if size >= cutoff}
    if minor_total > 0:
        major["Other"] = minor_total
    wedges, texts, autotexts = ax.pie(
        list(major.values()),
        labels=list(major.keys()),
        autopct='%1.1f%%',
        startangle=90,
        shadow=False,
        textprops={'fontsize': 9},  # Smaller font for better fit
        wedgeprops={'linewidth': 1, 'edgecolor': 'white'}  # Add white edge
    )
    # White bold percentage labels read better on colored wedges.
    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_fontweight('bold')
    ax.axis('equal')
    plt.title(f"Language Distribution", fontsize=16)
    plt.tight_layout()
    return fig
def _visualize_commit_activity(self, repo_data: Dict[str, Any], insights: Dict[str, Any]) -> Dict[str, plt.Figure]:
    """Create visualizations of commit activity.

    Builds up to four figures depending on available data: weekly commit
    counts, code frequency (additions/deletions), commits by weekday, and
    commits by hour.

    Args:
        repo_data: Collected repository data (uses "commit_activity").
        insights: Generated insights (uses "commit_insights").

    Returns:
        Dict mapping figure names to matplotlib figures.
    """
    figures = {}
    commit_activity = repo_data.get("commit_activity", {})
    weekly_commits = commit_activity.get("weekly_commits", [])
    if weekly_commits:
        # Extract weeks and commit counts
        weeks = [item["week"] for item in weekly_commits]
        commits = [item["total"] for item in weekly_commits]
        # Create a time series plot
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.plot(weeks, commits, marker='o', linestyle='-', color='blue', alpha=0.7)
        # Add a linear trend line fitted over the week indices
        z = np.polyfit(range(len(weeks)), commits, 1)
        p = np.poly1d(z)
        ax.plot(weeks, p(range(len(weeks))), "r--", alpha=0.7)
        ax.set_title("Weekly Commit Activity", fontsize=16)
        ax.set_xlabel("Week")
        ax.set_ylabel("Number of Commits")
        plt.xticks(rotation=45)
        ax.grid(True, linestyle='--', alpha=0.7)
        # Show only some x-axis labels to avoid crowding
        if len(weeks) > 20:
            every_nth = len(weeks) // 10
            for n, label in enumerate(ax.xaxis.get_ticklabels()):
                if n % every_nth != 0:
                    label.set_visible(False)
        plt.tight_layout()
        figures["weekly_commits"] = fig
    # Visualize code frequency (lines added/removed per week) if available
    code_frequency = commit_activity.get("code_frequency", [])
    if code_frequency:
        weeks = [item["week"] for item in code_frequency]
        additions = [item["additions"] for item in code_frequency]
        deletions = [item["deletions"] for item in code_frequency]
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.plot(weeks, additions, marker='o', linestyle='-', color='green', label='Additions')
        ax.plot(weeks, deletions, marker='o', linestyle='-', color='red', label='Deletions')
        ax.set_title("Code Frequency", fontsize=16)
        ax.set_xlabel("Week")
        ax.set_ylabel("Lines Changed")
        plt.xticks(rotation=45)
        ax.legend()
        ax.grid(True, linestyle='--', alpha=0.7)
        # Show only some x-axis labels to avoid crowding
        if len(weeks) > 20:
            every_nth = len(weeks) // 10
            for n, label in enumerate(ax.xaxis.get_ticklabels()):
                if n % every_nth != 0:
                    label.set_visible(False)
        plt.tight_layout()
        figures["code_frequency"] = fig
    # Commits by weekday / hour come from the derived commit insights
    if "commit_insights" in insights:
        commit_insights = insights["commit_insights"]
        by_weekday = commit_insights.get("commit_time_patterns", {}).get("by_weekday", {})
        if by_weekday:
            fig, ax = plt.subplots(figsize=(10, 6))
            weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
            counts = [by_weekday.get(day, 0) for day in weekdays]
            # Gradient colors scaled by commit count.
            # NOTE: local name `colors` shadows the module-level reportlab
            # `colors` import inside this function (harmless here).
            colors = plt.cm.Blues(np.array(counts) / max(counts))
            ax.bar(weekdays, counts, color=colors)
            ax.set_title("Commits by Day of Week", fontsize=16)
            ax.set_xlabel("Day of Week")
            ax.set_ylabel("Number of Commits")
            ax.grid(True, axis='y', linestyle='--', alpha=0.7)
            plt.tight_layout()
            figures["commits_by_weekday"] = fig
        # Commits by hour
        by_hour = commit_insights.get("commit_time_patterns", {}).get("by_hour", {})
        if by_hour:
            fig, ax = plt.subplots(figsize=(12, 6))
            hours = sorted(by_hour.keys())
            counts = [by_hour[hour] for hour in hours]
            # Gradient colors scaled by commit count
            colors = plt.cm.Greens(np.array(counts) / max(counts))
            ax.bar(hours, counts, color=colors)
            ax.set_title("Commits by Hour of Day (UTC)", fontsize=16)
            ax.set_xlabel("Hour")
            ax.set_ylabel("Number of Commits")
            ax.set_xticks(range(0, 24, 2))
            ax.grid(True, axis='y', linestyle='--', alpha=0.7)
            plt.tight_layout()
            figures["commits_by_hour"] = fig
    return figures
def _visualize_contributor_activity(self, repo_data: Dict[str, Any], insights: Dict[str, Any]) -> Dict[str, plt.Figure]:
    """Create visualizations of contributor activity.

    Builds a top-contributors bar chart and, when contributor insights are
    available, a pie chart of contribution concentration.

    Args:
        repo_data: Collected repository data (uses "contributors").
        insights: Generated insights (uses "contributor_insights").

    Returns:
        Dict mapping figure names to matplotlib figures.
    """
    figures = {}
    contributors = repo_data.get("contributors", [])
    if contributors:
        # Bar chart of the top 10 contributors by commit count
        contributors_sorted = sorted(contributors, key=lambda x: x.get("contributions", 0), reverse=True)
        top_n = min(10, len(contributors_sorted))
        fig, ax = plt.subplots(figsize=(12, 6))
        names = [c.get("login", "Unknown") for c in contributors_sorted[:top_n]]
        contributions = [c.get("contributions", 0) for c in contributors_sorted[:top_n]]
        # Gradient colors scaled by contribution count.
        # NOTE: local name `colors` shadows the module-level reportlab
        # `colors` import inside this function (harmless here).
        colors = plt.cm.viridis(np.array(contributions) / max(contributions))
        bars = ax.bar(names, contributions, color=colors)
        ax.set_title("Top Contributors by Commit Count", fontsize=16)
        ax.set_xlabel("Contributor")
        ax.set_ylabel("Number of Commits")
        plt.xticks(rotation=45, ha='right')
        ax.grid(True, axis='y', linestyle='--', alpha=0.7)
        # Add value labels on top of bars
        for bar in bars:
            height = bar.get_height()
            ax.annotate(f'{height}',
                        xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')
        plt.tight_layout()
        figures["top_contributors"] = fig
    # Visualize contribution distribution if insights available
    if "contributor_insights" in insights:
        contributor_insights = insights["contributor_insights"]
        distribution = contributor_insights.get("contribution_distribution", {})
        if distribution:
            # Pie chart showing how concentrated contributions are: slice
            # *labels* carry the contributor counts per percentile band...
            fig, ax = plt.subplots(figsize=(10, 6))
            percentiles = [
                distribution.get("contributors_for_20_percent", 0),
                distribution.get("contributors_for_50_percent", 0) - distribution.get("contributors_for_20_percent", 0),
                distribution.get("contributors_for_80_percent", 0) - distribution.get("contributors_for_50_percent", 0),
                len(contributors) - distribution.get("contributors_for_80_percent", 0)
            ]
            labels = [
                f"Top {percentiles[0]} contributors (0-20%)",
                f"Next {percentiles[1]} contributors (20-50%)",
                f"Next {percentiles[2]} contributors (50-80%)",
                f"Remaining {percentiles[3]} contributors (80-100%)"
            ]
            # ...while the slice *sizes* are fixed band widths, so the chart
            # always shows the 20/30/30/20 percentile bands.
            wedges, texts, autotexts = ax.pie(
                [20, 30, 30, 20],  # Fixed percentages for visualization
                labels=labels,
                autopct='%1.1f%%',
                startangle=90,
                shadow=False,
                explode=(0.1, 0, 0, 0),  # Emphasize the top contributors
                wedgeprops={'linewidth': 1, 'edgecolor': 'white'}  # Add white edge
            )
            # Make the percentage labels more readable
            for autotext in autotexts:
                autotext.set_color('white')
                autotext.set_fontweight('bold')
            ax.axis('equal')
            ax.set_title("Contribution Distribution", fontsize=16)
            plt.tight_layout()
            figures["contribution_distribution"] = fig
    return figures
def _visualize_issues_and_prs(self, repo_data: Dict[str, Any], insights: Dict[str, Any]) -> Dict[str, plt.Figure]:
    """Create visualizations of issues and pull requests.

    Builds up to four figures depending on available insights: issues by
    state, issues created per month, top issue labels, and PR code-change
    statistics.

    Args:
        repo_data: Collected repository data (not read directly here).
        insights: Generated insights (uses "issue_insights",
            "pr_insights" and "pr_code_change_stats").

    Returns:
        Dict mapping figure names to matplotlib figures.
    """
    figures = {}
    # Visualize issue distribution if available
    if "issue_insights" in insights:
        issue_insights = insights["issue_insights"]
        # Issues by state (open = red, anything else = green)
        by_state = issue_insights.get("by_state", {})
        if by_state:
            fig, ax = plt.subplots(figsize=(8, 6))
            states = list(by_state.keys())
            counts = list(by_state.values())
            colors = ['red' if state.lower() == 'open' else 'green' for state in states]
            ax.bar(states, counts, color=colors)
            ax.set_title("Issues by State", fontsize=16)
            ax.set_xlabel("State")
            ax.set_ylabel("Count")
            # Add count labels on top of bars
            for i, v in enumerate(counts):
                ax.text(i, v + 0.5, str(v), ha='center')
            ax.grid(True, axis='y', linestyle='--', alpha=0.7)
            plt.tight_layout()
            figures["issues_by_state"] = fig
        # Issues created per month, with a linear trend line
        by_month = issue_insights.get("by_month", {})
        if by_month:
            fig, ax = plt.subplots(figsize=(12, 6))
            months = sorted(by_month.keys())
            counts = [by_month[month] for month in months]
            ax.plot(months, counts, marker='o', linestyle='-', color='blue')
            # Add trend line fitted over the month indices
            z = np.polyfit(range(len(months)), counts, 1)
            p = np.poly1d(z)
            ax.plot(months, p(range(len(months))), "r--", alpha=0.7)
            ax.set_title("Issues Created by Month", fontsize=16)
            ax.set_xlabel("Month")
            ax.set_ylabel("Number of Issues")
            plt.xticks(rotation=45)
            ax.grid(True, linestyle='--', alpha=0.7)
            # Show only some x-axis labels to avoid crowding
            if len(months) > 12:
                every_nth = max(1, len(months) // 12)
                for n, label in enumerate(ax.xaxis.get_ticklabels()):
                    if n % every_nth != 0:
                        label.set_visible(False)
            plt.tight_layout()
            figures["issues_by_month"] = fig
        # Top issue labels as a horizontal bar chart (skip if only one label)
        by_label = issue_insights.get("by_label", {})
        if by_label and len(by_label) > 1:
            fig, ax = plt.subplots(figsize=(12, 6))
            labels = list(by_label.keys())
            counts = list(by_label.values())
            # Sort by count, descending
            sorted_indices = np.argsort(counts)[::-1]
            labels = [labels[i] for i in sorted_indices]
            counts = [counts[i] for i in sorted_indices]
            # Limit to top 10
            if len(labels) > 10:
                labels = labels[:10]
                counts = counts[:10]
            # Distinct colors per label.
            # NOTE: local name `colors` shadows the module-level reportlab
            # `colors` import inside this function (harmless here).
            colors = plt.cm.tab10(np.linspace(0, 1, len(labels)))
            bars = ax.barh(labels, counts, color=colors)
            ax.set_title("Top Issue Labels", fontsize=16)
            ax.set_xlabel("Count")
            ax.set_ylabel("Label")
            # Add count labels at the end of each bar
            for bar in bars:
                width = bar.get_width()
                ax.annotate(f'{int(width)}',
                            xy=(width, bar.get_y() + bar.get_height() / 2),
                            xytext=(3, 0),  # 3 points horizontal offset
                            textcoords="offset points",
                            ha='left', va='center')
            ax.grid(True, axis='x', linestyle='--', alpha=0.7)
            plt.tight_layout()
            figures["issues_by_label"] = fig
    # Visualize PR insights if available
    if "pr_insights" in insights and "pr_code_change_stats" in insights:
        pr_code_stats = insights["pr_code_change_stats"]
        # Mean/median/max additions and deletions, side by side
        if "additions" in pr_code_stats and "deletions" in pr_code_stats:
            fig, ax = plt.subplots(figsize=(10, 6))
            categories = ["Mean", "Median", "Max"]
            additions = [
                pr_code_stats["additions"].get("mean", 0),
                pr_code_stats["additions"].get("median", 0),
                pr_code_stats["additions"].get("max", 0) / 10  # Scale down for visibility
            ]
            deletions = [
                pr_code_stats["deletions"].get("mean", 0),
                pr_code_stats["deletions"].get("median", 0),
                pr_code_stats["deletions"].get("max", 0) / 10  # Scale down for visibility
            ]
            # Grouped bars: additions left of each tick, deletions right
            x = range(len(categories))
            width = 0.35
            addition_bars = ax.bar([i - width/2 for i in x], additions, width, label='Additions', color='green')
            deletion_bars = ax.bar([i + width/2 for i in x], deletions, width, label='Deletions', color='red')
            ax.set_xlabel('Metric')
            ax.set_ylabel('Lines of Code')
            ax.set_title('PR Code Change Statistics')
            plt.xticks(x, categories)
            ax.legend()
            # Add value labels on top of every bar
            for bars in [addition_bars, deletion_bars]:
                for bar in bars:
                    height = bar.get_height()
                    ax.annotate(f'{int(height)}',
                                xy=(bar.get_x() + bar.get_width() / 2, height),
                                xytext=(0, 3),  # 3 points vertical offset
                                textcoords="offset points",
                                ha='center', va='bottom')
            # The "Max" bars are scaled down 10x, so annotate the true values
            if "max" in pr_code_stats["additions"]:
                plt.annotate(f"Max: {int(pr_code_stats['additions']['max'])}",
                             (2 - width/2, additions[2] + 5),
                             textcoords="offset points",
                             xytext=(0,10),
                             ha='center')
            if "max" in pr_code_stats["deletions"]:
                plt.annotate(f"Max: {int(pr_code_stats['deletions']['max'])}",
                             (2 + width/2, deletions[2] + 5),
                             textcoords="offset points",
                             xytext=(0,10),
                             ha='center')
            plt.tight_layout()
            figures["pr_code_changes"] = fig
    return figures
def _generate_plotly_visualizations(self, repo_data: Dict[str, Any], insights: Dict[str, Any]) -> Dict[str, Any]:
    """Generate interactive Plotly visualizations.

    Builds up to three interactive figures from the collected data: a
    weekday-by-hour commit heatmap, a language treemap, and a cumulative
    issue/PR timeline.

    Args:
        repo_data: Collected repository data (uses "commits", "languages",
            "issues", "pull_requests").
        insights: Generated insights (not read directly here).

    Returns:
        Dict mapping figure names to Plotly figure objects.
    """
    plotly_figures = {}
    # Activity heatmap (commits by day of week and hour of day)
    if "commits" in repo_data:
        commits = repo_data["commits"]
        # Parse commit dates; malformed values are skipped
        dates = []
        for commit in commits:
            date_str = commit.get("date")
            if date_str:
                try:
                    date = datetime.datetime.fromisoformat(date_str.replace('Z', '+00:00'))
                    dates.append(date)
                except ValueError:
                    pass
        if dates:
            # Group by (weekday, hour) pairs
            day_hour_counts = defaultdict(int)
            for date in dates:
                day_hour_counts[(date.weekday(), date.hour)] += 1
            # Fill a 7x24 matrix for the heatmap (rows = weekdays, cols = hours)
            days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
            hours = list(range(24))
            z = np.zeros((7, 24))
            for (day, hour), count in day_hour_counts.items():
                z[day][hour] = count
            # Create heatmap
            fig = go.Figure(data=go.Heatmap(
                z=z,
                x=hours,
                y=days,
                colorscale='Viridis',
                hoverongaps=False,
                hovertemplate='Day: %{y}<br>Hour: %{x}<br>Commits: %{z}<extra></extra>'
            ))
            fig.update_layout(
                title='Commit Activity Heatmap',
                xaxis_title='Hour of Day (UTC)',
                yaxis_title='Day of Week',
                yaxis={'categoryorder': 'array', 'categoryarray': days},
                width=900,
                height=500
            )
            plotly_figures["commit_heatmap"] = fig
    # Language breakdown treemap (one flat level: all languages under root)
    if "languages" in repo_data:
        languages = repo_data["languages"]
        if languages:
            # Create data for treemap
            labels = list(languages.keys())
            values = list(languages.values())
            fig = go.Figure(go.Treemap(
                labels=labels,
                values=values,
                parents=[""] * len(labels),
                marker_colorscale='RdBu',
                hovertemplate='Language: %{label}<br>Bytes: %{value}<br>Percentage: %{percentRoot:.2%}<extra></extra>'
            ))
            fig.update_layout(
                title='Repository Language Breakdown',
                width=800,
                height=600
            )
            plotly_figures["language_treemap"] = fig
    # Cumulative issue/PR timeline
    issues = repo_data.get("issues", [])
    prs = repo_data.get("pull_requests", [])
    if issues or prs:
        # Collect creation events; exclude PRs returned by the issues API
        timeline_data = []
        for issue in issues:
            if not issue.get("pull_request") and issue.get("created_at"):
                try:
                    created_date = datetime.datetime.fromisoformat(issue["created_at"].replace('Z', '+00:00'))
                    timeline_data.append({
                        "date": created_date,
                        "type": "Issue",
                        "id": issue.get("number", ""),
                        "title": issue.get("title", ""),
                        "state": issue.get("state", "")
                    })
                except ValueError:
                    pass
        for pr in prs:
            if pr.get("created_at"):
                try:
                    created_date = datetime.datetime.fromisoformat(pr["created_at"].replace('Z', '+00:00'))
                    timeline_data.append({
                        "date": created_date,
                        "type": "PR",
                        "id": pr.get("number", ""),
                        "title": pr.get("title", ""),
                        "state": pr.get("state", "")
                    })
                except ValueError:
                    pass
        if timeline_data:
            # Sort chronologically so cumulative sums are meaningful
            timeline_data.sort(key=lambda x: x["date"])
            # Create DataFrame for easier plotting
            df = pd.DataFrame(timeline_data)
            # Running totals of each event type over time
            df["cumulative_issues"] = (df["type"] == "Issue").cumsum()
            df["cumulative_prs"] = (df["type"] == "PR").cumsum()
            # Create plot with one line per event type
            fig = go.Figure()
            fig.add_trace(go.Scatter(
                x=df["date"],
                y=df["cumulative_issues"],
                mode='lines',
                name='Issues',
                line=dict(color='red', width=2)
            ))
            fig.add_trace(go.Scatter(
                x=df["date"],
                y=df["cumulative_prs"],
                mode='lines',
                name='Pull Requests',
                line=dict(color='blue', width=2)
            ))
            fig.update_layout(
                title='Cumulative Issues and Pull Requests Over Time',
                xaxis_title='Date',
                yaxis_title='Count',
                legend=dict(
                    yanchor="top",
                    y=0.99,
                    xanchor="left",
                    x=0.01
                ),
                width=900,
                height=500
            )
            plotly_figures["issue_pr_timeline"] = fig
    return plotly_figures
def _visualize_collaboration_network(self, repo_data: Dict[str, Any], insights: Dict[str, Any]) -> Optional[plt.Figure]:
    """Draw the contributor collaboration network inferred from PR reviews.

    Nodes are contributors (sized by contribution count); edges connect PR
    authors to their requested reviewers (weighted by pair frequency).
    Returns None when the required data or any collaboration is missing.
    """
    if "pull_requests" not in repo_data or "contributors" not in repo_data:
        return None
    contributors = repo_data["contributors"]
    prs = repo_data["pull_requests"]
    logins = [c.get("login") for c in contributors if c.get("login")]
    graph = nx.Graph()
    graph.add_nodes_from(logins)
    # Tally author<->reviewer pairs; each distinct pair becomes one
    # weighted edge.
    pair_counts = defaultdict(int)
    for pr in prs:
        author = pr.get("user_login")
        if not author or author not in logins:
            continue
        for reviewer in pr.get("requested_reviewers", []):
            if reviewer in logins and reviewer != author:
                pair_counts[tuple(sorted([author, reviewer]))] += 1
    for (a, b), weight in pair_counts.items():
        graph.add_edge(a, b, weight=weight)
    if not graph.edges():
        return None
    fig, ax = plt.subplots(figsize=(12, 10))
    # Node size reflects contribution count; edge width reflects how often
    # the pair collaborated.
    contribution_by_login = {c.get("login"): c.get("contributions", 1) for c in contributors if c.get("login")}
    node_sizes = [contribution_by_login.get(node, 1) * 30 for node in graph.nodes()]
    edge_widths = [graph[u][v]['weight'] * 0.5 for u, v in graph.edges()]
    # Color nodes by how many distinct collaborators they have.
    node_colors = []
    for node in graph.nodes():
        connections = graph.degree(node)
        if connections > 5:
            node_colors.append('red')    # central collaborators
        elif connections > 2:
            node_colors.append('blue')   # active collaborators
        else:
            node_colors.append('green')  # peripheral contributors
    # Fixed seed keeps the force-directed layout reproducible.
    layout = nx.spring_layout(graph, seed=42)
    nx.draw_networkx_nodes(graph, layout, node_size=node_sizes, node_color=node_colors, alpha=0.8)
    nx.draw_networkx_edges(graph, layout, width=edge_widths, alpha=0.5, edge_color='gray')
    nx.draw_networkx_labels(graph, layout, font_size=8, font_family='sans-serif')
    ax.set_title("Collaboration Network", fontsize=16)
    ax.axis('off')
    plt.tight_layout()
    return fig
def analyze_repo(self, owner: str, repo_name: str) -> Dict[str, Any]:
    """Run the complete analysis pipeline for one repository.

    Args:
        owner: GitHub username or organization that owns the repository.
        repo_name: Name of the repository.

    Returns:
        Dict containing all collected repository data and derived insights.
    """
    started = time.time()
    logger.info(f"Starting analysis of {owner}/{repo_name}")
    repo = self.client.get_repo(f"{owner}/{repo_name}")
    # Basic metadata is fetched eagerly; everything else runs through the
    # task list below so each data source can fail independently.
    repo_data: Dict[str, Any] = {"repo_details": self.get_repo_details(repo)}
    # Terms used to probe for security and quality indicators in the code.
    important_terms = [
        "security", "vulnerability", "auth", "password", "token",
        "test", "spec", "fixture", "mock", "stub",
        "TODO", "FIXME", "HACK", "XXX"
    ]
    tasks = [
        ("contributors", lambda: self.get_contributors(repo)),
        ("languages", lambda: self.get_languages(repo)),
        ("issues", lambda: self.get_issues(repo, "all")),
        ("pull_requests", lambda: self.get_pull_requests(repo, "all")),
        ("commits", lambda: self.get_commits(repo)),
        ("readme", lambda: self.get_readme(repo)),
        ("branches", lambda: self.get_branches(repo)),
        ("releases", lambda: self.get_releases(repo)),
        ("workflows", lambda: self.get_workflows(repo)),
        ("file_distribution", lambda: self.get_file_distribution(repo)),
        ("collaborators", lambda: self.get_collaborators(repo)),
        ("commit_activity", lambda: self.analyze_commit_activity(repo)),
        ("contributor_activity", lambda: self.analyze_contributor_activity(repo)),
        ("code_search", lambda: self.search_code(repo, important_terms)),
    ]
    # A failing collector is logged and skipped rather than aborting the
    # whole analysis; the progress bar advances either way.
    with tqdm(total=len(tasks), desc="Collecting repository data") as pbar:
        for key, task_func in tasks:
            try:
                repo_data[key] = task_func()
            except Exception as e:
                logger.error(f"Error collecting {key}: {e}")
            finally:
                pbar.update(1)
    # Derive insights (and optionally charts) from the raw data.
    repo_data["insights"] = self.generate_insights(repo_data)
    if self.config.generate_visualizations:
        repo_data["visualizations"] = self.generate_visualizations(repo_data, repo_data["insights"])
    logger.info(f"Analysis completed in {time.time() - started:.2f} seconds")
    return repo_data
class PDFReportGenerator:
"""
Class for generating comprehensive PDF reports from repository analysis data.
"""
def __init__(self, repo_data: Dict[str, Any], output_path: str = None):
    """Initialize the PDF report generator with repository data.

    Args:
        repo_data: Full analysis payload produced by the repository analyzer.
        output_path: Destination path for the PDF. When omitted, a secure
            temporary file is created and its path is used.
    """
    self.repo_data = repo_data
    if output_path is None:
        # FIX: tempfile.mktemp() is deprecated and race-prone (another
        # process can claim the name before we open it). mkstemp() creates
        # the file atomically; we only need the path, so close the fd.
        fd, output_path = tempfile.mkstemp(suffix='.pdf')
        os.close(fd)
    self.output_path = output_path
    self.styles = getSampleStyleSheet()
    # Register the custom paragraph styles used by the report sections.
    custom_styles = (
        dict(name='SectionTitle', parent=self.styles['Heading2'],
             fontSize=14, spaceAfter=10),
        dict(name='SubsectionTitle', parent=self.styles['Heading3'],
             fontSize=12, spaceAfter=6),
        dict(name='MetricsTable', parent=self.styles['Normal'],
             fontSize=10, alignment=TA_LEFT),
        dict(name='Small', parent=self.styles['Normal'], fontSize=8),
        dict(name='ReportTitle', parent=self.styles['Title'],
             fontSize=24, alignment=TA_CENTER, spaceAfter=20),
    )
    for kwargs in custom_styles:
        self.styles.add(ParagraphStyle(**kwargs))
def generate_report(self) -> str:
    """Generate a PDF report of repository analysis.

    Returns:
        str: Path to the generated PDF file.
    """
    doc = SimpleDocTemplate(
        self.output_path,
        pagesize=letter,
        rightMargin=72, leftMargin=72,
        topMargin=72, bottomMargin=72
    )
    repo_name = self.repo_data.get("repo_details", {}).get("full_name", "Repository")
    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Title page header, then one section per analysis area.
    story = [
        Paragraph(f"GitHub Repository Analysis: {repo_name}", self.styles['ReportTitle']),
        Paragraph(f"Report generated on: {stamp}", self.styles['Normal']),
        Spacer(1, 20),
    ]
    story.extend(self._create_repo_overview())
    story.append(PageBreak())
    story.extend(self._create_activity_analysis())
    story.append(PageBreak())
    story.extend(self._create_code_analysis())
    story.append(PageBreak())
    story.extend(self._create_community_analysis())
    # Charts are optional; only emit the pages when figures were produced.
    if self.repo_data.get("visualizations"):
        story.append(PageBreak())
        story.extend(self._create_visualization_pages())
    story.append(PageBreak())
    story.extend(self._create_summary_and_recommendations())
    doc.build(story)
    return self.output_path
def _create_repo_overview(self) -> List[Any]:
    """Create the repository overview section of the report.

    Builds a two-column details table from ``repo_data["repo_details"]``
    followed by headline metrics from ``repo_data["insights"]``.

    Returns:
        List of reportlab flowables for this section.
    """
    elements = []
    # Section title
    elements.append(Paragraph("Repository Overview", self.styles['Heading1']))
    elements.append(Spacer(1, 10))
    # Basic repository information
    repo_details = self.repo_data.get("repo_details", {})
    # Create a label/value table for repository details; every lookup has a
    # fallback so a partially-collected payload still renders.
    data = [
        ["Name", repo_details.get("name", "N/A")],
        ["Full Name", repo_details.get("full_name", "N/A")],
        ["Description", repo_details.get("description", "No description")],
        ["URL", repo_details.get("html_url", "N/A")],
        ["Primary Language", repo_details.get("language", "Not specified")],
        ["Created On", repo_details.get("created_at", "N/A")],
        ["Last Updated", repo_details.get("updated_at", "N/A")],
        ["Stars", str(repo_details.get("stargazers_count", 0))],
        ["Forks", str(repo_details.get("forks_count", 0))],
        ["Watchers", str(repo_details.get("watchers_count", 0))],
        ["Open Issues", str(repo_details.get("open_issues_count", 0))],
        ["License", repo_details.get("license", "Not specified")],
        ["Fork", "Yes" if repo_details.get("fork", False) else "No"],
        ["Archived", "Yes" if repo_details.get("archived", False) else "No"],
        ["Visibility", repo_details.get("visibility", "N/A").capitalize()],
    ]
    table = Table(data, colWidths=[100, 350])
    # Bold, right-aligned label column; plain left-aligned value column.
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (0, -1), colors.lightgrey),
        ('TEXTCOLOR', (0, 0), (0, -1), colors.black),
        ('ALIGN', (0, 0), (0, -1), 'RIGHT'),
        ('ALIGN', (1, 0), (1, -1), 'LEFT'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 10),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ('TOPPADDING', (0, 0), (-1, -1), 6),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
    ]))
    elements.append(table)
    elements.append(Spacer(1, 20))
    # Key metrics and insights
    elements.append(Paragraph("Key Metrics & Insights", self.styles['SectionTitle']))
    insights = self.repo_data.get("insights", {})
    # Repository age (365.25 accounts for leap years) and days since activity.
    age_days = insights.get("repository_age_days", 0)
    age_years = age_days / 365.25
    freshness_days = insights.get("freshness_days", 0)
    age_text = f"Repository Age: {age_years:.1f} years ({int(age_days)} days)"
    freshness_text = f"Last Activity: {int(freshness_days)} days ago"
    elements.append(Paragraph(age_text, self.styles['Normal']))
    elements.append(Paragraph(freshness_text, self.styles['Normal']))
    elements.append(Spacer(1, 10))
    # Activity level (score scale /25 per the upstream insight generator)
    activity_level = insights.get("activity_level", {})
    if activity_level:
        activity_text = f"Activity Level: {activity_level.get('level', 'Unknown')} (Score: {activity_level.get('score', 0):.1f}/25)"
        elements.append(Paragraph(activity_text, self.styles['Normal']))
        elements.append(Spacer(1, 10))
    # Code complexity (score scale /30)
    code_complexity = insights.get("code_complexity", {}).get("overall", {})
    if code_complexity:
        complexity_text = f"Code Complexity: {code_complexity.get('level', 'Unknown')} (Score: {code_complexity.get('score', 0):.1f}/30)"
        elements.append(Paragraph(complexity_text, self.styles['Normal']))
        elements.append(Spacer(1, 10))
    # Documentation quality: 0..1 score bucketed into Low/Medium/High
    doc_quality = insights.get("documentation_quality", {})
    if doc_quality:
        quality_score = doc_quality.get("score", 0)
        quality_level = "High" if quality_score > 0.7 else "Medium" if quality_score > 0.4 else "Low"
        doc_text = f"Documentation Quality: {quality_level} (Score: {quality_score:.2f})"
        elements.append(Paragraph(doc_text, self.styles['Normal']))
        elements.append(Spacer(1, 10))
    # Community health (score scale /40)
    community_health = insights.get("community_health", {}).get("overall", {})
    if community_health:
        health_text = f"Community Health: {community_health.get('level', 'Unknown')} (Score: {community_health.get('score', 0):.1f}/40)"
        elements.append(Paragraph(health_text, self.styles['Normal']))
    return elements
def _create_activity_analysis(self) -> List[Any]:
    """Create the activity analysis section of the report.

    Covers commit activity (top contributors, timing patterns), pull
    request activity (states, size statistics) and issue activity
    (states, resolution times, top labels).

    Returns:
        List of reportlab flowables for this section.
    """
    elements = []
    # Section title
    elements.append(Paragraph("Activity Analysis", self.styles['Heading1']))
    elements.append(Spacer(1, 10))
    insights = self.repo_data.get("insights", {})
    # --- Commit activity ---
    elements.append(Paragraph("Commit Activity", self.styles['SectionTitle']))
    commit_insights = insights.get("commit_insights", {})
    if commit_insights:
        # Top contributors table
        top_contributors = commit_insights.get("top_contributors", {})
        if top_contributors:
            elements.append(Paragraph("Top Contributors by Commits:", self.styles['SubsectionTitle']))
            data = [["Contributor", "Commits"]]
            for contributor, commits in top_contributors.items():
                data.append([contributor, str(commits)])
            table = Table(data, colWidths=[200, 100])
            table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
                ('ALIGN', (0, 0), (0, -1), 'LEFT'),
                ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
                ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, -1), 10),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 4),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ]))
            elements.append(table)
            elements.append(Spacer(1, 15))
        # Commit time patterns: busiest weekday and busiest UTC hour
        time_patterns = commit_insights.get("commit_time_patterns", {})
        if time_patterns:
            elements.append(Paragraph("Commit Timing Patterns:", self.styles['SubsectionTitle']))
            weekday_data = time_patterns.get("by_weekday", {})
            if weekday_data:
                day_text = "Most active day: " + max(weekday_data.items(), key=lambda x: x[1])[0]
                elements.append(Paragraph(day_text, self.styles['Normal']))
            hour_data = time_patterns.get("by_hour", {})
            # FIX: condition was the redundant `hour_data and hour_data`
            if hour_data:
                hour = max(hour_data.items(), key=lambda x: x[1])[0]
                hour_text = f"Most active hour: {hour}:00 UTC"
                elements.append(Paragraph(hour_text, self.styles['Normal']))
            elements.append(Spacer(1, 10))
    # --- Pull request activity ---
    elements.append(Paragraph("Pull Request Activity", self.styles['SectionTitle']))
    pr_insights = insights.get("pr_insights", {})
    pr_code_changes = insights.get("pr_code_change_stats", {})
    if pr_insights or pr_code_changes:
        # PR state distribution table
        state_counts = pr_insights.get("by_state", {})
        if state_counts:
            elements.append(Paragraph("Pull Request States:", self.styles['SubsectionTitle']))
            data = [["State", "Count"]]
            for state, count in state_counts.items():
                data.append([state.capitalize(), str(count)])
            table = Table(data, colWidths=[100, 100])
            table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
                ('ALIGN', (0, 0), (0, -1), 'LEFT'),
                ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
                ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ]))
            elements.append(table)
            elements.append(Spacer(1, 15))
        # PR size statistics: one row per aggregate metric, one column per
        # change dimension; "N/A" when a stat is missing from the payload.
        if pr_code_changes:
            elements.append(Paragraph("Pull Request Size Statistics:", self.styles['SubsectionTitle']))
            data = [["Metric", "Additions", "Deletions", "Files Changed"]]
            metrics = ["mean", "median", "max", "total"]
            for metric in metrics:
                row = [metric.capitalize()]
                for stat_type in ["additions", "deletions", "changed_files"]:
                    if stat_type in pr_code_changes and metric in pr_code_changes[stat_type]:
                        value = pr_code_changes[stat_type][metric]
                        row.append(f"{value:.1f}" if isinstance(value, float) else str(value))
                    else:
                        row.append("N/A")
                data.append(row)
            table = Table(data, colWidths=[80, 80, 80, 80])
            table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('ALIGN', (0, 0), (0, -1), 'LEFT'),
                ('ALIGN', (1, 0), (-1, -1), 'RIGHT'),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ]))
            elements.append(table)
            elements.append(Spacer(1, 15))
    # --- Issue activity ---
    elements.append(Paragraph("Issue Activity", self.styles['SectionTitle']))
    issue_insights = insights.get("issue_insights", {})
    if issue_insights:
        # Issue state distribution table
        state_counts = issue_insights.get("by_state", {})
        if state_counts:
            elements.append(Paragraph("Issue States:", self.styles['SubsectionTitle']))
            data = [["State", "Count"]]
            for state, count in state_counts.items():
                data.append([state.capitalize(), str(count)])
            table = Table(data, colWidths=[100, 100])
            table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
                ('ALIGN', (0, 0), (0, -1), 'LEFT'),
                ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
                ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ]))
            elements.append(table)
            elements.append(Spacer(1, 15))
        # Resolution times, switching to days when over one day
        resolution_stats = issue_insights.get("resolution_time", {})
        if resolution_stats:
            elements.append(Paragraph("Issue Resolution Time (hours):", self.styles['SubsectionTitle']))
            mean_hours = resolution_stats.get("mean_hours", 0)
            median_hours = resolution_stats.get("median_hours", 0)
            if mean_hours > 24:
                mean_days = mean_hours / 24
                mean_text = f"Mean: {mean_days:.1f} days"
            else:
                mean_text = f"Mean: {mean_hours:.1f} hours"
            if median_hours > 24:
                median_days = median_hours / 24
                median_text = f"Median: {median_days:.1f} days"
            else:
                median_text = f"Median: {median_hours:.1f} hours"
            elements.append(Paragraph(mean_text, self.styles['Normal']))
            elements.append(Paragraph(median_text, self.styles['Normal']))
            elements.append(Spacer(1, 10))
        # Top 5 issue labels (payload assumed pre-sorted by count — confirm upstream)
        top_labels = issue_insights.get("by_label", {})
        if top_labels:
            elements.append(Paragraph("Top Issue Labels:", self.styles['SubsectionTitle']))
            data = [["Label", "Count"]]
            for label, count in list(top_labels.items())[:5]:  # Top 5 labels
                data.append([label, str(count)])
            table = Table(data, colWidths=[150, 50])
            table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
                ('ALIGN', (0, 0), (0, -1), 'LEFT'),
                ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
                ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ]))
            elements.append(table)
    return elements
def _create_code_analysis(self) -> List[Any]:
    """Create the code analysis section of the report.

    Covers language distribution, file-type distribution, code complexity
    metrics and detected CI/CD systems.

    Returns:
        List of reportlab flowables for this section.
    """
    elements = []
    # Section title
    elements.append(Paragraph("Code Analysis", self.styles['Heading1']))
    elements.append(Spacer(1, 10))
    # --- Language distribution ---
    elements.append(Paragraph("Language Distribution", self.styles['SectionTitle']))
    languages = self.repo_data.get("languages", {})
    insights = self.repo_data.get("insights", {})
    if languages:
        # Sort languages by byte count, largest first
        sorted_languages = sorted(languages.items(), key=lambda x: x[1], reverse=True)
        data = [["Language", "Bytes", "Percentage"]]
        # `or 1` guards against a pathological all-zero byte payload
        total_bytes = sum(languages.values()) or 1
        for language, bytes_count in sorted_languages[:10]:  # Top 10 languages
            percentage = (bytes_count / total_bytes) * 100
            data.append([
                language,
                f"{bytes_count:,}",
                f"{percentage:.1f}%"
            ])
        table = Table(data, colWidths=[120, 120, 80])
        table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
            ('ALIGN', (1, 0), (2, -1), 'RIGHT'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ]))
        elements.append(table)
        elements.append(Spacer(1, 15))
    # --- File type distribution ---
    elements.append(Paragraph("File Type Distribution", self.styles['SectionTitle']))
    file_dist = self.repo_data.get("file_distribution", {})
    if file_dist:
        # BUG FIX: PDFReportGenerator.__init__ never sets self.config, so the
        # original `self.config.*` access raised AttributeError whenever the
        # file distribution was non-empty. Fall back to the analyzer's default
        # configuration (GitHubAPIConfig declares the extension groups) when
        # no config was attached to this instance.
        cfg = getattr(self, "config", None) or GitHubAPIConfig()
        file_types = {
            "Code": sum(file_dist.get(ext, 0) for ext in cfg.code_extensions),
            "Markup": sum(file_dist.get(ext, 0) for ext in cfg.markup_extensions),
            "Scripts": sum(file_dist.get(ext, 0) for ext in cfg.script_extensions),
            "Data": sum(file_dist.get(ext, 0) for ext in cfg.data_extensions),
            "Config": sum(file_dist.get(ext, 0) for ext in cfg.config_extensions),
            "Notebooks": sum(file_dist.get(ext, 0) for ext in cfg.notebook_extensions),
            "Other": sum(file_dist.get(ext, 0) for ext in cfg.other_extensions)
        }
        data = [["File Type", "Count", "Percentage"]]
        total_files = sum(file_types.values())
        for file_type, count in sorted(file_types.items(), key=lambda x: x[1], reverse=True):
            if count > 0:  # count > 0 also implies total_files > 0
                percentage = (count / total_files) * 100
                data.append([
                    file_type,
                    str(count),
                    f"{percentage:.1f}%"
                ])
        table = Table(data, colWidths=[120, 80, 80])
        table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
            ('ALIGN', (1, 0), (2, -1), 'RIGHT'),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ]))
        elements.append(table)
        elements.append(Spacer(1, 15))
    # --- Code complexity ---
    elements.append(Paragraph("Code Complexity Analysis", self.styles['SectionTitle']))
    code_complexity = insights.get("code_complexity", {})
    if code_complexity:
        complexity_overall = code_complexity.get("overall", {})
        elements.append(Paragraph(
            f"Overall Complexity: {complexity_overall.get('level', 'Unknown')} (Score: {complexity_overall.get('score', 0):.1f}/30)",
            self.styles['Normal']
        ))
        elements.append(Spacer(1, 10))
        # Total code size in megabytes
        code_size = code_complexity.get("code_size", {})
        if code_size:
            size_mb = code_size.get("size_mb", 0)
            elements.append(Paragraph(f"Code Size: {size_mb:.2f} MB", self.styles['Normal']))
            elements.append(Spacer(1, 5))
        # Average pull-request churn
        pr_complexity = code_complexity.get("pr_complexity", {})
        if pr_complexity:
            elements.append(Paragraph("Average Pull Request Size:", self.styles['SubsectionTitle']))
            avg_additions = pr_complexity.get("avg_additions", 0)
            avg_deletions = pr_complexity.get("avg_deletions", 0)
            avg_files = pr_complexity.get("avg_changed_files", 0)
            elements.append(Paragraph(f"Lines Added: {avg_additions:.1f}", self.styles['Normal']))
            elements.append(Paragraph(f"Lines Deleted: {avg_deletions:.1f}", self.styles['Normal']))
            elements.append(Paragraph(f"Files Changed: {avg_files:.1f}", self.styles['Normal']))
            elements.append(Spacer(1, 10))
    # --- CI/CD presence ---
    elements.append(Paragraph("CI/CD Systems", self.styles['SectionTitle']))
    ci_cd = insights.get("ci_cd_presence", {})
    if ci_cd:
        has_ci_cd = ci_cd.get("has_ci_cd", False)
        systems = ci_cd.get("ci_cd_systems", {})
        if has_ci_cd:
            elements.append(Paragraph("Detected CI/CD Systems:", self.styles['Normal']))
            detected_systems = [name for name, present in systems.items() if present]
            for system in detected_systems:
                elements.append(Paragraph(f"• {system.replace('_', ' ').title()}", self.styles['Normal']))
        else:
            elements.append(Paragraph("No CI/CD systems detected", self.styles['Normal']))
    return elements
def _create_community_analysis(self) -> List[Any]:
    """Create the community analysis section of the report.

    Covers contributor statistics, community health metrics, presence of
    community guideline files, and README/documentation quality.

    Returns:
        List of reportlab flowables for this section.
    """
    elements = []
    # Section title
    elements.append(Paragraph("Community Analysis", self.styles['Heading1']))
    elements.append(Spacer(1, 10))
    insights = self.repo_data.get("insights", {})
    # --- Contributor analysis ---
    elements.append(Paragraph("Contributor Analysis", self.styles['SectionTitle']))
    contributor_insights = insights.get("contributor_insights", {})
    if contributor_insights:
        contributor_count = contributor_insights.get("contributor_count", 0)
        total_contributions = contributor_insights.get("total_contributions", 0)
        avg_contributions = contributor_insights.get("avg_contributions_per_contributor", 0)
        elements.append(Paragraph(f"Total Contributors: {contributor_count}", self.styles['Normal']))
        elements.append(Paragraph(f"Total Contributions: {total_contributions}", self.styles['Normal']))
        elements.append(Paragraph(f"Average Contributions per Contributor: {avg_contributions:.1f}", self.styles['Normal']))
        elements.append(Spacer(1, 10))
        # How concentrated the work is (Gini: 0 = equal, 1 = one person)
        distribution = contributor_insights.get("contribution_distribution", {})
        if distribution:
            elements.append(Paragraph("Contribution Distribution:", self.styles['SubsectionTitle']))
            gini = distribution.get("gini_coefficient", 0)
            top_percent = distribution.get("top_contributor_percentage", 0)
            contributors_20 = distribution.get("contributors_for_20_percent", 0)
            contributors_50 = distribution.get("contributors_for_50_percent", 0)
            contributors_80 = distribution.get("contributors_for_80_percent", 0)
            elements.append(Paragraph(f"Top Contributor: {top_percent:.1f}% of all contributions", self.styles['Normal']))
            elements.append(Paragraph(f"Contributors for first 20% work: {contributors_20}", self.styles['Normal']))
            elements.append(Paragraph(f"Contributors for first 50% work: {contributors_50}", self.styles['Normal']))
            elements.append(Paragraph(f"Contributors for first 80% work: {contributors_80}", self.styles['Normal']))
            elements.append(Paragraph(f"Gini Coefficient: {gini:.2f} ({'High' if gini > 0.6 else 'Medium' if gini > 0.4 else 'Low'} inequality)", self.styles['Normal']))
            elements.append(Spacer(1, 15))
    # --- Community health ---
    elements.append(Paragraph("Community Health", self.styles['SectionTitle']))
    community_health = insights.get("community_health", {})
    if community_health:
        health_overall = community_health.get("overall", {})
        elements.append(Paragraph(
            f"Overall Health: {health_overall.get('level', 'Unknown')} (Score: {health_overall.get('score', 0):.1f}/40)",
            self.styles['Normal']
        ))
        elements.append(Spacer(1, 10))
        # Responsiveness metrics; times switch to days when over 72 hours
        if "issue_closure_rate" in community_health:
            closure_rate = community_health.get("issue_closure_rate", 0)
            elements.append(Paragraph(f"Issue Closure Rate: {closure_rate:.1%}", self.styles['Normal']))
        if "avg_issue_resolution_time_hours" in community_health:
            resolution_hours = community_health.get("avg_issue_resolution_time_hours", 0)
            if resolution_hours > 72:
                resolution_days = resolution_hours / 24
                elements.append(Paragraph(f"Avg. Issue Resolution Time: {resolution_days:.1f} days", self.styles['Normal']))
            else:
                elements.append(Paragraph(f"Avg. Issue Resolution Time: {resolution_hours:.1f} hours", self.styles['Normal']))
        if "pr_merge_rate" in community_health:
            merge_rate = community_health.get("pr_merge_rate", 0)
            elements.append(Paragraph(f"PR Merge Rate: {merge_rate:.1%}", self.styles['Normal']))
        if "avg_pr_merge_time_hours" in community_health:
            merge_hours = community_health.get("avg_pr_merge_time_hours", 0)
            if merge_hours > 72:
                merge_days = merge_hours / 24
                elements.append(Paragraph(f"Avg. PR Merge Time: {merge_days:.1f} days", self.styles['Normal']))
            else:
                elements.append(Paragraph(f"Avg. PR Merge Time: {merge_hours:.1f} hours", self.styles['Normal']))
        elements.append(Spacer(1, 10))
        # Community guideline files checklist
        community_files = community_health.get("community_guidelines", {})
        if community_files:
            elements.append(Paragraph("Community Guidelines:", self.styles['SubsectionTitle']))
            files = [
                ("CONTRIBUTING.md", "Contributing Guidelines"),
                ("CODE_OF_CONDUCT.md", "Code of Conduct"),
                ("SECURITY.md", "Security Policy"),
                ("SUPPORT.md", "Support Information"),
                ("GOVERNANCE.md", "Governance Model")
            ]
            data = [["Guideline", "Present"]]
            for file_name, display_name in files:
                present = community_files.get(file_name, False)
                data.append([display_name, "✓" if present else "✗"])
            table = Table(data, colWidths=[150, 50])
            # BUG FIX: TableStyle values must be concrete colors; the original
            # passed a lambda for TEXTCOLOR, which fails when reportlab renders
            # the table. Append one TEXTCOLOR command per data row instead.
            style_cmds = [
                ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
                ('ALIGN', (0, 0), (0, -1), 'LEFT'),
                ('ALIGN', (1, 0), (1, -1), 'CENTER'),
                ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ]
            for row in range(1, len(data)):
                mark_color = colors.green if data[row][1] == "✓" else colors.red
                style_cmds.append(('TEXTCOLOR', (1, row), (1, row), mark_color))
            table.setStyle(TableStyle(style_cmds))
            elements.append(table)
            elements.append(Spacer(1, 15))
    # --- Documentation quality ---
    elements.append(Paragraph("Documentation Analysis", self.styles['SectionTitle']))
    doc_quality = insights.get("documentation_quality", {})
    if doc_quality:
        has_readme = doc_quality.get("has_readme", False)
        if has_readme:
            quality_score = doc_quality.get("score", 0)
            quality_level = "High" if quality_score > 0.7 else "Medium" if quality_score > 0.4 else "Low"
            word_count = doc_quality.get("readme_length", 0)
            elements.append(Paragraph(f"README Quality: {quality_level} (Score: {quality_score:.2f})", self.styles['Normal']))
            elements.append(Paragraph(f"README Length: {word_count} words", self.styles['Normal']))
            elements.append(Spacer(1, 10))
            # README section checklist
            sections = doc_quality.get("sections", {})
            if sections:
                elements.append(Paragraph("README Sections Present:", self.styles['SubsectionTitle']))
                section_labels = {
                    "introduction": "Introduction/Overview",
                    "installation": "Installation Instructions",
                    "usage": "Usage Examples",
                    "api": "API Documentation",
                    "contributing": "Contributing Guidelines",
                    "license": "License Information",
                    "code_of_conduct": "Code of Conduct"
                }
                data = [["Section", "Present"]]
                for section_key, section_label in section_labels.items():
                    present = sections.get(section_key, False)
                    data.append([section_label, "✓" if present else "✗"])
                table = Table(data, colWidths=[150, 50])
                # Same per-row TEXTCOLOR fix as the guidelines table above.
                style_cmds = [
                    ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
                    ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
                    ('ALIGN', (0, 0), (0, -1), 'LEFT'),
                    ('ALIGN', (1, 0), (1, -1), 'CENTER'),
                    ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
                    ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                    ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
                ]
                for row in range(1, len(data)):
                    mark_color = colors.green if data[row][1] == "✓" else colors.red
                    style_cmds.append(('TEXTCOLOR', (1, row), (1, row), mark_color))
                table.setStyle(TableStyle(style_cmds))
                elements.append(table)
                elements.append(Spacer(1, 10))
            # Extra README features (images / code examples)
            has_images = doc_quality.get("has_images", False)
            has_code = doc_quality.get("has_code_examples", False)
            metrics_text = "Additional Features: "
            if has_images:
                img_count = doc_quality.get("image_count", 0)
                metrics_text += f"{img_count} images/diagrams, "
            if has_code:
                code_blocks = doc_quality.get("code_block_count", 0)
                metrics_text += f"{code_blocks} code examples"
            if has_images or has_code:
                elements.append(Paragraph(metrics_text, self.styles['Normal']))
        else:
            elements.append(Paragraph("No README file found.", self.styles['Normal']))
    return elements
def _create_visualization_pages(self) -> List[Any]:
    """Build the report pages that embed the generated charts.

    Figures are grouped by topic; each non-empty group gets a subsection
    and ends with a page break. Both Plotly (``go.Figure``) and
    Matplotlib figures are rendered to PNG in memory.

    Returns:
        List of reportlab flowables for the visualization pages.
    """
    elements = [
        Paragraph("Visualizations", self.styles['Heading1']),
        Spacer(1, 10),
    ]
    visualizations = self.repo_data.get("visualizations", {})
    # Topic -> ordered list of figure keys to look up in the payload.
    categories = {
        "Language Analysis": ["language_distribution", "language_treemap"],
        "Commit Activity": ["weekly_commits", "code_frequency", "commits_by_weekday", "commits_by_hour", "commit_heatmap"],
        "Contributor Analysis": ["top_contributors", "contribution_distribution", "collaboration_network"],
        "Issue & PR Analysis": ["issues_by_state", "issues_by_month", "issues_by_label", "pr_code_changes", "issue_pr_timeline"]
    }
    for category, viz_keys in categories.items():
        present_keys = [key for key in viz_keys if key in visualizations]
        if not present_keys:
            continue
        elements.append(Paragraph(category, self.styles['SectionTitle']))
        elements.append(Spacer(1, 10))
        for viz_key in present_keys:
            fig = visualizations.get(viz_key)
            if not fig:
                continue
            # Render the figure to an in-memory PNG buffer.
            buf = BytesIO()
            if isinstance(fig, go.Figure):
                # Plotly export path
                fig.write_image(buf, format="png", width=800, height=500)
            else:
                # Matplotlib export path
                fig.savefig(buf, format="png", dpi=150)
            buf.seek(0)
            # Caption derived from the figure key, then the image itself.
            elements.append(Paragraph(viz_key.replace("_", " ").title(), self.styles['SubsectionTitle']))
            elements.append(Image(buf, width=6*inch, height=4*inch))
            elements.append(Spacer(1, 20))
        # New page after each populated category.
        elements.append(PageBreak())
    return elements
def _create_summary_and_recommendations(self) -> List[Any]:
    """Create the summary and recommendations section.

    Assembles a prose summary from the collected insights, then emits a
    numbered list of improvement recommendations driven by threshold
    checks on documentation, community, issue, complexity, CI/CD and
    activity metrics.

    Returns:
        List of reportlab flowables for this section.
    """
    elements = []
    # Section title
    elements.append(Paragraph("Summary & Recommendations", self.styles['Heading1']))
    elements.append(Spacer(1, 10))
    # Repository summary
    elements.append(Paragraph("Project Summary", self.styles['SectionTitle']))
    insights = self.repo_data.get("insights", {})
    repo_details = self.repo_data.get("repo_details", {})
    # Short description of the project, stitched into a single sentence.
    repo_name = repo_details.get("name", "The repository")
    repo_desc = repo_details.get("description", "")
    primary_lang = repo_details.get("language", "various languages")
    summary_text = f"{repo_name} is a {primary_lang} project"
    if repo_desc:
        # Lowercase the description's leading capital so it reads naturally
        # mid-sentence; descriptions already lowercase are used verbatim.
        summary_text += f" that {repo_desc.lower() if repo_desc[0].isupper() else repo_desc}"
    summary_text += "."
    elements.append(Paragraph(summary_text, self.styles['Normal']))
    elements.append(Spacer(1, 10))
    # Key metrics summary (stars/forks plus contributor concentration)
    community_health = insights.get("community_health", {}).get("overall", {})
    activity_level = insights.get("activity_level", {})
    code_complexity = insights.get("code_complexity", {}).get("overall", {})
    metrics_text = f"The project has {repo_details.get('stargazers_count', 0)} stars and {repo_details.get('forks_count', 0)} forks."
    if "contributor_insights" in insights:
        contributor_count = insights["contributor_insights"].get("contributor_count", 0)
        metrics_text += f" It has {contributor_count} contributors"
        # Gini coefficient buckets: >0.7 centralized, >0.4 moderate, else even
        gini = insights["contributor_insights"].get("contribution_distribution", {}).get("gini_coefficient", 0)
        if gini > 0.7:
            metrics_text += " with a highly centralized contribution pattern"
        elif gini > 0.4:
            metrics_text += " with a moderately distributed contribution pattern"
        else:
            metrics_text += " with a well-distributed contribution pattern"
        metrics_text += "."
    elements.append(Paragraph(metrics_text, self.styles['Normal']))
    elements.append(Spacer(1, 10))
    # Activity summary
    if activity_level:
        activity_text = f"The project shows {activity_level.get('level', 'Unknown').lower()} activity levels"
        # Add activity context based on the qualitative level label
        if activity_level.get('level') in ["High", "Very High"]:
            activity_text += " with regular commits and issue management."
        elif activity_level.get('level') in ["Medium"]:
            activity_text += " with moderate development progress."
        else:
            activity_text += " with limited recent development."
        elements.append(Paragraph(activity_text, self.styles['Normal']))
        elements.append(Spacer(1, 10))
    # Code quality summary
    if code_complexity:
        complexity_text = f"The codebase has {code_complexity.get('level', 'Unknown').lower()} complexity"
        if code_complexity.get('level') in ["High", "Very High"]:
            complexity_text += ", which may present challenges for new contributors and maintenance."
        elif code_complexity.get('level') in ["Medium", "Medium-High"]:
            complexity_text += " with a reasonable balance between functionality and maintainability."
        else:
            complexity_text += " and should be relatively straightforward to understand and maintain."
        elements.append(Paragraph(complexity_text, self.styles['Normal']))
        elements.append(Spacer(1, 10))
    # Community health summary
    if community_health:
        health_text = f"The project demonstrates {community_health.get('level', 'Unknown').lower()} community health"
        if community_health.get('level') in ["Excellent", "Very Good", "Good"]:
            health_text += " with responsive maintainers and clear contribution guidelines."
        elif community_health.get('level') in ["Fair"]:
            health_text += " with some community structures in place."
        else:
            health_text += " with opportunities for improved community engagement."
        elements.append(Paragraph(health_text, self.styles['Normal']))
        elements.append(Spacer(1, 15))
    # Recommendations, accumulated then numbered at the end
    elements.append(Paragraph("Recommendations", self.styles['SectionTitle']))
    recommendations = []
    # Documentation recommendations (score thresholds: <0.4 poor, <0.7 fair)
    doc_quality = insights.get("documentation_quality", {})
    if doc_quality:
        score = doc_quality.get("score", 0)
        if score < 0.4:
            recommendations.append("Improve documentation by adding more comprehensive README content, including usage examples and API documentation.")
        elif score < 0.7:
            recommendations.append("Enhance existing documentation with more examples and clearer installation instructions.")
        sections = doc_quality.get("sections", {})
        missing_key_sections = []
        if not sections.get("installation", False):
            missing_key_sections.append("installation instructions")
        if not sections.get("usage", False):
            missing_key_sections.append("usage examples")
        if missing_key_sections:
            recommendations.append(f"Add missing documentation sections: {', '.join(missing_key_sections)}.")
    # Community recommendations (missing guideline files)
    community_files = insights.get("community_health", {}).get("community_guidelines", {})
    if community_files:
        missing_guidelines = []
        if not community_files.get("CONTRIBUTING.md", False):
            missing_guidelines.append("contribution guidelines")
        if not community_files.get("CODE_OF_CONDUCT.md", False):
            missing_guidelines.append("code of conduct")
        if missing_guidelines:
            recommendations.append(f"Create missing community files: {', '.join(missing_guidelines)}.")
    # Issue management recommendations
    issue_insights = insights.get("issue_insights", {})
    if issue_insights:
        resolution_time = issue_insights.get("resolution_time", {}).get("mean_hours", 0)
        if resolution_time > 168:  # 1 week
            recommendations.append("Improve issue response time to enhance user experience and community engagement.")
    # Code complexity recommendations
    if code_complexity and code_complexity.get('level') in ["High", "Very High"]:
        recommendations.append("Consider refactoring complex parts of the codebase to improve maintainability.")
    # CI/CD recommendations
    ci_cd = insights.get("ci_cd_presence", {})
    if not ci_cd.get("has_ci_cd", False):
        recommendations.append("Implement CI/CD pipelines (e.g., GitHub Actions) to automate testing and deployment.")
    # Activity recommendations
    if activity_level and activity_level.get('level') in ["Low", "Very Low", "None"]:
        recommendations.append("Revitalize project with regular updates and community engagement to attract more contributors.")
    # Add recommendations to the report as a numbered list
    if recommendations:
        for i, recommendation in enumerate(recommendations, 1):
            elements.append(Paragraph(f"{i}. {recommendation}", self.styles['Normal']))
            elements.append(Spacer(1, 5))
    else:
        elements.append(Paragraph("This project follows good development practices and no significant improvements are needed at this time.", self.styles['Normal']))
    return elements
class RAGHelper:
    """
    Retrieval Augmented Generation (RAG) helper.

    Pre-digests a repository analysis payload so chatbot answers can be
    grounded in concrete repository facts retrieved per query.
    """

    def __init__(self, repo_data: Dict[str, Any]):
        """Store the analysis payload and pre-compute lookup structures."""
        self.repo_data = repo_data
        self.insights = repo_data.get("insights", {})
        # Flatten the nested payload up front so query-time retrieval is a
        # cheap dictionary lookup.
        self._extract_key_info()

    def _extract_key_info(self):
        """Flatten the raw analysis payload into the ``self.repo_info`` dict."""
        self.repo_info = {}

        # Basic repository details: copy selected fields under friendlier keys.
        if "repo_details" in self.repo_data:
            details = self.repo_data["repo_details"]
            for target, source, default in (
                ("name", "name", ""),
                ("full_name", "full_name", ""),
                ("description", "description", ""),
                ("url", "html_url", ""),
                ("stars", "stargazers_count", 0),
                ("forks", "forks_count", 0),
                ("language", "language", ""),
                ("created_at", "created_at", ""),
                ("license", "license", ""),
            ):
                self.repo_info[target] = details.get(source, default)

        # Language byte counts -> percentage breakdown plus a top-5 ranking.
        if "languages" in self.repo_data:
            languages = self.repo_data["languages"]
            total_bytes = sum(languages.values()) if languages else 0
            if total_bytes > 0:
                percentages = {
                    name: (size / total_bytes) * 100
                    for name, size in languages.items()
                }
                self.repo_info["language_breakdown"] = percentages
                ranked = sorted(percentages.items(), key=lambda item: item[1], reverse=True)
                self.repo_info["top_languages"] = ranked[:5]

        # Contributor count and the five most active contributors.
        if "contributors" in self.repo_data:
            contributors = self.repo_data["contributors"]
            self.repo_info["total_contributors"] = len(contributors)
            if contributors:
                ranked = sorted(
                    contributors,
                    key=lambda person: person.get("contributions", 0),
                    reverse=True,
                )
                self.repo_info["top_contributors"] = [
                    {
                        "name": person.get("login", "Unknown"),
                        "contributions": person.get("contributions", 0),
                    }
                    for person in ranked[:5]
                ]

        # Commit activity patterns and busiest committers.
        if "commit_insights" in self.insights:
            commit_insights = self.insights["commit_insights"]
            self.repo_info["commit_patterns"] = commit_insights.get("commit_time_patterns", {})
            self.repo_info["top_committers"] = commit_insights.get("top_contributors", {})

        # Documentation score plus a coarse High/Medium/Low label.
        if "documentation_quality" in self.insights:
            doc_quality = self.insights["documentation_quality"]
            score = doc_quality.get("score", 0)
            self.repo_info["documentation_score"] = score
            if score > 0.7:
                label = "High"
            elif score > 0.4:
                label = "Medium"
            else:
                label = "Low"
            self.repo_info["documentation_quality"] = label
            self.repo_info["readme_sections"] = doc_quality.get("sections", {})

        # Community health level and presence of guideline files.
        if "community_health" in self.insights:
            community_health = self.insights["community_health"]
            self.repo_info["community_health_level"] = community_health.get("overall", {}).get("level", "Unknown")
            self.repo_info["community_guidelines"] = community_health.get("community_guidelines", {})

        # Overall activity label.
        if "activity_level" in self.insights:
            self.repo_info["activity_level"] = self.insights["activity_level"].get("level", "Unknown")

        # Overall code-complexity label.
        if "code_complexity" in self.insights:
            self.repo_info["code_complexity_level"] = (
                self.insights["code_complexity"].get("overall", {}).get("level", "Unknown")
            )

    def get_context_for_query(self, query: str) -> str:
        """
        Retrieve relevant context from repository data based on the query.

        Args:
            query: The user's question, in natural language.

        Returns:
            str: Contextual information to enhance the response.
        """
        query_lower = query.lower()

        # Keyword triggers per repository aspect; a query can hit several.
        keywords = {
            "overview": ["overview", "about", "what is", "tell me about", "summary"],
            "languages": ["language", "programming language", "code language", "tech stack"],
            "contributors": ["contributor", "who", "team", "maintainer", "author"],
            "activity": ["activity", "active", "commit", "update", "recent", "frequency"],
            "documentation": ["documentation", "docs", "readme", "well documented"],
            "community": ["community", "health", "governance", "conduct", "guideline"],
            "complexity": ["complex", "complexity", "difficult", "simple", "codebase", "understand"],
            "issues": ["issue", "bug", "problem", "ticket", "feature request"],
            "pulls": ["pull request", "pr", "merge", "contribution"],
        }

        # Substring-match the query against each aspect's trigger terms.
        relevant_aspects = [
            aspect
            for aspect, terms in keywords.items()
            if any(term in query_lower for term in terms)
        ]
        # Fall back to a general overview when nothing matched.
        if not relevant_aspects:
            relevant_aspects = ["overview"]

        context_parts = []

        # Repository overview.
        if "overview" in relevant_aspects:
            full_name = self.repo_info.get("full_name", "The repository")
            star_count = self.repo_info.get("stars", 0)
            fork_count = self.repo_info.get("forks", 0)
            description = self.repo_info.get("description", "")
            overview = f"{full_name} is a GitHub repository with {star_count} stars and {fork_count} forks. "
            if description:
                overview += f"Description: {description}. "
            primary_language = self.repo_info.get("language", "")
            if primary_language:
                overview += f"It's primarily written in {primary_language}. "
            created_at = self.repo_info.get("created_at", "")
            if created_at:
                try:
                    created = datetime.datetime.fromisoformat(created_at.replace('Z', '+00:00'))
                    overview += f"The repository was created on {created.strftime('%B %d, %Y')}. "
                except (ValueError, AttributeError):
                    # Timestamp missing or malformed: just omit the date.
                    pass
            context_parts.append(overview)

        # Language breakdown.
        if "languages" in relevant_aspects:
            ranked = self.repo_info.get("top_languages", [])
            if ranked:
                shares = ", ".join(f"{name}: {share:.1f}%" for name, share in ranked)
                context_parts.append("Language breakdown: " + shares + ".")

        # Contributors.
        if "contributors" in relevant_aspects:
            total = self.repo_info.get("total_contributors", 0)
            leaders = self.repo_info.get("top_contributors", [])
            text = f"The repository has {total} contributors. "
            if leaders:
                names = ", ".join(
                    f"{person['name']} ({person['contributions']} commits)"
                    for person in leaders
                )
                text += "Top contributors: " + names + "."
            context_parts.append(text)

        # Activity metrics.
        if "activity" in relevant_aspects:
            level = self.repo_info.get("activity_level", "Unknown")
            text = f"Activity level: {level}. "
            by_weekday = self.repo_info.get("commit_patterns", {}).get("by_weekday", {})
            if by_weekday:
                busiest_day = max(by_weekday, key=by_weekday.get)
                text += f"Most active day of the week: {busiest_day}. "
            context_parts.append(text)

        # Documentation quality.
        if "documentation" in relevant_aspects:
            quality = self.repo_info.get("documentation_quality", "Unknown")
            score = self.repo_info.get("documentation_score", 0)
            text = f"Documentation quality: {quality} (score: {score:.2f}/1.0). "
            sections = self.repo_info.get("readme_sections", {})
            if sections:
                present = [name for name, found in sections.items() if found]
                missing = [name for name, found in sections.items() if not found]
                if present:
                    text += f"README includes sections on: {', '.join(present)}. "
                if missing:
                    text += f"README is missing sections on: {', '.join(missing)}."
            context_parts.append(text)

        # Community health.
        if "community" in relevant_aspects:
            level = self.repo_info.get("community_health_level", "Unknown")
            guidelines = self.repo_info.get("community_guidelines", {})
            text = f"Community health: {level}. "
            if guidelines:
                present = [name for name, found in guidelines.items() if found]
                missing = [name for name, found in guidelines.items() if not found]
                if present:
                    text += f"Has community files: {', '.join(present)}. "
                if missing:
                    text += f"Missing community files: {', '.join(missing)}."
            context_parts.append(text)

        # Code complexity.
        if "complexity" in relevant_aspects:
            level = self.repo_info.get("code_complexity_level", "Unknown")
            context_parts.append(f"Code complexity: {level}.")

        # Issues.
        if "issues" in relevant_aspects and "issue_insights" in self.insights:
            issue_insights = self.insights["issue_insights"]
            by_state = issue_insights.get("by_state", {})
            text = "Issues: "
            if by_state:
                text += ", ".join(f"{count} {state}" for state, count in by_state.items())
                text += ". "
            resolution = issue_insights.get("resolution_time", {})
            if resolution:
                mean_hours = resolution.get("mean_hours", 0)
                if mean_hours > 24:
                    text += f"Average resolution time: {mean_hours / 24:.1f} days."
                else:
                    text += f"Average resolution time: {mean_hours:.1f} hours."
            context_parts.append(text)

        # Pull requests.
        if "pulls" in relevant_aspects and "pr_insights" in self.insights:
            by_state = self.insights["pr_insights"].get("by_state", {})
            text = "Pull Requests: "
            if by_state:
                text += ", ".join(f"{count} {state}" for state, count in by_state.items())
                text += ". "
            context_parts.append(text)

        return " ".join(context_parts)
def create_gradio_interface():
    """
    Create the Gradio interface for GitHub repository analysis.

    Returns:
        gr.Blocks: The assembled (but not yet launched) Gradio app.
    """
    # Styling
    css = """
    .gradio-container {max-width: 100% !important}
    .main-analysis-area {min-height: 600px}
    .analysis-result {overflow-y: auto; max-height: 500px}
    .chat-interface {border: 1px solid #ccc; border-radius: 5px; padding: 10px}
    .pdf-download {margin-top: 20px}
    """

    # State shared by the nested handlers below. Both are rebound with
    # `nonlocal` inside analyze_repository so every handler sees the
    # latest analysis. (Using `global` here was a bug: it bound a module-level
    # name while generate_pdf_report/chat_with_repo kept closing over this
    # enclosing — and therefore forever-empty — local.)
    repo_data = {}
    analyzer = None

    def parse_repo_url(url: str) -> Tuple[Optional[str], Optional[str]]:
        """Parse a GitHub repository URL into (owner, repo).

        Returns (None, None) when the URL does not look like a GitHub
        repository URL.
        """
        match = re.search(r"github\.com/([^/\s]+)/([^/\s]+)", url)
        if not match:
            return None, None
        owner, repo_name = match.group(1), match.group(2)
        # Normalize clone-style URLs: previously the generic pattern matched
        # first and left the ".git" suffix inside the repository name.
        if repo_name.endswith(".git"):
            repo_name = repo_name[:-4]
        return owner, repo_name

    def analyze_repository(repo_url: str, is_private: bool, github_token: str = None, progress=gr.Progress()) -> str:
        """Analyze a GitHub repository and return an HTML summary.

        Args:
            repo_url: Repository URL, e.g. https://github.com/owner/repo.
            is_private: Whether the repository requires authentication.
            github_token: Personal access token; required for private repos.
            progress: Gradio progress tracker (injected by Gradio).

        Returns:
            str: HTML summary of the analysis, or a plain error message.
        """
        nonlocal analyzer, repo_data

        # Validate URL and extract owner/repo.
        owner, repo = parse_repo_url(repo_url)
        if not owner or not repo:
            return "Invalid GitHub repository URL. Please use format: https://github.com/owner/repo"

        # Prefer an explicitly supplied token; fall back to the environment.
        token = github_token if is_private and github_token else os.environ.get("GITHUB_TOKEN", "")
        if is_private and not token:
            return "GitHub token is required for private repositories."

        # Configure analyzer.
        config = GitHubAPIConfig(token=token)
        analyzer = GitHubRepoAnalyzer(config)

        progress(0, desc="Starting repository analysis...")
        try:
            progress(0.1, desc="Fetching repository details...")
            # Rebind the shared state so the PDF and chat handlers see it.
            repo_data = analyzer.analyze_repo(owner, repo)
            progress(0.9, desc="Generating insights...")

            repo_details = repo_data.get("repo_details", {})
            insights = repo_data.get("insights", {})

            full_name = repo_details.get("full_name", "")
            description = repo_details.get("description", "No description provided")
            stars = repo_details.get("stargazers_count", 0)
            forks = repo_details.get("forks_count", 0)
            language = repo_details.get("language", "Unknown")

            # Repository age from the ISO 8601 creation timestamp.
            created_at = repo_details.get("created_at", "")
            age_str = "Unknown"
            if created_at:
                try:
                    created_date = datetime.datetime.fromisoformat(created_at.replace('Z', '+00:00'))
                    age_days = (datetime.datetime.now(datetime.timezone.utc) - created_date).days
                    age_years = age_days / 365.25
                    age_str = f"{age_years:.1f} years ({age_days} days)"
                except (ValueError, AttributeError):
                    pass

            # Headline insight labels.
            activity_level = insights.get("activity_level", {}).get("level", "Unknown")

            doc_quality = insights.get("documentation_quality", {})
            has_readme = doc_quality.get("has_readme", False)
            doc_score = doc_quality.get("score", 0) if has_readme else 0
            doc_quality_level = "High" if doc_score > 0.7 else "Medium" if doc_score > 0.4 else "Low"

            health_level = insights.get("community_health", {}).get("overall", {}).get("level", "Unknown")
            complexity_level = insights.get("code_complexity", {}).get("overall", {}).get("level", "Unknown")

            # Summary header.
            summary_html = f"""
            <h1>{full_name}</h1>
            <p><strong>Description:</strong> {description}</p>
            <div style="display: flex; flex-wrap: wrap; gap: 20px; margin-bottom: 20px;">
                <div style="flex: 1; min-width: 200px;">
                    <h3>Repository Details</h3>
                    <ul>
                        <li><strong>Primary Language:</strong> {language}</li>
                        <li><strong>Stars:</strong> {stars}</li>
                        <li><strong>Forks:</strong> {forks}</li>
                        <li><strong>Age:</strong> {age_str}</li>
                        <li><strong>License:</strong> {repo_details.get("license", "Not specified")}</li>
                    </ul>
                </div>
                <div style="flex: 1; min-width: 200px;">
                    <h3>Key Insights</h3>
                    <ul>
                        <li><strong>Activity Level:</strong> {activity_level}</li>
                        <li><strong>Documentation Quality:</strong> {doc_quality_level}</li>
                        <li><strong>Community Health:</strong> {health_level}</li>
                        <li><strong>Code Complexity:</strong> {complexity_level}</li>
                    </ul>
                </div>
            </div>
            """

            # Contributors section.
            contributors = repo_data.get("contributors", [])
            if contributors:
                top_contributors = sorted(contributors, key=lambda x: x.get("contributions", 0), reverse=True)[:5]
                summary_html += """
                <div style="margin-bottom: 20px;">
                    <h3>Top Contributors</h3>
                    <div style="display: flex; flex-wrap: wrap; gap: 10px;">
                """
                for contributor in top_contributors:
                    avatar_url = contributor.get("avatar_url", "")
                    login = contributor.get("login", "Unknown")
                    contributions = contributor.get("contributions", 0)
                    summary_html += f"""
                    <div style="text-align: center; width: 100px;">
                        <img src="{avatar_url}" style="width: 50px; height: 50px; border-radius: 25px; margin-bottom: 5px;">
                        <div><strong>{login}</strong></div>
                        <div>{contributions} commits</div>
                    </div>
                    """
                summary_html += """
                    </div>
                </div>
                """

            # Language distribution section (top 5 by byte count).
            languages = repo_data.get("languages", {})
            if languages:
                total_bytes = sum(languages.values())
                language_percentages = [
                    (lang, bytes_count, (bytes_count / total_bytes) * 100)
                    for lang, bytes_count in languages.items()
                ]
                sorted_languages = sorted(language_percentages, key=lambda x: x[1], reverse=True)[:5]
                summary_html += """
                <div style="margin-bottom: 20px;">
                    <h3>Language Distribution</h3>
                    <div style="display: flex; flex-direction: column; gap: 5px;">
                """
                for lang, bytes_count, percentage in sorted_languages:
                    # Clamp so a sliver is still visible and the bar never overflows.
                    bar_width = max(1, min(100, percentage))
                    summary_html += f"""
                    <div>
                        <div style="display: flex; align-items: center; gap: 10px;">
                            <div style="width: 100px; text-align: right;"><strong>{lang}</strong></div>
                            <div style="flex-grow: 1; background-color: #eee; height: 20px; border-radius: 10px;">
                                <div style="width: {bar_width}%; background-color: #4CAF50; height: 100%; border-radius: 10px;"></div>
                            </div>
                            <div style="width: 60px;">{percentage:.1f}%</div>
                        </div>
                    </div>
                    """
                summary_html += """
                    </div>
                </div>
                """

            progress(1.0, desc="Analysis complete!")
            return summary_html
        except Exception as e:
            error_message = f"Error analyzing repository: {str(e)}"
            logger.error(error_message)
            return error_message

    def generate_pdf_report() -> Tuple[str, Optional[str]]:
        """Generate a PDF report for the most recently analyzed repository.

        Returns:
            Tuple of (status message, path to the generated PDF or None).
            gr.File expects a file path, so None — not a dict — is returned
            when no report could be produced.
        """
        if not repo_data:
            return "Please analyze a repository first.", None
        try:
            pdf_generator = PDFReportGenerator(repo_data)
            pdf_path = pdf_generator.generate_report()
            repo_name = repo_data.get("repo_details", {}).get("full_name", "repository").replace("/", "_")
            return f"PDF report generated for {repo_name}", pdf_path
        except Exception as e:
            error_message = f"Error generating PDF report: {str(e)}"
            logger.error(error_message)
            return error_message, None

    def chat_with_repo(query: str, history: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        """
        Chat with the repository analysis data using a RAG approach.

        Args:
            query: User's question.
            history: Chat history as (user, bot) message pairs.

        Returns:
            The updated chat history including this exchange — the format
            gr.Chatbot expects as its output value.
        """
        history = history or []
        if not repo_data:
            return history + [(query, "Please analyze a repository first before asking questions.")]
        try:
            # Use the RAG helper to pull context relevant to the question.
            rag_helper = RAGHelper(repo_data)
            context = rag_helper.get_context_for_query(query)

            # NOTE(review): a real implementation would call an LLM here;
            # this simulates a response from the retrieved context.
            repo_name = repo_data.get("repo_details", {}).get("name", "The repository")
            query_lower = query.lower()

            # Route on the query phrasing to pick a matching follow-up prompt.
            if any(term in query_lower for term in ["what is", "tell me about", "overview", "about"]):
                response = f"{context}\n\nIs there something specific about {repo_name} you'd like to know more about?"
            elif any(term in query_lower for term in ["language", "programming", "written in"]):
                response = f"{context}\n\nWould you like to know more about any specific language used in {repo_name}?"
            elif any(term in query_lower for term in ["contributor", "who", "maintain", "author"]):
                response = f"{context}\n\nI can provide more details about specific contributors if you're interested."
            elif any(term in query_lower for term in ["active", "activity", "commit", "frequency"]):
                response = f"{context}\n\nWould you like to see visualizations of the commit activity patterns?"
            elif any(term in query_lower for term in ["document", "readme", "docs"]):
                response = f"{context}\n\nIs there a specific aspect of the documentation you'd like feedback on?"
            elif any(term in query_lower for term in ["complex", "difficulty", "understand"]):
                response = f"{context}\n\nWould you like suggestions for navigating this codebase effectively?"
            else:
                response = f"Based on my analysis of {repo_name}:\n\n{context}\n\nIs there anything specific you'd like to know more about?"
            return history + [(query, response)]
        except Exception as e:
            error_message = f"Error processing your question: {str(e)}"
            logger.error(error_message)
            return history + [(query, error_message)]

    # Create Gradio interface.
    with gr.Blocks(css=css) as interface:
        gr.Markdown("# GitHub Repository Analyzer")
        gr.Markdown("Analyze GitHub repositories and chat about the insights")
        with gr.Tab("Repository Analysis"):
            with gr.Row():
                with gr.Column(scale=3):
                    repo_url = gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/owner/repo")
                with gr.Column(scale=1):
                    is_private = gr.Checkbox(label="Private Repository")
            github_token = gr.Textbox(label="GitHub Token (for private repos)", type="password", visible=False)
            # Show/hide the token input based on the private-repo checkbox.
            is_private.change(fn=lambda x: gr.update(visible=x), inputs=[is_private], outputs=[github_token])
            analyze_btn = gr.Button("Analyze Repository", variant="primary")
            with gr.Row():
                with gr.Column(scale=2):
                    analysis_result = gr.HTML(label="Analysis Result", elem_classes=["analysis-result"])
                with gr.Column(scale=1):
                    with gr.Group():
                        gr.Markdown("### PDF Report")
                        pdf_btn = gr.Button("Generate PDF Report", variant="secondary")
                        pdf_output = gr.Markdown()
                        pdf_download = gr.File(label="Download Report", elem_classes=["pdf-download"])
            # Connect buttons to handlers.
            analyze_btn.click(
                fn=analyze_repository,
                inputs=[repo_url, is_private, github_token],
                outputs=[analysis_result]
            )
            pdf_btn.click(
                fn=generate_pdf_report,
                inputs=[],
                outputs=[pdf_output, pdf_download]
            )
        with gr.Tab("Chat with Repository"):
            gr.Markdown("Ask questions about the repository and get insights")
            chatbot = gr.Chatbot(elem_classes=["chat-interface"])
            msg = gr.Textbox(
                placeholder="Ask me anything about the repository...",
                show_label=False
            )
            clear = gr.Button("Clear")
            # The handler returns the full updated history (the previous
            # `postprocess=` kwarg is not a valid event-listener argument and
            # `msg.value` is only the static initial value). After the answer
            # is appended, clear the input box.
            msg.submit(
                fn=chat_with_repo,
                inputs=[msg, chatbot],
                outputs=[chatbot]
            ).then(lambda: "", None, msg)
            clear.click(lambda: None, None, chatbot, queue=False)
    return interface
# Application entry point.
if __name__ == "__main__":
    # Build the Gradio UI and serve it; share=True exposes a public link.
    app = create_gradio_interface()
    app.launch(debug=True, share=True)