# githubtopuae / app.py
# yasserrmd's picture
# Update app.py
# 86745a2 verified
import gradio as gr
import requests
import pandas as pd
from datetime import datetime, timedelta
import time
import os
def get_contribution_count(username, github_token, days=365):
    """Return a user's total GitHub contributions over the last ``days`` days.

    Sends one GraphQL query to ``https://api.github.com/graphql`` and reads
    ``contributionsCollection.contributionCalendar.totalContributions``.

    Args:
        username: GitHub login to look up.
        github_token: Token sent as a bearer credential. When falsy, no
            request is made.
        days: Length of the look-back window, in days, ending now.

    Returns:
        The total contribution count, or 0 when the token is missing, the
        request fails, the response is not HTTP 200, or the response holds
        no user. Failures are printed, never raised (best-effort lookup).
    """
    if not github_token:
        return 0

    # Function-scope import: the file-level import only provides
    # ``datetime`` and ``timedelta``.
    from datetime import timezone

    # GitHub's DateTime scalar is an ISO-8601 UTC string. Build the window in
    # UTC and mark it with "Z" so it is not shifted by the host's local offset.
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(days=days)
    timestamp_format = '%Y-%m-%dT%H:%M:%SZ'

    query = """
    query($username: String!, $from: DateTime!, $to: DateTime!) {
      user(login: $username) {
        contributionsCollection(from: $from, to: $to) {
          contributionCalendar {
            totalContributions
          }
        }
      }
    }
    """
    variables = {
        "username": username,
        "from": start_date.strftime(timestamp_format),
        "to": end_date.strftime(timestamp_format),
    }
    headers = {
        'Authorization': f'bearer {github_token}',
        'Content-Type': 'application/json',
    }

    try:
        response = requests.post(
            'https://api.github.com/graphql',
            json={'query': query, 'variables': variables},
            headers=headers,
            timeout=10,
        )
        if response.status_code != 200:
            print(f"Error for {username}: HTTP {response.status_code}")
            return 0

        payload = response.json()
        # GraphQL reports query-level failures in an "errors" list even when
        # the HTTP status is 200; surface them instead of hiding them.
        if payload.get('errors'):
            print(f"Error for {username}: {payload['errors']}")

        user = (payload.get('data') or {}).get('user')
        if user:
            calendar = user['contributionsCollection']['contributionCalendar']
            return calendar['totalContributions']
    except (requests.RequestException, ValueError, KeyError,
            TypeError, AttributeError) as e:
        # Network failure, invalid JSON, or an unexpected response shape.
        print(f"Error for {username}: {e}")
    return 0
def _search_uae_users(headers, min_followers, page, timeout):
    """Request one 100-result page of the UAE user search, most-followed first."""
    return requests.get(
        'https://api.github.com/search/users',
        # Let requests encode the query instead of hand-building the URL.
        params={
            'q': f'location:"United Arab Emirates" followers:>={min_followers}',
            'sort': 'followers',
            'order': 'desc',
            'per_page': 100,
            'page': page,
        },
        headers=headers,
        timeout=timeout,
    )


def fetch_github_users(github_token=None, max_users=256, min_followers=34, days=365):
    """Fetch UAE GitHub users and rank them by recent contributions.

    Searches GitHub for users located in "United Arab Emirates" with at least
    ``min_followers`` followers, loads each profile, keeps those whose profile
    location contains a UAE keyword, looks up each kept user's contribution
    count via ``get_contribution_count``, and ranks by
    (contributions, followers) descending.

    Args:
        github_token: GitHub token. Falls back to the ``GITHUB_TOKEN``
            environment variable when falsy.
        max_users: Maximum number of ranked users to return.
        min_followers: Follower threshold used in the search query.
        days: Look-back window passed to ``get_contribution_count``.

    Returns:
        A ``(DataFrame, status_message)`` tuple. On any failure the DataFrame
        is empty and the message describes the problem. Exceptions are
        converted to an ``"Error: ..."`` message rather than raised.
    """
    if not github_token:
        github_token = os.getenv('GITHUB_TOKEN')
    if not github_token:
        return pd.DataFrame(), "GitHub token is required"

    # Without a timeout a stalled connection would block the caller forever.
    request_timeout = 10

    headers = {
        'Accept': 'application/vnd.github.v3+json',
        'Authorization': f'token {github_token}'
    }
    all_users = {}
    status_updates = []
    api_calls = 0  # REST calls only (search pages + profile lookups)
    uae_keywords = ['dubai', 'abu dhabi', 'abudhabi', 'sharjah', 'ajman',
                    'uae', 'u.a.e', 'united arab emirates', 'ras al khaimah',
                    'fujairah', 'umm al quwain', 'emirates', 'rak']

    try:
        status_updates.append(f"Searching UAE users (min {min_followers} followers)...")

        # Search by location - get top 1000 by followers
        response = _search_uae_users(headers, min_followers, 1, request_timeout)
        api_calls += 1

        if response.status_code in (403, 429):
            return pd.DataFrame(), "Rate limit exceeded"
        if response.status_code != 200:
            # e.g. 401 for a bad token: report it instead of "No users found".
            return pd.DataFrame(), f"GitHub search failed (HTTP {response.status_code})"

        data = response.json()
        total_count = data.get('total_count', 0)
        users = data.get('items', [])
        status_updates.append(f"Found {total_count} UAE users, fetching details...")

        page = 1
        while users and len(all_users) < 1000 and api_calls < 150:
            for user in users:
                if len(all_users) >= 1000:
                    break
                if user['login'] not in all_users:
                    user_response = requests.get(
                        user['url'], headers=headers, timeout=request_timeout
                    )
                    api_calls += 1
                    if user_response.status_code == 200:
                        user_data = user_response.json()
                        user_location = (user_data.get('location') or '').lower()
                        if any(kw in user_location for kw in uae_keywords):
                            all_users[user['login']] = {
                                'login': user_data.get('login', ''),
                                'name': user_data.get('name') or user_data.get('login', ''),
                                'avatar': user_data.get('avatar_url', ''),
                                'followers': user_data.get('followers', 0),
                                'public_repos': user_data.get('public_repos', 0),
                                'location': user_data.get('location', ''),
                                'contributions': 0
                            }
                    # Pace the profile lookups.
                    time.sleep(0.5)

            # A short page means there are no further results.
            if len(users) < 100:
                break
            page += 1
            if page > 10:
                break

            response = _search_uae_users(headers, min_followers, page, request_timeout)
            api_calls += 1
            if response.status_code == 200:
                users = response.json().get('items', [])
                status_updates.append(f" Page {page}: fetched")
            else:
                break
            time.sleep(1)

        status_updates.append(f"Collected {len(all_users)} UAE users (top 1000 by followers)")

        # Get contributions for all collected users
        status_updates.append(f"Fetching contributions (last {days} days)...")
        for count, (login, user) in enumerate(all_users.items(), 1):
            # NOTE(review): api_calls is not incremented for GraphQL lookups,
            # so this guard only reflects REST usage from the phase above.
            # Kept as-is so every collected user still gets a lookup.
            if api_calls >= 300:
                break
            user['contributions'] = get_contribution_count(login, github_token, days)
            if count % 50 == 0:
                status_updates.append(f" Progress: {count}/{len(all_users)} users...")
            time.sleep(0.5)

        # Sort by contributions, breaking ties by follower count.
        users_list = sorted(
            all_users.values(),
            key=lambda x: (x['contributions'], x['followers']),
            reverse=True,
        )
        top_users = users_list[:max_users]
        for i, user in enumerate(top_users, 1):
            user['rank'] = i

        df = pd.DataFrame(top_users)
        if df.empty:
            return pd.DataFrame(), "No users found"

        display_df = df[['rank', 'name', 'login', 'contributions', 'followers', 'public_repos', 'location']].copy()
        display_df.columns = ['Rank', 'Name', 'Username', 'Contributions', 'Followers', 'Public Repos', 'Location']
        display_df['GitHub Profile'] = df['login'].apply(lambda x: f"https://github.com/{x}")

        status_message = f"Success! Top {len(df)} UAE contributors (last {days} days)\n"
        status_message += f"Total UAE users collected: {len(all_users)}, API calls: {api_calls}\n\n"
        status_message += "\n".join(status_updates[-5:])
        return display_df, status_message
    except Exception as e:
        # Boundary for the UI handler: report the failure as a status message.
        return pd.DataFrame(), f"Error: {str(e)}"
def search_users(df, search_term):
    """Filter users whose name or username contains ``search_term``.

    The match is a case-insensitive literal substring test against the
    ``Name`` and ``Username`` columns.

    Args:
        df: DataFrame with ``Name`` and ``Username`` columns, or ``None``.
        search_term: Text to look for.

    Returns:
        ``df`` unchanged when it is ``None``/empty or ``search_term`` is
        falsy; otherwise the rows where either column contains the term.
    """
    if df is None or df.empty or not search_term:
        return df

    needle = search_term.lower()
    # regex=False: the term is user-typed text, not a pattern. With the regex
    # default, "(" raises re.error and "c++" or "." match the wrong rows.
    name_hit = df['Name'].str.lower().str.contains(needle, regex=False, na=False)
    username_hit = df['Username'].str.lower().str.contains(needle, regex=False, na=False)
    return df[name_hit | username_hit]
# Build the Gradio interface. Components are created in display order, so the
# statement order inside the ``with`` blocks determines the page layout.
with gr.Blocks(title="GitHub UAE Top Contributors", theme=gr.themes.Soft()) as app:
    gr.Markdown("# GitHub UAE Top Contributors")
    gr.Markdown("*Following committers.top methodology*")

    # Row 1: credentials and result-size control.
    with gr.Row():
        token_input = gr.Textbox(
            type="password",
            label="GitHub Token",
            placeholder="ghp_xxxx",
            scale=3,
        )
        max_users_input = gr.Slider(
            minimum=50,
            maximum=500,
            step=10,
            value=256,
            label="Max Users to Display",
            scale=1,
        )

    # Row 2: search threshold and contribution window.
    with gr.Row():
        min_followers_input = gr.Slider(
            minimum=10,
            maximum=100,
            step=1,
            value=34,
            label="Min Followers (committers.top uses 34)",
        )
        days_input = gr.Slider(
            minimum=30,
            maximum=365,
            step=1,
            value=30,
            label="Days to Count (committers.top uses 365)",
        )

    fetch_btn = gr.Button("Fetch Top Contributors", variant="primary", size="lg")
    status_msg = gr.Textbox(label="Status", lines=6, interactive=False)

    # Row 3: client-side filter over the last fetched result.
    with gr.Row():
        search_box = gr.Textbox(
            placeholder="Search by name or username...",
            label="Filter Results",
            scale=4,
        )
        clear_btn = gr.Button("Clear", scale=1)

    # Holds the unfiltered result so filtering can always start from it.
    full_data = gr.State(value=pd.DataFrame())
    data_display = gr.Dataframe(
        headers=["Rank", "Name", "Username", "Contributions", "Followers", "Public Repos", "Location", "GitHub Profile"],
        datatype=["number", "str", "str", "number", "number", "number", "str", "str"],
        interactive=False,
        wrap=True,
    )

    def fetch_and_display(token, max_users, min_followers, days):
        """Run the fetch and return the frame twice (state + table) plus the status."""
        # A falsy token becomes None; the numeric inputs are coerced to int.
        frame, message = fetch_github_users(
            token or None,
            int(max_users),
            int(min_followers),
            int(days),
        )
        return frame, frame, message

    def filter_data(df, search):
        """Return the stored frame filtered by the search text."""
        if df is None:
            return df
        return search_users(df, search)

    def clear_search(df):
        """Empty the search box and show the stored frame again."""
        emptied_text = ""
        return emptied_text, df

    # Button click: refresh stored data, visible table and status text.
    fetch_btn.click(
        fn=fetch_and_display,
        inputs=[token_input, max_users_input, min_followers_input, days_input],
        outputs=[full_data, data_display, status_msg],
    )
    # Search text change: re-filter the stored data into the table.
    search_box.change(
        fn=filter_data,
        inputs=[full_data, search_box],
        outputs=data_display,
    )
    # Clear click: reset the search text and the table.
    clear_btn.click(
        fn=clear_search,
        inputs=[full_data],
        outputs=[search_box, data_display],
    )

if __name__ == "__main__":
    app.launch()