githubtopuae / app.py
yasserrmd's picture
Update app.py
dd78c8e verified
raw
history blame
10.1 kB
import gradio as gr
import requests
import pandas as pd
from datetime import datetime, timedelta
import time
import os
def get_contribution_count(username, github_token, days=365):
    """Return a user's total GitHub contributions over the last ``days`` days.

    Sends one GraphQL query to ``https://api.github.com/graphql`` asking for
    ``contributionsCollection.contributionCalendar.totalContributions`` over
    the window ``[now - days, now]``.

    Args:
        username: GitHub login to look up.
        github_token: Token sent in the ``Authorization: bearer`` header.
            When falsy, no request is made.
        days: Length of the look-back window, in days.

    Returns:
        The ``totalContributions`` value from the response, or ``0`` when the
        token is missing, the request raises, the HTTP status is not 200, or
        the payload contains no user.
    """
    if not github_token:
        return 0

    # Function-scope import: the file header only imports datetime/timedelta.
    from datetime import timezone

    # Use timezone-aware UTC so the serialized timestamps carry an explicit
    # offset; naive local timestamps make the window depend on the host clock.
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(days=days)

    query = """
    query($username: String!, $from: DateTime!, $to: DateTime!) {
      user(login: $username) {
        contributionsCollection(from: $from, to: $to) {
          contributionCalendar {
            totalContributions
          }
        }
      }
    }
    """
    variables = {
        "username": username,
        "from": start_date.isoformat(),
        "to": end_date.isoformat(),
    }
    headers = {
        'Authorization': f'bearer {github_token}',
        'Content-Type': 'application/json',
    }

    # Deliberately best-effort: a failure for one user is logged and counted
    # as zero contributions instead of propagating to the caller.
    try:
        response = requests.post(
            'https://api.github.com/graphql',
            json={'query': query, 'variables': variables},
            headers=headers,
            timeout=10,
        )
        if response.status_code != 200:
            print(f"Error for {username}: HTTP {response.status_code}")
            return 0

        payload = response.json()
        if payload.get('errors'):
            # GraphQL reports query-level failures inside a 200 response.
            print(f"Error for {username}: {payload['errors']}")

        user = (payload.get('data') or {}).get('user')
        if user:
            return user['contributionsCollection']['contributionCalendar']['totalContributions']
    except Exception as e:
        print(f"Error for {username}: {e}")
    return 0
# Lower-case substrings that mark a profile location as being in the UAE.
_UAE_LOCATION_KEYWORDS = (
    'dubai', 'abu dhabi', 'abudhabi', 'sharjah', 'ajman',
    'uae', 'u.a.e', 'united arab emirates', 'ras al khaimah',
    'fujairah', 'umm al quwain', 'emirates', 'rak',
)

# Seconds to wait for any single GitHub REST call before giving up.
_GITHUB_REQUEST_TIMEOUT = 10


def _search_uae_users_page(headers, min_followers, page):
    """Request one 100-item page of the UAE user search, most-followed first."""
    return requests.get(
        'https://api.github.com/search/users',
        # Let requests encode the query instead of hand-building the URL.
        params={
            'q': f'location:"United Arab Emirates" followers:>={min_followers}',
            'sort': 'followers',
            'order': 'desc',
            'per_page': 100,
            'page': page,
        },
        headers=headers,
        timeout=_GITHUB_REQUEST_TIMEOUT,
    )


def _is_uae_location(location):
    """Return True when the profile location contains any UAE keyword."""
    lowered = (location or '').lower()
    return any(kw in lowered for kw in _UAE_LOCATION_KEYWORDS)


def _build_user_record(user_data):
    """Map a GitHub user profile payload to the row dict used for ranking."""
    return {
        'login': user_data.get('login', ''),
        'name': user_data.get('name') or user_data.get('login', ''),
        'avatar': user_data.get('avatar_url', ''),
        'followers': user_data.get('followers', 0),
        'public_repos': user_data.get('public_repos', 0),
        'location': user_data.get('location', ''),
        'contributions': 0,
    }


def fetch_github_users(github_token=None, max_users=256, min_followers=34, days=365):
    """Collect UAE GitHub users and rank them by recent contributions.

    Steps:
      1. Search users with location "United Arab Emirates" and at least
         ``min_followers`` followers, sorted by followers (descending).
      2. Fetch each hit's profile and keep it only if its location contains
         one of the UAE keywords.
      3. Look up each kept user's contribution total for the last ``days``
         days via ``get_contribution_count``.
      4. Sort by (contributions, followers) descending, keep the first
         ``max_users`` and assign 1-based ranks.

    Args:
        github_token: GitHub token. When falsy, the ``GITHUB_TOKEN``
            environment variable is used instead.
        max_users: Maximum number of rows in the returned table.
        min_followers: Minimum follower count used in the search query.
        days: Look-back window for the contribution count.

    Returns:
        A ``(DataFrame, status_message)`` tuple. On success the frame has the
        columns Rank, Name, Username, Contributions, Followers, Public Repos,
        Location and GitHub Profile. On any failure the frame is empty and
        the message describes the problem; exceptions are caught and reported
        as ``"Error: ..."`` rather than raised.
    """
    if not github_token:
        github_token = os.getenv('GITHUB_TOKEN')
    if not github_token:
        return pd.DataFrame(), "GitHub token is required"

    headers = {
        'Accept': 'application/vnd.github.v3+json',
        'Authorization': f'token {github_token}',
    }
    all_users = {}
    status_updates = []
    api_calls = 0

    try:
        status_updates.append(f"Searching UAE users (min {min_followers} followers)...")

        page = 1
        response = _search_uae_users_page(headers, min_followers, page)
        api_calls += 1

        if response.status_code == 403:
            return pd.DataFrame(), "Rate limit exceeded"
        if response.status_code != 200:
            # e.g. 401 for a bad token: say so instead of "No users found".
            return pd.DataFrame(), f"GitHub search failed (HTTP {response.status_code})"

        data = response.json()
        total_count = data.get('total_count', 0)
        users = data.get('items', [])
        status_updates.append(f"Found {total_count} UAE users, fetching details...")

        # The call budget is only checked between pages, so a page that has
        # been started is always processed in full.
        while users and len(all_users) < 1000 and api_calls < 150:
            for user in users:
                if len(all_users) >= 1000:
                    break
                if user['login'] in all_users:
                    continue

                user_response = requests.get(
                    user['url'], headers=headers, timeout=_GITHUB_REQUEST_TIMEOUT
                )
                api_calls += 1
                if user_response.status_code == 200:
                    user_data = user_response.json()
                    if _is_uae_location(user_data.get('location')):
                        all_users[user['login']] = _build_user_record(user_data)
                # Pause between profile requests to stay gentle on the API.
                time.sleep(0.5)

            # A short page means the search results are exhausted.
            if len(users) < 100:
                break
            page += 1
            # Stop after 10 pages of 100 results.
            if page > 10:
                break

            response = _search_uae_users_page(headers, min_followers, page)
            api_calls += 1
            if response.status_code != 200:
                break
            users = response.json().get('items', [])
            status_updates.append(f" Page {page}: fetched")
            time.sleep(1)

        status_updates.append(f"Collected {len(all_users)} UAE users (top 1000 by followers)")

        # Get contributions for all collected users.
        status_updates.append(f"Fetching contributions (last {days} days)...")
        for count, (login, user) in enumerate(all_users.items(), 1):
            # NOTE(review): api_calls is not advanced inside this loop, so
            # this guard only fires if the search phase already reached 300
            # calls. Counting GraphQL calls here would leave later users at
            # zero contributions - confirm the intended budget before changing.
            if api_calls >= 300:
                break
            user['contributions'] = get_contribution_count(login, github_token, days)
            if count % 50 == 0:
                status_updates.append(f" Progress: {count}/{len(all_users)} users...")
            time.sleep(0.5)

        # Rank by contributions, using followers as the tie-breaker.
        ranked = sorted(
            all_users.values(),
            key=lambda u: (u['contributions'], u['followers']),
            reverse=True,
        )
        top_users = ranked[:max_users]
        for rank, user in enumerate(top_users, 1):
            user['rank'] = rank

        df = pd.DataFrame(top_users)
        if df.empty:
            return pd.DataFrame(), "No users found"

        display_df = df[['rank', 'name', 'login', 'contributions', 'followers', 'public_repos', 'location']].copy()
        display_df.columns = ['Rank', 'Name', 'Username', 'Contributions', 'Followers', 'Public Repos', 'Location']
        display_df['GitHub Profile'] = df['login'].apply(lambda x: f"https://github.com/{x}")

        status_message = f"Success! Top {len(df)} UAE contributors (last {days} days)\n"
        status_message += f"Total UAE users collected: {len(all_users)}, API calls: {api_calls}\n\n"
        # Only the most recent progress lines are shown in the status box.
        status_message += "\n".join(status_updates[-5:])
        return display_df, status_message
    except Exception as e:
        return pd.DataFrame(), f"Error: {str(e)}"
def search_users(df, search_term):
    """Filter the results table by name or username.

    Matching is a case-insensitive *literal* substring test, so characters
    such as ``(``, ``[`` or ``.`` typed into the filter box are matched as
    themselves rather than interpreted as regular-expression syntax.

    Args:
        df: DataFrame with ``Name`` and ``Username`` columns, or ``None``.
        search_term: Text to look for. When falsy, no filtering is applied.

    Returns:
        ``df`` unchanged when it is ``None``, empty, or ``search_term`` is
        falsy; otherwise the rows whose ``Name`` or ``Username`` contains the
        term. Missing values never match.
    """
    if df is None or df.empty or not search_term:
        return df

    needle = search_term.lower()
    # regex=False: user input is plain text, not a pattern.
    name_hit = df['Name'].str.lower().str.contains(needle, na=False, regex=False)
    login_hit = df['Username'].str.lower().str.contains(needle, na=False, regex=False)
    return df[name_hit | login_hit]
# Gradio UI. Components are created in display order: title, input controls,
# fetch button, status box, filter row, then the results table.
with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as app:
    gr.Markdown("# GitHub UAE Top Contributors")
    gr.Markdown("*Following committers.top methodology*")

    # Row 1: credentials and result size.
    with gr.Row():
        token_input = gr.Textbox(
            label="GitHub Token",
            placeholder="ghp_xxxx",
            type="password",
            scale=3,
        )
        max_users_input = gr.Slider(
            label="Max Users to Display",
            minimum=50,
            maximum=500,
            value=256,
            step=10,
            scale=1,
        )

    # Row 2: search threshold and contribution window.
    with gr.Row():
        min_followers_input = gr.Slider(
            label="Min Followers (committers.top uses 34)",
            minimum=10,
            maximum=100,
            value=34,
            step=1,
        )
        days_input = gr.Slider(
            label="Days to Count (committers.top uses 365)",
            minimum=30,
            maximum=365,
            value=365,
            step=1,
        )

    fetch_btn = gr.Button("Fetch Top Contributors", variant="primary", size="lg")
    status_msg = gr.Textbox(label="Status", interactive=False, lines=6)

    # Row 3: client-side filter over the fetched table.
    with gr.Row():
        search_box = gr.Textbox(
            label="Filter Results",
            placeholder="Search by name or username...",
            scale=4,
        )
        clear_btn = gr.Button("Clear", scale=1)

    # Holds the unfiltered table so filtering/clearing never needs a refetch.
    full_data = gr.State(value=pd.DataFrame())
    data_display = gr.Dataframe(
        headers=["Rank", "Name", "Username", "Contributions", "Followers", "Public Repos", "Location", "GitHub Profile"],
        datatype=["number", "str", "str", "number", "number", "number", "str", "str"],
        wrap=True,
        interactive=False,
    )

    def fetch_and_display(token, max_users, min_followers, days):
        """Run the fetch and feed the same table to both the state and the grid."""
        # Slider values are cast to int; an empty token box becomes None.
        table, message = fetch_github_users(
            token or None,
            int(max_users),
            int(min_followers),
            int(days),
        )
        return table, table, message

    def filter_data(df, search):
        """Apply the name/username filter to the stored table."""
        if df is None:
            return df
        return search_users(df, search)

    def clear_search(df):
        """Empty the filter box and show the stored table again."""
        return "", df

    # Event wiring.
    fetch_btn.click(
        fn=fetch_and_display,
        inputs=[token_input, max_users_input, min_followers_input, days_input],
        outputs=[full_data, data_display, status_msg],
    )
    search_box.change(
        fn=filter_data,
        inputs=[full_data, search_box],
        outputs=data_display,
    )
    clear_btn.click(
        fn=clear_search,
        inputs=[full_data],
        outputs=[search_box, data_display],
    )

if __name__ == "__main__":
    app.launch()