"""Gradio app that ranks GitHub users located in the UAE by recent contributions."""

import os
import time
from datetime import datetime, timedelta, timezone

import gradio as gr
import pandas as pd
import requests

GITHUB_GRAPHQL_URL = 'https://api.github.com/graphql'
GITHUB_SEARCH_URL = 'https://api.github.com/search/users'

# Seconds to wait on any single GitHub request before giving up, so that a
# stalled connection cannot hang the UI callback forever.
REQUEST_TIMEOUT = 10

# Substrings that mark a (lower-cased) profile location as being in the UAE.
UAE_KEYWORDS = (
    'dubai', 'abu dhabi', 'abudhabi', 'sharjah', 'ajman', 'uae', 'u.a.e',
    'united arab emirates', 'ras al khaimah', 'fujairah', 'umm al quwain',
    'emirates', 'rak',
)

CONTRIBUTIONS_QUERY = """
query($username: String!, $from: DateTime!, $to: DateTime!) {
  user(login: $username) {
    contributionsCollection(from: $from, to: $to) {
      contributionCalendar {
        totalContributions
      }
    }
  }
}
"""


def get_contribution_count(username, github_token, days=365):
    """Fetch a user's total contribution count via the GitHub GraphQL API.

    Args:
        username: GitHub login to query.
        github_token: Token sent as a bearer credential; if falsy, no request
            is made.
        days: Size of the look-back window ending now.

    Returns:
        The ``totalContributions`` value reported for the window, or 0 when
        no token is given, the request fails, the response is not HTTP 200,
        or the user is missing from the response.
    """
    if not github_token:
        return 0

    # Send explicit UTC timestamps: a naive local ``isoformat()`` carries no
    # offset, which leaves the window ambiguous for a DateTime argument.
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(days=days)
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    variables = {
        "username": username,
        "from": start_date.strftime(timestamp_format),
        "to": end_date.strftime(timestamp_format),
    }
    headers = {
        'Authorization': f'bearer {github_token}',
        'Content-Type': 'application/json',
    }

    try:
        response = requests.post(
            GITHUB_GRAPHQL_URL,
            json={'query': CONTRIBUTIONS_QUERY, 'variables': variables},
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code == 200:
            data = response.json()
            if 'data' in data and data['data'] and data['data']['user']:
                collection = data['data']['user']['contributionsCollection']
                return collection['contributionCalendar']['totalContributions']
    except Exception as e:
        # Deliberately best-effort: a failure for one user is logged and
        # counted as 0 so it does not abort the whole ranking.
        print(f"Error for {username}: {e}")

    return 0


def fetch_github_users(github_token=None, max_users=256, min_followers=34, days=365):
    """Fetch UAE GitHub users matching committers.top methodology.

    Searches users whose location is "United Arab Emirates" with at least
    ``min_followers`` followers (sorted by followers, descending), loads each
    profile, keeps those whose location contains a UAE keyword, fetches their
    contribution counts, and ranks them by (contributions, followers).

    Args:
        github_token: GitHub token; falls back to the ``GITHUB_TOKEN``
            environment variable when falsy.
        max_users: Maximum number of ranked rows to return.
        min_followers: Minimum follower count used in the search query.
        days: Look-back window passed to ``get_contribution_count``.

    Returns:
        A ``(dataframe, status_message)`` tuple. On any failure the dataframe
        is empty and the message describes the problem; exceptions are
        caught and reported through the message rather than raised.
    """
    if not github_token:
        github_token = os.getenv('GITHUB_TOKEN')
    if not github_token:
        return pd.DataFrame(), "GitHub token is required"

    headers = {
        'Accept': 'application/vnd.github.v3+json',
        'Authorization': f'token {github_token}',
    }
    # Passed via ``params`` so requests handles the URL encoding of the
    # quotes, spaces and ``>=`` in the search qualifier.
    search_params = {
        'q': f'location:"United Arab Emirates" followers:>={min_followers}',
        'sort': 'followers',
        'order': 'desc',
        'per_page': 100,
    }

    all_users = {}
    status_updates = []
    api_calls = 0

    try:
        status_updates.append(f"Searching UAE users (min {min_followers} followers)...")

        # Search by location - get top 1000 by followers
        response = requests.get(
            GITHUB_SEARCH_URL,
            params=search_params,
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        api_calls += 1

        if response.status_code in (403, 429):
            return pd.DataFrame(), "Rate limit exceeded"
        if response.status_code != 200:
            # Surface bad tokens / invalid queries / server errors instead of
            # letting them fall through to a misleading "No users found".
            return pd.DataFrame(), f"GitHub search failed (HTTP {response.status_code})"

        data = response.json()
        total_count = data.get('total_count', 0)
        users = data.get('items', [])
        status_updates.append(f"Found {total_count} UAE users, fetching details...")

        page = 1
        # The call budget is only checked once per page, so a page that has
        # already started is always processed in full.
        while users and len(all_users) < 1000 and api_calls < 150:
            for user in users:
                if len(all_users) >= 1000:
                    break
                if user['login'] in all_users:
                    continue

                user_response = requests.get(
                    user['url'], headers=headers, timeout=REQUEST_TIMEOUT
                )
                api_calls += 1

                if user_response.status_code == 200:
                    user_data = user_response.json()
                    user_location = (user_data.get('location') or '').lower()
                    if any(kw in user_location for kw in UAE_KEYWORDS):
                        all_users[user['login']] = {
                            'login': user_data.get('login', ''),
                            'name': user_data.get('name') or user_data.get('login', ''),
                            'avatar': user_data.get('avatar_url', ''),
                            'followers': user_data.get('followers', 0),
                            'public_repos': user_data.get('public_repos', 0),
                            'location': user_data.get('location', ''),
                            'contributions': 0,
                        }

                # Pace the per-profile requests.
                time.sleep(0.5)

            # A short page means there are no further results.
            if len(users) < 100:
                break

            page += 1
            if page > 10:
                break

            response = requests.get(
                GITHUB_SEARCH_URL,
                params={**search_params, 'page': page},
                headers=headers,
                timeout=REQUEST_TIMEOUT,
            )
            api_calls += 1

            if response.status_code != 200:
                break
            users = response.json().get('items', [])
            status_updates.append(f" Page {page}: fetched")

            time.sleep(1)

        status_updates.append(f"Collected {len(all_users)} UAE users (top 1000 by followers)")

        # Get contributions for all collected users
        status_updates.append(f"Fetching contributions (last {days} days)...")

        for count, (login, user) in enumerate(all_users.items(), 1):
            # NOTE(review): api_calls is not incremented for the GraphQL
            # requests made in this loop, so this guard depends only on the
            # number of REST calls made above - confirm that is intended.
            if api_calls >= 300:
                break

            user['contributions'] = get_contribution_count(login, github_token, days)

            if count % 50 == 0:
                status_updates.append(f" Progress: {count}/{len(all_users)} users...")

            time.sleep(0.5)

        # Sort by contributions, breaking ties by follower count
        users_list = sorted(
            all_users.values(),
            key=lambda x: (x['contributions'], x['followers']),
            reverse=True,
        )
        top_users = users_list[:max_users]

        for i, user in enumerate(top_users, 1):
            user['rank'] = i

        df = pd.DataFrame(top_users)
        if df.empty:
            return pd.DataFrame(), "No users found"

        display_df = df[['rank', 'name', 'login', 'contributions', 'followers',
                         'public_repos', 'location']].copy()
        display_df.columns = ['Rank', 'Name', 'Username', 'Contributions',
                              'Followers', 'Public Repos', 'Location']
        display_df['GitHub Profile'] = df['login'].apply(lambda x: f"https://github.com/{x}")

        status_message = f"Success! Top {len(df)} UAE contributors (last {days} days)\n"
        status_message += f"Total UAE users collected: {len(all_users)}, API calls: {api_calls}\n\n"
        status_message += "\n".join(status_updates[-5:])

        return display_df, status_message

    except Exception as e:
        # UI boundary: report any failure (including request timeouts) in
        # the status box instead of raising into Gradio.
        return pd.DataFrame(), f"Error: {str(e)}"


def search_users(df, search_term):
    """Filter users by name or username.

    Args:
        df: DataFrame with ``Name`` and ``Username`` columns, or None.
        search_term: Text to look for, matched case-insensitively as a
            literal substring.

    Returns:
        ``df`` unchanged when it is None/empty or the term is empty;
        otherwise the rows whose ``Name`` or ``Username`` contains the term.
    """
    if df is None or df.empty or not search_term:
        return df

    needle = search_term.lower()
    # regex=False: the term is user input, so characters such as "(" or "["
    # must be matched literally instead of being compiled as a pattern.
    name_hit = df['Name'].str.lower().str.contains(needle, na=False, regex=False)
    login_hit = df['Username'].str.lower().str.contains(needle, na=False, regex=False)
    return df[name_hit | login_hit]


with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as app:
    gr.Markdown("# GitHub UAE Top Contributors")
    gr.Markdown("*Following committers.top methodology*")

    with gr.Row():
        token_input = gr.Textbox(
            label="GitHub Token",
            placeholder="ghp_xxxx",
            type="password",
            scale=3
        )
        max_users_input = gr.Slider(
            label="Max Users to Display",
            minimum=50,
            maximum=500,
            value=256,
            step=10,
            scale=1
        )

    with gr.Row():
        min_followers_input = gr.Slider(
            label="Min Followers (committers.top uses 34)",
            minimum=10,
            maximum=100,
            value=34,
            step=1
        )
        days_input = gr.Slider(
            label="Days to Count (committers.top uses 365)",
            minimum=30,
            maximum=365,
            value=30,
            step=1
        )

    fetch_btn = gr.Button("Fetch Top Contributors", variant="primary", size="lg")
    status_msg = gr.Textbox(label="Status", interactive=False, lines=6)

    with gr.Row():
        search_box = gr.Textbox(
            label="Filter Results",
            placeholder="Search by name or username...",
            scale=4
        )
        clear_btn = gr.Button("Clear", scale=1)

    # Holds the unfiltered result so the search box can re-filter it.
    full_data = gr.State(value=pd.DataFrame())

    data_display = gr.Dataframe(
        headers=["Rank", "Name", "Username", "Contributions", "Followers",
                 "Public Repos", "Location", "GitHub Profile"],
        datatype=["number", "str", "str", "number", "number", "number", "str", "str"],
        wrap=True,
        interactive=False
    )

    def fetch_and_display(token, max_users, min_followers, days):
        """Run the fetch and return (stored data, displayed data, status)."""
        df, msg = fetch_github_users(token or None, int(max_users), int(min_followers), int(days))
        return df, df, msg

    def filter_data(df, search):
        """Apply the search box text to the stored result."""
        return search_users(df, search) if df is not None else df

    def clear_search(df):
        """Empty the search box and show the full stored result again."""
        return "", df

    fetch_btn.click(
        fn=fetch_and_display,
        inputs=[token_input, max_users_input, min_followers_input, days_input],
        outputs=[full_data, data_display, status_msg]
    )

    search_box.change(
        fn=filter_data,
        inputs=[full_data, search_box],
        outputs=data_display
    )

    clear_btn.click(
        fn=clear_search,
        inputs=[full_data],
        outputs=[search_box, data_display]
    )

if __name__ == "__main__":
    app.launch()