yasserrmd committed on
Commit
31fa8cc
·
verified ·
1 Parent(s): ee0a5bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -130
app.py CHANGED
@@ -6,9 +6,7 @@ import time
6
  import os
7
 
8
  def get_contribution_count(username, github_token, days=30):
9
- """
10
- Get accurate contribution count using GitHub GraphQL API
11
- """
12
  if not github_token:
13
  return 0
14
 
@@ -42,7 +40,8 @@ def get_contribution_count(username, github_token, days=30):
42
  response = requests.post(
43
  'https://api.github.com/graphql',
44
  json={'query': query, 'variables': variables},
45
- headers=headers
 
46
  )
47
 
48
  if response.status_code == 200:
@@ -54,131 +53,151 @@ def get_contribution_count(username, github_token, days=30):
54
 
55
  return 0
56
 
57
- def fetch_github_users(github_token=None, max_users=200, min_followers=10, days=30):
58
  """
59
- Fetch GitHub users from UAE directly using GitHub API
60
- Sorted by contributions (primary) and followers (secondary)
 
 
 
61
  """
62
  if not github_token:
63
  github_token = os.getenv('GITHUB_TOKEN')
 
 
 
64
 
65
  headers = {
66
  'Accept': 'application/vnd.github.v3+json',
 
67
  }
68
 
69
- if github_token:
70
- headers['Authorization'] = f'token {github_token}'
71
-
72
- all_users = []
73
 
74
- # Search locations in UAE - KEEP THE ORIGINAL WORKING APPROACH
75
  locations = [
76
- 'Dubai', 'Abu Dhabi', 'Sharjah', 'Ajman',
77
  'United Arab Emirates', 'UAE',
 
78
  'Ras Al Khaimah', 'Fujairah', 'Umm Al Quwain'
79
  ]
80
 
81
- status_updates = []
82
-
83
  try:
 
 
 
84
  for location in locations:
85
- status_updates.append(f"πŸ” Searching users in {location}...")
86
-
87
- # Search users by location and followers
88
- search_url = f'https://api.github.com/search/users?q=location:{location}+followers:>={min_followers}&sort=followers&order=desc&per_page=100'
89
-
 
90
  response = requests.get(search_url, headers=headers)
91
-
 
92
  if response.status_code == 200:
93
  data = response.json()
94
  users = data.get('items', [])
95
-
 
96
  for user in users:
97
- if not any(u['login'] == user['login'] for u in all_users):
98
- # Fetch detailed user info
99
- user_url = user['url']
100
- user_response = requests.get(user_url, headers=headers)
101
-
102
  if user_response.status_code == 200:
103
  user_data = user_response.json()
104
 
105
- # Verify location contains UAE-related keywords (case insensitive)
106
  user_location = (user_data.get('location') or '').lower()
107
- # Keywords are already lowercase for comparison
108
- uae_keywords = ['dubai', 'abu dhabi', 'sharjah', 'ajman', 'uae', 'u.a.e',
109
- 'united arab emirates', 'ras al khaimah', 'fujairah',
110
- 'umm al quwain', 'emirates', 'abudhabi', 'rak']
111
 
112
- # Check if any UAE keyword exists in the lowercased location
113
- if not any(keyword in user_location for keyword in uae_keywords):
114
- continue
115
-
116
- # Get accurate contribution count using GraphQL (FIX: Use GraphQL instead of Events API)
117
- contributions = get_contribution_count(user['login'], github_token, days)
118
-
119
- all_users.append({
120
- 'login': user_data.get('login', ''),
121
- 'name': user_data.get('name', user_data.get('login', '')),
122
- 'avatar': user_data.get('avatar_url', ''),
123
- 'followers': user_data.get('followers', 0),
124
- 'public_repos': user_data.get('public_repos', 0),
125
- 'contributions': contributions,
126
- 'location': user_data.get('location', ''),
127
- 'bio': user_data.get('bio', ''),
128
- 'company': user_data.get('company', ''),
129
- })
130
-
131
- time.sleep(0.5) # Rate limiting
132
-
133
- if len(all_users) >= max_users * 2: # Fetch more to ensure we get top contributors
134
- break
135
-
136
- status_updates.append(f"βœ… Found {len(users)} users in {location}")
137
- time.sleep(1) # Rate limiting between searches
138
-
139
  elif response.status_code == 403:
140
- status_updates.append(f"⚠️ Rate limit reached. Please add a GitHub token.")
141
  break
142
- else:
143
- status_updates.append(f"❌ Error searching {location}: {response.status_code}")
144
-
145
- if len(all_users) >= max_users * 2:
 
 
 
 
 
146
  break
147
-
148
- # Sort by CONTRIBUTIONS FIRST (descending), then by followers (descending)
149
- all_users.sort(key=lambda x: (x['contributions'], x['followers']), reverse=True)
150
-
151
- # Take top contributors based on max_users
152
- top_users = all_users[:max_users]
153
-
154
- # Add rank based on contribution order
 
 
 
 
 
 
 
 
 
 
155
  for i, user in enumerate(top_users, 1):
156
  user['rank'] = i
157
-
158
- # Convert to DataFrame
159
  df = pd.DataFrame(top_users)
160
-
161
  if not df.empty:
162
  display_df = df[['rank', 'name', 'login', 'contributions', 'followers', 'public_repos', 'location']].copy()
163
  display_df.columns = ['Rank', 'Name', 'Username', 'Contributions', 'Followers', 'Public Repos', 'Location']
164
  display_df['GitHub Profile'] = df['login'].apply(lambda x: f"https://github.com/{x}")
165
-
166
- status_message = f"βœ… Successfully fetched top {len(df)} contributors (last {days} days)\n" + "\n".join(status_updates[-5:])
 
 
 
 
 
 
 
 
 
 
 
167
  return display_df, status_message
168
  else:
169
- return pd.DataFrame(), "⚠️ No users found\n" + "\n".join(status_updates)
170
-
171
  except Exception as e:
172
- return pd.DataFrame(), f"❌ Error: {str(e)}\n" + "\n".join(status_updates)
173
 
174
  def search_users(df, search_term):
175
- """Filter users based on search term"""
176
- if df is None or df.empty:
177
  return df
178
-
179
- if not search_term:
180
- return df
181
-
182
  search_term = search_term.lower()
183
  mask = (
184
  df['Name'].str.lower().str.contains(search_term, na=False) |
@@ -186,25 +205,27 @@ def search_users(df, search_term):
186
  )
187
  return df[mask]
188
 
189
- # Create Gradio interface
190
  with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as app:
191
 
192
  gr.Markdown("""
193
- # 🏆 Top 200 GitHub Contributors in UAE (Last 30 Days)
194
- ### Ranked by Contributions Using GitHub GraphQL API
195
-
196
- **This version uses the WORKING search from v1 + ACCURATE GraphQL contribution counting**
 
 
197
  """)
198
 
199
  with gr.Row():
200
  token_input = gr.Textbox(
201
- label="GitHub Personal Access Token (REQUIRED for GraphQL)",
202
- placeholder="ghp_xxxxxxxxxxxx or leave empty to use GITHUB_TOKEN env var",
203
  type="password",
204
  scale=3
205
  )
206
  max_users_input = gr.Slider(
207
- label="Max Contributors",
208
  minimum=10,
209
  maximum=500,
210
  value=200,
@@ -214,37 +235,34 @@ with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as a
214
 
215
  with gr.Row():
216
  min_followers_input = gr.Slider(
217
- label="Minimum Followers (for initial search)",
218
  minimum=0,
219
  maximum=100,
220
- value=10,
221
  step=1
222
  )
223
  days_input = gr.Slider(
224
- label="Days to Count (30 = last 30 days)",
225
  minimum=7,
226
  maximum=90,
227
  value=30,
228
  step=1
229
  )
230
 
231
- with gr.Row():
232
- fetch_btn = gr.Button("πŸš€ Fetch Top Contributors from GitHub", variant="primary", size="lg")
233
-
234
- status_msg = gr.Textbox(label="Status", interactive=False, lines=3)
235
 
236
  with gr.Row():
237
  search_box = gr.Textbox(
238
- label="πŸ” Search by Name or Username",
239
- placeholder="Type to search...",
240
  scale=4
241
  )
242
  clear_btn = gr.Button("Clear", scale=1)
243
 
244
- # Store the full dataframe
245
  full_data = gr.State(value=pd.DataFrame())
246
-
247
- # Display dataframe
248
  data_display = gr.Dataframe(
249
  headers=["Rank", "Name", "Username", "Contributions", "Followers", "Public Repos", "Location", "GitHub Profile"],
250
  datatype=["number", "str", "str", "number", "number", "number", "str", "str"],
@@ -254,40 +272,43 @@ with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as a
254
 
255
  gr.Markdown("""
256
  ---
257
- **What Changed from Original:**
258
- - ✅ Kept the WORKING search approach (searches Dubai, Abu Dhabi, Sharjah, UAE, etc.)
259
- - ✅ Fixed contribution counting: Now uses GitHub GraphQL API for ACCURATE counts
260
- - ✅ GraphQL returns the EXACT number from user's contribution graph (not estimated from events)
261
 
262
- **Original Problem:**
263
- - Events API only returns ~100 events and misses many contributions
264
- - GraphQL API returns the actual total contributions for the date range
 
 
 
 
 
265
 
266
- **Ranking:**
267
- - Primary: Contributions in last N days (accurate from GraphQL)
268
- - Secondary: Followers count
 
 
 
269
 
270
- **Token Setup (REQUIRED):**
271
- 1. Go to [GitHub Settings > Tokens](https://github.com/settings/tokens)
272
  2. Generate new token (classic)
273
- 3. Select scope: `read:user` (REQUIRED for GraphQL)
274
- 4. Copy and paste above OR set `GITHUB_TOKEN` env var
 
 
 
 
275
 
276
- **Why Token is Required:**
277
- - REST API works without token but with rate limits
278
- - GraphQL API REQUIRES authentication
279
- - Without token: can search users but contributions will be 0
280
  """)
281
 
282
- # Event handlers
283
  def fetch_and_display(token, max_users, min_followers, days):
284
- df, msg = fetch_github_users(token if token else None, int(max_users), int(min_followers), int(days))
285
  return df, df, msg
286
 
287
  def filter_data(df, search):
288
- if df is None or df.empty:
289
- return df
290
- return search_users(df, search)
291
 
292
  def clear_search(df):
293
  return "", df
@@ -310,6 +331,5 @@ with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as a
310
  outputs=[search_box, data_display]
311
  )
312
 
313
- # Launch the app
314
  if __name__ == "__main__":
315
  app.launch()
 
6
  import os
7
 
8
  def get_contribution_count(username, github_token, days=30):
9
+ """Get accurate contribution count using GitHub GraphQL API"""
 
 
10
  if not github_token:
11
  return 0
12
 
 
40
  response = requests.post(
41
  'https://api.github.com/graphql',
42
  json={'query': query, 'variables': variables},
43
+ headers=headers,
44
+ timeout=10
45
  )
46
 
47
  if response.status_code == 200:
 
53
 
54
  return 0
55
 
56
+ def fetch_github_users(github_token=None, max_users=200, min_followers=0, days=30):
57
  """
58
+ Fetch GitHub users from UAE using committers.top methodology:
59
+ 1. Search by location + sort by followers
60
+ 2. Get top N users by followers
61
+ 3. Get their contributions
62
+ 4. Re-sort by contributions
63
  """
64
  if not github_token:
65
  github_token = os.getenv('GITHUB_TOKEN')
66
+
67
+ if not github_token:
68
+ return pd.DataFrame(), "❌ GitHub token is REQUIRED for GraphQL API (contribution counting)"
69
 
70
  headers = {
71
  'Accept': 'application/vnd.github.v3+json',
72
+ 'Authorization': f'token {github_token}'
73
  }
74
 
75
+ all_users = {} # Use dict to avoid duplicates
76
+ status_updates = []
77
+ api_calls = 0
 
78
 
79
+ # UAE location search terms
80
  locations = [
 
81
  'United Arab Emirates', 'UAE',
82
+ 'Dubai', 'Abu Dhabi', 'Sharjah', 'Ajman',
83
  'Ras Al Khaimah', 'Fujairah', 'Umm Al Quwain'
84
  ]
85
 
 
 
86
  try:
87
+ status_updates.append(f"πŸ” Step 1: Searching for UAE users (sorted by followers)...")
88
+
89
+ # STEP 1: Get users sorted by FOLLOWERS (like committers.top does)
90
  for location in locations:
91
+ if api_calls >= 100:
92
+ break
93
+
94
+ # Search by followers DESC
95
+ search_url = f'https://api.github.com/search/users?q=location:"{location}"+followers:>={min_followers}&sort=followers&order=desc&per_page=100'
96
+
97
  response = requests.get(search_url, headers=headers)
98
+ api_calls += 1
99
+
100
  if response.status_code == 200:
101
  data = response.json()
102
  users = data.get('items', [])
103
+ status_updates.append(f" πŸ“ {location}: {len(users)} users")
104
+
105
  for user in users:
106
+ if user['login'] not in all_users and api_calls < 100:
107
+ # Get detailed user info
108
+ user_response = requests.get(user['url'], headers=headers)
109
+ api_calls += 1
110
+
111
  if user_response.status_code == 200:
112
  user_data = user_response.json()
113
 
114
+ # Verify UAE location (case insensitive)
115
  user_location = (user_data.get('location') or '').lower()
116
+ uae_keywords = ['dubai', 'abu dhabi', 'abudhabi', 'sharjah', 'ajman',
117
+ 'uae', 'u.a.e', 'united arab emirates', 'ras al khaimah',
118
+ 'fujairah', 'umm al quwain', 'emirates', 'rak']
 
119
 
120
+ if any(kw in user_location for kw in uae_keywords):
121
+ all_users[user['login']] = {
122
+ 'login': user_data.get('login', ''),
123
+ 'name': user_data.get('name') or user_data.get('login', ''),
124
+ 'avatar': user_data.get('avatar_url', ''),
125
+ 'followers': user_data.get('followers', 0),
126
+ 'public_repos': user_data.get('public_repos', 0),
127
+ 'location': user_data.get('location', ''),
128
+ 'contributions': 0 # Will fill later
129
+ }
130
+
131
+ time.sleep(0.5)
132
+
133
+ time.sleep(1)
134
+
 
 
 
 
 
 
 
 
 
 
 
 
135
  elif response.status_code == 403:
136
+ status_updates.append(f"⚠️ Rate limit hit")
137
  break
138
+
139
+ status_updates.append(f"βœ… Found {len(all_users)} unique UAE users")
140
+
141
+ # STEP 2: Get contributions for collected users
142
+ status_updates.append(f"πŸ“Š Step 2: Fetching contribution counts...")
143
+
144
+ count = 0
145
+ for login, user in all_users.items():
146
+ if api_calls >= 200:
147
  break
148
+
149
+ contributions = get_contribution_count(login, github_token, days)
150
+ user['contributions'] = contributions
151
+
152
+ count += 1
153
+ if count % 20 == 0:
154
+ status_updates.append(f" ⏳ Processed {count}/{len(all_users)} users...")
155
+
156
+ time.sleep(0.5)
157
+
158
+ # STEP 3: Sort by CONTRIBUTIONS (like committers.top final step)
159
+ users_list = list(all_users.values())
160
+ users_list.sort(key=lambda x: (x['contributions'], x['followers']), reverse=True)
161
+
162
+ # Take top N
163
+ top_users = users_list[:max_users]
164
+
165
+ # Add rank
166
  for i, user in enumerate(top_users, 1):
167
  user['rank'] = i
168
+
169
+ # Create DataFrame
170
  df = pd.DataFrame(top_users)
171
+
172
  if not df.empty:
173
  display_df = df[['rank', 'name', 'login', 'contributions', 'followers', 'public_repos', 'location']].copy()
174
  display_df.columns = ['Rank', 'Name', 'Username', 'Contributions', 'Followers', 'Public Repos', 'Location']
175
  display_df['GitHub Profile'] = df['login'].apply(lambda x: f"https://github.com/{x}")
176
+
177
+ status_message = f"""βœ… Success! Top {len(df)} UAE contributors (last {days} days)
178
+ πŸ“Š Total UAE users found: {len(all_users)}
179
+ πŸ”§ API calls used: {api_calls}
180
+
181
+ Methodology (like committers.top):
182
+ 1. Search by location + sort by followers
183
+ 2. Get top users by followers
184
+ 3. Fetch contribution counts
185
+ 4. Re-sort by contributions
186
+
187
+ """ + "\n".join(status_updates[-5:])
188
+
189
  return display_df, status_message
190
  else:
191
+ return pd.DataFrame(), "⚠️ No users found"
192
+
193
  except Exception as e:
194
+ return pd.DataFrame(), f"❌ Error: {str(e)}"
195
 
196
  def search_users(df, search_term):
197
+ """Filter users"""
198
+ if df is None or df.empty or not search_term:
199
  return df
200
+
 
 
 
201
  search_term = search_term.lower()
202
  mask = (
203
  df['Name'].str.lower().str.contains(search_term, na=False) |
 
205
  )
206
  return df[mask]
207
 
208
+ # Gradio interface
209
  with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as app:
210
 
211
  gr.Markdown("""
212
+ # 🇦🇪 GitHub UAE Top Contributors
213
+ ### Using committers.top Methodology
214
+
215
+ **Approach:** Search by location → Sort by followers → Get contributions → Re-sort by contributions
216
+
217
+ **Reference:** [committers.top/uae](https://committers.top/uae.html) found 31,463 UAE users!
218
  """)
219
 
220
  with gr.Row():
221
  token_input = gr.Textbox(
222
+ label="GitHub Token (REQUIRED - needs 'read:user' scope)",
223
+ placeholder="ghp_xxxx or set GITHUB_TOKEN env var",
224
  type="password",
225
  scale=3
226
  )
227
  max_users_input = gr.Slider(
228
+ label="Max Users to Display",
229
  minimum=10,
230
  maximum=500,
231
  value=200,
 
235
 
236
  with gr.Row():
237
  min_followers_input = gr.Slider(
238
+ label="Min Followers (committers.top uses 34)",
239
  minimum=0,
240
  maximum=100,
241
+ value=0,
242
  step=1
243
  )
244
  days_input = gr.Slider(
245
+ label="Days to Count",
246
  minimum=7,
247
  maximum=90,
248
  value=30,
249
  step=1
250
  )
251
 
252
+ fetch_btn = gr.Button("πŸš€ Fetch Top Contributors", variant="primary", size="lg")
253
+
254
+ status_msg = gr.Textbox(label="Status", interactive=False, lines=6)
 
255
 
256
  with gr.Row():
257
  search_box = gr.Textbox(
258
+ label="πŸ” Filter Results",
259
+ placeholder="Search by name or username...",
260
  scale=4
261
  )
262
  clear_btn = gr.Button("Clear", scale=1)
263
 
 
264
  full_data = gr.State(value=pd.DataFrame())
265
+
 
266
  data_display = gr.Dataframe(
267
  headers=["Rank", "Name", "Username", "Contributions", "Followers", "Public Repos", "Location", "GitHub Profile"],
268
  datatype=["number", "str", "str", "number", "number", "number", "str", "str"],
 
272
 
273
  gr.Markdown("""
274
  ---
275
+ ## How This Works:
 
 
 
276
 
277
+ **Methodology (same as committers.top):**
278
+ ```
279
+ 1. Search: location:UAE + sort:followers
280
+ 2. Filter: Verify location has UAE keywords
281
+ 3. Collect: Top N users by followers
282
+ 4. Count: Get contributions via GraphQL
283
+ 5. Sort: Re-rank by contributions
284
+ ```
285
 
286
+ **Why This Finds Everyone:**
287
+ - Searches multiple UAE locations
288
+ - Sorts by followers first (GitHub's default)
289
+ - Gets top users who are likely active
290
+ - Then accurately counts contributions
291
+ - Re-sorts by actual contributions
292
 
293
+ **Token Setup:**
294
+ 1. Go to: https://github.com/settings/tokens
295
  2. Generate new token (classic)
296
+ 3. **Must have `read:user` scope** (for GraphQL)
297
+ 4. Copy and paste above
298
+
299
+ **Expected Results:**
300
+ - Should find: tschm (~10k), zning1994 (~9k), YASSERRMD (~788), nkapila6 (~446)
301
+ - All users from committers.top list should appear
302
 
303
+ **Note:** Takes 2-5 minutes. Be patient!
 
 
 
304
  """)
305
 
 
306
  def fetch_and_display(token, max_users, min_followers, days):
307
+ df, msg = fetch_github_users(token or None, int(max_users), int(min_followers), int(days))
308
  return df, df, msg
309
 
310
  def filter_data(df, search):
311
+ return search_users(df, search) if df is not None else df
 
 
312
 
313
  def clear_search(df):
314
  return "", df
 
331
  outputs=[search_box, data_display]
332
  )
333
 
 
334
  if __name__ == "__main__":
335
  app.launch()