yasserrmd committed on
Commit
f2813ad
·
verified ·
1 Parent(s): 754e3fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +274 -126
app.py CHANGED
@@ -55,10 +55,58 @@ def get_contribution_count(username, github_token, days=30):
55
 
56
  return 0
57
 
58
- def fetch_github_users(github_token=None, max_users=200, min_followers=10, days=30):
59
  """
60
- Fetch GitHub users from UAE directly using GitHub API
61
- Sorted by contributions (primary) and followers (secondary)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  """
63
  # Read token from environment variable if not provided
64
  if not github_token:
@@ -72,94 +120,182 @@ def fetch_github_users(github_token=None, max_users=200, min_followers=10, days=
72
  'Authorization': f'token {github_token}'
73
  }
74
 
75
- all_users = []
76
-
77
- # Search locations in UAE
78
- locations = [
79
- 'Dubai', 'Abu Dhabi', 'Sharjah', 'Ajman',
80
- 'United Arab Emirates', 'UAE',
81
- 'Ras Al Khaimah', 'Fujairah', 'Umm Al Quwain'
82
- ]
83
-
84
  status_updates = []
 
85
 
86
  try:
87
- for location in locations:
88
- status_updates.append(f"πŸ” Searching users in {location}...")
89
-
90
- # Search users by location and followers
91
- search_url = f'https://api.github.com/search/users?q=location:{location}+followers:>={min_followers}&sort=followers&order=desc&per_page=100'
92
-
93
- response = requests.get(search_url, headers=headers)
94
-
95
- if response.status_code == 200:
96
- data = response.json()
97
- users = data.get('items', [])
98
-
99
- for user in users:
100
- if not any(u['login'] == user['login'] for u in all_users):
101
- # Fetch detailed user info
102
- user_url = user['url']
103
- user_response = requests.get(user_url, headers=headers)
104
-
105
- if user_response.status_code == 200:
106
- user_data = user_response.json()
107
-
108
- # Get accurate contribution count using GraphQL
109
- contributions = get_contribution_count(user['login'], github_token, days)
110
-
111
- all_users.append({
112
- 'login': user_data.get('login', ''),
113
- 'name': user_data.get('name', user_data.get('login', '')),
114
- 'avatar': user_data.get('avatar_url', ''),
115
- 'followers': user_data.get('followers', 0),
116
- 'public_repos': user_data.get('public_repos', 0),
117
- 'contributions': contributions,
118
- 'location': user_data.get('location', ''),
119
- 'bio': user_data.get('bio', ''),
120
- 'company': user_data.get('company', ''),
121
- })
122
-
123
- time.sleep(0.5) # Rate limiting
124
-
125
- if len(all_users) >= max_users * 2: # Fetch more to ensure we get top contributors
126
- break
127
-
128
- status_updates.append(f"βœ… Found {len(users)} users in {location}")
129
- time.sleep(1) # Rate limiting between searches
130
-
131
- elif response.status_code == 403:
132
- status_updates.append(f"⚠️ Rate limit reached.")
133
- break
134
- else:
135
- status_updates.append(f"❌ Error searching {location}: {response.status_code}")
136
-
137
- if len(all_users) >= max_users * 2:
138
  break
139
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  # Sort by CONTRIBUTIONS FIRST (descending), then by followers (descending)
141
- all_users.sort(key=lambda x: (x['contributions'], x['followers']), reverse=True)
142
-
143
  # Take top contributors based on max_users
144
- top_users = all_users[:max_users]
145
-
146
  # Add rank based on contribution order
147
  for i, user in enumerate(top_users, 1):
148
  user['rank'] = i
149
-
150
  # Convert to DataFrame
151
  df = pd.DataFrame(top_users)
152
-
153
  if not df.empty:
154
  display_df = df[['rank', 'name', 'login', 'contributions', 'followers', 'public_repos', 'location']].copy()
155
  display_df.columns = ['Rank', 'Name', 'Username', 'Contributions', 'Followers', 'Public Repos', 'Location']
156
  display_df['GitHub Profile'] = df['login'].apply(lambda x: f"https://github.com/{x}")
157
-
158
- status_message = f"βœ… Successfully fetched top {len(df)} contributors (last {days} days)\n" + "\n".join(status_updates[-5:])
 
 
 
 
159
  return display_df, status_message
160
  else:
161
  return pd.DataFrame(), "⚠️ No users found\n" + "\n".join(status_updates)
162
-
163
  except Exception as e:
164
  return pd.DataFrame(), f"❌ Error: {str(e)}\n" + "\n".join(status_updates)
165
 
@@ -167,28 +303,29 @@ def search_users(df, search_term):
167
  """Filter users based on search term"""
168
  if df is None or df.empty:
169
  return df
170
-
171
  if not search_term:
172
  return df
173
-
174
  search_term = search_term.lower()
175
  mask = (
176
  df['Name'].str.lower().str.contains(search_term, na=False) |
177
- df['Username'].str.lower().str.contains(search_term, na=False)
 
178
  )
179
  return df[mask]
180
 
181
  # Create Gradio interface
182
  with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as app:
183
-
184
  gr.Markdown("""
185
- # πŸ† Top 200 GitHub Contributors in UAE
186
- ### Ranked by Accurate Contribution Counts
187
-
188
- **IMPORTANT:** This app requires a GitHub Personal Access Token for accurate contribution counts.
189
- The token needs the `read:user` scope for GraphQL API access.
190
  """)
191
-
192
  with gr.Row():
193
  token_input = gr.Textbox(
194
  label="GitHub Personal Access Token (Required)",
@@ -197,46 +334,46 @@ with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as a
197
  scale=3
198
  )
199
  max_users_input = gr.Slider(
200
- label="Max Contributors",
201
  minimum=10,
202
  maximum=500,
203
  value=200,
204
  step=10,
205
  scale=1
206
  )
207
-
208
  with gr.Row():
209
  min_followers_input = gr.Slider(
210
- label="Minimum Followers (for initial search)",
211
- minimum=1,
212
- maximum=100,
213
- value=10,
214
  step=1
215
  )
216
  days_input = gr.Slider(
217
- label="Days to Count (30 days = last 30 days, not calendar month)",
218
  minimum=7,
219
  maximum=90,
220
  value=30,
221
  step=1
222
  )
223
-
224
  with gr.Row():
225
- fetch_btn = gr.Button("πŸš€ Fetch Top Contributors from GitHub", variant="primary", size="lg")
226
-
227
- status_msg = gr.Textbox(label="Status", interactive=False, lines=3)
228
-
229
  with gr.Row():
230
  search_box = gr.Textbox(
231
- label="πŸ” Search by Name or Username",
232
  placeholder="Type to search...",
233
  scale=4
234
  )
235
  clear_btn = gr.Button("Clear", scale=1)
236
-
237
  # Store the full dataframe
238
  full_data = gr.State(value=pd.DataFrame())
239
-
240
  # Display dataframe
241
  data_display = gr.Dataframe(
242
  headers=["Rank", "Name", "Username", "Contributions", "Followers", "Public Repos", "Location", "GitHub Profile"],
@@ -244,57 +381,68 @@ with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as a
244
  wrap=True,
245
  interactive=False
246
  )
247
-
248
  gr.Markdown("""
249
  ---
250
- **Ranking Criteria:**
251
- - **Primary Sort:** Total contributions in selected time period (accurate count from GitHub)
252
- - **Secondary Sort:** Followers count
253
- - Uses GitHub's GraphQL API for accurate contribution counts
254
- - Counts ALL contribution types (commits, PRs, issues, reviews, etc.)
255
-
256
- **How to set up GitHub Token with correct permissions:**
257
-
258
- 1. Go to [GitHub Settings > Developer settings > Personal access tokens > Tokens (classic)](https://github.com/settings/tokens)
259
- 2. Click "Generate new token (classic)"
260
- 3. Give it a name like "UAE Contributors Tracker"
261
- 4. Select scopes: **read:user** (this is required for GraphQL API)
262
- 5. Generate and copy the token
263
- 6. Either:
264
- - Paste it in the field above, OR
265
- - Set environment variable: `export GITHUB_TOKEN="ghp_your_token_here"`
266
-
267
- **Why the change?**
268
- - The Events API only returns ~100 events and may miss many contributions
269
- - GraphQL API gives accurate counts of ALL contribution types
270
- - This matches what you see on your GitHub profile
 
 
 
 
 
 
 
 
 
 
 
271
  """)
272
-
273
  # Event handlers
274
  def fetch_and_display(token, max_users, min_followers, days):
275
  df, msg = fetch_github_users(token if token else None, int(max_users), int(min_followers), int(days))
276
  return df, df, msg
277
-
278
  def filter_data(df, search):
279
  if df is None or df.empty:
280
  return df
281
  return search_users(df, search)
282
-
283
  def clear_search(df):
284
  return "", df
285
-
286
  fetch_btn.click(
287
  fn=fetch_and_display,
288
  inputs=[token_input, max_users_input, min_followers_input, days_input],
289
  outputs=[full_data, data_display, status_msg]
290
  )
291
-
292
  search_box.change(
293
  fn=filter_data,
294
  inputs=[full_data, search_box],
295
  outputs=data_display
296
  )
297
-
298
  clear_btn.click(
299
  fn=clear_search,
300
  inputs=[full_data],
 
55
 
56
  return 0
57
 
58
def is_uae_location(location):
    """
    Check if location string indicates UAE.

    The comparison is case-insensitive. Most keywords are matched as
    substrings of the location text; the short abbreviation "rak" is only
    accepted as a standalone word, because as a substring it also matches
    unrelated places such as "Krakow" or "Iraklion".

    Args:
        location: Free-form location text from a user profile. May be
            None or empty.

    Returns:
        True if the text mentions the UAE or one of the listed cities,
        False otherwise (including for None / empty input).
    """
    if not location:
        return False

    text = location.lower()

    # Country-level keywords. 'u.a.e' also covers the 'u.a.e.' spelling.
    country_keywords = (
        'united arab emirates',
        'uae',
        'u.a.e',
        'emirates',
        'الإمارات',
    )

    # City-level keywords that are distinctive enough for substring matching.
    city_keywords = (
        'dubai',
        'abu dhabi',
        'abudhabi',
        'sharjah',
        'ajman',
        'ras al khaimah',
        'ras al-khaimah',
        'fujairah',
        'umm al quwain',
        'umm al-quwain',
        'al ain',
        'dubayy',
        'دبي',
    )

    if any(keyword in text for keyword in country_keywords + city_keywords):
        return True

    # Abbreviations too short for substring matching: require a whole word.
    # Split on anything that is not a letter or digit so "RAK, UAE" and
    # "(RAK)" still match while "Krakow" does not.
    whole_word_keywords = {'rak'}
    words = ''.join(ch if ch.isalnum() else ' ' for ch in text).split()
    return any(word in whole_word_keywords for word in words)
106
+
107
+ def fetch_github_users(github_token=None, max_users=200, min_followers=1, days=30):
108
+ """
109
+ Fetch GitHub users from UAE - Country-focused search
110
  """
111
  # Read token from environment variable if not provided
112
  if not github_token:
 
120
  'Authorization': f'token {github_token}'
121
  }
122
 
123
+ all_users = {} # Use dict to avoid duplicates by login
 
 
 
 
 
 
 
 
124
  status_updates = []
125
+ total_api_calls = 0
126
 
127
  try:
128
+ # PRIORITY 1: Country-level searches (most reliable)
129
+ country_searches = [
130
+ 'United+Arab+Emirates',
131
+ 'UAE',
132
+ 'U.A.E',
133
+ '"United Arab Emirates"',
134
+ '"UAE"',
135
+ 'Emirates',
136
+ ]
137
+
138
+ status_updates.append("πŸ” Phase 1: Searching by country (United Arab Emirates)...")
139
+
140
+ for search_term in country_searches:
141
+ if total_api_calls >= 200:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  break
143
+
144
+ # Search with different sorting methods to catch different users
145
+ sort_methods = [
146
+ ('followers', 'desc'), # Most followed
147
+ ('repositories', 'desc'), # Most repos
148
+ ('joined', 'desc'), # Recently joined
149
+ ]
150
+
151
+ for sort_by, order in sort_methods:
152
+ if total_api_calls >= 200:
153
+ break
154
+
155
+ search_url = f'https://api.github.com/search/users?q=location:{search_term}+followers:>={min_followers}&sort={sort_by}&order={order}&per_page=100'
156
+
157
+ response = requests.get(search_url, headers=headers)
158
+ total_api_calls += 1
159
+
160
+ if response.status_code == 200:
161
+ data = response.json()
162
+ users = data.get('items', [])
163
+
164
+ status_updates.append(f" πŸ“ Found {len(users)} users with search '{search_term}' sorted by {sort_by}")
165
+
166
+ for user in users:
167
+ if user['login'] not in all_users:
168
+ if total_api_calls >= 200:
169
+ break
170
+
171
+ # Fetch detailed user info
172
+ user_url = user['url']
173
+ user_response = requests.get(user_url, headers=headers)
174
+ total_api_calls += 1
175
+
176
+ if user_response.status_code == 200:
177
+ user_data = user_response.json()
178
+
179
+ # Verify location is UAE-related
180
+ user_location = user_data.get('location', '')
181
+
182
+ if is_uae_location(user_location):
183
+ # Get accurate contribution count
184
+ contributions = get_contribution_count(user['login'], github_token, days)
185
+
186
+ all_users[user['login']] = {
187
+ 'login': user_data.get('login', ''),
188
+ 'name': user_data.get('name', user_data.get('login', '')),
189
+ 'avatar': user_data.get('avatar_url', ''),
190
+ 'followers': user_data.get('followers', 0),
191
+ 'public_repos': user_data.get('public_repos', 0),
192
+ 'contributions': contributions,
193
+ 'location': user_data.get('location', ''),
194
+ 'bio': user_data.get('bio', ''),
195
+ 'company': user_data.get('company', ''),
196
+ }
197
+
198
+ if len(all_users) % 10 == 0:
199
+ status_updates.append(f" βœ… Collected {len(all_users)} unique UAE users so far...")
200
+
201
+ time.sleep(0.5) # Rate limiting
202
+
203
+ time.sleep(1) # Rate limiting between searches
204
+
205
+ elif response.status_code == 403:
206
+ status_updates.append(f"⚠️ Rate limit reached after {total_api_calls} calls")
207
+ break
208
+ elif response.status_code == 422:
209
+ status_updates.append(f" ⚠️ Search term '{search_term}' invalid, skipping...")
210
+ continue
211
+ else:
212
+ status_updates.append(f" ❌ Error with search '{search_term}': {response.status_code}")
213
+
214
+ # PRIORITY 2: City-level searches as supplementary (if we need more users)
215
+ if len(all_users) < max_users * 1.5 and total_api_calls < 180:
216
+ status_updates.append(f"πŸ” Phase 2: Supplementary city-level search...")
217
+
218
+ city_searches = [
219
+ 'Dubai',
220
+ 'Abu+Dhabi',
221
+ 'Sharjah',
222
+ 'Ajman',
223
+ ]
224
+
225
+ for city in city_searches:
226
+ if total_api_calls >= 200 or len(all_users) >= max_users * 2:
227
+ break
228
+
229
+ search_url = f'https://api.github.com/search/users?q=location:{city}+followers:>={min_followers}&sort=followers&order=desc&per_page=100'
230
+
231
+ response = requests.get(search_url, headers=headers)
232
+ total_api_calls += 1
233
+
234
+ if response.status_code == 200:
235
+ data = response.json()
236
+ users = data.get('items', [])
237
+
238
+ for user in users[:30]: # Limit to top 30 per city
239
+ if user['login'] not in all_users and total_api_calls < 200:
240
+ user_url = user['url']
241
+ user_response = requests.get(user_url, headers=headers)
242
+ total_api_calls += 1
243
+
244
+ if user_response.status_code == 200:
245
+ user_data = user_response.json()
246
+ user_location = user_data.get('location', '')
247
+
248
+ if is_uae_location(user_location):
249
+ contributions = get_contribution_count(user['login'], github_token, days)
250
+
251
+ all_users[user['login']] = {
252
+ 'login': user_data.get('login', ''),
253
+ 'name': user_data.get('name', user_data.get('login', '')),
254
+ 'avatar': user_data.get('avatar_url', ''),
255
+ 'followers': user_data.get('followers', 0),
256
+ 'public_repos': user_data.get('public_repos', 0),
257
+ 'contributions': contributions,
258
+ 'location': user_data.get('location', ''),
259
+ 'bio': user_data.get('bio', ''),
260
+ 'company': user_data.get('company', ''),
261
+ }
262
+
263
+ time.sleep(0.5)
264
+
265
+ time.sleep(1)
266
+
267
+ # Convert dict to list
268
+ users_list = list(all_users.values())
269
+
270
+ status_updates.append(f"πŸ“Š Total unique UAE users found: {len(users_list)}")
271
+
272
  # Sort by CONTRIBUTIONS FIRST (descending), then by followers (descending)
273
+ users_list.sort(key=lambda x: (x['contributions'], x['followers']), reverse=True)
274
+
275
  # Take top contributors based on max_users
276
+ top_users = users_list[:max_users]
277
+
278
  # Add rank based on contribution order
279
  for i, user in enumerate(top_users, 1):
280
  user['rank'] = i
281
+
282
  # Convert to DataFrame
283
  df = pd.DataFrame(top_users)
284
+
285
  if not df.empty:
286
  display_df = df[['rank', 'name', 'login', 'contributions', 'followers', 'public_repos', 'location']].copy()
287
  display_df.columns = ['Rank', 'Name', 'Username', 'Contributions', 'Followers', 'Public Repos', 'Location']
288
  display_df['GitHub Profile'] = df['login'].apply(lambda x: f"https://github.com/{x}")
289
+
290
+ status_message = f"""βœ… Successfully fetched top {len(df)} contributors (last {days} days)
291
+ πŸ“Š Total unique users found: {len(all_users)}
292
+ πŸ”§ API calls made: {total_api_calls}
293
+ 🌍 Search strategy: Country-first (UAE/United Arab Emirates)
294
+ """ + "\n".join(status_updates[-8:])
295
  return display_df, status_message
296
  else:
297
  return pd.DataFrame(), "⚠️ No users found\n" + "\n".join(status_updates)
298
+
299
  except Exception as e:
300
  return pd.DataFrame(), f"❌ Error: {str(e)}\n" + "\n".join(status_updates)
301
 
 
303
  """Filter users based on search term"""
304
  if df is None or df.empty:
305
  return df
306
+
307
  if not search_term:
308
  return df
309
+
310
  search_term = search_term.lower()
311
  mask = (
312
  df['Name'].str.lower().str.contains(search_term, na=False) |
313
+ df['Username'].str.lower().str.contains(search_term, na=False) |
314
+ df['Location'].str.lower().str.contains(search_term, na=False)
315
  )
316
  return df[mask]
317
 
318
  # Create Gradio interface
319
  with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as app:
320
+
321
  gr.Markdown("""
322
+ # πŸ‡¦πŸ‡ͺ Top 200 GitHub Contributors in UAE
323
+ ### Country-Focused Search with Accurate Contribution Counts
324
+
325
+ **Search Strategy:** Prioritizes "United Arab Emirates" and "UAE" country-level searches,
326
+ supplemented by major city searches. This ensures comprehensive coverage.
327
  """)
328
+
329
  with gr.Row():
330
  token_input = gr.Textbox(
331
  label="GitHub Personal Access Token (Required)",
 
334
  scale=3
335
  )
336
  max_users_input = gr.Slider(
337
+ label="Max Contributors to Display",
338
  minimum=10,
339
  maximum=500,
340
  value=200,
341
  step=10,
342
  scale=1
343
  )
344
+
345
  with gr.Row():
346
  min_followers_input = gr.Slider(
347
+ label="Minimum Followers (0 = most comprehensive)",
348
+ minimum=0,
349
+ maximum=50,
350
+ value=0,
351
  step=1
352
  )
353
  days_input = gr.Slider(
354
+ label="Days to Count Contributions",
355
  minimum=7,
356
  maximum=90,
357
  value=30,
358
  step=1
359
  )
360
+
361
  with gr.Row():
362
+ fetch_btn = gr.Button("πŸš€ Fetch Top Contributors from UAE", variant="primary", size="lg")
363
+
364
+ status_msg = gr.Textbox(label="Status & Progress", interactive=False, lines=6)
365
+
366
  with gr.Row():
367
  search_box = gr.Textbox(
368
+ label="πŸ” Search by Name, Username, or Location",
369
  placeholder="Type to search...",
370
  scale=4
371
  )
372
  clear_btn = gr.Button("Clear", scale=1)
373
+
374
  # Store the full dataframe
375
  full_data = gr.State(value=pd.DataFrame())
376
+
377
  # Display dataframe
378
  data_display = gr.Dataframe(
379
  headers=["Rank", "Name", "Username", "Contributions", "Followers", "Public Repos", "Location", "GitHub Profile"],
 
381
  wrap=True,
382
  interactive=False
383
  )
384
+
385
  gr.Markdown("""
386
  ---
387
+ **Improved Search Strategy:**
388
+
389
+ βœ… **Phase 1: Country-Level Search (Primary)**
390
+ - Searches: "United Arab Emirates", "UAE", "U.A.E", "Emirates"
391
+ - Multiple sort orders: followers, repositories, recently joined
392
+ - Catches users regardless of city
393
+
394
+ βœ… **Phase 2: City-Level Search (Supplementary)**
395
+ - Searches: Dubai, Abu Dhabi, Sharjah, Ajman
396
+ - Only if needed to reach target count
397
+
398
+ βœ… **Smart Location Verification**
399
+ - Validates location field contains UAE/Emirates/city keywords
400
+ - Filters out false positives
401
+
402
+ βœ… **Comprehensive Coverage**
403
+ - Multiple search variations and sort methods
404
+ - Automatic deduplication
405
+ - Up to 200 API calls for thorough search
406
+
407
+ **Ranking:**
408
+ - Primary: Contributions in last N days (accurate count from GitHub GraphQL)
409
+ - Secondary: Follower count
410
+
411
+ **Token Setup:**
412
+ 1. [Generate token](https://github.com/settings/tokens) with `read:user` scope
413
+ 2. Paste above or set `GITHUB_TOKEN` environment variable
414
+
415
+ **Tips:**
416
+ - Set "Minimum Followers" to 0 for most comprehensive results
417
+ - Increase "Max Contributors" to 300-500 for broader coverage
418
+ - Takes 2-5 minutes depending on settings
419
  """)
420
+
421
  # Event handlers
422
  def fetch_and_display(token, max_users, min_followers, days):
423
  df, msg = fetch_github_users(token if token else None, int(max_users), int(min_followers), int(days))
424
  return df, df, msg
425
+
426
  def filter_data(df, search):
427
  if df is None or df.empty:
428
  return df
429
  return search_users(df, search)
430
+
431
  def clear_search(df):
432
  return "", df
433
+
434
  fetch_btn.click(
435
  fn=fetch_and_display,
436
  inputs=[token_input, max_users_input, min_followers_input, days_input],
437
  outputs=[full_data, data_display, status_msg]
438
  )
439
+
440
  search_box.change(
441
  fn=filter_data,
442
  inputs=[full_data, search_box],
443
  outputs=data_display
444
  )
445
+
446
  clear_btn.click(
447
  fn=clear_search,
448
  inputs=[full_data],