yasserrmd committed on
Commit
dd78c8e
·
verified ·
1 Parent(s): 31fa8cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -131
app.py CHANGED
@@ -5,8 +5,9 @@ from datetime import datetime, timedelta
5
  import time
6
  import os
7
 
8
- def get_contribution_count(username, github_token, days=30):
9
- """Get accurate contribution count using GitHub GraphQL API"""
 
10
  if not github_token:
11
  return 0
12
 
@@ -49,73 +50,60 @@ def get_contribution_count(username, github_token, days=30):
49
  if 'data' in data and data['data'] and data['data']['user']:
50
  return data['data']['user']['contributionsCollection']['contributionCalendar']['totalContributions']
51
  except Exception as e:
52
- print(f"Error fetching contributions for {username}: {e}")
53
 
54
  return 0
55
 
56
- def fetch_github_users(github_token=None, max_users=200, min_followers=0, days=30):
57
- """
58
- Fetch GitHub users from UAE using committers.top methodology:
59
- 1. Search by location + sort by followers
60
- 2. Get top N users by followers
61
- 3. Get their contributions
62
- 4. Re-sort by contributions
63
- """
64
  if not github_token:
65
  github_token = os.getenv('GITHUB_TOKEN')
66
 
67
  if not github_token:
68
- return pd.DataFrame(), "GitHub token is REQUIRED for GraphQL API (contribution counting)"
69
-
70
  headers = {
71
  'Accept': 'application/vnd.github.v3+json',
72
  'Authorization': f'token {github_token}'
73
  }
74
-
75
- all_users = {} # Use dict to avoid duplicates
76
  status_updates = []
77
  api_calls = 0
78
-
79
- # UAE location search terms
80
- locations = [
81
- 'United Arab Emirates', 'UAE',
82
- 'Dubai', 'Abu Dhabi', 'Sharjah', 'Ajman',
83
- 'Ras Al Khaimah', 'Fujairah', 'Umm Al Quwain'
84
- ]
85
-
86
  try:
87
- status_updates.append(f"🔍 Step 1: Searching for UAE users (sorted by followers)...")
88
 
89
- # STEP 1: Get users sorted by FOLLOWERS (like committers.top does)
90
- for location in locations:
91
- if api_calls >= 100:
92
- break
93
-
94
- # Search by followers DESC
95
- search_url = f'https://api.github.com/search/users?q=location:"{location}"+followers:>={min_followers}&sort=followers&order=desc&per_page=100'
96
-
97
- response = requests.get(search_url, headers=headers)
98
- api_calls += 1
 
99
 
100
- if response.status_code == 200:
101
- data = response.json()
102
- users = data.get('items', [])
103
- status_updates.append(f" 📍 {location}: {len(users)} users")
104
-
105
  for user in users:
106
- if user['login'] not in all_users and api_calls < 100:
107
- # Get detailed user info
 
 
108
  user_response = requests.get(user['url'], headers=headers)
109
  api_calls += 1
110
 
111
  if user_response.status_code == 200:
112
  user_data = user_response.json()
113
-
114
- # Verify UAE location (case insensitive)
115
  user_location = (user_data.get('location') or '').lower()
116
- uae_keywords = ['dubai', 'abu dhabi', 'abudhabi', 'sharjah', 'ajman',
117
- 'uae', 'u.a.e', 'united arab emirates', 'ras al khaimah',
118
- 'fujairah', 'umm al quwain', 'emirates', 'rak']
119
 
120
  if any(kw in user_location for kw in uae_keywords):
121
  all_users[user['login']] = {
@@ -125,48 +113,61 @@ def fetch_github_users(github_token=None, max_users=200, min_followers=0, days=3
125
  'followers': user_data.get('followers', 0),
126
  'public_repos': user_data.get('public_repos', 0),
127
  'location': user_data.get('location', ''),
128
- 'contributions': 0 # Will fill later
129
  }
130
 
131
  time.sleep(0.5)
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  time.sleep(1)
134
-
135
- elif response.status_code == 403:
136
- status_updates.append(f"⚠️ Rate limit hit")
137
- break
138
 
139
- status_updates.append(f"✅ Found {len(all_users)} unique UAE users")
 
 
 
140
 
141
- # STEP 2: Get contributions for collected users
142
- status_updates.append(f"📊 Step 2: Fetching contribution counts...")
143
 
144
  count = 0
145
  for login, user in all_users.items():
146
- if api_calls >= 200:
147
  break
148
 
149
  contributions = get_contribution_count(login, github_token, days)
150
  user['contributions'] = contributions
151
 
152
  count += 1
153
- if count % 20 == 0:
154
- status_updates.append(f" Processed {count}/{len(all_users)} users...")
155
 
156
  time.sleep(0.5)
157
 
158
- # STEP 3: Sort by CONTRIBUTIONS (like committers.top final step)
159
  users_list = list(all_users.values())
160
  users_list.sort(key=lambda x: (x['contributions'], x['followers']), reverse=True)
161
 
162
- # Take top N
163
  top_users = users_list[:max_users]
164
 
165
- # Add rank
166
  for i, user in enumerate(top_users, 1):
167
  user['rank'] = i
168
 
169
- # Create DataFrame
170
  df = pd.DataFrame(top_users)
171
 
172
  if not df.empty:
@@ -174,27 +175,20 @@ def fetch_github_users(github_token=None, max_users=200, min_followers=0, days=3
174
  display_df.columns = ['Rank', 'Name', 'Username', 'Contributions', 'Followers', 'Public Repos', 'Location']
175
  display_df['GitHub Profile'] = df['login'].apply(lambda x: f"https://github.com/{x}")
176
 
177
- status_message = f"""✅ Success! Top {len(df)} UAE contributors (last {days} days)
178
- 📊 Total UAE users found: {len(all_users)}
179
- 🔧 API calls used: {api_calls}
180
-
181
- Methodology (like committers.top):
182
- 1. Search by location + sort by followers
183
- 2. Get top users by followers
184
- 3. Fetch contribution counts
185
- 4. Re-sort by contributions
186
-
187
- """ + "\n".join(status_updates[-5:])
188
 
189
  return display_df, status_message
190
  else:
191
- return pd.DataFrame(), "⚠️ No users found"
192
 
193
  except Exception as e:
194
- return pd.DataFrame(), f"Error: {str(e)}"
 
195
 
196
  def search_users(df, search_term):
197
- """Filter users"""
198
  if df is None or df.empty or not search_term:
199
  return df
200
 
@@ -205,30 +199,23 @@ def search_users(df, search_term):
205
  )
206
  return df[mask]
207
 
208
- # Gradio interface
209
- with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as app:
210
 
211
- gr.Markdown("""
212
- # 🇦🇪 GitHub UAE Top Contributors
213
- ### Using committers.top Methodology
214
-
215
- **Approach:** Search by location → Sort by followers → Get contributions → Re-sort by contributions
216
 
217
- **Reference:** [committers.top/uae](https://committers.top/uae.html) found 31,463 UAE users!
218
- """)
219
-
220
  with gr.Row():
221
  token_input = gr.Textbox(
222
- label="GitHub Token (REQUIRED - needs 'read:user' scope)",
223
- placeholder="ghp_xxxx or set GITHUB_TOKEN env var",
224
  type="password",
225
  scale=3
226
  )
227
  max_users_input = gr.Slider(
228
  label="Max Users to Display",
229
- minimum=10,
230
  maximum=500,
231
- value=200,
232
  step=10,
233
  scale=1
234
  )
@@ -236,26 +223,26 @@ with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as a
236
  with gr.Row():
237
  min_followers_input = gr.Slider(
238
  label="Min Followers (committers.top uses 34)",
239
- minimum=0,
240
  maximum=100,
241
- value=0,
242
  step=1
243
  )
244
  days_input = gr.Slider(
245
- label="Days to Count",
246
- minimum=7,
247
- maximum=90,
248
- value=30,
249
  step=1
250
  )
251
 
252
- fetch_btn = gr.Button("🚀 Fetch Top Contributors", variant="primary", size="lg")
253
 
254
  status_msg = gr.Textbox(label="Status", interactive=False, lines=6)
255
 
256
  with gr.Row():
257
  search_box = gr.Textbox(
258
- label="🔍 Filter Results",
259
  placeholder="Search by name or username...",
260
  scale=4
261
  )
@@ -270,39 +257,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as a
270
  interactive=False
271
  )
272
 
273
- gr.Markdown("""
274
- ---
275
- ## How This Works:
276
-
277
- **Methodology (same as committers.top):**
278
- ```
279
- 1. Search: location:UAE + sort:followers
280
- 2. Filter: Verify location has UAE keywords
281
- 3. Collect: Top N users by followers
282
- 4. Count: Get contributions via GraphQL
283
- 5. Sort: Re-rank by contributions
284
- ```
285
-
286
- **Why This Finds Everyone:**
287
- - Searches multiple UAE locations
288
- - Sorts by followers first (GitHub's default)
289
- - Gets top users who are likely active
290
- - Then accurately counts contributions
291
- - Re-sorts by actual contributions
292
-
293
- **Token Setup:**
294
- 1. Go to: https://github.com/settings/tokens
295
- 2. Generate new token (classic)
296
- 3. **Must have `read:user` scope** (for GraphQL)
297
- 4. Copy and paste above
298
-
299
- **Expected Results:**
300
- - Should find: tschm (~10k), zning1994 (~9k), YASSERRMD (~788), nkapila6 (~446)
301
- - All users from committers.top list should appear
302
-
303
- **Note:** Takes 2-5 minutes. Be patient!
304
- """)
305
-
306
  def fetch_and_display(token, max_users, min_followers, days):
307
  df, msg = fetch_github_users(token or None, int(max_users), int(min_followers), int(days))
308
  return df, df, msg
@@ -331,5 +285,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as a
331
  outputs=[search_box, data_display]
332
  )
333
 
 
334
  if __name__ == "__main__":
335
- app.launch()
 
5
  import time
6
  import os
7
 
8
+
9
+ def get_contribution_count(username, github_token, days=365):
10
+ """Fetch contribution count using GitHub GraphQL API"""
11
  if not github_token:
12
  return 0
13
 
 
50
  if 'data' in data and data['data'] and data['data']['user']:
51
  return data['data']['user']['contributionsCollection']['contributionCalendar']['totalContributions']
52
  except Exception as e:
53
+ print(f"Error for {username}: {e}")
54
 
55
  return 0
56
 
57
+
58
+ def fetch_github_users(github_token=None, max_users=256, min_followers=34, days=365):
59
+ """Fetch UAE GitHub users matching committers.top methodology"""
 
 
 
 
 
60
  if not github_token:
61
  github_token = os.getenv('GITHUB_TOKEN')
62
 
63
  if not github_token:
64
+ return pd.DataFrame(), "GitHub token is required"
65
+
66
  headers = {
67
  'Accept': 'application/vnd.github.v3+json',
68
  'Authorization': f'token {github_token}'
69
  }
70
+
71
+ all_users = {}
72
  status_updates = []
73
  api_calls = 0
74
+
75
+ uae_keywords = ['dubai', 'abu dhabi', 'abudhabi', 'sharjah', 'ajman',
76
+ 'uae', 'u.a.e', 'united arab emirates', 'ras al khaimah',
77
+ 'fujairah', 'umm al quwain', 'emirates', 'rak']
78
+
 
 
 
79
  try:
80
+ status_updates.append(f"Searching UAE users (min {min_followers} followers)...")
81
 
82
+ # Search by location - get top 1000 by followers
83
+ search_url = f'https://api.github.com/search/users?q=location:"United Arab Emirates"+followers:>={min_followers}&sort=followers&order=desc&per_page=100'
84
+
85
+ response = requests.get(search_url, headers=headers)
86
+ api_calls += 1
87
+
88
+ if response.status_code == 200:
89
+ data = response.json()
90
+ total_count = data.get('total_count', 0)
91
+ users = data.get('items', [])
92
+ status_updates.append(f"Found {total_count} UAE users, fetching details...")
93
 
94
+ page = 1
95
+ while users and len(all_users) < 1000 and api_calls < 150:
 
 
 
96
  for user in users:
97
+ if len(all_users) >= 1000:
98
+ break
99
+
100
+ if user['login'] not in all_users:
101
  user_response = requests.get(user['url'], headers=headers)
102
  api_calls += 1
103
 
104
  if user_response.status_code == 200:
105
  user_data = user_response.json()
 
 
106
  user_location = (user_data.get('location') or '').lower()
 
 
 
107
 
108
  if any(kw in user_location for kw in uae_keywords):
109
  all_users[user['login']] = {
 
113
  'followers': user_data.get('followers', 0),
114
  'public_repos': user_data.get('public_repos', 0),
115
  'location': user_data.get('location', ''),
116
+ 'contributions': 0
117
  }
118
 
119
  time.sleep(0.5)
120
 
121
+ if len(users) < 100:
122
+ break
123
+
124
+ page += 1
125
+ if page > 10:
126
+ break
127
+
128
+ next_url = f'https://api.github.com/search/users?q=location:"United Arab Emirates"+followers:>={min_followers}&sort=followers&order=desc&per_page=100&page={page}'
129
+ response = requests.get(next_url, headers=headers)
130
+ api_calls += 1
131
+
132
+ if response.status_code == 200:
133
+ users = response.json().get('items', [])
134
+ status_updates.append(f" Page {page}: fetched")
135
+ else:
136
+ break
137
+
138
  time.sleep(1)
 
 
 
 
139
 
140
+ elif response.status_code == 403:
141
+ return pd.DataFrame(), "Rate limit exceeded"
142
+
143
+ status_updates.append(f"Collected {len(all_users)} UAE users (top 1000 by followers)")
144
 
145
+ # Get contributions for all collected users
146
+ status_updates.append(f"Fetching contributions (last {days} days)...")
147
 
148
  count = 0
149
  for login, user in all_users.items():
150
+ if api_calls >= 300:
151
  break
152
 
153
  contributions = get_contribution_count(login, github_token, days)
154
  user['contributions'] = contributions
155
 
156
  count += 1
157
+ if count % 50 == 0:
158
+ status_updates.append(f" Progress: {count}/{len(all_users)} users...")
159
 
160
  time.sleep(0.5)
161
 
162
+ # Sort by contributions
163
  users_list = list(all_users.values())
164
  users_list.sort(key=lambda x: (x['contributions'], x['followers']), reverse=True)
165
 
 
166
  top_users = users_list[:max_users]
167
 
 
168
  for i, user in enumerate(top_users, 1):
169
  user['rank'] = i
170
 
 
171
  df = pd.DataFrame(top_users)
172
 
173
  if not df.empty:
 
175
  display_df.columns = ['Rank', 'Name', 'Username', 'Contributions', 'Followers', 'Public Repos', 'Location']
176
  display_df['GitHub Profile'] = df['login'].apply(lambda x: f"https://github.com/{x}")
177
 
178
+ status_message = f"Success! Top {len(df)} UAE contributors (last {days} days)\n"
179
+ status_message += f"Total UAE users collected: {len(all_users)}, API calls: {api_calls}\n\n"
180
+ status_message += "\n".join(status_updates[-5:])
 
 
 
 
 
 
 
 
181
 
182
  return display_df, status_message
183
  else:
184
+ return pd.DataFrame(), "No users found"
185
 
186
  except Exception as e:
187
+ return pd.DataFrame(), f"Error: {str(e)}"
188
+
189
 
190
  def search_users(df, search_term):
191
+ """Filter users by name or username"""
192
  if df is None or df.empty or not search_term:
193
  return df
194
 
 
199
  )
200
  return df[mask]
201
 
 
 
202
 
203
+ with gr.Blocks(theme=gr.themes.Soft(), title="GitHub UAE Top Contributors") as app:
204
+ gr.Markdown("# GitHub UAE Top Contributors")
205
+ gr.Markdown("*Following committers.top methodology*")
 
 
206
 
 
 
 
207
  with gr.Row():
208
  token_input = gr.Textbox(
209
+ label="GitHub Token",
210
+ placeholder="ghp_xxxx",
211
  type="password",
212
  scale=3
213
  )
214
  max_users_input = gr.Slider(
215
  label="Max Users to Display",
216
+ minimum=50,
217
  maximum=500,
218
+ value=256,
219
  step=10,
220
  scale=1
221
  )
 
223
  with gr.Row():
224
  min_followers_input = gr.Slider(
225
  label="Min Followers (committers.top uses 34)",
226
+ minimum=10,
227
  maximum=100,
228
+ value=34,
229
  step=1
230
  )
231
  days_input = gr.Slider(
232
+ label="Days to Count (committers.top uses 365)",
233
+ minimum=30,
234
+ maximum=365,
235
+ value=365,
236
  step=1
237
  )
238
 
239
+ fetch_btn = gr.Button("Fetch Top Contributors", variant="primary", size="lg")
240
 
241
  status_msg = gr.Textbox(label="Status", interactive=False, lines=6)
242
 
243
  with gr.Row():
244
  search_box = gr.Textbox(
245
+ label="Filter Results",
246
  placeholder="Search by name or username...",
247
  scale=4
248
  )
 
257
  interactive=False
258
  )
259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  def fetch_and_display(token, max_users, min_followers, days):
261
  df, msg = fetch_github_users(token or None, int(max_users), int(min_followers), int(days))
262
  return df, df, msg
 
285
  outputs=[search_box, data_display]
286
  )
287
 
288
+
289
  if __name__ == "__main__":
290
+ app.launch()