mylesai committed on
Commit
f62fe01
·
verified ·
1 Parent(s): 1c76922

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -30
app.py CHANGED
@@ -122,8 +122,6 @@ def get_insta_info(df, progress=gr.Progress()):
122
  def scrape_linkedins(links):
123
  url = "https://linkedin-bulk-data-scraper.p.rapidapi.com/profiles"
124
 
125
- payload = {"links": links}
126
-
127
  headers = {
128
  "x-rapidapi-key": f"{RAPIDAPI_API_KEY}",
129
  "x-rapidapi-host": "linkedin-bulk-data-scraper.p.rapidapi.com",
@@ -133,39 +131,47 @@ def scrape_linkedins(links):
133
 
134
  # Initialize an empty list to store the dictionaries
135
  profile_info_list = []
 
 
 
 
136
 
137
  try:
138
- response = requests.post(url, json=payload, headers=headers)
139
- response.raise_for_status() # Raise HTTPError for bad responses
140
- data = response.json()
 
 
 
 
141
 
142
- if 'data' not in data:
143
- raise ValueError("Missing 'data' in response")
144
 
145
- responses = data['data']
146
-
147
- for response_item in responses:
148
- response_data = response_item.get('data', {})
149
-
150
- # Use get() method with default empty strings for missing fields
151
- profile_info = {
152
- 'link': response_item.get('entry', ''),
153
- 'full_name': response_data.get('fullName', ''),
154
- 'headline': response_data.get('headline', ''),
155
- 'connections': response_data.get('followers', ''), # or 'connections' based on availability
156
- 'country': response_data.get('addressCountryOnly', ''),
157
- 'address': response_data.get('addressWithoutCountry', ''),
158
- 'about': response_data.get('about', ''),
159
- 'current_role': (f"{response_data.get('experiences', [{}])[0].get('title', '')} at "
160
- f"{response_data.get('experiences', [{}])[0].get('subtitle', '')}"),
161
- 'all_roles': [f"{item.get('title', '')} at {item.get('subtitle', '')}" for item in response_data.get('experiences', [{}])],
162
- 'education': (f"{response_data.get('educations', [{}])[0].get('subtitle', '')} at "
163
- f"{response_data.get('educations', [{}])[0].get('title', '')}"),
164
- 'all_education': [f"{item.get('subtitle', '')} at {item.get('title', '')}" for item in response_data.get('educations', [{}])]
165
- }
166
 
167
- # Append the dictionary to the list
168
- profile_info_list.append(profile_info)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
  except requests.exceptions.RequestException as e:
171
  print(f"Request error: {e}")
@@ -178,6 +184,7 @@ def scrape_linkedins(links):
178
 
179
  return profile_info_list
180
 
 
181
  # Function to populate DataFrame with LinkedIn information
182
  def get_LI_info(df, progress=gr.Progress()):
183
  try:
 
122
  def scrape_linkedins(links):
123
  url = "https://linkedin-bulk-data-scraper.p.rapidapi.com/profiles"
124
 
 
 
125
  headers = {
126
  "x-rapidapi-key": f"{RAPIDAPI_API_KEY}",
127
  "x-rapidapi-host": "linkedin-bulk-data-scraper.p.rapidapi.com",
 
131
 
132
  # Initialize an empty list to store the dictionaries
133
  profile_info_list = []
134
+ chunk_size = 100
135
+
136
+ # Calculate the number of chunks needed
137
+ num_chunks = math.ceil(len(links) / chunk_size)
138
 
139
  try:
140
+ for i in range(num_chunks):
141
+ chunk = links[i * chunk_size:(i + 1) * chunk_size]
142
+ payload = {"links": chunk}
143
+
144
+ response = requests.post(url, json=payload, headers=headers)
145
+ response.raise_for_status() # Raise HTTPError for bad responses
146
+ data = response.json()
147
 
148
+ if 'data' not in data:
149
+ raise ValueError("Missing 'data' in response")
150
 
151
+ responses = data['data']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
+ for response_item in responses:
154
+ response_data = response_item.get('data', {})
155
+
156
+ # Use get() method with default empty strings for missing fields
157
+ profile_info = {
158
+ 'link': response_item.get('entry', ''),
159
+ 'full_name': response_data.get('fullName', ''),
160
+ 'headline': response_data.get('headline', ''),
161
+ 'connections': response_data.get('followers', ''), # or 'connections' based on availability
162
+ 'country': response_data.get('addressCountryOnly', ''),
163
+ 'address': response_data.get('addressWithoutCountry', ''),
164
+ 'about': response_data.get('about', ''),
165
+ 'current_role': (f"{response_data.get('experiences', [{}])[0].get('title', '')} at "
166
+ f"{response_data.get('experiences', [{}])[0].get('subtitle', '')}"),
167
+ 'all_roles': [f"{item.get('title', '')} at {item.get('subtitle', '')}" for item in response_data.get('experiences', [{}])],
168
+ 'education': (f"{response_data.get('educations', [{}])[0].get('subtitle', '')} at "
169
+ f"{response_data.get('educations', [{}])[0].get('title', '')}"),
170
+ 'all_education': [f"{item.get('subtitle', '')} at {item.get('title', '')}" for item in response_data.get('educations', [{}])]
171
+ }
172
+
173
+ # Append the dictionary to the list
174
+ profile_info_list.append(profile_info)
175
 
176
  except requests.exceptions.RequestException as e:
177
  print(f"Request error: {e}")
 
184
 
185
  return profile_info_list
186
 
187
+
188
  # Function to populate DataFrame with LinkedIn information
189
  def get_LI_info(df, progress=gr.Progress()):
190
  try: