Update app.py
Browse files
app.py
CHANGED
|
@@ -122,8 +122,6 @@ def get_insta_info(df, progress=gr.Progress()):
|
|
| 122 |
def scrape_linkedins(links):
|
| 123 |
url = "https://linkedin-bulk-data-scraper.p.rapidapi.com/profiles"
|
| 124 |
|
| 125 |
-
payload = {"links": links}
|
| 126 |
-
|
| 127 |
headers = {
|
| 128 |
"x-rapidapi-key": f"{RAPIDAPI_API_KEY}",
|
| 129 |
"x-rapidapi-host": "linkedin-bulk-data-scraper.p.rapidapi.com",
|
|
@@ -133,39 +131,47 @@ def scrape_linkedins(links):
|
|
| 133 |
|
| 134 |
# Initialize an empty list to store the dictionaries
|
| 135 |
profile_info_list = []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
try:
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
-
|
| 143 |
-
|
| 144 |
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
for response_item in responses:
|
| 148 |
-
response_data = response_item.get('data', {})
|
| 149 |
-
|
| 150 |
-
# Use get() method with default empty strings for missing fields
|
| 151 |
-
profile_info = {
|
| 152 |
-
'link': response_item.get('entry', ''),
|
| 153 |
-
'full_name': response_data.get('fullName', ''),
|
| 154 |
-
'headline': response_data.get('headline', ''),
|
| 155 |
-
'connections': response_data.get('followers', ''), # or 'connections' based on availability
|
| 156 |
-
'country': response_data.get('addressCountryOnly', ''),
|
| 157 |
-
'address': response_data.get('addressWithoutCountry', ''),
|
| 158 |
-
'about': response_data.get('about', ''),
|
| 159 |
-
'current_role': (f"{response_data.get('experiences', [{}])[0].get('title', '')} at "
|
| 160 |
-
f"{response_data.get('experiences', [{}])[0].get('subtitle', '')}"),
|
| 161 |
-
'all_roles': [f"{item.get('title', '')} at {item.get('subtitle', '')}" for item in response_data.get('experiences', [{}])],
|
| 162 |
-
'education': (f"{response_data.get('educations', [{}])[0].get('subtitle', '')} at "
|
| 163 |
-
f"{response_data.get('educations', [{}])[0].get('title', '')}"),
|
| 164 |
-
'all_education': [f"{item.get('subtitle', '')} at {item.get('title', '')}" for item in response_data.get('educations', [{}])]
|
| 165 |
-
}
|
| 166 |
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
except requests.exceptions.RequestException as e:
|
| 171 |
print(f"Request error: {e}")
|
|
@@ -178,6 +184,7 @@ def scrape_linkedins(links):
|
|
| 178 |
|
| 179 |
return profile_info_list
|
| 180 |
|
|
|
|
| 181 |
# Function to populate DataFrame with LinkedIn information
|
| 182 |
def get_LI_info(df, progress=gr.Progress()):
|
| 183 |
try:
|
|
|
|
| 122 |
def scrape_linkedins(links):
|
| 123 |
url = "https://linkedin-bulk-data-scraper.p.rapidapi.com/profiles"
|
| 124 |
|
|
|
|
|
|
|
| 125 |
headers = {
|
| 126 |
"x-rapidapi-key": f"{RAPIDAPI_API_KEY}",
|
| 127 |
"x-rapidapi-host": "linkedin-bulk-data-scraper.p.rapidapi.com",
|
|
|
|
| 131 |
|
| 132 |
# Initialize an empty list to store the dictionaries
|
| 133 |
profile_info_list = []
|
| 134 |
+
chunk_size = 100
|
| 135 |
+
|
| 136 |
+
# Calculate the number of chunks needed
|
| 137 |
+
num_chunks = math.ceil(len(links) / chunk_size)
|
| 138 |
|
| 139 |
try:
|
| 140 |
+
for i in range(num_chunks):
|
| 141 |
+
chunk = links[i * chunk_size:(i + 1) * chunk_size]
|
| 142 |
+
payload = {"links": chunk}
|
| 143 |
+
|
| 144 |
+
response = requests.post(url, json=payload, headers=headers)
|
| 145 |
+
response.raise_for_status() # Raise HTTPError for bad responses
|
| 146 |
+
data = response.json()
|
| 147 |
|
| 148 |
+
if 'data' not in data:
|
| 149 |
+
raise ValueError("Missing 'data' in response")
|
| 150 |
|
| 151 |
+
responses = data['data']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
+
for response_item in responses:
|
| 154 |
+
response_data = response_item.get('data', {})
|
| 155 |
+
|
| 156 |
+
# Use get() method with default empty strings for missing fields
|
| 157 |
+
profile_info = {
|
| 158 |
+
'link': response_item.get('entry', ''),
|
| 159 |
+
'full_name': response_data.get('fullName', ''),
|
| 160 |
+
'headline': response_data.get('headline', ''),
|
| 161 |
+
'connections': response_data.get('followers', ''), # or 'connections' based on availability
|
| 162 |
+
'country': response_data.get('addressCountryOnly', ''),
|
| 163 |
+
'address': response_data.get('addressWithoutCountry', ''),
|
| 164 |
+
'about': response_data.get('about', ''),
|
| 165 |
+
'current_role': (f"{response_data.get('experiences', [{}])[0].get('title', '')} at "
|
| 166 |
+
f"{response_data.get('experiences', [{}])[0].get('subtitle', '')}"),
|
| 167 |
+
'all_roles': [f"{item.get('title', '')} at {item.get('subtitle', '')}" for item in response_data.get('experiences', [{}])],
|
| 168 |
+
'education': (f"{response_data.get('educations', [{}])[0].get('subtitle', '')} at "
|
| 169 |
+
f"{response_data.get('educations', [{}])[0].get('title', '')}"),
|
| 170 |
+
'all_education': [f"{item.get('subtitle', '')} at {item.get('title', '')}" for item in response_data.get('educations', [{}])]
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
# Append the dictionary to the list
|
| 174 |
+
profile_info_list.append(profile_info)
|
| 175 |
|
| 176 |
except requests.exceptions.RequestException as e:
|
| 177 |
print(f"Request error: {e}")
|
|
|
|
| 184 |
|
| 185 |
return profile_info_list
|
| 186 |
|
| 187 |
+
|
| 188 |
# Function to populate DataFrame with LinkedIn information
|
| 189 |
def get_LI_info(df, progress=gr.Progress()):
|
| 190 |
try:
|