Update upwork_scraper.py
Browse files- upwork_scraper.py +14 -12
upwork_scraper.py
CHANGED
|
@@ -14,8 +14,13 @@ USER_AGENT = "Firstify Upwork Bypasser (contact@example.com)"
|
|
| 14 |
CUSTOM_RSS_URL = ""
|
| 15 |
|
| 16 |
UPWORK_FEEDS = [
|
| 17 |
-
{"name": "AI & Machine Learning", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=artificial+intelligence&sort=recency"},
|
| 18 |
-
{"name": "Web & Fullstack", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=nextjs+react+typescript&sort=recency"},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
{"name": "Social Media & Marketing", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=social+media+marketing+content&sort=recency"},
|
| 20 |
{"name": "Data Entry & Admin", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=data+entry+virtual+assistant&sort=recency"},
|
| 21 |
]
|
|
@@ -119,16 +124,13 @@ def fetch_upwork_jobs():
|
|
| 119 |
company_placeholder = entry.title.split(" - ")[0]
|
| 120 |
|
| 121 |
# Clean up Upwork Link
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
guid_match = re.search(r'~(01[a-z0-9]+)', entry.guid)
|
| 130 |
-
if guid_match:
|
| 131 |
-
job_link = f"https://www.upwork.com/jobs/~{guid_match.group(1)}"
|
| 132 |
|
| 133 |
all_jobs.append({
|
| 134 |
"src": "Upwork",
|
|
|
|
| 14 |
CUSTOM_RSS_URL = ""
|
| 15 |
|
| 16 |
UPWORK_FEEDS = [
|
| 17 |
+
{"name": "AI & Machine Learning", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=artificial+intelligence+machine+learning+nlp+llm&sort=recency"},
|
| 18 |
+
{"name": "Web & Fullstack", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=nextjs+react+typescript+node+python+django+flask&sort=recency"},
|
| 19 |
+
{"name": "Mobile Development", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=ios+android+flutter+react+native+mobile+app&sort=recency"},
|
| 20 |
+
{"name": "DevOps & Cloud", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=aws+azure+gcp+docker+kubernetes+devops&sort=recency"},
|
| 21 |
+
{"name": "Data Science & Python", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=data+science+analytics+python+sql&sort=recency"},
|
| 22 |
+
{"name": "Cyber Security", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=cyber+security+penetration+testing+security+audit&sort=recency"},
|
| 23 |
+
{"name": "UI/UX & Design", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=ui+ux+product+design+figma&sort=recency"},
|
| 24 |
{"name": "Social Media & Marketing", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=social+media+marketing+content&sort=recency"},
|
| 25 |
{"name": "Data Entry & Admin", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=data+entry+virtual+assistant&sort=recency"},
|
| 26 |
]
|
|
|
|
| 124 |
company_placeholder = entry.title.split(" - ")[0]
|
| 125 |
|
| 126 |
# Clean up Upwork Link
|
| 127 |
+
job_id_match = re.search(r'~(01[a-z0-9]+)', str(getattr(entry, 'guid', '')))
|
| 128 |
+
if job_id_match:
|
| 129 |
+
job_link = f"https://www.upwork.com/jobs/~{job_id_match.group(1)}"
|
| 130 |
+
else:
|
| 131 |
+
job_link = entry.link
|
| 132 |
+
if "?" in job_link:
|
| 133 |
+
job_link = job_link.split("?")[0]
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
all_jobs.append({
|
| 136 |
"src": "Upwork",
|