babaTEEpe committed on
Commit
05d1446
·
verified ·
1 Parent(s): 90f4046

Update upwork_scraper.py

Browse files
Files changed (1) hide show
  1. upwork_scraper.py +14 -12
upwork_scraper.py CHANGED
@@ -14,8 +14,13 @@ USER_AGENT = "Firstify Upwork Bypasser (contact@example.com)"
14
  CUSTOM_RSS_URL = ""
15
 
16
  UPWORK_FEEDS = [
17
- {"name": "AI & Machine Learning", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=artificial+intelligence&sort=recency"},
18
- {"name": "Web & Fullstack", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=nextjs+react+typescript&sort=recency"},
 
 
 
 
 
19
  {"name": "Social Media & Marketing", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=social+media+marketing+content&sort=recency"},
20
  {"name": "Data Entry & Admin", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=data+entry+virtual+assistant&sort=recency"},
21
  ]
@@ -119,16 +124,13 @@ def fetch_upwork_jobs():
119
  company_placeholder = entry.title.split(" - ")[0]
120
 
121
  # Clean up Upwork Link
122
- job_link = entry.link
123
- # Strip RSS tracking parameters if present
124
- if "?" in job_link:
125
- job_link = job_link.split("?")[0]
126
-
127
- # If it's a generic RSS link, try to find the specific Job hash
128
- if hasattr(entry, 'guid'):
129
- guid_match = re.search(r'~(01[a-z0-9]+)', entry.guid)
130
- if guid_match:
131
- job_link = f"https://www.upwork.com/jobs/~{guid_match.group(1)}"
132
 
133
  all_jobs.append({
134
  "src": "Upwork",
 
14
  CUSTOM_RSS_URL = ""
15
 
16
  UPWORK_FEEDS = [
17
+ {"name": "AI & Machine Learning", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=artificial+intelligence+machine+learning+nlp+llm&sort=recency"},
18
+ {"name": "Web & Fullstack", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=nextjs+react+typescript+node+python+django+flask&sort=recency"},
19
+ {"name": "Mobile Development", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=ios+android+flutter+react+native+mobile+app&sort=recency"},
20
+ {"name": "DevOps & Cloud", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=aws+azure+gcp+docker+kubernetes+devops&sort=recency"},
21
+ {"name": "Data Science & Python", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=data+science+analytics+python+sql&sort=recency"},
22
+ {"name": "Cyber Security", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=cyber+security+penetration+testing+security+audit&sort=recency"},
23
+ {"name": "UI/UX & Design", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=ui+ux+product+design+figma&sort=recency"},
24
  {"name": "Social Media & Marketing", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=social+media+marketing+content&sort=recency"},
25
  {"name": "Data Entry & Admin", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=data+entry+virtual+assistant&sort=recency"},
26
  ]
 
124
  company_placeholder = entry.title.split(" - ")[0]
125
 
126
  # Clean up Upwork Link
127
+ job_id_match = re.search(r'~(01[a-z0-9]+)', str(getattr(entry, 'guid', '')))
128
+ if job_id_match:
129
+ job_link = f"https://www.upwork.com/jobs/~{job_id_match.group(1)}"
130
+ else:
131
+ job_link = entry.link
132
+ if "?" in job_link:
133
+ job_link = job_link.split("?")[0]
 
 
 
134
 
135
  all_jobs.append({
136
  "src": "Upwork",