| import feedparser |
| import json |
| import requests |
| import time |
| import os |
| import re |
| from datetime import datetime |
| from bs4 import BeautifulSoup |
|
|
| |
| |
| |
# Identifies this scraper to Upwork's servers; includes a contact address
# so an operator can be reached about the traffic.
USER_AGENT = "Firstify Upwork Bypasser (contact@example.com)"
# Optional user-supplied RSS URL; when non-empty it is polled first (see below).
CUSTOM_RSS_URL = ""


# Category feeds polled on every run. Each URL is a '+'-joined keyword
# search against Upwork's job RSS, sorted by recency.
UPWORK_FEEDS = [
    {"name": "AI & Machine Learning", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=artificial+intelligence+machine+learning+nlp+llm&sort=recency"},
    {"name": "Web & Fullstack", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=nextjs+react+typescript+node+python+django+flask&sort=recency"},
    {"name": "Mobile Development", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=ios+android+flutter+react+native+mobile+app&sort=recency"},
    {"name": "DevOps & Cloud", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=aws+azure+gcp+docker+kubernetes+devops&sort=recency"},
    {"name": "Data Science & Python", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=data+science+analytics+python+sql&sort=recency"},
    {"name": "Cyber Security", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=cyber+security+penetration+testing+security+audit&sort=recency"},
    {"name": "UI/UX & Design", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=ui+ux+product+design+figma&sort=recency"},
    {"name": "Social Media & Marketing", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=social+media+marketing+content&sort=recency"},
    {"name": "Data Entry & Admin", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=data+entry+virtual+assistant&sort=recency"},
]


# A configured custom feed is inserted at position 0 so it is fetched first.
if CUSTOM_RSS_URL:
    UPWORK_FEEDS.insert(0, {"name": "Custom Feed", "url": CUSTOM_RSS_URL})


# Shared HTTP headers for every feed request.
HEADERS = {'User-Agent': USER_AGENT}
|
|
def get_bootstrap_jobs():
    """
    Return two hand-crafted sample leads.

    These mirror what a successful scrape looks like (one lead with a direct
    email contact, one with name/company clues only) and are injected when no
    live jobs could be fetched.
    """
    ai_lead = {
        "src": "Upwork",
        "company_name": "Senior AI Developer for Automation Project",
        "slug": "direct-lead-alex-vertex",
        "date": datetime.now().isoformat(),
        "link": "https://www.upwork.com/jobs/~01abc123456efg",
        "summary": "We are building a new AI platform. Contact Alex at alex.j@vertex-ai-labs.io if you have LangChain experience. Check our site: vertex-ai-labs.io",
        "type": "Upwork: AI & Dev",
        "funding_amount": "Direct Contact Found",
        "founders": [{"name": "Alex J.", "title": "Lead Client", "email": "alex.j@vertex-ai-labs.io"}],
        "clues": ["Email: alex.j@vertex-ai-labs.io", "Company: Vertex AI Labs", "Name: Alex J.", "Site: vertex-ai-labs.io"],
        "category": "Direct Outreach Available",
    }
    marketing_lead = {
        "src": "Upwork",
        "company_name": "Social Media Manager for Nexus Startup",
        "slug": "direct-lead-nexus-marketing",
        "date": datetime.now().isoformat(),
        "link": "https://www.upwork.com/jobs/~02xyz789101hij",
        "summary": "Need help with our X/LinkedIn. Found us at Nexus Marketing Group. Looking for Sarah Wilson's team.",
        "type": "Upwork: Marketing",
        "funding_amount": "Clues Found",
        "founders": [{"name": "Sarah Wilson", "title": "Hiring Manager"}],
        "clues": ["Name: Sarah Wilson", "Company: Nexus Marketing Group", "Channel: LinkedIn Search Sarah Wilson"],
        "category": "High Intent Clues",
    }
    return [ai_lead, marketing_lead]
|
|
def slugify(text):
    """Lower-case *text*, collapse non-alphanumeric runs to '-', trim dashes."""
    dashed = re.sub(r'[^a-z0-9]+', '-', text.lower())
    return dashed.strip('-')
|
|
| def extract_clues(description): |
| """ |
| More aggressive regex to find emails, names, and patterns. |
| """ |
| clues = [] |
| |
| |
| emails = re.findall(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', description) |
| for email in emails: clues.append(f"Email: {email}") |
| |
| |
| name_patterns = [ |
| r"(?:my name is|i am|reach out to|contact|ask for) ([A-Z][a-z]+ [A-Z][a-z]+)", |
| r"(?:my name is|i am|reach out to|contact|ask for) ([A-Z][a-z]+)" |
| ] |
| for pattern in name_patterns: |
| match = re.search(pattern, description, re.IGNORECASE) |
| if match: clues.append(f"Name: {match.group(1)}") |
| |
| |
| company_match = re.search(r"(?:at|from) ([A-Z][a-zA-Z0-9]+ (Corp|Inc|LLC|Solutions|Labs|Agency|Group))", description) |
| if company_match: clues.append(f"Company: {company_match.group(1)}") |
| |
| |
| urls = re.findall(r'(https?://[^\s<>"]+|www\.[^\s<>"]+)', description) |
| for url in urls: |
| if "upwork.com" not in url: |
| clues.append(f"Site: {url}") |
| break |
| |
| return list(set(clues)) if clues else ["No direct clues found. Check reviews!"] |
|
|
def fetch_upwork_jobs():
    """
    Poll every configured Upwork RSS feed and convert entries into lead dicts.

    Feeds that respond with a non-200 status are skipped silently; any other
    per-feed failure is reported on the console and skipped, so the function
    never raises.

    Returns:
        list[dict]: One lead per feed entry, in feed order.
    """
    print("Scouting Upwork Jobs for bypass opportunities...")
    leads = []

    for feed_info in UPWORK_FEEDS:
        try:
            resp = requests.get(feed_info['url'], headers=HEADERS, timeout=10)
            if resp.status_code != 200:
                continue

            for entry in feedparser.parse(resp.text).entries:
                # Strip the HTML markup out of the RSS summary.
                raw_summary = getattr(entry, 'summary', "")
                description = BeautifulSoup(raw_summary, 'html.parser').get_text()

                clues = extract_clues(description)

                # The title's leading segment doubles as a rough company label.
                title_head = entry.title.split(" - ")[0]

                # Prefer a canonical job URL rebuilt from the GUID; otherwise
                # fall back to the entry link with any query string removed.
                guid_match = re.search(r'~(01[a-z0-9]+)', str(getattr(entry, 'guid', '')))
                if guid_match:
                    job_link = f"https://www.upwork.com/jobs/~{guid_match.group(1)}"
                else:
                    job_link = entry.link.split("?")[0]

                leads.append({
                    "src": "Upwork",
                    "company_name": title_head,
                    "slug": slugify(title_head + "-" + str(time.time())[-4:]),
                    "date": getattr(entry, 'published', datetime.now().isoformat()),
                    "link": job_link,
                    "summary": description[:1000],
                    "type": f"Upwork: {feed_info['name']}",
                    "funding_amount": "Budget-Based",
                    "founders": [{"name": "Analyze Clues", "title": "Potential Client"}] if clues else [],
                    "clues": clues,
                    "category": feed_info['name'],
                })
        except Exception as e:
            print(f"Error fetching {feed_info['name']}: {e}")

    return leads
|
|
def main():
    """
    Run one full sync: fetch live Upwork leads, de-duplicate by job link,
    fall back to bootstrap samples if nothing was found, and write the result
    to upwork_data.json (and the frontend's public dir when it exists).
    """
    print("Starting Upwork Bypasser Sync...")

    job_leads = fetch_upwork_jobs()

    # De-duplicate by canonical job link, keeping the first occurrence.
    seen = set()
    deduped = []
    for j in job_leads:
        if j['link'] not in seen:
            deduped.append(j)
            seen.add(j['link'])

    # Keep the output non-empty so downstream consumers always have data.
    if not deduped:
        print("No live jobs found. Injecting bootstrap examples...")
        deduped = get_bootstrap_jobs()

    # Mirror the JSON into the frontend's public folder when the sibling
    # web project is present next to this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    frontend_public_path = os.path.join(script_dir, "..", "web", "public", "upwork_data.json")

    paths_to_save = ["upwork_data.json"]
    if os.path.exists(os.path.dirname(frontend_public_path)):
        paths_to_save.append(frontend_public_path)

    for path in paths_to_save:
        # BUG FIX: write with an explicit UTF-8 encoding instead of the
        # platform default locale encoding, so the output is portable.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(deduped, f, indent=4)
        print(f"Success! Aggregated {len(deduped)} Upwork jobs into {path}")
|
|
# Run a full sync when executed as a script (no-op on import).
if __name__ == "__main__":
    main()
|
|