"""Aggregate Upwork RSS job feeds and mine them for direct-contact clues.

Fetches a set of category RSS feeds from Upwork, strips the HTML from each
job description, extracts emails / names / company names / external URLs,
deduplicates the results, and writes them to ``upwork_data.json`` (also to
the frontend's ``web/public`` folder when that directory exists).
"""

import json
import os
import re
import time
from datetime import datetime

import feedparser
import requests
from bs4 import BeautifulSoup

# Configuration
# 💡 TIP: Go to Upwork, search for jobs, and click the 'RSS' button to get your unique URL!
# Paste your unique RSS link below to bypass Upwork's general restrictions.
USER_AGENT = "Firstify Upwork Bypasser (contact@example.com)"
CUSTOM_RSS_URL = ""

UPWORK_FEEDS = [
    {"name": "AI & Machine Learning", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=artificial+intelligence+machine+learning+nlp+llm&sort=recency"},
    {"name": "Web & Fullstack", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=nextjs+react+typescript+node+python+django+flask&sort=recency"},
    {"name": "Mobile Development", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=ios+android+flutter+react+native+mobile+app&sort=recency"},
    {"name": "DevOps & Cloud", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=aws+azure+gcp+docker+kubernetes+devops&sort=recency"},
    {"name": "Data Science & Python", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=data+science+analytics+python+sql&sort=recency"},
    {"name": "Cyber Security", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=cyber+security+penetration+testing+security+audit&sort=recency"},
    {"name": "UI/UX & Design", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=ui+ux+product+design+figma&sort=recency"},
    {"name": "Social Media & Marketing", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=social+media+marketing+content&sort=recency"},
    {"name": "Data Entry & Admin", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=data+entry+virtual+assistant&sort=recency"},
]

if CUSTOM_RSS_URL:
    UPWORK_FEEDS.insert(0, {"name": "Custom Feed", "url": CUSTOM_RSS_URL})

HEADERS = {'User-Agent': USER_AGENT}

# Patterns compiled once at import time; extract_clues() runs per feed entry.
_EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
# (?i:...) makes only the introductory verb phrase case-insensitive; the
# captured name must still be capitalized ([A-Z][a-z]+), otherwise any
# lowercase word after e.g. "contact" would be reported as a name.
_NAME_RES = [
    re.compile(r"(?i:my name is|i am|reach out to|contact|ask for) ([A-Z][a-z]+ [A-Z][a-z]+)"),
    re.compile(r"(?i:my name is|i am|reach out to|contact|ask for) ([A-Z][a-z]+)"),
]
_COMPANY_RE = re.compile(r"(?:at|from) ([A-Z][a-zA-Z0-9]+ (?:Corp|Inc|LLC|Solutions|Labs|Agency|Group))")
_URL_RE = re.compile(r'(https?://[^\s<>"]+|www\.[^\s<>"]+)')
# Upwork job IDs look like ~01abc... or ~02xyz... — accept any 0-digit prefix.
_JOB_ID_RE = re.compile(r'~(0\d[a-z0-9]+)')


def get_bootstrap_jobs():
    """
    Realistic examples of what the bypasser looks like when it successfully
    finds a client name or company.
    """
    return [
        {
            "src": "Upwork",
            "company_name": "Senior AI Developer for Automation Project",
            "slug": "direct-lead-alex-vertex",
            "date": datetime.now().isoformat(),
            "link": "https://www.upwork.com/jobs/~01abc123456efg",
            "summary": "We are building a new AI platform. Contact Alex at alex.j@vertex-ai-labs.io if you have LangChain experience. Check our site: vertex-ai-labs.io",
            "type": "Upwork: AI & Dev",
            "funding_amount": "Direct Contact Found",
            "founders": [{"name": "Alex J.", "title": "Lead Client", "email": "alex.j@vertex-ai-labs.io"}],
            "clues": ["Email: alex.j@vertex-ai-labs.io", "Company: Vertex AI Labs", "Name: Alex J.", "Site: vertex-ai-labs.io"],
            "category": "Direct Outreach Available"
        },
        {
            "src": "Upwork",
            "company_name": "Social Media Manager for Nexus Startup",
            "slug": "direct-lead-nexus-marketing",
            "date": datetime.now().isoformat(),
            "link": "https://www.upwork.com/jobs/~02xyz789101hij",
            "summary": "Need help with our X/LinkedIn. Found us at Nexus Marketing Group. Looking for Sarah Wilson's team.",
            "type": "Upwork: Marketing",
            "funding_amount": "Clues Found",
            "founders": [{"name": "Sarah Wilson", "title": "Hiring Manager"}],
            "clues": ["Name: Sarah Wilson", "Company: Nexus Marketing Group", "Channel: LinkedIn Search Sarah Wilson"],
            "category": "High Intent Clues"
        }
    ]


def slugify(text):
    """Lowercase *text*, collapse non-alphanumeric runs to '-', trim edges."""
    text = text.lower()
    text = re.sub(r'[^a-z0-9]+', '-', text)
    return text.strip('-')


def extract_clues(description):
    """
    Mine a plain-text job description for contact clues.

    Returns an ordered, deduplicated list of "Email: ...", "Name: ...",
    "Company: ..." and "Site: ..." strings, or a single fallback message
    when nothing was found.
    """
    clues = []

    # Email detection
    for email in _EMAIL_RE.findall(description):
        clues.append(f"Email: {email}")

    # Name patterns: "my name is [Name]", "contact [Name]", etc.
    # Try the full-name pattern first; only fall back to the single-name
    # pattern if it missed, so we don't emit a redundant first-name clue.
    for pattern in _NAME_RES:
        match = pattern.search(description)
        if match:
            clues.append(f"Name: {match.group(1)}")
            break

    # Company patterns ("at/from <Name> Corp/Inc/LLC/...")
    company_match = _COMPANY_RE.search(description)
    if company_match:
        clues.append(f"Company: {company_match.group(1)}")

    # URL detection — keep only the first non-Upwork link.
    for url in _URL_RE.findall(description):
        if "upwork.com" not in url:
            clues.append(f"Site: {url}")
            break

    # dict.fromkeys dedupes while preserving insertion order, so the JSON
    # output is stable between runs (set() ordering is not).
    deduped = list(dict.fromkeys(clues))
    return deduped if deduped else ["No direct clues found. Check reviews!"]


def fetch_upwork_jobs():
    """Fetch every configured feed and return a list of normalized job dicts.

    Failures on one feed are logged and skipped (best-effort aggregation).
    """
    print("Scouting Upwork Jobs for bypass opportunities...")
    all_jobs = []
    for feed_info in UPWORK_FEEDS:
        try:
            response = requests.get(feed_info['url'], headers=HEADERS, timeout=10)
            if response.status_code != 200:
                continue
            feed = feedparser.parse(response.text)
            for entry in feed.entries:
                # Clean description: strip the HTML Upwork embeds in summaries.
                desc_html = entry.summary if hasattr(entry, 'summary') else ""
                description = BeautifulSoup(desc_html, 'html.parser').get_text()

                # Extract clues (potential client info)
                clues = extract_clues(description)

                # Upwork RSS titles usually contain "Job Title - Upwork"
                company_placeholder = entry.title.split(" - ")[0]

                # Rebuild a canonical job link from the GUID when possible,
                # otherwise fall back to the entry link minus tracking params.
                job_id_match = _JOB_ID_RE.search(str(getattr(entry, 'guid', '')))
                if job_id_match:
                    job_link = f"https://www.upwork.com/jobs/~{job_id_match.group(1)}"
                else:
                    job_link = entry.link
                if "?" in job_link:
                    job_link = job_link.split("?")[0]

                all_jobs.append({
                    "src": "Upwork",
                    "company_name": company_placeholder,
                    # time-suffix keeps slugs unique across identical titles
                    "slug": slugify(company_placeholder + "-" + str(time.time())[-4:]),
                    "date": entry.published if hasattr(entry, 'published') else datetime.now().isoformat(),
                    "link": job_link,
                    "summary": description[:1000],
                    "type": f"Upwork: {feed_info['name']}",
                    "funding_amount": "Budget-Based",
                    "founders": [{"name": "Analyze Clues", "title": "Potential Client"}] if clues else [],
                    "clues": clues,  # Custom field for Upwork
                    "category": feed_info['name']
                })
        except Exception as e:
            # Best-effort: one bad feed must not abort the whole sync.
            print(f"Error fetching {feed_info['name']}: {e}")
    return all_jobs


def main():
    """Fetch, dedupe, and persist Upwork leads to JSON for the frontend."""
    print("Starting Upwork Bypasser Sync...")
    job_leads = fetch_upwork_jobs()

    # Dedup by link, preserving first-seen order
    seen = set()
    deduped = []
    for j in job_leads:
        if j['link'] not in seen:
            deduped.append(j)
            seen.add(j['link'])

    # If no live jobs found, use bootstrap examples to show functionality
    if not deduped:
        print("No live jobs found. Injecting bootstrap examples...")
        deduped = get_bootstrap_jobs()

    # Sync to Frontend
    script_dir = os.path.dirname(os.path.abspath(__file__))
    frontend_public_path = os.path.join(script_dir, "..", "web", "public", "upwork_data.json")

    paths_to_save = ["upwork_data.json"]
    if os.path.exists(os.path.dirname(frontend_public_path)):
        paths_to_save.append(frontend_public_path)

    for path in paths_to_save:
        # Explicit UTF-8 so non-ASCII description text round-trips on any OS.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(deduped, f, indent=4, ensure_ascii=False)
        print(f"Success! Aggregated {len(deduped)} Upwork jobs into {path}")


if __name__ == "__main__":
    main()