babaTEEpe committed on
Commit
2f96701
·
verified ·
1 Parent(s): 6fd696a

Upload 2 files

Browse files
Files changed (2) hide show
  1. upwork_data.json +1 -0
  2. upwork_scraper.py +114 -0
upwork_data.json ADDED
@@ -0,0 +1 @@
 
 
1
+ []
upwork_scraper.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import feedparser
2
+ import json
3
+ import requests
4
+ import time
5
+ import os
6
+ import re
7
+ from datetime import datetime
8
+ from bs4 import BeautifulSoup
9
+
10
+ # Configuration
11
+ # 💡 TIP: Go to Upwork, search for jobs, and click the 'RSS' button to get your unique URL!
12
+ # Paste your unique RSS link below to bypass Upwork's general restrictions.
13
+ CUSTOM_RSS_URL = ""
14
+
15
+ UPWORK_FEEDS = [
16
+ {"name": "Custom Feed", "url": CUSTOM_RSS_URL},
17
+ {"name": "AI & Machine Learning", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=artificial+intelligence&sort=recency"},
18
+ ]
19
+
20
+ HEADERS = {'User-Agent': USER_AGENT}
21
+
22
def slugify(text):
    """Lower-case *text* and collapse every non-alphanumeric run into a
    single hyphen, trimming any leading/trailing hyphens."""
    hyphenated = re.sub(r'[^a-z0-9]+', '-', text.lower())
    return hyphenated.strip('-')
26
+
27
def extract_clues(description):
    """
    Search for names, company names, or URLs hidden in the description.

    Returns a list of "Label: value" strings; when none of the heuristics
    fire, returns a one-element fallback message instead of an empty list.
    """
    clues = []

    # Heuristic 1: "My name is [Name]" or "I am [Name]".
    # FIX: the original pattern required the literal lowercase "my name is",
    # which never matched normally capitalized sentences, and the "I am"
    # variant promised by the comment was not handled at all. The (?i:...)
    # scoped flag makes only the phrase case-insensitive, while the name
    # capture still requires Capitalized First Last.
    name_match = re.search(r"\b(?i:my name is|i am) ([A-Z][a-z]+ [A-Z][a-z]+)", description)
    if name_match:
        clues.append(f"Name: {name_match.group(1)}")

    # Heuristic 2: company mentions such as "at Acme Inc" / "at Foo Labs".
    company_match = re.search(r"at ([A-Z][a-zA-Z0-9]+ (Corp|Inc|LLC|Solutions|Labs))", description)
    if company_match:
        clues.append(f"Company: {company_match.group(1)}")

    # Heuristic 3: bare URLs (first one only).
    urls = re.findall(r'(https?://[^\s<>"]+|www\.[^\s<>"]+)', description)
    if urls:
        clues.append(f"Site: {urls[0]}")

    return clues if clues else ["No direct clues found. Check reviews!"]
46
+
47
def fetch_upwork_jobs():
    """
    Fetch every configured Upwork RSS feed and normalize each entry into a
    job-lead dict (source, company placeholder, slug, date, link, summary,
    clues, category).

    Per-feed failures are logged and skipped so one bad feed never aborts
    the whole run.
    """
    print("Scouting Upwork Jobs for bypass opportunities...")
    all_jobs = []

    for feed_info in UPWORK_FEEDS:
        # FIX: CUSTOM_RSS_URL defaults to "" and is always present in
        # UPWORK_FEEDS; requests.get("") raises MissingSchema on every run.
        # Skip unconfigured feeds instead of relying on the except below.
        if not feed_info['url']:
            continue
        try:
            response = requests.get(feed_info['url'], headers=HEADERS, timeout=10)
            if response.status_code != 200:
                continue

            feed = feedparser.parse(response.text)
            for entry in feed.entries:
                # Strip the HTML markup Upwork embeds in the summary.
                desc_html = entry.summary if hasattr(entry, 'summary') else ""
                soup = BeautifulSoup(desc_html, 'html.parser')
                description = soup.get_text()

                # Extract clues (potential client info) from the plain text.
                clues = extract_clues(description)

                # Upwork RSS titles usually look like "Job Title - Upwork".
                company_placeholder = entry.title.split(" - ")[0]

                all_jobs.append({
                    "src": "Upwork",
                    "company_name": company_placeholder,
                    # Time-suffix keeps slugs unique across entries with the
                    # same title within one run.
                    "slug": slugify(company_placeholder + "-" + str(time.time())[-4:]),
                    "date": entry.published if hasattr(entry, 'published') else datetime.now().isoformat(),
                    "link": entry.link,
                    "summary": description[:1000],
                    "type": f"Upwork: {feed_info['name']}",
                    "funding_amount": "Budget-Based",
                    "founders": [{"name": "Analyze Clues", "title": "Potential Client"}] if clues else [],
                    "clues": clues,  # Custom field for Upwork
                    "category": feed_info['name']
                })
        except Exception as e:
            # Best-effort aggregation: log and move on to the next feed.
            print(f"Error fetching {feed_info['name']}: {e}")

    return all_jobs
86
+
87
def main():
    """Fetch Upwork job leads, de-duplicate them by link, and write the
    result as JSON locally and (if present) into the frontend's public dir."""
    print("Starting Upwork Bypasser Sync...")

    job_leads = fetch_upwork_jobs()

    # De-duplicate by job link, preserving first-seen order.
    seen = set()
    deduped = []
    for j in job_leads:
        if j['link'] not in seen:
            deduped.append(j)
            seen.add(j['link'])

    # Sync to Frontend: mirror the JSON next to the web app if its
    # public directory exists relative to this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    frontend_public_path = os.path.join(script_dir, "..", "web", "public", "upwork_data.json")

    paths_to_save = ["upwork_data.json"]
    if os.path.exists(os.path.dirname(frontend_public_path)):
        paths_to_save.append(frontend_public_path)

    for path in paths_to_save:
        # FIX: force UTF-8 — scraped job text is frequently non-ASCII and
        # the locale-dependent default (e.g. cp1252 on Windows) raised
        # UnicodeEncodeError on such descriptions.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(deduped, f, indent=4)
        print(f"Success! Aggregated {len(deduped)} Upwork jobs into {path}")
112
+
113
# Script entry point: only run the sync when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()