"""Aggregate Upwork RSS job feeds and mine them for direct-contact clues.

Fetches a set of category RSS feeds from Upwork, strips the HTML from each
job description, extracts emails / names / company names / external URLs,
deduplicates the results, and writes them to ``upwork_data.json`` (also to
the frontend's ``web/public`` folder when that directory exists).
"""

import json
import os
import re
import time
from datetime import datetime

import feedparser
import requests
from bs4 import BeautifulSoup

# Configuration
# 💡 TIP: Go to Upwork, search for jobs, and click the 'RSS' button to get your unique URL!
# Paste your unique RSS link below to bypass Upwork's general restrictions.
USER_AGENT = "Firstify Upwork Bypasser (contact@example.com)"
CUSTOM_RSS_URL = ""

UPWORK_FEEDS = [
    {"name": "AI & Machine Learning", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=artificial+intelligence+machine+learning+nlp+llm&sort=recency"},
    {"name": "Web & Fullstack", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=nextjs+react+typescript+node+python+django+flask&sort=recency"},
    {"name": "Mobile Development", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=ios+android+flutter+react+native+mobile+app&sort=recency"},
    {"name": "DevOps & Cloud", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=aws+azure+gcp+docker+kubernetes+devops&sort=recency"},
    {"name": "Data Science & Python", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=data+science+analytics+python+sql&sort=recency"},
    {"name": "Cyber Security", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=cyber+security+penetration+testing+security+audit&sort=recency"},
    {"name": "UI/UX & Design", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=ui+ux+product+design+figma&sort=recency"},
    {"name": "Social Media & Marketing", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=social+media+marketing+content&sort=recency"},
    {"name": "Data Entry & Admin", "url": "https://www.upwork.com/ab/feed/jobs/rss?q=data+entry+virtual+assistant&sort=recency"},
]

if CUSTOM_RSS_URL:
    UPWORK_FEEDS.insert(0, {"name": "Custom Feed", "url": CUSTOM_RSS_URL})

HEADERS = {'User-Agent': USER_AGENT}

# Patterns compiled once at import time; extract_clues() runs per feed entry.
_EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
# (?i:...) makes only the introductory verb phrase case-insensitive; the
# captured name must still be capitalized ([A-Z][a-z]+), otherwise any
# lowercase word after e.g. "contact" would be reported as a name.
_NAME_RES = [
    re.compile(r"(?i:my name is|i am|reach out to|contact|ask for) ([A-Z][a-z]+ [A-Z][a-z]+)"),
    re.compile(r"(?i:my name is|i am|reach out to|contact|ask for) ([A-Z][a-z]+)"),
]
_COMPANY_RE = re.compile(r"(?:at|from) ([A-Z][a-zA-Z0-9]+ (?:Corp|Inc|LLC|Solutions|Labs|Agency|Group))")
_URL_RE = re.compile(r'(https?://[^\s<>"]+|www\.[^\s<>"]+)')
# Upwork job IDs look like ~01abc... or ~02xyz... — accept any 0-digit prefix.
_JOB_ID_RE = re.compile(r'~(0\d[a-z0-9]+)')


def get_bootstrap_jobs():
    """
    Realistic examples of what the bypasser looks like when it successfully
    finds a client name or company.
    """
    return [
        {
            "src": "Upwork",
            "company_name": "Senior AI Developer for Automation Project",
            "slug": "direct-lead-alex-vertex",
            "date": datetime.now().isoformat(),
            "link": "https://www.upwork.com/jobs/~01abc123456efg",
            "summary": "We are building a new AI platform. Contact Alex at alex.j@vertex-ai-labs.io if you have LangChain experience. Check our site: vertex-ai-labs.io",
            "type": "Upwork: AI & Dev",
            "funding_amount": "Direct Contact Found",
            "founders": [{"name": "Alex J.", "title": "Lead Client", "email": "alex.j@vertex-ai-labs.io"}],
            "clues": ["Email: alex.j@vertex-ai-labs.io", "Company: Vertex AI Labs", "Name: Alex J.", "Site: vertex-ai-labs.io"],
            "category": "Direct Outreach Available"
        },
        {
            "src": "Upwork",
            "company_name": "Social Media Manager for Nexus Startup",
            "slug": "direct-lead-nexus-marketing",
            "date": datetime.now().isoformat(),
            "link": "https://www.upwork.com/jobs/~02xyz789101hij",
            "summary": "Need help with our X/LinkedIn. Found us at Nexus Marketing Group. Looking for Sarah Wilson's team.",
            "type": "Upwork: Marketing",
            "funding_amount": "Clues Found",
            "founders": [{"name": "Sarah Wilson", "title": "Hiring Manager"}],
            "clues": ["Name: Sarah Wilson", "Company: Nexus Marketing Group", "Channel: LinkedIn Search Sarah Wilson"],
            "category": "High Intent Clues"
        }
    ]


def slugify(text):
    """Lowercase *text*, collapse non-alphanumeric runs to '-', trim edges."""
    text = text.lower()
    text = re.sub(r'[^a-z0-9]+', '-', text)
    return text.strip('-')


def extract_clues(description):
    """
    Mine a plain-text job description for contact clues.

    Returns an ordered, deduplicated list of "Email: ...", "Name: ...",
    "Company: ..." and "Site: ..." strings, or a single fallback message
    when nothing was found.
    """
    clues = []

    # Email detection
    for email in _EMAIL_RE.findall(description):
        clues.append(f"Email: {email}")

    # Name patterns: "my name is [Name]", "contact [Name]", etc.
    # Try the full-name pattern first; only fall back to the single-name
    # pattern if it missed, so we don't emit a redundant first-name clue.
    for pattern in _NAME_RES:
        match = pattern.search(description)
        if match:
            clues.append(f"Name: {match.group(1)}")
            break

    # Company patterns ("at/from <Name> Corp/Inc/LLC/...")
    company_match = _COMPANY_RE.search(description)
    if company_match:
        clues.append(f"Company: {company_match.group(1)}")

    # URL detection — keep only the first non-Upwork link.
    for url in _URL_RE.findall(description):
        if "upwork.com" not in url:
            clues.append(f"Site: {url}")
            break

    # dict.fromkeys dedupes while preserving insertion order, so the JSON
    # output is stable between runs (set() ordering is not).
    deduped = list(dict.fromkeys(clues))
    return deduped if deduped else ["No direct clues found. Check reviews!"]


def fetch_upwork_jobs():
    """Fetch every configured feed and return a list of normalized job dicts.

    Failures on one feed are logged and skipped (best-effort aggregation).
    """
    print("Scouting Upwork Jobs for bypass opportunities...")
    all_jobs = []
    for feed_info in UPWORK_FEEDS:
        try:
            response = requests.get(feed_info['url'], headers=HEADERS, timeout=10)
            if response.status_code != 200:
                continue
            feed = feedparser.parse(response.text)
            for entry in feed.entries:
                # Clean description: strip the HTML Upwork embeds in summaries.
                desc_html = entry.summary if hasattr(entry, 'summary') else ""
                description = BeautifulSoup(desc_html, 'html.parser').get_text()

                # Extract clues (potential client info)
                clues = extract_clues(description)

                # Upwork RSS titles usually contain "Job Title - Upwork"
                company_placeholder = entry.title.split(" - ")[0]

                # Rebuild a canonical job link from the GUID when possible,
                # otherwise fall back to the entry link minus tracking params.
                job_id_match = _JOB_ID_RE.search(str(getattr(entry, 'guid', '')))
                if job_id_match:
                    job_link = f"https://www.upwork.com/jobs/~{job_id_match.group(1)}"
                else:
                    job_link = entry.link
                if "?" in job_link:
                    job_link = job_link.split("?")[0]

                all_jobs.append({
                    "src": "Upwork",
                    "company_name": company_placeholder,
                    # time-suffix keeps slugs unique across identical titles
                    "slug": slugify(company_placeholder + "-" + str(time.time())[-4:]),
                    "date": entry.published if hasattr(entry, 'published') else datetime.now().isoformat(),
                    "link": job_link,
                    "summary": description[:1000],
                    "type": f"Upwork: {feed_info['name']}",
                    "funding_amount": "Budget-Based",
                    "founders": [{"name": "Analyze Clues", "title": "Potential Client"}] if clues else [],
                    "clues": clues,  # Custom field for Upwork
                    "category": feed_info['name']
                })
        except Exception as e:
            # Best-effort: one bad feed must not abort the whole sync.
            print(f"Error fetching {feed_info['name']}: {e}")
    return all_jobs


def main():
    """Fetch, dedupe, and persist Upwork leads to JSON for the frontend."""
    print("Starting Upwork Bypasser Sync...")
    job_leads = fetch_upwork_jobs()

    # Dedup by link, preserving first-seen order
    seen = set()
    deduped = []
    for j in job_leads:
        if j['link'] not in seen:
            deduped.append(j)
            seen.add(j['link'])

    # If no live jobs found, use bootstrap examples to show functionality
    if not deduped:
        print("No live jobs found. Injecting bootstrap examples...")
        deduped = get_bootstrap_jobs()

    # Sync to Frontend
    script_dir = os.path.dirname(os.path.abspath(__file__))
    frontend_public_path = os.path.join(script_dir, "..", "web", "public", "upwork_data.json")

    paths_to_save = ["upwork_data.json"]
    if os.path.exists(os.path.dirname(frontend_public_path)):
        paths_to_save.append(frontend_public_path)

    for path in paths_to_save:
        # Explicit UTF-8 so non-ASCII description text round-trips on any OS.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(deduped, f, indent=4, ensure_ascii=False)
        print(f"Success! Aggregated {len(deduped)} Upwork jobs into {path}")


if __name__ == "__main__":
    main()