import html
import logging
import time
from urllib.parse import quote_plus

from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
| |
|
# Root logger at INFO; the scraper below reports per-listing failures
# through the logging module.
logging.basicConfig(level=logging.INFO)

# ASGI application that the route decorators in this module register on.
app = FastAPI()

# Serve files from the local "static" directory under the /static URL prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")
| |
|
def _parse_job_tile(job):
    """Extract the fields of one listing from its search-result tile.

    Args:
        job: WebElement for a single ``article[data-test="JobTile"]`` node.

    Returns:
        dict with the keys ``title``, ``date``, ``link``, ``description``,
        ``job_type``, ``experience_level`` and ``budget``.

    Raises:
        Selenium lookup errors propagate when a required element is absent;
        the caller logs them and skips the tile.
    """
    # NOTE(review): "pubilshed" is kept exactly as written in the selector;
    # presumably it mirrors the site's own attribute spelling -- confirm
    # before "correcting" it.
    posted_date = job.find_element(
        By.CSS_SELECTOR, 'small[data-test="job-pubilshed-date"]'
    ).text.strip()
    title_element = job.find_element(By.CSS_SELECTOR, 'h2.job-tile-title > a')
    description = job.find_element(
        By.CSS_SELECTOR, 'div[data-test="JobTileDetails"] > div > div > p'
    ).text.strip()

    job_info = job.find_element(By.CSS_SELECTOR, 'ul.job-tile-info-list')
    job_type = job_info.find_element(
        By.CSS_SELECTOR, 'li[data-test="job-type-label"]'
    ).text.strip()
    experience_level = job_info.find_element(
        By.CSS_SELECTOR, 'li[data-test="experience-level"]'
    ).text.strip()

    # A tile without a fixed-price entry is an expected case (presumably an
    # hourly job), so probe with find_elements instead of raising, catching
    # and logging an error for it. The fallback lookup still raises when the
    # duration label is missing too, which makes the caller skip the tile.
    fixed_price = job_info.find_elements(
        By.CSS_SELECTOR, 'li[data-test="is-fixed-price"]'
    )
    if fixed_price:
        budget = fixed_price[0].text.strip()
    else:
        budget = job_info.find_element(
            By.CSS_SELECTOR, 'li[data-test="duration-label"]'
        ).text.strip()

    return {
        'title': title_element.text.strip(),
        'date': posted_date,
        'link': title_element.get_attribute('href'),
        'description': description,
        'job_type': job_type,
        'experience_level': experience_level,
        'budget': budget,
    }


def scrape_upwork_data(search_query, num_jobs, page):
    """Scrape one page of Upwork job-search results with headless Chrome.

    Args:
        search_query: Free-text search terms. They are URL-encoded before
            being placed in the ``q`` parameter, so spaces and characters
            such as ``&`` or ``#`` cannot alter the rest of the query string.
        num_jobs: Not used by this function; kept so existing callers that
            pass it keep working.
        page: 1-based result page number, sent as the ``page`` parameter.

    Returns:
        A list of dicts, one per tile that parsed successfully (see
        ``_parse_job_tile`` for the keys). Tiles that fail to parse are
        logged and skipped, so the list may be shorter than the page.
    """
    options = webdriver.ChromeOptions()
    for flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
        options.add_argument(flag)

    url = (
        'https://www.upwork.com/nx/search/jobs'
        '?amount=500-&hourly_rate=25-'
        '&location=Americas,Europe,Australia%20and%20New%20Zealand,'
        'Canada,India,Switzerland,United%20States'
        f'&per_page=50&q={quote_plus(str(search_query))}'
        f'&sort=recency&t=0,1&page={page}'
    )

    driver = webdriver.Chrome(options=options)
    job_listings = []
    try:
        # Size the window before navigating so the page is laid out at this
        # size from its first render.
        driver.set_window_size(1080, 720)
        driver.get(url)
        # Fixed pause to let the client-side rendered results appear.
        time.sleep(5)

        for tile in driver.find_elements(
            By.CSS_SELECTOR, 'article[data-test="JobTile"]'
        ):
            try:
                job_listings.append(_parse_job_tile(tile))
            except Exception as exc:
                # Deliberately broad: one malformed tile must not discard
                # the listings already collected from this page.
                logging.error('Error parsing job listing: %s', exc)
    finally:
        # Always release the browser process, even if navigation failed.
        driver.quit()

    return job_listings
| |
|
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the landing page.

    The page holds a single GET form that submits ``query`` and ``num_jobs``
    to ``/jobs``.
    """
    landing_page = """
    <html>
        <head>
            <title>Upwork Job Listings</title>
        </head>
        <body>
            <h1>Welcome to Upwork Job Scraper</h1>
            <form action="/jobs" method="get">
                <input type="text" name="query" placeholder="Search Query" required>
                <input type="number" name="num_jobs" value="50" min="1" max="100" required>
                <button type="submit">Search Jobs</button>
            </form>
        </body>
    </html>
    """
    return landing_page
| |
|
@app.get("/jobs", response_class=HTMLResponse)
async def get_jobs(query: str, num_jobs: int = 50):
    """Scrape Upwork for ``query`` and render up to ``num_jobs`` listings.

    Args:
        query: Search terms, forwarded to ``scrape_upwork_data``.
        num_jobs: Maximum number of listings to render. Enough result pages
            are fetched to cover it (rounding up), and the combined result
            is truncated to this count. Values <= 0 yield only the heading.

    Returns:
        HTMLResponse with a heading followed by one ``<div>`` per listing.
        All scraped text is HTML-escaped before being embedded.
    """
    # Matches the per_page=50 value in the search URL that
    # scrape_upwork_data requests.
    per_page = 50
    wanted = max(num_jobs, 0)
    # Ceiling division: 1..50 -> 1 page, 51..100 -> 2 pages, and so on.
    # Floor division here used to fetch zero pages for any request below 50.
    page_count = (wanted + per_page - 1) // per_page
    # NOTE(review): num_jobs has no server-side upper bound (the form's
    # max="100" is client-side only), and each page starts a new browser.

    jobs = []
    for page in range(1, page_count + 1):
        # The scraper blocks (browser automation plus a fixed sleep), so run
        # it in a worker thread rather than stalling the event loop.
        jobs.extend(
            await run_in_threadpool(scrape_upwork_data, query, num_jobs, page)
        )
    del jobs[wanted:]

    parts = ["<h2>Job Listings</h2>"]
    for job in jobs:
        # Scraped strings are untrusted: escape them (quotes included, since
        # the link lands inside an attribute) before building markup.
        safe = {key: html.escape(str(value)) for key, value in job.items()}
        parts.append(f"""
        <div>
            <h3><a href="{safe['link']}">{safe['title']}</a></h3>
            <p>Posted Date: {safe['date']}</p>
            <p>Type: {safe['job_type']}</p>
            <p>Experience Level: {safe['experience_level']}</p>
            <p>Budget: {safe['budget']}</p>
            <p>Description: {safe['description']}</p>
        </div>
        <hr>
        """)
    return HTMLResponse(content="".join(parts))
| |
|
if __name__ == "__main__":
    # Script entry point: uvicorn is imported only on this path, so merely
    # importing the module does not import it.
    import uvicorn

    # Listen on every interface, port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)
| |
|