Spaces:

SajilAwale
/

resfit

Running

Sajil Awale

Initial commit: ResFit - AI Resume Tailor

629d435 10 days ago

1.49 kB

	import requests
	import trafilatura
	import random

	def scrape_job_details(url: str) -> str:
	    """Fetch a web page and return its main text content.

	    The page is downloaded with ``requests`` using browser-like headers and
	    the HTML is handed to ``trafilatura.extract`` for main-content
	    extraction.

	    Args:
	        url: Address of the page to download.

	    Returns:
	        The extracted content on success. On failure a human-readable
	        message is returned instead of raising: one starting with
	        ``"Network error:"`` for ``requests`` failures (including HTTP
	        error statuses), ``"Error: Could not identify..."`` when the
	        extractor yields nothing, and ``"An unexpected error occurred:"``
	        for any other exception.
	    """
	    # 1. Set up headers so the request looks like it comes from a real browser.
	    user_agents = (
	        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
	        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
	    )

	    headers = {
	        "User-Agent": random.choice(user_agents),
	        "Accept-Language": "en-US,en;q=0.9",
	    }

	    try:
	        # 2. Fetch the HTML manually using requests.
	        response = requests.get(url, headers=headers, timeout=15)
	        response.raise_for_status()  # Turn HTTP error statuses into exceptions.

	        # requests derives the text encoding from the HTTP headers only and
	        # falls back to ISO-8859-1 for text/* responses without a charset,
	        # which garbles UTF-8 pages. When the server declares no charset,
	        # use the encoding detected from the body instead.
	        content_type = response.headers.get("Content-Type", "")
	        if "charset" not in content_type.lower():
	            response.encoding = response.apparent_encoding

	        # 3. Pass the decoded HTML to trafilatura for extraction.
	        content = trafilatura.extract(
	            response.text,
	            include_formatting=True,
	            include_links=False,
	            favor_precision=True,
	        )

	        if not content:
	            return "Error: Could not identify the main content of the page."

	        return content

	    except requests.exceptions.RequestException as e:
	        return f"Network error: {e}"
	    except Exception as e:
	        # Best-effort boundary: report extraction/decoding failures as text
	        # so callers always receive a string.
	        return f"An unexpected error occurred: {e}"

	# # --- Usage ---
	# url = "https://careers.qualcomm.com/careers/job/446715275527?hl=en-US&domain=qualcomm.com&source=APPLICANT_SOURCE-6-2"
	# print(scrape_job_details(url))