"""Build hh.ru resume-search URLs for combinations of search filters."""

import itertools
import urllib.parse

# Query-string skeleton shared by every regular search URL; ``{text}`` is
# replaced with the percent-encoded search text.
_NORMAL_URL_TEMPLATE = (
    "https://hh.ru/search/resume?search_period=0&order_by=relevance"
    "&filter_exp_period=all_time&relocation=living_or_relocation"
    "&job_search_status_changed_by_user=true&logic=normal&pos=full_text"
    "&exp_period=all_time&text={text}"
)

# Same skeleton with hard-coded, extreme salary and age bounds (the
# "incredible values" variant).
# NOTE(review): presumably meant to reach resumes that ordinary ranges
# miss -- confirm with the author.
_INCREDIBLE_URL_TEMPLATE = (
    "https://hh.ru/search/resume?search_period=0&order_by=relevance"
    "&salary_from=10000000&salary_to=1000000000000"
    "&filter_exp_period=all_time&relocation=living_or_relocation"
    "&job_search_status_changed_by_user=true&logic=normal&pos=full_text"
    "&exp_period=all_time&age_from=100000000&age_to=1000000&text={text}"
)

# Filters whose value is a scalar rather than a list of choices.
_SCALAR_FILTER_KEYS = frozenset({"salary", "salary_from", "salary_to"})

# Base-URL parameters that are never copied verbatim, because the generator
# emits (or deliberately varies) them itself.
_NON_PASSTHROUGH_KEYS = frozenset({
    "text",
    "job_search_status",
    "experience",
    "gender",
    "age_from",
    "age_to",
    "education_level",
    "area",
    "university",
})

# Keys that may legitimately occur more than once in a single URL; the
# generator itself emits ``label`` for both the age and the salary filter.
_REPEATABLE_KEYS = frozenset({"label"})

_EMPLOYMENT_OPTIONS = ("full", "part", "project", "volunteer", "probation")


def _merge_filter_options(filter_values):
    """Return the default filter options overridden by *filter_values*.

    ``None`` values and empty collections leave the default untouched.
    """
    options = {
        "job_search_status": [
            "unknown",
            "not_looking_for_job",
            "looking_for_offers",
            "active_search",
            "has_job_offer",
            "accepted_job_offer",
        ],
        "gender": ["male", "female"],
        "age": list(range(18, 71)),
        "salary": False,
        "salary_from": 0,
        "salary_to": 1000000000,
        "education_level": [
            "secondary",
            "special_secondary",
            "unfinished_higher",
            "bachelor",
            "master",
            "doctor",
            "candidate",
            "higher",
        ],
        "experience": ["noExperience", "between1And3", "between3And6", "moreThan6"],
    }
    for key, value in (filter_values or {}).items():
        if value is None:
            continue
        # Scalars are taken as-is; collections only when they are non-empty.
        if key in _SCALAR_FILTER_KEYS or len(value) > 0:
            options[key] = value
    return options


def _query_keys(url):
    """Return the set of query-parameter names present in *url*."""
    return frozenset(urllib.parse.parse_qs(urllib.parse.urlparse(url).query))


def _build_url(prefix, prefix_keys, params, passthrough):
    """Append *params*, then any *passthrough* pair whose key is still unused."""
    pieces = [prefix]
    pieces.extend(f"&{key}={value}" for key, value in params)
    if passthrough:
        taken = prefix_keys.union(key for key, _ in params) - _REPEATABLE_KEYS
        pieces.extend(
            f"&{key}={value}" for key, value in passthrough if key not in taken
        )
    return "".join(pieces)


def generate_resume_urls(base_url, selected_filters=None, filter_values=None, use_incredible_values=True):
    """Generate one hh.ru resume-search URL per combination of filter values.

    The search text is the last path segment of *base_url* (underscores
    become spaces) when the path contains ``/resumes/``; otherwise it is the
    first ``text`` query parameter, or an empty string.

    Args:
        base_url: URL supplying the search text and optional ``area`` /
            ``university`` / other query parameters.
        selected_filters: Container of filter names to vary
            (``job_search_status``, ``gender``, ``age``, ``experience``,
            ``education_level``) and/or ``salary`` to add the salary range.
            ``None`` selects every known filter.
        filter_values: Optional mapping that overrides the default choices
            per filter; ``None`` or empty values keep the default.
        use_incredible_values: When true, also append the variants built
            from the hard-coded extreme salary/age template (the age filter
            is not varied for those).

    Returns:
        A list of URL strings: all regular combinations first, then the
        "incredible" ones. Query parameters of *base_url* that the generator
        does not manage itself are appended (first value only,
        percent-encoded) to every URL that does not already set them.
    """
    parsed_url = urllib.parse.urlparse(base_url)
    query_params = urllib.parse.parse_qs(parsed_url.query)

    if "/resumes/" in parsed_url.path:
        search_text = parsed_url.path.split("/")[-1].replace("_", " ")
    else:
        search_text = query_params.get("text", [""])[0]
    encoded_text = urllib.parse.quote(search_text)

    filter_options = _merge_filter_options(filter_values)
    if selected_filters is None:
        selected_filters = filter_options.keys()

    def choices(name):
        # An unselected filter contributes a single "absent" placeholder.
        return filter_options[name] if name in selected_filters else [None]

    location_params = [
        (key, urllib.parse.quote(query_params[key][0], safe=""))
        for key in ("area", "university")
        if key in query_params
    ]
    # Strings are immutable, so these extras must be appended while each URL
    # is built; patching the finished list in a loop would have no effect.
    passthrough = [
        (key, urllib.parse.quote(values[0], safe=""))
        for key, values in query_params.items()
        if key not in _NON_PASSTHROUGH_KEYS
    ]
    include_salary = "salary" in selected_filters

    generated_urls = []

    prefix = _NORMAL_URL_TEMPLATE.format(text=encoded_text)
    prefix_keys = _query_keys(_NORMAL_URL_TEMPLATE)
    for status, gender, age, experience, education in itertools.product(
        choices("job_search_status"),
        choices("gender"),
        choices("age"),
        choices("experience"),
        choices("education_level"),
    ):
        params = []
        if status:
            params.append(("job_search_status", status))
        if gender:
            params.append(("gender", gender))
        if age:
            params.append(("label", "only_with_age"))
            params.append(("age_from", age))
            params.append(("age_to", age))
        if experience:
            params.append(("experience", experience))
        if education:
            params.append(("education_level", education))
        if include_salary:
            params.append(("label", "only_with_salary"))
            params.append(("salary_from", filter_options["salary_from"]))
            params.append(("salary_to", filter_options["salary_to"]))
        params.extend(location_params)
        generated_urls.append(_build_url(prefix, prefix_keys, params, passthrough))

    if use_incredible_values:
        prefix = _INCREDIBLE_URL_TEMPLATE.format(text=encoded_text)
        prefix_keys = _query_keys(_INCREDIBLE_URL_TEMPLATE)
        for status, gender, experience, education in itertools.product(
            choices("job_search_status"),
            choices("gender"),
            choices("experience"),
            choices("education_level"),
        ):
            params = []
            if status:
                params.append(("job_search_status", status))
            if gender:
                params.append(("gender", gender))
            if experience:
                params.append(("experience", experience))
            if education:
                params.append(("education_level", education))
            params.extend(location_params)
            generated_urls.append(_build_url(prefix, prefix_keys, params, passthrough))

    return generated_urls


def generate_specific_urls(base_url):
    """Return one copy of *base_url* per employment type.

    Each returned URL keeps the query parameters of *base_url* (as re-encoded
    by ``urllib.parse.urlencode``) and has its ``employment`` parameter set
    to one of ``full``, ``part``, ``project``, ``volunteer``, ``probation``.
    """
    parsed_url = urllib.parse.urlparse(base_url)
    query_params = urllib.parse.parse_qs(parsed_url.query)

    specific_urls = []
    for employment in _EMPLOYMENT_OPTIONS:
        variant = {**query_params, "employment": [employment]}
        query_string = urllib.parse.urlencode(variant, doseq=True)
        specific_urls.append(parsed_url._replace(query=query_string).geturl())
    return specific_urls


# Example:
# urls = generate_resume_urls(
#     "https://hh.ru/resumes/generalnyj-direktor-stroitelnoj-kompanii",
#     use_incredible_values=True,
# )
# print(len(urls))