Spaces:
Sleeping
Sleeping
| import urllib.parse | |
| import urllib.parse | |
def generate_resume_urls(base_url, selected_filters=None, filter_values=None, use_incredible_values=True):
    """Build hh.ru resume-search URLs for every combination of the selected filters.

    The search text comes from the last path segment of ``base_url`` when the
    path contains ``/resumes/`` (underscores are turned into spaces), otherwise
    from its ``text`` query parameter.

    Args:
        base_url: URL that supplies the search text, the optional ``area`` and
            ``university`` parameters, and any further query parameters that
            are passed through to every generated URL.
        selected_filters: Container of filter names to enumerate
            (``job_search_status``, ``gender``, ``age``, ``experience``,
            ``education_level``, ``salary``). ``None`` selects every known
            filter.
        filter_values: Optional mapping that overrides the built-in option
            lists. ``salary``, ``salary_from`` and ``salary_to`` are taken
            verbatim; for any other key a ``None`` or empty value keeps the
            built-in default.
        use_incredible_values: When true, a second batch of URLs is appended
            that uses fixed out-of-range salary and age bounds and enumerates
            every selected filter except ``age``.
            NOTE(review): presumably meant to catch resumes with implausible
            values - confirm with the author.

    Returns:
        A list of URL strings: the main batch first, then the optional
        out-of-range batch.
    """
    # Function-scope import so the block does not depend on a file-level change.
    from itertools import product

    quote = urllib.parse.quote

    parsed_url = urllib.parse.urlparse(base_url)
    query_params = urllib.parse.parse_qs(parsed_url.query)

    if "/resumes/" in parsed_url.path:
        search_text = parsed_url.path.split("/")[-1].replace("_", " ")
    else:
        search_text = query_params.get("text", [""])[0]
    encoded_text = quote(search_text)

    filter_options = {
        "job_search_status": ["unknown", "not_looking_for_job", "looking_for_offers", "active_search", "has_job_offer", "accepted_job_offer"],
        "gender": ["male", "female"],
        "age": list(range(18, 71)),
        "salary": False,
        "salary_from": 0,
        "salary_to": 1000000000,
        "education_level": ["secondary", "special_secondary", "unfinished_higher", "bachelor", "master", "doctor", "candidate", "higher"],
        "experience": ["noExperience", "between1And3", "between3And6", "moreThan6"],
    }

    for key, value in (filter_values or {}).items():
        if key in ("salary", "salary_from", "salary_to"):
            # Scalar settings are copied as-is, even when falsy.
            filter_options[key] = value
        elif value is not None and len(value) > 0:
            # List-valued filters: an empty or missing override keeps the default.
            filter_options[key] = value

    if selected_filters is None:
        selected_filters = filter_options.keys()

    def choices(name):
        """Return the option list for *name*, or ``[None]`` when it is not selected."""
        return filter_options[name] if name in selected_filters else [None]

    def filter_part(status, gender, age, experience, education):
        """Render the query fragment for one combination; falsy values are skipped."""
        parts = []
        if status:
            parts.append(f"&job_search_status={status}")
        if gender:
            parts.append(f"&gender={gender}")
        if age:
            parts.append(f"&label=only_with_age&age_from={age}&age_to={age}")
        if experience:
            parts.append(f"&experience={experience}")
        if education:
            parts.append(f"&education_level={education}")
        return "".join(parts)

    # Everything below is identical for every URL, so it is rendered once.
    salary_part = ""
    if "salary" in selected_filters:
        salary_part = (
            "&label=only_with_salary"
            f"&salary_from={filter_options['salary_from']}&salary_to={filter_options['salary_to']}"
        )

    # parse_qs returns decoded values, so they are percent-encoded again here.
    tail = "".join(
        f"&{name}={quote(str(query_params[name][0]), safe='')}"
        for name in ("area", "university")
        if name in query_params
    )

    # Query parameters of base_url that this function does not set itself are
    # passed through. Strings are immutable, so the suffix has to be part of
    # the URL when it is built; `+=` on a loop variable would be discarded.
    handled_keys = {
        "text", "job_search_status", "experience", "gender",
        "age_from", "age_to", "education_level", "area", "university",
    }
    tail += "".join(
        f"&{quote(key, safe='')}={quote(str(values[0]), safe='')}"
        for key, values in query_params.items()
        if key not in handled_keys
    )

    main_base = (
        "https://hh.ru/search/resume?search_period=0&order_by=relevance"
        "&filter_exp_period=all_time&relocation=living_or_relocation"
        "&job_search_status_changed_by_user=true&logic=normal&pos=full_text"
        f"&exp_period=all_time&text={encoded_text}"
    )
    incredible_base = (
        "https://hh.ru/search/resume?search_period=0&order_by=relevance"
        "&salary_from=10000000&salary_to=1000000000000"
        "&filter_exp_period=all_time&relocation=living_or_relocation"
        "&job_search_status_changed_by_user=true&logic=normal&pos=full_text"
        f"&exp_period=all_time&age_from=100000000&age_to=1000000&text={encoded_text}"
    )

    statuses = choices("job_search_status")
    genders = choices("gender")
    ages = choices("age")
    experiences = choices("experience")
    educations = choices("education_level")

    # product() varies the last argument fastest, matching nested for-loops.
    generated_urls = [
        main_base + filter_part(*combo) + salary_part + tail
        for combo in product(statuses, genders, ages, experiences, educations)
    ]

    if use_incredible_values:
        # Age is fixed by the base URL here, so it is not enumerated.
        generated_urls.extend(
            incredible_base + filter_part(*combo) + tail
            for combo in product(statuses, genders, [None], experiences, educations)
        )

    return generated_urls
def generate_specific_urls(base_url):
    """Return one copy of ``base_url`` per employment type.

    Each returned URL keeps the scheme, host, path and existing query
    parameters of ``base_url`` (including ``area`` and ``university`` when
    present) and has its ``employment`` parameter set to one of ``full``,
    ``part``, ``project``, ``volunteer`` or ``probation``. The query string is
    re-encoded with ``urllib.parse.urlencode``.

    Args:
        base_url: The search URL to specialise.

    Returns:
        A list of five URL strings, in the order listed above.
    """
    parsed_url = urllib.parse.urlparse(base_url)
    query_params = urllib.parse.parse_qs(parsed_url.query)
    employment_options = ("full", "part", "project", "volunteer", "probation")

    specific_urls = []
    for employment in employment_options:
        # A fresh dict per variant leaves the parsed parameters untouched; an
        # existing "employment" key keeps its position, a new one goes last.
        variant_params = {**query_params, "employment": [employment]}
        variant_query = urllib.parse.urlencode(variant_params, doseq=True)
        specific_urls.append(parsed_url._replace(query=variant_query).geturl())
    return specific_urls
| # urls = generate_resume_urls("https://hh.ru/resumes/generalnyj-direktor-stroitelnoj-kompanii",use_incredible_values=True) | |
| # print(len(urls)) |