File size: 6,783 Bytes
f1c202b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import urllib.parse

import urllib.parse

def generate_resume_urls(base_url, selected_filters=None, filter_values=None, use_incredible_values=True):
    """Build hh.ru resume-search URLs for every combination of the selected filters.

    The search text is taken from the last path segment of *base_url* when the
    path contains ``/resumes/`` (underscores become spaces), otherwise from its
    ``text`` query parameter.

    Args:
        base_url: URL supplying the search text and any extra query parameters
            (``area``, ``university`` and others) to carry over.
        selected_filters: Container of filter names to enumerate
            (``job_search_status``, ``gender``, ``age``, ``experience``,
            ``education_level``, ``salary``). ``None`` selects every key of the
            built-in option table, which includes ``salary``.
        filter_values: Optional mapping overriding the built-in option lists.
            ``salary``, ``salary_from`` and ``salary_to`` are copied verbatim;
            any other entry replaces the default only when it is a non-empty
            sequence (``None`` and empty values keep the default).
        use_incredible_values: When true, an additional batch of URLs is
            generated with hard-coded extreme salary and age bounds
            (presumably to catch resumes with implausible values -- confirm
            with the caller).

    Returns:
        A list of URL strings: the regular combinations first, then the
        "incredible values" combinations.
    """
    import itertools  # local import so the block does not depend on file-level edits

    parsed_url = urllib.parse.urlparse(base_url)
    query_params = urllib.parse.parse_qs(parsed_url.query)

    if "/resumes/" in parsed_url.path:
        search_text = parsed_url.path.split("/")[-1].replace("_", " ")
    else:
        search_text = query_params.get("text", [""])[0]

    encoded_text = urllib.parse.quote(search_text)

    filter_options = {
        "job_search_status": ["unknown", "not_looking_for_job", "looking_for_offers", "active_search", "has_job_offer", "accepted_job_offer"],
        "gender": ["male", "female"],
        "age": list(range(18, 71)),
        "salary": False,
        "salary_from": 0,
        "salary_to": 1000000000,
        "education_level": ["secondary", "special_secondary", "unfinished_higher", "bachelor", "master", "doctor", "candidate", "higher"],
        "experience": ["noExperience", "between1And3", "between3And6", "moreThan6"],
    }

    if filter_values:
        for key, value in filter_values.items():
            if key in ("salary", "salary_from", "salary_to"):
                # Scalar salary settings are taken as-is (0 / False are valid).
                filter_options[key] = value
            elif value is not None and len(value) > 0:
                # Only a non-empty sequence overrides the default option list.
                filter_options[key] = value

    if selected_filters is None:
        selected_filters = filter_options.keys()

    def choices(name):
        # A filter that is not selected contributes a single "absent" slot.
        return filter_options[name] if name in selected_filters else [None]

    statuses = choices("job_search_status")
    genders = choices("gender")
    ages = choices("age")
    experiences = choices("experience")
    educations = choices("education_level")

    # Parts that are identical for every generated URL are built once.
    salary_part = ""
    if "salary" in selected_filters:
        salary_part = "&label=only_with_salary"
        salary_part += f"&salary_from={filter_options['salary_from']}&salary_to={filter_options['salary_to']}"

    location_part = ""
    if "area" in query_params:
        location_part += f"&area={query_params['area'][0]}"
    if "university" in query_params:
        location_part += f"&university={query_params['university'][0]}"

    regular_prefix = f"https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&job_search_status_changed_by_user=true&logic=normal&pos=full_text&exp_period=all_time&text={encoded_text}"
    incredible_prefix = f"https://hh.ru/search/resume?search_period=0&order_by=relevance&salary_from=10000000&salary_to=1000000000000&filter_exp_period=all_time&relocation=living_or_relocation&job_search_status_changed_by_user=true&logic=normal&pos=full_text&exp_period=all_time&age_from=100000000&age_to=1000000&text={encoded_text}"

    generated_urls = []

    # product() iterates in the same order as the equivalent nested loops
    # (rightmost argument varies fastest).
    for status, gender, age, experience, education in itertools.product(
        statuses, genders, ages, experiences, educations
    ):
        url = regular_prefix
        if status:
            url += f"&job_search_status={status}"
        if gender:
            url += f"&gender={gender}"
        if age:
            url += "&label=only_with_age"
            url += f"&age_from={age}&age_to={age}"
        if experience:
            url += f"&experience={experience}"
        if education:
            url += f"&education_level={education}"
        generated_urls.append(url + salary_part + location_part)

    if use_incredible_values:
        for status, gender, experience, education in itertools.product(
            statuses, genders, experiences, educations
        ):
            url = incredible_prefix
            if status:
                url += f"&job_search_status={status}"
            if gender:
                url += f"&gender={gender}"
            if experience:
                url += f"&experience={experience}"
            if education:
                url += f"&education_level={education}"
            generated_urls.append(url + location_part)

    # Carry over any remaining query parameters from base_url. Strings are
    # immutable, so the list has to be rebuilt; `url += ...` on a loop variable
    # would silently discard the result.
    handled_keys = {"text", "job_search_status", "experience", "gender", "age_from", "age_to", "education_level", "area", "university"}
    extras = [
        # parse_qs decoded these, so they are percent-encoded again here.
        (urllib.parse.quote(key, safe=""), urllib.parse.quote(values[0], safe=""))
        for key, values in query_params.items()
        if key not in handled_keys
    ]

    def with_extras(url):
        for quoted_key, quoted_value in extras:
            # Do not duplicate a parameter the generated URL already carries.
            if f"?{quoted_key}=" in url or f"&{quoted_key}=" in url:
                continue
            url += f"&{quoted_key}={quoted_value}"
        return url

    if extras:
        generated_urls = [with_extras(url) for url in generated_urls]

    return generated_urls


def generate_specific_urls(base_url):
    """Return one variant of *base_url* per employment type.

    The query string of *base_url* is parsed, its ``employment`` parameter is
    set (or replaced) with each of the supported employment values in turn,
    and the URL is re-serialised. All other query parameters, including
    ``area`` and ``university``, are carried over unchanged; parameters with
    blank values are dropped by ``parse_qs``.

    Args:
        base_url: The search URL to specialise.

    Returns:
        A list of five URL strings, in the order
        ``full``, ``part``, ``project``, ``volunteer``, ``probation``.
    """
    parsed_url = urllib.parse.urlparse(base_url)
    query_params = urllib.parse.parse_qs(parsed_url.query)

    employment_options = ["full", "part", "project", "volunteer", "probation"]

    specific_urls = []
    for emp in employment_options:
        # Build a fresh mapping per variant; an existing "employment" key keeps
        # its position, otherwise the parameter is appended last.
        params = {**query_params, "employment": [emp]}
        new_query_string = urllib.parse.urlencode(params, doseq=True)
        specific_urls.append(parsed_url._replace(query=new_query_string).geturl())

    return specific_urls


# urls = generate_resume_urls("https://hh.ru/resumes/generalnyj-direktor-stroitelnoj-kompanii",use_incredible_values=True)
# print(len(urls))