File size: 3,718 Bytes
5e898dd
 
 
 
 
 
 
 
25feb32
5e898dd
 
 
 
25feb32
bbc3ad3
 
5e898dd
 
c6c08f0
bbc3ad3
 
5e898dd
 
 
 
 
 
 
 
 
 
 
 
6ed5295
5e898dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ed5295
5e898dd
 
 
 
 
 
 
 
 
 
25feb32
5e898dd
 
 
6ed5295
5e898dd
 
 
 
 
4d9c970
 
 
5e898dd
bbc3ad3
4d9c970
 
 
 
5e898dd
 
 
 
 
 
25feb32
5e898dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ed5295
5e898dd
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import traceback
import pandas as pd
import json
import uuid
import time
from apollo_apis import get_mixed_people, get_person_contact


def convert_json_to_files(api_key, payload):
    """Scrape Apollo people-search results and export them to JSON + XLSX.

    Parses *payload* (a JSON string of Apollo search filters), pages through
    the mixed-people endpoint, bulk-enriches contact info from LinkedIn URLs,
    writes the raw columns to ``saving_df.json`` and the table to
    ``output_files/leads_<uuid>.xlsx``.

    Args:
        api_key: Apollo API key, forwarded to the ``apollo_apis`` helpers.
        payload: JSON string of search filters; its ``"page"`` key is
            overwritten while paginating.

    Returns:
        tuple: ``(DataFrame, xlsx_path)`` on success, or
        ``(empty DataFrame, None)`` if anything fails.
    """
    # Bind up front so the except block can safely print the last response
    # (previously this raised UnboundLocalError when json.loads or the
    # first API call failed before `res` was assigned).
    res = None
    try:
        data = json.loads(payload)

        # Probe call: only the pagination metadata is used from this response.
        res = get_mixed_people(data, api_key)
        print("RES >>>>>>>>>>>>>>> ", res)
        # On a free account only the first 5 pages can be scraped.
        ending_page_index = min(res["pagination"]["total_pages"], 5)

        name = []
        first_name = []
        email = []
        company = []
        website_link = []
        job_title = []
        city = []
        country = []
        linkedin_profile = []
        phone_number = []

        # Column lists are aliased here; appending to the lists above fills
        # the frame's columns in place.
        df_data = {
            "Name": name,
            "First name": first_name,
            "email": email,
            "Company": company,
            "Website Link": website_link,
            "Job Title": job_title,
            "City": city,
            "Country": country,
            "Linkedin Profile": linkedin_profile,
            "Phone Number": phone_number,
        }

        for page in range(1, ending_page_index + 1):
            data["page"] = page
            res = get_mixed_people(data, api_key)
            if res is None:
                # Best effort: skip pages the API failed to return.
                continue

            people = res["people"]
            print("Total People : ", len(people))

            for person in people:
                print("Name > ", person.get("name"))
                # FIX: some records have no "organization" (or it is null);
                # indexing it directly crashed the whole export.
                org = person.get("organization") or {}
                name.append(person.get("name"))
                first_name.append(person.get("first_name"))
                company.append(org.get("name"))
                website_link.append(org.get("website_url"))
                job_title.append(person.get("title"))
                city.append(person.get("city"))
                country.append(person.get("country"))
                linkedin_profile.append(person.get("linkedin_url"))

        # Contact enrichment accepts at most 10 LinkedIn URLs per request.
        for start in range(0, len(linkedin_profile), 10):
            batch = [{"linkedin_url": url}
                     for url in linkedin_profile[start:start + 10]]
            print("Chunk size >>>>>>>> ", len(batch))
            person_email, person_number = get_person_contact(batch, api_key)
            email.extend(person_email)
            phone_number.extend(person_number)

        # FIX: if enrichment returned fewer entries than people scraped the
        # columns end up with unequal lengths and pd.DataFrame raises
        # ValueError — pad the short columns with None.
        email.extend([None] * (len(name) - len(email)))
        phone_number.extend([None] * (len(name) - len(phone_number)))

        # Keep a human-readable dump of the raw column data for debugging.
        with open('saving_df.json', 'w') as json_file:
            json.dump(df_data, json_file, indent=4)

        df = pd.DataFrame(data=df_data)

        # Unique filename so concurrent/repeated runs never clobber output.
        xlsx_file = f"output_files/leads_{uuid.uuid4()}.xlsx"
        df.to_excel(xlsx_file, index=False)

        print("=" * 70)

        return df, xlsx_file

    except Exception as e:
        # Top-level boundary: log the last API response (may be None) and
        # the traceback, then return an empty result instead of raising.
        print(res)
        print(f"Exception | convert_json_to_files | {str(e)}")
        traceback.print_exc()
        df = pd.DataFrame(data={})
        return df, None