File size: 3,718 Bytes
5e898dd
 
 
 
 
 
 
 
25feb32
5e898dd
 
 
 
25feb32
bbc3ad3
 
5e898dd
 
c6c08f0
bbc3ad3
 
5e898dd
 
 
 
 
 
 
 
 
 
 
 
6ed5295
5e898dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ed5295
5e898dd
 
 
 
 
 
 
 
 
 
25feb32
5e898dd
 
 
6ed5295
5e898dd
 
 
 
 
4d9c970
 
 
5e898dd
bbc3ad3
4d9c970
 
 
 
5e898dd
 
 
 
 
 
25feb32
5e898dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ed5295
5e898dd
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import traceback
import pandas as pd
import json
import uuid
import time
from apollo_apis import get_mixed_people, get_person_contact


def convert_json_to_files(api_key, payload):
    """Scrape Apollo people-search results and export them to JSON + XLSX.

    Parses *payload* (a JSON string of Apollo search filters), pages through
    the mixed-people endpoint, bulk-enriches contact info from LinkedIn URLs,
    writes the raw columns to ``saving_df.json`` and the table to
    ``output_files/leads_<uuid>.xlsx``.

    Args:
        api_key: Apollo API key, forwarded to the ``apollo_apis`` helpers.
        payload: JSON string of search filters; its ``"page"`` key is
            overwritten while paginating.

    Returns:
        tuple: ``(DataFrame, xlsx_path)`` on success, or
        ``(empty DataFrame, None)`` if anything fails.
    """
    # Bind up front so the except block can safely print the last response
    # (previously this raised UnboundLocalError when json.loads or the
    # first API call failed before `res` was assigned).
    res = None
    try:
        data = json.loads(payload)

        # Probe call: only the pagination metadata is used from this response.
        res = get_mixed_people(data, api_key)
        print("RES >>>>>>>>>>>>>>> ", res)
        # On a free account only the first 5 pages can be scraped.
        ending_page_index = min(res["pagination"]["total_pages"], 5)

        name = []
        first_name = []
        email = []
        company = []
        website_link = []
        job_title = []
        city = []
        country = []
        linkedin_profile = []
        phone_number = []

        # Column lists are aliased here; appending to the lists above fills
        # the frame's columns in place.
        df_data = {
            "Name": name,
            "First name": first_name,
            "email": email,
            "Company": company,
            "Website Link": website_link,
            "Job Title": job_title,
            "City": city,
            "Country": country,
            "Linkedin Profile": linkedin_profile,
            "Phone Number": phone_number,
        }

        for page in range(1, ending_page_index + 1):
            data["page"] = page
            res = get_mixed_people(data, api_key)
            if res is None:
                # Best effort: skip pages the API failed to return.
                continue

            people = res["people"]
            print("Total People : ", len(people))

            for person in people:
                print("Name > ", person.get("name"))
                # FIX: some records have no "organization" (or it is null);
                # indexing it directly crashed the whole export.
                org = person.get("organization") or {}
                name.append(person.get("name"))
                first_name.append(person.get("first_name"))
                company.append(org.get("name"))
                website_link.append(org.get("website_url"))
                job_title.append(person.get("title"))
                city.append(person.get("city"))
                country.append(person.get("country"))
                linkedin_profile.append(person.get("linkedin_url"))

        # Contact enrichment accepts at most 10 LinkedIn URLs per request.
        for start in range(0, len(linkedin_profile), 10):
            batch = [{"linkedin_url": url}
                     for url in linkedin_profile[start:start + 10]]
            print("Chunk size >>>>>>>> ", len(batch))
            person_email, person_number = get_person_contact(batch, api_key)
            email.extend(person_email)
            phone_number.extend(person_number)

        # FIX: if enrichment returned fewer entries than people scraped the
        # columns end up with unequal lengths and pd.DataFrame raises
        # ValueError — pad the short columns with None.
        email.extend([None] * (len(name) - len(email)))
        phone_number.extend([None] * (len(name) - len(phone_number)))

        # Keep a human-readable dump of the raw column data for debugging.
        with open('saving_df.json', 'w') as json_file:
            json.dump(df_data, json_file, indent=4)

        df = pd.DataFrame(data=df_data)

        # Unique filename so concurrent/repeated runs never clobber output.
        xlsx_file = f"output_files/leads_{uuid.uuid4()}.xlsx"
        df.to_excel(xlsx_file, index=False)

        print("=" * 70)

        return df, xlsx_file

    except Exception as e:
        # Top-level boundary: log the last API response (may be None) and
        # the traceback, then return an empty result instead of raising.
        print(res)
        print(f"Exception | convert_json_to_files | {str(e)}")
        traceback.print_exc()
        df = pd.DataFrame(data={})
        return df, None