import os
import traceback
import pandas as pd
import json
import uuid
import time
from apollo_apis import get_mixed_people, get_person_contact

# Apollo free accounts only expose the first 5 result pages.
MAX_FREE_PAGES = 5
# get_person_contact accepts batches of LinkedIn URLs; 10 per call.
CONTACT_CHUNK_SIZE = 10


def convert_json_to_files(api_key, payload):
    """Scrape Apollo people-search results and export them to XLSX.

    Parameters
    ----------
    api_key : str
        Apollo API key forwarded to ``get_mixed_people`` / ``get_person_contact``.
    payload : str
        JSON-encoded search payload for the Apollo mixed-people endpoint.

    Returns
    -------
    tuple[pandas.DataFrame, str | None]
        The collected leads and the path of the written XLSX file, or an
        empty DataFrame and ``None`` if anything failed.

    Side effects: writes ``saving_df.json`` and ``output_files/leads_<uuid>.xlsx``.
    """
    res = None  # defined before the try so the except block can log the last response
    try:
        data = json.loads(payload)

        # First request is only used to discover how many pages exist.
        res = get_mixed_people(data, api_key)
        print("RES >>>>>>>>>>>>>>> ", res)

        ending_page_index = min(res["pagination"]["total_pages"], MAX_FREE_PAGES)

        # Per-column accumulators; all must end up the same length.
        name = []
        first_name = []
        email = []
        company = []
        website_link = []
        job_title = []
        city = []
        country = []
        linkedin_profile = []
        phone_number = []

        for page in range(1, ending_page_index + 1):
            data["page"] = page
            res = get_mixed_people(data, api_key)
            if res is None:
                # Best-effort: skip pages the API failed to return.
                continue
            people = res["people"]
            print("Total People : ", len(people))
            for person in people:
                print("Name > ", person.get("name"))
                name.append(person.get("name"))
                first_name.append(person.get("first_name"))
                # BUG FIX: person["organization"]["name"] raised KeyError for
                # people without an organization; use .get like the other fields.
                org = person.get("organization") or {}
                company.append(org.get("name"))
                website_link.append(org.get("website_url"))
                job_title.append(person.get("title"))
                city.append(person.get("city"))
                country.append(person.get("country"))
                linkedin_profile.append(person.get("linkedin_url"))

        # Contact enrichment runs ONCE over the full URL list, after paging.
        # NOTE(review): the original (whitespace-mangled) source may have
        # re-chunked the accumulated list inside the page loop, which would
        # duplicate lookups for earlier pages — confirm against history.
        chunks = [
            linkedin_profile[i:i + CONTACT_CHUNK_SIZE]
            for i in range(0, len(linkedin_profile), CONTACT_CHUNK_SIZE)
        ]
        for chunk in chunks:
            print("Chunk size >>>>>>>> ", len(chunk))
            chunk = [{"linkedin_url": url} for url in chunk]
            person_email, person_number = get_person_contact(chunk, api_key)
            email.extend(person_email)
            phone_number.extend(person_number)

        # ROBUSTNESS: pad contact columns so a partial contact-lookup failure
        # cannot leave ragged columns (pd.DataFrame raises on unequal lengths).
        n_rows = len(name)
        email.extend([None] * (n_rows - len(email)))
        phone_number.extend([None] * (n_rows - len(phone_number)))

        df_data = {
            "Name": name,
            "First name": first_name,
            "email": email,
            "Company": company,
            "Website Link": website_link,
            "Job Title": job_title,
            "City": city,
            "Country": country,
            "Linkedin Profile": linkedin_profile,
            "Phone Number": phone_number,
        }

        # Debug snapshot of the raw columns before DataFrame construction.
        with open('saving_df.json', 'w') as json_file:
            json.dump(df_data, json_file, indent=4)  # indent=4 keeps the JSON readable

        df = pd.DataFrame(data=df_data)

        # BUG FIX: ensure the output directory exists before to_excel writes.
        os.makedirs("output_files", exist_ok=True)
        unique_id = uuid.uuid4()
        xlsx_file = f"output_files/leads_{unique_id}.xlsx"
        df.to_excel(xlsx_file, index=False)
        print("=" * 70)
        return df, xlsx_file
    except Exception as e:
        # res is always bound (None before the try), so this cannot NameError.
        print(res)
        print(f"Exception | convert_json_to_files | {str(e)}")
        traceback.print_exc()
        df = pd.DataFrame(data={})
        return df, None