SuriRaja committed on
Commit
88068ae
·
verified ·
1 Parent(s): 982a090

Create sheets.py

Browse files
Files changed (1) hide show
  1. sheets.py +54 -0
sheets.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gspread
2
+ from oauth2client.service_account import ServiceAccountCredentials
3
+
4
def get_sheet(credentials_path="credentials.json", spreadsheet_name="Leads"):
    """Open the first worksheet of a Google spreadsheet via a service account.

    Args:
        credentials_path: Path to the service-account JSON key file.
            Defaults to ``"credentials.json"`` in the working directory.
        spreadsheet_name: Title of the spreadsheet to open. Defaults to
            ``"Leads"``.

    Returns:
        The ``sheet1`` worksheet of the opened spreadsheet.
    """
    scope = [
        "https://spreadsheets.google.com/feeds",
        "https://www.googleapis.com/auth/drive",
    ]
    creds = ServiceAccountCredentials.from_json_keyfile_name(credentials_path, scope)
    client = gspread.authorize(creds)
    return client.open(spreadsheet_name).sheet1
10
+
11
+ import requests
12
+ from bs4 import BeautifulSoup
13
+
14
def fetch_google_search_results(query, num_results, timeout=10):
    """Fetch the raw HTML of a Google search results page.

    Args:
        query: Search terms. Passed as a query parameter so that spaces and
            reserved characters (``&``, ``#``, ``+`` ...) are URL-encoded.
        num_results: Value sent as the ``num`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx.
        requests.RequestException: On connection errors or timeout.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    response = requests.get(
        "https://www.google.com/search",
        params={"q": query, "num": num_results},
        headers=headers,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.text
21
+
22
def extract_company_info(html_content):
    """Parse search-result HTML into a list of company records.

    Each ``<div class="tF2Cxc">`` element is treated as one result.
    NOTE(review): ``tF2Cxc`` and ``aCOpRe`` look like generated CSS class
    names of the results page; confirm they still match the live markup.

    Args:
        html_content: HTML text of a search results page.

    Returns:
        A list of dicts with the keys ``'Name'`` (text of the first
        ``<h3>``), ``'Website'`` (``href`` of the first ``<a>``) and
        ``'Additional Info'`` (text of the first ``<span class="aCOpRe">``).
        A field is ``''`` when the corresponding element is missing.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    data = []
    for result in soup.find_all('div', class_='tF2Cxc'):
        # Look each element up once instead of once for the test and once
        # for the value.
        title = result.find('h3')
        link = result.find('a')
        snippet = result.find('span', class_='aCOpRe')
        data.append({
            'Name': title.text if title else '',
            # An <a> without an href attribute must not raise KeyError.
            'Website': link.get('href', '') if link else '',
            'Additional Info': snippet.text if snippet else '',
        })
    return data
33
+
34
def insert_data(sheet, data):
    """Append new lead entries to the sheet, skipping known emails.

    An entry is skipped when its non-empty ``'Email'`` already appears in
    the sheet or in an earlier entry of ``data``. Entries without an email
    cannot be de-duplicated by email and are always appended.

    Args:
        sheet: Worksheet-like object providing ``get_all_records()`` and
            ``append_row(row)``.
        data: Iterable of dicts. Recognised keys are ``'Name'``,
            ``'Email'``, ``'Mobile Number'``, ``'POC'``, ``'Website'`` and
            ``'LinkedIn Profile'``; any missing key is written as ``''``.
    """
    # A set gives O(1) membership tests; .get tolerates a sheet that has no
    # 'Email' column yet.
    seen_emails = {record.get('Email', '') for record in sheet.get_all_records()}

    for entry in data:
        email = entry.get('Email', '')
        if email:
            if email in seen_emails:
                continue
            # Remember it so a repeat later in `data` is skipped as well.
            seen_emails.add(email)
        row = [
            entry.get('Name', ''),
            email,
            entry.get('Mobile Number', ''),
            entry.get('POC', ''),
            entry.get('Website', ''),
            entry.get('LinkedIn Profile', ''),
            'Not Contacted',
        ]
        sheet.append_row(row)
42
+
43
def remove_duplicates(sheet):
    """Rewrite the sheet keeping only the first row for each email.

    All records are read, the sheet is cleared, a fixed header row is
    written, and the surviving rows are appended in their original order.
    Rows with an empty email are all kept, since a blank email does not
    identify a duplicate.

    Args:
        sheet: Worksheet-like object providing ``get_all_records()``,
            ``clear()`` and ``append_row(row)``.
    """
    columns = ["Name", "Email", "Mobile Number", "POC", "Website", "LinkedIn Profile", "Status"]

    # Build every output row BEFORE clearing the sheet, so a malformed
    # record cannot raise after the existing data has been wiped.
    seen_emails = set()
    rows = []
    for record in sheet.get_all_records():
        email = record.get('Email', '')
        if email:
            if email in seen_emails:
                continue
            seen_emails.add(email)
        # A column absent from the sheet is written as an empty cell.
        rows.append([record.get(column, '') for column in columns])

    sheet.clear()
    sheet.append_row(columns)
    for row in rows:
        sheet.append_row(row)