Spaces:
Build error
Build error
Create sheets.py
Browse files
sheets.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gspread
|
| 2 |
+
from oauth2client.service_account import ServiceAccountCredentials
|
| 3 |
+
|
| 4 |
+
def get_sheet():
    """Return the first worksheet of the "Leads" spreadsheet.

    Loads service-account credentials from the relative path
    ``credentials.json``, authorizes a gspread client with them, opens the
    spreadsheet titled "Leads" and returns its ``sheet1`` worksheet.
    """
    api_scopes = [
        "https://spreadsheets.google.com/feeds",
        "https://www.googleapis.com/auth/drive",
    ]
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        "credentials.json", api_scopes
    )
    return gspread.authorize(credentials).open("Leads").sheet1
|
| 10 |
+
|
| 11 |
+
import requests
|
| 12 |
+
from bs4 import BeautifulSoup
|
| 13 |
+
|
| 14 |
+
def fetch_google_search_results(query, num_results, timeout=10):
    """Fetch the raw HTML of a Google search results page.

    Args:
        query: Search terms, passed as the ``q`` query parameter.
        num_results: Value passed as the ``num`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    # Let requests build the query string so spaces, '&', '#' and non-ASCII
    # characters in the query are URL-encoded instead of corrupting the URL.
    response = requests.get(
        "https://www.google.com/search",
        params={"q": query, "num": num_results},
        headers=headers,
        # Without a timeout requests can block indefinitely on a stalled
        # connection.
        timeout=timeout,
    )
    response.raise_for_status()
    return response.text
|
| 21 |
+
|
| 22 |
+
def extract_company_info(html_content):
    """Extract one record per search result from a results page.

    Args:
        html_content: HTML text of the page to parse.

    Returns:
        A list of dicts with the keys ``'Name'`` (text of the result's first
        ``<h3>``), ``'Website'`` (``href`` of its first ``<a>``) and
        ``'Additional Info'`` (text of its first ``span.aCOpRe``). A value is
        an empty string when the corresponding element or attribute is
        missing.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    data = []
    for result in soup.find_all('div', class_='tF2Cxc'):
        # Look each element up once and reuse it.
        title = result.find('h3')
        link = result.find('a')
        snippet = result.find('span', class_='aCOpRe')
        data.append({
            'Name': title.text if title is not None else '',
            # .get() avoids a KeyError for anchors that carry no href.
            'Website': link.get('href', '') if link is not None else '',
            'Additional Info': snippet.text if snippet is not None else '',
        })
    return data
|
| 33 |
+
|
| 34 |
+
def insert_data(sheet, data):
    """Append new lead entries to the sheet, skipping known emails.

    Args:
        sheet: Worksheet-like object providing ``get_all_records()`` and
            ``append_rows(rows)``.
        data: Iterable of dicts. ``'Name'`` and ``'Website'`` are required;
            ``'Email'``, ``'Mobile Number'``, ``'POC'`` and
            ``'LinkedIn Profile'`` are optional and default to ``''``.

    An entry is skipped when its non-blank email is already in the sheet or
    was already queued earlier in the same call. Entries without an email
    cannot be matched against anything and are always appended. Every
    appended row gets the status ``'Not Contacted'``.
    """
    # A set gives O(1) membership tests; blank emails are not identities.
    known_emails = {
        record['Email'] for record in sheet.get_all_records() if record['Email']
    }

    new_rows = []
    for entry in data:
        # Entries may lack an 'Email' key entirely (e.g. scraped results).
        email = entry.get('Email', '')
        if email:
            if email in known_emails:
                continue
            # Remember it so a repeat later in this batch is skipped too.
            known_emails.add(email)
        new_rows.append([
            entry['Name'],
            email,
            entry.get('Mobile Number', ''),
            entry.get('POC', ''),
            entry['Website'],
            entry.get('LinkedIn Profile', ''),
            'Not Contacted',
        ])

    # One batched write instead of one API request per row.
    if new_rows:
        sheet.append_rows(new_rows)
|
| 42 |
+
|
| 43 |
+
def remove_duplicates(sheet):
    """Rewrite the sheet keeping only the first row for each email.

    Args:
        sheet: Worksheet-like object providing ``get_all_records()``,
            ``clear()`` and ``append_rows(rows)``.

    Reads all records, drops every row whose non-blank ``'Email'`` already
    appeared on an earlier row, then clears the sheet and writes the header
    followed by the surviving rows. Rows with a blank email are all kept,
    because a blank value does not identify a duplicate.
    """
    header = ["Name", "Email", "Mobile Number", "POC", "Website", "LinkedIn Profile", "Status"]

    seen_emails = set()
    rows = [header]
    for record in sheet.get_all_records():
        email = record['Email']
        if email:
            if email in seen_emails:
                continue
            seen_emails.add(email)
        rows.append([record[column] for column in header])

    sheet.clear()
    # Write everything back in one request: a row-by-row rewrite that fails
    # part-way would leave the already-cleared sheet truncated.
    sheet.append_rows(rows)
|