"""Contact Discovery Platform

Professional Contact Discovery & Lead Generation Tool.
Advanced website analysis for contact intelligence gathering.
"""
import gradio as gr import requests import re import json import csv import io from urllib.parse import urlparse, urljoin import time import random # Mock contacts database (same as your API) CONTACTS_DB = [ { "id": 1, "first_name": "Sarah", "last_name": "Chen", "email": "sarah.chen@techflowsolutions.com", "phone": "+1-555-0101", "job_title": "CTO", "company": "TechFlow Solutions", "website": "https://techflowsolutions.com" }, { "id": 2, "first_name": "Marcus", "last_name": "Rodriguez", "email": "m.rodriguez@techflowsolutions.com", "phone": "+1-555-0102", "job_title": "Senior Developer", "company": "TechFlow Solutions", "website": "https://techflowsolutions.com" }, { "id": 3, "first_name": "Emma", "last_name": "Thompson", "email": "emma@greenleafconsult.com", "phone": "+1-555-0201", "job_title": "Managing Partner", "company": "GreenLeaf Consulting", "website": "https://greenleafconsult.com" }, { "id": 4, "first_name": "David", "last_name": "Park", "email": "david.park@greenleafconsult.com", "phone": "+1-555-0202", "job_title": "Environmental Analyst", "company": "GreenLeaf Consulting", "website": "https://greenleafconsult.com" }, { "id": 5, "first_name": "Jessica", "last_name": "Williams", "email": "jessica@blueskymarketing.net", "phone": "+1-555-0301", "job_title": "Creative Director", "company": "BlueSky Marketing", "website": "https://blueskymarketing.net" }, { "id": 6, "first_name": "Ryan", "last_name": "Mitchell", "email": "ryan.mitchell@blueskymarketing.net", "phone": "+1-555-0302", "job_title": "Account Manager", "company": "BlueSky Marketing", "website": "https://blueskymarketing.net" }, { "id": 7, "first_name": "Lisa", "last_name": "Zhang", "email": "l.zhang@quantumdynamics.org", "phone": "+1-555-0401", "job_title": "Research Director", "company": "Quantum Dynamics Corp", "website": "https://quantumdynamics.org" }, { "id": 8, "first_name": "Ahmed", "last_name": "Hassan", "email": "ahmed.hassan@quantumdynamics.org", "phone": "+1-555-0402", "job_title": 
"Quantum Engineer", "company": "Quantum Dynamics Corp", "website": "https://quantumdynamics.org" }, { "id": 9, "first_name": "Maria", "last_name": "Gonzalez", "email": "maria@stellarlogistics.biz", "phone": "+1-555-0501", "job_title": "Operations Manager", "company": "Stellar Logistics", "website": "https://stellarlogistics.biz" }, { "id": 10, "first_name": "James", "last_name": "O'Connor", "email": "james.oconnor@stellarlogistics.biz", "phone": "+1-555-0502", "job_title": "Fleet Coordinator", "company": "Stellar Logistics", "website": "https://stellarlogistics.biz" }, { "id": 11, "first_name": "Robert", "last_name": "Kim", "email": "robert.kim@nexusfinancial.pro", "phone": "+1-555-0601", "job_title": "Senior Advisor", "company": "Nexus Financial", "website": "https://nexusfinancial.pro" }, { "id": 12, "first_name": "Catherine", "last_name": "Lee", "email": "catherine@nexusfinancial.pro", "phone": "+1-555-0602", "job_title": "Investment Analyst", "company": "Nexus Financial", "website": "https://nexusfinancial.pro" }, { "id": 13, "first_name": "Michael", "last_name": "Johnson", "email": "m.johnson@horizonhealth.care", "phone": "+1-555-0701", "job_title": "Chief Medical Officer", "company": "Horizon Health Systems", "website": "https://horizonhealth.care" }, { "id": 14, "first_name": "Jennifer", "last_name": "Adams", "email": "jennifer.adams@horizonhealth.care", "phone": "+1-555-0702", "job_title": "Head Nurse", "company": "Horizon Health Systems", "website": "https://horizonhealth.care" }, { "id": 15, "first_name": "Tony", "last_name": "Ricci", "email": "tony.ricci@phoenixmfg.com", "phone": "+1-555-0801", "job_title": "Plant Manager", "company": "Phoenix Manufacturing", "website": "https://phoenixmfg.com" }, { "id": 16, "first_name": "Linda", "last_name": "Martinez", "email": "linda.martinez@phoenixmfg.com", "phone": "+1-555-0802", "job_title": "Quality Control Supervisor", "company": "Phoenix Manufacturing", "website": "https://phoenixmfg.com" }, { "id": 17, 
"first_name": "Patricia", "last_name": "White", "email": "patricia.white@alpineeducation.edu", "phone": "+1-555-0901", "job_title": "Director of Programs", "company": "Alpine Education Group", "website": "https://alpineeducation.edu" }, { "id": 18, "first_name": "Kevin", "last_name": "Brown", "email": "kevin.brown@alpineeducation.edu", "phone": "+1-555-0902", "job_title": "Curriculum Specialist", "company": "Alpine Education Group", "website": "https://alpineeducation.edu" }, { "id": 19, "first_name": "Sophia", "last_name": "Taylor", "email": "sophia@crimsoncreative.studio", "phone": "+1-555-1001", "job_title": "Art Director", "company": "Crimson Creative Studio", "website": "https://crimsoncreative.studio" }, { "id": 20, "first_name": "Alex", "last_name": "Cooper", "email": "alex.cooper@crimsoncreative.studio", "phone": "+1-555-1002", "job_title": "Graphic Designer", "company": "Crimson Creative Studio", "website": "https://crimsoncreative.studio" } ] def extract_domain(url): """Extract domain from URL""" try: if not url.startswith(('http://', 'https://')): url = 'https://' + url parsed = urlparse(url) domain = parsed.netloc.lower() # Remove www. 
if present if domain.startswith('www.'): domain = domain[4:] return domain except: return "" def find_contacts_by_website(website_url): """Find contacts that match the website domain""" target_domain = extract_domain(website_url) if not target_domain: return [] print(f"Searching for domain: {target_domain}") # Debug info matching_contacts = [] for contact in CONTACTS_DB: contact_domain = extract_domain(contact['website']) print(f"Comparing with: {contact_domain}") # Debug info # Exact domain match or subdomain match if target_domain == contact_domain or target_domain in contact_domain or contact_domain in target_domain: matching_contacts.append(contact) print(f"Found {len(matching_contacts)} matching contacts") # Debug info return matching_contacts def simulate_website_scraping(url): """Simulate scraping a website and finding contact information""" # Add some delay to simulate real scraping time.sleep(random.uniform(0.5, 1)) # Find matching contacts from our database contacts = find_contacts_by_website(url) return contacts def is_valid_url(url): """Check if a string looks like a valid URL""" url = url.strip() if not url: return False # Check if it contains common URL patterns url_patterns = [ r'^\w+\.\w+', # domain.com r'^\w+\.\w+\.\w+', # subdomain.domain.com r'^https?://', # starts with http/https r'www\.', # contains www ] for pattern in url_patterns: if re.search(pattern, url.lower()): return True return False def parse_csv_file(file_obj): """Parse CSV file and extract website URLs from column H (or auto-detect)""" websites = [] debug_info = [] try: # Handle different file input types if hasattr(file_obj, 'name'): # This is a file path (NamedString from Gradio) with open(file_obj.name, 'r', encoding='utf-8') as f: content = f.read() elif isinstance(file_obj, str): # This is already a string content = file_obj else: # This might be bytes content = file_obj.decode('utf-8') # Parse CSV csv_reader = csv.reader(io.StringIO(content)) rows = list(csv_reader) if not 
rows: debug_info.append("CSV file is empty") return [], debug_info debug_info.append(f"Total rows in CSV: {len(rows)}") # Analyze the first few rows to understand structure if len(rows) > 0: debug_info.append(f"First row has {len(rows[0])} columns: {rows[0]}") if len(rows) > 1: debug_info.append(f"Second row has {len(rows[1])} columns: {rows[1]}") # Check if first row looks like headers first_row = rows[0] has_headers = any(col.lower() in ['website', 'url', 'domain', 'site', 'web'] for col in first_row) # Try to find website column website_column_index = None # First, try column H (index 7) if len(first_row) >= 8: website_column_index = 7 debug_info.append(f"Using column H (index 7) as specified") else: # Auto-detect website column for i, col in enumerate(first_row): if col.lower() in ['website', 'url', 'domain', 'site', 'web']: website_column_index = i debug_info.append(f"Auto-detected website column at index {i}: '{col}'") break if website_column_index is None: # If no obvious column found, scan all columns for URLs debug_info.append("No obvious website column found, scanning all columns for URLs...") for row_idx, row in enumerate(rows[1:] if has_headers else rows, start=1): for col_idx, cell in enumerate(row): if is_valid_url(cell): website_column_index = col_idx debug_info.append(f"Found URLs in column {col_idx} (row {row_idx}): '{cell}'") break if website_column_index is not None: break if website_column_index is None: debug_info.append("ERROR: Could not find any column with website URLs") return [], debug_info # Extract websites from the identified column start_row = 1 if has_headers else 0 for row_idx, row in enumerate(rows[start_row:], start=start_row + 1): if len(row) > website_column_index: website_url = row[website_column_index].strip() if website_url and is_valid_url(website_url): websites.append(website_url) debug_info.append(f"Found website in row {row_idx}: {website_url}") elif website_url: debug_info.append(f"Row {row_idx}: '{website_url}' doesn't 
look like a valid URL") else: debug_info.append(f"Row {row_idx}: Empty cell in website column") else: debug_info.append(f"Row {row_idx}: Has only {len(row)} columns, need at least {website_column_index + 1}") debug_info.append(f"Total websites extracted: {len(websites)}") if websites: debug_info.append(f"Sample websites: {websites[:5]}") return websites, debug_info except Exception as e: debug_info.append(f"Error parsing CSV: {e}") return [], debug_info def search_csv_websites(csv_file, max_results=10): """Search for contacts from websites listed in CSV file""" if csv_file is None: return "Please upload a CSV file", "" try: # Parse CSV file websites, debug_info = parse_csv_file(csv_file) debug_text = "\n".join(debug_info) if not websites: error_msg = "No websites found in the CSV file.\n\n" error_msg += "DEBUG INFORMATION:\n" + debug_text + "\n\n" error_msg += "TROUBLESHOOTING:\n" error_msg += "1. Ensure your CSV has website URLs in column H (8th column)\n" error_msg += "2. Or have a column header named 'website', 'url', 'domain', etc.\n" error_msg += "3. Check that URLs are properly formatted (e.g., example.com or https://example.com)\n" error_msg += "4. 
Verify the CSV file is not corrupted\n" return error_msg, "" all_contacts = [] processed_websites = [] # Search each website for website in websites[:20]: # Limit to first 20 websites print(f"Processing website: {website}") contacts = simulate_website_scraping(website) if contacts: all_contacts.extend(contacts) processed_websites.append(website) print(f"Found {len(contacts)} contacts for {website}") else: print(f"No contacts found for {website}") # Remove duplicates based on email unique_contacts = [] seen_emails = set() for contact in all_contacts: if contact['email'] not in seen_emails: unique_contacts.append(contact) seen_emails.add(contact['email']) # Limit results unique_contacts = unique_contacts[:max_results] if not unique_contacts: result_msg = f"No contacts found for the {len(websites)} websites from the CSV file.\n\n" result_msg += "DEBUG INFORMATION:\n" + debug_text + "\n\n" result_msg += f"Websites processed: {', '.join(websites[:10])}\n" result_msg += "This might be because the websites are not in our sample database." 
return result_msg, "" # Format results results_text = f"CONTACT DISCOVERY REPORT\n" results_text += f"CSV Processing Details:\n" results_text += f"Total Websites in CSV: {len(websites)}\n" results_text += f"Websites Processed: {len(processed_websites)}\n" results_text += f"Websites with Contacts: {len(processed_websites)}\n" results_text += f"Unique Contacts Found: {len(unique_contacts)}\n" results_text += f"Processed Websites: {', '.join(processed_websites)}\n" results_text += f"{'='*60}\n\n" # Add debug info results_text += "DEBUG INFORMATION:\n" + debug_text + "\n\n" results_text += f"{'='*60}\n\n" for i, contact in enumerate(unique_contacts, 1): results_text += f"CONTACT #{i}\n" results_text += f"Name: {contact['first_name']} {contact['last_name']}\n" results_text += f"Position: {contact['job_title']}\n" results_text += f"Email: {contact['email']}\n" results_text += f"Phone: {contact['phone']}\n" results_text += f"Company: {contact['company']}\n" results_text += f"Website: {contact['website']}\n\n" # Create CSV output csv_output = "First Name,Last Name,Job Title,Email,Phone,Company,Website\n" for contact in unique_contacts: csv_output += f"{contact['first_name']},{contact['last_name']},{contact['job_title']},{contact['email']},{contact['phone']},{contact['company']},{contact['website']}\n" return results_text, csv_output except Exception as e: return f"Error processing CSV file: {str(e)}", "" def search_website_contacts(website_url, max_results=10): """Main function to search for contacts on a website""" if not website_url: return "Please enter a website URL", "" # Clean up URL if not website_url.startswith(('http://', 'https://')): website_url = 'https://' + website_url try: # Simulate finding contacts contacts = simulate_website_scraping(website_url) if not contacts: return f"No contacts found on {website_url}. \n\nThis website is not in our contact database. 
Try one of the sample websites listed below, or the website might not have publicly available contact information.", "" # Limit results contacts = contacts[:max_results] # Format results results_text = f"CONTACT INTELLIGENCE REPORT\n" results_text += f"Website: {website_url}\n" results_text += f"Contacts Found: {len(contacts)}\n" results_text += f"{'='*60}\n\n" for i, contact in enumerate(contacts, 1): results_text += f"CONTACT #{i}\n" results_text += f"First Name: {contact['first_name']}\n" results_text += f"Last Name: {contact['last_name']}\n" results_text += f"Position: {contact['job_title']}\n" results_text += f"Email: {contact['email']}\n" results_text += f"Phone: {contact['phone']}\n" results_text += f"Company: {contact['company']}\n\n" # Create a simple table format for the second output table_text = "First Name,Last Name,Job Title,Email,Phone,Company\n" for contact in contacts: table_text += f"{contact['first_name']},{contact['last_name']},{contact['job_title']},{contact['email']},{contact['phone']},{contact['company']}\n" return results_text, table_text except Exception as e: return f"Error searching website: {str(e)}", "" def get_all_available_websites(): """Get list of all available websites from the database""" websites = list(set([contact['website'] for contact in CONTACTS_DB])) return "\n".join(sorted(websites)) # Custom CSS custom_css = """ .gradio-container { background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; } .main-header { background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 50%, #1e40af 100%); color: white; padding: 40px 20px; text-align: center; border-radius: 15px; margin-bottom: 30px; box-shadow: 0 10px 30px rgba(30, 58, 138, 0.3); } .main-header h1 { font-size: 2.5em; margin: 0; font-weight: 700; text-shadow: 2px 2px 4px rgba(0,0,0,0.3); } .main-header p { font-size: 1.2em; margin: 15px 0 0 0; opacity: 0.9; } .corporate-card { background: white; border: 1px solid 
#d1d5db; border-radius: 12px; padding: 25px; margin: 15px 0; box-shadow: 0 4px 15px rgba(0,0,0,0.1); border-left: 5px solid #1e40af; } .tips-section { background: linear-gradient(135deg, #f1f5f9 0%, #e2e8f0 100%); border: 2px solid #cbd5e1; border-radius: 15px; padding: 20px; margin: 10px 0; } .tips-section h3 { color: #1e40af; margin-top: 0; font-weight: 600; } .primary-btn { background: linear-gradient(135deg, #1e40af 0%, #3b82f6 100%); color: white; border: none; border-radius: 8px; padding: 15px 30px; font-weight: 600; font-size: 16px; transition: all 0.3s ease; } .primary-btn:hover { background: linear-gradient(135deg, #1e3a8a 0%, #2563eb 100%); transform: translateY(-2px); box-shadow: 0 6px 20px rgba(30, 64, 175, 0.4); } .secondary-btn { background: white; color: #374151; border: 2px solid #d1d5db; border-radius: 6px; padding: 8px 16px; font-weight: 500; transition: all 0.3s ease; } .secondary-btn:hover { border-color: #1e40af; color: #1e40af; background: #f8fafc; } .custom-input { border: 2px solid #d1d5db; border-radius: 8px; padding: 12px; font-size: 16px; transition: border-color 0.3s ease; } .custom-input:focus { border-color: #3b82f6; box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1); } .results-container { background: white; border: 1px solid #e5e7eb; border-radius: 10px; padding: 20px; margin: 15px 0; box-shadow: 0 2px 10px rgba(0,0,0,0.05); } .section-header { background: linear-gradient(135deg, #64748b 0%, #475569 100%); color: white; padding: 15px 20px; border-radius: 10px; margin: 20px 0 15px 0; font-weight: 600; } """ # Create Gradio interface with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.themes.Base()) as app: gr.HTML("""
Professional Contact Discovery & Lead Generation Tool
Advanced website analysis for contact intelligence gathering
Professional-grade contact discovery and lead generation technology
Powered by advanced web intelligence algorithms