"""Contact Discovery Platform: a Gradio demo that looks up mock contacts by website."""

import csv
import io
import json
import random
import re
import time
from urllib.parse import urljoin, urlparse

import gradio as gr
import requests

# NOTE(review): json, requests and urljoin are not referenced in the code shown
# here; they are kept because other parts of the project may rely on them.

# Mock contacts database (same as your API)
CONTACTS_DB = [
    {
        "id": 1,
        "first_name": "Sarah",
        "last_name": "Chen",
        "email": "sarah.chen@techflowsolutions.com",
        "phone": "+1-555-0101",
        "job_title": "CTO",
        "company": "TechFlow Solutions",
        "website": "https://techflowsolutions.com",
    },
    {
        "id": 2,
        "first_name": "Marcus",
        "last_name": "Rodriguez",
        "email": "m.rodriguez@techflowsolutions.com",
        "phone": "+1-555-0102",
        "job_title": "Senior Developer",
        "company": "TechFlow Solutions",
        "website": "https://techflowsolutions.com",
    },
    {
        "id": 3,
        "first_name": "Emma",
        "last_name": "Thompson",
        "email": "emma@greenleafconsult.com",
        "phone": "+1-555-0201",
        "job_title": "Managing Partner",
        "company": "GreenLeaf Consulting",
        "website": "https://greenleafconsult.com",
    },
    {
        "id": 4,
        "first_name": "David",
        "last_name": "Park",
        "email": "david.park@greenleafconsult.com",
        "phone": "+1-555-0202",
        "job_title": "Environmental Analyst",
        "company": "GreenLeaf Consulting",
        "website": "https://greenleafconsult.com",
    },
    {
        "id": 5,
        "first_name": "Jessica",
        "last_name": "Williams",
        "email": "jessica@blueskymarketing.net",
        "phone": "+1-555-0301",
        "job_title": "Creative Director",
        "company": "BlueSky Marketing",
        "website": "https://blueskymarketing.net",
    },
    {
        "id": 6,
        "first_name": "Ryan",
        "last_name": "Mitchell",
        "email": "ryan.mitchell@blueskymarketing.net",
        "phone": "+1-555-0302",
        "job_title": "Account Manager",
        "company": "BlueSky Marketing",
        "website": "https://blueskymarketing.net",
    },
    {
        "id": 7,
        "first_name": "Lisa",
        "last_name": "Zhang",
        "email": "l.zhang@quantumdynamics.org",
        "phone": "+1-555-0401",
        "job_title": "Research Director",
        "company": "Quantum Dynamics Corp",
        "website": "https://quantumdynamics.org",
    },
    {
        "id": 8,
        "first_name": "Ahmed",
        "last_name": "Hassan",
        "email": "ahmed.hassan@quantumdynamics.org",
        "phone": "+1-555-0402",
        "job_title": "Quantum Engineer",
        "company": "Quantum Dynamics Corp",
        "website": "https://quantumdynamics.org",
    },
    {
        "id": 9,
        "first_name": "Maria",
        "last_name": "Gonzalez",
        "email": "maria@stellarlogistics.biz",
        "phone": "+1-555-0501",
        "job_title": "Operations Manager",
        "company": "Stellar Logistics",
        "website": "https://stellarlogistics.biz",
    },
    {
        "id": 10,
        "first_name": "James",
        "last_name": "O'Connor",
        "email": "james.oconnor@stellarlogistics.biz",
        "phone": "+1-555-0502",
        "job_title": "Fleet Coordinator",
        "company": "Stellar Logistics",
        "website": "https://stellarlogistics.biz",
    },
    {
        "id": 11,
        "first_name": "Robert",
        "last_name": "Kim",
        "email": "robert.kim@nexusfinancial.pro",
        "phone": "+1-555-0601",
        "job_title": "Senior Advisor",
        "company": "Nexus Financial",
        "website": "https://nexusfinancial.pro",
    },
    {
        "id": 12,
        "first_name": "Catherine",
        "last_name": "Lee",
        "email": "catherine@nexusfinancial.pro",
        "phone": "+1-555-0602",
        "job_title": "Investment Analyst",
        "company": "Nexus Financial",
        "website": "https://nexusfinancial.pro",
    },
    {
        "id": 13,
        "first_name": "Michael",
        "last_name": "Johnson",
        "email": "m.johnson@horizonhealth.care",
        "phone": "+1-555-0701",
        "job_title": "Chief Medical Officer",
        "company": "Horizon Health Systems",
        "website": "https://horizonhealth.care",
    },
    {
        "id": 14,
        "first_name": "Jennifer",
        "last_name": "Adams",
        "email": "jennifer.adams@horizonhealth.care",
        "phone": "+1-555-0702",
        "job_title": "Head Nurse",
        "company": "Horizon Health Systems",
        "website": "https://horizonhealth.care",
    },
    {
        "id": 15,
        "first_name": "Tony",
        "last_name": "Ricci",
        "email": "tony.ricci@phoenixmfg.com",
        "phone": "+1-555-0801",
        "job_title": "Plant Manager",
        "company": "Phoenix Manufacturing",
        "website": "https://phoenixmfg.com",
    },
    {
        "id": 16,
        "first_name": "Linda",
        "last_name": "Martinez",
        "email": "linda.martinez@phoenixmfg.com",
        "phone": "+1-555-0802",
        "job_title": "Quality Control Supervisor",
        "company": "Phoenix Manufacturing",
        "website": "https://phoenixmfg.com",
    },
    {
        "id": 17,
        "first_name": "Patricia",
        "last_name": "White",
        "email": "patricia.white@alpineeducation.edu",
        "phone": "+1-555-0901",
        "job_title": "Director of Programs",
        "company": "Alpine Education Group",
        "website": "https://alpineeducation.edu",
    },
    {
        "id": 18,
        "first_name": "Kevin",
        "last_name": "Brown",
        "email": "kevin.brown@alpineeducation.edu",
        "phone": "+1-555-0902",
        "job_title": "Curriculum Specialist",
        "company": "Alpine Education Group",
        "website": "https://alpineeducation.edu",
    },
    {
        "id": 19,
        "first_name": "Sophia",
        "last_name": "Taylor",
        "email": "sophia@crimsoncreative.studio",
        "phone": "+1-555-1001",
        "job_title": "Art Director",
        "company": "Crimson Creative Studio",
        "website": "https://crimsoncreative.studio",
    },
    {
        "id": 20,
        "first_name": "Alex",
        "last_name": "Cooper",
        "email": "alex.cooper@crimsoncreative.studio",
        "phone": "+1-555-1002",
        "job_title": "Graphic Designer",
        "company": "Crimson Creative Studio",
        "website": "https://crimsoncreative.studio",
    },
]

# Upper bound on how many CSV websites are searched in one request.
MAX_CSV_WEBSITES = 20

# Zero-based index of spreadsheet column H, the preferred website column.
COLUMN_H_INDEX = 7

# Header names (lower-case) that identify a website column.
WEBSITE_HEADER_NAMES = frozenset({"website", "url", "domain", "site", "web"})

_URL_SCHEMES = ("http://", "https://")

# Dot-separated host labels followed by an alphabetic (or punycode) TLD.
_HOSTNAME_RE = re.compile(
    r"^(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+(?:[a-z]{2,}|xn--[a-z0-9-]+)$"
)

# (CSV header, contact key) pairs for the single-website export.
_CSV_COLUMNS = (
    ("First Name", "first_name"),
    ("Last Name", "last_name"),
    ("Job Title", "job_title"),
    ("Email", "email"),
    ("Phone", "phone"),
    ("Company", "company"),
)

# The bulk export additionally carries the contact's website.
_CSV_COLUMNS_WITH_WEBSITE = _CSV_COLUMNS + (("Website", "website"),)


def extract_domain(url):
    """Return the lower-case host of *url* without a leading ``www.``.

    A missing scheme is treated as ``https://``. Port and user-info are
    dropped. Returns ``""`` when *url* is not a string, is blank, or cannot
    be parsed.
    """
    if not isinstance(url, str):
        return ""
    url = url.strip()
    if not url:
        return ""
    # Case-insensitive so "HTTP://host" is not mistaken for a bare host name.
    if not url.lower().startswith(_URL_SCHEMES):
        url = "https://" + url
    try:
        # hostname (unlike netloc) excludes "user:pass@" and ":port".
        domain = urlparse(url).hostname or ""
    except ValueError:
        # Raised by urlparse for malformed input such as an unclosed "[".
        return ""
    domain = domain.lower()
    # Remove www. if present
    if domain.startswith("www."):
        domain = domain[4:]
    return domain


def _is_subdomain_of(child, parent):
    """Return True when *child* is a strict, dot-bounded subdomain of *parent*."""
    # Requiring a dot in the parent keeps a bare TLD such as "com" from
    # matching every host. Multi-label public suffixes ("co.uk") still pass;
    # telling them apart needs a public-suffix list the stdlib does not have.
    return "." in parent and child.endswith("." + parent)


def find_contacts_by_website(website_url):
    """Return the contacts in ``CONTACTS_DB`` whose website matches *website_url*.

    Two domains match when they are equal or when one is a subdomain of the
    other (compared on label boundaries, not as raw substrings). Returns an
    empty list when no domain can be extracted from *website_url*.
    """
    target_domain = extract_domain(website_url)
    if not target_domain:
        return []

    print(f"Searching for domain: {target_domain}")  # Debug info

    matching_contacts = []
    for contact in CONTACTS_DB:
        contact_domain = extract_domain(contact["website"])
        print(f"Comparing with: {contact_domain}")  # Debug info

        # Exact domain match or subdomain match
        if (
            target_domain == contact_domain
            or _is_subdomain_of(target_domain, contact_domain)
            or _is_subdomain_of(contact_domain, target_domain)
        ):
            matching_contacts.append(contact)

    print(f"Found {len(matching_contacts)} matching contacts")  # Debug info
    return matching_contacts


def simulate_website_scraping(url):
    """Simulate scraping *url*: sleep 0.5-1 s, then look the site up in the mock DB."""
    # Add some delay to simulate real scraping
    time.sleep(random.uniform(0.5, 1))

    # Find matching contacts from our database
    return find_contacts_by_website(url)


def is_valid_url(url):
    """Return True when *url* looks like a website address.

    Accepted forms are ``http(s)://host...`` with a non-empty host, or a bare
    host name made of dot-separated labels ending in an alphabetic TLD,
    optionally followed by a port, path, query or fragment. Hyphenated hosts
    are accepted; e-mail addresses and plain decimals are not.
    """
    if not isinstance(url, str):
        return False
    candidate = url.strip().lower()
    if not candidate:
        return False

    if candidate.startswith(_URL_SCHEMES):
        return bool(extract_domain(candidate))

    # Judge only the host part; drop any port, path, query or fragment.
    host = re.split(r"[/?#:]", candidate, maxsplit=1)[0]
    return bool(_HOSTNAME_RE.match(host))


def _is_website_header(cell):
    """Return True when a header cell names a website column."""
    return cell.strip().lower() in WEBSITE_HEADER_NAMES


def _read_csv_text(file_obj):
    """Return the CSV text held by an upload object, a text string or bytes."""
    if hasattr(file_obj, "name"):
        # This is a file path (NamedString from Gradio).
        # utf-8-sig drops the BOM that spreadsheet exports often prepend;
        # newline="" leaves line endings for the csv module to interpret.
        with open(file_obj.name, "r", encoding="utf-8-sig", newline="") as f:
            return f.read()
    if isinstance(file_obj, str):
        # This is already a string
        return file_obj.lstrip("\ufeff")
    # This might be bytes
    return file_obj.decode("utf-8-sig")


def _website_column_candidates(first_row):
    """Return ``(index, debug message)`` pairs to try, most preferred first."""
    candidates = []
    # First, try column H (index 7)
    if len(first_row) > COLUMN_H_INDEX:
        candidates.append(
            (COLUMN_H_INDEX, "Using column H (index 7) as specified")
        )
    # Then the first column whose header names a website.
    for index, col in enumerate(first_row):
        if _is_website_header(col):
            if all(index != tried for tried, _ in candidates):
                candidates.append(
                    (index, f"Auto-detected website column at index {index}: '{col}'")
                )
            break
    return candidates


def _scan_for_url_column(data_rows, first_row_number, debug_info):
    """Return the index of the first cell that looks like a URL, or None."""
    for row_number, row in enumerate(data_rows, start=first_row_number):
        for index, cell in enumerate(row):
            if is_valid_url(cell):
                debug_info.append(
                    f"Found URLs in column {index} (row {row_number}): '{cell}'"
                )
                return index
    return None


def _extract_websites(data_rows, column_index, first_row_number, debug_info):
    """Collect the valid URLs found in *column_index*, logging every row."""
    found = []
    for row_number, row in enumerate(data_rows, start=first_row_number):
        if len(row) <= column_index:
            debug_info.append(
                f"Row {row_number}: Has only {len(row)} columns, "
                f"need at least {column_index + 1}"
            )
            continue
        website_url = row[column_index].strip()
        if not website_url:
            debug_info.append(f"Row {row_number}: Empty cell in website column")
        elif is_valid_url(website_url):
            found.append(website_url)
            debug_info.append(f"Found website in row {row_number}: {website_url}")
        else:
            debug_info.append(
                f"Row {row_number}: '{website_url}' doesn't look like a valid URL"
            )
    return found


def parse_csv_file(file_obj):
    """Parse a CSV upload and extract website URLs from column H (or auto-detect).

    Columns are tried in this order until one yields URLs: column H, the
    first column with a website-like header, then the first column found to
    contain a URL-shaped cell.

    Args:
        file_obj: Object with a ``name`` path attribute, CSV text, or bytes.

    Returns:
        ``(websites, debug_info)``: the URLs found and a list of
        human-readable diagnostic lines. Errors are reported through
        ``debug_info`` with an empty URL list; nothing is raised.
    """
    websites = []
    debug_info = []

    try:
        # Handle different file input types
        content = _read_csv_text(file_obj)

        # Parse CSV
        rows = list(csv.reader(io.StringIO(content)))
        if not rows:
            debug_info.append("CSV file is empty")
            return [], debug_info

        debug_info.append(f"Total rows in CSV: {len(rows)}")

        # Analyze the first few rows to understand structure
        first_row = rows[0]
        debug_info.append(f"First row has {len(first_row)} columns: {first_row}")
        if len(rows) > 1:
            debug_info.append(f"Second row has {len(rows[1])} columns: {rows[1]}")

        # Check if first row looks like headers
        has_headers = any(_is_website_header(col) for col in first_row)
        start_row = 1 if has_headers else 0
        data_rows = rows[start_row:]
        first_row_number = start_row + 1

        # Try the fixed and header-named columns first.
        for column_index, note in _website_column_candidates(first_row):
            debug_info.append(note)
            websites = _extract_websites(
                data_rows, column_index, first_row_number, debug_info
            )
            if websites:
                break
        else:
            # No preferred column produced URLs: scan all columns for URLs.
            debug_info.append(
                "No obvious website column found, scanning all columns for URLs..."
            )
            column_index = _scan_for_url_column(
                data_rows, first_row_number, debug_info
            )
            if column_index is None:
                debug_info.append("ERROR: Could not find any column with website URLs")
                return [], debug_info
            websites = _extract_websites(
                data_rows, column_index, first_row_number, debug_info
            )

        debug_info.append(f"Total websites extracted: {len(websites)}")
        if websites:
            debug_info.append(f"Sample websites: {websites[:5]}")

        return websites, debug_info

    except Exception as e:
        # UI boundary: surface the failure in the debug report, do not crash.
        debug_info.append(f"Error parsing CSV: {e}")
        return [], debug_info


def _coerce_limit(max_results, default=10):
    """Return *max_results* as a non-negative int usable as a slice bound."""
    try:
        limit = int(max_results)
    except (TypeError, ValueError):
        return default
    return max(limit, 0)


def _contacts_to_csv(contacts, columns):
    """Render *contacts* as CSV text with proper quoting of special characters."""
    buffer = io.StringIO()
    writer = csv.writer(buffer, lineterminator="\n")
    writer.writerow([header for header, _ in columns])
    for contact in contacts:
        writer.writerow([contact[key] for _, key in columns])
    return buffer.getvalue()


def search_csv_websites(csv_file, max_results=10):
    """Search for contacts from websites listed in CSV file.

    At most ``MAX_CSV_WEBSITES`` websites are searched. Contacts are
    de-duplicated by e-mail and truncated to *max_results*.

    Returns:
        ``(report_text, csv_text)``. ``csv_text`` is ``""`` whenever no
        contacts are reported (missing file, no websites, no matches, error).
    """
    if csv_file is None:
        return "Please upload a CSV file", ""

    try:
        # Parse CSV file
        websites, debug_info = parse_csv_file(csv_file)
        debug_text = "\n".join(debug_info)

        if not websites:
            error_msg = "".join(
                [
                    "No websites found in the CSV file.\n\n",
                    "DEBUG INFORMATION:\n" + debug_text + "\n\n",
                    "TROUBLESHOOTING:\n",
                    "1. Ensure your CSV has website URLs in column H (8th column)\n",
                    "2. Or have a column header named 'website', 'url', 'domain', etc.\n",
                    "3. Check that URLs are properly formatted "
                    "(e.g., example.com or https://example.com)\n",
                    "4. Verify the CSV file is not corrupted\n",
                ]
            )
            return error_msg, ""

        all_contacts = []
        processed_websites = []

        # Search each website (limited to the first MAX_CSV_WEBSITES)
        searched_websites = websites[:MAX_CSV_WEBSITES]
        for website in searched_websites:
            print(f"Processing website: {website}")
            contacts = simulate_website_scraping(website)
            if contacts:
                all_contacts.extend(contacts)
                processed_websites.append(website)
                print(f"Found {len(contacts)} contacts for {website}")
            else:
                print(f"No contacts found for {website}")

        # Remove duplicates based on email
        unique_contacts = []
        seen_emails = set()
        for contact in all_contacts:
            if contact["email"] not in seen_emails:
                unique_contacts.append(contact)
                seen_emails.add(contact["email"])

        # Limit results
        unique_contacts = unique_contacts[: _coerce_limit(max_results)]

        if not unique_contacts:
            result_msg = "".join(
                [
                    f"No contacts found for the {len(websites)} websites "
                    "from the CSV file.\n\n",
                    "DEBUG INFORMATION:\n" + debug_text + "\n\n",
                    f"Websites processed: {', '.join(websites[:10])}\n",
                    "This might be because the websites are not in our sample database.",
                ]
            )
            return result_msg, ""

        # Format results
        separator = f"{'=' * 60}\n\n"
        parts = [
            "CONTACT DISCOVERY REPORT\n",
            "CSV Processing Details:\n",
            f"Total Websites in CSV: {len(websites)}\n",
            # Number actually searched, not the number that had contacts.
            f"Websites Processed: {len(searched_websites)}\n",
            f"Websites with Contacts: {len(processed_websites)}\n",
            f"Unique Contacts Found: {len(unique_contacts)}\n",
            f"Processed Websites: {', '.join(processed_websites)}\n",
            separator,
            # Add debug info
            "DEBUG INFORMATION:\n" + debug_text + "\n\n",
            separator,
        ]
        for i, contact in enumerate(unique_contacts, 1):
            parts.extend(
                [
                    f"CONTACT #{i}\n",
                    f"Name: {contact['first_name']} {contact['last_name']}\n",
                    f"Position: {contact['job_title']}\n",
                    f"Email: {contact['email']}\n",
                    f"Phone: {contact['phone']}\n",
                    f"Company: {contact['company']}\n",
                    f"Website: {contact['website']}\n\n",
                ]
            )
        results_text = "".join(parts)

        # Create CSV output
        csv_output = _contacts_to_csv(unique_contacts, _CSV_COLUMNS_WITH_WEBSITE)

        return results_text, csv_output

    except Exception as e:
        # UI boundary: show the error to the user instead of raising.
        return f"Error processing CSV file: {str(e)}", ""


def search_website_contacts(website_url, max_results=10):
    """Main function to search for contacts on a website.

    Args:
        website_url: Website address; the scheme is optional.
        max_results: Maximum number of contacts to report.

    Returns:
        ``(report_text, csv_text)``. ``csv_text`` is ``""`` when the URL is
        blank, no contacts match, or an error occurs.
    """
    website_url = (website_url or "").strip()
    if not website_url:
        return "Please enter a website URL", ""

    # Clean up URL
    if not website_url.lower().startswith(_URL_SCHEMES):
        website_url = "https://" + website_url

    try:
        # Simulate finding contacts
        contacts = simulate_website_scraping(website_url)

        if not contacts:
            return (
                f"No contacts found on {website_url}. \n\n"
                "This website is not in our contact database. "
                "Try one of the sample websites listed below, or the website "
                "might not have publicly available contact information."
            ), ""

        # Limit results
        contacts = contacts[: _coerce_limit(max_results)]

        # Format results
        parts = [
            "CONTACT INTELLIGENCE REPORT\n",
            f"Website: {website_url}\n",
            f"Contacts Found: {len(contacts)}\n",
            f"{'=' * 60}\n\n",
        ]
        for i, contact in enumerate(contacts, 1):
            parts.extend(
                [
                    f"CONTACT #{i}\n",
                    f"First Name: {contact['first_name']}\n",
                    f"Last Name: {contact['last_name']}\n",
                    f"Position: {contact['job_title']}\n",
                    f"Email: {contact['email']}\n",
                    f"Phone: {contact['phone']}\n",
                    f"Company: {contact['company']}\n\n",
                ]
            )
        results_text = "".join(parts)

        # Create a simple table format for the second output
        table_text = _contacts_to_csv(contacts, _CSV_COLUMNS)

        return results_text, table_text

    except Exception as e:
        # UI boundary: show the error to the user instead of raising.
        return f"Error searching website: {str(e)}", ""


def get_all_available_websites():
    """Get list of all available websites from the database, sorted, one per line."""
    return "\n".join(sorted({contact["website"] for contact in CONTACTS_DB}))


# Custom CSS (one rule per literal; the pieces concatenate to a single string)
custom_css = (
    " .gradio-container { background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; } "
    ".main-header { background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 50%, #1e40af 100%); color: white; padding: 40px 20px; text-align: center; border-radius: 15px; margin-bottom: 30px; box-shadow: 0 10px 30px rgba(30, 58, 138, 0.3); } "
    ".main-header h1 { font-size: 2.5em; margin: 0; font-weight: 700; text-shadow: 2px 2px 4px rgba(0,0,0,0.3); } "
    ".main-header p { font-size: 1.2em; margin: 15px 0 0 0; opacity: 0.9; } "
    ".corporate-card { background: white; border: 1px solid #d1d5db; border-radius: 12px; padding: 25px; margin: 15px 0; box-shadow: 0 4px 15px rgba(0,0,0,0.1); border-left: 5px solid #1e40af; } "
    ".tips-section { background: linear-gradient(135deg, #f1f5f9 0%, #e2e8f0 100%); border: 2px solid #cbd5e1; border-radius: 15px; padding: 20px; margin: 10px 0; } "
    ".tips-section h3 { color: #1e40af; margin-top: 0; font-weight: 600; } "
    ".primary-btn { background: linear-gradient(135deg, #1e40af 0%, #3b82f6 100%); color: white; border: none; border-radius: 8px; padding: 15px 30px; font-weight: 600; font-size: 16px; transition: all 0.3s ease; } "
    ".primary-btn:hover { background: linear-gradient(135deg, #1e3a8a 0%, #2563eb 100%); transform: translateY(-2px); box-shadow: 0 6px 20px rgba(30, 64, 175, 0.4); } "
    ".secondary-btn { background: white; color: #374151; border: 2px solid #d1d5db; border-radius: 6px; padding: 8px 16px; font-weight: 500; transition: all 0.3s ease; } "
    ".secondary-btn:hover { border-color: #1e40af; color: #1e40af; background: #f8fafc; } "
    ".custom-input { border: 2px solid #d1d5db; border-radius: 8px; padding: 12px; font-size: 16px; transition: border-color 0.3s ease; } "
    ".custom-input:focus { border-color: #3b82f6; box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1); } "
    ".results-container { background: white; border: 1px solid #e5e7eb; border-radius: 10px; padding: 20px; margin: 15px 0; box-shadow: 0 2px 10px rgba(0,0,0,0.05); } "
    ".section-header { background: linear-gradient(135deg, #64748b 0%, #475569 100%); color: white; padding: 15px 20px; border-radius: 10px; margin: 20px 0 15px 0; font-weight: 600; } "
)

# Create Gradio interface
# NOTE(review): the gr.HTML literals below hold text only, while custom_css
# defines .main-header, .section-header, .tips-section and .corporate-card
# that nothing here references. The HTML tags were presumably stripped from
# this copy of the file; restore them from the original source.
# NOTE(review): block nesting below was reconstructed from statement order;
# confirm the layout against the original file.
with gr.Blocks(
    css=custom_css,
    title="Contact Discovery Platform",
    theme=gr.themes.Base(),
) as app:
    gr.HTML(
        "\n\nContact Discovery Platform\n\n"
        "Professional Contact Discovery & Lead Generation Tool\n\n"
        "Advanced website analysis for contact intelligence gathering\n\n"
    )

    with gr.Tabs():
        # Single Website Search Tab
        with gr.TabItem("Single Website Search"):
            with gr.Row():
                with gr.Column(scale=2):
                    gr.HTML("\nSearch Parameters\n")
                    website_input = gr.Textbox(
                        label="Target Website URL",
                        placeholder="Enter company website (e.g., techflowsolutions.com)",
                        value="",
                        elem_classes=["custom-input"],
                    )
                    with gr.Row():
                        max_results = gr.Slider(
                            minimum=1,
                            maximum=20,
                            value=8,
                            step=1,
                            label="Maximum Results",
                            elem_classes=["custom-input"],
                        )
                    search_btn = gr.Button(
                        "Execute Search",
                        variant="primary",
                        size="lg",
                        elem_classes=["primary-btn"],
                    )

            gr.HTML("\nSearch Results\n")
            with gr.Row():
                results_display = gr.Textbox(
                    label="Contact Intelligence Report",
                    lines=18,
                    max_lines=35,
                    show_copy_button=True,
                    elem_classes=["results-container"],
                )
                csv_output = gr.Textbox(
                    label="Export Data (CSV Format)",
                    lines=18,
                    max_lines=35,
                    show_copy_button=True,
                    elem_classes=["results-container"],
                )

        # CSV Upload Tab
        with gr.TabItem("CSV Bulk Search"):
            with gr.Row():
                with gr.Column(scale=2):
                    gr.HTML("\nCSV Upload\n")
                    csv_file = gr.File(
                        label="Upload CSV File",
                        file_types=[".csv"],
                        elem_classes=["custom-input"],
                    )
                    gr.HTML(
                        "\nCSV Format - Multiple Options:\n"
                        "Option 1: Website URLs in Column H (8th column)\n"
                        "Option 2: Column header named 'website', 'url', 'domain', 'site', or 'web'\n"
                        "Option 3: System will auto-detect columns with valid URLs\n"
                        "\n"
                        "Examples: techflowsolutions.com, https://example.com, www.company.com\n"
                        "Note: The system will show detailed debugging information about your CSV structure\n"
                    )
                    with gr.Row():
                        csv_max_results = gr.Slider(
                            minimum=1,
                            maximum=50,
                            value=20,
                            step=1,
                            label="Maximum Results",
                            elem_classes=["custom-input"],
                        )
                    csv_search_btn = gr.Button(
                        "Process CSV",
                        variant="primary",
                        size="lg",
                        elem_classes=["primary-btn"],
                    )

            gr.HTML("\nCSV Results\n")
            with gr.Row():
                csv_results_display = gr.Textbox(
                    label="CSV Processing Report",
                    lines=18,
                    max_lines=35,
                    show_copy_button=True,
                    elem_classes=["results-container"],
                )
                csv_export_output = gr.Textbox(
                    label="Export Data (CSV Format)",
                    lines=18,
                    max_lines=35,
                    show_copy_button=True,
                    elem_classes=["results-container"],
                )

    # Sample websites section
    with gr.Accordion("Sample Websites Database", open=False):
        gr.HTML("\n")
        sample_websites = gr.Textbox(
            label="Available Websites in Database",
            value=get_all_available_websites(),
            lines=8,
            interactive=False,
            elem_classes=["custom-input"],
        )
        gr.HTML("\n")

    # Quick search buttons
    gr.HTML("\nQuick Access Sample Websites\n")
    with gr.Row():
        quick_btn1 = gr.Button("TechFlow Solutions", size="sm", elem_classes=["secondary-btn"])
        quick_btn2 = gr.Button("GreenLeaf Consulting", size="sm", elem_classes=["secondary-btn"])
        quick_btn3 = gr.Button("BlueSky Marketing", size="sm", elem_classes=["secondary-btn"])
        quick_btn4 = gr.Button("Quantum Dynamics", size="sm", elem_classes=["secondary-btn"])
    with gr.Row():
        quick_btn5 = gr.Button("Stellar Logistics", size="sm", elem_classes=["secondary-btn"])
        quick_btn6 = gr.Button("Nexus Financial", size="sm", elem_classes=["secondary-btn"])
        quick_btn7 = gr.Button("Horizon Health", size="sm", elem_classes=["secondary-btn"])
        quick_btn8 = gr.Button("Phoenix Manufacturing", size="sm", elem_classes=["secondary-btn"])

    # Event handlers
    search_btn.click(
        fn=search_website_contacts,
        inputs=[website_input, max_results],
        outputs=[results_display, csv_output],
    )
    csv_search_btn.click(
        fn=search_csv_websites,
        inputs=[csv_file, csv_max_results],
        outputs=[csv_results_display, csv_export_output],
    )

    # Quick search button handlers: each fills the URL box with a sample domain
    quick_btn1.click(lambda: "techflowsolutions.com", outputs=website_input)
    quick_btn2.click(lambda: "greenleafconsult.com", outputs=website_input)
    quick_btn3.click(lambda: "blueskymarketing.net", outputs=website_input)
    quick_btn4.click(lambda: "quantumdynamics.org", outputs=website_input)
    quick_btn5.click(lambda: "stellarlogistics.biz", outputs=website_input)
    quick_btn6.click(lambda: "nexusfinancial.pro", outputs=website_input)
    quick_btn7.click(lambda: "horizonhealth.care", outputs=website_input)
    quick_btn8.click(lambda: "phoenixmfg.com", outputs=website_input)

    # Examples
    gr.Examples(
        examples=[
            ["techflowsolutions.com", 5],
            ["greenleafconsult.com", 3],
            ["blueskymarketing.net", 4],
            ["quantumdynamics.org", 6],
        ],
        inputs=[website_input, max_results],
        label="Sample Searches",
    )

    # Footer
    gr.HTML(
        "\n\nContact Intelligence Platform\n\n"
        "Professional-grade contact discovery and lead generation technology\n\n"
        "Powered by advanced web intelligence algorithms\n\n"
    )

if __name__ == "__main__":
    app.launch()