# apptest2 / app.py
# JayBene1's picture
# Update app.py
# f1f515b verified
import gradio as gr
import requests
import re
import json
import csv
import io
from urllib.parse import urlparse, urljoin
import time
import random
# Mock contacts database (same as your API)
def _build_contacts_db():
    """Assemble the mock contact records used by every search in this module.

    The data is grouped by company so the shared ``company`` / ``website``
    values are written once. Each person becomes one dict with the keys
    ``id``, ``first_name``, ``last_name``, ``email``, ``phone``,
    ``job_title``, ``company`` and ``website``; ids are assigned
    sequentially starting at 1 in listing order.
    """
    companies = [
        ("TechFlow Solutions", "https://techflowsolutions.com", [
            ("Sarah", "Chen", "sarah.chen@techflowsolutions.com", "+1-555-0101", "CTO"),
            ("Marcus", "Rodriguez", "m.rodriguez@techflowsolutions.com", "+1-555-0102", "Senior Developer"),
        ]),
        ("GreenLeaf Consulting", "https://greenleafconsult.com", [
            ("Emma", "Thompson", "emma@greenleafconsult.com", "+1-555-0201", "Managing Partner"),
            ("David", "Park", "david.park@greenleafconsult.com", "+1-555-0202", "Environmental Analyst"),
        ]),
        ("BlueSky Marketing", "https://blueskymarketing.net", [
            ("Jessica", "Williams", "jessica@blueskymarketing.net", "+1-555-0301", "Creative Director"),
            ("Ryan", "Mitchell", "ryan.mitchell@blueskymarketing.net", "+1-555-0302", "Account Manager"),
        ]),
        ("Quantum Dynamics Corp", "https://quantumdynamics.org", [
            ("Lisa", "Zhang", "l.zhang@quantumdynamics.org", "+1-555-0401", "Research Director"),
            ("Ahmed", "Hassan", "ahmed.hassan@quantumdynamics.org", "+1-555-0402", "Quantum Engineer"),
        ]),
        ("Stellar Logistics", "https://stellarlogistics.biz", [
            ("Maria", "Gonzalez", "maria@stellarlogistics.biz", "+1-555-0501", "Operations Manager"),
            ("James", "O'Connor", "james.oconnor@stellarlogistics.biz", "+1-555-0502", "Fleet Coordinator"),
        ]),
        ("Nexus Financial", "https://nexusfinancial.pro", [
            ("Robert", "Kim", "robert.kim@nexusfinancial.pro", "+1-555-0601", "Senior Advisor"),
            ("Catherine", "Lee", "catherine@nexusfinancial.pro", "+1-555-0602", "Investment Analyst"),
        ]),
        ("Horizon Health Systems", "https://horizonhealth.care", [
            ("Michael", "Johnson", "m.johnson@horizonhealth.care", "+1-555-0701", "Chief Medical Officer"),
            ("Jennifer", "Adams", "jennifer.adams@horizonhealth.care", "+1-555-0702", "Head Nurse"),
        ]),
        ("Phoenix Manufacturing", "https://phoenixmfg.com", [
            ("Tony", "Ricci", "tony.ricci@phoenixmfg.com", "+1-555-0801", "Plant Manager"),
            ("Linda", "Martinez", "linda.martinez@phoenixmfg.com", "+1-555-0802", "Quality Control Supervisor"),
        ]),
        ("Alpine Education Group", "https://alpineeducation.edu", [
            ("Patricia", "White", "patricia.white@alpineeducation.edu", "+1-555-0901", "Director of Programs"),
            ("Kevin", "Brown", "kevin.brown@alpineeducation.edu", "+1-555-0902", "Curriculum Specialist"),
        ]),
        ("Crimson Creative Studio", "https://crimsoncreative.studio", [
            ("Sophia", "Taylor", "sophia@crimsoncreative.studio", "+1-555-1001", "Art Director"),
            ("Alex", "Cooper", "alex.cooper@crimsoncreative.studio", "+1-555-1002", "Graphic Designer"),
        ]),
    ]

    records = []
    for company, website, people in companies:
        for first_name, last_name, email, phone, job_title in people:
            records.append({
                "id": len(records) + 1,
                "first_name": first_name,
                "last_name": last_name,
                "email": email,
                "phone": phone,
                "job_title": job_title,
                "company": company,
                "website": website,
            })
    return records


CONTACTS_DB = _build_contacts_db()
def extract_domain(url):
    """Return the bare host name of *url*.

    The result is lower-cased, has no port or ``user:password@`` prefix,
    and has a single leading ``www.`` removed. Input without an
    ``http://`` / ``https://`` scheme (checked case-insensitively) is
    treated as an ``https`` URL. Surrounding whitespace is ignored.

    Args:
        url: URL or bare domain, e.g. ``"https://www.Example.com/about"``
            or ``"example.com"``.

    Returns:
        The host name (``"example.com"``), or ``""`` when *url* is empty,
        is not a string, or cannot be parsed.
    """
    try:
        url = url.strip()
        # Compare case-insensitively so "HTTP://x.com" is not prefixed again.
        if not url.lower().startswith(('http://', 'https://')):
            url = 'https://' + url
        # .hostname (unlike .netloc) drops the port and any credentials and
        # is already lower-cased; it is None when no host is present.
        domain = urlparse(url).hostname or ""
    except (AttributeError, TypeError, ValueError):
        # Non-string input, or a URL urlparse rejects (e.g. malformed IPv6).
        return ""
    # Remove www. if present
    if domain.startswith('www.'):
        domain = domain[4:]
    return domain
def find_contacts_by_website(website_url):
    """Return the contacts in ``CONTACTS_DB`` whose website matches *website_url*.

    A contact matches when the searched domain equals the contact's
    domain or is a subdomain of it (``blog.example.com`` matches a contact
    at ``example.com``). Matching is done on whole DNS labels, so a
    fragment such as ``"com"`` or a look-alike such as
    ``notexample.com`` does not match ``example.com``.

    Args:
        website_url: URL or bare domain to look up.

    Returns:
        A list of contact dicts in database order; empty when the URL has
        no usable domain or nothing matches.
    """
    target_domain = extract_domain(website_url)
    if not target_domain:
        return []
    print(f"Searching for domain: {target_domain}")  # Debug info
    matching_contacts = []
    for contact in CONTACTS_DB:
        contact_domain = extract_domain(contact['website'])
        print(f"Comparing with: {contact_domain}")  # Debug info
        if not contact_domain:
            # A contact without a parseable website must never match.
            continue
        # Exact domain match or subdomain match. The "." prefix anchors the
        # comparison on a label boundary instead of an arbitrary substring.
        if target_domain == contact_domain or target_domain.endswith('.' + contact_domain):
            matching_contacts.append(contact)
    print(f"Found {len(matching_contacts)} matching contacts")  # Debug info
    return matching_contacts
def simulate_website_scraping(url):
    """Pretend to scrape *url* and return the contacts known for it.

    Sleeps for a random 0.5-1 second interval to imitate network latency,
    then delegates the lookup to ``find_contacts_by_website``.
    """
    # Artificial latency so the UI behaves like a real scraper would.
    pause_seconds = random.uniform(0.5, 1)
    time.sleep(pause_seconds)
    # The "scrape" result is simply the database lookup for this URL.
    return find_contacts_by_website(url)
# Anything that starts with an explicit http(s) scheme followed by content.
_SCHEME_URL_RE = re.compile(r'^https?://\S', re.IGNORECASE)
# A scheme-less host: one or more dot-terminated labels (hyphens allowed
# inside a label), an alphabetic top-level label of at least two letters,
# and an optional port / path / query / fragment. Anchored at both ends so
# e-mail addresses and free text are rejected.
_BARE_DOMAIN_RE = re.compile(r'^(?:\w[\w-]*\.)+[^\W\d_]{2,}(?:[:/?#]\S*)?$')


def is_valid_url(url):
    """Check whether a string looks like a website URL or bare domain.

    This is a shape check only; nothing is resolved or fetched.

    Accepted: strings starting with ``http://`` / ``https://``, and
    scheme-less hosts such as ``example.com``, ``www.my-company.com`` or
    ``sub.domain.co.uk/path``. Rejected: empty or whitespace-only strings,
    non-strings, plain words, decimal numbers such as ``3.14``, and e-mail
    addresses.

    Args:
        url: Candidate value, typically one CSV cell.

    Returns:
        ``True`` when the value looks like a URL, otherwise ``False``.
    """
    if not isinstance(url, str):
        return False
    url = url.strip()
    if not url:
        return False
    if _SCHEME_URL_RE.match(url):
        return True
    return _BARE_DOMAIN_RE.match(url) is not None
# Header names (compared stripped and lower-cased) that mark the website column.
_WEBSITE_HEADER_NAMES = ('website', 'url', 'domain', 'site', 'web')


def _read_csv_text(file_obj):
    """Return the CSV text carried by an upload object, a string, or bytes."""
    if hasattr(file_obj, 'name'):
        # This is a file path (NamedString from Gradio).
        # utf-8-sig drops the BOM that spreadsheet exports prepend, which
        # would otherwise become part of the first header cell; newline=''
        # leaves line endings for the csv module to interpret.
        with open(file_obj.name, 'r', encoding='utf-8-sig', newline='') as f:
            return f.read()
    if isinstance(file_obj, str):
        # This is already the CSV content.
        return file_obj.lstrip('\ufeff')
    # Otherwise assume raw bytes.
    return file_obj.decode('utf-8-sig')


def _extract_websites_from_column(rows, column_index, start_row, debug_info):
    """Collect URL-looking cells of one column, logging one debug line per row."""
    found = []
    for row_idx, row in enumerate(rows[start_row:], start=start_row + 1):
        if len(row) <= column_index:
            debug_info.append(f"Row {row_idx}: Has only {len(row)} columns, need at least {column_index + 1}")
            continue
        website_url = row[column_index].strip()
        if not website_url:
            debug_info.append(f"Row {row_idx}: Empty cell in website column")
        elif is_valid_url(website_url):
            found.append(website_url)
            debug_info.append(f"Found website in row {row_idx}: {website_url}")
        else:
            debug_info.append(f"Row {row_idx}: '{website_url}' doesn't look like a valid URL")
    return found


def _scan_for_url_column(rows, start_row, debug_info):
    """Return the index of the first column holding a URL-looking cell, or None."""
    for row_idx, row in enumerate(rows[start_row:], start=start_row + 1):
        for col_idx, cell in enumerate(row):
            if is_valid_url(cell):
                debug_info.append(f"Found URLs in column {col_idx} (row {row_idx}): '{cell}'")
                return col_idx
    return None


def parse_csv_file(file_obj):
    """Parse a CSV upload and extract website URLs from it.

    The website column is chosen in this order, moving on only when an
    option is unavailable or yields no valid URL:

    1. Column H (index 7), when the first row has at least 8 columns.
    2. The first column whose header is one of ``website``, ``url``,
       ``domain``, ``site`` or ``web`` (case-insensitive, whitespace
       ignored).
    3. The first column in which any data cell looks like a URL.

    The first row is skipped as a header only when it contains one of the
    header names above.

    Args:
        file_obj: An object with a ``name`` attribute holding a file path
            (as Gradio provides), a string of CSV text, or UTF-8 bytes.

    Returns:
        ``(websites, debug_info)``: the extracted URL strings in row order
        and a list of human-readable diagnostic lines. Errors never
        propagate; they are reported in ``debug_info`` with an empty
        ``websites`` list.
    """
    debug_info = []
    try:
        content = _read_csv_text(file_obj)
        # Parse CSV
        rows = list(csv.reader(io.StringIO(content, newline='')))
        if not rows:
            debug_info.append("CSV file is empty")
            return [], debug_info
        debug_info.append(f"Total rows in CSV: {len(rows)}")
        # Show the first rows so users can see how the file was read.
        debug_info.append(f"First row has {len(rows[0])} columns: {rows[0]}")
        if len(rows) > 1:
            debug_info.append(f"Second row has {len(rows[1])} columns: {rows[1]}")

        # Check if first row looks like headers
        first_row = rows[0]
        header_index = next(
            (i for i, col in enumerate(first_row)
             if col.strip().lower() in _WEBSITE_HEADER_NAMES),
            None,
        )
        has_headers = header_index is not None
        start_row = 1 if has_headers else 0

        # Ordered (column index, debug message) candidates.
        candidates = []
        if len(first_row) >= 8:
            candidates.append((7, "Using column H (index 7) as specified"))
        if has_headers and header_index != 7:
            candidates.append((
                header_index,
                f"Auto-detected website column at index {header_index}: '{first_row[header_index]}'",
            ))

        websites = []
        for column_index, reason in candidates:
            debug_info.append(reason)
            websites = _extract_websites_from_column(rows, column_index, start_row, debug_info)
            if websites:
                break
            debug_info.append(f"Column at index {column_index} contained no valid URLs")

        if not websites:
            # Last resort: look for URL-shaped cells anywhere in the data.
            debug_info.append("No obvious website column found, scanning all columns for URLs...")
            scanned_index = _scan_for_url_column(rows, start_row, debug_info)
            if scanned_index is None:
                debug_info.append("ERROR: Could not find any column with website URLs")
                return [], debug_info
            websites = _extract_websites_from_column(rows, scanned_index, start_row, debug_info)

        debug_info.append(f"Total websites extracted: {len(websites)}")
        if websites:
            debug_info.append(f"Sample websites: {websites[:5]}")
        return websites, debug_info
    except Exception as e:
        # Deliberate best-effort boundary: unreadable uploads are reported
        # to the user through debug_info instead of crashing the UI.
        debug_info.append(f"Error parsing CSV: {e}")
        return [], debug_info
def search_csv_websites(csv_file, max_results=10):
    """Search for contacts from websites listed in a CSV file.

    At most the first 20 websites from the file are looked up. Contacts
    are de-duplicated by e-mail address and then truncated to
    *max_results*.

    Args:
        csv_file: Uploaded file as accepted by ``parse_csv_file``, or
            ``None`` when nothing was uploaded.
        max_results: Maximum number of contacts to return. Integral floats
            (as a UI slider may send) are accepted; ``None`` means no limit.

    Returns:
        ``(report_text, csv_text)``. ``csv_text`` is ``""`` whenever no
        contacts are returned (missing upload, no websites, no matches,
        or an error, in which case ``report_text`` explains why).
    """
    if csv_file is None:
        return "Please upload a CSV file", ""
    try:
        # Parse CSV file
        websites, debug_info = parse_csv_file(csv_file)
        debug_text = "\n".join(debug_info)
        if not websites:
            error_msg = "".join([
                "No websites found in the CSV file.\n\n",
                "DEBUG INFORMATION:\n" + debug_text + "\n\n",
                "TROUBLESHOOTING:\n",
                "1. Ensure your CSV has website URLs in column H (8th column)\n",
                "2. Or have a column header named 'website', 'url', 'domain', etc.\n",
                "3. Check that URLs are properly formatted (e.g., example.com or https://example.com)\n",
                "4. Verify the CSV file is not corrupted\n",
            ])
            return error_msg, ""

        # A slider may deliver a float; slicing needs an int.
        limit = None if max_results is None else int(max_results)

        processed_websites = websites[:20]  # Limit to first 20 websites
        websites_with_contacts = []
        unique_contacts = []
        seen_emails = set()
        # Search each website
        for website in processed_websites:
            print(f"Processing website: {website}")
            contacts = simulate_website_scraping(website)
            if not contacts:
                print(f"No contacts found for {website}")
                continue
            websites_with_contacts.append(website)
            print(f"Found {len(contacts)} contacts for {website}")
            # Remove duplicates based on email
            for contact in contacts:
                if contact['email'] not in seen_emails:
                    seen_emails.add(contact['email'])
                    unique_contacts.append(contact)

        # Limit results
        unique_contacts = unique_contacts[:limit]
        if not unique_contacts:
            result_msg = "".join([
                f"No contacts found for the {len(websites)} websites from the CSV file.\n\n",
                "DEBUG INFORMATION:\n" + debug_text + "\n\n",
                f"Websites processed: {', '.join(websites[:10])}\n",
                "This might be because the websites are not in our sample database.",
            ])
            return result_msg, ""

        # Format results. "Processed" counts every website looked up, which
        # is distinct from the subset that produced at least one contact.
        report = [
            "CONTACT DISCOVERY REPORT\n",
            "CSV Processing Details:\n",
            f"Total Websites in CSV: {len(websites)}\n",
            f"Websites Processed: {len(processed_websites)}\n",
            f"Websites with Contacts: {len(websites_with_contacts)}\n",
            f"Unique Contacts Found: {len(unique_contacts)}\n",
            f"Processed Websites: {', '.join(processed_websites)}\n",
            f"{'='*60}\n\n",
            # Add debug info
            "DEBUG INFORMATION:\n" + debug_text + "\n\n",
            f"{'='*60}\n\n",
        ]
        for i, contact in enumerate(unique_contacts, 1):
            report.extend([
                f"CONTACT #{i}\n",
                f"Name: {contact['first_name']} {contact['last_name']}\n",
                f"Position: {contact['job_title']}\n",
                f"Email: {contact['email']}\n",
                f"Phone: {contact['phone']}\n",
                f"Company: {contact['company']}\n",
                f"Website: {contact['website']}\n\n",
            ])
        results_text = "".join(report)

        # Create CSV output with the csv module so values containing
        # commas, quotes or newlines are quoted correctly.
        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator="\n")
        writer.writerow(["First Name", "Last Name", "Job Title", "Email", "Phone", "Company", "Website"])
        for contact in unique_contacts:
            writer.writerow([
                contact['first_name'],
                contact['last_name'],
                contact['job_title'],
                contact['email'],
                contact['phone'],
                contact['company'],
                contact['website'],
            ])
        return results_text, buffer.getvalue()
    except Exception as e:
        # UI boundary: show the failure in the report box instead of raising.
        return f"Error processing CSV file: {str(e)}", ""
def search_website_contacts(website_url, max_results=10):
    """Main function to search for contacts on a single website.

    Args:
        website_url: URL or bare domain typed by the user. Surrounding
            whitespace is ignored and ``https://`` is prepended when no
            http(s) scheme is present.
        max_results: Maximum number of contacts to return. Integral floats
            (as a UI slider may send) are accepted; ``None`` means no limit.

    Returns:
        ``(report_text, csv_text)``. ``csv_text`` is ``""`` when the input
        is empty, nothing is found, or an error occurs; ``report_text``
        then carries the explanation.
    """
    website_url = website_url.strip() if website_url else ""
    if not website_url:
        return "Please enter a website URL", ""
    # Clean up URL (scheme compared case-insensitively)
    if not website_url.lower().startswith(('http://', 'https://')):
        website_url = 'https://' + website_url
    try:
        # Simulate finding contacts
        contacts = simulate_website_scraping(website_url)
        if not contacts:
            return f"No contacts found on {website_url}. \n\nThis website is not in our contact database. Try one of the sample websites listed below, or the website might not have publicly available contact information.", ""
        # Limit results; a slider may deliver a float and slicing needs an int.
        limit = None if max_results is None else int(max_results)
        contacts = contacts[:limit]

        # Format results
        report = [
            "CONTACT INTELLIGENCE REPORT\n",
            f"Website: {website_url}\n",
            f"Contacts Found: {len(contacts)}\n",
            f"{'='*60}\n\n",
        ]
        for i, contact in enumerate(contacts, 1):
            report.extend([
                f"CONTACT #{i}\n",
                f"First Name: {contact['first_name']}\n",
                f"Last Name: {contact['last_name']}\n",
                f"Position: {contact['job_title']}\n",
                f"Email: {contact['email']}\n",
                f"Phone: {contact['phone']}\n",
                f"Company: {contact['company']}\n\n",
            ])
        results_text = "".join(report)

        # Create the CSV export with the csv module so values containing
        # commas, quotes or newlines are quoted correctly.
        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator="\n")
        writer.writerow(["First Name", "Last Name", "Job Title", "Email", "Phone", "Company"])
        for contact in contacts:
            writer.writerow([
                contact['first_name'],
                contact['last_name'],
                contact['job_title'],
                contact['email'],
                contact['phone'],
                contact['company'],
            ])
        return results_text, buffer.getvalue()
    except Exception as e:
        # UI boundary: show the failure in the report box instead of raising.
        return f"Error searching website: {str(e)}", ""
def get_all_available_websites():
    """Return every distinct website in ``CONTACTS_DB``, one per line, sorted."""
    distinct_sites = {entry['website'] for entry in CONTACTS_DB}
    ordered_sites = sorted(distinct_sites)
    return "\n".join(ordered_sites)
# Custom CSS passed to gr.Blocks(css=...) below.
# The classes are attached to components through `elem_classes` (or used
# directly in gr.HTML markup): main-header, section-header, custom-input,
# primary-btn, secondary-btn and results-container.
# NOTE(review): `.corporate-card` and `.tips-section` are defined here but
# are not referenced anywhere in the visible part of this file.
custom_css = """
.gradio-container {
background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.main-header {
background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 50%, #1e40af 100%);
color: white;
padding: 40px 20px;
text-align: center;
border-radius: 15px;
margin-bottom: 30px;
box-shadow: 0 10px 30px rgba(30, 58, 138, 0.3);
}
.main-header h1 {
font-size: 2.5em;
margin: 0;
font-weight: 700;
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
}
.main-header p {
font-size: 1.2em;
margin: 15px 0 0 0;
opacity: 0.9;
}
.corporate-card {
background: white;
border: 1px solid #d1d5db;
border-radius: 12px;
padding: 25px;
margin: 15px 0;
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
border-left: 5px solid #1e40af;
}
.tips-section {
background: linear-gradient(135deg, #f1f5f9 0%, #e2e8f0 100%);
border: 2px solid #cbd5e1;
border-radius: 15px;
padding: 20px;
margin: 10px 0;
}
.tips-section h3 {
color: #1e40af;
margin-top: 0;
font-weight: 600;
}
.primary-btn {
background: linear-gradient(135deg, #1e40af 0%, #3b82f6 100%);
color: white;
border: none;
border-radius: 8px;
padding: 15px 30px;
font-weight: 600;
font-size: 16px;
transition: all 0.3s ease;
}
.primary-btn:hover {
background: linear-gradient(135deg, #1e3a8a 0%, #2563eb 100%);
transform: translateY(-2px);
box-shadow: 0 6px 20px rgba(30, 64, 175, 0.4);
}
.secondary-btn {
background: white;
color: #374151;
border: 2px solid #d1d5db;
border-radius: 6px;
padding: 8px 16px;
font-weight: 500;
transition: all 0.3s ease;
}
.secondary-btn:hover {
border-color: #1e40af;
color: #1e40af;
background: #f8fafc;
}
.custom-input {
border: 2px solid #d1d5db;
border-radius: 8px;
padding: 12px;
font-size: 16px;
transition: border-color 0.3s ease;
}
.custom-input:focus {
border-color: #3b82f6;
box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1);
}
.results-container {
background: white;
border: 1px solid #e5e7eb;
border-radius: 10px;
padding: 20px;
margin: 15px 0;
box-shadow: 0 2px 10px rgba(0,0,0,0.05);
}
.section-header {
background: linear-gradient(135deg, #64748b 0%, #475569 100%);
color: white;
padding: 15px 20px;
border-radius: 10px;
margin: 20px 0 15px 0;
font-weight: 600;
}
"""
# Create Gradio interface
# Two tabs (single-website search and CSV bulk search) followed by a list of
# the sample websites, quick-fill buttons, example inputs and a footer.
# NOTE(review): the `with` nesting below was reconstructed from a copy of
# this file whose indentation was lost - confirm it against the intended
# layout. Multi-line HTML strings are kept flush-left so their content is
# unchanged.
with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.themes.Base()) as app:
    gr.HTML("""
<div class="main-header">
<h1>Contact Discovery Platform</h1>
<p>Professional Contact Discovery & Lead Generation Tool</p>
<p style="font-size: 0.95em; opacity: 0.8;">Advanced website analysis for contact intelligence gathering</p>
</div>
""")
    with gr.Tabs():
        # Single Website Search Tab
        with gr.TabItem("Single Website Search"):
            with gr.Row():
                with gr.Column(scale=2):
                    gr.HTML('<div class="section-header">Search Parameters</div>')
                    website_input = gr.Textbox(
                        label="Target Website URL",
                        placeholder="Enter company website (e.g., techflowsolutions.com)",
                        value="",
                        elem_classes=["custom-input"]
                    )
                    with gr.Row():
                        max_results = gr.Slider(
                            minimum=1,
                            maximum=20,
                            value=8,
                            step=1,
                            label="Maximum Results",
                            elem_classes=["custom-input"]
                        )
                    search_btn = gr.Button(
                        "Execute Search",
                        variant="primary",
                        size="lg",
                        elem_classes=["primary-btn"]
                    )
            gr.HTML('<div class="section-header">Search Results</div>')
            # Left: human-readable report; right: the same contacts as CSV text.
            with gr.Row():
                results_display = gr.Textbox(
                    label="Contact Intelligence Report",
                    lines=18,
                    max_lines=35,
                    show_copy_button=True,
                    elem_classes=["results-container"]
                )
                csv_output = gr.Textbox(
                    label="Export Data (CSV Format)",
                    lines=18,
                    max_lines=35,
                    show_copy_button=True,
                    elem_classes=["results-container"]
                )
        # CSV Upload Tab
        with gr.TabItem("CSV Bulk Search"):
            with gr.Row():
                with gr.Column(scale=2):
                    gr.HTML('<div class="section-header">CSV Upload</div>')
                    csv_file = gr.File(
                        label="Upload CSV File",
                        file_types=[".csv"],
                        elem_classes=["custom-input"]
                    )
                    gr.HTML("""
<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
<strong>CSV Format - Multiple Options:</strong><br>
<strong>Option 1:</strong> Website URLs in <strong>Column H (8th column)</strong><br>
<strong>Option 2:</strong> Column header named 'website', 'url', 'domain', 'site', or 'web'<br>
<strong>Option 3:</strong> System will auto-detect columns with valid URLs<br><br>
<strong>Examples:</strong> techflowsolutions.com, https://example.com, www.company.com<br>
<strong>Note:</strong> The system will show detailed debugging information about your CSV structure
</div>
""")
                    with gr.Row():
                        csv_max_results = gr.Slider(
                            minimum=1,
                            maximum=50,
                            value=20,
                            step=1,
                            label="Maximum Results",
                            elem_classes=["custom-input"]
                        )
                    csv_search_btn = gr.Button(
                        "Process CSV",
                        variant="primary",
                        size="lg",
                        elem_classes=["primary-btn"]
                    )
            gr.HTML('<div class="section-header">CSV Results</div>')
            # Left: processing report including debug lines; right: CSV export.
            with gr.Row():
                csv_results_display = gr.Textbox(
                    label="CSV Processing Report",
                    lines=18,
                    max_lines=35,
                    show_copy_button=True,
                    elem_classes=["results-container"]
                )
                csv_export_output = gr.Textbox(
                    label="Export Data (CSV Format)",
                    lines=18,
                    max_lines=35,
                    show_copy_button=True,
                    elem_classes=["results-container"]
                )
    # Sample websites section
    with gr.Accordion("Sample Websites Database", open=False):
        # NOTE(review): the opening <div> here and the closing </div> below are
        # emitted by two separate gr.HTML components - presumably they do not
        # wrap the textbox in the rendered page; verify.
        gr.HTML('<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af;">')
        sample_websites = gr.Textbox(
            label="Available Websites in Database",
            value=get_all_available_websites(),
            lines=8,
            interactive=False,
            elem_classes=["custom-input"]
        )
        gr.HTML('</div>')
    # Quick search buttons
    gr.HTML('<div class="section-header">Quick Access Sample Websites</div>')
    with gr.Row():
        quick_btn1 = gr.Button("TechFlow Solutions", size="sm", elem_classes=["secondary-btn"])
        quick_btn2 = gr.Button("GreenLeaf Consulting", size="sm", elem_classes=["secondary-btn"])
        quick_btn3 = gr.Button("BlueSky Marketing", size="sm", elem_classes=["secondary-btn"])
        quick_btn4 = gr.Button("Quantum Dynamics", size="sm", elem_classes=["secondary-btn"])
    with gr.Row():
        quick_btn5 = gr.Button("Stellar Logistics", size="sm", elem_classes=["secondary-btn"])
        quick_btn6 = gr.Button("Nexus Financial", size="sm", elem_classes=["secondary-btn"])
        quick_btn7 = gr.Button("Horizon Health", size="sm", elem_classes=["secondary-btn"])
        quick_btn8 = gr.Button("Phoenix Manufacturing", size="sm", elem_classes=["secondary-btn"])
    # Event handlers
    # Each handler returns (report_text, csv_text), mapped onto the two
    # result textboxes of its tab.
    search_btn.click(
        fn=search_website_contacts,
        inputs=[website_input, max_results],
        outputs=[results_display, csv_output]
    )
    csv_search_btn.click(
        fn=search_csv_websites,
        inputs=[csv_file, csv_max_results],
        outputs=[csv_results_display, csv_export_output]
    )
    # Quick search button handlers
    # These only write a domain into the URL textbox; no search is wired to
    # the quick buttons here.
    quick_btn1.click(lambda: "techflowsolutions.com", outputs=website_input)
    quick_btn2.click(lambda: "greenleafconsult.com", outputs=website_input)
    quick_btn3.click(lambda: "blueskymarketing.net", outputs=website_input)
    quick_btn4.click(lambda: "quantumdynamics.org", outputs=website_input)
    quick_btn5.click(lambda: "stellarlogistics.biz", outputs=website_input)
    quick_btn6.click(lambda: "nexusfinancial.pro", outputs=website_input)
    quick_btn7.click(lambda: "horizonhealth.care", outputs=website_input)
    quick_btn8.click(lambda: "phoenixmfg.com", outputs=website_input)
    # Examples
    # Each example row is [website URL, maximum results].
    gr.Examples(
        examples=[
            ["techflowsolutions.com", 5],
            ["greenleafconsult.com", 3],
            ["blueskymarketing.net", 4],
            ["quantumdynamics.org", 6]
        ],
        inputs=[website_input, max_results],
        label="Sample Searches"
    )
    # Footer
    gr.HTML("""
<div style="text-align: center; padding: 30px 20px; background: linear-gradient(135deg, #64748b 0%, #475569 100%); color: white; border-radius: 15px; margin-top: 30px;">
<h3 style="margin: 0 0 10px 0;">Contact Intelligence Platform</h3>
<p style="margin: 0; opacity: 0.9;">Professional-grade contact discovery and lead generation technology</p>
<p style="margin: 10px 0 0 0; font-size: 0.9em; opacity: 0.7;">Powered by advanced web intelligence algorithms</p>
</div>
""")
# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    app.launch()