import json
from typing import List, Dict, Any
from urllib.parse import urlparse

import gradio as gr
import pandas as pd
import requests

# URL of the contacts lookup service; search_contacts() sends a GET request
# here with the searched site passed as the ``url`` query parameter.
API_ENDPOINT = "https://jaybene1-testapicontacts.hf.space/contacts"

# Custom theme built on Gradio's "Soft" preset. Wherever a ``*_dark`` variant
# is set below it is given the same value as its light counterpart.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
    neutral_hue="slate",
    text_size="md",
    spacing_size="md",
    radius_size="md"
).set(
    # Page background
    body_background_fill="#1a1a1a",
    body_background_fill_dark="#1a1a1a",

    # Page text
    body_text_color="#e5e5e5",
    body_text_color_dark="#e5e5e5",

    # Blocks (component containers), their borders, labels and titles
    block_background_fill="#2d2d2d",
    block_background_fill_dark="#2d2d2d",
    block_border_color="#404040",
    block_border_color_dark="#404040",
    block_label_text_color="#e5e5e5",
    block_label_text_color_dark="#e5e5e5",
    block_title_text_color="#e5e5e5",
    block_title_text_color_dark="#e5e5e5",

    # Input fields
    input_background_fill="#2d2d2d",
    input_background_fill_dark="#2d2d2d",
    input_border_color="#555555",
    input_border_color_dark="#555555",
    input_placeholder_color="#a0a0a0",
    input_placeholder_color_dark="#a0a0a0",

    # Buttons (no *_dark variants are set for these)
    button_primary_background_fill="#1e40af",
    button_primary_background_fill_hover="#2563eb",
    button_primary_text_color="#ffffff",
    button_secondary_background_fill="#404040",
    button_secondary_background_fill_hover="#555555",
    button_secondary_text_color="#ffffff"
)


def normalize_url(url: str) -> str:
    """
    Normalize a URL down to its bare lowercase host for comparison.

    Surrounding whitespace, a leading ``http://``/``https://`` scheme, a
    leading ``www.``, everything from the first ``/`` onwards and any
    ``:port`` suffix are removed. Only *leading* occurrences of the scheme
    and ``www.`` are stripped, so a host such as ``awww.example.com`` is
    left intact.

    Args:
        url: The URL (or bare host) to normalize

    Returns:
        Normalized host string, or "" when ``url`` is empty or None
    """
    if not url:
        return ""

    normalized = url.strip().lower()

    # Strip the scheme only when it really is a prefix.
    for scheme in ("https://", "http://"):
        if normalized.startswith(scheme):
            normalized = normalized[len(scheme):]
            break

    # Likewise only a leading "www." is decoration; elsewhere it is part of
    # the host name and must be kept.
    if normalized.startswith("www."):
        normalized = normalized[len("www."):]

    # Drop the path, then the port.
    normalized = normalized.split('/', 1)[0]
    normalized = normalized.split(':', 1)[0]

    return normalized


# Second-level labels that, under a two-letter country-code TLD, commonly form
# a shared public suffix (e.g. "co.uk", "com.au"). This is a small heuristic,
# not a full public-suffix list.
_COMMON_SECOND_LEVEL_LABELS = frozenset(
    {"co", "com", "org", "net", "gov", "edu", "ac"}
)


def extract_domain_variations(url: str) -> List[str]:
    """
    Extract the domain spellings that should count as a match for ``url``.

    The result contains the normalized host, the same host prefixed with
    ``www.`` and, for hosts with more than two labels, the parent domain
    (``blog.example.com`` -> ``example.com``).

    Args:
        url: The URL to extract domains from

    Returns:
        List of domain variations without duplicates; empty when ``url``
        normalizes to an empty string
    """
    normalized = normalize_url(url)

    # An empty variation would be a substring of every candidate and make
    # every contact "match", so return nothing instead.
    if not normalized:
        return []

    variations = [normalized, f"www.{normalized}"]

    labels = normalized.split('.')
    is_ipv4_like = all(label.isdigit() for label in labels)

    if len(labels) > 2 and not is_ipv4_like:
        # Keep three labels when the last two look like a shared suffix such
        # as "co.uk"; otherwise "co.uk" itself would become a variation and
        # match unrelated organisations.
        keep = 2
        if len(labels[-1]) == 2 and labels[-2] in _COMMON_SECOND_LEVEL_LABELS:
            keep = 3
        parent = '.'.join(labels[-keep:])
        if parent not in variations:
            variations.append(parent)

    return variations


# Contact keys whose values may carry a URL, domain or e-mail address.
_CONTACT_MATCH_FIELDS = (
    'website',
    'company_website',
    'domain',
    'email',
    'company',
    'url',
    'source',
    'company_url',
    'origin_url',
)


def _overlaps_any(candidate, search_domains):
    """Return True when ``candidate`` contains, or is contained in, a search domain."""
    # An empty candidate is a substring of everything; never treat it as a hit.
    if not candidate:
        return False
    return any(
        domain in candidate or candidate in domain
        for domain in search_domains
    )


def _contact_matches(contact, search_domains):
    """Return True when any URL-like field of ``contact`` matches a search domain."""
    for key in _CONTACT_MATCH_FIELDS:
        value = contact.get(key)
        # Skip missing/None/empty values rather than matching on "None".
        if not value:
            continue
        if _overlaps_any(normalize_url(str(value)), search_domains):
            return True

    # Fallback: compare only the domain part of the e-mail address.
    email = contact.get('email')
    if isinstance(email, str) and '@' in email:
        email_domain = email.rsplit('@', 1)[-1].strip().lower()
        return _overlaps_any(email_domain, search_domains)

    return False


def filter_contacts_by_url(contacts, search_url):
    """
    Keep only the contacts that appear to belong to ``search_url``.

    This is a client-side filter in case the API doesn't filter properly.
    A contact is kept when one of its URL-like fields (or its e-mail domain)
    contains, or is contained in, one of the domain variations derived from
    ``search_url``. The match is a deliberately loose substring comparison.

    Args:
        contacts: List of contact dicts; any non-list value is returned
            unchanged
        search_url: The URL the user searched for

    Returns:
        List of matching contact dicts in their original order (non-dict
        items are dropped); empty when ``search_url`` yields no usable
        domain
    """
    if not isinstance(contacts, list):
        return contacts

    # Discard empty variations: "" is a substring of every string and would
    # turn the filter into a no-op.
    search_domains = [
        domain for domain in extract_domain_variations(search_url) if domain
    ]
    if not search_domains:
        return []

    return [
        contact
        for contact in contacts
        if isinstance(contact, dict) and _contact_matches(contact, search_domains)
    ]


def _unwrap_payload(result):
    """Return the payload inside a ``{"data": [...]}`` envelope, else ``result`` itself."""
    if not (isinstance(result, dict) and 'data' in result):
        return result
    data = result['data']
    if not data:
        return {}
    # The envelope normally holds a list whose first element is the payload;
    # tolerate a bare (non-list) payload instead of failing on ``data[0]``.
    return data[0] if isinstance(data, list) else data


def _response_size(result):
    """Return the number of top-level entries in the API response, or 'N/A'."""
    if isinstance(result, dict):
        sized = result.get('data', [result])
    elif isinstance(result, list):
        sized = result
    else:
        return 'N/A'
    try:
        return len(sized)
    except TypeError:
        return 'N/A'


def _low_match_warning(original_count, match_count, original_url):
    """Return a warning banner when a large response was filtered down to a handful."""
    if original_count > 50 and match_count < 5:
        return (
            f"**Warning:** API returned {original_count} total contacts. "
            f"After filtering for '{original_url}', found {match_count} matches.\n\n"
            "These results may not be accurate. "
            "The URL might not be in our database.\n\n"
        )
    return ""


def search_contacts(url: str) -> tuple[str, str]:
    """
    Search for contacts associated with a given URL using the Hugging Face API.

    The URL is sent to ``API_ENDPOINT`` as the ``url`` query parameter. When
    the response carries a list of contacts (either directly or under a
    ``contacts`` key) it is filtered client-side with
    ``filter_contacts_by_url`` before being formatted. Errors are reported in
    the returned text rather than raised.

    Args:
        url: The website URL to search for contacts

    Returns:
        Tuple of (formatted_results, raw_json). ``raw_json`` is the API
        response as received (never the filtered subset), the response text
        when it is not valid JSON, or "" on other errors
    """
    if not url or not url.strip():
        return "Please enter a website URL", ""

    url = url.strip()
    original_url = url
    # Compare case-insensitively so "HTTPS://..." is not prefixed a second time.
    if not url.lower().startswith(('http://', 'https://')):
        url = 'https://' + url

    try:
        response = requests.get(
            API_ENDPOINT,
            params={'url': url},
            timeout=30
        )

        if response.status_code != 200:
            return f"API Error ({response.status_code}): {response.text}", ""

        try:
            result = response.json()
        except ValueError:
            # ValueError is the common base of the JSON decode errors that
            # ``response.json()`` can raise.
            return "Error: Invalid JSON response from API", response.text

        # Serialize before any further processing so this is truly raw.
        raw_json = json.dumps(result, indent=2)
        payload = _unwrap_payload(result)

        if isinstance(payload, list):
            candidates = payload
            as_contacts_dict = False
        elif isinstance(payload, dict) and 'contacts' in payload:
            contacts = payload['contacts']
            candidates = contacts if isinstance(contacts, list) else []
            as_contacts_dict = True
        else:
            candidates = None
            as_contacts_dict = False

        if candidates is None:
            # Unknown response shape: nothing to filter, format it as-is.
            formatted_output = format_contact_results(payload, url)
            filtered_count = 0 if isinstance(payload, dict) else 'N/A'
            filtering_applied = "No"
        else:
            matches = filter_contacts_by_url(candidates, url)
            if not matches:
                return (
                    f"No contacts found for: {original_url}\n\n"
                    "This URL may not be in our database.",
                    raw_json,
                )
            to_format = {'contacts': matches} if as_contacts_dict else matches
            formatted_output = (
                _low_match_warning(len(candidates), len(matches), original_url)
                + format_contact_results(to_format, url)
            )
            filtered_count = len(matches)
            filtering_applied = "Yes"

        debug_info = (
            "\n\n**Debug Info:**\n"
            f"- Request URL: {API_ENDPOINT}\n"
            f"- Search URL: {url}\n"
            f"- Original response size: {_response_size(result)}\n"
            f"- Filtered results: {filtered_count}\n"
            f"- Client-side filtering applied: {filtering_applied}\n"
        )

        return formatted_output + debug_info, raw_json

    except requests.exceptions.Timeout:
        return "Request timeout. Please try again.", ""
    except requests.exceptions.ConnectionError:
        return "Connection error. Please check your API endpoint.", ""
    except Exception as e:
        # UI boundary: surface anything unexpected as text instead of raising.
        return f"Error: {str(e)}", ""


# Display label -> contact keys tried in order; the first truthy value wins.
_CONTACT_DISPLAY_FIELDS = (
    ("Name", ("name", "full_name", "contact_name")),
    ("Email", ("email", "email_address")),
    ("Title", ("title", "job_title", "position")),
    ("Company", ("company", "organization")),
    ("Phone", ("phone", "phone_number")),
    ("LinkedIn", ("linkedin", "linkedin_url")),
)

# Keys already shown above; every other key is listed as additional information.
_CONTACT_DISPLAY_KEYS = frozenset(
    key for _, keys in _CONTACT_DISPLAY_FIELDS for key in keys
)


def _format_contact(index, contact) -> str:
    """Render one contact as a Markdown section numbered ``index``."""
    lines = [f"## Contact {index}\n"]

    # Anything that is not a dict is shown verbatim instead of crashing on .get().
    if not isinstance(contact, dict):
        lines.append(f"{contact}\n\n")
        return "".join(lines)

    for label, keys in _CONTACT_DISPLAY_FIELDS:
        value = next((contact[key] for key in keys if contact.get(key)), "N/A")
        lines.append(f"- **{label}:** {value}\n")
    lines.append("\n")

    additional_fields = {
        key: value
        for key, value in contact.items()
        if key not in _CONTACT_DISPLAY_KEYS
    }
    if additional_fields:
        lines.append("**Additional Information:**\n")
        for key, value in additional_fields.items():
            lines.append(f"- **{str(key).replace('_', ' ').title()}:** {value}\n")
        lines.append("\n")

    lines.append("---\n\n")
    return "".join(lines)


def format_contact_results(results: Dict[Any, Any], url: str) -> str:
    """
    Format the API results into readable Markdown.

    A dict is searched for a contact list under ``contacts``, ``results`` or
    ``data`` (in that order); a list is treated as the contact list itself.
    Dict contacts are rendered field by field, other items verbatim. Any
    other value is printed as a single "Result" line.

    Args:
        results: The JSON response from the API (dict, list or scalar)
        url: The searched URL

    Returns:
        Formatted string with contact information, ending with the time the
        search completed
    """
    parts = [f"# Contact Search Results for: {url}\n\n"]

    if isinstance(results, (dict, list)):
        if isinstance(results, list):
            contacts = results
        else:
            for key in ('contacts', 'results', 'data'):
                if key in results:
                    contacts = results[key]
                    break
            else:
                contacts = results

        if isinstance(contacts, list) and contacts:
            parts.append(f"**Found {len(contacts)} contact(s):**\n\n")
            parts.extend(
                _format_contact(index, contact)
                for index, contact in enumerate(contacts, 1)
            )
        else:
            parts.append("No contacts found for this URL.\n\n")
    else:
        parts.append(f"**Result:** {results}\n\n")

    parts.append(
        f"*Search completed at: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}*"
    )

    return "".join(parts)


# Stylesheet passed to gr.Blocks(css=...). It repeats the theme's dark palette
# with !important overrides.
# NOTE(review): several rules set text to #3b3a3a while the same elements get
# a #2d2d2d background (URL input, code panel) - that is very low contrast;
# confirm it is intended.
CUSTOM_CSS = """
/* Root and body styles */
:root {
    --body-text-color: #e5e5e5 !important;
    --neutral-50: #1a1a1a !important;
    --neutral-100: #2d2d2d !important;
    --neutral-200: #404040 !important;
    --neutral-300: #555555 !important;
    --neutral-400: #6b7280 !important;
    --neutral-500: #9ca3af !important;
    --neutral-600: #d1d5db !important;
    --neutral-700: #e5e7eb !important;
    --neutral-800: #f3f4f6 !important;
    --neutral-900: #ffffff !important;
    --neutral-950: #ffffff !important;
}

.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
    background-color: #1a1a1a !important;
    color: #e5e5e5 !important;
}

/* Header styles with blue gradient */
.header {
    background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 50%, #1e40af 100%);
    color: white;
    padding: 2rem;
    border-radius: 0.5rem;
    margin-bottom: 2rem;
    text-align: center;
    border: 1px solid #404040;
    box-shadow: 0 4px 15px rgba(30, 58, 138, 0.3);
}
.header h1 {
    margin: 0;
    font-size: 2.5rem;
    font-weight: 700;
    margin-bottom: 0.5rem;
    color: #ffffff !important;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
}

/* Info box styles */
.info-box {
    background: #2d2d2d;
    border: 1px solid #1e40af;
    border-radius: 0.5rem;
    padding: 1rem;
    margin: 1rem 0;
    color: #ffffff;
}
.info-box h3 {
    color: #ffffff !important;
    margin-top: 0;
}
.info-box p, .info-box ol, .info-box li {
    color: #e5e5e5 !important;
}

/* Footer styles */
.footer {
    text-align: center;
    margin-top: 2rem;
    padding-top: 2rem;
    border-top: 1px solid #404040;
    color: #cccccc !important;
}

/* Global text color overrides */
body, .gradio-container, .gradio-container * {
    color: #e5e5e5 !important;
}

/* Input field styles */
.gr-textbox input,
.gr-textbox textarea,
input[type="text"],
input[type="email"],
input[type="url"],
textarea {
    color: #e5e5e5 !important;
    background-color: #2d2d2d !important;
    border: 1px solid #555555 !important;
}

/* Input placeholder styles */
.gr-textbox input::placeholder,
.gr-textbox textarea::placeholder,
input::placeholder,
textarea::placeholder {
    color: #a0a0a0 !important;
}

/* Label styles */
label, .gr-label {
    color: #e5e5e5 !important;
}

/* Specific styling for Website URL input label */
.gr-textbox:has(input[placeholder*="example.com"]) label {
    color: #3b3a3a !important;
}

/* Markdown and code styles */
.gr-markdown, .gr-markdown * {
    color: #e5e5e5 !important;
}

.gr-code, .gr-code * {
    color: #3b3a3a !important;
    background-color: #2d2d2d !important;
}

/* Code block specific styling */
.gr-code pre, .gr-code pre * {
    color: #3b3a3a !important;
}

/* Form and block styles */
.gr-form, .gr-form * {
    color: #e5e5e5 !important;
}

.gr-block, .gr-block * {
    color: #e5e5e5 !important;
}

/* Panel and container styles */
.gr-panel {
    background-color: #2d2d2d !important;
    border: 1px solid #404040 !important;
}

.gr-container {
    background-color: #1a1a1a !important;
}

/* Button text visibility */
.gr-button {
    color: #ffffff !important;
}

/* Primary button with blue gradient */
.gr-button.primary {
    background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 50%, #1e40af 100%) !important;
    border: none !important;
    box-shadow: 0 4px 15px rgba(30, 58, 138, 0.3) !important;
    transition: all 0.3s ease !important;
}

.gr-button.primary:hover {
    background: linear-gradient(135deg, #1e40af 0%, #3b82f6 50%, #2563eb 100%) !important;
    box-shadow: 0 6px 20px rgba(30, 58, 138, 0.4) !important;
    transform: translateY(-2px) !important;
}

/* Ensure info text is visible */
.gr-info {
    color: #a0a0a0 !important;
}

/* Additional specific overrides */
.gr-textbox-label,
.gr-markdown-label,
.gr-code-label {
    color: #e5e5e5 !important;
}

/* Override for Website URL label specifically */
.gr-textbox:has(input[placeholder*="example.com"]) .gr-textbox-label {
    color: #3b3a3a !important;
}

/* Override any remaining dark text */
p, span, div, h1, h2, h3, h4, h5, h6, li, td, th {
    color: #e5e5e5 !important;
}

/* Ensure all text inputs have proper contrast */
input:focus, textarea:focus {
    color: #e5e5e5 !important;
    background-color: #2d2d2d !important;
    border-color: #1e40af !important;
}

/* Specific targeting for the Website URL input text */
.gr-textbox input[placeholder*="example.com"] {
    color: #3b3a3a !important;
}

/* JSON code block text color */
.gr-code[data-testid*="json"] pre,
.gr-code[data-testid*="json"] code,
.gr-code[language="json"] pre,
.gr-code[language="json"] code {
    color: #3b3a3a !important;
}
"""

# Page layout: header, instructions, URL input, action buttons, then the
# formatted results next to the raw JSON, followed by a footer.
with gr.Blocks(
    theme=theme,
    title="Kwekel Companies Contact Search Tool",
    css=CUSTOM_CSS,
) as demo:

    # Header banner
    gr.HTML("""
    <div class="header">
        <h1>Kwekel Companies Contact Search Tool</h1>
    </div>
    """)

    # Usage instructions; the endpoint shown is the module-level constant.
    gr.HTML(f"""
    <div class="info-box">
        <h3>Instructions:</h3>
        <ol>
            <li>Enter the website URL you want to search for contacts</li>
            <li>Click "Search Contacts" to get results</li>
            <li>View formatted results and raw JSON response</li>
        </ol>
        <p><strong>API Endpoint:</strong> {API_ENDPOINT}</p>
        <p><strong>Note:</strong> The system will filter results to match your specific URL and warn you if the URL might not be in the database.</p>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=2):
            url_input = gr.Textbox(
                label="Website URL",
                placeholder="example.com or https://example.com",
                info="Enter the website URL to search for contacts",
                lines=1
            )

    with gr.Row():
        search_btn = gr.Button("Search Contacts", variant="primary", scale=2)
        clear_btn = gr.Button("Clear", variant="secondary", scale=1)

    with gr.Row():
        with gr.Column(scale=1):
            results_output = gr.Markdown(
                label="Contact Results",
                value="Enter a URL and click 'Search Contacts' to see results here."
            )

        with gr.Column(scale=1):
            json_output = gr.Code(
                label="Raw JSON Response",
                language="json",
                value=""
            )

    # search_contacts returns (markdown, raw_json), one value per output.
    search_btn.click(
        fn=search_contacts,
        inputs=[url_input],
        outputs=[results_output, json_output]
    )

    # Reset the input and BOTH result panels so no stale JSON from the
    # previous search is left on screen.
    clear_btn.click(
        fn=lambda: ("", "", ""),
        outputs=[url_input, results_output, json_output]
    )

    # Footer
    gr.HTML("""
    <div class="footer">
        <p>© 2025 Kwekel Companies Contact Search Tool</p>
    </div>
    """)


# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch(
        # NOTE(review): share=True requests a public share link and
        # server_name="0.0.0.0" listens on all interfaces - confirm both are
        # wanted outside local testing.
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        debug=True
    )