# apptest4 / app.py
# Author: JayBene1
# Commit: "Update app.py" (68ee454, verified)
import gradio as gr
import requests
import json
from typing import List, Dict, Any
import pandas as pd
from urllib.parse import urlparse
# Hardcoded API endpoint - Updated to correct endpoint
API_ENDPOINT = "https://jaybene1-testapicontacts.hf.space/contacts"

# Professional color scheme inspired by sale leaseback theme.
# Colors that must look the same in light and dark mode: each entry is applied
# both under its own name and under its "<name>_dark" twin.
_MODE_INDEPENDENT_COLORS = {
    # Background colors
    "body_background_fill": "#1a1a1a",
    # Text colors
    "body_text_color": "#e5e5e5",
    # Block colors
    "block_background_fill": "#2d2d2d",
    "block_border_color": "#404040",
    "block_label_text_color": "#e5e5e5",
    "block_title_text_color": "#e5e5e5",
    # Input colors
    "input_background_fill": "#2d2d2d",
    "input_border_color": "#555555",
    "input_placeholder_color": "#a0a0a0",
}

# Button colors are only overridden for the default (non-dark) variables.
_BUTTON_COLORS = {
    "button_primary_background_fill": "#1e40af",
    "button_primary_background_fill_hover": "#2563eb",
    "button_primary_text_color": "#ffffff",
    "button_secondary_background_fill": "#404040",
    "button_secondary_background_fill_hover": "#555555",
    "button_secondary_text_color": "#ffffff",
}

_THEME_OVERRIDES = {
    **_MODE_INDEPENDENT_COLORS,
    **{f"{key}_dark": value for key, value in _MODE_INDEPENDENT_COLORS.items()},
    **_BUTTON_COLORS,
}

theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
    neutral_hue="slate",
    text_size="md",
    spacing_size="md",
    radius_size="md",
).set(**_THEME_OVERRIDES)
def normalize_url(url: str) -> str:
    """
    Normalize a URL to its bare, lower-case host name for comparison.

    The scheme, a leading ``www.`` label, the port, and everything from the
    first path, query or fragment delimiter onward are removed. Scheme and
    ``www.`` are stripped only as prefixes, so hosts that merely contain
    those characters (e.g. ``awww.com``) are left intact.

    Args:
        url: The URL (or URL-like text) to normalize.

    Returns:
        The normalized host string, or "" for empty/None input.
    """
    if not url:
        return ""

    normalized = str(url).strip().lower()

    # A scheme can only appear at the very start of the string.
    normalized = normalized.removeprefix('https://').removeprefix('http://')

    # Keep only the authority part; a query or fragment may follow the host
    # directly, without any path ("example.com?ref=1").
    for delimiter in ('/', '?', '#'):
        normalized = normalized.split(delimiter, 1)[0]

    # Remove port numbers
    normalized = normalized.split(':', 1)[0]

    # Drop "www." only when it is the leading label.
    return normalized.removeprefix('www.')
def extract_domain_variations(url: str) -> List[str]:
    """
    Extract the domain spellings used when matching contacts against a URL.

    Args:
        url: The URL to extract domains from.

    Returns:
        A list holding the normalized host, the host prefixed with ``www.``
        and, when the host has more than two labels, its last two labels.
        Returns an empty list when the URL normalizes to an empty host, so
        callers never receive a blank entry (a blank string is a substring
        of every other string and would match anything).
    """
    normalized = normalize_url(url)
    if not normalized:
        return []

    variations = [normalized, f"www.{normalized}"]

    # Add the parent domain when a subdomain is present.
    # NOTE(review): for hosts under a multi-label suffix such as
    # "shop.example.co.uk" the last two labels are "co.uk", which is very
    # broad; a public-suffix-aware split would be needed to avoid that.
    labels = normalized.split('.')
    if len(labels) > 2:
        variations.append('.'.join(labels[-2:]))

    return variations
# Contact keys whose values may carry the company's domain.
_CONTACT_DOMAIN_FIELDS = (
    'website',
    'company_website',
    'domain',
    'email',
    'company',
    'url',
    'source',
    'company_url',
    'origin_url',
)


def _domains_overlap(left: str, right: str) -> bool:
    """Return True when both strings are non-empty and one contains the other."""
    if not left or not right:
        return False
    return left in right or right in left


def filter_contacts_by_url(contacts, search_url):
    """
    Enhanced filter contacts based on the search URL.
    This is a client-side filter in case the API doesn't filter properly.

    A contact is kept when any of its domain-bearing fields (or the domain
    part of its email address) contains one of the search-domain variations,
    or is contained in one. Blank strings never count as a match on either
    side, because a blank string is a substring of everything.

    Args:
        contacts: The contacts returned by the API. Anything that is not a
            list is returned unchanged.
        search_url: The URL the user searched for.

    Returns:
        A new list with the matching contact dicts, each at most once and in
        their original order. Entries that are not dicts are dropped. Returns
        an empty list when the search URL has no usable host.
    """
    if not isinstance(contacts, list):
        return contacts

    # Without a host there is nothing meaningful to match against.
    if not normalize_url(search_url):
        return []
    search_domains = [
        domain for domain in extract_domain_variations(search_url) if domain
    ]

    filtered_contacts = []
    for contact in contacts:
        if not isinstance(contact, dict):
            continue

        # Missing/None values are skipped before str() so that None does not
        # turn into the matchable text "None".
        candidates = [
            normalize_url(str(contact[key]))
            for key in _CONTACT_DOMAIN_FIELDS
            if contact.get(key)
        ]

        # Also check the email's domain part on its own.
        email = contact.get('email')
        if isinstance(email, str) and '@' in email:
            candidates.append(email.split('@')[1].lower())

        if any(
            _domains_overlap(candidate, search_domain)
            for candidate in candidates
            for search_domain in search_domains
        ):
            filtered_contacts.append(contact)

    return filtered_contacts
def _safe_len(value):
    """Return len(value), or the string 'N/A' when the value has no length."""
    try:
        return len(value)
    except TypeError:
        return 'N/A'


def _render_search_result(result, url: str, original_url: str) -> tuple[str, str]:
    """
    Filter, format and annotate a decoded API payload.

    Args:
        result: The decoded JSON payload returned by the API.
        url: The search URL as sent to the API (with scheme).
        original_url: The URL exactly as the user typed it (trimmed).

    Returns:
        Tuple of (formatted_results, raw_json). raw_json always reflects the
        payload as received; the payload is never modified here.
    """
    raw_json = json.dumps(result, indent=2)

    # Some responses wrap the payload as {"data": [payload, ...]}.
    if isinstance(result, dict) and 'data' in result:
        data = result['data']
        if isinstance(data, list):
            actual_result = data[0] if data else {}
        else:
            # Non-list "data" cannot be indexed by position; use it as-is.
            actual_result = data if data else {}
    else:
        actual_result = result

    # Locate the contact list, remembering whether it was wrapped in a dict
    # so the formatter receives the same shape the API used.
    if isinstance(actual_result, list):
        contacts, wrapped = actual_result, False
    elif isinstance(actual_result, dict) and isinstance(actual_result.get('contacts'), list):
        contacts, wrapped = actual_result['contacts'], True
    else:
        contacts, wrapped = None, False

    if contacts is None:
        # Single contact or unknown format: nothing to filter.
        formatted_output = format_contact_results(actual_result, url)
        if isinstance(actual_result, dict):
            filtered_count = _safe_len(actual_result.get('contacts', []))
        else:
            filtered_count = 'N/A'
    else:
        # Apply client-side filtering
        filtered = filter_contacts_by_url(contacts, url)
        if not filtered:
            return (
                f"No contacts found for: {original_url}\n\nThis URL may not be in our database.",
                raw_json,
            )

        formatted_output = ""
        if len(contacts) > 50 and len(filtered) < 5:
            # A large response that filters down to a handful of rows most
            # likely means the API returned its whole database.
            formatted_output = f"**Warning:** API returned {len(contacts)} total contacts. After filtering for '{original_url}', found {len(filtered)} matches.\n\n"
            formatted_output += "These results may not be accurate. The URL might not be in our database.\n\n"

        payload = {'contacts': filtered} if wrapped else filtered
        formatted_output += format_contact_results(payload, url)
        filtered_count = len(filtered)

    if isinstance(result, dict):
        original_size = _safe_len(result.get('data', [result]))
    elif isinstance(result, list):
        original_size = len(result)
    else:
        original_size = 'N/A'

    # Add debugging info
    debug_info = "\n\n**Debug Info:**\n"
    debug_info += f"- Request URL: {API_ENDPOINT}\n"
    debug_info += f"- Search URL: {url}\n"
    debug_info += f"- Original response size: {original_size}\n"
    debug_info += f"- Filtered results: {filtered_count}\n"
    debug_info += "- Client-side filtering applied: Yes\n"

    return formatted_output + debug_info, raw_json


def search_contacts(url: str) -> tuple[str, str]:
    """
    Search for contacts associated with a given URL using the Hugging Face API.

    Sends a GET request to API_ENDPOINT with the URL as the ``url`` query
    parameter, filters the returned contacts client-side and formats them as
    Markdown. Failures are reported through the returned message instead of
    being raised, so the UI always has something to display.

    Args:
        url: The website URL to search for contacts. A missing scheme is
            replaced by ``https://``.

    Returns:
        Tuple of (formatted_results, raw_json)
    """
    url = (url or "").strip()
    if not url:
        return "Please enter a website URL", ""

    original_url = url
    # Compare case-insensitively so "HTTPS://..." is not prefixed a second time.
    if not url.lower().startswith(('http://', 'https://')):
        url = 'https://' + url

    try:
        # Make the API request to the hardcoded endpoint using GET
        response = requests.get(
            API_ENDPOINT,
            params={'url': url},
            timeout=30,
        )

        if response.status_code != 200:
            return f"API Error ({response.status_code}): {response.text}", ""

        # Only the decoding step is guarded here; every JSON decode error
        # raised by response.json() is a ValueError subclass.
        try:
            result = response.json()
        except ValueError:
            return "Error: Invalid JSON response from API", response.text

        return _render_search_result(result, url, original_url)
    except requests.exceptions.Timeout:
        return "Request timeout. Please try again.", ""
    except requests.exceptions.ConnectionError:
        return "Connection error. Please check your API endpoint.", ""
    except Exception as e:
        # UI boundary: report unexpected failures instead of crashing the app.
        return f"Error: {str(e)}", ""
# Display label -> contact keys tried in order; the first truthy value wins.
_CONTACT_FIELD_ALIASES = (
    ("Name", ("name", "full_name", "contact_name")),
    ("Email", ("email", "email_address")),
    ("Title", ("title", "job_title", "position")),
    ("Company", ("company", "organization")),
    ("Phone", ("phone", "phone_number")),
    ("LinkedIn", ("linkedin", "linkedin_url")),
)
_KNOWN_CONTACT_KEYS = frozenset(
    alias for _, aliases in _CONTACT_FIELD_ALIASES for alias in aliases
)


def _extract_contact_list(results) -> list:
    """Return the list of contacts held by a list or dict payload ([] if none)."""
    if isinstance(results, list):
        return results

    # Known wrapper keys, in priority order; the first key present decides.
    for key in ('contacts', 'results', 'data'):
        if key in results:
            wrapped = results[key]
            return wrapped if isinstance(wrapped, list) else []

    # An unwrapped dict carrying contact fields is a single contact.
    if any(results.get(alias) for alias in _KNOWN_CONTACT_KEYS):
        return [results]
    return []


def _format_single_contact(index: int, contact) -> str:
    """Render one contact as a Markdown section."""
    heading = f"## Contact {index}\n"
    if not isinstance(contact, dict):
        # Entries without named fields are shown verbatim.
        return f"{heading}{contact}\n\n"

    parts = [heading]
    for label, aliases in _CONTACT_FIELD_ALIASES:
        value = next((contact[a] for a in aliases if contact.get(a)), "N/A")
        parts.append(f"- **{label}:** {value}\n")
    parts.append("\n")

    # Add any additional fields
    additional_fields = {
        key: value for key, value in contact.items()
        if key not in _KNOWN_CONTACT_KEYS
    }
    if additional_fields:
        parts.append("**Additional Information:**\n")
        for key, value in additional_fields.items():
            parts.append(f"- **{str(key).replace('_', ' ').title()}:** {value}\n")
        parts.append("\n")

    parts.append("---\n\n")
    return "".join(parts)


def format_contact_results(results: Any, url: str) -> str:
    """
    Format the API results into a readable Markdown report.

    Accepted shapes: a list of contacts; a dict wrapping such a list under
    'contacts', 'results' or 'data'; or a single contact dict. Contacts that
    are dicts are rendered field by field (bare lists and wrapped lists
    alike); any other entry is printed as-is.

    Args:
        results: The JSON response (or part of it) from the API.
        url: The searched URL, shown in the report title.

    Returns:
        Formatted string with contact information, ending with a
        "Search completed at" timestamp line.
    """
    parts = [f"# Contact Search Results for: {url}\n\n"]

    if isinstance(results, (dict, list)):
        contacts = _extract_contact_list(results)
        if contacts:
            parts.append(f"**Found {len(contacts)} contact(s):**\n\n")
            parts.extend(
                _format_single_contact(index, contact)
                for index, contact in enumerate(contacts, 1)
            )
        else:
            parts.append("No contacts found for this URL.\n\n")
    else:
        parts.append(f"**Result:** {results}\n\n")

    parts.append(
        f"*Search completed at: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}*"
    )
    return "".join(parts)
# Custom stylesheet passed to gr.Blocks; kept in a constant so the layout code
# below stays readable.
CUSTOM_CSS = """
/* Root and body styles */
:root {
--body-text-color: #e5e5e5 !important;
--neutral-50: #1a1a1a !important;
--neutral-100: #2d2d2d !important;
--neutral-200: #404040 !important;
--neutral-300: #555555 !important;
--neutral-400: #6b7280 !important;
--neutral-500: #9ca3af !important;
--neutral-600: #d1d5db !important;
--neutral-700: #e5e7eb !important;
--neutral-800: #f3f4f6 !important;
--neutral-900: #ffffff !important;
--neutral-950: #ffffff !important;
}
.gradio-container {
max-width: 1200px !important;
margin: 0 auto !important;
background-color: #1a1a1a !important;
color: #e5e5e5 !important;
}
/* Header styles with blue gradient */
.header {
background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 50%, #1e40af 100%);
color: white;
padding: 2rem;
border-radius: 0.5rem;
margin-bottom: 2rem;
text-align: center;
border: 1px solid #404040;
box-shadow: 0 4px 15px rgba(30, 58, 138, 0.3);
}
.header h1 {
margin: 0;
font-size: 2.5rem;
font-weight: 700;
margin-bottom: 0.5rem;
color: #ffffff !important;
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
}
/* Info box styles */
.info-box {
background: #2d2d2d;
border: 1px solid #1e40af;
border-radius: 0.5rem;
padding: 1rem;
margin: 1rem 0;
color: #ffffff;
}
.info-box h3 {
color: #ffffff !important;
margin-top: 0;
}
.info-box p, .info-box ol, .info-box li {
color: #e5e5e5 !important;
}
/* Footer styles */
.footer {
text-align: center;
margin-top: 2rem;
padding-top: 2rem;
border-top: 1px solid #404040;
color: #cccccc !important;
}
/* Global text color overrides */
body, .gradio-container, .gradio-container * {
color: #e5e5e5 !important;
}
/* Input field styles */
.gr-textbox input,
.gr-textbox textarea,
input[type="text"],
input[type="email"],
input[type="url"],
textarea {
color: #e5e5e5 !important;
background-color: #2d2d2d !important;
border: 1px solid #555555 !important;
}
/* Input placeholder styles */
.gr-textbox input::placeholder,
.gr-textbox textarea::placeholder,
input::placeholder,
textarea::placeholder {
color: #a0a0a0 !important;
}
/* Label styles */
label, .gr-label {
color: #e5e5e5 !important;
}
/* Specific styling for Website URL input label */
.gr-textbox:has(input[placeholder*="example.com"]) label {
color: #3b3a3a !important;
}
/* Markdown and code styles */
.gr-markdown, .gr-markdown * {
color: #e5e5e5 !important;
}
.gr-code, .gr-code * {
color: #3b3a3a !important;
background-color: #2d2d2d !important;
}
/* Code block specific styling */
.gr-code pre, .gr-code pre * {
color: #3b3a3a !important;
}
/* Form and block styles */
.gr-form, .gr-form * {
color: #e5e5e5 !important;
}
.gr-block, .gr-block * {
color: #e5e5e5 !important;
}
/* Panel and container styles */
.gr-panel {
background-color: #2d2d2d !important;
border: 1px solid #404040 !important;
}
.gr-container {
background-color: #1a1a1a !important;
}
/* Button text visibility */
.gr-button {
color: #ffffff !important;
}
/* Primary button with blue gradient */
.gr-button.primary {
background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 50%, #1e40af 100%) !important;
border: none !important;
box-shadow: 0 4px 15px rgba(30, 58, 138, 0.3) !important;
transition: all 0.3s ease !important;
}
.gr-button.primary:hover {
background: linear-gradient(135deg, #1e40af 0%, #3b82f6 50%, #2563eb 100%) !important;
box-shadow: 0 6px 20px rgba(30, 58, 138, 0.4) !important;
transform: translateY(-2px) !important;
}
/* Ensure info text is visible */
.gr-info {
color: #a0a0a0 !important;
}
/* Additional specific overrides */
.gr-textbox-label,
.gr-markdown-label,
.gr-code-label {
color: #e5e5e5 !important;
}
/* Override for Website URL label specifically */
.gr-textbox:has(input[placeholder*="example.com"]) .gr-textbox-label {
color: #3b3a3a !important;
}
/* Override any remaining dark text */
p, span, div, h1, h2, h3, h4, h5, h6, li, td, th {
color: #e5e5e5 !important;
}
/* Ensure all text inputs have proper contrast */
input:focus, textarea:focus {
color: #e5e5e5 !important;
background-color: #2d2d2d !important;
border-color: #1e40af !important;
}
/* Specific targeting for the Website URL input text */
.gr-textbox input[placeholder*="example.com"] {
color: #3b3a3a !important;
}
/* JSON code block text color */
.gr-code[data-testid*="json"] pre,
.gr-code[data-testid*="json"] code,
.gr-code[language="json"] pre,
.gr-code[language="json"] code {
color: #3b3a3a !important;
}
"""

# Create the Gradio interface
# NOTE(review): the Row/Column nesting below was reconstructed from a copy of
# this file whose indentation had been lost — confirm it matches the intended
# layout (button row inside the input column).
with gr.Blocks(theme=theme, title="Kwekel Companies Contact Search Tool", css=CUSTOM_CSS) as demo:
    # Header
    gr.HTML("""
<div class="header">
<h1>Kwekel Companies Contact Search Tool</h1>
</div>
""")

    # Info box
    gr.HTML(f"""
<div class="info-box">
<h3>Instructions:</h3>
<ol>
<li>Enter the website URL you want to search for contacts</li>
<li>Click "Search Contacts" to get results</li>
<li>View formatted results and raw JSON response</li>
</ol>
<p><strong>API Endpoint:</strong> {API_ENDPOINT}</p>
<p><strong>Note:</strong> The system will filter results to match your specific URL and warn you if the URL might not be in the database.</p>
</div>
""")

    with gr.Row():
        with gr.Column(scale=2):
            url_input = gr.Textbox(
                label="Website URL",
                placeholder="example.com or https://example.com",
                info="Enter the website URL to search for contacts",
                lines=1
            )
            with gr.Row():
                search_btn = gr.Button("Search Contacts", variant="primary", scale=2)
                clear_btn = gr.Button("Clear", variant="secondary", scale=1)

    with gr.Row():
        with gr.Column(scale=1):
            results_output = gr.Markdown(
                label="Contact Results",
                value="Enter a URL and click 'Search Contacts' to see results here."
            )
        with gr.Column(scale=1):
            json_output = gr.Code(
                label="Raw JSON Response",
                language="json",
                value=""
            )

    # Event handlers
    search_btn.click(
        fn=search_contacts,
        inputs=[url_input],
        outputs=[results_output, json_output]
    )
    # Pressing Enter in the URL box runs the same search as the button.
    url_input.submit(
        fn=search_contacts,
        inputs=[url_input],
        outputs=[results_output, json_output]
    )
    # Clear resets all three widgets so no stale JSON from a previous search
    # stays on screen.
    clear_btn.click(
        fn=lambda: ("", "", ""),
        outputs=[url_input, results_output, json_output]
    )

    # Footer
    gr.HTML("""
<div class="footer">
<p>© 2025 Kwekel Companies Contact Search Tool</p>
</div>
""")
# Launch the app (only when executed as a script, not when imported)
if __name__ == "__main__":
    # Collected in one mapping so the server settings are easy to scan.
    # NOTE(review): share=True together with debug=True looks like a
    # development setup — confirm this is intended for deployment.
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
        "debug": True,
    }
    demo.launch(**launch_options)