import json
from typing import Any, Dict, List
from urllib.parse import urlparse

import gradio as gr
import pandas as pd
import requests

# Hardcoded API endpoint - Updated to correct endpoint
API_ENDPOINT = "https://jaybene1-testapicontacts.hf.space/contacts"

# Professional color scheme inspired by sale leaseback theme.
# Body, block and input colors are assigned the same value for the regular and
# the `_dark` variable so the UI looks identical in either mode; the button
# colors below only override the regular variables.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
    neutral_hue="slate",
    text_size="md",
    spacing_size="md",
    radius_size="md",
).set(
    # Background colors
    body_background_fill="#1a1a1a",
    body_background_fill_dark="#1a1a1a",
    # Text colors
    body_text_color="#e5e5e5",
    body_text_color_dark="#e5e5e5",
    # Block colors
    block_background_fill="#2d2d2d",
    block_background_fill_dark="#2d2d2d",
    block_border_color="#404040",
    block_border_color_dark="#404040",
    block_label_text_color="#e5e5e5",
    block_label_text_color_dark="#e5e5e5",
    block_title_text_color="#e5e5e5",
    block_title_text_color_dark="#e5e5e5",
    # Input colors (using valid property names)
    input_background_fill="#2d2d2d",
    input_background_fill_dark="#2d2d2d",
    input_border_color="#555555",
    input_border_color_dark="#555555",
    input_placeholder_color="#a0a0a0",
    input_placeholder_color_dark="#a0a0a0",
    # Button colors
    button_primary_background_fill="#1e40af",
    button_primary_background_fill_hover="#2563eb",
    button_primary_text_color="#ffffff",
    button_secondary_background_fill="#404040",
    button_secondary_background_fill_hover="#555555",
    button_secondary_text_color="#ffffff",
)
# Labels that commonly sit directly under a two-letter country code
# ("co.uk", "com.au", ...). This is a small heuristic, not a public-suffix list.
_COMMON_SECOND_LEVEL_LABELS = frozenset(
    {"ac", "co", "com", "edu", "gov", "net", "org"}
)

# Contact keys whose values may carry a website, a host name or an e-mail.
_DOMAIN_BEARING_FIELDS = (
    'website',
    'company_website',
    'domain',
    'email',
    'company',
    'url',
    'source',
    'company_url',
    'origin_url',
)


def normalize_url(url: str) -> str:
    """
    Normalize URL for consistent comparison.

    The result is lower-cased and stripped of surrounding whitespace, a
    leading ``http://``/``https://`` scheme, any path, query string or
    fragment, a port number and a leading ``www.`` label.

    Args:
        url: The URL to normalize

    Returns:
        Normalized URL string ("" for empty or ``None`` input)
    """
    if not url:
        return ""

    host = str(url).strip().lower()

    # Only a *leading* scheme is removed; the text is cut at the first
    # separator right after, so later occurrences never reach the result.
    for scheme in ("https://", "http://"):
        if host.startswith(scheme):
            host = host[len(scheme):]
            break

    # Drop path, query string and fragment, whichever comes first.
    for separator in "/?#":
        host = host.split(separator, 1)[0]

    # Remove port numbers
    host = host.split(":", 1)[0]

    # Strip "www." only as a leading label: a blanket replace would also
    # rewrite hosts such as "awww.example.com".
    if host.startswith("www."):
        host = host[len("www."):]

    return host


def extract_domain_variations(url: str) -> List[str]:
    """
    Extract various domain variations for matching.

    Args:
        url: The URL to extract domains from

    Returns:
        List of domain variations: the normalized host, the host prefixed
        with ``www.`` and, for hosts with a subdomain, the parent domain.
        An empty list is returned when the URL normalizes to nothing, so
        callers never receive an empty string to match against.
    """
    normalized = normalize_url(url)
    if not normalized:
        return []

    variations = [normalized, f"www.{normalized}"]

    labels = normalized.split('.')
    is_ip_like = all(label.isdigit() for label in labels)
    if len(labels) > 2 and not is_ip_like:
        # Keep three labels for names such as "shop.example.co.uk"; keeping
        # only two would produce "co.uk", which matches unrelated sites.
        has_country_suffix = (
            len(labels[-1]) == 2 and labels[-2] in _COMMON_SECOND_LEVEL_LABELS
        )
        keep = 3 if has_country_suffix else 2
        if len(labels) > keep:
            # Add without subdomain
            variations.append('.'.join(labels[-keep:]))

    return variations


def _candidate_hosts(value: Any) -> List[str]:
    """Return the normalized host names found in one raw contact field."""
    if not value:
        return []

    hosts = []
    # A field may hold several addresses separated by commas or semicolons.
    text = str(value).replace(',', ' ').replace(';', ' ')
    for token in text.split():
        host = normalize_url(token.strip('<>()[]"\''))
        # For e-mail addresses only the part after "@" names a host.
        host = host.rsplit('@', 1)[-1].strip('.')
        if host:
            hosts.append(host)
    return hosts


def _hosts_match(host: str, domain: str) -> bool:
    """Return True when ``host`` and ``domain`` name the same site."""
    if host == domain:
        return True
    # ``host`` is a subdomain of the searched domain.
    if '.' in domain and host.endswith('.' + domain):
        return True
    # The searched domain is a subdomain of ``host``.
    if '.' in host:
        return domain.endswith('.' + host)
    # Bare names (e.g. a company called "example") match the leading label.
    return host == domain.split('.', 1)[0]


def _contact_matches(contact: Dict[Any, Any], search_domains: Any) -> bool:
    """Return True when any domain-bearing field of ``contact`` matches."""
    for key in _DOMAIN_BEARING_FIELDS:
        for host in _candidate_hosts(contact.get(key)):
            if any(_hosts_match(host, domain) for domain in search_domains):
                return True
    return False


def filter_contacts_by_url(contacts, search_url):
    """
    Enhanced filter contacts based on the search URL.

    This is a client-side filter in case the API doesn't filter properly.
    Hosts are compared on label boundaries rather than by substring, so
    "notexample.com" or an empty field no longer matches "example.com".

    Args:
        contacts: List of contact dicts; any other value is returned as is.
        search_url: The URL whose contacts should be kept.

    Returns:
        The contacts (in their original order, each at most once) that
        mention the searched domain in a website, domain, e-mail, company,
        url or source field. Entries that are not dicts are dropped.
    """
    if not isinstance(contacts, list):
        return contacts

    # Normalizing again collapses the "www." variation and removes empty
    # entries, which would otherwise match every contact.
    search_domains = {
        normalize_url(domain) for domain in extract_domain_variations(search_url)
    }
    search_domains.discard("")
    if not search_domains:
        return []

    return [
        contact
        for contact in contacts
        if isinstance(contact, dict) and _contact_matches(contact, search_domains)
    ]
def _size_or_na(value: Any) -> Any:
    """Return ``len(value)`` when it has a length, otherwise ``'N/A'``."""
    try:
        return len(value)
    except TypeError:
        return 'N/A'


def _unwrap_api_payload(result: Any) -> Any:
    """Return the contact payload carried by a parsed API response."""
    # Hugging Face Spaces API returns data in a different format:
    # the actual payload is the first element of the "data" list.
    if isinstance(result, dict) and 'data' in result:
        data = result['data']
        if not data:
            return {}
        if isinstance(data, (list, tuple)):
            return data[0]
        # "data" is already the payload (e.g. a dict); indexing it with [0]
        # would raise instead of showing the response.
        return data
    return result


def _render_search_response(result: Any, url: str, original_url: str) -> tuple:
    """Filter a parsed API response for ``url`` and build both UI outputs."""
    # Serialize first so the raw pane shows exactly what the API returned,
    # regardless of the client-side filtering below.
    raw_json = json.dumps(result, indent=2)
    payload = _unwrap_api_payload(result)

    if isinstance(payload, list):
        candidates, wrapped = payload, False
    elif isinstance(payload, dict) and isinstance(payload.get('contacts'), list):
        candidates, wrapped = payload['contacts'], True
    else:
        candidates, wrapped = None, False

    if candidates is None:
        # Single contact or unknown format
        formatted_output = format_contact_results(payload, url)
        if isinstance(payload, dict):
            filtered_size = _size_or_na(payload.get('contacts', []))
        else:
            filtered_size = 'N/A'
    else:
        # Apply client-side filtering
        matches = filter_contacts_by_url(candidates, url)
        if not matches:
            return (
                f"No contacts found for: {original_url}\n\n"
                "This URL may not be in our database.",
                raw_json,
            )

        formatted_output = ""
        # A large response that filters down to a handful of rows most likely
        # means the API returned every contact instead of URL-specific ones.
        if len(candidates) > 50 and len(matches) < 5:
            formatted_output = (
                f"**Warning:** API returned {len(candidates)} total contacts. "
                f"After filtering for '{original_url}', "
                f"found {len(matches)} matches.\n\n"
                "These results may not be accurate. "
                "The URL might not be in our database.\n\n"
            )
        to_format = {'contacts': matches} if wrapped else matches
        formatted_output += format_contact_results(to_format, url)
        filtered_size = len(matches)

    if isinstance(result, dict):
        original_size = _size_or_na(result.get('data', [result]))
    elif isinstance(result, list):
        original_size = len(result)
    else:
        original_size = 'N/A'

    # Add debugging info
    formatted_output += (
        "\n\n**Debug Info:**\n"
        f"- Request URL: {API_ENDPOINT}\n"
        f"- Search URL: {url}\n"
        f"- Original response size: {original_size}\n"
        f"- Filtered results: {filtered_size}\n"
        "- Client-side filtering applied: Yes\n"
    )
    return formatted_output, raw_json


def search_contacts(url: str) -> tuple[str, str]:
    """
    Search for contacts associated with a given URL using the Hugging Face API.

    The URL is sent as the ``url`` query parameter of a GET request to
    ``API_ENDPOINT``; the response is filtered client-side and formatted.
    Failures are reported through the returned message rather than raised,
    because the result is displayed directly in the UI.

    Args:
        url: The website URL to search for contacts

    Returns:
        Tuple of (formatted_results, raw_json). ``raw_json`` is the
        unmodified API response, or "" when no response could be parsed
        (the response text is returned instead for invalid JSON).
    """
    if not isinstance(url, str) or not url.strip():
        return "Please enter a website URL", ""

    # Clean and validate URL
    original_url = url.strip()
    url = original_url
    # Compare case-insensitively so "HTTP://..." is not prefixed a second time.
    if not url.lower().startswith(('http://', 'https://')):
        url = 'https://' + url

    try:
        # Make the API request to the hardcoded endpoint using GET
        response = requests.get(API_ENDPOINT, params={'url': url}, timeout=30)

        if response.status_code != 200:
            return f"API Error ({response.status_code}): {response.text}", ""

        # Only the decoding step may report "invalid JSON"; every JSON decode
        # error raised by requests/json/simplejson derives from ValueError.
        try:
            result = response.json()
        except ValueError:
            return "Error: Invalid JSON response from API", response.text

        return _render_search_response(result, url, original_url)
    except requests.exceptions.Timeout:
        return "Request timeout. Please try again.", ""
    except requests.exceptions.ConnectionError:
        return "Connection error. Please check your API endpoint.", ""
    except Exception as e:
        # UI boundary: show the problem to the user instead of crashing.
        return f"Error: {str(e)}", ""
# Display label -> contact keys tried in order; the first truthy value wins.
_CONTACT_FIELD_ALIASES = (
    ("Name", ('name', 'full_name', 'contact_name')),
    ("Email", ('email', 'email_address')),
    ("Title", ('title', 'job_title', 'position')),
    ("Company", ('company', 'organization')),
    ("Phone", ('phone', 'phone_number')),
    ("LinkedIn", ('linkedin', 'linkedin_url')),
)

_KNOWN_CONTACT_KEYS = frozenset(
    key for _, aliases in _CONTACT_FIELD_ALIASES for key in aliases
)


def _format_contact(index: int, contact: Any) -> str:
    """Render one contact entry as a Markdown section."""
    lines = [f"## Contact {index}\n"]

    if not isinstance(contact, dict):
        # Entries of unknown shape are shown verbatim instead of raising.
        lines.append(f"{contact}\n\n")
        return "".join(lines)

    # Handle different contact field names
    for label, aliases in _CONTACT_FIELD_ALIASES:
        value = next((contact[key] for key in aliases if contact.get(key)), "N/A")
        lines.append(f"- **{label}:** {value}\n")
    lines.append("\n")

    # Add any additional fields
    additional_fields = {
        key: value
        for key, value in contact.items()
        if key not in _KNOWN_CONTACT_KEYS
    }
    if additional_fields:
        lines.append("**Additional Information:**\n")
        lines.extend(
            f"- **{str(key).replace('_', ' ').title()}:** {value}\n"
            for key, value in additional_fields.items()
        )
        lines.append("\n")

    lines.append("---\n\n")
    return "".join(lines)


def format_contact_results(results: Any, url: str) -> str:
    """
    Format the API results into a readable format.

    Args:
        results: The JSON response from the API. Accepted shapes are a list
            of contacts, a dict wrapping such a list under ``contacts``,
            ``results`` or ``data``, or a single contact dict; any other
            value is printed as is.
        url: The searched URL

    Returns:
        Formatted string with contact information, ending with the local
        time at which the search completed.
    """
    parts = [f"# Contact Search Results for: {url}\n\n"]

    # Handle different possible result structures
    is_container = isinstance(results, (dict, list))
    contacts = results
    if isinstance(results, dict):
        for wrapper_key in ('contacts', 'results', 'data'):
            if wrapper_key in results:
                contacts = results[wrapper_key]
                break
        else:
            # A bare dict carrying contact fields is a single contact.
            if any(key in results for key in _KNOWN_CONTACT_KEYS):
                contacts = [results]

    if not is_container:
        parts.append(f"**Result:** {results}\n\n")
    elif isinstance(contacts, list) and contacts:
        parts.append(f"**Found {len(contacts)} contact(s):**\n\n")
        parts.extend(
            _format_contact(index, contact)
            for index, contact in enumerate(contacts, 1)
        )
    else:
        parts.append("No contacts found for this URL.\n\n")

    parts.append(
        f"*Search completed at: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}*"
    )
    return "".join(parts)
border: 1px solid #1e40af; border-radius: 0.5rem; padding: 1rem; margin: 1rem 0; color: #ffffff; } .info-box h3 { color: #ffffff !important; margin-top: 0; } .info-box p, .info-box ol, .info-box li { color: #e5e5e5 !important; } /* Footer styles */ .footer { text-align: center; margin-top: 2rem; padding-top: 2rem; border-top: 1px solid #404040; color: #cccccc !important; } /* Global text color overrides */ body, .gradio-container, .gradio-container * { color: #e5e5e5 !important; } /* Input field styles */ .gr-textbox input, .gr-textbox textarea, input[type="text"], input[type="email"], input[type="url"], textarea { color: #e5e5e5 !important; background-color: #2d2d2d !important; border: 1px solid #555555 !important; } /* Input placeholder styles */ .gr-textbox input::placeholder, .gr-textbox textarea::placeholder, input::placeholder, textarea::placeholder { color: #a0a0a0 !important; } /* Label styles */ label, .gr-label { color: #e5e5e5 !important; } /* Specific styling for Website URL input label */ .gr-textbox:has(input[placeholder*="example.com"]) label { color: #3b3a3a !important; } /* Markdown and code styles */ .gr-markdown, .gr-markdown * { color: #e5e5e5 !important; } .gr-code, .gr-code * { color: #3b3a3a !important; background-color: #2d2d2d !important; } /* Code block specific styling */ .gr-code pre, .gr-code pre * { color: #3b3a3a !important; } /* Form and block styles */ .gr-form, .gr-form * { color: #e5e5e5 !important; } .gr-block, .gr-block * { color: #e5e5e5 !important; } /* Panel and container styles */ .gr-panel { background-color: #2d2d2d !important; border: 1px solid #404040 !important; } .gr-container { background-color: #1a1a1a !important; } /* Button text visibility */ .gr-button { color: #ffffff !important; } /* Primary button with blue gradient */ .gr-button.primary { background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 50%, #1e40af 100%) !important; border: none !important; box-shadow: 0 4px 15px rgba(30, 58, 138, 0.3) !important; 
transition: all 0.3s ease !important; } .gr-button.primary:hover { background: linear-gradient(135deg, #1e40af 0%, #3b82f6 50%, #2563eb 100%) !important; box-shadow: 0 6px 20px rgba(30, 58, 138, 0.4) !important; transform: translateY(-2px) !important; } /* Ensure info text is visible */ .gr-info { color: #a0a0a0 !important; } /* Additional specific overrides */ .gr-textbox-label, .gr-markdown-label, .gr-code-label { color: #e5e5e5 !important; } /* Override for Website URL label specifically */ .gr-textbox:has(input[placeholder*="example.com"]) .gr-textbox-label { color: #3b3a3a !important; } /* Override any remaining dark text */ p, span, div, h1, h2, h3, h4, h5, h6, li, td, th { color: #e5e5e5 !important; } /* Ensure all text inputs have proper contrast */ input:focus, textarea:focus { color: #e5e5e5 !important; background-color: #2d2d2d !important; border-color: #1e40af !important; } /* Specific targeting for the Website URL input text */ .gr-textbox input[placeholder*="example.com"] { color: #3b3a3a !important; } /* JSON code block text color */ .gr-code[data-testid*="json"] pre, .gr-code[data-testid*="json"] code, .gr-code[language="json"] pre, .gr-code[language="json"] code { color: #3b3a3a !important; } """) as demo: # Header gr.HTML("""
API Endpoint: {API_ENDPOINT}
Note: The system will filter results to match your specific URL and warn you if the URL might not be in the database.