Update app.py
Browse files
app.py
CHANGED
|
@@ -370,42 +370,8 @@ def parse_csv_file(file_obj):
|
|
| 370 |
debug_info.append(f"Error parsing CSV: {e}")
|
| 371 |
return [], debug_info
|
| 372 |
|
| 373 |
-
def rank_contact_by_title(job_title):
    """Rank a contact by job-title seniority (lower number = higher priority).

    Priority order: President (1), CEO (2), CFO (3), COO (4),
    Vice President (5), Controller (6), General Manager (7), anything else (100).
    When a title names several roles, the most senior one wins.

    Args:
        job_title: The contact's job title. ``None`` or an empty value is
            treated as an unranked title.

    Returns:
        int: The priority rank; 100 for unrecognised or missing titles.
    """
    if not job_title:
        # A missing title cannot be ranked; give it the lowest priority
        # instead of failing on ``None.lower()``.
        return 100

    title_lower = str(job_title).lower()

    # Collapse hyphens and runs of whitespace so "Vice-President" and
    # "vice  president" are recognised the same way as "vice president".
    normalized = " ".join(title_lower.replace("-", " ").split())

    # Whole-word tokens: abbreviations must match as words, otherwise 'coo'
    # fires on "coordinator" and 'gm' fires on "segment".
    words = set(
        "".join(ch if ch.isalnum() else " " for ch in title_lower).split()
    )

    is_vice_president = (
        "vice president" in normalized
        or "v.p." in title_lower
        or not words.isdisjoint(("vp", "svp", "evp", "avp"))
    )

    # Hide "vice president" before looking for "president"; otherwise every
    # vice president would be ranked as a president.
    without_vice = normalized.replace("vice president", " ")

    if "president" in without_vice:
        return 1
    if "ceo" in words or "chief executive" in normalized:
        return 2
    if "cfo" in words or "chief financial" in normalized:
        return 3
    if "coo" in words or "chief operating" in normalized:
        return 4
    if is_vice_president:
        return 5
    if "controller" in normalized:
        return 6
    if "general manager" in normalized or "gm" in words:
        return 7
    return 100  # All other positions get lower priority
|
| 394 |
-
|
| 395 |
-
def get_best_contact_for_website(website_url):
    """Return the highest-priority contact found for ``website_url``.

    The contacts returned by ``simulate_website_scraping`` are scored with
    ``rank_contact_by_title`` on their ``'job_title'`` entry, and the one
    with the lowest rank number is returned.

    Returns:
        The best-ranked contact, or ``None`` when no contacts were found.
    """
    scraped = simulate_website_scraping(website_url)
    if not scraped:
        return None

    # min() returns the first of several equally ranked contacts, which is
    # the same pick as a stable ascending sort followed by taking element 0.
    return min(scraped, key=lambda person: rank_contact_by_title(person['job_title']))
|
| 406 |
-
|
| 407 |
def search_csv_websites(csv_file, max_results=10):
|
| 408 |
-
"""Search for contacts from websites listed in CSV file
|
| 409 |
if csv_file is None:
|
| 410 |
return "Please upload a CSV file", ""
|
| 411 |
|
|
@@ -425,78 +391,45 @@ def search_csv_websites(csv_file, max_results=10):
|
|
| 425 |
error_msg += "4. Verify the CSV file is not corrupted\n"
|
| 426 |
return error_msg, ""
|
| 427 |
|
| 428 |
-
# Read the original CSV file to preserve all data
|
| 429 |
-
with open(csv_file.name, 'r', encoding='utf-8') as f:
|
| 430 |
-
content = f.read()
|
| 431 |
-
|
| 432 |
-
csv_reader = csv.reader(io.StringIO(content))
|
| 433 |
-
original_rows = list(csv_reader)
|
| 434 |
-
|
| 435 |
-
# Process websites and find contacts
|
| 436 |
all_contacts = []
|
| 437 |
processed_websites = []
|
| 438 |
-
contacts_by_website = {}
|
| 439 |
-
|
| 440 |
-
# Create a mapping of websites to their row indices
|
| 441 |
-
website_to_row = {}
|
| 442 |
-
for row_idx, row in enumerate(original_rows):
|
| 443 |
-
if len(row) > 7: # Column H exists
|
| 444 |
-
website_url = row[7].strip()
|
| 445 |
-
if website_url and is_valid_url(website_url):
|
| 446 |
-
website_to_row[website_url] = row_idx
|
| 447 |
|
| 448 |
-
# Search each website
|
| 449 |
for website in websites[:20]: # Limit to first 20 websites
|
| 450 |
print(f"Processing website: {website}")
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
contacts_by_website[website] = best_contact
|
| 455 |
-
all_contacts.append(best_contact)
|
| 456 |
processed_websites.append(website)
|
| 457 |
-
print(f"Found
|
| 458 |
else:
|
| 459 |
print(f"No contacts found for {website}")
|
| 460 |
|
| 461 |
-
#
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
# Check if this row has a website we found contacts for
|
| 472 |
-
if len(row) > 7:
|
| 473 |
-
website_url = row[7].strip()
|
| 474 |
-
if website_url in contacts_by_website:
|
| 475 |
-
contact = contacts_by_website[website_url]
|
| 476 |
-
|
| 477 |
-
# Populate contact information in specified columns
|
| 478 |
-
new_row[8] = contact['first_name'] # Column I (index 8)
|
| 479 |
-
new_row[9] = contact['last_name'] # Column J (index 9)
|
| 480 |
-
new_row[10] = contact['job_title'] # Column K (index 10)
|
| 481 |
-
new_row[11] = contact['phone'] # Column L (index 11)
|
| 482 |
-
new_row[12] = contact['email'] # Column M (index 12)
|
| 483 |
-
|
| 484 |
-
updated_rows.append(new_row)
|
| 485 |
|
| 486 |
-
if not
|
| 487 |
result_msg = f"No contacts found for the {len(websites)} websites from the CSV file.\n\n"
|
| 488 |
result_msg += "DEBUG INFORMATION:\n" + debug_text + "\n\n"
|
| 489 |
result_msg += f"Websites processed: {', '.join(websites[:10])}\n"
|
| 490 |
result_msg += "This might be because the websites are not in our sample database."
|
| 491 |
return result_msg, ""
|
| 492 |
|
| 493 |
-
# Format results
|
| 494 |
results_text = f"CONTACT DISCOVERY REPORT\n"
|
| 495 |
results_text += f"CSV Processing Details:\n"
|
| 496 |
results_text += f"Total Websites in CSV: {len(websites)}\n"
|
| 497 |
results_text += f"Websites Processed: {len(processed_websites)}\n"
|
| 498 |
results_text += f"Websites with Contacts: {len(processed_websites)}\n"
|
| 499 |
-
results_text += f"Contacts Found: {len(
|
| 500 |
results_text += f"Processed Websites: {', '.join(processed_websites)}\n"
|
| 501 |
results_text += f"{'='*60}\n\n"
|
| 502 |
|
|
@@ -504,17 +437,7 @@ def search_csv_websites(csv_file, max_results=10):
|
|
| 504 |
results_text += "DEBUG INFORMATION:\n" + debug_text + "\n\n"
|
| 505 |
results_text += f"{'='*60}\n\n"
|
| 506 |
|
| 507 |
-
|
| 508 |
-
results_text += "CONTACT RANKINGS (by job title priority):\n"
|
| 509 |
-
for i, contact in enumerate(all_contacts, 1):
|
| 510 |
-
rank = rank_contact_by_title(contact['job_title'])
|
| 511 |
-
results_text += f"{i}. {contact['first_name']} {contact['last_name']} - {contact['job_title']} "
|
| 512 |
-
results_text += f"(Priority Rank: {rank}) - {contact['company']}\n"
|
| 513 |
-
|
| 514 |
-
results_text += f"\n{'='*60}\n\n"
|
| 515 |
-
|
| 516 |
-
# Show detailed contact information
|
| 517 |
-
for i, contact in enumerate(all_contacts, 1):
|
| 518 |
results_text += f"CONTACT #{i}\n"
|
| 519 |
results_text += f"Name: {contact['first_name']} {contact['last_name']}\n"
|
| 520 |
results_text += f"Position: {contact['job_title']}\n"
|
|
@@ -523,17 +446,12 @@ def search_csv_websites(csv_file, max_results=10):
|
|
| 523 |
results_text += f"Company: {contact['company']}\n"
|
| 524 |
results_text += f"Website: {contact['website']}\n\n"
|
| 525 |
|
| 526 |
-
# Create CSV output
|
| 527 |
-
csv_output =
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
for row in updated_rows:
|
| 531 |
-
csv_writer.writerow(row)
|
| 532 |
|
| 533 |
-
|
| 534 |
-
csv_output.close()
|
| 535 |
-
|
| 536 |
-
return results_text, csv_content
|
| 537 |
|
| 538 |
except Exception as e:
|
| 539 |
return f"Error processing CSV file: {str(e)}", ""
|
|
@@ -727,22 +645,11 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
|
|
| 727 |
label="Target Website URL",
|
| 728 |
placeholder="Enter company website (e.g., techflowsolutions.com)",
|
| 729 |
value="",
|
| 730 |
-
|
| 731 |
)
|
| 732 |
|
| 733 |
-
gr.HTML("""
|
| 734 |
-
<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
|
| 735 |
-
<strong>CSV Format - Multiple Options:</strong><br>
|
| 736 |
-
<strong>Option 1:</strong> Website URLs in <strong>Column H (8th column)</strong><br>
|
| 737 |
-
<strong>Option 2:</strong> Column header named 'website', 'url', 'domain', 'site', or 'web'<br>
|
| 738 |
-
<strong>Option 3:</strong> System will auto-detect columns with valid URLs<br><br>
|
| 739 |
-
<strong>Examples:</strong> techflowsolutions.com, https://example.com, www.company.com<br>
|
| 740 |
-
<strong>Note:</strong> The system will show detailed debugging information about your CSV structure
|
| 741 |
-
</div>
|
| 742 |
-
""")
|
| 743 |
-
|
| 744 |
with gr.Row():
|
| 745 |
-
|
| 746 |
minimum=1,
|
| 747 |
maximum=20,
|
| 748 |
value=8,
|
|
@@ -789,6 +696,17 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
|
|
| 789 |
elem_classes=["custom-input"]
|
| 790 |
)
|
| 791 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 792 |
with gr.Row():
|
| 793 |
csv_max_results = gr.Slider(
|
| 794 |
minimum=1,
|
|
@@ -825,7 +743,67 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
|
|
| 825 |
elem_classes=["results-container"]
|
| 826 |
)
|
| 827 |
|
| 828 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 829 |
|
| 830 |
# Footer
|
| 831 |
gr.HTML("""
|
|
|
|
| 370 |
debug_info.append(f"Error parsing CSV: {e}")
|
| 371 |
return [], debug_info
|
| 372 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
def search_csv_websites(csv_file, max_results=10):
|
| 374 |
+
"""Search for contacts from websites listed in CSV file"""
|
| 375 |
if csv_file is None:
|
| 376 |
return "Please upload a CSV file", ""
|
| 377 |
|
|
|
|
| 391 |
error_msg += "4. Verify the CSV file is not corrupted\n"
|
| 392 |
return error_msg, ""
|
| 393 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
all_contacts = []
|
| 395 |
processed_websites = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
|
| 397 |
+
# Search each website
|
| 398 |
for website in websites[:20]: # Limit to first 20 websites
|
| 399 |
print(f"Processing website: {website}")
|
| 400 |
+
contacts = simulate_website_scraping(website)
|
| 401 |
+
if contacts:
|
| 402 |
+
all_contacts.extend(contacts)
|
|
|
|
|
|
|
| 403 |
processed_websites.append(website)
|
| 404 |
+
print(f"Found {len(contacts)} contacts for {website}")
|
| 405 |
else:
|
| 406 |
print(f"No contacts found for {website}")
|
| 407 |
|
| 408 |
+
# Remove duplicates based on email
|
| 409 |
+
unique_contacts = []
|
| 410 |
+
seen_emails = set()
|
| 411 |
+
for contact in all_contacts:
|
| 412 |
+
if contact['email'] not in seen_emails:
|
| 413 |
+
unique_contacts.append(contact)
|
| 414 |
+
seen_emails.add(contact['email'])
|
| 415 |
+
|
| 416 |
+
# Limit results
|
| 417 |
+
unique_contacts = unique_contacts[:max_results]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
|
| 419 |
+
if not unique_contacts:
|
| 420 |
result_msg = f"No contacts found for the {len(websites)} websites from the CSV file.\n\n"
|
| 421 |
result_msg += "DEBUG INFORMATION:\n" + debug_text + "\n\n"
|
| 422 |
result_msg += f"Websites processed: {', '.join(websites[:10])}\n"
|
| 423 |
result_msg += "This might be because the websites are not in our sample database."
|
| 424 |
return result_msg, ""
|
| 425 |
|
| 426 |
+
# Format results
|
| 427 |
results_text = f"CONTACT DISCOVERY REPORT\n"
|
| 428 |
results_text += f"CSV Processing Details:\n"
|
| 429 |
results_text += f"Total Websites in CSV: {len(websites)}\n"
|
| 430 |
results_text += f"Websites Processed: {len(processed_websites)}\n"
|
| 431 |
results_text += f"Websites with Contacts: {len(processed_websites)}\n"
|
| 432 |
+
results_text += f"Unique Contacts Found: {len(unique_contacts)}\n"
|
| 433 |
results_text += f"Processed Websites: {', '.join(processed_websites)}\n"
|
| 434 |
results_text += f"{'='*60}\n\n"
|
| 435 |
|
|
|
|
| 437 |
results_text += "DEBUG INFORMATION:\n" + debug_text + "\n\n"
|
| 438 |
results_text += f"{'='*60}\n\n"
|
| 439 |
|
| 440 |
+
for i, contact in enumerate(unique_contacts, 1):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
results_text += f"CONTACT #{i}\n"
|
| 442 |
results_text += f"Name: {contact['first_name']} {contact['last_name']}\n"
|
| 443 |
results_text += f"Position: {contact['job_title']}\n"
|
|
|
|
| 446 |
results_text += f"Company: {contact['company']}\n"
|
| 447 |
results_text += f"Website: {contact['website']}\n\n"
|
| 448 |
|
| 449 |
+
# Create CSV output
|
| 450 |
+
csv_output = "First Name,Last Name,Job Title,Email,Phone,Company,Website\n"
|
| 451 |
+
for contact in unique_contacts:
|
| 452 |
+
csv_output += f"{contact['first_name']},{contact['last_name']},{contact['job_title']},{contact['email']},{contact['phone']},{contact['company']},{contact['website']}\n"
|
|
|
|
|
|
|
| 453 |
|
| 454 |
+
return results_text, csv_output
|
|
|
|
|
|
|
|
|
|
| 455 |
|
| 456 |
except Exception as e:
|
| 457 |
return f"Error processing CSV file: {str(e)}", ""
|
|
|
|
| 645 |
label="Target Website URL",
|
| 646 |
placeholder="Enter company website (e.g., techflowsolutions.com)",
|
| 647 |
value="",
|
| 648 |
+
elem_classes=["custom-input"]
|
| 649 |
)
|
| 650 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 651 |
with gr.Row():
|
| 652 |
+
max_results = gr.Slider(
|
| 653 |
minimum=1,
|
| 654 |
maximum=20,
|
| 655 |
value=8,
|
|
|
|
| 696 |
elem_classes=["custom-input"]
|
| 697 |
)
|
| 698 |
|
| 699 |
+
gr.HTML("""
|
| 700 |
+
<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
|
| 701 |
+
<strong>CSV Format - Multiple Options:</strong><br>
|
| 702 |
+
<strong>Option 1:</strong> Website URLs in <strong>Column H (8th column)</strong><br>
|
| 703 |
+
<strong>Option 2:</strong> Column header named 'website', 'url', 'domain', 'site', or 'web'<br>
|
| 704 |
+
<strong>Option 3:</strong> System will auto-detect columns with valid URLs<br><br>
|
| 705 |
+
<strong>Examples:</strong> techflowsolutions.com, https://example.com, www.company.com<br>
|
| 706 |
+
<strong>Note:</strong> The system will show detailed debugging information about your CSV structure
|
| 707 |
+
</div>
|
| 708 |
+
""")
|
| 709 |
+
|
| 710 |
with gr.Row():
|
| 711 |
csv_max_results = gr.Slider(
|
| 712 |
minimum=1,
|
|
|
|
| 743 |
elem_classes=["results-container"]
|
| 744 |
)
|
| 745 |
|
| 746 |
+
# Sample websites section
|
| 747 |
+
with gr.Accordion("Sample Websites Database", open=False):
|
| 748 |
+
gr.HTML('<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af;">')
|
| 749 |
+
sample_websites = gr.Textbox(
|
| 750 |
+
label="Available Websites in Database",
|
| 751 |
+
value=get_all_available_websites(),
|
| 752 |
+
lines=8,
|
| 753 |
+
interactive=False,
|
| 754 |
+
elem_classes=["custom-input"]
|
| 755 |
+
)
|
| 756 |
+
gr.HTML('</div>')
|
| 757 |
+
|
| 758 |
+
# Quick search buttons
|
| 759 |
+
gr.HTML('<div class="section-header">Quick Access Sample Websites</div>')
|
| 760 |
+
|
| 761 |
+
with gr.Row():
|
| 762 |
+
quick_btn1 = gr.Button("TechFlow Solutions", size="sm", elem_classes=["secondary-btn"])
|
| 763 |
+
quick_btn2 = gr.Button("GreenLeaf Consulting", size="sm", elem_classes=["secondary-btn"])
|
| 764 |
+
quick_btn3 = gr.Button("BlueSky Marketing", size="sm", elem_classes=["secondary-btn"])
|
| 765 |
+
quick_btn4 = gr.Button("Quantum Dynamics", size="sm", elem_classes=["secondary-btn"])
|
| 766 |
+
|
| 767 |
+
with gr.Row():
|
| 768 |
+
quick_btn5 = gr.Button("Stellar Logistics", size="sm", elem_classes=["secondary-btn"])
|
| 769 |
+
quick_btn6 = gr.Button("Nexus Financial", size="sm", elem_classes=["secondary-btn"])
|
| 770 |
+
quick_btn7 = gr.Button("Horizon Health", size="sm", elem_classes=["secondary-btn"])
|
| 771 |
+
quick_btn8 = gr.Button("Phoenix Manufacturing", size="sm", elem_classes=["secondary-btn"])
|
| 772 |
+
|
| 773 |
+
# Event handlers
|
| 774 |
+
search_btn.click(
|
| 775 |
+
fn=search_website_contacts,
|
| 776 |
+
inputs=[website_input, max_results],
|
| 777 |
+
outputs=[results_display, csv_output]
|
| 778 |
+
)
|
| 779 |
+
|
| 780 |
+
csv_search_btn.click(
|
| 781 |
+
fn=search_csv_websites,
|
| 782 |
+
inputs=[csv_file, csv_max_results],
|
| 783 |
+
outputs=[csv_results_display, csv_export_output]
|
| 784 |
+
)
|
| 785 |
+
|
| 786 |
+
# Quick search button handlers
|
| 787 |
+
quick_btn1.click(lambda: "techflowsolutions.com", outputs=website_input)
|
| 788 |
+
quick_btn2.click(lambda: "greenleafconsult.com", outputs=website_input)
|
| 789 |
+
quick_btn3.click(lambda: "blueskymarketing.net", outputs=website_input)
|
| 790 |
+
quick_btn4.click(lambda: "quantumdynamics.org", outputs=website_input)
|
| 791 |
+
quick_btn5.click(lambda: "stellarlogistics.biz", outputs=website_input)
|
| 792 |
+
quick_btn6.click(lambda: "nexusfinancial.pro", outputs=website_input)
|
| 793 |
+
quick_btn7.click(lambda: "horizonhealth.care", outputs=website_input)
|
| 794 |
+
quick_btn8.click(lambda: "phoenixmfg.com", outputs=website_input)
|
| 795 |
+
|
| 796 |
+
# Examples
|
| 797 |
+
gr.Examples(
|
| 798 |
+
examples=[
|
| 799 |
+
["techflowsolutions.com", 5],
|
| 800 |
+
["greenleafconsult.com", 3],
|
| 801 |
+
["blueskymarketing.net", 4],
|
| 802 |
+
["quantumdynamics.org", 6]
|
| 803 |
+
],
|
| 804 |
+
inputs=[website_input, max_results],
|
| 805 |
+
label="Sample Searches"
|
| 806 |
+
)
|
| 807 |
|
| 808 |
# Footer
|
| 809 |
gr.HTML("""
|