JayBene1 committed on
Commit
6fdcfd0
·
verified ·
1 Parent(s): c246f70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -121
app.py CHANGED
@@ -370,42 +370,8 @@ def parse_csv_file(file_obj):
370
  debug_info.append(f"Error parsing CSV: {e}")
371
  return [], debug_info
372
 
373
- def rank_contact_by_title(job_title):
374
- """Rank contacts by job title priority (lower number = higher priority)"""
375
- title_lower = job_title.lower()
376
-
377
- # Define ranking hierarchy
378
- if 'president' in title_lower:
379
- return 1
380
- elif 'ceo' in title_lower or 'chief executive' in title_lower:
381
- return 2
382
- elif 'cfo' in title_lower or 'chief financial' in title_lower:
383
- return 3
384
- elif 'coo' in title_lower or 'chief operating' in title_lower:
385
- return 4
386
- elif 'vice president' in title_lower or 'vp' in title_lower or 'v.p.' in title_lower:
387
- return 5
388
- elif 'controller' in title_lower:
389
- return 6
390
- elif 'general manager' in title_lower or 'gm' in title_lower:
391
- return 7
392
- else:
393
- return 100 # All other positions get lower priority
394
-
395
- def get_best_contact_for_website(website_url):
396
- """Get the highest-ranked contact for a specific website"""
397
- contacts = simulate_website_scraping(website_url)
398
- if not contacts:
399
- return None
400
-
401
- # Sort contacts by job title ranking
402
- contacts_with_rank = [(contact, rank_contact_by_title(contact['job_title'])) for contact in contacts]
403
- contacts_with_rank.sort(key=lambda x: x[1]) # Sort by rank (lower number = higher priority)
404
-
405
- return contacts_with_rank[0][0] # Return the highest-ranked contact
406
-
407
  def search_csv_websites(csv_file, max_results=10):
408
- """Search for contacts from websites listed in CSV file and populate the CSV"""
409
  if csv_file is None:
410
  return "Please upload a CSV file", ""
411
 
@@ -425,78 +391,45 @@ def search_csv_websites(csv_file, max_results=10):
425
  error_msg += "4. Verify the CSV file is not corrupted\n"
426
  return error_msg, ""
427
 
428
- # Read the original CSV file to preserve all data
429
- with open(csv_file.name, 'r', encoding='utf-8') as f:
430
- content = f.read()
431
-
432
- csv_reader = csv.reader(io.StringIO(content))
433
- original_rows = list(csv_reader)
434
-
435
- # Process websites and find contacts
436
  all_contacts = []
437
  processed_websites = []
438
- contacts_by_website = {}
439
-
440
- # Create a mapping of websites to their row indices
441
- website_to_row = {}
442
- for row_idx, row in enumerate(original_rows):
443
- if len(row) > 7: # Column H exists
444
- website_url = row[7].strip()
445
- if website_url and is_valid_url(website_url):
446
- website_to_row[website_url] = row_idx
447
 
448
- # Search each website and get the best contact
449
  for website in websites[:20]: # Limit to first 20 websites
450
  print(f"Processing website: {website}")
451
- best_contact = get_best_contact_for_website(website)
452
-
453
- if best_contact:
454
- contacts_by_website[website] = best_contact
455
- all_contacts.append(best_contact)
456
  processed_websites.append(website)
457
- print(f"Found best contact for {website}: {best_contact['first_name']} {best_contact['last_name']} - {best_contact['job_title']}")
458
  else:
459
  print(f"No contacts found for {website}")
460
 
461
- # Create updated CSV with contact information
462
- updated_rows = []
463
- for row_idx, row in enumerate(original_rows):
464
- # Make a copy of the row and ensure it has enough columns
465
- new_row = row[:]
466
-
467
- # Extend row to have at least 13 columns (A-M)
468
- while len(new_row) < 13:
469
- new_row.append("")
470
-
471
- # Check if this row has a website we found contacts for
472
- if len(row) > 7:
473
- website_url = row[7].strip()
474
- if website_url in contacts_by_website:
475
- contact = contacts_by_website[website_url]
476
-
477
- # Populate contact information in specified columns
478
- new_row[8] = contact['first_name'] # Column I (index 8)
479
- new_row[9] = contact['last_name'] # Column J (index 9)
480
- new_row[10] = contact['job_title'] # Column K (index 10)
481
- new_row[11] = contact['phone'] # Column L (index 11)
482
- new_row[12] = contact['email'] # Column M (index 12)
483
-
484
- updated_rows.append(new_row)
485
 
486
- if not all_contacts:
487
  result_msg = f"No contacts found for the {len(websites)} websites from the CSV file.\n\n"
488
  result_msg += "DEBUG INFORMATION:\n" + debug_text + "\n\n"
489
  result_msg += f"Websites processed: {', '.join(websites[:10])}\n"
490
  result_msg += "This might be because the websites are not in our sample database."
491
  return result_msg, ""
492
 
493
- # Format results text
494
  results_text = f"CONTACT DISCOVERY REPORT\n"
495
  results_text += f"CSV Processing Details:\n"
496
  results_text += f"Total Websites in CSV: {len(websites)}\n"
497
  results_text += f"Websites Processed: {len(processed_websites)}\n"
498
  results_text += f"Websites with Contacts: {len(processed_websites)}\n"
499
- results_text += f"Contacts Found: {len(all_contacts)}\n"
500
  results_text += f"Processed Websites: {', '.join(processed_websites)}\n"
501
  results_text += f"{'='*60}\n\n"
502
 
@@ -504,17 +437,7 @@ def search_csv_websites(csv_file, max_results=10):
504
  results_text += "DEBUG INFORMATION:\n" + debug_text + "\n\n"
505
  results_text += f"{'='*60}\n\n"
506
 
507
- # Show contact rankings
508
- results_text += "CONTACT RANKINGS (by job title priority):\n"
509
- for i, contact in enumerate(all_contacts, 1):
510
- rank = rank_contact_by_title(contact['job_title'])
511
- results_text += f"{i}. {contact['first_name']} {contact['last_name']} - {contact['job_title']} "
512
- results_text += f"(Priority Rank: {rank}) - {contact['company']}\n"
513
-
514
- results_text += f"\n{'='*60}\n\n"
515
-
516
- # Show detailed contact information
517
- for i, contact in enumerate(all_contacts, 1):
518
  results_text += f"CONTACT #{i}\n"
519
  results_text += f"Name: {contact['first_name']} {contact['last_name']}\n"
520
  results_text += f"Position: {contact['job_title']}\n"
@@ -523,17 +446,12 @@ def search_csv_websites(csv_file, max_results=10):
523
  results_text += f"Company: {contact['company']}\n"
524
  results_text += f"Website: {contact['website']}\n\n"
525
 
526
- # Create CSV output with updated data
527
- csv_output = io.StringIO()
528
- csv_writer = csv.writer(csv_output)
529
-
530
- for row in updated_rows:
531
- csv_writer.writerow(row)
532
 
533
- csv_content = csv_output.getvalue()
534
- csv_output.close()
535
-
536
- return results_text, csv_content
537
 
538
  except Exception as e:
539
  return f"Error processing CSV file: {str(e)}", ""
@@ -727,22 +645,11 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
727
  label="Target Website URL",
728
  placeholder="Enter company website (e.g., techflowsolutions.com)",
729
  value="",
730
- elem_classes=["custom-input"]
731
  )
732
 
733
- gr.HTML("""
734
- <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
735
- <strong>CSV Format - Multiple Options:</strong><br>
736
- <strong>Option 1:</strong> Website URLs in <strong>Column H (8th column)</strong><br>
737
- <strong>Option 2:</strong> Column header named 'website', 'url', 'domain', 'site', or 'web'<br>
738
- <strong>Option 3:</strong> System will auto-detect columns with valid URLs<br><br>
739
- <strong>Examples:</strong> techflowsolutions.com, https://example.com, www.company.com<br>
740
- <strong>Note:</strong> The system will show detailed debugging information about your CSV structure
741
- </div>
742
- """)
743
-
744
  with gr.Row():
745
- csv_max_results = gr.Slider(
746
  minimum=1,
747
  maximum=20,
748
  value=8,
@@ -789,6 +696,17 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
789
  elem_classes=["custom-input"]
790
  )
791
 
 
 
 
 
 
 
 
 
 
 
 
792
  with gr.Row():
793
  csv_max_results = gr.Slider(
794
  minimum=1,
@@ -825,7 +743,67 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
825
  elem_classes=["results-container"]
826
  )
827
 
828
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
829
 
830
  # Footer
831
  gr.HTML("""
 
370
  debug_info.append(f"Error parsing CSV: {e}")
371
  return [], debug_info
372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
  def search_csv_websites(csv_file, max_results=10):
374
+ """Search for contacts from websites listed in CSV file"""
375
  if csv_file is None:
376
  return "Please upload a CSV file", ""
377
 
 
391
  error_msg += "4. Verify the CSV file is not corrupted\n"
392
  return error_msg, ""
393
 
 
 
 
 
 
 
 
 
394
  all_contacts = []
395
  processed_websites = []
 
 
 
 
 
 
 
 
 
396
 
397
+ # Search each website
398
  for website in websites[:20]: # Limit to first 20 websites
399
  print(f"Processing website: {website}")
400
+ contacts = simulate_website_scraping(website)
401
+ if contacts:
402
+ all_contacts.extend(contacts)
 
 
403
  processed_websites.append(website)
404
+ print(f"Found {len(contacts)} contacts for {website}")
405
  else:
406
  print(f"No contacts found for {website}")
407
 
408
+ # Remove duplicates based on email
409
+ unique_contacts = []
410
+ seen_emails = set()
411
+ for contact in all_contacts:
412
+ if contact['email'] not in seen_emails:
413
+ unique_contacts.append(contact)
414
+ seen_emails.add(contact['email'])
415
+
416
+ # Limit results
417
+ unique_contacts = unique_contacts[:max_results]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
 
419
+ if not unique_contacts:
420
  result_msg = f"No contacts found for the {len(websites)} websites from the CSV file.\n\n"
421
  result_msg += "DEBUG INFORMATION:\n" + debug_text + "\n\n"
422
  result_msg += f"Websites processed: {', '.join(websites[:10])}\n"
423
  result_msg += "This might be because the websites are not in our sample database."
424
  return result_msg, ""
425
 
426
+ # Format results
427
  results_text = f"CONTACT DISCOVERY REPORT\n"
428
  results_text += f"CSV Processing Details:\n"
429
  results_text += f"Total Websites in CSV: {len(websites)}\n"
430
  results_text += f"Websites Processed: {len(processed_websites)}\n"
431
  results_text += f"Websites with Contacts: {len(processed_websites)}\n"
432
+ results_text += f"Unique Contacts Found: {len(unique_contacts)}\n"
433
  results_text += f"Processed Websites: {', '.join(processed_websites)}\n"
434
  results_text += f"{'='*60}\n\n"
435
 
 
437
  results_text += "DEBUG INFORMATION:\n" + debug_text + "\n\n"
438
  results_text += f"{'='*60}\n\n"
439
 
440
+ for i, contact in enumerate(unique_contacts, 1):
 
 
 
 
 
 
 
 
 
 
441
  results_text += f"CONTACT #{i}\n"
442
  results_text += f"Name: {contact['first_name']} {contact['last_name']}\n"
443
  results_text += f"Position: {contact['job_title']}\n"
 
446
  results_text += f"Company: {contact['company']}\n"
447
  results_text += f"Website: {contact['website']}\n\n"
448
 
449
+ # Create CSV output
450
+ csv_output = "First Name,Last Name,Job Title,Email,Phone,Company,Website\n"
451
+ for contact in unique_contacts:
452
+ csv_output += f"{contact['first_name']},{contact['last_name']},{contact['job_title']},{contact['email']},{contact['phone']},{contact['company']},{contact['website']}\n"
 
 
453
 
454
+ return results_text, csv_output
 
 
 
455
 
456
  except Exception as e:
457
  return f"Error processing CSV file: {str(e)}", ""
 
645
  label="Target Website URL",
646
  placeholder="Enter company website (e.g., techflowsolutions.com)",
647
  value="",
648
+ elem_classes=["custom-input"]
649
  )
650
 
 
 
 
 
 
 
 
 
 
 
 
651
  with gr.Row():
652
+ max_results = gr.Slider(
653
  minimum=1,
654
  maximum=20,
655
  value=8,
 
696
  elem_classes=["custom-input"]
697
  )
698
 
699
+ gr.HTML("""
700
+ <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
701
+ <strong>CSV Format - Multiple Options:</strong><br>
702
+ <strong>Option 1:</strong> Website URLs in <strong>Column H (8th column)</strong><br>
703
+ <strong>Option 2:</strong> Column header named 'website', 'url', 'domain', 'site', or 'web'<br>
704
+ <strong>Option 3:</strong> System will auto-detect columns with valid URLs<br><br>
705
+ <strong>Examples:</strong> techflowsolutions.com, https://example.com, www.company.com<br>
706
+ <strong>Note:</strong> The system will show detailed debugging information about your CSV structure
707
+ </div>
708
+ """)
709
+
710
  with gr.Row():
711
  csv_max_results = gr.Slider(
712
  minimum=1,
 
743
  elem_classes=["results-container"]
744
  )
745
 
746
+ # Sample websites section
747
+ with gr.Accordion("Sample Websites Database", open=False):
748
+ gr.HTML('<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af;">')
749
+ sample_websites = gr.Textbox(
750
+ label="Available Websites in Database",
751
+ value=get_all_available_websites(),
752
+ lines=8,
753
+ interactive=False,
754
+ elem_classes=["custom-input"]
755
+ )
756
+ gr.HTML('</div>')
757
+
758
+ # Quick search buttons
759
+ gr.HTML('<div class="section-header">Quick Access Sample Websites</div>')
760
+
761
+ with gr.Row():
762
+ quick_btn1 = gr.Button("TechFlow Solutions", size="sm", elem_classes=["secondary-btn"])
763
+ quick_btn2 = gr.Button("GreenLeaf Consulting", size="sm", elem_classes=["secondary-btn"])
764
+ quick_btn3 = gr.Button("BlueSky Marketing", size="sm", elem_classes=["secondary-btn"])
765
+ quick_btn4 = gr.Button("Quantum Dynamics", size="sm", elem_classes=["secondary-btn"])
766
+
767
+ with gr.Row():
768
+ quick_btn5 = gr.Button("Stellar Logistics", size="sm", elem_classes=["secondary-btn"])
769
+ quick_btn6 = gr.Button("Nexus Financial", size="sm", elem_classes=["secondary-btn"])
770
+ quick_btn7 = gr.Button("Horizon Health", size="sm", elem_classes=["secondary-btn"])
771
+ quick_btn8 = gr.Button("Phoenix Manufacturing", size="sm", elem_classes=["secondary-btn"])
772
+
773
+ # Event handlers
774
+ search_btn.click(
775
+ fn=search_website_contacts,
776
+ inputs=[website_input, max_results],
777
+ outputs=[results_display, csv_output]
778
+ )
779
+
780
+ csv_search_btn.click(
781
+ fn=search_csv_websites,
782
+ inputs=[csv_file, csv_max_results],
783
+ outputs=[csv_results_display, csv_export_output]
784
+ )
785
+
786
+ # Quick search button handlers
787
+ quick_btn1.click(lambda: "techflowsolutions.com", outputs=website_input)
788
+ quick_btn2.click(lambda: "greenleafconsult.com", outputs=website_input)
789
+ quick_btn3.click(lambda: "blueskymarketing.net", outputs=website_input)
790
+ quick_btn4.click(lambda: "quantumdynamics.org", outputs=website_input)
791
+ quick_btn5.click(lambda: "stellarlogistics.biz", outputs=website_input)
792
+ quick_btn6.click(lambda: "nexusfinancial.pro", outputs=website_input)
793
+ quick_btn7.click(lambda: "horizonhealth.care", outputs=website_input)
794
+ quick_btn8.click(lambda: "phoenixmfg.com", outputs=website_input)
795
+
796
+ # Examples
797
+ gr.Examples(
798
+ examples=[
799
+ ["techflowsolutions.com", 5],
800
+ ["greenleafconsult.com", 3],
801
+ ["blueskymarketing.net", 4],
802
+ ["quantumdynamics.org", 6]
803
+ ],
804
+ inputs=[website_input, max_results],
805
+ label="Sample Searches"
806
+ )
807
 
808
  # Footer
809
  gr.HTML("""