JayBene1 committed on
Commit
b5f0bb2
·
verified ·
1 Parent(s): 54ffe32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +274 -363
app.py CHANGED
@@ -1,8 +1,9 @@
1
  import gradio as gr
 
 
2
  import requests
3
  import re
4
  import json
5
- import csv
6
  import io
7
  from urllib.parse import urlparse, urljoin
8
  import time
@@ -215,6 +216,9 @@ CONTACTS_DB = [
215
  def extract_domain(url):
216
  """Extract domain from URL"""
217
  try:
 
 
 
218
  if not url.startswith(('http://', 'https://')):
219
  url = 'https://' + url
220
  parsed = urlparse(url)
@@ -232,188 +236,189 @@ def find_contacts_by_website(website_url):
232
  if not target_domain:
233
  return []
234
 
235
- print(f"Searching for domain: {target_domain}") # Debug info
236
-
237
  matching_contacts = []
238
  for contact in CONTACTS_DB:
239
  contact_domain = extract_domain(contact['website'])
240
- print(f"Comparing with: {contact_domain}") # Debug info
241
 
242
  # Exact domain match or subdomain match
243
  if target_domain == contact_domain or target_domain in contact_domain or contact_domain in target_domain:
244
  matching_contacts.append(contact)
245
 
246
- print(f"Found {len(matching_contacts)} matching contacts") # Debug info
247
  return matching_contacts
248
 
249
  def simulate_website_scraping(url):
250
  """Simulate scraping a website and finding contact information"""
251
  # Add some delay to simulate real scraping
252
- time.sleep(random.uniform(0.5, 1))
253
 
254
  # Find matching contacts from our database
255
  contacts = find_contacts_by_website(url)
256
 
257
  return contacts
258
 
259
- def parse_csv_file(file_content):
260
- """Parse CSV file and extract website URLs"""
261
- websites = []
 
 
262
  try:
263
- # Decode file content
264
- content = file_content.decode('utf-8')
265
 
266
- # Parse CSV
267
- csv_reader = csv.DictReader(io.StringIO(content))
 
268
 
269
- # Look for common website column names (case-insensitive)
270
- website_columns = ['website', 'url', 'domain', 'site', 'web', 'homepage']
 
271
 
272
- # Get all column names and print for debugging
273
- all_columns = list(csv_reader.fieldnames) if csv_reader.fieldnames else []
274
- print(f"CSV columns found: {all_columns}")
 
275
 
276
- # Find the website column (case-insensitive)
277
- website_column = None
278
- for col_name in all_columns:
279
- if col_name and col_name.lower().strip() in website_columns:
280
- website_column = col_name
281
- print(f"Using website column: '{website_column}'")
282
- break
283
 
284
- if not website_column:
285
- print(f"No website column found. Available columns: {all_columns}")
286
- return []
 
 
 
287
 
288
- # Extract websites
289
- for row in csv_reader:
290
- website_url = row.get(website_column, '').strip()
291
- if website_url:
292
- websites.append(website_url)
293
 
294
- print(f"Extracted {len(websites)} websites: {websites[:5]}...") # Show first 5
295
- return websites
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
- except Exception as e:
298
- print(f"Error parsing CSV: {e}")
299
- return []
300
 
301
- def search_csv_websites(csv_file, max_results=10):
302
- """Search for contacts from websites listed in CSV file"""
303
- if csv_file is None:
304
- return "Please upload a CSV file", ""
305
-
306
- try:
307
- # Parse CSV file
308
- websites = parse_csv_file(csv_file)
309
-
310
- if not websites:
311
- return "No websites found in CSV file. Please ensure your CSV has a column named 'website', 'url', or 'domain'. Check the console for debugging info about your CSV columns.", ""
312
-
313
- all_contacts = []
314
- processed_websites = []
315
-
316
- # Search each website
317
- for website in websites[:20]: # Limit to first 20 websites
318
- print(f"Processing website: {website}")
319
- contacts = simulate_website_scraping(website)
320
- if contacts:
321
- all_contacts.extend(contacts)
322
- processed_websites.append(website)
323
- print(f"Found {len(contacts)} contacts for {website}")
324
- else:
325
- print(f"No contacts found for {website}")
326
-
327
- # Remove duplicates based on email
328
- unique_contacts = []
329
- seen_emails = set()
330
- for contact in all_contacts:
331
- if contact['email'] not in seen_emails:
332
- unique_contacts.append(contact)
333
- seen_emails.add(contact['email'])
334
-
335
- # Limit results
336
- unique_contacts = unique_contacts[:max_results]
337
-
338
- if not unique_contacts:
339
- return f"No contacts found for the {len(websites)} websites in the CSV file. Processed websites: {', '.join(websites[:10])}", ""
340
 
341
- # Format results
342
- results_text = f"CONTACT DISCOVERY REPORT\n"
343
- results_text += f"Websites Processed: {len(processed_websites)}\n"
344
- results_text += f"Total Websites in CSV: {len(websites)}\n"
345
- results_text += f"Websites with Contacts: {len(processed_websites)}\n"
346
- results_text += f"Unique Contacts Found: {len(unique_contacts)}\n"
347
- results_text += f"Processed Websites: {', '.join(processed_websites)}\n"
348
- results_text += f"{'='*60}\n\n"
 
349
 
350
- for i, contact in enumerate(unique_contacts, 1):
351
- results_text += f"CONTACT #{i}\n"
352
- results_text += f"Name: {contact['first_name']} {contact['last_name']}\n"
353
- results_text += f"Position: {contact['job_title']}\n"
354
- results_text += f"Email: {contact['email']}\n"
355
- results_text += f"Phone: {contact['phone']}\n"
356
- results_text += f"Company: {contact['company']}\n"
357
- results_text += f"Website: {contact['website']}\n\n"
358
 
359
- # Create CSV output
360
- csv_output = "First Name,Last Name,Job Title,Email,Phone,Company,Website\n"
361
- for contact in unique_contacts:
362
- csv_output += f"{contact['first_name']},{contact['last_name']},{contact['job_title']},{contact['email']},{contact['phone']},{contact['company']},{contact['website']}\n"
363
 
364
- return results_text, csv_output
365
 
366
  except Exception as e:
367
- return f"Error processing CSV file: {str(e)}", ""
368
 
369
- def search_website_contacts(website_url, max_results=10):
370
- """Main function to search for contacts on a website"""
371
- if not website_url:
372
- return "Please enter a website URL", ""
373
-
374
- # Clean up URL
375
- if not website_url.startswith(('http://', 'https://')):
376
- website_url = 'https://' + website_url
377
-
378
  try:
379
- # Simulate finding contacts
380
- contacts = simulate_website_scraping(website_url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
 
382
- if not contacts:
383
- return f"No contacts found on {website_url}. \n\nThis website is not in our contact database. Try one of the sample websites listed below, or the website might not have publicly available contact information.", ""
384
 
385
- # Limit results
386
- contacts = contacts[:max_results]
 
 
387
 
388
- # Format results
389
- results_text = f"CONTACT INTELLIGENCE REPORT\n"
390
- results_text += f"Website: {website_url}\n"
391
- results_text += f"Contacts Found: {len(contacts)}\n"
392
- results_text += f"{'='*60}\n\n"
393
-
394
- for i, contact in enumerate(contacts, 1):
395
- results_text += f"CONTACT #{i}\n"
396
- results_text += f"First Name: {contact['first_name']}\n"
397
- results_text += f"Last Name: {contact['last_name']}\n"
398
- results_text += f"Position: {contact['job_title']}\n"
399
- results_text += f"Email: {contact['email']}\n"
400
- results_text += f"Phone: {contact['phone']}\n"
401
- results_text += f"Company: {contact['company']}\n\n"
402
-
403
- # Create a simple table format for the second output
404
- table_text = "First Name,Last Name,Job Title,Email,Phone,Company\n"
405
- for contact in contacts:
406
- table_text += f"{contact['first_name']},{contact['last_name']},{contact['job_title']},{contact['email']},{contact['phone']},{contact['company']}\n"
407
-
408
- return results_text, table_text
409
 
410
  except Exception as e:
411
- return f"Error searching website: {str(e)}", ""
412
-
413
- def get_all_available_websites():
414
- """Get list of all available websites from the database"""
415
- websites = list(set([contact['website'] for contact in CONTACTS_DB]))
416
- return "\n".join(sorted(websites))
417
 
418
  # Custom CSS
419
  custom_css = """
@@ -445,28 +450,22 @@ custom_css = """
445
  opacity: 0.9;
446
  }
447
 
448
- .corporate-card {
449
- background: white;
450
- border: 1px solid #d1d5db;
451
- border-radius: 12px;
452
- padding: 25px;
453
- margin: 15px 0;
454
- box-shadow: 0 4px 15px rgba(0,0,0,0.1);
455
- border-left: 5px solid #1e40af;
456
  }
457
 
458
- .tips-section {
459
  background: linear-gradient(135deg, #f1f5f9 0%, #e2e8f0 100%);
460
  border: 2px solid #cbd5e1;
461
  border-radius: 15px;
462
  padding: 20px;
463
  margin: 10px 0;
464
- }
465
-
466
- .tips-section h3 {
467
- color: #1e40af;
468
- margin-top: 0;
469
- font-weight: 600;
470
  }
471
 
472
  .primary-btn {
@@ -480,41 +479,6 @@ custom_css = """
480
  transition: all 0.3s ease;
481
  }
482
 
483
- .primary-btn:hover {
484
- background: linear-gradient(135deg, #1e3a8a 0%, #2563eb 100%);
485
- transform: translateY(-2px);
486
- box-shadow: 0 6px 20px rgba(30, 64, 175, 0.4);
487
- }
488
-
489
- .secondary-btn {
490
- background: white;
491
- color: #374151;
492
- border: 2px solid #d1d5db;
493
- border-radius: 6px;
494
- padding: 8px 16px;
495
- font-weight: 500;
496
- transition: all 0.3s ease;
497
- }
498
-
499
- .secondary-btn:hover {
500
- border-color: #1e40af;
501
- color: #1e40af;
502
- background: #f8fafc;
503
- }
504
-
505
- .custom-input {
506
- border: 2px solid #d1d5db;
507
- border-radius: 8px;
508
- padding: 12px;
509
- font-size: 16px;
510
- transition: border-color 0.3s ease;
511
- }
512
-
513
- .custom-input:focus {
514
- border-color: #3b82f6;
515
- box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1);
516
- }
517
-
518
  .results-container {
519
  background: white;
520
  border: 1px solid #e5e7eb;
@@ -523,202 +487,149 @@ custom_css = """
523
  margin: 15px 0;
524
  box-shadow: 0 2px 10px rgba(0,0,0,0.05);
525
  }
526
-
527
- .section-header {
528
- background: linear-gradient(135deg, #64748b 0%, #475569 100%);
529
- color: white;
530
- padding: 15px 20px;
531
- border-radius: 10px;
532
- margin: 20px 0 15px 0;
533
- font-weight: 600;
534
- }
535
  """
536
 
537
  # Create Gradio interface
538
- with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.themes.Base()) as app:
539
  gr.HTML("""
540
  <div class="main-header">
541
- <h1>Contact Discovery Platform</h1>
542
- <p>Professional Contact Discovery & Lead Generation Tool</p>
543
- <p style="font-size: 0.95em; opacity: 0.8;">Advanced website analysis for contact intelligence gathering</p>
544
  </div>
545
  """)
546
 
547
- with gr.Tabs():
548
- # Single Website Search Tab
549
- with gr.TabItem("Single Website Search"):
550
- with gr.Row():
551
- with gr.Column(scale=2):
552
- gr.HTML('<div class="section-header">Search Parameters</div>')
553
-
554
- website_input = gr.Textbox(
555
- label="Target Website URL",
556
- placeholder="Enter company website (e.g., techflowsolutions.com)",
557
- value="",
558
- elem_classes=["custom-input"]
559
- )
560
-
561
- with gr.Row():
562
- max_results = gr.Slider(
563
- minimum=1,
564
- maximum=20,
565
- value=8,
566
- step=1,
567
- label="Maximum Results",
568
- elem_classes=["custom-input"]
569
- )
570
-
571
- search_btn = gr.Button(
572
- "Execute Search",
573
- variant="primary",
574
- size="lg",
575
- elem_classes=["primary-btn"]
576
- )
577
-
578
- gr.HTML('<div class="section-header">Search Results</div>')
579
-
580
- with gr.Row():
581
- results_display = gr.Textbox(
582
- label="Contact Intelligence Report",
583
- lines=18,
584
- max_lines=35,
585
- show_copy_button=True,
586
- elem_classes=["results-container"]
587
- )
588
-
589
- csv_output = gr.Textbox(
590
- label="Export Data (CSV Format)",
591
- lines=18,
592
- max_lines=35,
593
- show_copy_button=True,
594
- elem_classes=["results-container"]
595
- )
596
 
597
- # CSV Upload Tab
598
- with gr.TabItem("CSV Bulk Search"):
599
- with gr.Row():
600
- with gr.Column(scale=2):
601
- gr.HTML('<div class="section-header">CSV Upload</div>')
602
-
603
- csv_file = gr.File(
604
- label="Upload CSV File",
605
- file_types=[".csv"],
606
- elem_classes=["custom-input"]
607
- )
608
-
609
- gr.HTML("""
610
- <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
611
- <strong>CSV Format Requirements:</strong><br>
612
- • Include a column named 'website', 'url', or 'domain'<br>
613
- • One website per row<br>
614
- • Example: techflowsolutions.com, greenleafconsult.com
615
- </div>
616
- """)
617
-
618
- with gr.Row():
619
- csv_max_results = gr.Slider(
620
- minimum=1,
621
- maximum=50,
622
- value=20,
623
- step=1,
624
- label="Maximum Results",
625
- elem_classes=["custom-input"]
626
- )
627
-
628
- csv_search_btn = gr.Button(
629
- "Process CSV",
630
- variant="primary",
631
- size="lg",
632
- elem_classes=["primary-btn"]
633
- )
634
-
635
- gr.HTML('<div class="section-header">CSV Results</div>')
636
-
637
- with gr.Row():
638
- csv_results_display = gr.Textbox(
639
- label="CSV Processing Report",
640
- lines=18,
641
- max_lines=35,
642
- show_copy_button=True,
643
- elem_classes=["results-container"]
644
  )
645
 
646
- csv_export_output = gr.Textbox(
647
- label="Export Data (CSV Format)",
648
- lines=18,
649
- max_lines=35,
650
- show_copy_button=True,
651
- elem_classes=["results-container"]
652
  )
653
-
654
- # Sample websites section
655
- with gr.Accordion("Sample Websites Database", open=False):
656
- gr.HTML('<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af;">')
657
- sample_websites = gr.Textbox(
658
- label="Available Websites in Database",
659
- value=get_all_available_websites(),
660
- lines=8,
661
- interactive=False,
662
- elem_classes=["custom-input"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
663
  )
664
- gr.HTML('</div>')
665
-
666
- # Quick search buttons
667
- gr.HTML('<div class="section-header">Quick Access Sample Websites</div>')
668
 
669
- with gr.Row():
670
- quick_btn1 = gr.Button("TechFlow Solutions", size="sm", elem_classes=["secondary-btn"])
671
- quick_btn2 = gr.Button("GreenLeaf Consulting", size="sm", elem_classes=["secondary-btn"])
672
- quick_btn3 = gr.Button("BlueSky Marketing", size="sm", elem_classes=["secondary-btn"])
673
- quick_btn4 = gr.Button("Quantum Dynamics", size="sm", elem_classes=["secondary-btn"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
674
 
675
- with gr.Row():
676
- quick_btn5 = gr.Button("Stellar Logistics", size="sm", elem_classes=["secondary-btn"])
677
- quick_btn6 = gr.Button("Nexus Financial", size="sm", elem_classes=["secondary-btn"])
678
- quick_btn7 = gr.Button("Horizon Health", size="sm", elem_classes=["secondary-btn"])
679
- quick_btn8 = gr.Button("Phoenix Manufacturing", size="sm", elem_classes=["secondary-btn"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
680
 
681
  # Event handlers
682
- search_btn.click(
683
- fn=search_website_contacts,
684
- inputs=[website_input, max_results],
685
- outputs=[results_display, csv_output]
686
  )
687
 
688
- csv_search_btn.click(
689
- fn=search_csv_websites,
690
- inputs=[csv_file, csv_max_results],
691
- outputs=[csv_results_display, csv_export_output]
692
- )
693
-
694
- # Quick search button handlers
695
- quick_btn1.click(lambda: "techflowsolutions.com", outputs=website_input)
696
- quick_btn2.click(lambda: "greenleafconsult.com", outputs=website_input)
697
- quick_btn3.click(lambda: "blueskymarketing.net", outputs=website_input)
698
- quick_btn4.click(lambda: "quantumdynamics.org", outputs=website_input)
699
- quick_btn5.click(lambda: "stellarlogistics.biz", outputs=website_input)
700
- quick_btn6.click(lambda: "nexusfinancial.pro", outputs=website_input)
701
- quick_btn7.click(lambda: "horizonhealth.care", outputs=website_input)
702
- quick_btn8.click(lambda: "phoenixmfg.com", outputs=website_input)
703
-
704
- # Examples
705
- gr.Examples(
706
- examples=[
707
- ["techflowsolutions.com", 5],
708
- ["greenleafconsult.com", 3],
709
- ["blueskymarketing.net", 4],
710
- ["quantumdynamics.org", 6]
711
- ],
712
- inputs=[website_input, max_results],
713
- label="Sample Searches"
714
  )
715
 
716
  # Footer
717
  gr.HTML("""
718
  <div style="text-align: center; padding: 30px 20px; background: linear-gradient(135deg, #64748b 0%, #475569 100%); color: white; border-radius: 15px; margin-top: 30px;">
719
- <h3 style="margin: 0 0 10px 0;">Contact Intelligence Platform</h3>
720
- <p style="margin: 0; opacity: 0.9;">Professional-grade contact discovery and lead generation technology</p>
721
- <p style="margin: 10px 0 0 0; font-size: 0.9em; opacity: 0.7;">Powered by advanced web intelligence algorithms</p>
722
  </div>
723
  """)
724
 
 
1
  import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
  import requests
5
  import re
6
  import json
 
7
  import io
8
  from urllib.parse import urlparse, urljoin
9
  import time
 
216
  def extract_domain(url):
217
  """Extract domain from URL"""
218
  try:
219
+ if not url or pd.isna(url):
220
+ return ""
221
+ url = str(url).strip()
222
  if not url.startswith(('http://', 'https://')):
223
  url = 'https://' + url
224
  parsed = urlparse(url)
 
236
  if not target_domain:
237
  return []
238
 
 
 
239
  matching_contacts = []
240
  for contact in CONTACTS_DB:
241
  contact_domain = extract_domain(contact['website'])
 
242
 
243
  # Exact domain match or subdomain match
244
  if target_domain == contact_domain or target_domain in contact_domain or contact_domain in target_domain:
245
  matching_contacts.append(contact)
246
 
 
247
  return matching_contacts
248
 
249
def simulate_website_scraping(url):
    """Pretend to scrape ``url`` and return the contacts recorded for it.

    No network request is made here: the function pauses for a random
    0.1-0.3 seconds and then hands the lookup to
    ``find_contacts_by_website``, whose result is returned unchanged.
    """
    # Short randomized pause so the lookup feels like a real scrape.
    pause_seconds = random.uniform(0.1, 0.3)
    time.sleep(pause_seconds)

    # The actual matching is done against the in-file contact database.
    return find_contacts_by_website(url)
258
 
259
def _fill_contact(df, row_pos, column_positions, contact):
    """Write one contact's fields into row ``row_pos`` at the given column positions.

    ``column_positions`` must be ordered as first name, last name, job title,
    phone, email.
    """
    fields = ('first_name', 'last_name', 'job_title', 'phone', 'email')
    for col_pos, field in zip(column_positions, fields):
        df.iat[row_pos, col_pos] = contact[field]


def process_excel_file(excel_file):
    """Populate contact columns of an uploaded spreadsheet from its website column.

    Column H (position 7) is read as the website. For every row with a
    website, ``simulate_website_scraping`` is called; the first contact is
    written to columns I-M and the second, if any, to columns S-W. Missing
    columns up to W are appended as empty columns.

    Args:
        excel_file: The upload handed over by the ``gr.File`` input. Either an
            object exposing the path as ``.name`` or a plain path string;
            ``None`` when nothing was uploaded.

    Returns:
        tuple: ``(report_text, output_path)``. ``output_path`` is the path of a
        temporary ``.xlsx`` file holding the updated sheet (a path is what a
        ``gr.File`` output component expects), or ``None`` when nothing was
        produced (no upload, too few columns, or any processing error).
    """
    import tempfile  # local import: only needed to hand a file path back to the UI

    if excel_file is None:
        return "Please upload an Excel file", None

    try:
        # Accept both a tempfile-like upload object (.name) and a bare path.
        source_path = getattr(excel_file, "name", excel_file)
        df = pd.read_excel(source_path)

        # Check if column H exists (index 7)
        if df.shape[1] < 8:
            return "Excel file must have at least 8 columns (up to column H for websites)", None

        # Pad the sheet out to column W (index 22) so positional writes work.
        required_columns = 23
        for i in range(df.shape[1], required_columns):
            column_name = f'Column_{chr(65+i)}'
            # Avoid overwriting an existing header that happens to share the name.
            while column_name in df.columns:
                column_name += '_'
            df[column_name] = ''

        # Column positions (0-indexed)
        website_col = 7                          # Column H
        first_contact_cols = (8, 9, 10, 11, 12)  # Columns I-M
        second_contact_cols = (18, 19, 20, 21, 22)  # Columns S-W

        # Empty columns are read as float (all NaN); cast the target columns to
        # object so that string values can be stored without dtype errors.
        target_names = [df.columns[pos] for pos in first_contact_cols + second_contact_cols]
        df = df.astype({name: object for name in target_names})

        total_processed = 0
        contacts_found = 0
        rows_updated = 0
        detail_lines = []

        # Iterate by position so writes stay correct regardless of index labels.
        for row_pos in range(len(df)):
            cell = df.iat[row_pos, website_col]
            website_url = cell if pd.notna(cell) else ""
            if not website_url:
                continue

            total_processed += 1
            print(f"Processing row {row_pos + 1}: {website_url}")

            contacts = simulate_website_scraping(website_url)
            if not contacts:
                detail_lines.append(f"Row {row_pos + 1}: {website_url} -> No contacts found")
                continue

            contacts_found += len(contacts)
            rows_updated += 1
            _fill_contact(df, row_pos, first_contact_cols, contacts[0])
            if len(contacts) >= 2:
                _fill_contact(df, row_pos, second_contact_cols, contacts[1])
            detail_lines.append(
                f"Row {row_pos + 1}: {website_url} -> Found contact: {contacts[0]['first_name']}"
            )

        # Create a summary report
        header_lines = [
            "EXCEL CONTACT PROCESSING REPORT",
            "=" * 50,
            "",
            "File Processing Summary:",
            f"- Total rows processed: {len(df)}",
            f"- Rows with websites: {total_processed}",
            f"- Total contacts found: {contacts_found}",
            f"- Rows updated with contact info: {rows_updated}",
            "",
            "Column Mapping:",
            "- Column H: Website URLs (source)",
            "- Column I: Contact First Name",
            "- Column J: Contact Last Name",
            "- Column K: Job Title",
            "- Column L: Phone Number",
            "- Column M: Email Address",
            "- Column S: Second Contact First Name",
            "- Column T: Second Contact Last Name",
            "- Column U: Second Contact Job Title",
            "- Column V: Second Contact Phone",
            "- Column W: Second Contact Email",
            "",
            "Processing Details:",
        ]
        report = "\n".join(header_lines + detail_lines) + "\n"

        # Save the updated sheet to a temporary file and return its path.
        with tempfile.NamedTemporaryFile(
            prefix="updated_contacts_", suffix=".xlsx", delete=False
        ) as tmp:
            output_path = tmp.name
        with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
            df.to_excel(writer, index=False, sheet_name='Updated_Contacts')

        return report, output_path

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the app.
        return f"Error processing Excel file: {str(e)}\n\nPlease ensure your Excel file:\n- Has websites in column H\n- Is a valid Excel format (.xlsx, .xls)\n- Is not password protected", None
378
 
379
def download_sample_excel():
    """Build the sample spreadsheet template and return the path to it.

    The template has 23 columns headed 'A' through 'W' and five rows:
    placeholder text in A-G, test websites in H, and empty cells in the
    remaining columns (I-M and S-W are the ones the processor fills in).

    Returns:
        str | None: Path of a temporary ``.xlsx`` file (a path is what a
        ``gr.File`` output component expects), or ``None`` if the file could
        not be created; the error is printed in that case.
    """
    import os
    import string
    import tempfile

    websites = [
        'techflowsolutions.com',
        'greenleafconsult.com',
        'blueskymarketing.net',
        'quantumdynamics.org',
        'stellarlogistics.biz',
    ]
    # Columns A-G carry numbered placeholder text such as "Company 1".
    placeholder_labels = {
        'A': 'Company',
        'B': 'Industry',
        'C': 'City',
        'D': 'State',
        'E': 'Country',
        'F': 'Notes',
        'G': 'Status',
    }

    output_path = None
    try:
        sample_data = {}
        for letter in string.ascii_uppercase[:23]:  # 'A' .. 'W'
            if letter in placeholder_labels:
                label = placeholder_labels[letter]
                sample_data[letter] = [f"{label} {n}" for n in range(1, len(websites) + 1)]
            elif letter == 'H':
                sample_data[letter] = list(websites)
            else:
                # Contact columns and spacer columns start out empty.
                sample_data[letter] = [''] * len(websites)

        df = pd.DataFrame(sample_data)

        # Write to a temporary file and hand its path back to the UI.
        with tempfile.NamedTemporaryFile(
            prefix="sample_template_", suffix=".xlsx", delete=False
        ) as tmp:
            output_path = tmp.name
        with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
            df.to_excel(writer, index=False, sheet_name='Sample_Template')

        return output_path

    except Exception as e:
        print(f"Error creating sample Excel: {e}")
        # Do not leave a half-written temporary file behind.
        if output_path and os.path.exists(output_path):
            os.remove(output_path)
        return None
 
 
 
 
422
 
423
  # Custom CSS
424
  custom_css = """
 
450
  opacity: 0.9;
451
  }
452
 
453
+ .section-header {
454
+ background: linear-gradient(135deg, #64748b 0%, #475569 100%);
455
+ color: white;
456
+ padding: 15px 20px;
457
+ border-radius: 10px;
458
+ margin: 20px 0 15px 0;
459
+ font-weight: 600;
 
460
  }
461
 
462
+ .info-box {
463
  background: linear-gradient(135deg, #f1f5f9 0%, #e2e8f0 100%);
464
  border: 2px solid #cbd5e1;
465
  border-radius: 15px;
466
  padding: 20px;
467
  margin: 10px 0;
468
+ border-left: 5px solid #1e40af;
 
 
 
 
 
469
  }
470
 
471
  .primary-btn {
 
479
  transition: all 0.3s ease;
480
  }
481
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
482
  .results-container {
483
  background: white;
484
  border: 1px solid #e5e7eb;
 
487
  margin: 15px 0;
488
  box-shadow: 0 2px 10px rgba(0,0,0,0.05);
489
  }
 
 
 
 
 
 
 
 
 
490
  """
491
 
492
  # Create Gradio interface
493
+ with gr.Blocks(css=custom_css, title="Excel Contact Discovery Platform", theme=gr.themes.Base()) as app:
494
  gr.HTML("""
495
  <div class="main-header">
496
+ <h1>Excel Contact Discovery Platform</h1>
497
+ <p>Automated Contact Discovery for Excel Spreadsheets</p>
498
+ <p style="font-size: 0.95em; opacity: 0.8;">Upload your Excel file and automatically populate contact information</p>
499
  </div>
500
  """)
501
 
502
+ with gr.Tab("Excel Processing"):
503
+ gr.HTML('<div class="section-header">Excel File Upload</div>')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
 
505
+ with gr.Row():
506
+ with gr.Column(scale=2):
507
+ excel_file = gr.File(
508
+ label="Upload Excel File (.xlsx, .xls)",
509
+ file_types=[".xlsx", ".xls"],
510
+ elem_classes=["custom-input"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  )
512
 
513
+ process_btn = gr.Button(
514
+ "Process Excel File",
515
+ variant="primary",
516
+ size="lg",
517
+ elem_classes=["primary-btn"]
 
518
  )
519
+
520
+ gr.HTML("""
521
+ <div class="info-box">
522
+ <h3>Excel File Requirements:</h3>
523
+ <ul>
524
+ <li><strong>Column H:</strong> Must contain website URLs (e.g., techflowsolutions.com)</li>
525
+ <li><strong>Contact Info will be populated in:</strong></li>
526
+ <ul>
527
+ <li>Column I: Contact First Name</li>
528
+ <li>Column J: Contact Last Name</li>
529
+ <li>Column K: Job Title</li>
530
+ <li>Column L: Phone Number</li>
531
+ <li>Column M: Email Address</li>
532
+ </ul>
533
+ <li><strong>Second Contact (if found) will be populated in:</strong></li>
534
+ <ul>
535
+ <li>Column S: Second Contact First Name</li>
536
+ <li>Column T: Second Contact Last Name</li>
537
+ <li>Column U: Second Contact Job Title</li>
538
+ <li>Column V: Second Contact Phone</li>
539
+ <li>Column W: Second Contact Email</li>
540
+ </ul>
541
+ </ul>
542
+ </div>
543
+ """)
544
+
545
+ gr.HTML('<div class="section-header">Processing Results</div>')
546
+
547
+ processing_report = gr.Textbox(
548
+ label="Processing Report",
549
+ lines=20,
550
+ max_lines=30,
551
+ show_copy_button=True,
552
+ elem_classes=["results-container"]
553
+ )
554
+
555
+ download_file = gr.File(
556
+ label="Download Updated Excel File",
557
+ elem_classes=["results-container"]
558
  )
 
 
 
 
559
 
560
+ with gr.Tab("Sample Template"):
561
+ gr.HTML('<div class="section-header">Download Sample Template</div>')
562
+
563
+ gr.HTML("""
564
+ <div class="info-box">
565
+ <h3>Sample Excel Template</h3>
566
+ <p>Download this sample template to see the expected format. The template includes:</p>
567
+ <ul>
568
+ <li>Sample data in columns A-G</li>
569
+ <li>Website URLs in column H</li>
570
+ <li>Empty contact columns (I-M) ready to be populated</li>
571
+ <li>Empty second contact columns (S-W) ready to be populated</li>
572
+ </ul>
573
+ <p>Replace the sample websites in column H with your actual website URLs.</p>
574
+ </div>
575
+ """)
576
+
577
+ sample_download_btn = gr.Button(
578
+ "Download Sample Template",
579
+ variant="secondary",
580
+ size="lg"
581
+ )
582
+
583
+ sample_file = gr.File(
584
+ label="Sample Template Download",
585
+ elem_classes=["results-container"]
586
+ )
587
 
588
+ with gr.Tab("Available Sample Websites"):
589
+ gr.HTML('<div class="section-header">Test Websites Database</div>')
590
+
591
+ sample_websites_text = """Available websites in our test database:
592
+
593
+ techflowsolutions.com - TechFlow Solutions
594
+ greenleafconsult.com - GreenLeaf Consulting
595
+ blueskymarketing.net - BlueSky Marketing
596
+ quantumdynamics.org - Quantum Dynamics Corp
597
+ stellarlogistics.biz - Stellar Logistics
598
+ nexusfinancial.pro - Nexus Financial
599
+ horizonhealth.care - Horizon Health Systems
600
+ phoenixmfg.com - Phoenix Manufacturing
601
+ alpineeducation.edu - Alpine Education Group
602
+ crimsoncreative.studio - Crimson Creative Studio
603
+
604
+ You can use these websites in column H of your Excel file to test the system."""
605
+
606
+ gr.Textbox(
607
+ value=sample_websites_text,
608
+ label="Sample Websites for Testing",
609
+ lines=15,
610
+ interactive=False,
611
+ elem_classes=["results-container"]
612
+ )
613
 
614
  # Event handlers
615
+ process_btn.click(
616
+ fn=process_excel_file,
617
+ inputs=[excel_file],
618
+ outputs=[processing_report, download_file]
619
  )
620
 
621
+ sample_download_btn.click(
622
+ fn=download_sample_excel,
623
+ inputs=[],
624
+ outputs=[sample_file]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
625
  )
626
 
627
  # Footer
628
  gr.HTML("""
629
  <div style="text-align: center; padding: 30px 20px; background: linear-gradient(135deg, #64748b 0%, #475569 100%); color: white; border-radius: 15px; margin-top: 30px;">
630
+ <h3 style="margin: 0 0 10px 0;">Excel Contact Discovery Platform</h3>
631
+ <p style="margin: 0; opacity: 0.9;">Automated contact discovery and Excel integration</p>
632
+ <p style="margin: 10px 0 0 0; font-size: 0.9em; opacity: 0.7;">Upload Excel Find Contacts → Download Updated File</p>
633
  </div>
634
  """)
635