JayBene1 committed on
Commit
640980e
·
verified ·
1 Parent(s): 13b4428

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +211 -152
app.py CHANGED
@@ -7,6 +7,9 @@ import io
7
  from urllib.parse import urlparse, urljoin
8
  import time
9
  import random
 
 
 
10
 
11
  # Mock contacts database (same as your API)
12
  CONTACTS_DB = [
@@ -256,93 +259,202 @@ def simulate_website_scraping(url):
256
 
257
  return contacts
258
 
259
- ...
260
- # After `parse_csv_file`, add this new function
261
-
262
- def parse_excel_file(file_content):
263
- """Parse Excel file and extract website URLs and row mapping"""
264
- import pandas as pd
265
  try:
266
- df = pd.read_excel(file_content, engine='openpyxl')
 
 
 
 
 
 
267
  website_columns = ['website', 'url', 'domain', 'site', 'web', 'homepage']
268
-
 
 
 
 
 
269
  website_column = None
270
- for col in df.columns:
271
- if col.lower().strip() in website_columns:
272
- website_column = col
 
273
  break
274
-
275
  if not website_column:
276
- return [], None
277
-
278
- return df, website_column
 
 
 
 
 
 
 
 
 
279
  except Exception as e:
280
- print(f"Error parsing Excel: {e}")
281
- return [], None
282
-
283
- # Modify `search_csv_websites` to support both CSV and Excel
284
-
285
- def search_csv_websites(uploaded_file, max_results=10):
286
- import pandas as pd
287
- if uploaded_file is None:
288
- return "Please upload a CSV or Excel file", ""
289
 
 
 
290
  try:
291
- filename = uploaded_file.name.lower()
292
-
293
- if filename.endswith(".csv"):
294
- content = uploaded_file.read()
295
- df = pd.read_csv(io.BytesIO(content))
296
- elif filename.endswith(".xls") or filename.endswith(".xlsx"):
297
- df = pd.read_excel(uploaded_file, engine='openpyxl')
298
- else:
299
- return "Unsupported file type. Please upload a .csv or .xlsx file.", ""
300
-
301
- if 'H' not in df.columns and len(df.columns) < 8:
302
- return "Column H (for websites) is missing.", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
- updated_rows = 0
305
- for idx, row in df.iterrows():
306
- website = row.iloc[7] # Column H
307
- if pd.isna(website):
308
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  contacts = simulate_website_scraping(website)
310
- if not contacts:
311
- continue
312
-
313
- if len(contacts) > 0:
314
- df.at[idx, 'I'] = contacts[0]['first_name']
315
- df.at[idx, 'J'] = contacts[0]['last_name']
316
- df.at[idx, 'K'] = contacts[0]['job_title']
317
- df.at[idx, 'L'] = contacts[0]['phone']
318
- df.at[idx, 'M'] = contacts[0]['email']
319
-
320
- if len(contacts) > 1:
321
- df.at[idx, 'S'] = contacts[1]['first_name']
322
- df.at[idx, 'T'] = contacts[1]['last_name']
323
- df.at[idx, 'U'] = contacts[1]['job_title']
324
- df.at[idx, 'V'] = contacts[1]['phone']
325
- df.at[idx, 'W'] = contacts[1]['email']
326
-
327
- updated_rows += 1
328
-
329
- output_buffer = io.StringIO()
330
- df.to_csv(output_buffer, index=False)
331
- csv_data = output_buffer.getvalue()
332
-
333
- return f"Processed {updated_rows} rows with matching contacts.", csv_data
334
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  except Exception as e:
336
- return f"Error: {e}", ""
337
 
338
- # Update Gradio file upload to allow Excel
339
- csv_file = gr.File(
340
- label="Upload CSV or Excel File",
341
- file_types=[".csv", ".xlsx", ".xls"],
342
- elem_classes=["custom-input"]
343
- )
344
-
345
- # Everything else remains unchanged
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
 
347
  def search_website_contacts(website_url, max_results=10):
348
  """Main function to search for contacts on a website"""
@@ -576,7 +688,7 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
576
  with gr.TabItem("CSV Bulk Search"):
577
  with gr.Row():
578
  with gr.Column(scale=2):
579
- gr.HTML('<div class="section-header">CSV Upload</div>')
580
 
581
  csv_file = gr.File(
582
  label="Upload CSV File",
@@ -584,12 +696,22 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
584
  elem_classes=["custom-input"]
585
  )
586
 
 
 
 
 
 
 
587
  gr.HTML("""
588
  <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
589
  <strong>CSV Format Requirements:</strong><br>
590
  • Include a column named 'website', 'url', or 'domain'<br>
591
  • One website per row<br>
592
- • Example: techflowsolutions.com, greenleafconsult.com
 
 
 
 
593
  </div>
594
  """)
595
 
@@ -599,7 +721,7 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
599
  maximum=50,
600
  value=20,
601
  step=1,
602
- label="Maximum Results",
603
  elem_classes=["custom-input"]
604
  )
605
 
@@ -609,12 +731,19 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
609
  size="lg",
610
  elem_classes=["primary-btn"]
611
  )
 
 
 
 
 
 
 
612
 
613
- gr.HTML('<div class="section-header">CSV Results</div>')
614
 
615
  with gr.Row():
616
  csv_results_display = gr.Textbox(
617
- label="CSV Processing Report",
618
  lines=18,
619
  max_lines=35,
620
  show_copy_button=True,
@@ -622,7 +751,7 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
622
  )
623
 
624
  csv_export_output = gr.Textbox(
625
- label="Export Data (CSV Format)",
626
  lines=18,
627
  max_lines=35,
628
  show_copy_button=True,
@@ -631,74 +760,4 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
631
 
632
  # Sample websites section
633
  with gr.Accordion("Sample Websites Database", open=False):
634
- gr.HTML('<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af;">')
635
- sample_websites = gr.Textbox(
636
- label="Available Websites in Database",
637
- value=get_all_available_websites(),
638
- lines=8,
639
- interactive=False,
640
- elem_classes=["custom-input"]
641
- )
642
- gr.HTML('</div>')
643
-
644
- # Quick search buttons
645
- gr.HTML('<div class="section-header">Quick Access Sample Websites</div>')
646
-
647
- with gr.Row():
648
- quick_btn1 = gr.Button("TechFlow Solutions", size="sm", elem_classes=["secondary-btn"])
649
- quick_btn2 = gr.Button("GreenLeaf Consulting", size="sm", elem_classes=["secondary-btn"])
650
- quick_btn3 = gr.Button("BlueSky Marketing", size="sm", elem_classes=["secondary-btn"])
651
- quick_btn4 = gr.Button("Quantum Dynamics", size="sm", elem_classes=["secondary-btn"])
652
-
653
- with gr.Row():
654
- quick_btn5 = gr.Button("Stellar Logistics", size="sm", elem_classes=["secondary-btn"])
655
- quick_btn6 = gr.Button("Nexus Financial", size="sm", elem_classes=["secondary-btn"])
656
- quick_btn7 = gr.Button("Horizon Health", size="sm", elem_classes=["secondary-btn"])
657
- quick_btn8 = gr.Button("Phoenix Manufacturing", size="sm", elem_classes=["secondary-btn"])
658
-
659
- # Event handlers
660
- search_btn.click(
661
- fn=search_website_contacts,
662
- inputs=[website_input, max_results],
663
- outputs=[results_display, csv_output]
664
- )
665
-
666
- csv_search_btn.click(
667
- fn=search_csv_websites,
668
- inputs=[csv_file, csv_max_results],
669
- outputs=[csv_results_display, csv_export_output]
670
- )
671
-
672
- # Quick search button handlers
673
- quick_btn1.click(lambda: "techflowsolutions.com", outputs=website_input)
674
- quick_btn2.click(lambda: "greenleafconsult.com", outputs=website_input)
675
- quick_btn3.click(lambda: "blueskymarketing.net", outputs=website_input)
676
- quick_btn4.click(lambda: "quantumdynamics.org", outputs=website_input)
677
- quick_btn5.click(lambda: "stellarlogistics.biz", outputs=website_input)
678
- quick_btn6.click(lambda: "nexusfinancial.pro", outputs=website_input)
679
- quick_btn7.click(lambda: "horizonhealth.care", outputs=website_input)
680
- quick_btn8.click(lambda: "phoenixmfg.com", outputs=website_input)
681
-
682
- # Examples
683
- gr.Examples(
684
- examples=[
685
- ["techflowsolutions.com", 5],
686
- ["greenleafconsult.com", 3],
687
- ["blueskymarketing.net", 4],
688
- ["quantumdynamics.org", 6]
689
- ],
690
- inputs=[website_input, max_results],
691
- label="Sample Searches"
692
- )
693
-
694
- # Footer
695
- gr.HTML("""
696
- <div style="text-align: center; padding: 30px 20px; background: linear-gradient(135deg, #64748b 0%, #475569 100%); color: white; border-radius: 15px; margin-top: 30px;">
697
- <h3 style="margin: 0 0 10px 0;">Contact Intelligence Platform</h3>
698
- <p style="margin: 0; opacity: 0.9;">Professional-grade contact discovery and lead generation technology</p>
699
- <p style="margin: 10px 0 0 0; font-size: 0.9em; opacity: 0.7;">Powered by advanced web intelligence algorithms</p>
700
- </div>
701
- """)
702
-
703
- if __name__ == "__main__":
704
- app.launch()
 
7
  from urllib.parse import urlparse, urljoin
8
  import time
9
  import random
10
+ import pandas as pd
11
+ import openpyxl
12
+ from io import BytesIO
13
 
14
  # Mock contacts database (same as your API)
15
  CONTACTS_DB = [
 
259
 
260
  return contacts
261
 
262
def parse_csv_file(file_content):
    """Parse CSV file and extract website URLs.

    Args:
        file_content: Raw CSV file bytes; decoded as UTF-8.

    Returns:
        list[str]: Website URLs taken from the first recognized website
        column, or an empty list when no such column exists or parsing fails.
    """
    websites = []
    try:
        # Decode file content
        content = file_content.decode('utf-8')

        # Parse CSV
        csv_reader = csv.DictReader(io.StringIO(content))

        # Look for common website column names (case-insensitive)
        website_columns = ['website', 'url', 'domain', 'site', 'web', 'homepage']

        # Get all column names and print for debugging
        all_columns = list(csv_reader.fieldnames) if csv_reader.fieldnames else []
        print(f"CSV columns found: {all_columns}")

        # Find the website column (case-insensitive)
        website_column = None
        for col_name in all_columns:
            if col_name and col_name.lower().strip() in website_columns:
                website_column = col_name
                print(f"Using website column: '{website_column}'")
                break

        if not website_column:
            print(f"No website column found. Available columns: {all_columns}")
            return []

        # Extract websites. A short row yields None for this key
        # (DictReader's default restval), so coalesce to '' before
        # stripping — previously that raised AttributeError and the broad
        # except below silently returned [] for the whole file.
        for row in csv_reader:
            website_url = (row.get(website_column) or '').strip()
            if website_url:
                websites.append(website_url)

        print(f"Extracted {len(websites)} websites: {websites[:5]}...")  # Show first 5
        return websites

    except Exception as e:
        print(f"Error parsing CSV: {e}")
        return []
 
 
 
 
 
 
 
303
 
304
def parse_excel_file(file_path):
    """Parse Excel file and extract website URLs from column H, fill contact info in specific columns.

    Websites are read from column H starting at row 2 (row 1 is assumed to
    be a header). The first matching contact is written into columns I-M
    and a second contact, when available, into columns S-W. The workbook
    is saved alongside the input with a '_with_contacts' suffix.

    Args:
        file_path: Path to a workbook readable by openpyxl (.xlsx).

    Returns:
        tuple: (saved workbook path, number of rows with a non-empty
        column H), or (None, 0) on any error.
    """
    import os

    try:
        # Read Excel file
        workbook = openpyxl.load_workbook(file_path)
        sheet = workbook.active

        websites_seen = 0  # rows whose column H holds a truthy value

        # Process each row (start from row 2 to skip the header)
        for row_num in range(2, sheet.max_row + 1):
            website_url = sheet[f'H{row_num}'].value
            if website_url:
                # Count here instead of re-scanning the sheet afterwards.
                websites_seen += 1

            if website_url and str(website_url).strip():
                website_url = str(website_url).strip()
                print(f"Processing website: {website_url}")

                # Find contacts for this website
                contacts = simulate_website_scraping(website_url)

                if contacts:
                    # Fill first contact info (columns I-M)
                    first_contact = contacts[0]
                    sheet[f'I{row_num}'] = first_contact['first_name']  # Contact First Name
                    sheet[f'J{row_num}'] = first_contact['last_name']   # Contact Last Name
                    sheet[f'K{row_num}'] = first_contact['job_title']   # Job Title
                    sheet[f'L{row_num}'] = first_contact['phone']       # Phone
                    sheet[f'M{row_num}'] = first_contact['email']       # Email

                    # Fill second contact info if available (columns S-W)
                    if len(contacts) > 1:
                        second_contact = contacts[1]
                        sheet[f'S{row_num}'] = second_contact['first_name']  # Second Contact First Name
                        sheet[f'T{row_num}'] = second_contact['last_name']   # Second Contact Last Name
                        sheet[f'U{row_num}'] = second_contact['job_title']   # Second Contact Job Title
                        sheet[f'V{row_num}'] = second_contact['phone']       # Second Contact Phone
                        sheet[f'W{row_num}'] = second_contact['email']       # Second Contact Email

        # Build the output path from the real extension. The previous
        # file_path.replace('.xlsx', ...) was a no-op for any other
        # extension (e.g. '.xls'), which made output_path == file_path and
        # silently overwrote the uploaded file.
        root, ext = os.path.splitext(file_path)
        output_path = f"{root}_with_contacts{ext}"

        # Save the modified Excel file
        workbook.save(output_path)

        return output_path, websites_seen

    except Exception as e:
        print(f"Error processing Excel file: {e}")
        return None, 0
350
 
351
def search_csv_websites(csv_file, max_results=10):
    """Search for contacts from websites listed in CSV file.

    Args:
        csv_file: Uploaded CSV content, passed straight to parse_csv_file
            (which expects UTF-8 bytes); None when nothing was uploaded.
        max_results: Cap on the number of unique contacts returned.

    Returns:
        tuple[str, str]: (human-readable report, CSV export text). The
        second element is "" when nothing was found or an error occurred.
    """
    if csv_file is None:
        return "Please upload a CSV file", ""

    try:
        # Parse CSV file
        websites = parse_csv_file(csv_file)

        if not websites:
            return "No websites found in CSV file. Please ensure your CSV has a column named 'website', 'url', or 'domain'. Check the console for debugging info about your CSV columns.", ""

        all_contacts = []
        processed_websites = []

        # Search each website
        for website in websites[:20]:  # Limit to first 20 websites
            print(f"Processing website: {website}")
            contacts = simulate_website_scraping(website)
            if contacts:
                all_contacts.extend(contacts)
                processed_websites.append(website)
                print(f"Found {len(contacts)} contacts for {website}")
            else:
                print(f"No contacts found for {website}")

        # Remove duplicates based on email
        unique_contacts = []
        seen_emails = set()
        for contact in all_contacts:
            if contact['email'] not in seen_emails:
                unique_contacts.append(contact)
                seen_emails.add(contact['email'])

        # Limit results
        unique_contacts = unique_contacts[:max_results]

        if not unique_contacts:
            return f"No contacts found for the {len(websites)} websites in the CSV file. Processed websites: {', '.join(websites[:10])}", ""

        # Format results
        results_text = f"CONTACT DISCOVERY REPORT\n"
        results_text += f"Websites Processed: {len(processed_websites)}\n"
        results_text += f"Total Websites in CSV: {len(websites)}\n"
        results_text += f"Websites with Contacts: {len(processed_websites)}\n"
        results_text += f"Unique Contacts Found: {len(unique_contacts)}\n"
        results_text += f"Processed Websites: {', '.join(processed_websites)}\n"
        results_text += f"{'='*60}\n\n"

        for i, contact in enumerate(unique_contacts, 1):
            results_text += f"CONTACT #{i}\n"
            results_text += f"Name: {contact['first_name']} {contact['last_name']}\n"
            results_text += f"Position: {contact['job_title']}\n"
            results_text += f"Email: {contact['email']}\n"
            results_text += f"Phone: {contact['phone']}\n"
            results_text += f"Company: {contact['company']}\n"
            results_text += f"Website: {contact['website']}\n\n"

        # Create CSV output with csv.writer so fields containing commas or
        # quotes are escaped correctly — the previous string concatenation
        # produced corrupt rows for such values. Output is byte-identical
        # for fields without special characters.
        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator='\n')
        writer.writerow(['First Name', 'Last Name', 'Job Title', 'Email', 'Phone', 'Company', 'Website'])
        for contact in unique_contacts:
            writer.writerow([
                contact['first_name'], contact['last_name'], contact['job_title'],
                contact['email'], contact['phone'], contact['company'], contact['website'],
            ])

        return results_text, buffer.getvalue()

    except Exception as e:
        return f"Error processing CSV file: {str(e)}", ""
418
 
419
def search_excel_websites(excel_file, max_results=10):
    """Search for contacts from websites listed in Excel file column H and fill contact info"""
    if excel_file is None:
        return "Please upload an Excel file", ""

    try:
        # Delegate the heavy lifting: parse_excel_file scrapes each website
        # in column H and saves a copy of the workbook with contacts filled.
        result_path, website_count = parse_excel_file(excel_file.name)

        if not result_path:
            return "Error processing Excel file. Please ensure your Excel file has websites in column H.", ""

        # Assemble the human-readable report line by line.
        report_lines = [
            "EXCEL CONTACT DISCOVERY REPORT",
            f"Total Websites Processed: {website_count}",
            f"Modified Excel File: {result_path}",
            "=" * 60,
            "",
            "Contact information has been filled in the following columns:",
            "• Column I: Contact First Name",
            "• Column J: Contact Last Name",
            "• Column K: Job Title",
            "• Column L: Phone",
            "• Column M: Email",
            "",
            "Second contact information (if available):",
            "• Column S: Second Contact First Name",
            "• Column T: Second Contact Last Name",
            "• Column U: Second Contact Job Title",
            "• Column V: Second Contact Phone",
            "• Column W: Second Contact Email",
            "",
            f"The modified Excel file has been saved as: {result_path}",
            "You can download it from the file system.",
        ]

        # Short status message for the export pane.
        summary = (
            "Excel file processed successfully.\n"
            f"Modified file saved as: {result_path}\n"
            f"Total websites processed: {website_count}"
        )

        return "\n".join(report_lines), summary

    except Exception as e:
        return f"Error processing Excel file: {str(e)}", ""
458
 
459
  def search_website_contacts(website_url, max_results=10):
460
  """Main function to search for contacts on a website"""
 
688
  with gr.TabItem("CSV Bulk Search"):
689
  with gr.Row():
690
  with gr.Column(scale=2):
691
+ gr.HTML('<div class="section-header">File Upload</div>')
692
 
693
  csv_file = gr.File(
694
  label="Upload CSV File",
 
696
  elem_classes=["custom-input"]
697
  )
698
 
699
+ excel_file = gr.File(
700
+ label="Upload Excel File",
701
+ file_types=[".xlsx", ".xls"],
702
+ elem_classes=["custom-input"]
703
+ )
704
+
705
  gr.HTML("""
706
  <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
707
  <strong>CSV Format Requirements:</strong><br>
708
  • Include a column named 'website', 'url', or 'domain'<br>
709
  • One website per row<br>
710
+ • Example: techflowsolutions.com, greenleafconsult.com<br><br>
711
+ <strong>Excel Format Requirements:</strong><br>
712
+ • Websites should be in column H<br>
713
+ • Contact info will be filled in columns I-M (first contact) and S-W (second contact)<br>
714
+ • The modified file will be saved with '_with_contacts' suffix
715
  </div>
716
  """)
717
 
 
721
  maximum=50,
722
  value=20,
723
  step=1,
724
+ label="Maximum Results (CSV only)",
725
  elem_classes=["custom-input"]
726
  )
727
 
 
731
  size="lg",
732
  elem_classes=["primary-btn"]
733
  )
734
+
735
+ excel_search_btn = gr.Button(
736
+ "Process Excel",
737
+ variant="primary",
738
+ size="lg",
739
+ elem_classes=["primary-btn"]
740
+ )
741
 
742
+ gr.HTML('<div class="section-header">Processing Results</div>')
743
 
744
  with gr.Row():
745
  csv_results_display = gr.Textbox(
746
+ label="File Processing Report",
747
  lines=18,
748
  max_lines=35,
749
  show_copy_button=True,
 
751
  )
752
 
753
  csv_export_output = gr.Textbox(
754
+ label="Export Data / File Info",
755
  lines=18,
756
  max_lines=35,
757
  show_copy_button=True,
 
760
 
761
  # Sample websites section
762
  with gr.Accordion("Sample Websites Database", open=False):
763
+ gr.HTML('<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af