JayBene1 committed on
Commit
54ffe32
·
verified ·
1 Parent(s): 622cce7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -112
app.py CHANGED
@@ -4,7 +4,6 @@ import re
4
  import json
5
  import csv
6
  import io
7
- import pandas as pd
8
  from urllib.parse import urlparse, urljoin
9
  import time
10
  import random
@@ -257,92 +256,6 @@ def simulate_website_scraping(url):
257
 
258
  return contacts
259
 
260
- def process_excel_file(file_path):
261
- """Process Excel file and update with contact information"""
262
- if file_path is None:
263
- return "Please upload an Excel file", ""
264
-
265
- try:
266
- # Read the Excel file
267
- df = pd.read_excel(file_path)
268
-
269
- print(f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns")
270
- print(f"Columns: {list(df.columns)}")
271
-
272
- # Check if column H exists (0-indexed, so H is column 7)
273
- if len(df.columns) < 8:
274
- return "Excel file must have at least 8 columns (up to column H for websites)", ""
275
-
276
- # Extend dataframe to have all required columns (up to column W = 22 columns)
277
- required_columns = 23 # A through W (0-indexed)
278
- current_columns = len(df.columns)
279
-
280
- if current_columns < required_columns:
281
- for i in range(current_columns, required_columns):
282
- df[f'Column_{chr(65+i)}'] = ''
283
-
284
- processed_websites = []
285
- contacts_found = 0
286
-
287
- # Process each row
288
- for index, row in df.iterrows():
289
- website_url = str(row.iloc[7]).strip() if pd.notna(row.iloc[7]) else "" # Column H (0-indexed 7)
290
-
291
- if not website_url or website_url.lower() in ['nan', 'none', '']:
292
- continue
293
-
294
- print(f"Processing row {index + 1}: {website_url}")
295
-
296
- # Find contacts for this website
297
- contacts = simulate_website_scraping(website_url)
298
-
299
- if contacts:
300
- processed_websites.append(website_url)
301
- contacts_found += len(contacts)
302
-
303
- # First contact - columns I, J, K, L, M (0-indexed: 8, 9, 10, 11, 12)
304
- if len(contacts) >= 1:
305
- df.iloc[index, 8] = contacts[0]['first_name'] # Column I
306
- df.iloc[index, 9] = contacts[0]['last_name'] # Column J
307
- df.iloc[index, 10] = contacts[0]['job_title'] # Column K
308
- df.iloc[index, 11] = contacts[0]['phone'] # Column L
309
- df.iloc[index, 12] = contacts[0]['email'] # Column M
310
-
311
- # Second contact - columns S, T, U, V, W (0-indexed: 18, 19, 20, 21, 22)
312
- if len(contacts) >= 2:
313
- df.iloc[index, 18] = contacts[1]['first_name'] # Column S
314
- df.iloc[index, 19] = contacts[1]['last_name'] # Column T
315
- df.iloc[index, 20] = contacts[1]['job_title'] # Column U
316
- df.iloc[index, 21] = contacts[1]['phone'] # Column V
317
- df.iloc[index, 22] = contacts[1]['email'] # Column W
318
-
319
- # Create results summary
320
- results_text = f"EXCEL PROCESSING REPORT\n"
321
- results_text += f"Total Rows Processed: {len(df)}\n"
322
- results_text += f"Websites Found in Column H: {len([x for x in df.iloc[:, 7] if pd.notna(x) and str(x).strip() and str(x).lower() not in ['nan', 'none']])}\n"
323
- results_text += f"Websites with Contacts: {len(processed_websites)}\n"
324
- results_text += f"Total Contacts Found: {contacts_found}\n"
325
- results_text += f"{'='*60}\n\n"
326
-
327
- if processed_websites:
328
- results_text += f"Websites Successfully Processed:\n"
329
- for i, website in enumerate(processed_websites, 1):
330
- results_text += f"{i}. {website}\n"
331
- else:
332
- results_text += "No contacts found for any websites in the Excel file.\n"
333
- results_text += f"Sample websites from Column H: {list(df.iloc[:5, 7].dropna())}\n"
334
-
335
- # Save updated Excel file to a temporary location and convert to CSV for display
336
- csv_buffer = io.StringIO()
337
- df.to_csv(csv_buffer, index=False)
338
- csv_output = csv_buffer.getvalue()
339
-
340
- return results_text, csv_output
341
-
342
- except Exception as e:
343
- print(f"Error processing Excel file: {e}")
344
- return f"Error processing Excel file: {str(e)}", ""
345
-
346
  def parse_csv_file(file_content):
347
  """Parse CSV file and extract website URLs"""
348
  websites = []
@@ -681,59 +594,57 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
681
  elem_classes=["results-container"]
682
  )
683
 
684
- # Excel/CSV Upload Tab
685
- with gr.TabItem("Excel/CSV Bulk Search"):
686
  with gr.Row():
687
  with gr.Column(scale=2):
688
- gr.HTML('<div class="section-header">File Upload</div>')
689
 
690
- excel_file = gr.File(
691
- label="Upload Excel or CSV File",
692
- file_types=[".xlsx", ".xls", ".csv"],
693
  elem_classes=["custom-input"]
694
  )
695
 
696
  gr.HTML("""
697
  <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
698
- <strong>Excel File Requirements:</strong><br>
699
- • Websites should be in column H<br>
700
- • Contact info will be populated in:<br>
701
- &nbsp;&nbsp;- First contact: I(First Name), J(Last Name), K(Job Title), L(Phone), M(Email)<br>
702
- &nbsp;&nbsp;- Second contact: S(First Name), T(Last Name), U(Job Title), V(Phone), W(Email)<br><br>
703
- <strong>CSV File Requirements:</strong><br>
704
  • Include a column named 'website', 'url', or 'domain'<br>
705
- • One website per row
 
706
  </div>
707
  """)
708
 
709
  with gr.Row():
710
- file_type = gr.Radio(
711
- choices=["Excel Processing", "CSV Processing"],
712
- value="Excel Processing",
713
- label="File Type",
 
 
714
  elem_classes=["custom-input"]
715
  )
716
 
717
- process_btn = gr.Button(
718
- "Process File",
719
  variant="primary",
720
  size="lg",
721
  elem_classes=["primary-btn"]
722
  )
723
 
724
- gr.HTML('<div class="section-header">Processing Results</div>')
725
 
726
  with gr.Row():
727
- file_results_display = gr.Textbox(
728
- label="File Processing Report",
729
  lines=18,
730
  max_lines=35,
731
  show_copy_button=True,
732
  elem_classes=["results-container"]
733
  )
734
 
735
- file_export_output = gr.Textbox(
736
- label="Updated File Data (CSV Format)",
737
  lines=18,
738
  max_lines=35,
739
  show_copy_button=True,
@@ -753,4 +664,63 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
753
  gr.HTML('</div>')
754
 
755
  # Quick search buttons
756
- gr.HTML('<div class="section-header">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import json
5
  import csv
6
  import io
 
7
  from urllib.parse import urlparse, urljoin
8
  import time
9
  import random
 
256
 
257
  return contacts
258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  def parse_csv_file(file_content):
260
  """Parse CSV file and extract website URLs"""
261
  websites = []
 
594
  elem_classes=["results-container"]
595
  )
596
 
597
+ # CSV Upload Tab
598
+ with gr.TabItem("CSV Bulk Search"):
599
  with gr.Row():
600
  with gr.Column(scale=2):
601
+ gr.HTML('<div class="section-header">CSV Upload</div>')
602
 
603
+ csv_file = gr.File(
604
+ label="Upload CSV File",
605
+ file_types=[".csv"],
606
  elem_classes=["custom-input"]
607
  )
608
 
609
  gr.HTML("""
610
  <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
611
+ <strong>CSV Format Requirements:</strong><br>
 
 
 
 
 
612
  • Include a column named 'website', 'url', or 'domain'<br>
613
+ • One website per row<br>
614
+ • Example: techflowsolutions.com, greenleafconsult.com
615
  </div>
616
  """)
617
 
618
  with gr.Row():
619
+ csv_max_results = gr.Slider(
620
+ minimum=1,
621
+ maximum=50,
622
+ value=20,
623
+ step=1,
624
+ label="Maximum Results",
625
  elem_classes=["custom-input"]
626
  )
627
 
628
+ csv_search_btn = gr.Button(
629
+ "Process CSV",
630
  variant="primary",
631
  size="lg",
632
  elem_classes=["primary-btn"]
633
  )
634
 
635
+ gr.HTML('<div class="section-header">CSV Results</div>')
636
 
637
  with gr.Row():
638
+ csv_results_display = gr.Textbox(
639
+ label="CSV Processing Report",
640
  lines=18,
641
  max_lines=35,
642
  show_copy_button=True,
643
  elem_classes=["results-container"]
644
  )
645
 
646
+ csv_export_output = gr.Textbox(
647
+ label="Export Data (CSV Format)",
648
  lines=18,
649
  max_lines=35,
650
  show_copy_button=True,
 
664
  gr.HTML('</div>')
665
 
666
  # Quick search buttons
667
+ gr.HTML('<div class="section-header">Quick Access Sample Websites</div>')
668
+
669
+ with gr.Row():
670
+ quick_btn1 = gr.Button("TechFlow Solutions", size="sm", elem_classes=["secondary-btn"])
671
+ quick_btn2 = gr.Button("GreenLeaf Consulting", size="sm", elem_classes=["secondary-btn"])
672
+ quick_btn3 = gr.Button("BlueSky Marketing", size="sm", elem_classes=["secondary-btn"])
673
+ quick_btn4 = gr.Button("Quantum Dynamics", size="sm", elem_classes=["secondary-btn"])
674
+
675
+ with gr.Row():
676
+ quick_btn5 = gr.Button("Stellar Logistics", size="sm", elem_classes=["secondary-btn"])
677
+ quick_btn6 = gr.Button("Nexus Financial", size="sm", elem_classes=["secondary-btn"])
678
+ quick_btn7 = gr.Button("Horizon Health", size="sm", elem_classes=["secondary-btn"])
679
+ quick_btn8 = gr.Button("Phoenix Manufacturing", size="sm", elem_classes=["secondary-btn"])
680
+
681
+ # Event handlers
682
+ search_btn.click(
683
+ fn=search_website_contacts,
684
+ inputs=[website_input, max_results],
685
+ outputs=[results_display, csv_output]
686
+ )
687
+
688
+ csv_search_btn.click(
689
+ fn=search_csv_websites,
690
+ inputs=[csv_file, csv_max_results],
691
+ outputs=[csv_results_display, csv_export_output]
692
+ )
693
+
694
+ # Quick search button handlers
695
+ quick_btn1.click(lambda: "techflowsolutions.com", outputs=website_input)
696
+ quick_btn2.click(lambda: "greenleafconsult.com", outputs=website_input)
697
+ quick_btn3.click(lambda: "blueskymarketing.net", outputs=website_input)
698
+ quick_btn4.click(lambda: "quantumdynamics.org", outputs=website_input)
699
+ quick_btn5.click(lambda: "stellarlogistics.biz", outputs=website_input)
700
+ quick_btn6.click(lambda: "nexusfinancial.pro", outputs=website_input)
701
+ quick_btn7.click(lambda: "horizonhealth.care", outputs=website_input)
702
+ quick_btn8.click(lambda: "phoenixmfg.com", outputs=website_input)
703
+
704
+ # Examples
705
+ gr.Examples(
706
+ examples=[
707
+ ["techflowsolutions.com", 5],
708
+ ["greenleafconsult.com", 3],
709
+ ["blueskymarketing.net", 4],
710
+ ["quantumdynamics.org", 6]
711
+ ],
712
+ inputs=[website_input, max_results],
713
+ label="Sample Searches"
714
+ )
715
+
716
+ # Footer
717
+ gr.HTML("""
718
+ <div style="text-align: center; padding: 30px 20px; background: linear-gradient(135deg, #64748b 0%, #475569 100%); color: white; border-radius: 15px; margin-top: 30px;">
719
+ <h3 style="margin: 0 0 10px 0;">Contact Intelligence Platform</h3>
720
+ <p style="margin: 0; opacity: 0.9;">Professional-grade contact discovery and lead generation technology</p>
721
+ <p style="margin: 10px 0 0 0; font-size: 0.9em; opacity: 0.7;">Powered by advanced web intelligence algorithms</p>
722
+ </div>
723
+ """)
724
+
725
+ if __name__ == "__main__":
726
+ app.launch()