Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,6 @@ import re
|
|
| 4 |
import json
|
| 5 |
import csv
|
| 6 |
import io
|
| 7 |
-
import pandas as pd
|
| 8 |
from urllib.parse import urlparse, urljoin
|
| 9 |
import time
|
| 10 |
import random
|
|
@@ -257,92 +256,6 @@ def simulate_website_scraping(url):
|
|
| 257 |
|
| 258 |
return contacts
|
| 259 |
|
| 260 |
-
def process_excel_file(file_path):
|
| 261 |
-
"""Process Excel file and update with contact information"""
|
| 262 |
-
if file_path is None:
|
| 263 |
-
return "Please upload an Excel file", ""
|
| 264 |
-
|
| 265 |
-
try:
|
| 266 |
-
# Read the Excel file
|
| 267 |
-
df = pd.read_excel(file_path)
|
| 268 |
-
|
| 269 |
-
print(f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns")
|
| 270 |
-
print(f"Columns: {list(df.columns)}")
|
| 271 |
-
|
| 272 |
-
# Check if column H exists (0-indexed, so H is column 7)
|
| 273 |
-
if len(df.columns) < 8:
|
| 274 |
-
return "Excel file must have at least 8 columns (up to column H for websites)", ""
|
| 275 |
-
|
| 276 |
-
# Extend dataframe to have all required columns (A through W = 23 columns, 0-indexed 0-22)
|
| 277 |
-
required_columns = 23 # A through W (0-indexed)
|
| 278 |
-
current_columns = len(df.columns)
|
| 279 |
-
|
| 280 |
-
if current_columns < required_columns:
|
| 281 |
-
for i in range(current_columns, required_columns):
|
| 282 |
-
df[f'Column_{chr(65+i)}'] = ''
|
| 283 |
-
|
| 284 |
-
processed_websites = []
|
| 285 |
-
contacts_found = 0
|
| 286 |
-
|
| 287 |
-
# Process each row
|
| 288 |
-
for index, row in df.iterrows():
|
| 289 |
-
website_url = str(row.iloc[7]).strip() if pd.notna(row.iloc[7]) else "" # Column H (0-indexed 7)
|
| 290 |
-
|
| 291 |
-
if not website_url or website_url.lower() in ['nan', 'none', '']:
|
| 292 |
-
continue
|
| 293 |
-
|
| 294 |
-
print(f"Processing row {index + 1}: {website_url}")
|
| 295 |
-
|
| 296 |
-
# Find contacts for this website
|
| 297 |
-
contacts = simulate_website_scraping(website_url)
|
| 298 |
-
|
| 299 |
-
if contacts:
|
| 300 |
-
processed_websites.append(website_url)
|
| 301 |
-
contacts_found += len(contacts)
|
| 302 |
-
|
| 303 |
-
# First contact - columns I, J, K, L, M (0-indexed: 8, 9, 10, 11, 12)
|
| 304 |
-
if len(contacts) >= 1:
|
| 305 |
-
df.iloc[index, 8] = contacts[0]['first_name'] # Column I
|
| 306 |
-
df.iloc[index, 9] = contacts[0]['last_name'] # Column J
|
| 307 |
-
df.iloc[index, 10] = contacts[0]['job_title'] # Column K
|
| 308 |
-
df.iloc[index, 11] = contacts[0]['phone'] # Column L
|
| 309 |
-
df.iloc[index, 12] = contacts[0]['email'] # Column M
|
| 310 |
-
|
| 311 |
-
# Second contact - columns S, T, U, V, W (0-indexed: 18, 19, 20, 21, 22)
|
| 312 |
-
if len(contacts) >= 2:
|
| 313 |
-
df.iloc[index, 18] = contacts[1]['first_name'] # Column S
|
| 314 |
-
df.iloc[index, 19] = contacts[1]['last_name'] # Column T
|
| 315 |
-
df.iloc[index, 20] = contacts[1]['job_title'] # Column U
|
| 316 |
-
df.iloc[index, 21] = contacts[1]['phone'] # Column V
|
| 317 |
-
df.iloc[index, 22] = contacts[1]['email'] # Column W
|
| 318 |
-
|
| 319 |
-
# Create results summary
|
| 320 |
-
results_text = f"EXCEL PROCESSING REPORT\n"
|
| 321 |
-
results_text += f"Total Rows Processed: {len(df)}\n"
|
| 322 |
-
results_text += f"Websites Found in Column H: {len([x for x in df.iloc[:, 7] if pd.notna(x) and str(x).strip() and str(x).lower() not in ['nan', 'none']])}\n"
|
| 323 |
-
results_text += f"Websites with Contacts: {len(processed_websites)}\n"
|
| 324 |
-
results_text += f"Total Contacts Found: {contacts_found}\n"
|
| 325 |
-
results_text += f"{'='*60}\n\n"
|
| 326 |
-
|
| 327 |
-
if processed_websites:
|
| 328 |
-
results_text += f"Websites Successfully Processed:\n"
|
| 329 |
-
for i, website in enumerate(processed_websites, 1):
|
| 330 |
-
results_text += f"{i}. {website}\n"
|
| 331 |
-
else:
|
| 332 |
-
results_text += "No contacts found for any websites in the Excel file.\n"
|
| 333 |
-
results_text += f"Sample websites from Column H: {list(df.iloc[:5, 7].dropna())}\n"
|
| 334 |
-
|
| 335 |
-
# Serialize the updated dataframe to CSV text in memory for display (nothing is written to disk)
|
| 336 |
-
csv_buffer = io.StringIO()
|
| 337 |
-
df.to_csv(csv_buffer, index=False)
|
| 338 |
-
csv_output = csv_buffer.getvalue()
|
| 339 |
-
|
| 340 |
-
return results_text, csv_output
|
| 341 |
-
|
| 342 |
-
except Exception as e:
|
| 343 |
-
print(f"Error processing Excel file: {e}")
|
| 344 |
-
return f"Error processing Excel file: {str(e)}", ""
|
| 345 |
-
|
| 346 |
def parse_csv_file(file_content):
|
| 347 |
"""Parse CSV file and extract website URLs"""
|
| 348 |
websites = []
|
|
@@ -681,59 +594,57 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
|
|
| 681 |
elem_classes=["results-container"]
|
| 682 |
)
|
| 683 |
|
| 684 |
-
#
|
| 685 |
-
with gr.TabItem("
|
| 686 |
with gr.Row():
|
| 687 |
with gr.Column(scale=2):
|
| 688 |
-
gr.HTML('<div class="section-header">
|
| 689 |
|
| 690 |
-
|
| 691 |
-
label="Upload
|
| 692 |
-
file_types=[".
|
| 693 |
elem_classes=["custom-input"]
|
| 694 |
)
|
| 695 |
|
| 696 |
gr.HTML("""
|
| 697 |
<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
|
| 698 |
-
<strong>
|
| 699 |
-
• Websites should be in column H<br>
|
| 700 |
-
• Contact info will be populated in:<br>
|
| 701 |
-
- First contact: I(First Name), J(Last Name), K(Job Title), L(Phone), M(Email)<br>
|
| 702 |
-
- Second contact: S(First Name), T(Last Name), U(Job Title), V(Phone), W(Email)<br><br>
|
| 703 |
-
<strong>CSV File Requirements:</strong><br>
|
| 704 |
• Include a column named 'website', 'url', or 'domain'<br>
|
| 705 |
-
• One website per row
|
|
|
|
| 706 |
</div>
|
| 707 |
""")
|
| 708 |
|
| 709 |
with gr.Row():
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
|
|
|
|
|
|
| 714 |
elem_classes=["custom-input"]
|
| 715 |
)
|
| 716 |
|
| 717 |
-
|
| 718 |
-
"Process
|
| 719 |
variant="primary",
|
| 720 |
size="lg",
|
| 721 |
elem_classes=["primary-btn"]
|
| 722 |
)
|
| 723 |
|
| 724 |
-
gr.HTML('<div class="section-header">
|
| 725 |
|
| 726 |
with gr.Row():
|
| 727 |
-
|
| 728 |
-
label="
|
| 729 |
lines=18,
|
| 730 |
max_lines=35,
|
| 731 |
show_copy_button=True,
|
| 732 |
elem_classes=["results-container"]
|
| 733 |
)
|
| 734 |
|
| 735 |
-
|
| 736 |
-
label="
|
| 737 |
lines=18,
|
| 738 |
max_lines=35,
|
| 739 |
show_copy_button=True,
|
|
@@ -753,4 +664,63 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
|
|
| 753 |
gr.HTML('</div>')
|
| 754 |
|
| 755 |
# Quick search buttons
|
| 756 |
-
gr.HTML('<div class="section-header">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import json
|
| 5 |
import csv
|
| 6 |
import io
|
|
|
|
| 7 |
from urllib.parse import urlparse, urljoin
|
| 8 |
import time
|
| 9 |
import random
|
|
|
|
| 256 |
|
| 257 |
return contacts
|
| 258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
def parse_csv_file(file_content):
|
| 260 |
"""Parse CSV file and extract website URLs"""
|
| 261 |
websites = []
|
|
|
|
| 594 |
elem_classes=["results-container"]
|
| 595 |
)
|
| 596 |
|
| 597 |
+
# CSV Upload Tab
|
| 598 |
+
with gr.TabItem("CSV Bulk Search"):
|
| 599 |
with gr.Row():
|
| 600 |
with gr.Column(scale=2):
|
| 601 |
+
gr.HTML('<div class="section-header">CSV Upload</div>')
|
| 602 |
|
| 603 |
+
csv_file = gr.File(
|
| 604 |
+
label="Upload CSV File",
|
| 605 |
+
file_types=[".csv"],
|
| 606 |
elem_classes=["custom-input"]
|
| 607 |
)
|
| 608 |
|
| 609 |
gr.HTML("""
|
| 610 |
<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
|
| 611 |
+
<strong>CSV Format Requirements:</strong><br>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
• Include a column named 'website', 'url', or 'domain'<br>
|
| 613 |
+
• One website per row<br>
|
| 614 |
+
• Example: techflowsolutions.com, greenleafconsult.com
|
| 615 |
</div>
|
| 616 |
""")
|
| 617 |
|
| 618 |
with gr.Row():
|
| 619 |
+
csv_max_results = gr.Slider(
|
| 620 |
+
minimum=1,
|
| 621 |
+
maximum=50,
|
| 622 |
+
value=20,
|
| 623 |
+
step=1,
|
| 624 |
+
label="Maximum Results",
|
| 625 |
elem_classes=["custom-input"]
|
| 626 |
)
|
| 627 |
|
| 628 |
+
csv_search_btn = gr.Button(
|
| 629 |
+
"Process CSV",
|
| 630 |
variant="primary",
|
| 631 |
size="lg",
|
| 632 |
elem_classes=["primary-btn"]
|
| 633 |
)
|
| 634 |
|
| 635 |
+
gr.HTML('<div class="section-header">CSV Results</div>')
|
| 636 |
|
| 637 |
with gr.Row():
|
| 638 |
+
csv_results_display = gr.Textbox(
|
| 639 |
+
label="CSV Processing Report",
|
| 640 |
lines=18,
|
| 641 |
max_lines=35,
|
| 642 |
show_copy_button=True,
|
| 643 |
elem_classes=["results-container"]
|
| 644 |
)
|
| 645 |
|
| 646 |
+
csv_export_output = gr.Textbox(
|
| 647 |
+
label="Export Data (CSV Format)",
|
| 648 |
lines=18,
|
| 649 |
max_lines=35,
|
| 650 |
show_copy_button=True,
|
|
|
|
| 664 |
gr.HTML('</div>')
|
| 665 |
|
| 666 |
# Quick search buttons
|
| 667 |
+
gr.HTML('<div class="section-header">Quick Access Sample Websites</div>')
|
| 668 |
+
|
| 669 |
+
with gr.Row():
|
| 670 |
+
quick_btn1 = gr.Button("TechFlow Solutions", size="sm", elem_classes=["secondary-btn"])
|
| 671 |
+
quick_btn2 = gr.Button("GreenLeaf Consulting", size="sm", elem_classes=["secondary-btn"])
|
| 672 |
+
quick_btn3 = gr.Button("BlueSky Marketing", size="sm", elem_classes=["secondary-btn"])
|
| 673 |
+
quick_btn4 = gr.Button("Quantum Dynamics", size="sm", elem_classes=["secondary-btn"])
|
| 674 |
+
|
| 675 |
+
with gr.Row():
|
| 676 |
+
quick_btn5 = gr.Button("Stellar Logistics", size="sm", elem_classes=["secondary-btn"])
|
| 677 |
+
quick_btn6 = gr.Button("Nexus Financial", size="sm", elem_classes=["secondary-btn"])
|
| 678 |
+
quick_btn7 = gr.Button("Horizon Health", size="sm", elem_classes=["secondary-btn"])
|
| 679 |
+
quick_btn8 = gr.Button("Phoenix Manufacturing", size="sm", elem_classes=["secondary-btn"])
|
| 680 |
+
|
| 681 |
+
# Event handlers
|
| 682 |
+
search_btn.click(
|
| 683 |
+
fn=search_website_contacts,
|
| 684 |
+
inputs=[website_input, max_results],
|
| 685 |
+
outputs=[results_display, csv_output]
|
| 686 |
+
)
|
| 687 |
+
|
| 688 |
+
csv_search_btn.click(
|
| 689 |
+
fn=search_csv_websites,
|
| 690 |
+
inputs=[csv_file, csv_max_results],
|
| 691 |
+
outputs=[csv_results_display, csv_export_output]
|
| 692 |
+
)
|
| 693 |
+
|
| 694 |
+
# Quick search button handlers
|
| 695 |
+
quick_btn1.click(lambda: "techflowsolutions.com", outputs=website_input)
|
| 696 |
+
quick_btn2.click(lambda: "greenleafconsult.com", outputs=website_input)
|
| 697 |
+
quick_btn3.click(lambda: "blueskymarketing.net", outputs=website_input)
|
| 698 |
+
quick_btn4.click(lambda: "quantumdynamics.org", outputs=website_input)
|
| 699 |
+
quick_btn5.click(lambda: "stellarlogistics.biz", outputs=website_input)
|
| 700 |
+
quick_btn6.click(lambda: "nexusfinancial.pro", outputs=website_input)
|
| 701 |
+
quick_btn7.click(lambda: "horizonhealth.care", outputs=website_input)
|
| 702 |
+
quick_btn8.click(lambda: "phoenixmfg.com", outputs=website_input)
|
| 703 |
+
|
| 704 |
+
# Examples
|
| 705 |
+
gr.Examples(
|
| 706 |
+
examples=[
|
| 707 |
+
["techflowsolutions.com", 5],
|
| 708 |
+
["greenleafconsult.com", 3],
|
| 709 |
+
["blueskymarketing.net", 4],
|
| 710 |
+
["quantumdynamics.org", 6]
|
| 711 |
+
],
|
| 712 |
+
inputs=[website_input, max_results],
|
| 713 |
+
label="Sample Searches"
|
| 714 |
+
)
|
| 715 |
+
|
| 716 |
+
# Footer
|
| 717 |
+
gr.HTML("""
|
| 718 |
+
<div style="text-align: center; padding: 30px 20px; background: linear-gradient(135deg, #64748b 0%, #475569 100%); color: white; border-radius: 15px; margin-top: 30px;">
|
| 719 |
+
<h3 style="margin: 0 0 10px 0;">Contact Intelligence Platform</h3>
|
| 720 |
+
<p style="margin: 0; opacity: 0.9;">Professional-grade contact discovery and lead generation technology</p>
|
| 721 |
+
<p style="margin: 10px 0 0 0; font-size: 0.9em; opacity: 0.7;">Powered by advanced web intelligence algorithms</p>
|
| 722 |
+
</div>
|
| 723 |
+
""")
|
| 724 |
+
|
| 725 |
+
if __name__ == "__main__":
|
| 726 |
+
app.launch()
|