JayBene1 committed on
Commit
640980e
·
verified ·
1 Parent(s): 13b4428

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +211 -152
app.py CHANGED
@@ -7,6 +7,9 @@ import io
7
  from urllib.parse import urlparse, urljoin
8
  import time
9
  import random
 
 
 
10
 
11
  # Mock contacts database (same as your API)
12
  CONTACTS_DB = [
@@ -256,93 +259,202 @@ def simulate_website_scraping(url):
256
 
257
  return contacts
258
 
259
- ...
260
- # After `parse_csv_file`, add this new function
261
-
262
- def parse_excel_file(file_content):
263
- """Parse Excel file and extract website URLs and row mapping"""
264
- import pandas as pd
265
  try:
266
- df = pd.read_excel(file_content, engine='openpyxl')
 
 
 
 
 
 
267
  website_columns = ['website', 'url', 'domain', 'site', 'web', 'homepage']
268
-
 
 
 
 
 
269
  website_column = None
270
- for col in df.columns:
271
- if col.lower().strip() in website_columns:
272
- website_column = col
 
273
  break
274
-
275
  if not website_column:
276
- return [], None
277
-
278
- return df, website_column
 
 
 
 
 
 
 
 
 
279
  except Exception as e:
280
- print(f"Error parsing Excel: {e}")
281
- return [], None
282
-
283
- # Modify `search_csv_websites` to support both CSV and Excel
284
-
285
- def search_csv_websites(uploaded_file, max_results=10):
286
- import pandas as pd
287
- if uploaded_file is None:
288
- return "Please upload a CSV or Excel file", ""
289
 
 
 
290
  try:
291
- filename = uploaded_file.name.lower()
292
-
293
- if filename.endswith(".csv"):
294
- content = uploaded_file.read()
295
- df = pd.read_csv(io.BytesIO(content))
296
- elif filename.endswith(".xls") or filename.endswith(".xlsx"):
297
- df = pd.read_excel(uploaded_file, engine='openpyxl')
298
- else:
299
- return "Unsupported file type. Please upload a .csv or .xlsx file.", ""
300
-
301
- if 'H' not in df.columns and len(df.columns) < 8:
302
- return "Column H (for websites) is missing.", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
- updated_rows = 0
305
- for idx, row in df.iterrows():
306
- website = row.iloc[7] # Column H
307
- if pd.isna(website):
308
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  contacts = simulate_website_scraping(website)
310
- if not contacts:
311
- continue
312
-
313
- if len(contacts) > 0:
314
- df.at[idx, 'I'] = contacts[0]['first_name']
315
- df.at[idx, 'J'] = contacts[0]['last_name']
316
- df.at[idx, 'K'] = contacts[0]['job_title']
317
- df.at[idx, 'L'] = contacts[0]['phone']
318
- df.at[idx, 'M'] = contacts[0]['email']
319
-
320
- if len(contacts) > 1:
321
- df.at[idx, 'S'] = contacts[1]['first_name']
322
- df.at[idx, 'T'] = contacts[1]['last_name']
323
- df.at[idx, 'U'] = contacts[1]['job_title']
324
- df.at[idx, 'V'] = contacts[1]['phone']
325
- df.at[idx, 'W'] = contacts[1]['email']
326
-
327
- updated_rows += 1
328
-
329
- output_buffer = io.StringIO()
330
- df.to_csv(output_buffer, index=False)
331
- csv_data = output_buffer.getvalue()
332
-
333
- return f"Processed {updated_rows} rows with matching contacts.", csv_data
334
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  except Exception as e:
336
- return f"Error: {e}", ""
337
 
338
- # Update Gradio file upload to allow Excel
339
- csv_file = gr.File(
340
- label="Upload CSV or Excel File",
341
- file_types=[".csv", ".xlsx", ".xls"],
342
- elem_classes=["custom-input"]
343
- )
344
-
345
- # Everything else remains unchanged
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
 
347
  def search_website_contacts(website_url, max_results=10):
348
  """Main function to search for contacts on a website"""
@@ -576,7 +688,7 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
576
  with gr.TabItem("CSV Bulk Search"):
577
  with gr.Row():
578
  with gr.Column(scale=2):
579
- gr.HTML('<div class="section-header">CSV Upload</div>')
580
 
581
  csv_file = gr.File(
582
  label="Upload CSV File",
@@ -584,12 +696,22 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
584
  elem_classes=["custom-input"]
585
  )
586
 
 
 
 
 
 
 
587
  gr.HTML("""
588
  <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
589
  <strong>CSV Format Requirements:</strong><br>
590
  • Include a column named 'website', 'url', or 'domain'<br>
591
  • One website per row<br>
592
- • Example: techflowsolutions.com, greenleafconsult.com
 
 
 
 
593
  </div>
594
  """)
595
 
@@ -599,7 +721,7 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
599
  maximum=50,
600
  value=20,
601
  step=1,
602
- label="Maximum Results",
603
  elem_classes=["custom-input"]
604
  )
605
 
@@ -609,12 +731,19 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
609
  size="lg",
610
  elem_classes=["primary-btn"]
611
  )
 
 
 
 
 
 
 
612
 
613
- gr.HTML('<div class="section-header">CSV Results</div>')
614
 
615
  with gr.Row():
616
  csv_results_display = gr.Textbox(
617
- label="CSV Processing Report",
618
  lines=18,
619
  max_lines=35,
620
  show_copy_button=True,
@@ -622,7 +751,7 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
622
  )
623
 
624
  csv_export_output = gr.Textbox(
625
- label="Export Data (CSV Format)",
626
  lines=18,
627
  max_lines=35,
628
  show_copy_button=True,
@@ -631,74 +760,4 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
631
 
632
  # Sample websites section
633
  with gr.Accordion("Sample Websites Database", open=False):
634
- gr.HTML('<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af;">')
635
- sample_websites = gr.Textbox(
636
- label="Available Websites in Database",
637
- value=get_all_available_websites(),
638
- lines=8,
639
- interactive=False,
640
- elem_classes=["custom-input"]
641
- )
642
- gr.HTML('</div>')
643
-
644
- # Quick search buttons
645
- gr.HTML('<div class="section-header">Quick Access Sample Websites</div>')
646
-
647
- with gr.Row():
648
- quick_btn1 = gr.Button("TechFlow Solutions", size="sm", elem_classes=["secondary-btn"])
649
- quick_btn2 = gr.Button("GreenLeaf Consulting", size="sm", elem_classes=["secondary-btn"])
650
- quick_btn3 = gr.Button("BlueSky Marketing", size="sm", elem_classes=["secondary-btn"])
651
- quick_btn4 = gr.Button("Quantum Dynamics", size="sm", elem_classes=["secondary-btn"])
652
-
653
- with gr.Row():
654
- quick_btn5 = gr.Button("Stellar Logistics", size="sm", elem_classes=["secondary-btn"])
655
- quick_btn6 = gr.Button("Nexus Financial", size="sm", elem_classes=["secondary-btn"])
656
- quick_btn7 = gr.Button("Horizon Health", size="sm", elem_classes=["secondary-btn"])
657
- quick_btn8 = gr.Button("Phoenix Manufacturing", size="sm", elem_classes=["secondary-btn"])
658
-
659
- # Event handlers
660
- search_btn.click(
661
- fn=search_website_contacts,
662
- inputs=[website_input, max_results],
663
- outputs=[results_display, csv_output]
664
- )
665
-
666
- csv_search_btn.click(
667
- fn=search_csv_websites,
668
- inputs=[csv_file, csv_max_results],
669
- outputs=[csv_results_display, csv_export_output]
670
- )
671
-
672
- # Quick search button handlers
673
- quick_btn1.click(lambda: "techflowsolutions.com", outputs=website_input)
674
- quick_btn2.click(lambda: "greenleafconsult.com", outputs=website_input)
675
- quick_btn3.click(lambda: "blueskymarketing.net", outputs=website_input)
676
- quick_btn4.click(lambda: "quantumdynamics.org", outputs=website_input)
677
- quick_btn5.click(lambda: "stellarlogistics.biz", outputs=website_input)
678
- quick_btn6.click(lambda: "nexusfinancial.pro", outputs=website_input)
679
- quick_btn7.click(lambda: "horizonhealth.care", outputs=website_input)
680
- quick_btn8.click(lambda: "phoenixmfg.com", outputs=website_input)
681
-
682
- # Examples
683
- gr.Examples(
684
- examples=[
685
- ["techflowsolutions.com", 5],
686
- ["greenleafconsult.com", 3],
687
- ["blueskymarketing.net", 4],
688
- ["quantumdynamics.org", 6]
689
- ],
690
- inputs=[website_input, max_results],
691
- label="Sample Searches"
692
- )
693
-
694
- # Footer
695
- gr.HTML("""
696
- <div style="text-align: center; padding: 30px 20px; background: linear-gradient(135deg, #64748b 0%, #475569 100%); color: white; border-radius: 15px; margin-top: 30px;">
697
- <h3 style="margin: 0 0 10px 0;">Contact Intelligence Platform</h3>
698
- <p style="margin: 0; opacity: 0.9;">Professional-grade contact discovery and lead generation technology</p>
699
- <p style="margin: 10px 0 0 0; font-size: 0.9em; opacity: 0.7;">Powered by advanced web intelligence algorithms</p>
700
- </div>
701
- """)
702
-
703
- if __name__ == "__main__":
704
- app.launch()
 
7
  from urllib.parse import urlparse, urljoin
8
  import time
9
  import random
10
+ import pandas as pd
11
+ import openpyxl
12
+ from io import BytesIO
13
 
14
  # Mock contacts database (same as your API)
15
  CONTACTS_DB = [
 
259
 
260
  return contacts
261
 
262
def parse_csv_file(file_content):
    """Parse CSV file and extract website URLs.

    Args:
        file_content: Raw CSV file bytes; decoded as UTF-8.

    Returns:
        list[str]: Website URLs taken from the first recognized website
        column, or an empty list when no such column exists or parsing fails.
    """
    websites = []
    try:
        # Decode file content
        content = file_content.decode('utf-8')

        # Parse CSV
        csv_reader = csv.DictReader(io.StringIO(content))

        # Look for common website column names (case-insensitive)
        website_columns = ['website', 'url', 'domain', 'site', 'web', 'homepage']

        # Get all column names and print for debugging
        all_columns = list(csv_reader.fieldnames) if csv_reader.fieldnames else []
        print(f"CSV columns found: {all_columns}")

        # Find the website column (case-insensitive)
        website_column = None
        for col_name in all_columns:
            if col_name and col_name.lower().strip() in website_columns:
                website_column = col_name
                print(f"Using website column: '{website_column}'")
                break

        if not website_column:
            print(f"No website column found. Available columns: {all_columns}")
            return []

        # Extract websites. A short row yields None for this key
        # (DictReader's default restval), so coalesce to '' before
        # stripping — previously that raised AttributeError and the broad
        # except below silently returned [] for the whole file.
        for row in csv_reader:
            website_url = (row.get(website_column) or '').strip()
            if website_url:
                websites.append(website_url)

        print(f"Extracted {len(websites)} websites: {websites[:5]}...")  # Show first 5
        return websites

    except Exception as e:
        print(f"Error parsing CSV: {e}")
        return []
 
 
 
 
 
 
 
303
 
304
def parse_excel_file(file_path):
    """Parse Excel file and extract website URLs from column H, fill contact info in specific columns.

    Websites are read from column H starting at row 2 (row 1 is assumed to
    be a header). The first matching contact is written into columns I-M
    and a second contact, when available, into columns S-W. The workbook
    is saved alongside the input with a '_with_contacts' suffix.

    Args:
        file_path: Path to a workbook readable by openpyxl (.xlsx).

    Returns:
        tuple: (saved workbook path, number of rows with a non-empty
        column H), or (None, 0) on any error.
    """
    import os

    try:
        # Read Excel file
        workbook = openpyxl.load_workbook(file_path)
        sheet = workbook.active

        websites_seen = 0  # rows whose column H holds a truthy value

        # Process each row (start from row 2 to skip the header)
        for row_num in range(2, sheet.max_row + 1):
            website_url = sheet[f'H{row_num}'].value
            if website_url:
                # Count here instead of re-scanning the sheet afterwards.
                websites_seen += 1

            if website_url and str(website_url).strip():
                website_url = str(website_url).strip()
                print(f"Processing website: {website_url}")

                # Find contacts for this website
                contacts = simulate_website_scraping(website_url)

                if contacts:
                    # Fill first contact info (columns I-M)
                    first_contact = contacts[0]
                    sheet[f'I{row_num}'] = first_contact['first_name']  # Contact First Name
                    sheet[f'J{row_num}'] = first_contact['last_name']   # Contact Last Name
                    sheet[f'K{row_num}'] = first_contact['job_title']   # Job Title
                    sheet[f'L{row_num}'] = first_contact['phone']       # Phone
                    sheet[f'M{row_num}'] = first_contact['email']       # Email

                    # Fill second contact info if available (columns S-W)
                    if len(contacts) > 1:
                        second_contact = contacts[1]
                        sheet[f'S{row_num}'] = second_contact['first_name']  # Second Contact First Name
                        sheet[f'T{row_num}'] = second_contact['last_name']   # Second Contact Last Name
                        sheet[f'U{row_num}'] = second_contact['job_title']   # Second Contact Job Title
                        sheet[f'V{row_num}'] = second_contact['phone']       # Second Contact Phone
                        sheet[f'W{row_num}'] = second_contact['email']       # Second Contact Email

        # Build the output path from the real extension. The previous
        # file_path.replace('.xlsx', ...) was a no-op for any other
        # extension (e.g. '.xls'), which made output_path == file_path and
        # silently overwrote the uploaded file.
        root, ext = os.path.splitext(file_path)
        output_path = f"{root}_with_contacts{ext}"

        # Save the modified Excel file
        workbook.save(output_path)

        return output_path, websites_seen

    except Exception as e:
        print(f"Error processing Excel file: {e}")
        return None, 0
350
 
351
def search_csv_websites(csv_file, max_results=10):
    """Search for contacts from websites listed in CSV file.

    Args:
        csv_file: Uploaded CSV content, passed straight to parse_csv_file
            (which expects UTF-8 bytes); None when nothing was uploaded.
        max_results: Cap on the number of unique contacts returned.

    Returns:
        tuple[str, str]: (human-readable report, CSV export text). The
        second element is "" when nothing was found or an error occurred.
    """
    if csv_file is None:
        return "Please upload a CSV file", ""

    try:
        # Parse CSV file
        websites = parse_csv_file(csv_file)

        if not websites:
            return "No websites found in CSV file. Please ensure your CSV has a column named 'website', 'url', or 'domain'. Check the console for debugging info about your CSV columns.", ""

        all_contacts = []
        processed_websites = []

        # Search each website
        for website in websites[:20]:  # Limit to first 20 websites
            print(f"Processing website: {website}")
            contacts = simulate_website_scraping(website)
            if contacts:
                all_contacts.extend(contacts)
                processed_websites.append(website)
                print(f"Found {len(contacts)} contacts for {website}")
            else:
                print(f"No contacts found for {website}")

        # Remove duplicates based on email
        unique_contacts = []
        seen_emails = set()
        for contact in all_contacts:
            if contact['email'] not in seen_emails:
                unique_contacts.append(contact)
                seen_emails.add(contact['email'])

        # Limit results
        unique_contacts = unique_contacts[:max_results]

        if not unique_contacts:
            return f"No contacts found for the {len(websites)} websites in the CSV file. Processed websites: {', '.join(websites[:10])}", ""

        # Format results
        results_text = f"CONTACT DISCOVERY REPORT\n"
        results_text += f"Websites Processed: {len(processed_websites)}\n"
        results_text += f"Total Websites in CSV: {len(websites)}\n"
        results_text += f"Websites with Contacts: {len(processed_websites)}\n"
        results_text += f"Unique Contacts Found: {len(unique_contacts)}\n"
        results_text += f"Processed Websites: {', '.join(processed_websites)}\n"
        results_text += f"{'='*60}\n\n"

        for i, contact in enumerate(unique_contacts, 1):
            results_text += f"CONTACT #{i}\n"
            results_text += f"Name: {contact['first_name']} {contact['last_name']}\n"
            results_text += f"Position: {contact['job_title']}\n"
            results_text += f"Email: {contact['email']}\n"
            results_text += f"Phone: {contact['phone']}\n"
            results_text += f"Company: {contact['company']}\n"
            results_text += f"Website: {contact['website']}\n\n"

        # Create CSV output with csv.writer so fields containing commas or
        # quotes are escaped correctly — the previous string concatenation
        # produced corrupt rows for such values. Output is byte-identical
        # for fields without special characters.
        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator='\n')
        writer.writerow(['First Name', 'Last Name', 'Job Title', 'Email', 'Phone', 'Company', 'Website'])
        for contact in unique_contacts:
            writer.writerow([
                contact['first_name'], contact['last_name'], contact['job_title'],
                contact['email'], contact['phone'], contact['company'], contact['website'],
            ])

        return results_text, buffer.getvalue()

    except Exception as e:
        return f"Error processing CSV file: {str(e)}", ""
418
 
419
def search_excel_websites(excel_file, max_results=10):
    """Search for contacts from websites listed in Excel file column H and fill contact info"""
    if excel_file is None:
        return "Please upload an Excel file", ""

    try:
        # Delegate the heavy lifting: parse_excel_file scrapes each website
        # in column H and saves a copy of the workbook with contacts filled.
        result_path, website_count = parse_excel_file(excel_file.name)

        if not result_path:
            return "Error processing Excel file. Please ensure your Excel file has websites in column H.", ""

        # Assemble the human-readable report line by line.
        report_lines = [
            "EXCEL CONTACT DISCOVERY REPORT",
            f"Total Websites Processed: {website_count}",
            f"Modified Excel File: {result_path}",
            "=" * 60,
            "",
            "Contact information has been filled in the following columns:",
            "• Column I: Contact First Name",
            "• Column J: Contact Last Name",
            "• Column K: Job Title",
            "• Column L: Phone",
            "• Column M: Email",
            "",
            "Second contact information (if available):",
            "• Column S: Second Contact First Name",
            "• Column T: Second Contact Last Name",
            "• Column U: Second Contact Job Title",
            "• Column V: Second Contact Phone",
            "• Column W: Second Contact Email",
            "",
            f"The modified Excel file has been saved as: {result_path}",
            "You can download it from the file system.",
        ]

        # Short status message for the export pane.
        summary = (
            "Excel file processed successfully.\n"
            f"Modified file saved as: {result_path}\n"
            f"Total websites processed: {website_count}"
        )

        return "\n".join(report_lines), summary

    except Exception as e:
        return f"Error processing Excel file: {str(e)}", ""
458
 
459
  def search_website_contacts(website_url, max_results=10):
460
  """Main function to search for contacts on a website"""
 
688
  with gr.TabItem("CSV Bulk Search"):
689
  with gr.Row():
690
  with gr.Column(scale=2):
691
+ gr.HTML('<div class="section-header">File Upload</div>')
692
 
693
  csv_file = gr.File(
694
  label="Upload CSV File",
 
696
  elem_classes=["custom-input"]
697
  )
698
 
699
+ excel_file = gr.File(
700
+ label="Upload Excel File",
701
+ file_types=[".xlsx", ".xls"],
702
+ elem_classes=["custom-input"]
703
+ )
704
+
705
  gr.HTML("""
706
  <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
707
  <strong>CSV Format Requirements:</strong><br>
708
  • Include a column named 'website', 'url', or 'domain'<br>
709
  • One website per row<br>
710
+ • Example: techflowsolutions.com, greenleafconsult.com<br><br>
711
+ <strong>Excel Format Requirements:</strong><br>
712
+ • Websites should be in column H<br>
713
+ • Contact info will be filled in columns I-M (first contact) and S-W (second contact)<br>
714
+ • The modified file will be saved with '_with_contacts' suffix
715
  </div>
716
  """)
717
 
 
721
  maximum=50,
722
  value=20,
723
  step=1,
724
+ label="Maximum Results (CSV only)",
725
  elem_classes=["custom-input"]
726
  )
727
 
 
731
  size="lg",
732
  elem_classes=["primary-btn"]
733
  )
734
+
735
+ excel_search_btn = gr.Button(
736
+ "Process Excel",
737
+ variant="primary",
738
+ size="lg",
739
+ elem_classes=["primary-btn"]
740
+ )
741
 
742
+ gr.HTML('<div class="section-header">Processing Results</div>')
743
 
744
  with gr.Row():
745
  csv_results_display = gr.Textbox(
746
+ label="File Processing Report",
747
  lines=18,
748
  max_lines=35,
749
  show_copy_button=True,
 
751
  )
752
 
753
  csv_export_output = gr.Textbox(
754
+ label="Export Data / File Info",
755
  lines=18,
756
  max_lines=35,
757
  show_copy_button=True,
 
760
 
761
  # Sample websites section
762
  with gr.Accordion("Sample Websites Database", open=False):
763
+ gr.HTML('<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af