Spaces:

JayBene1
/

apptest3

Runtime error

App Files Files Community

JayBene1 committed on Jul 14, 2025

Commit

6fdcfd0

verified ·

1 Parent(s): c246f70

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -121

app.py CHANGED Viewed

@@ -370,42 +370,8 @@ def parse_csv_file(file_obj):
         debug_info.append(f"Error parsing CSV: {e}")
         return [], debug_info
-def rank_contact_by_title(job_title):
-    """Rank contacts by job title priority (lower number = higher priority)"""
-    title_lower = job_title.lower()
-    # Define ranking hierarchy
-    if 'president' in title_lower:
-        return 1
-    elif 'ceo' in title_lower or 'chief executive' in title_lower:
-        return 2
-    elif 'cfo' in title_lower or 'chief financial' in title_lower:
-        return 3
-    elif 'coo' in title_lower or 'chief operating' in title_lower:
-        return 4
-    elif 'vice president' in title_lower or 'vp' in title_lower or 'v.p.' in title_lower:
-        return 5
-    elif 'controller' in title_lower:
-        return 6
-    elif 'general manager' in title_lower or 'gm' in title_lower:
-        return 7
-    else:
-        return 100  # All other positions get lower priority
-def get_best_contact_for_website(website_url):
-    """Get the highest-ranked contact for a specific website"""
-    contacts = simulate_website_scraping(website_url)
-    if not contacts:
-        return None
-    # Sort contacts by job title ranking
-    contacts_with_rank = [(contact, rank_contact_by_title(contact['job_title'])) for contact in contacts]
-    contacts_with_rank.sort(key=lambda x: x[1])  # Sort by rank (lower number = higher priority)
-    return contacts_with_rank[0][0]  # Return the highest-ranked contact
 def search_csv_websites(csv_file, max_results=10):
-    """Search for contacts from websites listed in CSV file and populate the CSV"""
     if csv_file is None:
         return "Please upload a CSV file", ""
@@ -425,78 +391,45 @@ def search_csv_websites(csv_file, max_results=10):
             error_msg += "4. Verify the CSV file is not corrupted\n"
             return error_msg, ""
-        # Read the original CSV file to preserve all data
-        with open(csv_file.name, 'r', encoding='utf-8') as f:
-            content = f.read()
-        csv_reader = csv.reader(io.StringIO(content))
-        original_rows = list(csv_reader)
-        # Process websites and find contacts
         all_contacts = []
         processed_websites = []
-        contacts_by_website = {}
-        # Create a mapping of websites to their row indices
-        website_to_row = {}
-        for row_idx, row in enumerate(original_rows):
-            if len(row) > 7:  # Column H exists
-                website_url = row[7].strip()
-                if website_url and is_valid_url(website_url):
-                    website_to_row[website_url] = row_idx
-        # Search each website and get the best contact
         for website in websites[:20]:  # Limit to first 20 websites
             print(f"Processing website: {website}")
-            best_contact = get_best_contact_for_website(website)
-            if best_contact:
-                contacts_by_website[website] = best_contact
-                all_contacts.append(best_contact)
                 processed_websites.append(website)
-                print(f"Found best contact for {website}: {best_contact['first_name']} {best_contact['last_name']} - {best_contact['job_title']}")
             else:
                 print(f"No contacts found for {website}")
-        # Create updated CSV with contact information
-        updated_rows = []
-        for row_idx, row in enumerate(original_rows):
-            # Make a copy of the row and ensure it has enough columns
-            new_row = row[:]
-            # Extend row to have at least 13 columns (A-M)
-            while len(new_row) < 13:
-                new_row.append("")
-            # Check if this row has a website we found contacts for
-            if len(row) > 7:
-                website_url = row[7].strip()
-                if website_url in contacts_by_website:
-                    contact = contacts_by_website[website_url]
-                    # Populate contact information in specified columns
-                    new_row[8] = contact['first_name']     # Column I (index 8)
-                    new_row[9] = contact['last_name']      # Column J (index 9)
-                    new_row[10] = contact['job_title']     # Column K (index 10)
-                    new_row[11] = contact['phone']         # Column L (index 11)
-                    new_row[12] = contact['email']         # Column M (index 12)
-            updated_rows.append(new_row)
-        if not all_contacts:
             result_msg = f"No contacts found for the {len(websites)} websites from the CSV file.\n\n"
             result_msg += "DEBUG INFORMATION:\n" + debug_text + "\n\n"
             result_msg += f"Websites processed: {', '.join(websites[:10])}\n"
             result_msg += "This might be because the websites are not in our sample database."
             return result_msg, ""
-        # Format results text
         results_text = f"CONTACT DISCOVERY REPORT\n"
         results_text += f"CSV Processing Details:\n"
         results_text += f"Total Websites in CSV: {len(websites)}\n"
         results_text += f"Websites Processed: {len(processed_websites)}\n"
         results_text += f"Websites with Contacts: {len(processed_websites)}\n"
-        results_text += f"Contacts Found: {len(all_contacts)}\n"
         results_text += f"Processed Websites: {', '.join(processed_websites)}\n"
         results_text += f"{'='*60}\n\n"
@@ -504,17 +437,7 @@ def search_csv_websites(csv_file, max_results=10):
         results_text += "DEBUG INFORMATION:\n" + debug_text + "\n\n"
         results_text += f"{'='*60}\n\n"
-        # Show contact rankings
-        results_text += "CONTACT RANKINGS (by job title priority):\n"
-        for i, contact in enumerate(all_contacts, 1):
-            rank = rank_contact_by_title(contact['job_title'])
-            results_text += f"{i}. {contact['first_name']} {contact['last_name']} - {contact['job_title']} "
-            results_text += f"(Priority Rank: {rank}) - {contact['company']}\n"
-        results_text += f"\n{'='*60}\n\n"
-        # Show detailed contact information
-        for i, contact in enumerate(all_contacts, 1):
             results_text += f"CONTACT #{i}\n"
             results_text += f"Name: {contact['first_name']} {contact['last_name']}\n"
             results_text += f"Position: {contact['job_title']}\n"
@@ -523,17 +446,12 @@ def search_csv_websites(csv_file, max_results=10):
             results_text += f"Company: {contact['company']}\n"
             results_text += f"Website: {contact['website']}\n\n"
-        # Create CSV output with updated data
-        csv_output = io.StringIO()
-        csv_writer = csv.writer(csv_output)
-        for row in updated_rows:
-            csv_writer.writerow(row)
-        csv_content = csv_output.getvalue()
-        csv_output.close()
-        return results_text, csv_content
     except Exception as e:
         return f"Error processing CSV file: {str(e)}", ""
@@ -727,22 +645,11 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
                         label="Target Website URL",
                         placeholder="Enter company website (e.g., techflowsolutions.com)",
                         value="",
-                         elem_classes=["custom-input"]
                     )
-                    gr.HTML("""
-                    <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
-                        <strong>CSV Format - Multiple Options:</strong><br>
-                        <strong>Option 1:</strong> Website URLs in <strong>Column H (8th column)</strong><br>
-                        <strong>Option 2:</strong> Column header named 'website', 'url', 'domain', 'site', or 'web'<br>
-                        <strong>Option 3:</strong> System will auto-detect columns with valid URLs<br><br>
-                        <strong>Examples:</strong> techflowsolutions.com, https://example.com, www.company.com<br>
-                        <strong>Note:</strong> The system will show detailed debugging information about your CSV structure
-                    </div>
-                    """)
                     with gr.Row():
-                        csv_max_results = gr.Slider(
                             minimum=1,
                             maximum=20,
                             value=8,
@@ -789,6 +696,17 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
                         elem_classes=["custom-input"]
                     )
                     with gr.Row():
                         csv_max_results = gr.Slider(
                             minimum=1,
@@ -825,7 +743,67 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
                     elem_classes=["results-container"]
                 )
     # Footer
     gr.HTML("""

         debug_info.append(f"Error parsing CSV: {e}")
         return [], debug_info
 def search_csv_websites(csv_file, max_results=10):
+    """Search for contacts from websites listed in CSV file"""
     if csv_file is None:
         return "Please upload a CSV file", ""
             error_msg += "4. Verify the CSV file is not corrupted\n"
             return error_msg, ""
         all_contacts = []
         processed_websites = []
+        # Search each website
         for website in websites[:20]:  # Limit to first 20 websites
             print(f"Processing website: {website}")
+            contacts = simulate_website_scraping(website)
+            if contacts:
+                all_contacts.extend(contacts)
                 processed_websites.append(website)
+                print(f"Found {len(contacts)} contacts for {website}")
             else:
                 print(f"No contacts found for {website}")
+        # Remove duplicates based on email
+        unique_contacts = []
+        seen_emails = set()
+        for contact in all_contacts:
+            if contact['email'] not in seen_emails:
+                unique_contacts.append(contact)
+                seen_emails.add(contact['email'])
+        # Limit results
+        unique_contacts = unique_contacts[:max_results]
+        if not unique_contacts:
             result_msg = f"No contacts found for the {len(websites)} websites from the CSV file.\n\n"
             result_msg += "DEBUG INFORMATION:\n" + debug_text + "\n\n"
             result_msg += f"Websites processed: {', '.join(websites[:10])}\n"
             result_msg += "This might be because the websites are not in our sample database."
             return result_msg, ""
+        # Format results
         results_text = f"CONTACT DISCOVERY REPORT\n"
         results_text += f"CSV Processing Details:\n"
         results_text += f"Total Websites in CSV: {len(websites)}\n"
         results_text += f"Websites Processed: {len(processed_websites)}\n"
         results_text += f"Websites with Contacts: {len(processed_websites)}\n"
+        results_text += f"Unique Contacts Found: {len(unique_contacts)}\n"
         results_text += f"Processed Websites: {', '.join(processed_websites)}\n"
         results_text += f"{'='*60}\n\n"
         results_text += "DEBUG INFORMATION:\n" + debug_text + "\n\n"
         results_text += f"{'='*60}\n\n"
+        for i, contact in enumerate(unique_contacts, 1):
             results_text += f"CONTACT #{i}\n"
             results_text += f"Name: {contact['first_name']} {contact['last_name']}\n"
             results_text += f"Position: {contact['job_title']}\n"
             results_text += f"Company: {contact['company']}\n"
             results_text += f"Website: {contact['website']}\n\n"
+        # Create CSV output
+        csv_output = "First Name,Last Name,Job Title,Email,Phone,Company,Website\n"
+        for contact in unique_contacts:
+            csv_output += f"{contact['first_name']},{contact['last_name']},{contact['job_title']},{contact['email']},{contact['phone']},{contact['company']},{contact['website']}\n"
+        return results_text, csv_output
     except Exception as e:
         return f"Error processing CSV file: {str(e)}", ""
                         label="Target Website URL",
                         placeholder="Enter company website (e.g., techflowsolutions.com)",
                         value="",
+                        elem_classes=["custom-input"]
                     )
                     with gr.Row():
+                        max_results = gr.Slider(
                             minimum=1,
                             maximum=20,
                             value=8,
                         elem_classes=["custom-input"]
                     )
+                    gr.HTML("""
+                    <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
+                        <strong>CSV Format - Multiple Options:</strong><br>
+                        <strong>Option 1:</strong> Website URLs in <strong>Column H (8th column)</strong><br>
+                        <strong>Option 2:</strong> Column header named 'website', 'url', 'domain', 'site', or 'web'<br>
+                        <strong>Option 3:</strong> System will auto-detect columns with valid URLs<br><br>
+                        <strong>Examples:</strong> techflowsolutions.com, https://example.com, www.company.com<br>
+                        <strong>Note:</strong> The system will show detailed debugging information about your CSV structure
+                    </div>
+                    """)
                     with gr.Row():
                         csv_max_results = gr.Slider(
                             minimum=1,
                     elem_classes=["results-container"]
                 )
+    # Sample websites section
+    with gr.Accordion("Sample Websites Database", open=False):
+        gr.HTML('<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af;">')
+        sample_websites = gr.Textbox(
+            label="Available Websites in Database",
+            value=get_all_available_websites(),
+            lines=8,
+            interactive=False,
+            elem_classes=["custom-input"]
+        )
+        gr.HTML('</div>')
+    # Quick search buttons
+    gr.HTML('<div class="section-header">Quick Access Sample Websites</div>')
+    with gr.Row():
+        quick_btn1 = gr.Button("TechFlow Solutions", size="sm", elem_classes=["secondary-btn"])
+        quick_btn2 = gr.Button("GreenLeaf Consulting", size="sm", elem_classes=["secondary-btn"])
+        quick_btn3 = gr.Button("BlueSky Marketing", size="sm", elem_classes=["secondary-btn"])
+        quick_btn4 = gr.Button("Quantum Dynamics", size="sm", elem_classes=["secondary-btn"])
+    with gr.Row():
+        quick_btn5 = gr.Button("Stellar Logistics", size="sm", elem_classes=["secondary-btn"])
+        quick_btn6 = gr.Button("Nexus Financial", size="sm", elem_classes=["secondary-btn"])
+        quick_btn7 = gr.Button("Horizon Health", size="sm", elem_classes=["secondary-btn"])
+        quick_btn8 = gr.Button("Phoenix Manufacturing", size="sm", elem_classes=["secondary-btn"])
+    # Event handlers
+    search_btn.click(
+        fn=search_website_contacts,
+        inputs=[website_input, max_results],
+        outputs=[results_display, csv_output]
+    )
+    csv_search_btn.click(
+        fn=search_csv_websites,
+        inputs=[csv_file, csv_max_results],
+        outputs=[csv_results_display, csv_export_output]
+    )
+    # Quick search button handlers
+    quick_btn1.click(lambda: "techflowsolutions.com", outputs=website_input)
+    quick_btn2.click(lambda: "greenleafconsult.com", outputs=website_input)
+    quick_btn3.click(lambda: "blueskymarketing.net", outputs=website_input)
+    quick_btn4.click(lambda: "quantumdynamics.org", outputs=website_input)
+    quick_btn5.click(lambda: "stellarlogistics.biz", outputs=website_input)
+    quick_btn6.click(lambda: "nexusfinancial.pro", outputs=website_input)
+    quick_btn7.click(lambda: "horizonhealth.care", outputs=website_input)
+    quick_btn8.click(lambda: "phoenixmfg.com", outputs=website_input)
+    # Examples
+    gr.Examples(
+        examples=[
+            ["techflowsolutions.com", 5],
+            ["greenleafconsult.com", 3],
+            ["blueskymarketing.net", 4],
+            ["quantumdynamics.org", 6]
+        ],
+        inputs=[website_input, max_results],
+        label="Sample Searches"
+    )
     # Footer
     gr.HTML("""