Update app.py
Browse files
app.py
CHANGED
|
@@ -257,41 +257,47 @@ def simulate_website_scraping(url):
|
|
| 257 |
return contacts
|
| 258 |
|
| 259 |
def parse_csv_file(file_content):
|
| 260 |
-
"""Parse CSV file and extract website URLs"""
|
| 261 |
websites = []
|
| 262 |
try:
|
| 263 |
# Decode file content
|
| 264 |
content = file_content.decode('utf-8')
|
| 265 |
|
| 266 |
# Parse CSV
|
| 267 |
-
csv_reader = csv.
|
| 268 |
|
| 269 |
-
#
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
print(f"CSV columns found: {all_columns}")
|
| 275 |
|
| 276 |
-
#
|
| 277 |
-
|
| 278 |
-
for col_name in all_columns:
|
| 279 |
-
if col_name and col_name.lower().strip() in website_columns:
|
| 280 |
-
website_column = col_name
|
| 281 |
-
print(f"Using website column: '{website_column}'")
|
| 282 |
-
break
|
| 283 |
|
| 284 |
-
if
|
| 285 |
-
|
| 286 |
-
|
| 287 |
|
| 288 |
-
|
|
|
|
| 289 |
for row in csv_reader:
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
|
| 294 |
-
print(f"
|
|
|
|
| 295 |
return websites
|
| 296 |
|
| 297 |
except Exception as e:
|
|
@@ -299,7 +305,7 @@ def parse_csv_file(file_content):
|
|
| 299 |
return []
|
| 300 |
|
| 301 |
def search_csv_websites(csv_file, max_results=10):
|
| 302 |
-
"""Search for contacts from websites listed in CSV file"""
|
| 303 |
if csv_file is None:
|
| 304 |
return "Please upload a CSV file", ""
|
| 305 |
|
|
@@ -308,7 +314,7 @@ def search_csv_websites(csv_file, max_results=10):
|
|
| 308 |
websites = parse_csv_file(csv_file)
|
| 309 |
|
| 310 |
if not websites:
|
| 311 |
-
return "No websites found in CSV file. Please ensure your CSV has
|
| 312 |
|
| 313 |
all_contacts = []
|
| 314 |
processed_websites = []
|
|
@@ -336,10 +342,11 @@ def search_csv_websites(csv_file, max_results=10):
|
|
| 336 |
unique_contacts = unique_contacts[:max_results]
|
| 337 |
|
| 338 |
if not unique_contacts:
|
| 339 |
-
return f"No contacts found for the {len(websites)} websites in the CSV file.
|
| 340 |
|
| 341 |
# Format results
|
| 342 |
results_text = f"CONTACT DISCOVERY REPORT\n"
|
|
|
|
| 343 |
results_text += f"Websites Processed: {len(processed_websites)}\n"
|
| 344 |
results_text += f"Total Websites in CSV: {len(websites)}\n"
|
| 345 |
results_text += f"Websites with Contacts: {len(processed_websites)}\n"
|
|
@@ -609,9 +616,10 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
|
|
| 609 |
gr.HTML("""
|
| 610 |
<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
|
| 611 |
<strong>CSV Format Requirements:</strong><br>
|
| 612 |
-
•
|
| 613 |
• One website per row<br>
|
| 614 |
-
• Example: techflowsolutions.com, greenleafconsult.com
|
|
|
|
| 615 |
</div>
|
| 616 |
""")
|
| 617 |
|
|
@@ -723,4 +731,4 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
|
|
| 723 |
""")
|
| 724 |
|
| 725 |
if __name__ == "__main__":
|
| 726 |
-
app.launch()
|
|
|
|
| 257 |
return contacts
|
| 258 |
|
| 259 |
def parse_csv_file(file_content):
|
| 260 |
+
"""Parse CSV file and extract website URLs from column H"""
|
| 261 |
websites = []
|
| 262 |
try:
|
| 263 |
# Decode file content
|
| 264 |
content = file_content.decode('utf-8')
|
| 265 |
|
| 266 |
# Parse CSV
|
| 267 |
+
csv_reader = csv.reader(io.StringIO(content))
|
| 268 |
|
| 269 |
+
# Peek at the first row only to detect an empty file (no column count is computed)
|
| 270 |
+
first_row = next(csv_reader, None)
|
| 271 |
+
if not first_row:
|
| 272 |
+
print("CSV file is empty")
|
| 273 |
+
return []
|
|
|
|
| 274 |
|
| 275 |
+
# Re-create the reader so the row consumed by the emptiness check above is read again
|
| 276 |
+
csv_reader = csv.reader(io.StringIO(content))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
|
| 278 |
+
# Always skip the first row as a header; no detection is done, so a header-less CSV loses its first data row
|
| 279 |
+
header_row = next(csv_reader, None)
|
| 280 |
+
print(f"Header row: {header_row}")
|
| 281 |
|
| 282 |
+
row_count = 0
|
| 283 |
+
# Process each row
|
| 284 |
for row in csv_reader:
|
| 285 |
+
row_count += 1
|
| 286 |
+
print(f"Processing row {row_count}: {row}")
|
| 287 |
+
|
| 288 |
+
# Check if row has at least 8 columns (column H is index 7)
|
| 289 |
+
if len(row) >= 8:
|
| 290 |
+
website_url = row[7].strip() # Column H (index 7)
|
| 291 |
+
if website_url:
|
| 292 |
+
websites.append(website_url)
|
| 293 |
+
print(f"Found website in row {row_count}: {website_url}")
|
| 294 |
+
else:
|
| 295 |
+
print(f"Empty website in row {row_count}, column H")
|
| 296 |
+
else:
|
| 297 |
+
print(f"Row {row_count} has only {len(row)} columns, skipping (need at least 8 for column H)")
|
| 298 |
|
| 299 |
+
print(f"Total rows processed: {row_count}")
|
| 300 |
+
print(f"Extracted {len(websites)} websites from column H: {websites[:5]}...") # Show first 5
|
| 301 |
return websites
|
| 302 |
|
| 303 |
except Exception as e:
|
|
|
|
| 305 |
return []
|
| 306 |
|
| 307 |
def search_csv_websites(csv_file, max_results=10):
|
| 308 |
+
"""Search for contacts from websites listed in CSV file (column H)"""
|
| 309 |
if csv_file is None:
|
| 310 |
return "Please upload a CSV file", ""
|
| 311 |
|
|
|
|
| 314 |
websites = parse_csv_file(csv_file)
|
| 315 |
|
| 316 |
if not websites:
|
| 317 |
+
return "No websites found in column H of the CSV file. Please ensure your CSV has data in column H (the 8th column). Check the console for debugging info about your CSV structure.", ""
|
| 318 |
|
| 319 |
all_contacts = []
|
| 320 |
processed_websites = []
|
|
|
|
| 342 |
unique_contacts = unique_contacts[:max_results]
|
| 343 |
|
| 344 |
if not unique_contacts:
|
| 345 |
+
return f"No contacts found for the {len(websites)} websites from column H in the CSV file. Websites processed: {', '.join(websites[:10])}", ""
|
| 346 |
|
| 347 |
# Format results
|
| 348 |
results_text = f"CONTACT DISCOVERY REPORT\n"
|
| 349 |
+
results_text += f"CSV Column Used: H (8th column)\n"
|
| 350 |
results_text += f"Websites Processed: {len(processed_websites)}\n"
|
| 351 |
results_text += f"Total Websites in CSV: {len(websites)}\n"
|
| 352 |
results_text += f"Websites with Contacts: {len(processed_websites)}\n"
|
|
|
|
| 616 |
gr.HTML("""
|
| 617 |
<div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
|
| 618 |
<strong>CSV Format Requirements:</strong><br>
|
| 619 |
+
• Website URLs should be in <strong>Column H (8th column)</strong><br>
|
| 620 |
• One website per row<br>
|
| 621 |
+
• Example: techflowsolutions.com, greenleafconsult.com<br>
|
| 622 |
+
• The system will automatically read from column H regardless of headers
|
| 623 |
</div>
|
| 624 |
""")
|
| 625 |
|
|
|
|
| 731 |
""")
|
| 732 |
|
| 733 |
if __name__ == "__main__":
|
| 734 |
+
app.launch()
|