JayBene1 committed on
Commit
b3942db
·
verified ·
1 Parent(s): 91ab66f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -29
app.py CHANGED
@@ -257,41 +257,47 @@ def simulate_website_scraping(url):
257
  return contacts
258
 
259
  def parse_csv_file(file_content):
260
- """Parse CSV file and extract website URLs"""
261
  websites = []
262
  try:
263
  # Decode file content
264
  content = file_content.decode('utf-8')
265
 
266
  # Parse CSV
267
- csv_reader = csv.DictReader(io.StringIO(content))
268
 
269
- # Look for common website column names (case-insensitive)
270
- website_columns = ['website', 'url', 'domain', 'site', 'web', 'homepage']
271
-
272
- # Get all column names and print for debugging
273
- all_columns = list(csv_reader.fieldnames) if csv_reader.fieldnames else []
274
- print(f"CSV columns found: {all_columns}")
275
 
276
- # Find the website column (case-insensitive)
277
- website_column = None
278
- for col_name in all_columns:
279
- if col_name and col_name.lower().strip() in website_columns:
280
- website_column = col_name
281
- print(f"Using website column: '{website_column}'")
282
- break
283
 
284
- if not website_column:
285
- print(f"No website column found. Available columns: {all_columns}")
286
- return []
287
 
288
- # Extract websites
 
289
  for row in csv_reader:
290
- website_url = row.get(website_column, '').strip()
291
- if website_url:
292
- websites.append(website_url)
 
 
 
 
 
 
 
 
 
 
293
 
294
- print(f"Extracted {len(websites)} websites: {websites[:5]}...") # Show first 5
 
295
  return websites
296
 
297
  except Exception as e:
@@ -299,7 +305,7 @@ def parse_csv_file(file_content):
299
  return []
300
 
301
  def search_csv_websites(csv_file, max_results=10):
302
- """Search for contacts from websites listed in CSV file"""
303
  if csv_file is None:
304
  return "Please upload a CSV file", ""
305
 
@@ -308,7 +314,7 @@ def search_csv_websites(csv_file, max_results=10):
308
  websites = parse_csv_file(csv_file)
309
 
310
  if not websites:
311
- return "No websites found in CSV file. Please ensure your CSV has a column named 'website', 'url', or 'domain'. Check the console for debugging info about your CSV columns.", ""
312
 
313
  all_contacts = []
314
  processed_websites = []
@@ -336,10 +342,11 @@ def search_csv_websites(csv_file, max_results=10):
336
  unique_contacts = unique_contacts[:max_results]
337
 
338
  if not unique_contacts:
339
- return f"No contacts found for the {len(websites)} websites in the CSV file. Processed websites: {', '.join(websites[:10])}", ""
340
 
341
  # Format results
342
  results_text = f"CONTACT DISCOVERY REPORT\n"
 
343
  results_text += f"Websites Processed: {len(processed_websites)}\n"
344
  results_text += f"Total Websites in CSV: {len(websites)}\n"
345
  results_text += f"Websites with Contacts: {len(processed_websites)}\n"
@@ -609,9 +616,10 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
609
  gr.HTML("""
610
  <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
611
  <strong>CSV Format Requirements:</strong><br>
612
- Include a column named 'website', 'url', or 'domain'<br>
613
  • One website per row<br>
614
- • Example: techflowsolutions.com, greenleafconsult.com
 
615
  </div>
616
  """)
617
 
@@ -723,4 +731,4 @@ with gr.Blocks(css=custom_css, title="Contact Discovery Platform", theme=gr.them
723
  """)
724
 
725
  if __name__ == "__main__":
726
- app.launch()
 
257
  return contacts
258
 
259
  def parse_csv_file(file_content):
260
+ """Parse CSV file and extract website URLs from column H"""
261
  websites = []
262
  try:
263
  # Decode file content
264
  content = file_content.decode('utf-8')
265
 
266
  # Parse CSV
267
+ csv_reader = csv.reader(io.StringIO(content))
268
 
269
+ # Get the first row to determine column count
270
+ first_row = next(csv_reader, None)
271
+ if not first_row:
272
+ print("CSV file is empty")
273
+ return []
 
274
 
275
+ # Reset reader
276
+ csv_reader = csv.reader(io.StringIO(content))
 
 
 
 
 
277
 
278
+ # Skip header row if it exists (you can modify this logic if needed)
279
+ header_row = next(csv_reader, None)
280
+ print(f"Header row: {header_row}")
281
 
282
+ row_count = 0
283
+ # Process each row
284
  for row in csv_reader:
285
+ row_count += 1
286
+ print(f"Processing row {row_count}: {row}")
287
+
288
+ # Check if row has at least 8 columns (column H is index 7)
289
+ if len(row) >= 8:
290
+ website_url = row[7].strip() # Column H (index 7)
291
+ if website_url:
292
+ websites.append(website_url)
293
+ print(f"Found website in row {row_count}: {website_url}")
294
+ else:
295
+ print(f"Empty website in row {row_count}, column H")
296
+ else:
297
+ print(f"Row {row_count} has only {len(row)} columns, skipping (need at least 8 for column H)")
298
 
299
+ print(f"Total rows processed: {row_count}")
300
+ print(f"Extracted {len(websites)} websites from column H: {websites[:5]}...") # Show first 5
301
  return websites
302
 
303
  except Exception as e:
 
305
  return []
306
 
307
  def search_csv_websites(csv_file, max_results=10):
308
+ """Search for contacts from websites listed in CSV file (column H)"""
309
  if csv_file is None:
310
  return "Please upload a CSV file", ""
311
 
 
314
  websites = parse_csv_file(csv_file)
315
 
316
  if not websites:
317
+ return "No websites found in column H of the CSV file. Please ensure your CSV has data in column H (the 8th column). Check the console for debugging info about your CSV structure.", ""
318
 
319
  all_contacts = []
320
  processed_websites = []
 
342
  unique_contacts = unique_contacts[:max_results]
343
 
344
  if not unique_contacts:
345
+ return f"No contacts found for the {len(websites)} websites from column H in the CSV file. Websites processed: {', '.join(websites[:10])}", ""
346
 
347
  # Format results
348
  results_text = f"CONTACT DISCOVERY REPORT\n"
349
+ results_text += f"CSV Column Used: H (8th column)\n"
350
  results_text += f"Websites Processed: {len(processed_websites)}\n"
351
  results_text += f"Total Websites in CSV: {len(websites)}\n"
352
  results_text += f"Websites with Contacts: {len(processed_websites)}\n"
 
616
  gr.HTML("""
617
  <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #1e40af; margin: 10px 0;">
618
  <strong>CSV Format Requirements:</strong><br>
619
+ Website URLs should be in <strong>Column H (8th column)</strong><br>
620
  • One website per row<br>
621
+ • Example: techflowsolutions.com, greenleafconsult.com<br>
622
+ • The system will automatically read from column H regardless of headers
623
  </div>
624
  """)
625
 
 
731
  """)
732
 
733
  if __name__ == "__main__":
734
+ app.launch()