Update app.py
Browse files
app.py
CHANGED
|
@@ -257,47 +257,47 @@ def simulate_website_scraping(url):
|
|
| 257 |
return contacts
|
| 258 |
|
| 259 |
def parse_csv_file(file_content):
|
| 260 |
-
"""Parse CSV file and extract website URLs"""
|
| 261 |
websites = []
|
| 262 |
try:
|
| 263 |
# Decode file content
|
| 264 |
content = file_content.decode('utf-8')
|
| 265 |
|
| 266 |
-
# Parse CSV
|
| 267 |
-
csv_reader = csv.
|
|
|
|
| 268 |
|
| 269 |
-
|
| 270 |
-
website_columns = ['website', 'url', 'domain', 'site', 'web', 'homepage']
|
| 271 |
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
|
| 276 |
-
#
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
# Clean and normalize column name for comparison
|
| 281 |
-
clean_col_name = col_name.lower().strip()
|
| 282 |
-
print(f"Checking column: '{col_name}' -> '{clean_col_name}'")
|
| 283 |
-
|
| 284 |
-
if clean_col_name in website_columns:
|
| 285 |
-
website_column = col_name # Use original column name
|
| 286 |
-
print(f"Using website column: '{website_column}'")
|
| 287 |
-
break
|
| 288 |
|
| 289 |
-
if
|
| 290 |
-
print(f"
|
| 291 |
-
print(f"Looking for columns matching: {website_columns}")
|
| 292 |
return []
|
| 293 |
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
|
| 300 |
-
print(f"Extracted {len(websites)} websites: {websites[:5]}...") # Show first 5
|
| 301 |
return websites
|
| 302 |
|
| 303 |
except Exception as e:
|
|
|
|
| 257 |
return contacts
|
| 258 |
|
| 259 |
def parse_csv_file(file_content, column_index=7):
    """Parse raw CSV file bytes and extract website URLs from one column.

    The CSV is read positionally (no header mapping) so URLs can live in a
    fixed spreadsheet column regardless of what the header row calls it.

    Args:
        file_content: Raw CSV content as UTF-8 encoded bytes.
        column_index: Zero-based index of the column holding the URLs.
            Defaults to 7, i.e. spreadsheet column H (A=0, B=1, ..., H=7).

    Returns:
        list[str]: Non-empty, whitespace-stripped URL strings in row order.
        Returns an empty list when the file is empty, the header row lacks
        the target column, or the file cannot be decoded/parsed.
    """
    websites = []
    try:
        # Decode file content
        content = file_content.decode('utf-8')

        # Parse CSV without headers first to access by column index
        csv_reader = csv.reader(io.StringIO(content))
        rows = list(csv_reader)

        print(f"Total rows in CSV: {len(rows)}")

        if len(rows) == 0:
            print("CSV file is empty")
            return []

        # The header row must reach the target column, otherwise there is
        # nothing to extract from any data row either.
        first_row = rows[0]
        print(f"First row columns: {len(first_row)}")
        print(f"First row content: {first_row}")

        if len(first_row) <= column_index:
            print(f"CSV doesn't have column H. Only has {len(first_row)} columns")
            return []

        print(f"Column H header: '{first_row[column_index]}'")

        # Extract websites from the target column, skipping the header row.
        # start=2 so the printed row numbers match spreadsheet numbering
        # (row 1 is the header).
        for i, row in enumerate(rows[1:], start=2):
            if len(row) > column_index:  # Make sure row reaches the column
                website_url = row[column_index].strip()
                if website_url:
                    websites.append(website_url)
                    print(f"Row {i}: Found website '{website_url}'")
                else:
                    print(f"Row {i}: Column H is empty")
            else:
                print(f"Row {i}: Row too short, only has {len(row)} columns")

        print(f"Extracted {len(websites)} websites from column H: {websites[:5]}...")  # Show first 5
        return websites

    except Exception as e:
        # Best-effort parser: log and return an empty result rather than
        # crashing the upload flow on a malformed/undecodable file.
        print(f"Error parsing CSV file: {e}")
        return []