Spaces:

kkhushisaid
/

Startup_Investment_Query_Generator

Sleeping

App Files Files Community

kkhushisaid committed on Mar 16, 2025

Commit

7e25c09

verified ·

1 Parent(s): 4765d25

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -4

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ print("Available files:", os.listdir(dataset_folder))
 import warnings
-# Ignore dtype warnings
 warnings.simplefilter("ignore", category=pd.errors.DtypeWarning)
 # Load all CSV files in the dataset folder
@@ -29,12 +29,25 @@ dataframes = []
 for file in os.listdir(dataset_folder):
     if file.endswith(".csv"):  # Check if the file is a CSV
         try:
             df = pd.read_csv(
                 os.path.join(dataset_folder, file),
-                dtype=str,  # Force all columns to be read as strings
-                low_memory=False,  # Avoids dtype warnings by reading the entire file at once
-                encoding="utf-8"
             ).fillna('')  # Fill NaN values with empty strings
             dataframes.append(df)  # Append DataFrame to the list
         except Exception as e:
             print(f"Error reading {file}: {e}")
@@ -45,6 +58,7 @@ if dataframes:
 else:
     print("Warning: No valid CSV files found in the dataset folder.")
     full_data = pd.DataFrame()  # Create an empty DataFrame as a fallback
 def load_dataset_metadata(dataset_folder):
     """Loads metadata from all CSV files in the dataset folder."""

 import warnings
+# Ignore DtypeWarning
 warnings.simplefilter("ignore", category=pd.errors.DtypeWarning)
 # Load all CSV files in the dataset folder
 for file in os.listdir(dataset_folder):
     if file.endswith(".csv"):  # Check if the file is a CSV
         try:
+            # Read first few rows to identify column names
+            sample_df = pd.read_csv(
+                os.path.join(dataset_folder, file),
+                nrows=5,  # A 5-row sample is enough to discover the column names
+                encoding="utf-8",
+                encoding_errors="replace"  # read_csv has no `errors` kwarg; `encoding_errors` (pandas >= 1.3) is the supported name
+            )
+            column_types = {col: str for col in sample_df.columns}  # Force all columns to string
+            # Read the entire file with enforced column types
             df = pd.read_csv(
                 os.path.join(dataset_folder, file),
+                dtype=column_types,  # Apply enforced string types
+                low_memory=False,  # Avoid chunk-based reading issues
+                encoding="utf-8",
+                encoding_errors="replace"  # Undecodable bytes are replaced instead of aborting the read
             ).fillna('')  # Fill NaN values with empty strings
             dataframes.append(df)  # Append DataFrame to the list
         except Exception as e:
             print(f"Error reading {file}: {e}")
 else:
     print("Warning: No valid CSV files found in the dataset folder.")
     full_data = pd.DataFrame()  # Create an empty DataFrame as a fallback
 def load_dataset_metadata(dataset_folder):
     """Loads metadata from all CSV files in the dataset folder."""