kkhushisaid committed on
Commit
7e25c09
·
verified ·
1 Parent(s): 4765d25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -4
app.py CHANGED
@@ -21,7 +21,7 @@ print("Available files:", os.listdir(dataset_folder))
21
 
22
  import warnings
23
 
24
- # Ignore dtype warnings
25
  warnings.simplefilter("ignore", category=pd.errors.DtypeWarning)
26
 
27
  # Load all CSV files in the dataset folder
@@ -29,12 +29,25 @@ dataframes = []
29
  for file in os.listdir(dataset_folder):
30
  if file.endswith(".csv"): # Check if the file is a CSV
31
  try:
 
 
 
 
 
 
 
 
 
 
 
32
  df = pd.read_csv(
33
  os.path.join(dataset_folder, file),
34
- dtype=str, # Force all columns to be read as strings
35
- low_memory=False, # Avoids dtype warnings by reading the entire file at once
36
- encoding="utf-8"
 
37
  ).fillna('') # Fill NaN values with empty strings
 
38
  dataframes.append(df) # Append DataFrame to the list
39
  except Exception as e:
40
  print(f"Error reading {file}: {e}")
@@ -45,6 +58,7 @@ if dataframes:
45
  else:
46
  print("Warning: No valid CSV files found in the dataset folder.")
47
  full_data = pd.DataFrame() # Create an empty DataFrame as a fallback
 
48
 
49
  def load_dataset_metadata(dataset_folder):
50
  """Loads metadata from all CSV files in the dataset folder."""
 
21
 
22
  import warnings
23
 
24
+ # Ignore DtypeWarning
25
  warnings.simplefilter("ignore", category=pd.errors.DtypeWarning)
26
 
27
  # Load all CSV files in the dataset folder
 
29
  for file in os.listdir(dataset_folder):
30
  if file.endswith(".csv"): # Check if the file is a CSV
31
  try:
32
+ # Read first few rows to identify column names
33
+ sample_df = pd.read_csv(
34
+ os.path.join(dataset_folder, file),
35
+ nrows=5, # Read only first 5 rows for column type inference
36
+ encoding="utf-8",
37
+ errors="replace" # Replace encoding errors with a placeholder
38
+ )
39
+
40
+ column_types = {col: str for col in sample_df.columns} # Force all columns to string
41
+
42
+ # Read the entire file with enforced column types
43
  df = pd.read_csv(
44
  os.path.join(dataset_folder, file),
45
+ dtype=column_types, # Apply enforced string types
46
+ low_memory=False, # Avoid chunk-based reading issues
47
+ encoding="utf-8",
48
+ errors="replace"
49
  ).fillna('') # Fill NaN values with empty strings
50
+
51
  dataframes.append(df) # Append DataFrame to the list
52
  except Exception as e:
53
  print(f"Error reading {file}: {e}")
 
58
  else:
59
  print("Warning: No valid CSV files found in the dataset folder.")
60
  full_data = pd.DataFrame() # Create an empty DataFrame as a fallback
61
+
62
 
63
  def load_dataset_metadata(dataset_folder):
64
  """Loads metadata from all CSV files in the dataset folder."""