Update app.py
Browse files
app.py
CHANGED
|
@@ -21,7 +21,7 @@ print("Available files:", os.listdir(dataset_folder))
|
|
| 21 |
|
| 22 |
import warnings
|
| 23 |
|
| 24 |
-
# Ignore
|
| 25 |
warnings.simplefilter("ignore", category=pd.errors.DtypeWarning)
|
| 26 |
|
| 27 |
# Load all CSV files in the dataset folder
|
|
@@ -29,12 +29,25 @@ dataframes = []
|
|
| 29 |
for file in os.listdir(dataset_folder):
|
| 30 |
if file.endswith(".csv"): # Check if the file is a CSV
|
| 31 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
df = pd.read_csv(
|
| 33 |
os.path.join(dataset_folder, file),
|
| 34 |
-
dtype=
|
| 35 |
-
low_memory=False, #
|
| 36 |
-
encoding="utf-8"
|
|
|
|
| 37 |
).fillna('') # Fill NaN values with empty strings
|
|
|
|
| 38 |
dataframes.append(df) # Append DataFrame to the list
|
| 39 |
except Exception as e:
|
| 40 |
print(f"Error reading {file}: {e}")
|
|
@@ -45,6 +58,7 @@ if dataframes:
|
|
| 45 |
else:
|
| 46 |
print("Warning: No valid CSV files found in the dataset folder.")
|
| 47 |
full_data = pd.DataFrame() # Create an empty DataFrame as a fallback
|
|
|
|
| 48 |
|
| 49 |
def load_dataset_metadata(dataset_folder):
|
| 50 |
"""Loads metadata from all CSV files in the dataset folder."""
|
|
|
|
| 21 |
|
| 22 |
import warnings
|
| 23 |
|
| 24 |
+
# Ignore DtypeWarning
|
| 25 |
warnings.simplefilter("ignore", category=pd.errors.DtypeWarning)
|
| 26 |
|
| 27 |
# Load all CSV files in the dataset folder
|
|
|
|
| 29 |
for file in os.listdir(dataset_folder):
|
| 30 |
if file.endswith(".csv"): # Check if the file is a CSV
|
| 31 |
try:
|
| 32 |
+
# Read first few rows to identify column names
|
| 33 |
+
sample_df = pd.read_csv(
|
| 34 |
+
os.path.join(dataset_folder, file),
|
| 35 |
+
nrows=5, # Read only the first 5 rows; just the header is needed to collect column names
|
| 36 |
+
encoding="utf-8",
|
| 37 |
+
encoding_errors="replace" # Replace undecodable bytes with a placeholder (read_csv has no `errors` keyword)
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
column_types = {col: str for col in sample_df.columns} # Force all columns to string
|
| 41 |
+
|
| 42 |
+
# Read the entire file with enforced column types
|
| 43 |
df = pd.read_csv(
|
| 44 |
os.path.join(dataset_folder, file),
|
| 45 |
+
dtype=column_types, # Apply enforced string types
|
| 46 |
+
low_memory=False, # Avoid chunk-based reading issues
|
| 47 |
+
encoding="utf-8",
|
| 48 |
+
encoding_errors="replace"
|
| 49 |
).fillna('') # Fill NaN values with empty strings
|
| 50 |
+
|
| 51 |
dataframes.append(df) # Append DataFrame to the list
|
| 52 |
except Exception as e:
|
| 53 |
print(f"Error reading {file}: {e}")
|
|
|
|
| 58 |
else:
|
| 59 |
print("Warning: No valid CSV files found in the dataset folder.")
|
| 60 |
full_data = pd.DataFrame() # Create an empty DataFrame as a fallback
|
| 61 |
+
|
| 62 |
|
| 63 |
def load_dataset_metadata(dataset_folder):
|
| 64 |
"""Loads metadata from all CSV files in the dataset folder."""
|