HaryaniAnjali committed on
Commit
6099979
·
verified ·
1 Parent(s): 802ca33

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -40
app.py CHANGED
@@ -74,40 +74,47 @@ def read_file(file):
74
  try:
75
  # Handle different file types
76
  if file_name.endswith('.csv'):
77
- # First try with comma
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  try:
79
- df = pd.read_csv(file)
80
-
81
- # Check if we got only one column but it contains semicolons
82
- if len(df.columns) == 1 and ';' in str(df.columns[0]):
83
- print("Detected potential semicolon-separated file")
84
- # Reset file position
85
- file.seek(0)
86
- # Try with semicolon
87
- df = pd.read_csv(file, sep=';')
88
- print(f"Read file with semicolon separator: {df.shape}")
89
  else:
90
- print(f"Read file with comma separator: {df.shape}")
91
-
92
- # Convert columns to appropriate types
93
- for col in df.columns:
94
- # Try to convert string columns to numeric
95
- if df[col].dtype == 'object':
96
- df[col] = pd.to_numeric(df[col], errors='ignore')
97
-
98
- return df
99
  except Exception as e:
100
- print(f"Error with standard separators: {e}")
101
- # Try with semicolon
102
- file.seek(0)
103
- try:
104
- df = pd.read_csv(file, sep=';')
105
- print(f"Read file with semicolon separator after error: {df.shape}")
106
- return df
107
- except:
108
- # Final attempt with Python's csv sniffer
109
- file.seek(0)
110
- return pd.read_csv(file, sep=None, engine='python')
111
 
112
  elif file_name.endswith(('.xls', '.xlsx')):
113
  return pd.read_excel(file)
@@ -117,15 +124,14 @@ def read_file(file):
117
  # Try tab separator first for text files
118
  try:
119
  df = pd.read_csv(file, delimiter='\t')
120
- if len(df.columns) <= 1:
121
- # If tab doesn't work well, try with separator detection
122
- file.seek(0)
123
- df = pd.read_csv(file, sep=None, engine='python')
124
- return df
125
- except:
126
- # Fall back to separator detection
127
- file.seek(0)
128
- return pd.read_csv(file, sep=None, engine='python')
129
  else:
130
  return "Unsupported file format. Please upload .csv, .xlsx, .xls, .json, or .txt files."
131
  except Exception as e:
 
74
  try:
75
  # Handle different file types
76
  if file_name.endswith('.csv'):
77
+ # Try multiple separators in sequence
78
+ separators = [',', ';', '\t', '|']
79
+ errors = []
80
+
81
+ for sep in separators:
82
+ try:
83
+ # For each attempt, we need a fresh file upload
84
+ # Try with the current separator
85
+ df = pd.read_csv(file, sep=sep)
86
+
87
+ # If we got a reasonable number of columns, it probably worked
88
+ if len(df.columns) > 1:
89
+ print(f"Successfully read CSV with separator '{sep}': {df.shape}")
90
+
91
+ # Convert columns to appropriate types
92
+ for col in df.columns:
93
+ # Try to convert string columns to numeric
94
+ if df[col].dtype == 'object':
95
+ df[col] = pd.to_numeric(df[col], errors='ignore')
96
+
97
+ return df
98
+ else:
99
+ errors.append(f"Only got {len(df.columns)} columns with '{sep}' separator")
100
+ except Exception as e:
101
+ errors.append(f"Error with '{sep}' separator: {str(e)}")
102
+
103
+ # If we reach here, all separators failed
104
+ error_msg = "\n".join(errors)
105
+ print(f"All separators failed: {error_msg}")
106
+
107
+ # Make one final attempt with Python's CSV sniffer
108
  try:
109
+ df = pd.read_csv(file, sep=None, engine='python')
110
+ if len(df.columns) > 1:
111
+ print(f"Read CSV with automatic separator detection: {df.shape}")
112
+ return df
 
 
 
 
 
 
113
  else:
114
+ return "Could not detect the appropriate separator for this CSV file."
 
 
 
 
 
 
 
 
115
  except Exception as e:
116
+ print(f"Error with automatic separator detection: {e}")
117
+ return "Could not read the CSV file. Please check the file format and try again."
 
 
 
 
 
 
 
 
 
118
 
119
  elif file_name.endswith(('.xls', '.xlsx')):
120
  return pd.read_excel(file)
 
124
  # Try tab separator first for text files
125
  try:
126
  df = pd.read_csv(file, delimiter='\t')
127
+ if len(df.columns) > 1:
128
+ return df
129
+ else:
130
+ # Try with automatic separator detection
131
+ return pd.read_csv(file, sep=None, engine='python')
132
+ except Exception as e:
133
+ print(f"Error reading text file: {e}")
134
+ return f"Error reading text file: {str(e)}"
 
135
  else:
136
  return "Unsupported file format. Please upload .csv, .xlsx, .xls, .json, or .txt files."
137
  except Exception as e: