Mohaddz committed on
Commit
febe156
·
verified ·
1 Parent(s): 1bb89fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -10
app.py CHANGED
@@ -91,14 +91,28 @@ class MultiClientThemeClassifier:
91
  return f"Error: {str(e)}", 0.0, {}
92
 
93
  def benchmark_csv(self, csv_filepath: str, client_id: str) -> Tuple[str, Optional[str], Optional[str]]:
94
- """Benchmark the model on a CSV file from a given filepath."""
95
  error_status = self._ensure_model_is_loaded()
96
  if error_status: return f"❌ Model could not be loaded: {error_status}", None, None
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  try:
99
- # CORRECTED: Read directly from the filepath and handle BOM with utf-8-sig
100
- df = pd.read_csv(csv_filepath, encoding='utf-8-sig')
101
-
102
  if 'text' not in df.columns or 'real_tag' not in df.columns:
103
  return f"❌ CSV must have 'text' and 'real_tag' columns! Found: {df.columns.to_list()}", None, None
104
 
@@ -159,16 +173,10 @@ def classify_interface(text: str, client_id: str, confidence_threshold: float):
159
 
160
  @spaces.GPU(duration=300)
161
  def benchmark_interface(csv_file_obj, client_id: str):
162
- """
163
- Handles the Gradio file object and passes the filepath to the benchmark function.
164
- """
165
  if csv_file_obj is None:
166
  return "Please upload a CSV file!", None, None
167
  try:
168
- # THE FINAL, CORRECT FIX: Get the filepath from the .name attribute of the Gradio file object
169
  csv_filepath = csv_file_obj.name
170
-
171
- # Pass the filepath to the actual processing function
172
  return classifier.benchmark_csv(csv_filepath, client_id)
173
  except Exception as e:
174
  error_details = traceback.format_exc()
 
91
  return f"Error: {str(e)}", 0.0, {}
92
 
93
  def benchmark_csv(self, csv_filepath: str, client_id: str) -> Tuple[str, Optional[str], Optional[str]]:
94
+ """Benchmark the model on a CSV file, trying multiple encodings."""
95
  error_status = self._ensure_model_is_loaded()
96
  if error_status: return f"❌ Model could not be loaded: {error_status}", None, None
97
 
98
+ # FINAL FIX: Try a list of common encodings to handle different file types.
99
+ encodings_to_try = ['utf-8-sig', 'utf-8', 'cp1256', 'latin1', 'cp1252']
100
+ df = None
101
+
102
+ for encoding in encodings_to_try:
103
+ try:
104
+ df = pd.read_csv(csv_filepath, encoding=encoding)
105
+ print(f"Successfully read CSV with encoding: {encoding}")
106
+ break # Exit loop if successful
107
+ except (UnicodeDecodeError, pd.errors.ParserError):
108
+ print(f"Failed to read with encoding: {encoding}, trying next...")
109
+ continue
110
+
111
+ if df is None:
112
+ error_message = "❌ Could not decode the CSV file. Please save it in a common format like 'UTF-8' and try again."
113
+ return error_message, None, None
114
+
115
  try:
 
 
 
116
  if 'text' not in df.columns or 'real_tag' not in df.columns:
117
  return f"❌ CSV must have 'text' and 'real_tag' columns! Found: {df.columns.to_list()}", None, None
118
 
 
173
 
174
  @spaces.GPU(duration=300)
175
  def benchmark_interface(csv_file_obj, client_id: str):
 
 
 
176
  if csv_file_obj is None:
177
  return "Please upload a CSV file!", None, None
178
  try:
 
179
  csv_filepath = csv_file_obj.name
 
 
180
  return classifier.benchmark_csv(csv_filepath, client_id)
181
  except Exception as e:
182
  error_details = traceback.format_exc()