Spaces:

Mohaddz
/

Customer-classify

Sleeping

App Files Files Community

Mohaddz committed on Aug 29, 2025

Commit

febe156

verified ·

1 Parent(s): 1bb89fc

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -10

app.py CHANGED Viewed

@@ -91,14 +91,28 @@ class MultiClientThemeClassifier:
             return f"Error: {str(e)}", 0.0, {}
     def benchmark_csv(self, csv_filepath: str, client_id: str) -> Tuple[str, Optional[str], Optional[str]]:
-        """Benchmark the model on a CSV file from a given filepath."""
         error_status = self._ensure_model_is_loaded()
         if error_status: return f"❌ Model could not be loaded: {error_status}", None, None
         try:
-            # CORRECTED: Read directly from the filepath and handle BOM with utf-8-sig
-            df = pd.read_csv(csv_filepath, encoding='utf-8-sig')
             if 'text' not in df.columns or 'real_tag' not in df.columns:
                 return f"❌ CSV must have 'text' and 'real_tag' columns! Found: {df.columns.to_list()}", None, None
@@ -159,16 +173,10 @@ def classify_interface(text: str, client_id: str, confidence_threshold: float):
 @spaces.GPU(duration=300)
 def benchmark_interface(csv_file_obj, client_id: str):
-    """
-    Handles the Gradio file object and passes the filepath to the benchmark function.
-    """
     if csv_file_obj is None:
         return "Please upload a CSV file!", None, None
     try:
-        # THE FINAL, CORRECT FIX: Get the filepath from the .name attribute of the Gradio file object
         csv_filepath = csv_file_obj.name
-        # Pass the filepath to the actual processing function
         return classifier.benchmark_csv(csv_filepath, client_id)
     except Exception as e:
         error_details = traceback.format_exc()

             return f"Error: {str(e)}", 0.0, {}
     def benchmark_csv(self, csv_filepath: str, client_id: str) -> Tuple[str, Optional[str], Optional[str]]:
+        """Benchmark the model on a CSV file, trying multiple encodings."""
         error_status = self._ensure_model_is_loaded()
         if error_status: return f"❌ Model could not be loaded: {error_status}", None, None
+        # FINAL FIX: Try a list of common encodings to handle different file types.
+        encodings_to_try = ['utf-8-sig', 'utf-8', 'cp1256', 'latin1', 'cp1252']
+        df = None
+        for encoding in encodings_to_try:
+            try:
+                df = pd.read_csv(csv_filepath, encoding=encoding)
+                print(f"Successfully read CSV with encoding: {encoding}")
+                break  # Exit loop if successful
+            except (UnicodeDecodeError, pd.errors.ParserError):
+                print(f"Failed to read with encoding: {encoding}, trying next...")
+                continue
+        if df is None:
+            error_message = "❌ Could not decode the CSV file. Please save it in a common format like 'UTF-8' and try again."
+            return error_message, None, None
         try:
             if 'text' not in df.columns or 'real_tag' not in df.columns:
                 return f"❌ CSV must have 'text' and 'real_tag' columns! Found: {df.columns.to_list()}", None, None
 @spaces.GPU(duration=300)
 def benchmark_interface(csv_file_obj, client_id: str):
     if csv_file_obj is None:
         return "Please upload a CSV file!", None, None
     try:
         csv_filepath = csv_file_obj.name
         return classifier.benchmark_csv(csv_filepath, client_id)
     except Exception as e:
         error_details = traceback.format_exc()