Spaces:

Mohaddz
/

Customer-classify

Running

App Files Files Community

Mohaddz committed on Aug 29, 2025

Commit

1bb89fc

verified ·

1 Parent(s): 2c7390f

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -20

app.py CHANGED Viewed

@@ -90,16 +90,15 @@ class MultiClientThemeClassifier:
         except Exception as e:
             return f"Error: {str(e)}", 0.0, {}
-    def benchmark_csv(self, csv_content: str, client_id: str) -> Tuple[str, Optional[str], Optional[str]]:
-        """Benchmark the model on a CSV file. Assumes csv_content is a clean string."""
         error_status = self._ensure_model_is_loaded()
         if error_status: return f"❌ Model could not be loaded: {error_status}", None, None
         try:
-            # The string is now clean, so no special encoding is needed here.
-            df = pd.read_csv(io.StringIO(csv_content))
-            # Check for columns after reading
             if 'text' not in df.columns or 'real_tag' not in df.columns:
                 return f"❌ CSV must have 'text' and 'real_tag' columns! Found: {df.columns.to_list()}", None, None
@@ -110,7 +109,8 @@ class MultiClientThemeClassifier:
             unique_themes = df['real_tag'].unique().tolist()
             self.add_client_themes(client_id, unique_themes)
-            results = [self.classify_text(str(row['text'])[:500], client_id) for _, row in df.iterrows()]
             df['predicted_tag'] = [res[0] for res in results]
             df['confidence'] = [res[1] for res in results]
@@ -158,24 +158,21 @@ def classify_interface(text: str, client_id: str, confidence_threshold: float):
     return result, ""
 @spaces.GPU(duration=300)
-def benchmark_interface(csv_file, client_id: str):
-    if csv_file is None:
         return "Please upload a CSV file!", None, None
     try:
-        # CORRECTED AND FINAL FIX: Handle the BOM at the point of file reading.
-        if hasattr(csv_file, 'read'):
-            # It's a file-like object (TemporaryFile), read its bytes and decode with utf-8-sig
-            csv_content = csv_file.read().decode('utf-8-sig')
-        else:
-            # It's a string (NamedString), which was likely decoded with 'utf-8'.
-            # Manually remove the BOM if it exists.
-            csv_content = str(csv_file).lstrip('\ufeff')
-        # Now, pass the clean string to the benchmark function
-        return classifier.benchmark_csv(csv_content, client_id)
     except Exception as e:
         error_details = traceback.format_exc()
-        return f"❌ Error processing CSV file: {str(e)}\n\nDetails:\n{error_details}", None, None
 # --- Gradio Interface ---
 with gr.Blocks(title="Custom Themes Classification MVP", theme=gr.themes.Soft()) as demo:

         except Exception as e:
             return f"Error: {str(e)}", 0.0, {}
+    def benchmark_csv(self, csv_filepath: str, client_id: str) -> Tuple[str, Optional[str], Optional[str]]:
+        """Benchmark the model on a CSV file from a given filepath."""
         error_status = self._ensure_model_is_loaded()
         if error_status: return f"❌ Model could not be loaded: {error_status}", None, None
         try:
+            # CORRECTED: Read directly from the filepath and handle BOM with utf-8-sig
+            df = pd.read_csv(csv_filepath, encoding='utf-8-sig')
             if 'text' not in df.columns or 'real_tag' not in df.columns:
                 return f"❌ CSV must have 'text' and 'real_tag' columns! Found: {df.columns.to_list()}", None, None
             unique_themes = df['real_tag'].unique().tolist()
             self.add_client_themes(client_id, unique_themes)
+            texts_to_classify = df['text'].str.slice(0, 500).tolist()
+            results = [self.classify_text(text, client_id) for text in texts_to_classify]
             df['predicted_tag'] = [res[0] for res in results]
             df['confidence'] = [res[1] for res in results]
     return result, ""
 @spaces.GPU(duration=300)
+def benchmark_interface(csv_file_obj, client_id: str):
+    """
+    Handles the Gradio file object and passes the filepath to the benchmark function.
+    """
+    if csv_file_obj is None:
         return "Please upload a CSV file!", None, None
     try:
+        # THE FINAL, CORRECT FIX: Get the filepath from the .name attribute of the Gradio file object
+        csv_filepath = csv_file_obj.name
+        # Pass the filepath to the actual processing function
+        return classifier.benchmark_csv(csv_filepath, client_id)
     except Exception as e:
         error_details = traceback.format_exc()
+        return f"❌ Error processing CSV file object: {str(e)}\n\nDetails:\n{error_details}", None, None
 # --- Gradio Interface ---
 with gr.Blocks(title="Custom Themes Classification MVP", theme=gr.themes.Soft()) as demo: