Spaces:

Mohaddz
/

Customer-classify

Sleeping

App Files Community

Mohaddz committed on Aug 29, 2025

Commit

30f9702

verified ·

1 Parent(s): febe156

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -21

app.py CHANGED Viewed

@@ -16,17 +16,17 @@ import tempfile
 class MultiClientThemeClassifier:
     def __init__(self):
         self.model = None
-        self.client_themes = {}  # {client_id: {theme: prototype_embedding}}
         self.model_loaded = False
         self.default_model = 'Qwen/Qwen3-Embedding-0.6B'
-    def load_model(self, model_name: str = None):
-        """Load the embedding model onto the GPU"""
-        if model_name is None:
-            model_name = self.default_model
         try:
-            if self.model_loaded and hasattr(self.model, 'tokenizer') and self.model.tokenizer.name_or_path == model_name:
                 return f"✅ Model '{model_name}' is already loaded."
             self.model = None
@@ -36,6 +36,8 @@ class MultiClientThemeClassifier:
             print(f"Loading model: {model_name} onto CUDA device")
             self.model = SentenceTransformer(model_name, device='cuda', trust_remote_code=True)
             self.model_loaded = True
             return f"✅ Model '{model_name}' loaded successfully onto GPU!"
         except Exception as e:
             self.model_loaded = False
@@ -43,15 +45,16 @@ class MultiClientThemeClassifier:
             return f"❌ Error loading model '{model_name}': {str(e)}\n\nDetails:\n{error_details}"
     def _ensure_model_is_loaded(self) -> Optional[str]:
-        """Internal helper to load model if it's not already loaded."""
         if not self.model_loaded:
-            print("Model not loaded. Automatically loading default model...")
-            status = self.load_model()
             if "Error" in status:
                 return status
         return None
-    def add_client_themes(self, client_id: str, themes: List[str], examples_per_theme: Dict[str, List[str]] = None):
         """Add themes for a specific client"""
         error_status = self._ensure_model_is_loaded()
         if error_status: return error_status
@@ -95,22 +98,18 @@ class MultiClientThemeClassifier:
         error_status = self._ensure_model_is_loaded()
         if error_status: return f"❌ Model could not be loaded: {error_status}", None, None
-        # FINAL FIX: Try a list of common encodings to handle different file types.
-        encodings_to_try = ['utf-8-sig', 'utf-8', 'cp1256', 'latin1', 'cp1252']
         df = None
         for encoding in encodings_to_try:
             try:
                 df = pd.read_csv(csv_filepath, encoding=encoding)
                 print(f"Successfully read CSV with encoding: {encoding}")
-                break  # Exit loop if successful
             except (UnicodeDecodeError, pd.errors.ParserError):
-                print(f"Failed to read with encoding: {encoding}, trying next...")
                 continue
         if df is None:
-            error_message = "❌ Could not decode the CSV file. Please save it in a common format like 'UTF-8' and try again."
-            return error_message, None, None
         try:
             if 'text' not in df.columns or 'real_tag' not in df.columns:
@@ -123,8 +122,8 @@ class MultiClientThemeClassifier:
             unique_themes = df['real_tag'].unique().tolist()
             self.add_client_themes(client_id, unique_themes)
-            texts_to_classify = df['text'].str.slice(0, 500).tolist()
-            results = [self.classify_text(text, client_id) for text in texts_to_classify]
             df['predicted_tag'] = [res[0] for res in results]
             df['confidence'] = [res[1] for res in results]
@@ -133,7 +132,7 @@ class MultiClientThemeClassifier:
             total = len(df)
             accuracy = correct / total if total > 0 else 0
-            results_summary = f"📊 **Benchmarking Results**\n\n**Accuracy: {accuracy:.2%}** ({correct}/{total})"
             fig = px.bar(df['real_tag'].value_counts(), title="Theme Distribution", labels={'index': 'Theme', 'value': 'Count'})
             visualization_html = fig.to_html()

 class MultiClientThemeClassifier:
     def __init__(self):
         self.model = None
+        self.client_themes = {}
         self.model_loaded = False
         self.default_model = 'Qwen/Qwen3-Embedding-0.6B'
+        # CORRECTED: Add attribute to remember the last loaded model's name
+        self.current_model_name = self.default_model
+    def load_model(self, model_name: str):
+        """Load the embedding model onto the GPU, remembering the choice."""
         try:
+            # Prevent reloading the same model
+            if self.model_loaded and self.current_model_name == model_name:
                 return f"✅ Model '{model_name}' is already loaded."
             self.model = None
             print(f"Loading model: {model_name} onto CUDA device")
             self.model = SentenceTransformer(model_name, device='cuda', trust_remote_code=True)
             self.model_loaded = True
+            # CORRECTED: Remember the name of the successfully loaded model
+            self.current_model_name = model_name
             return f"✅ Model '{model_name}' loaded successfully onto GPU!"
         except Exception as e:
             self.model_loaded = False
             return f"❌ Error loading model '{model_name}': {str(e)}\n\nDetails:\n{error_details}"
     def _ensure_model_is_loaded(self) -> Optional[str]:
+        """Internal helper to load the correct model if it's not already loaded."""
         if not self.model_loaded:
+            print(f"Model not loaded. Automatically loading last selected model: {self.current_model_name}...")
+            # CORRECTED: Load the last selected model, not the default one
+            status = self.load_model(self.current_model_name)
             if "Error" in status:
                 return status
         return None
+    def add_client_themes(self, client_id: str, themes: List[str]):
         """Add themes for a specific client"""
         error_status = self._ensure_model_is_loaded()
         if error_status: return error_status
         error_status = self._ensure_model_is_loaded()
         if error_status: return f"❌ Model could not be loaded: {error_status}", None, None
+        encodings_to_try = ['utf-8-sig', 'utf-8', 'cp1256', 'latin1']
         df = None
         for encoding in encodings_to_try:
             try:
                 df = pd.read_csv(csv_filepath, encoding=encoding)
                 print(f"Successfully read CSV with encoding: {encoding}")
+                break
             except (UnicodeDecodeError, pd.errors.ParserError):
                 continue
         if df is None:
+            return "❌ Could not decode the CSV. Please save it as 'UTF-8' and try again.", None, None
         try:
             if 'text' not in df.columns or 'real_tag' not in df.columns:
             unique_themes = df['real_tag'].unique().tolist()
             self.add_client_themes(client_id, unique_themes)
+            texts = df['text'].str.slice(0, 500).tolist()
+            results = [self.classify_text(text, client_id) for text in texts]
             df['predicted_tag'] = [res[0] for res in results]
             df['confidence'] = [res[1] for res in results]
             total = len(df)
             accuracy = correct / total if total > 0 else 0
+            results_summary = f"📊 **Benchmarking Results for `{self.current_model_name}`**\n\n**Accuracy: {accuracy:.2%}** ({correct}/{total})"
             fig = px.bar(df['real_tag'].value_counts(), title="Theme Distribution", labels={'index': 'Theme', 'value': 'Count'})
             visualization_html = fig.to_html()