Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -91,14 +91,28 @@ class MultiClientThemeClassifier:
|
|
| 91 |
return f"Error: {str(e)}", 0.0, {}
|
| 92 |
|
| 93 |
def benchmark_csv(self, csv_filepath: str, client_id: str) -> Tuple[str, Optional[str], Optional[str]]:
|
| 94 |
-
"""Benchmark the model on a CSV file
|
| 95 |
error_status = self._ensure_model_is_loaded()
|
| 96 |
if error_status: return f"❌ Model could not be loaded: {error_status}", None, None
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
try:
|
| 99 |
-
# CORRECTED: Read directly from the filepath and handle BOM with utf-8-sig
|
| 100 |
-
df = pd.read_csv(csv_filepath, encoding='utf-8-sig')
|
| 101 |
-
|
| 102 |
if 'text' not in df.columns or 'real_tag' not in df.columns:
|
| 103 |
return f"❌ CSV must have 'text' and 'real_tag' columns! Found: {df.columns.to_list()}", None, None
|
| 104 |
|
|
@@ -159,16 +173,10 @@ def classify_interface(text: str, client_id: str, confidence_threshold: float):
|
|
| 159 |
|
| 160 |
@spaces.GPU(duration=300)
|
| 161 |
def benchmark_interface(csv_file_obj, client_id: str):
|
| 162 |
-
"""
|
| 163 |
-
Handles the Gradio file object and passes the filepath to the benchmark function.
|
| 164 |
-
"""
|
| 165 |
if csv_file_obj is None:
|
| 166 |
return "Please upload a CSV file!", None, None
|
| 167 |
try:
|
| 168 |
-
# THE FINAL, CORRECT FIX: Get the filepath from the .name attribute of the Gradio file object
|
| 169 |
csv_filepath = csv_file_obj.name
|
| 170 |
-
|
| 171 |
-
# Pass the filepath to the actual processing function
|
| 172 |
return classifier.benchmark_csv(csv_filepath, client_id)
|
| 173 |
except Exception as e:
|
| 174 |
error_details = traceback.format_exc()
|
|
|
|
| 91 |
return f"Error: {str(e)}", 0.0, {}
|
| 92 |
|
| 93 |
def benchmark_csv(self, csv_filepath: str, client_id: str) -> Tuple[str, Optional[str], Optional[str]]:
|
| 94 |
+
"""Benchmark the model on a CSV file, trying multiple encodings."""
|
| 95 |
error_status = self._ensure_model_is_loaded()
|
| 96 |
if error_status: return f"❌ Model could not be loaded: {error_status}", None, None
|
| 97 |
|
| 98 |
+
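# Try several common text encodings in order, because uploaded CSVs are not always UTF-8. NOTE: 'latin1' can decode any byte sequence, so 'cp1252' is only reached if the 'latin1' attempt fails with a parser error.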
|
| 99 |
+
encodings_to_try = ['utf-8-sig', 'utf-8', 'cp1256', 'latin1', 'cp1252']
|
| 100 |
+
df = None
|
| 101 |
+
|
| 102 |
+
for encoding in encodings_to_try:
|
| 103 |
+
try:
|
| 104 |
+
df = pd.read_csv(csv_filepath, encoding=encoding)
|
| 105 |
+
print(f"Successfully read CSV with encoding: {encoding}")
|
| 106 |
+
break # Exit loop if successful
|
| 107 |
+
except (UnicodeDecodeError, pd.errors.ParserError):
|
| 108 |
+
print(f"Failed to read with encoding: {encoding}, trying next...")
|
| 109 |
+
continue
|
| 110 |
+
|
| 111 |
+
if df is None:
|
| 112 |
+
error_message = "❌ Could not decode the CSV file. Please save it in a common format like 'UTF-8' and try again."
|
| 113 |
+
return error_message, None, None
|
| 114 |
+
|
| 115 |
try:
|
|
|
|
|
|
|
|
|
|
| 116 |
if 'text' not in df.columns or 'real_tag' not in df.columns:
|
| 117 |
return f"❌ CSV must have 'text' and 'real_tag' columns! Found: {df.columns.to_list()}", None, None
|
| 118 |
|
|
|
|
| 173 |
|
| 174 |
@spaces.GPU(duration=300)
|
| 175 |
def benchmark_interface(csv_file_obj, client_id: str):
|
|
|
|
|
|
|
|
|
|
| 176 |
if csv_file_obj is None:
|
| 177 |
return "Please upload a CSV file!", None, None
|
| 178 |
try:
|
|
|
|
| 179 |
csv_filepath = csv_file_obj.name
|
|
|
|
|
|
|
| 180 |
return classifier.benchmark_csv(csv_filepath, client_id)
|
| 181 |
except Exception as e:
|
| 182 |
error_details = traceback.format_exc()
|