Update app.py
Browse files
app.py
CHANGED
|
@@ -236,22 +236,29 @@ class SemanticSignMatcher:
|
|
| 236 |
return self._normalizer.normalize_label(label)
|
| 237 |
return label
|
| 238 |
|
| 239 |
-
|
| 240 |
if not os.path.exists(csv_path):
|
| 241 |
-
logger.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
return
|
| 243 |
-
df = pd.read_csv(csv_path, low_memory=False)
|
| 244 |
-
if label_column not in df.columns:
|
| 245 |
-
raise ValueError(f"Column '{label_column}' not found. Available: {list(df.columns)}")
|
| 246 |
-
all_labels = df[label_column].dropna().unique().tolist()
|
| 247 |
-
arabic_labels = [
|
| 248 |
-
str(l) for l in all_labels
|
| 249 |
-
if isinstance(l, str) and any("\u0600" <= c <= "\u06ff" for c in str(l))
|
| 250 |
-
]
|
| 251 |
-
self._raw_labels = arabic_labels
|
| 252 |
-
self._word_signs = arabic_labels.copy()
|
| 253 |
-
logger.info(f"Database: {len(arabic_labels)} Arabic word labels loaded.")
|
| 254 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
def _finalize_labels(self):
|
| 256 |
if self._normalizer and self._raw_labels:
|
| 257 |
self._word_signs = [self._normalize_label(l) for l in self._raw_labels]
|
|
|
|
| 236 |
return self._normalizer.normalize_label(label)
|
| 237 |
return label
|
| 238 |
|
| 239 |
+
def _load_database(self, csv_path: str, label_column: str):
|
| 240 |
if not os.path.exists(csv_path):
|
| 241 |
+
logger.info("CSV not found locally. Downloading from Hugging Face...")
|
| 242 |
+
import urllib.request
|
| 243 |
+
url = "https://huggingface.co/spaces/SondosM/avatarAPI/resolve/main/arabic_sign_lang_features.csv"
|
| 244 |
+
try:
|
| 245 |
+
urllib.request.urlretrieve(url, csv_path)
|
| 246 |
+
logger.info("CSV downloaded successfully.")
|
| 247 |
+
except Exception as e:
|
| 248 |
+
logger.warning(f"Failed to download CSV: {e}. No word signs loaded.")
|
| 249 |
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
|
| 251 |
+
df = pd.read_csv(csv_path, low_memory=False)
|
| 252 |
+
if label_column not in df.columns:
|
| 253 |
+
raise ValueError(f"Column '{label_column}' not found. Available: {list(df.columns)}")
|
| 254 |
+
all_labels = df[label_column].dropna().unique().tolist()
|
| 255 |
+
arabic_labels = [
|
| 256 |
+
str(l) for l in all_labels
|
| 257 |
+
if isinstance(l, str) and any("\u0600" <= c <= "\u06ff" for c in str(l))
|
| 258 |
+
]
|
| 259 |
+
self._raw_labels = arabic_labels
|
| 260 |
+
self._word_signs = arabic_labels.copy()
|
| 261 |
+
logger.info(f"Database: {len(arabic_labels)} Arabic word labels loaded.")
|
| 262 |
def _finalize_labels(self):
|
| 263 |
if self._normalizer and self._raw_labels:
|
| 264 |
self._word_signs = [self._normalize_label(l) for l in self._raw_labels]
|