Update app.py
Browse files
app.py
CHANGED
|
@@ -25,30 +25,14 @@ logger = logging.getLogger("ArabicSignNLP")
|
|
| 25 |
|
| 26 |
# ----- Project Configuration -----
|
| 27 |
class Config:
|
| 28 |
-
# Path to your CSV dataset containing sign labels
|
| 29 |
-
# On HF Spaces, upload your CSV to the repo and set the path here
|
| 30 |
CSV_PATH: str = os.getenv("CSV_PATH", "arabic_sign_lang_features.csv")
|
| 31 |
-
|
| 32 |
-
# Folder where .npy keypoint files are stored (optional on HF Spaces)
|
| 33 |
KEYPOINTS_FOLDER: str = os.getenv("KEYPOINTS_FOLDER", "keypoints")
|
| 34 |
-
|
| 35 |
-
# Output file path for Blender sequence
|
| 36 |
SEQUENCE_OUTPUT_PATH: str = "/tmp/sequence.txt"
|
| 37 |
-
|
| 38 |
-
# AraBERT model for Arabic semantic understanding
|
| 39 |
EMBEDDING_MODEL: str = "aubmindlab/bert-base-arabertv2"
|
| 40 |
-
|
| 41 |
-
# Similarity threshold for sign matching
|
| 42 |
SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.72"))
|
| 43 |
-
|
| 44 |
-
# Include prepositions in signing
|
| 45 |
INCLUDE_PREPOSITION_WORDS: bool = False
|
| 46 |
-
|
| 47 |
-
# FastAPI server settings
|
| 48 |
API_HOST: str = "0.0.0.0"
|
| 49 |
-
API_PORT: int = 7860
|
| 50 |
-
|
| 51 |
-
# Column name in your CSV that contains the sign labels
|
| 52 |
CSV_LABEL_COLUMN: str = "label"
|
| 53 |
|
| 54 |
|
|
@@ -237,9 +221,19 @@ class SemanticSignMatcher:
|
|
| 237 |
return label
|
| 238 |
|
| 239 |
def _load_database(self, csv_path: str, label_column: str):
|
|
|
|
| 240 |
if not os.path.exists(csv_path):
|
| 241 |
-
logger.
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
df = pd.read_csv(csv_path, low_memory=False)
|
| 244 |
if label_column not in df.columns:
|
| 245 |
raise ValueError(f"Column '{label_column}' not found. Available: {list(df.columns)}")
|
|
@@ -539,4 +533,4 @@ def read_sequence_file():
|
|
| 539 |
|
| 540 |
if __name__ == "__main__":
|
| 541 |
import uvicorn
|
| 542 |
-
uvicorn.run(app, host=Config.API_HOST, port=Config.API_PORT)
|
|
|
|
| 25 |
|
| 26 |
# ----- Project Configuration -----
|
| 27 |
class Config:
|
|
|
|
|
|
|
| 28 |
CSV_PATH: str = os.getenv("CSV_PATH", "arabic_sign_lang_features.csv")
|
|
|
|
|
|
|
| 29 |
KEYPOINTS_FOLDER: str = os.getenv("KEYPOINTS_FOLDER", "keypoints")
|
|
|
|
|
|
|
| 30 |
SEQUENCE_OUTPUT_PATH: str = "/tmp/sequence.txt"
|
|
|
|
|
|
|
| 31 |
EMBEDDING_MODEL: str = "aubmindlab/bert-base-arabertv2"
|
|
|
|
|
|
|
| 32 |
SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.72"))
|
|
|
|
|
|
|
| 33 |
INCLUDE_PREPOSITION_WORDS: bool = False
|
|
|
|
|
|
|
| 34 |
API_HOST: str = "0.0.0.0"
|
| 35 |
+
API_PORT: int = 7860
|
|
|
|
|
|
|
| 36 |
CSV_LABEL_COLUMN: str = "label"
|
| 37 |
|
| 38 |
|
|
|
|
| 221 |
return label
|
| 222 |
|
| 223 |
def _load_database(self, csv_path: str, label_column: str):
|
| 224 |
+
# ---- Change: if the CSV is not found locally, download it from Hugging Face ----
|
| 225 |
if not os.path.exists(csv_path):
|
| 226 |
+
logger.info("CSV not found locally. Downloading from Hugging Face...")
|
| 227 |
+
import urllib.request
|
| 228 |
+
url = "https://huggingface.co/spaces/SondosM/avatarAPI/resolve/main/arabic_sign_lang_features.csv"
|
| 229 |
+
try:
|
| 230 |
+
urllib.request.urlretrieve(url, csv_path)
|
| 231 |
+
logger.info("CSV downloaded successfully.")
|
| 232 |
+
except Exception as e:
|
| 233 |
+
logger.warning(f"Failed to download CSV: {e}. No word signs loaded.")
|
| 234 |
+
return
|
| 235 |
+
# -----------------------------------------------------
|
| 236 |
+
|
| 237 |
df = pd.read_csv(csv_path, low_memory=False)
|
| 238 |
if label_column not in df.columns:
|
| 239 |
raise ValueError(f"Column '{label_column}' not found. Available: {list(df.columns)}")
|
|
|
|
| 533 |
|
| 534 |
if __name__ == "__main__":
|
| 535 |
import uvicorn
|
| 536 |
+
uvicorn.run(app, host=Config.API_HOST, port=Config.API_PORT)
|