Spaces:

SondosM
/

avatarAPI

Running

App Files Files Community

SondosM committed 3 days ago

Commit

ea286f0

verified ·

1 Parent(s): ea1a0a7

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -69

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import re
 import json
 import logging
 import warnings
 from pathlib import Path
 from typing import List, Dict, Optional, Tuple
 from dataclasses import dataclass, field
@@ -25,30 +26,14 @@ logger = logging.getLogger("ArabicSignNLP")
 # ----- Project Configuration -----
 class Config:
-    # Path to your CSV dataset containing sign labels
-    # On HF Spaces, upload your CSV to the repo and set the path here
     CSV_PATH: str = os.getenv("CSV_PATH", "arabic_sign_lang_features.csv")
-    # Folder where .npy keypoint files are stored (optional on HF Spaces)
     KEYPOINTS_FOLDER: str = os.getenv("KEYPOINTS_FOLDER", "keypoints")
-    # Output file path for Blender sequence
     SEQUENCE_OUTPUT_PATH: str = "/tmp/sequence.txt"
-    # AraBERT model for Arabic semantic understanding
     EMBEDDING_MODEL: str = "aubmindlab/bert-base-arabertv2"
-    # Similarity threshold for sign matching
     SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.72"))
-    # Include prepositions in signing
     INCLUDE_PREPOSITION_WORDS: bool = False
-    # FastAPI server settings
     API_HOST: str = "0.0.0.0"
-    API_PORT: int = 7860  # HF Spaces uses port 7860
-    # Column name in your CSV that contains the sign labels
     CSV_LABEL_COLUMN: str = "label"
@@ -236,29 +221,30 @@ class SemanticSignMatcher:
             return self._normalizer.normalize_label(label)
         return label
-     def _load_database(self, csv_path: str, label_column: str):
         if not os.path.exists(csv_path):
             logger.info("CSV not found locally. Downloading from Hugging Face...")
-            import urllib.request
             url = "https://huggingface.co/spaces/SondosM/avatarAPI/resolve/main/arabic_sign_lang_features.csv"
             try:
                 urllib.request.urlretrieve(url, csv_path)
                 logger.info("CSV downloaded successfully.")
             except Exception as e:
                 logger.warning(f"Failed to download CSV: {e}. No word signs loaded.")
-            return
-    df = pd.read_csv(csv_path, low_memory=False)
-    if label_column not in df.columns:
-        raise ValueError(f"Column '{label_column}' not found. Available: {list(df.columns)}")
-    all_labels = df[label_column].dropna().unique().tolist()
-    arabic_labels = [
-        str(l) for l in all_labels
-        if isinstance(l, str) and any("\u0600" <= c <= "\u06ff" for c in str(l))
-    ]
-    self._raw_labels = arabic_labels
-    self._word_signs = arabic_labels.copy()
-    logger.info(f"Database: {len(arabic_labels)} Arabic word labels loaded.")
     def _finalize_labels(self):
         if self._normalizer and self._raw_labels:
             self._word_signs = [self._normalize_label(l) for l in self._raw_labels]
@@ -282,26 +268,28 @@ class SemanticSignMatcher:
             return SignMatch(found=False, sign_label="", confidence=0.0, method="none")
         norm_word = self._normalize_label(word_text)
         norm_lemma = self._normalize_label(lemma) if lemma else ""
         if norm_word in self._word_signs:
             idx = self._word_signs.index(norm_word)
             return SignMatch(True, self._raw_labels[idx], 1.0, "exact")
         if norm_lemma and norm_lemma != norm_word and norm_lemma in self._word_signs:
             idx = self._word_signs.index(norm_lemma)
             return SignMatch(True, self._raw_labels[idx], 0.95, "lemma")
         if self._model is None or self._sign_embeddings is None:
             return SignMatch(False, "", 0.0, "none")
         candidates = list({norm_word, norm_lemma} - {""})
         embs = self._model.encode(candidates, convert_to_tensor=True, device=self._device, batch_size=len(candidates))
         scores = util.cos_sim(embs, self._sign_embeddings)
         best_val = float(scores.max())
         best_idx = int(scores.argmax() % len(self._word_signs))
         if best_val >= self.threshold:
             return SignMatch(True, self._raw_labels[best_idx], best_val, "semantic")
         return SignMatch(False, self._raw_labels[best_idx] if self._raw_labels else "", best_val, "none")
-    def letter_to_label(self, arabic_letter: str) -> Optional[str]:
-        return ARABIC_LETTER_TO_LABEL.get(arabic_letter)
     @property
     def available_signs(self) -> List[str]:
         return self._raw_labels.copy()
@@ -376,6 +364,7 @@ class BlenderSequenceWriter:
         missing_files = self._check_missing_keypoints(plan)
         with open(self.output_path, "w", encoding="utf-8") as f:
             f.write("\n".join(identifiers))
         sign_steps = [s for s in plan if s.action_type == ActionType.SIGN]
         letter_steps = [s for s in plan if s.action_type == ActionType.LETTER]
         return {
@@ -451,8 +440,8 @@ logger.info("All components ready.")
 # ----- FastAPI App -----
 class TranslateRequest(BaseModel):
-    text: str = Field(description="Arabic input text (Fus-ha or Ammiya)", min_length=1, max_length=4000, examples=["انا عايز اروح المدرسة"])
-    save_sequence: bool = Field(default=False, description="Save sequence file to /tmp/sequence.txt")
 class StepDetail(BaseModel):
@@ -474,11 +463,7 @@ class TranslateResponse(BaseModel):
     detailed_plan: List[StepDetail]
-app = FastAPI(
-    title="Arabic Sign Language NLP API",
-    description="Translates Arabic text (Fus-ha and Ammiya) into sign animation sequences.",
-    version="1.0.0",
-)
 app.add_middleware(
     CORSMiddleware,
@@ -503,8 +488,10 @@ def translate_post(request: TranslateRequest):
         result = translator.translate(request.text, save_to_file=request.save_sequence)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
     if result["status"] == "error":
         raise HTTPException(status_code=422, detail=result["message"])
     return TranslateResponse(
         status=result["status"],
         input_text=request.text,
@@ -514,36 +501,10 @@ def translate_post(request: TranslateRequest):
         letter_count=result.get("letter_count", 0),
         missing_keypoint_files=result.get("missing_keypoint_files", []),
         detailed_plan=[
-            StepDetail(type=s["type"], identifier=s["identifier"], source_word=s["source_word"],
-                       confidence=s["confidence"], method=s["method"])
-            for s in result.get("detailed_plan", [])
         ],
     )
-@app.get("/translate")
-def translate_get(
-    text: str = Query(description="Arabic text to translate"),
-    save_sequence: bool = Query(default=False),
-):
-    return translate_post(TranslateRequest(text=text, save_sequence=save_sequence))
-@app.get("/signs")
-def list_signs():
-    return {"total": len(sign_matcher.available_signs), "signs": sign_matcher.available_signs}
-@app.get("/sequence-file")
-def read_sequence_file():
-    path = Config.SEQUENCE_OUTPUT_PATH
-    if not os.path.exists(path):
-        raise HTTPException(status_code=404, detail="Sequence file not found. Run a translation first.")
-    with open(path, "r", encoding="utf-8") as f:
-        lines = [line.strip() for line in f.readlines() if line.strip()]
-    return {"file_path": path, "sequence": lines, "count": len(lines)}
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host=Config.API_HOST, port=Config.API_PORT)

 import json
 import logging
 import warnings
+import urllib.request
 from pathlib import Path
 from typing import List, Dict, Optional, Tuple
 from dataclasses import dataclass, field
 # ----- Project Configuration -----
 class Config:
     # Central settings for the service. CSV_PATH, KEYPOINTS_FOLDER and
     # SIMILARITY_THRESHOLD can be overridden through environment variables
     # of the same name; the rest are fixed constants.
     # CSV dataset containing the sign labels.
     CSV_PATH: str = os.getenv("CSV_PATH", "arabic_sign_lang_features.csv")
     # Folder where the .npy keypoint files are stored.
     KEYPOINTS_FOLDER: str = os.getenv("KEYPOINTS_FOLDER", "keypoints")
     # Output file path for the Blender sequence.
     SEQUENCE_OUTPUT_PATH: str = "/tmp/sequence.txt"
     # AraBERT model used for Arabic semantic understanding.
     EMBEDDING_MODEL: str = "aubmindlab/bert-base-arabertv2"
     # Similarity threshold for sign matching (env value is parsed as float).
     SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.72"))
     # Whether prepositions are included in signing.
     INCLUDE_PREPOSITION_WORDS: bool = False
     # Host and port handed to uvicorn.run in the __main__ guard.
     API_HOST: str = "0.0.0.0"
     # 7860 is the port Hugging Face Spaces uses.
+    API_PORT: int = 7860
     # Column name in the CSV that contains the sign labels.
     CSV_LABEL_COLUMN: str = "label"
             return self._normalizer.normalize_label(label)
         return label
+    def _load_database(self, csv_path: str, label_column: str):
         # Load the sign labels from the CSV at csv_path and store the Arabic
         # ones on self._raw_labels and self._word_signs.
         #
         # csv_path:     local path of the CSV; also the download target when
         #               the file does not exist yet.
         # label_column: name of the column holding the labels.
         #
         # Raises ValueError when label_column is not a column of the CSV.
         # Returns None; when the file is missing and cannot be downloaded,
         # a warning is logged and nothing is loaded.
         if not os.path.exists(csv_path):
             logger.info("CSV not found locally. Downloading from Hugging Face...")
             url = "https://huggingface.co/spaces/SondosM/avatarAPI/resolve/main/arabic_sign_lang_features.csv"
             try:
                 urllib.request.urlretrieve(url, csv_path)
                 logger.info("CSV downloaded successfully.")
             except Exception as e:
                 logger.warning(f"Failed to download CSV: {e}. No word signs loaded.")
                 # Bail out only when the download failed; after a successful
                 # download execution falls through and reads the new file.
+                return
+        df = pd.read_csv(csv_path, low_memory=False)
+        if label_column not in df.columns:
+            raise ValueError(f"Column '{label_column}' not found. Available: {list(df.columns)}")
         # Distinct, non-null values of the label column.
+        all_labels = df[label_column].dropna().unique().tolist()
         # Keep only string labels that contain at least one character in the
         # range U+0600..U+06FF; everything else is dropped.
+        arabic_labels = [
+            str(l) for l in all_labels
+            if isinstance(l, str) and any("\u0600" <= c <= "\u06ff" for c in str(l))
+        ]
+        self._raw_labels = arabic_labels
         # Separate copy so later changes to _word_signs leave _raw_labels
         # untouched.
+        self._word_signs = arabic_labels.copy()
+        logger.info(f"Database: {len(arabic_labels)} Arabic word labels loaded.")
     def _finalize_labels(self):
         if self._normalizer and self._raw_labels:
             self._word_signs = [self._normalize_label(l) for l in self._raw_labels]
             return SignMatch(found=False, sign_label="", confidence=0.0, method="none")
         norm_word = self._normalize_label(word_text)
         norm_lemma = self._normalize_label(lemma) if lemma else ""
         if norm_word in self._word_signs:
             idx = self._word_signs.index(norm_word)
             return SignMatch(True, self._raw_labels[idx], 1.0, "exact")
         if norm_lemma and norm_lemma != norm_word and norm_lemma in self._word_signs:
             idx = self._word_signs.index(norm_lemma)
             return SignMatch(True, self._raw_labels[idx], 0.95, "lemma")
         if self._model is None or self._sign_embeddings is None:
             return SignMatch(False, "", 0.0, "none")
         candidates = list({norm_word, norm_lemma} - {""})
         embs = self._model.encode(candidates, convert_to_tensor=True, device=self._device, batch_size=len(candidates))
         scores = util.cos_sim(embs, self._sign_embeddings)
         best_val = float(scores.max())
         best_idx = int(scores.argmax() % len(self._word_signs))
         if best_val >= self.threshold:
             return SignMatch(True, self._raw_labels[best_idx], best_val, "semantic")
         return SignMatch(False, self._raw_labels[best_idx] if self._raw_labels else "", best_val, "none")
     @property
     def available_signs(self) -> List[str]:
         # Return a copy of the raw labels so callers cannot mutate the
         # matcher's own list.
         return self._raw_labels.copy()
         missing_files = self._check_missing_keypoints(plan)
         with open(self.output_path, "w", encoding="utf-8") as f:
             f.write("\n".join(identifiers))
         sign_steps = [s for s in plan if s.action_type == ActionType.SIGN]
         letter_steps = [s for s in plan if s.action_type == ActionType.LETTER]
         return {
 # ----- FastAPI App -----
 class TranslateRequest(BaseModel):
     # Request body for the translate endpoint.
     # text: the Arabic input, constrained to 1..4000 characters.
+    text: str = Field(description="Arabic input text (Fus-ha or Ammiya)", min_length=1, max_length=4000)
     # save_sequence: forwarded to translator.translate as save_to_file;
     # defaults to False.
+    save_sequence: bool = Field(default=False)
 class StepDetail(BaseModel):
     detailed_plan: List[StepDetail]
+app = FastAPI(title="Arabic Sign Language NLP API")
 app.add_middleware(
     CORSMiddleware,
         result = translator.translate(request.text, save_to_file=request.save_sequence)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
     if result["status"] == "error":
         raise HTTPException(status_code=422, detail=result["message"])
     return TranslateResponse(
         status=result["status"],
         input_text=request.text,
         letter_count=result.get("letter_count", 0),
         missing_keypoint_files=result.get("missing_keypoint_files", []),
         detailed_plan=[
+            StepDetail(**s) for s in result.get("detailed_plan", [])
         ],
     )
 if __name__ == "__main__":
     # When run as a script, serve the app with uvicorn on the host and port
     # defined in Config; uvicorn is imported here, only on this path.
     import uvicorn
+    uvicorn.run(app, host=Config.API_HOST, port=Config.API_PORT)