SondosM committed on
Commit
666de0b
·
verified ·
1 Parent(s): ea286f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -25
app.py CHANGED
@@ -3,7 +3,6 @@ import re
3
  import json
4
  import logging
5
  import warnings
6
- import urllib.request
7
  from pathlib import Path
8
  from typing import List, Dict, Optional, Tuple
9
  from dataclasses import dataclass, field
@@ -26,14 +25,30 @@ logger = logging.getLogger("ArabicSignNLP")
26
 
27
  # ----- Project Configuration -----
28
  class Config:
 
 
29
  CSV_PATH: str = os.getenv("CSV_PATH", "arabic_sign_lang_features.csv")
 
 
30
  KEYPOINTS_FOLDER: str = os.getenv("KEYPOINTS_FOLDER", "keypoints")
 
 
31
  SEQUENCE_OUTPUT_PATH: str = "/tmp/sequence.txt"
 
 
32
  EMBEDDING_MODEL: str = "aubmindlab/bert-base-arabertv2"
 
 
33
  SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.72"))
 
 
34
  INCLUDE_PREPOSITION_WORDS: bool = False
 
 
35
  API_HOST: str = "0.0.0.0"
36
- API_PORT: int = 7860
 
 
37
  CSV_LABEL_COLUMN: str = "label"
38
 
39
 
@@ -223,19 +238,11 @@ class SemanticSignMatcher:
223
 
224
  def _load_database(self, csv_path: str, label_column: str):
225
  if not os.path.exists(csv_path):
226
- logger.info("CSV not found locally. Downloading from Hugging Face...")
227
- url = "https://huggingface.co/spaces/SondosM/avatarAPI/resolve/main/arabic_sign_lang_features.csv"
228
- try:
229
- urllib.request.urlretrieve(url, csv_path)
230
- logger.info("CSV downloaded successfully.")
231
- except Exception as e:
232
- logger.warning(f"Failed to download CSV: {e}. No word signs loaded.")
233
- return
234
-
235
  df = pd.read_csv(csv_path, low_memory=False)
236
  if label_column not in df.columns:
237
  raise ValueError(f"Column '{label_column}' not found. Available: {list(df.columns)}")
238
-
239
  all_labels = df[label_column].dropna().unique().tolist()
240
  arabic_labels = [
241
  str(l) for l in all_labels
@@ -268,28 +275,26 @@ class SemanticSignMatcher:
268
  return SignMatch(found=False, sign_label="", confidence=0.0, method="none")
269
  norm_word = self._normalize_label(word_text)
270
  norm_lemma = self._normalize_label(lemma) if lemma else ""
271
-
272
  if norm_word in self._word_signs:
273
  idx = self._word_signs.index(norm_word)
274
  return SignMatch(True, self._raw_labels[idx], 1.0, "exact")
275
-
276
  if norm_lemma and norm_lemma != norm_word and norm_lemma in self._word_signs:
277
  idx = self._word_signs.index(norm_lemma)
278
  return SignMatch(True, self._raw_labels[idx], 0.95, "lemma")
279
-
280
  if self._model is None or self._sign_embeddings is None:
281
  return SignMatch(False, "", 0.0, "none")
282
-
283
  candidates = list({norm_word, norm_lemma} - {""})
284
  embs = self._model.encode(candidates, convert_to_tensor=True, device=self._device, batch_size=len(candidates))
285
  scores = util.cos_sim(embs, self._sign_embeddings)
286
  best_val = float(scores.max())
287
  best_idx = int(scores.argmax() % len(self._word_signs))
288
-
289
  if best_val >= self.threshold:
290
  return SignMatch(True, self._raw_labels[best_idx], best_val, "semantic")
291
  return SignMatch(False, self._raw_labels[best_idx] if self._raw_labels else "", best_val, "none")
292
 
 
 
 
293
  @property
294
  def available_signs(self) -> List[str]:
295
  return self._raw_labels.copy()
@@ -364,7 +369,6 @@ class BlenderSequenceWriter:
364
  missing_files = self._check_missing_keypoints(plan)
365
  with open(self.output_path, "w", encoding="utf-8") as f:
366
  f.write("\n".join(identifiers))
367
-
368
  sign_steps = [s for s in plan if s.action_type == ActionType.SIGN]
369
  letter_steps = [s for s in plan if s.action_type == ActionType.LETTER]
370
  return {
@@ -440,8 +444,8 @@ logger.info("All components ready.")
440
 
441
  # ----- FastAPI App -----
442
  class TranslateRequest(BaseModel):
443
- text: str = Field(description="Arabic input text (Fus-ha or Ammiya)", min_length=1, max_length=4000)
444
- save_sequence: bool = Field(default=False)
445
 
446
 
447
  class StepDetail(BaseModel):
@@ -463,7 +467,11 @@ class TranslateResponse(BaseModel):
463
  detailed_plan: List[StepDetail]
464
 
465
 
466
- app = FastAPI(title="Arabic Sign Language NLP API")
 
 
 
 
467
 
468
  app.add_middleware(
469
  CORSMiddleware,
@@ -488,10 +496,8 @@ def translate_post(request: TranslateRequest):
488
  result = translator.translate(request.text, save_to_file=request.save_sequence)
489
  except Exception as e:
490
  raise HTTPException(status_code=500, detail=str(e))
491
-
492
  if result["status"] == "error":
493
  raise HTTPException(status_code=422, detail=result["message"])
494
-
495
  return TranslateResponse(
496
  status=result["status"],
497
  input_text=request.text,
@@ -501,10 +507,36 @@ def translate_post(request: TranslateRequest):
501
  letter_count=result.get("letter_count", 0),
502
  missing_keypoint_files=result.get("missing_keypoint_files", []),
503
  detailed_plan=[
504
- StepDetail(**s) for s in result.get("detailed_plan", [])
 
 
505
  ],
506
  )
507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
  if __name__ == "__main__":
509
  import uvicorn
510
- uvicorn.run(app, host=Config.API_HOST, port=Config.API_PORT)
 
3
  import json
4
  import logging
5
  import warnings
 
6
  from pathlib import Path
7
  from typing import List, Dict, Optional, Tuple
8
  from dataclasses import dataclass, field
 
25
 
26
  # ----- Project Configuration -----
27
  class Config:
28
+ # Path to your CSV dataset containing sign labels
29
+ # On HF Spaces, upload your CSV to the repo and set the path here
30
  CSV_PATH: str = os.getenv("CSV_PATH", "arabic_sign_lang_features.csv")
31
+
32
+ # Folder where .npy keypoint files are stored (optional on HF Spaces)
33
  KEYPOINTS_FOLDER: str = os.getenv("KEYPOINTS_FOLDER", "keypoints")
34
+
35
+ # Output file path for Blender sequence
36
  SEQUENCE_OUTPUT_PATH: str = "/tmp/sequence.txt"
37
+
38
+ # AraBERT model for Arabic semantic understanding
39
  EMBEDDING_MODEL: str = "aubmindlab/bert-base-arabertv2"
40
+
41
+ # Similarity threshold for sign matching
42
  SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.72"))
43
+
44
+ # Include prepositions in signing
45
  INCLUDE_PREPOSITION_WORDS: bool = False
46
+
47
+ # FastAPI server settings
48
  API_HOST: str = "0.0.0.0"
49
+ API_PORT: int = 7860 # HF Spaces uses port 7860
50
+
51
+ # Column name in your CSV that contains the sign labels
52
  CSV_LABEL_COLUMN: str = "label"
53
 
54
 
 
238
 
239
  def _load_database(self, csv_path: str, label_column: str):
240
  if not os.path.exists(csv_path):
241
+ logger.warning(f"CSV not found at {csv_path}. No word signs loaded.")
242
+ return
 
 
 
 
 
 
 
243
  df = pd.read_csv(csv_path, low_memory=False)
244
  if label_column not in df.columns:
245
  raise ValueError(f"Column '{label_column}' not found. Available: {list(df.columns)}")
 
246
  all_labels = df[label_column].dropna().unique().tolist()
247
  arabic_labels = [
248
  str(l) for l in all_labels
 
275
  return SignMatch(found=False, sign_label="", confidence=0.0, method="none")
276
  norm_word = self._normalize_label(word_text)
277
  norm_lemma = self._normalize_label(lemma) if lemma else ""
 
278
  if norm_word in self._word_signs:
279
  idx = self._word_signs.index(norm_word)
280
  return SignMatch(True, self._raw_labels[idx], 1.0, "exact")
 
281
  if norm_lemma and norm_lemma != norm_word and norm_lemma in self._word_signs:
282
  idx = self._word_signs.index(norm_lemma)
283
  return SignMatch(True, self._raw_labels[idx], 0.95, "lemma")
 
284
  if self._model is None or self._sign_embeddings is None:
285
  return SignMatch(False, "", 0.0, "none")
 
286
  candidates = list({norm_word, norm_lemma} - {""})
287
  embs = self._model.encode(candidates, convert_to_tensor=True, device=self._device, batch_size=len(candidates))
288
  scores = util.cos_sim(embs, self._sign_embeddings)
289
  best_val = float(scores.max())
290
  best_idx = int(scores.argmax() % len(self._word_signs))
 
291
  if best_val >= self.threshold:
292
  return SignMatch(True, self._raw_labels[best_idx], best_val, "semantic")
293
  return SignMatch(False, self._raw_labels[best_idx] if self._raw_labels else "", best_val, "none")
294
 
295
+ def letter_to_label(self, arabic_letter: str) -> Optional[str]:
296
+ return ARABIC_LETTER_TO_LABEL.get(arabic_letter)
297
+
298
  @property
299
  def available_signs(self) -> List[str]:
300
  return self._raw_labels.copy()
 
369
  missing_files = self._check_missing_keypoints(plan)
370
  with open(self.output_path, "w", encoding="utf-8") as f:
371
  f.write("\n".join(identifiers))
 
372
  sign_steps = [s for s in plan if s.action_type == ActionType.SIGN]
373
  letter_steps = [s for s in plan if s.action_type == ActionType.LETTER]
374
  return {
 
444
 
445
  # ----- FastAPI App -----
446
  class TranslateRequest(BaseModel):
447
+ text: str = Field(description="Arabic input text (Fus-ha or Ammiya)", min_length=1, max_length=4000, examples=["انا عايز اروح المدرسة"])
448
+ save_sequence: bool = Field(default=False, description="Save sequence file to /tmp/sequence.txt")
449
 
450
 
451
  class StepDetail(BaseModel):
 
467
  detailed_plan: List[StepDetail]
468
 
469
 
470
+ app = FastAPI(
471
+ title="Arabic Sign Language NLP API",
472
+ description="Translates Arabic text (Fus-ha and Ammiya) into sign animation sequences.",
473
+ version="1.0.0",
474
+ )
475
 
476
  app.add_middleware(
477
  CORSMiddleware,
 
496
  result = translator.translate(request.text, save_to_file=request.save_sequence)
497
  except Exception as e:
498
  raise HTTPException(status_code=500, detail=str(e))
 
499
  if result["status"] == "error":
500
  raise HTTPException(status_code=422, detail=result["message"])
 
501
  return TranslateResponse(
502
  status=result["status"],
503
  input_text=request.text,
 
507
  letter_count=result.get("letter_count", 0),
508
  missing_keypoint_files=result.get("missing_keypoint_files", []),
509
  detailed_plan=[
510
+ StepDetail(type=s["type"], identifier=s["identifier"], source_word=s["source_word"],
511
+ confidence=s["confidence"], method=s["method"])
512
+ for s in result.get("detailed_plan", [])
513
  ],
514
  )
515
 
516
+
517
+ @app.get("/translate")
518
+ def translate_get(
519
+ text: str = Query(description="Arabic text to translate"),
520
+ save_sequence: bool = Query(default=False),
521
+ ):
522
+ return translate_post(TranslateRequest(text=text, save_sequence=save_sequence))
523
+
524
+
525
+ @app.get("/signs")
526
+ def list_signs():
527
+ return {"total": len(sign_matcher.available_signs), "signs": sign_matcher.available_signs}
528
+
529
+
530
+ @app.get("/sequence-file")
531
+ def read_sequence_file():
532
+ path = Config.SEQUENCE_OUTPUT_PATH
533
+ if not os.path.exists(path):
534
+ raise HTTPException(status_code=404, detail="Sequence file not found. Run a translation first.")
535
+ with open(path, "r", encoding="utf-8") as f:
536
+ lines = [line.strip() for line in f.readlines() if line.strip()]
537
+ return {"file_path": path, "sequence": lines, "count": len(lines)}
538
+
539
+
540
  if __name__ == "__main__":
541
  import uvicorn
542
+ uvicorn.run(app, host=Config.API_HOST, port=Config.API_PORT)