Upload 10 files
Browse files- .gitattributes +1 -0
- Dockerfile +35 -0
- README.md +58 -4
- app.py +542 -0
- arabic_sign_lang_features.csv +3 -0
- keypoints/أرقام.npy +3 -0
- keypoints/حجاب.npy +3 -0
- keypoints/طاوله.npy +3 -0
- keypoints/كلمه.npy +3 -0
- keypoints/مع_السلامه.npy +3 -0
- requirements.txt +11 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
arabic_sign_lang_features.csv filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for layer caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code AND the data files (CSV + keypoints/) in one
# layer. The previous version only copied app.py and left the data COPY
# lines commented out, so the container started without its sign database.
# Use a .dockerignore to exclude anything that should not ship.
COPY . .

# Expose port (HF Spaces requires 7860)
EXPOSE 7860

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV CSV_PATH=arabic_sign_lang_features.csv
ENV KEYPOINTS_FOLDER=keypoints
ENV SIMILARITY_THRESHOLD=0.72

# Run the app
CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,10 +1,64 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: green
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Arabic Sign Language NLP API
|
| 3 |
+
emoji: 🤟
|
| 4 |
colorFrom: green
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# Arabic Sign Language NLP API
|
| 12 |
+
|
| 13 |
+
Translates Arabic text (Fus-ha and Ammiya) into sign animation sequences.
|
| 14 |
+
|
| 15 |
+
## Endpoints
|
| 16 |
+
|
| 17 |
+
| Method | Path | Description |
|
| 18 |
+
|--------|------|-------------|
|
| 19 |
+
| GET | `/` | Health check — returns model info and sign count |
|
| 20 |
+
| POST | `/translate` | Translate Arabic text (JSON body) |
|
| 21 |
+
| GET | `/translate?text=...` | Quick translate via URL param |
|
| 22 |
+
| GET | `/signs` | List all signs in the database |
|
| 23 |
+
| GET | `/sequence-file` | Read the last saved sequence file |
|
| 24 |
+
|
| 25 |
+
## POST /translate
|
| 26 |
+
|
| 27 |
+
```json
|
| 28 |
+
{
|
| 29 |
+
"text": "انا عايز اروح المدرسة",
|
| 30 |
+
"save_sequence": false
|
| 31 |
+
}
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
**Response:**
|
| 35 |
+
```json
|
| 36 |
+
{
|
| 37 |
+
"status": "success",
|
| 38 |
+
"input_text": "انا عايز اروح المدرسة",
|
| 39 |
+
"sequence": ["انا", "يريد", "يذهب", "مدرسة"],
|
| 40 |
+
"total_steps": 4,
|
| 41 |
+
"sign_count": 4,
|
| 42 |
+
"letter_count": 0,
|
| 43 |
+
"missing_keypoint_files": [],
|
| 44 |
+
"detailed_plan": [...]
|
| 45 |
+
}
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
## Setup
|
| 49 |
+
|
| 50 |
+
1. Upload your `arabic_sign_lang_features.csv` to the Space files.
|
| 51 |
+
2. (Optional) Upload your `keypoints/` folder for `.npy` validation.
|
| 52 |
+
3. Set `CSV_PATH` env variable if your CSV has a different name.
|
| 53 |
+
|
| 54 |
+
## Environment Variables
|
| 55 |
+
|
| 56 |
+
| Variable | Default | Description |
|
| 57 |
+
|----------|---------|-------------|
|
| 58 |
+
| `CSV_PATH` | `arabic_sign_lang_features.csv` | Path to sign label CSV |
|
| 59 |
+
| `KEYPOINTS_FOLDER` | `keypoints` | Folder with .npy files |
|
| 60 |
+
| `SIMILARITY_THRESHOLD` | `0.72` | AraBERT match threshold |
|
| 61 |
+
|
| 62 |
+
## Interactive Docs
|
| 63 |
+
|
| 64 |
+
Visit `/docs` for the Swagger UI.
|
app.py
ADDED
|
@@ -0,0 +1,542 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import json
|
| 4 |
+
import logging
|
| 5 |
+
import warnings
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import List, Dict, Optional, Tuple
|
| 8 |
+
from dataclasses import dataclass, field
|
| 9 |
+
from enum import Enum
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
import pandas as pd
|
| 13 |
+
import torch
|
| 14 |
+
import stanza
|
| 15 |
+
import pyarabic.araby as araby
|
| 16 |
+
from sentence_transformers import SentenceTransformer, util
|
| 17 |
+
from fastapi import FastAPI, HTTPException, Query
|
| 18 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 19 |
+
from pydantic import BaseModel, Field
|
| 20 |
+
|
| 21 |
+
warnings.filterwarnings("ignore")
|
| 22 |
+
logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
|
| 23 |
+
logger = logging.getLogger("ArabicSignNLP")
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# ----- Project Configuration -----
class Config:
    """Central project configuration, overridable via environment variables."""

    # Path to the CSV dataset containing sign labels.
    # On HF Spaces, upload your CSV to the repo and set the path here.
    CSV_PATH: str = os.getenv("CSV_PATH", "arabic_sign_lang_features.csv")

    # Folder where .npy keypoint files are stored (optional on HF Spaces).
    KEYPOINTS_FOLDER: str = os.getenv("KEYPOINTS_FOLDER", "keypoints")

    # Output file path for the Blender sequence.
    # Now env-overridable for consistency with CSV_PATH / KEYPOINTS_FOLDER;
    # the default is unchanged, so existing deployments are unaffected.
    SEQUENCE_OUTPUT_PATH: str = os.getenv("SEQUENCE_OUTPUT_PATH", "/tmp/sequence.txt")

    # AraBERT model for Arabic semantic understanding.
    EMBEDDING_MODEL: str = "aubmindlab/bert-base-arabertv2"

    # Minimum cosine similarity for a semantic sign match.
    SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.72"))

    # Include prepositions in signing (skipped when False).
    INCLUDE_PREPOSITION_WORDS: bool = False

    # FastAPI server settings (HF Spaces uses port 7860).
    API_HOST: str = "0.0.0.0"
    API_PORT: int = 7860

    # Column name in the CSV that contains the sign labels.
    CSV_LABEL_COLUMN: str = "label"
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# ----- Arabic Letter Mapping -----
# Maps each Arabic letter (plus orthographic variants and the lam-alef
# ligature) to the Latin label of its fingerspelling keypoint file.
ARABIC_LETTER_TO_LABEL: Dict[str, str] = {
    # Alef and its hamza/madda variants all share one sign.
    "ا": "Alef",
    "أ": "Alef",
    "إ": "Alef",
    "آ": "Alef",
    "ب": "Beh",
    "ت": "Teh",
    "ة": "Teh_Marbuta",
    "ث": "Theh",
    "ج": "Jeem",
    "ح": "Hah",
    "خ": "Khah",
    "د": "Dal",
    "ذ": "Thal",
    "ر": "Reh",
    "ز": "Zain",
    "س": "Seen",
    "ش": "Sheen",
    "ص": "Sad",
    "ض": "Dad",
    "ط": "Tah",
    "ظ": "Zah",
    "ع": "Ain",
    "غ": "Ghain",
    "ف": "Feh",
    "ق": "Qaf",
    "ك": "Kaf",
    "ل": "Lam",
    "م": "Meem",
    "ن": "Noon",
    "ه": "Heh",
    "و": "Waw",
    "ي": "Yeh",
    # Alef maqsura is signed like yeh.
    "ى": "Yeh",
    # Two-character lam-alef ligature, handled specially by the fingerspeller.
    "لا": "Laa",
}
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
# ----- Text Normalizer -----
class ArabicTextNormalizer:
    """Normalizes Arabic text: dialect-to-Fus-ha word mapping, diacritic and
    tatweel removal, hamza/letter-variant unification, non-Arabic stripping."""

    # Dialect (Ammiya) word -> Fus-ha replacement, applied word-by-word.
    DIALECT_TO_FUSA: Dict[str, str] = {
        "مش": "لا", "مو": "لا", "ماش": "لا",
        "عايز": "يريد", "عاوز": "يريد", "بدي": "يريد", "بدك": "يريد", "بده": "يريد",
        "حابب": "يحب", "بحب": "يحب", "باحب": "يحب", "بتحب": "يحب",
        "فين": "اين", "وين": "اين", "منين": "من اين", "منيين": "من اين",
        "ايه": "ماذا", "ايش": "ماذا", "شو": "ماذا", "وش": "ماذا",
        "كيفك": "كيف حالك", "كيفكم": "كيف حالكم", "عامل ايه": "كيف حالك",
        "تعال": "اقبل", "تعالى": "اقبل",
        "هيك": "هكذا", "كده": "هكذا", "كدا": "هكذا", "هكيه": "هكذا",
        "دلوقتي": "الان", "دلوقت": "الان", "هلا": "الان", "هلق": "الان", "هسه": "الان",
        "بكره": "غدا", "بكرا": "غدا", "بكرة": "غدا",
        "امبارح": "امس", "مبارح": "امس",
        "ليش": "لماذا", "ليه": "لماذا", "علاش": "لماذا",
        "تمام": "جيد", "ماشي": "جيد", "عادي": "جيد",
        "روح": "يذهب", "اروح": "يذهب", "يروح": "يذهب", "رايح": "يذهب",
        "جاي": "يأتي", "جاية": "يأتي", "جاييين": "يأتي",
        "اشتري": "يشتري", "اشترى": "يشتري", "بشتري": "يشتري", "بيشتري": "يشتري",
        "باكل": "ياكل", "بياكل": "ياكل",
        "بشرب": "يشرب", "بيشرب": "يشرب",
        "عارف": "يعرف", "عارفة": "يعرف", "بعرف": "يعرف",
        "شغل": "عمل", "بشتغل": "يعمل", "بيشتغل": "يعمل",
    }

    # Suffixes stripped when looking a word up in DIALECT_TO_FUSA.
    _SUFFIXES = ["ين", "ون", "ات", "ة", "ها", "هم", "هن", "كم", "كن", "نا", "وا", "ا"]

    def __init__(self):
        # Pre-compiled patterns: anything outside the Arabic Unicode block,
        # runs of whitespace, and the tatweel (kashida) elongation character.
        self._non_arabic_pattern = re.compile(r"[^\u0600-\u06FF\s]")
        self._multi_space_pattern = re.compile(r"\s+")
        self._tatweel_pattern = re.compile(r"\u0640+")

    def normalize(self, text: str) -> str:
        """Return the canonical form of *text*.

        Raises:
            ValueError: if the input is empty/non-string, or nothing remains
                after normalization.
        """
        if not text or not isinstance(text, str):
            raise ValueError("Input text must be a non-empty string.")
        cleaned = self._apply_dialect_mapping(text.strip())
        cleaned = araby.strip_tashkeel(cleaned)
        cleaned = self._tatweel_pattern.sub("", cleaned)
        # Unify hamza-carrying alef forms to plain alef, then waw/yeh hamza
        # carriers to waw.
        cleaned = re.sub(r"[\u0625\u0623\u0622]", "\u0627", cleaned)
        cleaned = re.sub(r"[\u0624\u0626]", "\u0648", cleaned)
        # Word-final alef maqsura -> yeh; word-final teh marbuta -> heh.
        cleaned = re.sub(r"\u0649(?=\s|$)", "\u064a", cleaned)
        cleaned = re.sub(r"\u0629(?=\s|$)", "\u0647", cleaned)
        cleaned = self._non_arabic_pattern.sub(" ", cleaned)
        cleaned = self._multi_space_pattern.sub(" ", cleaned).strip()
        if not cleaned:
            raise ValueError("Text became empty after normalization.")
        return cleaned

    def _apply_dialect_mapping(self, text: str) -> str:
        """Replace each dialect word with its Fus-ha equivalent."""
        return " ".join(self._map_word(token) for token in text.split())

    def _map_word(self, word: str) -> str:
        """Map one word through the dialect table, trying suffix-stripped stems."""
        direct = self.DIALECT_TO_FUSA.get(word)
        if direct is not None:
            return direct
        for suffix in self._SUFFIXES:
            if word.endswith(suffix) and len(word) > len(suffix) + 1:
                stem = word[: -len(suffix)]
                if stem in self.DIALECT_TO_FUSA:
                    return self.DIALECT_TO_FUSA[stem]
        return word

    def normalize_label(self, label: str) -> str:
        """Normalize a database label; fall back to the raw label on failure."""
        try:
            return self.normalize(label)
        except ValueError:
            return label
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
# ----- NLP Processor -----
@dataclass
class ProcessedWord:
    """A token that survived the stop-word/POS filters, annotated for matching."""

    # Surface form as it appeared in the normalized input.
    original: str
    # Surface form with a leading definite article ("ال") stripped.
    normalized: str
    # Lemma from the NLP pipeline (falls back to the surface form).
    lemma: str
    # Universal POS tag (falls back to "NOUN").
    pos: str
    # NER flags — presumably names get fingerspelled rather than signed
    # (see the plan builder).
    is_person_name: bool
    is_place_name: bool
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
class ArabicNLPProcessor:
    """Runs the Stanza Arabic pipeline and filters tokens down to signable words."""

    # Particles/conjunctions that never get their own sign.
    SKIP_WORDS_CORE = {"و", "ف", "ب", "ل", "ك", "ال", "قد", "لقد", "سوف", "ان", "إن", "لان", "حتى", "كي"}
    # Prepositions: skipped unless Config.INCLUDE_PREPOSITION_WORDS is set.
    SKIP_WORDS_PREPOSITIONS = {"في", "من", "الى", "على", "عن", "مع", "عند", "لدى"}
    # Words beginning with "ال" where it is NOT the definite article.
    _AL_WHITELIST = {"الان", "الله", "الذي", "التي", "اللذين", "اللتين"}
    # POS tags that are never signed.
    _SKIP_POS = {"PUNCT", "SYM", "X", "DET", "CCONJ", "SCONJ"}

    def __init__(self):
        # Stanza pipeline; built lazily by load().
        self._pipeline = None

    def load(self):
        """Download (if needed) and build the Stanza Arabic pipeline."""
        logger.info("Downloading Stanza Arabic models...")
        stanza.download("ar", verbose=False)
        self._pipeline = stanza.Pipeline(lang="ar", processors="tokenize,mwt,pos,lemma,ner", verbose=False)
        logger.info("Stanza Arabic pipeline ready.")

    def _active_skip_words(self) -> set:
        """Stop-word set for this run, honoring the preposition toggle."""
        skip = set(self.SKIP_WORDS_CORE)
        if not Config.INCLUDE_PREPOSITION_WORDS:
            skip.update(self.SKIP_WORDS_PREPOSITIONS)
        return skip

    def _strip_al(self, word: str) -> str:
        """Remove a leading definite article unless the word is whitelisted."""
        if word in self._AL_WHITELIST:
            return word
        if word.startswith("ال") and len(word) > 3:
            return word[2:]
        return word

    def process(self, normalized_text: str) -> List[ProcessedWord]:
        """Tokenize, tag and filter *normalized_text*; return signable words.

        Raises:
            RuntimeError: if load() has not been called yet.
        """
        if self._pipeline is None:
            raise RuntimeError("Call load() before process().")
        doc = self._pipeline(normalized_text)
        skip_words = self._active_skip_words()
        results: List[ProcessedWord] = []
        for sentence in doc.sentences:
            for word in sentence.words:
                # Drop stop words, single characters, and unsignable POS tags.
                if word.text in skip_words or len(word.text) <= 1:
                    continue
                if word.pos in self._SKIP_POS:
                    continue
                # NER tag lives on the parent token (multi-word tokens).
                ner_tag = word.parent.ner if word.parent else "O"
                results.append(ProcessedWord(
                    original=word.text,
                    normalized=self._strip_al(word.text),
                    lemma=word.lemma if word.lemma else word.text,
                    pos=word.pos if word.pos else "NOUN",
                    is_person_name="PER" in ner_tag or "PERS" in ner_tag,
                    is_place_name="LOC" in ner_tag or "GPE" in ner_tag,
                ))
        return results
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
# ----- Sign Matcher -----
@dataclass
class SignMatch:
    """Outcome of looking a word up in the sign database."""

    # True when a sign matched at/above the similarity threshold.
    found: bool
    # Best-matching sign label ("" when nothing is loaded).
    sign_label: str
    # Match confidence in [0, 1].
    confidence: float
    # How the match was made: "exact", "lemma", "semantic", or "none".
    method: str
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
class SemanticSignMatcher:
    """Matches Arabic words to sign labels: exact -> lemma -> semantic (AraBERT)."""

    def __init__(self, csv_path: str, label_column: str, threshold: float):
        self.threshold = threshold
        self._word_signs: List[str] = []   # normalized labels (lookup keys)
        self._raw_labels: List[str] = []   # original labels (returned to callers)
        self._sign_embeddings = None
        self._model: Optional[SentenceTransformer] = None
        self._device = "cuda" if torch.cuda.is_available() else "cpu"
        self._normalizer: Optional[ArabicTextNormalizer] = None
        self._load_database(csv_path, label_column)

    def set_normalizer(self, normalizer: ArabicTextNormalizer):
        """Attach the normalizer used to canonicalize labels and queries."""
        self._normalizer = normalizer

    def _normalize_label(self, label: str) -> str:
        """Normalize through the attached normalizer, or pass through."""
        return self._normalizer.normalize_label(label) if self._normalizer else label

    def _load_database(self, csv_path: str, label_column: str):
        """Read the CSV and keep only labels containing Arabic characters.

        Raises:
            ValueError: if *label_column* is missing from the CSV.
        """
        if not os.path.exists(csv_path):
            logger.warning(f"CSV not found at {csv_path}. No word signs loaded.")
            return
        df = pd.read_csv(csv_path, low_memory=False)
        if label_column not in df.columns:
            raise ValueError(f"Column '{label_column}' not found. Available: {list(df.columns)}")
        unique_labels = df[label_column].dropna().unique().tolist()
        arabic_labels = [
            str(lbl) for lbl in unique_labels
            if isinstance(lbl, str) and any("\u0600" <= ch <= "\u06ff" for ch in str(lbl))
        ]
        self._raw_labels = arabic_labels
        self._word_signs = arabic_labels.copy()
        logger.info(f"Database: {len(arabic_labels)} Arabic word labels loaded.")

    def _finalize_labels(self):
        # Re-key the lookup list through the normalizer once it is attached;
        # _raw_labels stays untouched so callers see the original spelling.
        if self._normalizer and self._raw_labels:
            self._word_signs = [self._normalize_label(lbl) for lbl in self._raw_labels]

    def load_model(self):
        """Load AraBERT and pre-encode every label for cosine-similarity search."""
        self._finalize_labels()
        if not self._word_signs:
            logger.warning("No Arabic words to encode. Skipping model load.")
            return
        logger.info(f"Loading {Config.EMBEDDING_MODEL} on {self._device} ...")
        self._model = SentenceTransformer(Config.EMBEDDING_MODEL, device=self._device)
        logger.info(f"Encoding {len(self._word_signs)} labels...")
        self._sign_embeddings = self._model.encode(
            self._word_signs, convert_to_tensor=True, device=self._device,
            show_progress_bar=True, batch_size=64,
        )
        logger.info("Sign matcher ready.")

    def find_sign(self, word_text: str, lemma: str) -> SignMatch:
        """Look up a word: exact match, then lemma match, then embeddings."""
        if not self._word_signs:
            return SignMatch(found=False, sign_label="", confidence=0.0, method="none")
        norm_word = self._normalize_label(word_text)
        norm_lemma = self._normalize_label(lemma) if lemma else ""
        # 1) exact match on the normalized surface form
        if norm_word in self._word_signs:
            return SignMatch(True, self._raw_labels[self._word_signs.index(norm_word)], 1.0, "exact")
        # 2) exact match on the normalized lemma
        if norm_lemma and norm_lemma != norm_word and norm_lemma in self._word_signs:
            return SignMatch(True, self._raw_labels[self._word_signs.index(norm_lemma)], 0.95, "lemma")
        # 3) semantic similarity — requires the embedding model
        if self._model is None or self._sign_embeddings is None:
            return SignMatch(False, "", 0.0, "none")
        candidates = list({norm_word, norm_lemma} - {""})
        embs = self._model.encode(candidates, convert_to_tensor=True, device=self._device, batch_size=len(candidates))
        scores = util.cos_sim(embs, self._sign_embeddings)
        best_val = float(scores.max())
        # argmax runs over the flattened (candidates x signs) matrix; the
        # modulo recovers the sign (column) index whichever candidate won.
        best_idx = int(scores.argmax() % len(self._word_signs))
        if best_val >= self.threshold:
            return SignMatch(True, self._raw_labels[best_idx], best_val, "semantic")
        return SignMatch(False, self._raw_labels[best_idx] if self._raw_labels else "", best_val, "none")

    def letter_to_label(self, arabic_letter: str) -> Optional[str]:
        """Map a single Arabic letter to its fingerspelling label, or None."""
        return ARABIC_LETTER_TO_LABEL.get(arabic_letter)

    @property
    def available_signs(self) -> List[str]:
        """A copy of the original (raw) sign labels."""
        return self._raw_labels.copy()
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
# ----- Execution Plan Builder -----
class ActionType(str, Enum):
    """Kind of animation step: a whole-word sign or a fingerspelled letter."""

    SIGN = "SIGN"
    LETTER = "LETTER"


@dataclass
class ExecutionStep:
    """A single atomic step in the animation sequence."""

    # SIGN or LETTER.
    action_type: ActionType
    # Sign/letter label — also the base name of the keypoint .npy file.
    identifier: str
    # The input word this step was produced from.
    source_word: str
    # Match confidence (1.0 for fingerspelled letters).
    confidence: float
    # "exact", "lemma", "semantic", or "fingerspell".
    match_method: str
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
class ExecutionPlanBuilder:
    """Turns raw Arabic text into an ordered list of SIGN/LETTER steps."""

    def __init__(self, normalizer: ArabicTextNormalizer, nlp_proc: ArabicNLPProcessor, matcher: SemanticSignMatcher):
        self.normalizer = normalizer
        self.nlp_proc = nlp_proc
        self.matcher = matcher

    def build(self, raw_text: str) -> List[ExecutionStep]:
        """Normalize, analyze, then map each word to a sign or fingerspelling."""
        words = self.nlp_proc.process(self.normalizer.normalize(raw_text))
        plan: List[ExecutionStep] = []
        for word in words:
            # Proper names have no dictionary sign — always fingerspell them.
            if word.is_person_name or word.is_place_name:
                plan.extend(self._fingerspell(word.original))
                continue
            match = self.matcher.find_sign(word.normalized, word.lemma)
            if match.found:
                plan.append(ExecutionStep(ActionType.SIGN, match.sign_label, word.original, match.confidence, match.method))
            else:
                # No sign matched: spell the word out letter by letter.
                plan.extend(self._fingerspell(word.original))
        return plan

    def _fingerspell(self, word: str) -> List[ExecutionStep]:
        """One LETTER step per letter; the lam-alef ligature is a single step."""
        steps: List[ExecutionStep] = []
        pos = 0
        while pos < len(word):
            # Treat "لا" (lam + alef) as one two-character unit.
            if word[pos:pos + 2] == "لا":
                ligature_label = ARABIC_LETTER_TO_LABEL.get("لا")
                if ligature_label:
                    steps.append(ExecutionStep(ActionType.LETTER, ligature_label, word, 1.0, "fingerspell"))
                pos += 2
                continue
            letter_label = ARABIC_LETTER_TO_LABEL.get(word[pos])
            if letter_label:
                steps.append(ExecutionStep(ActionType.LETTER, letter_label, word, 1.0, "fingerspell"))
            pos += 1
        return steps
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
# ----- Sequence Writer -----
class BlenderSequenceWriter:
    """Writes an execution plan as a newline-separated identifier list that a
    Blender import script can consume, and reports on missing keypoint data."""

    def __init__(self, output_path: str, keypoints_folder: str):
        self.output_path = output_path
        self.keypoints_folder = keypoints_folder

    def write(self, plan: "List[ExecutionStep]") -> Dict:
        """Write *plan* to ``output_path`` and return a summary report dict.

        Raises:
            ValueError: if the plan is empty.
        """
        if not plan:
            raise ValueError("Execution plan is empty.")
        output_dir = Path(self.output_path).parent
        output_dir.mkdir(parents=True, exist_ok=True)
        identifiers = [step.identifier for step in plan]
        missing_files = self._check_missing_keypoints(plan)
        with open(self.output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(identifiers))
        sign_steps = [s for s in plan if s.action_type == ActionType.SIGN]
        letter_steps = [s for s in plan if s.action_type == ActionType.LETTER]
        return {
            "output_file": self.output_path,
            "total_steps": len(plan),
            "sign_count": len(sign_steps),
            "letter_count": len(letter_steps),
            "missing_keypoint_files": missing_files,
            "sequence": identifiers,
            "detailed_plan": [
                {"step": i + 1, "type": s.action_type.value, "identifier": s.identifier,
                 "source_word": s.source_word, "confidence": round(s.confidence, 3), "method": s.match_method}
                for i, s in enumerate(plan)
            ],
        }

    def _check_missing_keypoints(self, plan: "List[ExecutionStep]") -> List[str]:
        """Return names of .npy files the plan needs but the folder lacks.

        Fix: each missing file is reported exactly once, in first-use order —
        the original appended a duplicate entry for every step that reused the
        same identifier (common for repeated letters).
        """
        missing: List[str] = []
        seen = set()
        for step in plan:
            if step.identifier in seen:
                continue
            seen.add(step.identifier)
            npy_path = os.path.join(self.keypoints_folder, f"{step.identifier}.npy")
            if not os.path.exists(npy_path):
                missing.append(f"{step.identifier}.npy")
        return missing
|
| 395 |
+
|
| 396 |
+
|
| 397 |
+
# ----- Main Translator -----
class ArabicSignTranslator:
    """Facade: builds an execution plan and optionally persists it for Blender."""

    def __init__(self, plan_builder: ExecutionPlanBuilder, writer: BlenderSequenceWriter):
        self.builder = plan_builder
        self.writer = writer

    def translate(self, text: str, save_to_file: bool = True) -> Dict:
        """Translate *text*; return a summary dict (status, sequence, counts)."""
        plan = self.builder.build(text)
        if not plan:
            return {"status": "error", "message": "No translatable content found.", "input": text}
        result: Dict = {"status": "success", "input": text}
        if save_to_file:
            result.update(self.writer.write(plan))
            return result
        # In-memory summary mirroring the writer's report (minus the file;
        # missing-keypoint checking is only done on the save path).
        result["sequence"] = [step.identifier for step in plan]
        result["total_steps"] = len(plan)
        result["sign_count"] = sum(1 for s in plan if s.action_type == ActionType.SIGN)
        result["letter_count"] = sum(1 for s in plan if s.action_type == ActionType.LETTER)
        result["missing_keypoint_files"] = []
        result["detailed_plan"] = [
            {"type": s.action_type.value, "identifier": s.identifier,
             "source_word": s.source_word, "confidence": round(s.confidence, 3), "method": s.match_method}
            for s in plan
        ]
        return result
|
| 423 |
+
|
| 424 |
+
|
| 425 |
+
# ----- Initialize Components -----
# Module-level initialization: runs once at import time (i.e. on app startup).
# NOTE(review): nlp_processor.load() downloads Stanza models and
# sign_matcher.load_model() loads AraBERT, so the first start needs network
# access and can take a while.
logger.info("Initializing pipeline components...")
normalizer = ArabicTextNormalizer()
nlp_processor = ArabicNLPProcessor()
nlp_processor.load()

sign_matcher = SemanticSignMatcher(
    csv_path=Config.CSV_PATH,
    label_column=Config.CSV_LABEL_COLUMN,
    threshold=Config.SIMILARITY_THRESHOLD,
)
# The normalizer must be attached BEFORE load_model(): load_model() calls
# _finalize_labels(), which re-keys the labels through the normalizer.
sign_matcher.set_normalizer(normalizer)
sign_matcher.load_model()

plan_builder = ExecutionPlanBuilder(normalizer, nlp_processor, sign_matcher)
writer = BlenderSequenceWriter(Config.SEQUENCE_OUTPUT_PATH, Config.KEYPOINTS_FOLDER)
translator = ArabicSignTranslator(plan_builder, writer)
logger.info("All components ready.")
|
| 443 |
+
|
| 444 |
+
|
| 445 |
+
# ----- API Schemas -----
class TranslateRequest(BaseModel):
    """Request body for POST /translate."""

    text: str = Field(
        description="Arabic input text (Fus-ha or Ammiya)",
        min_length=1,
        max_length=4000,
        examples=["انا عايز اروح المدرسة"],
    )
    save_sequence: bool = Field(default=False, description="Save sequence file to /tmp/sequence.txt")


class StepDetail(BaseModel):
    """One step of the execution plan, as returned to the client."""

    type: str
    identifier: str
    source_word: str
    confidence: float
    method: str


class TranslateResponse(BaseModel):
    """Response body for /translate."""

    status: str
    input_text: str
    sequence: List[str]
    total_steps: int
    sign_count: int
    letter_count: int
    missing_keypoint_files: List[str]
    detailed_plan: List[StepDetail]
|
| 468 |
+
|
| 469 |
+
|
| 470 |
+
app = FastAPI(
    title="Arabic Sign Language NLP API",
    description="Translates Arabic text (Fus-ha and Ammiya) into sign animation sequences.",
    version="1.0.0",
)

# Open CORS: the API is meant to be called from arbitrary front-ends.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/")
def health_check():
    """Health check: reports the embedding model and database size."""
    return {
        "status": "running",
        "model": Config.EMBEDDING_MODEL,
        "signs_in_database": len(sign_matcher.available_signs),
    }


@app.post("/translate", response_model=TranslateResponse)
def translate_post(request: TranslateRequest):
    """Translate Arabic text into a sign/letter execution sequence."""
    try:
        result = translator.translate(request.text, save_to_file=request.save_sequence)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    if result["status"] == "error":
        raise HTTPException(status_code=422, detail=result["message"])
    details = [
        StepDetail(
            type=item["type"],
            identifier=item["identifier"],
            source_word=item["source_word"],
            confidence=item["confidence"],
            method=item["method"],
        )
        for item in result.get("detailed_plan", [])
    ]
    return TranslateResponse(
        status=result["status"],
        input_text=request.text,
        sequence=result.get("sequence", []),
        total_steps=result.get("total_steps", 0),
        sign_count=result.get("sign_count", 0),
        letter_count=result.get("letter_count", 0),
        missing_keypoint_files=result.get("missing_keypoint_files", []),
        detailed_plan=details,
    )


@app.get("/translate")
def translate_get(
    text: str = Query(description="Arabic text to translate"),
    save_sequence: bool = Query(default=False),
):
    """Convenience GET wrapper around the POST endpoint."""
    return translate_post(TranslateRequest(text=text, save_sequence=save_sequence))


@app.get("/signs")
def list_signs():
    """List every sign label available in the database."""
    return {"total": len(sign_matcher.available_signs), "signs": sign_matcher.available_signs}


@app.get("/sequence-file")
def read_sequence_file():
    """Return the contents of the last saved sequence file."""
    path = Config.SEQUENCE_OUTPUT_PATH
    if not os.path.exists(path):
        raise HTTPException(status_code=404, detail="Sequence file not found. Run a translation first.")
    with open(path, "r", encoding="utf-8") as f:
        lines = [entry.strip() for entry in f if entry.strip()]
    return {"file_path": path, "sequence": lines, "count": len(lines)}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host=Config.API_HOST, port=Config.API_PORT)
|
arabic_sign_lang_features.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:239288257f7eb9dff6c3f957536b066887f2ef30224caf97453ad087e1df34af
|
| 3 |
+
size 16081522
|
keypoints/أرقام.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8fcca94e751888514e14418aba9743d8ca80f2ff73085befb669f42a3b6d2290
|
| 3 |
+
size 40808
|
keypoints/حجاب.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f60d098fde9afb65ed8bf554003ebc70ec222e516664890340df3cf23cb32997
|
| 3 |
+
size 81488
|
keypoints/طاوله.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d66024eb0527d27eb6107ca78201623987ae88a1e628686cd19e0f8d9a5e3b39
|
| 3 |
+
size 81488
|
keypoints/كلمه.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d5d74b528ae26e9927b825d8919108de076ab47b4e831079857219a45e7925e
|
| 3 |
+
size 54368
|
keypoints/مع_السلامه.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43cff4780d51b6ae7758ba13a9cba505205e3afa1cc2c6d9f10a918d22d2d249
|
| 3 |
+
size 81488
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
pyarabic
|
| 4 |
+
stanza
|
| 5 |
+
sentence-transformers
|
| 6 |
+
transformers
|
| 7 |
+
torch
|
| 8 |
+
pandas
|
| 9 |
+
numpy
|
| 10 |
+
python-multipart
|
| 11 |
+
pydantic
|