Upload 29 files
Browse files- .gitattributes +1 -0
- Dockerfile +14 -0
- README.md +8 -6
- app.py +66 -0
- libqutrub/__init__.py +1 -0
- libqutrub/__pycache__/__init__.cpython-313.pyc +0 -0
- libqutrub/__pycache__/ar_verb.cpython-313.pyc +0 -0
- libqutrub/__pycache__/classverb.cpython-313.pyc +0 -0
- libqutrub/__pycache__/conjugatedisplay.cpython-313.pyc +0 -0
- libqutrub/__pycache__/mosaref_main.cpython-313.pyc +0 -0
- libqutrub/__pycache__/triverbtable.cpython-313.pyc +3 -0
- libqutrub/__pycache__/verb_const.cpython-313.pyc +0 -0
- libqutrub/__pycache__/verb_db.cpython-313.pyc +0 -0
- libqutrub/__pycache__/verb_valid.cpython-313.pyc +0 -0
- libqutrub/alefmaddaverbtable.py +77 -0
- libqutrub/ar_ctype.py +88 -0
- libqutrub/ar_verb.py +1232 -0
- libqutrub/arabic_const.py +113 -0
- libqutrub/classnoun.py +368 -0
- libqutrub/classverb.py +1101 -0
- libqutrub/conjugate.py +166 -0
- libqutrub/conjugatedisplay.py +568 -0
- libqutrub/mosaref_main.py +140 -0
- libqutrub/stack.py +177 -0
- libqutrub/triverbtable.py +0 -0
- libqutrub/verb_const.py +613 -0
- libqutrub/verb_db.py +149 -0
- libqutrub/verb_valid.py +359 -0
- requirements.txt +2 -0
- templates/index.html +94 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
libqutrub/__pycache__/triverbtable.cpython-313.pyc filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
COPY requirements.txt .
|
| 6 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
+
|
| 8 |
+
COPY . .
|
| 9 |
+
|
| 10 |
+
ENV PYTHONUNBUFFERED=1
|
| 11 |
+
|
| 12 |
+
EXPOSE 7860
|
| 13 |
+
|
| 14 |
+
CMD ["python", "app.py"]
|
README.md
CHANGED
|
@@ -1,12 +1,14 @@
|
|
| 1 |
---
|
| 2 |
-
title: Mosaref
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
-
short_description:
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Al Mosaref
|
| 3 |
+
emoji: 📚
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
+
short_description: Arabic verb conjugation with Qutrub
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# المصرّف
|
| 13 |
+
|
| 14 |
+
Application de conjugaison des verbes arabes avec Qutrub + Flask.
|
app.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
|
| 3 |
+
from flask import Flask, request, jsonify, render_template
|
| 4 |
+
from libqutrub.mosaref_main import do_sarf
|
| 5 |
+
|
| 6 |
+
app = Flask(__name__, template_folder="templates")
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
@app.route("/")
def home():
    """Serve the main conjugation page (templates/index.html)."""
    return render_template("index.html")
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@app.route("/conjugate", methods=["POST"])
def conjugate():
    """Conjugate an Arabic verb posted as JSON.

    Expects a JSON body: {"word": str, "future_type": str, "tense": str}.
    Returns JSON: {"result": <HTML conjugation table or error paragraph>}.
    """
    # silent=True: a missing or malformed JSON body yields None instead of
    # raising; fall back to an empty dict so .get() below stays safe.
    data = request.get_json(silent=True) or {}

    word = data.get("word", "").strip()
    future_type = data.get("future_type", "فتحة")
    tense = data.get("tense", "past")

    error_response = jsonify(
        {"result": "<p>تعذر تصريف الفعل. تأكد من صحة الإدخال.</p>"}
    )
    # Robustness: reject an empty word up front rather than passing it on.
    if not word:
        return error_response

    # Flag set consumed by do_sarf; exactly the tense flags below are toggled.
    params = {
        "alltense": False,
        "past": False,
        "future": False,
        "imperative": False,
        "future_moode": False,
        "confirmed": False,
        "passive": False,
        "transitive": True,
    }

    # Map the requested tense onto do_sarf flags.
    # NOTE(review): "jussive" and "subjunctive" map to the same flag pair
    # (future + future_moode) — confirm do_sarf can distinguish them.
    if tense == "past":
        params["past"] = True
    elif tense == "future":
        params["future"] = True
    elif tense in ("jussive", "subjunctive"):
        params["future"] = True
        params["future_moode"] = True
    elif tense == "confirmed":
        params["future"] = True
        params["confirmed"] = True
    elif tense == "imperative":
        params["imperative"] = True
    elif tense == "confirmed_imperative":
        params["imperative"] = True
        params["confirmed"] = True

    result = do_sarf(
        word=word,
        future_type=future_type,
        display_format="HTML",
        **params
    )

    if result is None:
        return error_response

    return jsonify({"result": result})
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
if __name__ == "__main__":
    # Bind on all interfaces at 7860, the Hugging Face Spaces default port.
    app.run(host="0.0.0.0", port=7860, debug=False)
|
libqutrub/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
libqutrub/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (171 Bytes). View file
|
|
|
libqutrub/__pycache__/ar_verb.cpython-313.pyc
ADDED
|
Binary file (39.3 kB). View file
|
|
|
libqutrub/__pycache__/classverb.cpython-313.pyc
ADDED
|
Binary file (50.4 kB). View file
|
|
|
libqutrub/__pycache__/conjugatedisplay.cpython-313.pyc
ADDED
|
Binary file (22.1 kB). View file
|
|
|
libqutrub/__pycache__/mosaref_main.cpython-313.pyc
ADDED
|
Binary file (5.9 kB). View file
|
|
|
libqutrub/__pycache__/triverbtable.cpython-313.pyc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17ddedc943edf2b9176f73818945e6414f5ad919c2b4c43a517d221b28382c14
|
| 3 |
+
size 737469
|
libqutrub/__pycache__/verb_const.cpython-313.pyc
ADDED
|
Binary file (16.9 kB). View file
|
|
|
libqutrub/__pycache__/verb_db.cpython-313.pyc
ADDED
|
Binary file (4.49 kB). View file
|
|
|
libqutrub/__pycache__/verb_valid.cpython-313.pyc
ADDED
|
Binary file (11.3 kB). View file
|
|
|
libqutrub/alefmaddaverbtable.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Verbs beginning with ALEF MADDA (آ), mapped to their spelled-out hamza
# normalizations (first entry is the preferred form).
# BUG FIX: the original literal repeated the keys u'آتى' and u'آسى'; in a
# dict literal the last occurrence silently wins, so the duplicates were
# removed keeping the last-listed value for each.
AlefMaddaVerbTable = {
    u'آبل': [u'أءبل'],
    u'آبه': [u'أءبه'],
    u'آبى': [u'أءبى'],
    u'آتم': [u'أءتم'],
    u'آتن': [u'أءتن'],
    u'آتى': [u'أءتى'],
    u'آثر': [u'أءثر'],
    u'آثف': [u'أءثف'],
    u'آثم': [u'أءثم'],
    u'آثى': [u'ءاثى'],
    u'آجد': [u'أءجد'],
    u'آجر': [u'أءجر', u'ءاجر'],
    u'آجل': [u'أءجل'],
    u'آجم': [u'أءجم'],
    u'آحن': [u'ءاحن'],
    u'آخذ': [u'ءاخذ'],
    u'آخى': [u'أءخى', u'ءاخى'],
    u'آدب': [u'أءدب'],
    u'آدم': [u'أءدم'],
    u'آدى': [u'أءدى'],
    u'آذن': [u'أءذن'],
    u'آذى': [u'أءذى'],
    u'آرب': [u'أءرب', u'ءارب'],
    u'آرخ': [u'أءرخ'],
    u'آرس': [u'أءرس'],
    u'آرض': [u'أءرض'],
    u'آرط': [u'أءرط'],
    u'آرف': [u'ءارف'],
    u'آرق': [u'أءرق'],
    u'آرك': [u'أءرك'],
    u'آرم': [u'ءارم'],
    u'آرن': [u'أءرن', u'ءارن'],
    u'آرى': [u'أءرى'],
    u'آزر': [u'ءازر'],
    u'آزف': [u'أءزف'],
    u'آزل': [u'أءزل'],
    u'آزى': [u'أءزى', u'ءازى'],
    u'آسب': [u'أءسب'],
    u'آسد': [u'أءسد'],
    u'آسف': [u'أءسف'],
    u'آسن': [u'أءسن'],
    u'آسى': [u'أءسى', u'ءاسى'],
    u'آشى': [u'أءشى'],
    u'آصد': [u'أءصد'],
    u'آصر': [u'ءاصر'],
    u'آصل': [u'أءصل'],
    u'آضّ': [u'ءاضّ'],
    u'آطم': [u'أءطم'],
    u'آفك': [u'أءفك'],
    u'آفى': [u'أءفى'],
    u'آقط': [u'أءقط'],
    u'آكد': [u'أءكد'],
    u'آكر': [u'ءاكر'],
    u'آكف': [u'أءكف'],
    u'آكل': [u'أءكل', u'ءاكل'],
    u'آلت': [u'أءلت'],
    u'آلس': [u'ءالس'],
    u'آلف': [u'أءلف', u'ءالف'],
    u'آلم': [u'أءلم'],
    u'آلى': [u'أءلى'],
    u'آمر': [u'أءمر', u'ءامر'],
    u'آمن': [u'أءمن'],
    u'آنث': [u'أءنث'],
    u'آنس': [u'أءنس', u'ءانس'],
    u'آنض': [u'أءنض'],
    u'آنف': [u'أءنف'],
    u'آنق': [u'أءنق'],
    u'آنى': [u'أءنى'],
    u'آهل': [u'أءهل'],
    u'آوب': [u'ءاوب'],
    u'آوى': [u'أءوى'],
    u'آيد': [u'ءايد'],
    u'آيس': [u'أءيس'],
}
|
libqutrub/ar_ctype.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
|
| 2 |
+
# -*- coding=utf-8 -*-
|
| 3 |
+
#************************************************************************
|
| 4 |
+
# $Id: ar_ctype.py,v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
|
| 5 |
+
#
|
| 6 |
+
# ------------
|
| 7 |
+
# Description:
|
| 8 |
+
# ------------
|
| 9 |
+
# Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
|
| 10 |
+
#
|
| 11 |
+
# Elementary function to manipulate arabic texte
|
| 12 |
+
#
|
| 13 |
+
# -----------------
|
| 14 |
+
# Revision Details: (Updated by Revision Control System)
|
| 15 |
+
# -----------------
|
| 16 |
+
# $Date: 2009/06/02 01:10:00 $
|
| 17 |
+
# $Author: Taha Zerrouki $
|
| 18 |
+
# $Revision: 0.7 $
|
| 19 |
+
# $Source: arabtechies.sourceforge.net
|
| 20 |
+
#
|
| 21 |
+
#***********************************************************************/
|
| 22 |
+
|
| 23 |
+
import re#, string,sys
|
| 24 |
+
from arabic_const import *
|
| 25 |
+
# Character classes matching the Arabic short vowels (harakat), with and
# without shadda.
# BUG FIX: the original used Python-2-only ur"" literals, which are a
# SyntaxError on Python 3 (the project targets 3.11/3.13); plain u"" strings
# are equivalent here since the pattern contains no backslash escapes.
HARAKAT_pat = re.compile(u"[%s%s%s%s%s%s%s%s]" % (
    FATHATAN, DAMMATAN, KASRATAN, FATHA, DAMMA, KASRA, SUKUN, SHADDA))
HARAKAT_NO_SHADDA_pat = re.compile(u"[%s%s%s%s%s%s%s]" % (
    FATHATAN, DAMMATAN, KASRATAN, FATHA, DAMMA, KASRA, SUKUN))
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
#strip tatweel from a word and return a result word
|
| 31 |
+
#--------------------------------------
|
| 32 |
+
def ar_strip_tatweel(w):
    """Return *w* with every tatweel (kashida) character removed."""
    return w.replace(TATWEEL, '')
|
| 35 |
+
|
| 36 |
+
#strip tatweel and vowel from a word and return a result word but keep shadda
|
| 37 |
+
#--------------------------------------
|
| 38 |
+
def ar_strip_marks_keepshadda(w):
    """Strip the short vowels from *w*, leaving any shadda in place."""
    return HARAKAT_NO_SHADDA_pat.sub('', w)
| 42 |
+
|
| 43 |
+
|
| 44 |
+
#strip tatweel and vowel from a word and return a result word
|
| 45 |
+
#--------------------------------------
|
| 46 |
+
def ar_strip_marks(w):
    """Strip all vowel marks (short vowels and shadda) from *w*."""
    return HARAKAT_pat.sub('', w)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
#strip pounctuation from the text
|
| 55 |
+
#--------------------------------------
|
| 56 |
+
def ar_strip_punct(w):
    """Replace punctuation, digits, ASCII letters and whitespace in *w*
    with spaces, keeping only the remaining (Arabic) characters.
    """
    # BUG FIX: the module-level import of `string` is commented out at the
    # top of this file (`import re#, string,sys`), so calling this function
    # raised NameError; import it locally instead.
    import string
    return re.sub(r'[%s%s%s%s\\]' % (string.punctuation, string.digits,
                                     string.ascii_letters, string.whitespace),
                  ' ', w)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
#--------------------------------------
|
| 63 |
+
def replace_pos(word, rep, pos):
    """Return *word* with the character at index *pos* replaced by *rep*."""
    prefix, suffix = word[:pos], word[pos + 1:]
    return prefix + rep + suffix
|
| 65 |
+
|
| 66 |
+
def is_valid_arabic_word(word):
    """Heuristically check whether *word* is a well-formed Arabic word.

    Rejects: the empty string; any character outside the Arabic range
    U+0621..U+0652 (lam-alef ligatures allowed); words starting with digits;
    an alef maksura in a non-final position; and a teh marbuta followed by
    anything other than a short vowel.
    """
    if word == "":
        return False
    # any character outside the Arabic block or the lam-alef ligatures?
    if re.search(u"([^\u0621-\u0652\ufefb\ufef7\ufef5])", word):
        return False
    # leading digits
    if re.match(u"([\\d])+", word):
        return False
    # alef maksura is only permitted as the last letter of the word
    if re.match(u"^(.)*[%s](.)+$" % ALEF_MAKSURA, word):
        return False
    # teh marbuta may only be followed by a short vowel (damma/kasra/fatha)
    if re.match(u"^(.)*[%s]([^%s%s%s])(.)+$" % (TEH_MARBUTA, DAMMA, KASRA, FATHA),
                word):
        return False
    return True
|
| 87 |
+
|
| 88 |
+
|
libqutrub/ar_verb.py
ADDED
|
@@ -0,0 +1,1232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
#************************************************************************
|
| 4 |
+
# $Id: ar_verb.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
|
| 5 |
+
#
|
| 6 |
+
# ------------
|
| 7 |
+
# Description:
|
| 8 |
+
# ------------
|
| 9 |
+
# Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
|
| 10 |
+
#
|
| 11 |
+
# Elementary function to manipulate arabic texte
|
| 12 |
+
#
|
| 13 |
+
# -----------------
|
| 14 |
+
# Revision Details: (Updated by Revision Control System)
|
| 15 |
+
# -----------------
|
| 16 |
+
# $Date: 2009/06/02 01:10:00 $
|
| 17 |
+
# $Author: Taha Zerrouki $
|
| 18 |
+
# $Revision: 0.7 $
|
| 19 |
+
# $Source: arabtechies.sourceforge.net
|
| 20 |
+
#
|
| 21 |
+
#***********************************************************************/
|
| 22 |
+
"""
|
| 23 |
+
Basic routines to treat verbs
|
| 24 |
+
ar_verb
|
| 25 |
+
"""
|
| 26 |
+
from __future__ import (
|
| 27 |
+
#~ absolute_import,
|
| 28 |
+
print_function,
|
| 29 |
+
#~ unicode_literals,
|
| 30 |
+
#~ division,
|
| 31 |
+
)
|
| 32 |
+
import re
|
| 33 |
+
import libqutrub.verb_const as vconst
|
| 34 |
+
# import ar_ctype
|
| 35 |
+
import pyarabic.araby as araby
|
| 36 |
+
#~ from pyarabic.araby import *
|
| 37 |
+
from pyarabic.araby import FATHA, DAMMA, KASRA, SHADDA, SUKUN, HAMZA, ALEF, \
|
| 38 |
+
WAW, ALEF_HAMZA_ABOVE, ALEF_MADDA, \
|
| 39 |
+
YEH_HAMZA, ALEF_MAKSURA, YEH, TEH, \
|
| 40 |
+
LAM_ALEF, SIMPLE_LAM_ALEF, LAM_ALEF_HAMZA_ABOVE, \
|
| 41 |
+
SIMPLE_LAM_ALEF_HAMZA_ABOVE, LAM_ALEF_MADDA_ABOVE ,SIMPLE_LAM_ALEF_MADDA_ABOVE
|
| 42 |
+
#~ import libqutrub.verb_valid as verb_valid
|
| 43 |
+
|
| 44 |
+
def replace_pos(word, rep, pos):
    """
    Replace the letter at a given position in a string.

    @param word: given string
    @type word: unicode
    @param rep: replacement letter
    @type rep: unicode char
    @param pos: replacement position
    @type pos: int
    @return: modified string
    @rtype: unicode string
    """
    prefix, suffix = word[:pos], word[pos + 1:]
    return prefix + rep + suffix
|
| 57 |
+
#####################################
|
| 58 |
+
#{ verb attributes conversion functions
|
| 59 |
+
#####################################
|
| 60 |
+
|
| 61 |
+
def get_bab_sarf_harakat(number):
    """
    Get the past and future marks for a given bab sarf number.
        - Bab: past    future
        - 1  : FATHA   DAMMA
        - 2  : FATHA   KASRA
        - 3  : FATHA   FATHA
        - 4  : KASRA   FATHA
        - 5  : DAMMA   DAMMA
        - 6  : KASRA   KASRA
    @param number: Bab sarf number (1-6).
    @type number: integer(1-6)
    @return: (past_mark, future_mark), or None for an out-of-range number
    @rtype: tuple or None
    """
    if not 1 <= number <= 6:
        return None
    babs = {
        1: (FATHA, DAMMA),
        2: (FATHA, KASRA),
        3: (FATHA, FATHA),
        4: (KASRA, FATHA),
        5: (DAMMA, DAMMA),
        6: (KASRA, KASRA),
    }
    return babs[number]
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def get_bab_sarf_number(past_haraka, future_haraka):
    """
    Get the bab sarf number for a pair of past and future harakat.
        - Bab: past    future
        - 1  : FATHA   DAMMA
        - 2  : FATHA   KASRA
        - 3  : FATHA   FATHA
        - 4  : KASRA   FATHA
        - 5  : DAMMA   DAMMA
        - 6  : KASRA   KASRA
    @param past_haraka: past haraka of the verb.
    @type past_haraka: unicode
    @param future_haraka: future haraka of the verb.
    @type future_haraka: unicode
    @return: Bab sarf number (1-6), or 0 when the pair matches no bab
    @rtype: integer
    """
    babs = {
        (FATHA, DAMMA): 1,
        (FATHA, KASRA): 2,
        (FATHA, FATHA): 3,
        (KASRA, FATHA): 4,
        (DAMMA, DAMMA): 5,
        (KASRA, KASRA): 6,
    }
    return babs.get((past_haraka, future_haraka), 0)
|
| 125 |
+
|
| 126 |
+
def write_harakat_in_full(harakat):
    """
    Spell out each haraka character of *harakat* by its Arabic name.

    @param harakat: list of harakat chars.
    @type harakat: unicode string
    @return: '-'-prefixed names in order; '*' marks an unknown character
    @rtype: unicode
    """
    names = {
        FATHA: u"فتحة",
        DAMMA: u"ضمة",
        KASRA: u"كسرة",
        SUKUN: u"سكون",
        vconst.ALEF_HARAKA: u"ألف",
        vconst.WAW_HARAKA: u"واو",
        vconst.YEH_HARAKA: u"ياء",
        vconst.ALEF_YEH_HARAKA: u"ى",
        vconst.ALEF_WAW_HARAKA: u"و",
        vconst.ALEF_YEH_ALTERNATIVE: u"ئ",
    }
    parts = [u'-' + names[hrk] if hrk in names else u"*" for hrk in harakat]
    return u"".join(parts)
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def get_past_harakat_by_babsarf(vtype):
    """
    Get the past harakat of a triliteral verb from its bab sarf code.
        - Bab: past    future
        - 1  : FATHA   DAMMA
        - 2  : FATHA   KASRA
        - 3  : FATHA   FATHA
        - 4  : KASRA   FATHA
        - 5  : DAMMA   DAMMA
        - 6  : KASRA   KASRA
    @param vtype: the bab sarf codification.
    @type vtype: unicode a string of number
    @return: three harakat (defaults to three kasrat for unknown codes)
    @rtype: unicode
    """
    table = {
        '1': FATHA * 3,
        '2': FATHA * 3,
        '3': FATHA * 3,
        '4': FATHA + KASRA + FATHA,
        '5': FATHA + DAMMA + FATHA,
        '6': FATHA + KASRA + FATHA,
    }
    return table.get(vtype, KASRA * 3)
|
| 178 |
+
|
| 179 |
+
def get_future_harakat_by_babsarf(vtype):
    """
    Get the future harakat of a triliteral verb from its bab sarf code.
        - Bab: past    future
        - 1  : FATHA   DAMMA
        - 2  : FATHA   KASRA
        - 3  : FATHA   FATHA
        - 4  : KASRA   FATHA
        - 5  : DAMMA   DAMMA
        - 6  : KASRA   KASRA
    @param vtype: the bab sarf codification.
    @type vtype: unicode a string of number
    @return: three harakat (defaults to three kasrat for unknown codes)
    @rtype: unicode
    """
    # NOTE(review): this returns the same marks as
    # get_past_harakat_by_babsarf; the original carried a "ToDo Review"
    # marker here — confirm the future table is intentionally identical.
    table = {
        '1': FATHA * 3,
        '2': FATHA * 3,
        '3': FATHA * 3,
        '4': FATHA + KASRA + FATHA,
        '5': FATHA + DAMMA + FATHA,
        '6': FATHA + KASRA + FATHA,
    }
    return table.get(vtype, KASRA * 3)
|
| 203 |
+
|
| 204 |
+
def get_future_haraka_by_babsarf(vtype):
    """
    Get the future haraka of a verb from its bab sarf code.
        - Bab: past    future
        - 1  : FATHA   DAMMA
        - 2  : FATHA   KASRA
        - 3  : FATHA   FATHA
        - 4  : KASRA   FATHA
        - 5  : DAMMA   DAMMA
        - 6  : KASRA   KASRA
    @param vtype: the bab sarf codification.
    @type vtype: unicode a string of number
    @return: haraka, or "" for an unknown code
    @rtype: unicode char
    """
    # CLEANUP: the original had a separate `vtype == '1'` branch followed by
    # an unreachable-for-'1' `vtype in ('1', '5')` branch; both yield DAMMA,
    # so they are merged here (behavior unchanged).
    if vtype in ('1', '5'):
        return DAMMA
    if vtype in ('2', '6'):
        return KASRA
    if vtype in ('3', '4'):
        return FATHA
    return ""
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
def get_haraka_by_name(haraka_name):
    """
    Convert an Arabic haraka name to the haraka character.
    Recognized names:
        - FATHA: (فتحة)
        - DAMMA: (ضمة)
        - KASRA: (كسرة)
        - SUKUN: (سكون)
    A value that is already a short haraka is returned unchanged.
    @param haraka_name: the arabic name of haraka.
    @type haraka_name: unicode
    @return: the haraka character, or False for an unrecognized name
    @rtype: unicode char or False
    """
    if araby.is_shortharaka(haraka_name):
        return haraka_name
    by_name = {
        u"فتحة": FATHA,
        u"ضمة": DAMMA,
        u"كسرة": KASRA,
        u"سكون": SUKUN,
    }
    return by_name.get(haraka_name, False)
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def get_future_type_by_name(haraka_name):
    """
    Get the future_type haraka by its Arabic name.
        - FATHA: (فتحة)
        - DAMMA: (ضمة)
        - KASRA: (كسرة)
    @param haraka_name: the arabic name of haraka.
    @type haraka_name: unicode
    @return: haraka (FATHA when the name is unrecognized)
    @rtype: unicode char
    """
    haraka = get_haraka_by_name(haraka_name)
    return haraka if haraka else FATHA
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
def get_future_type_entree(future_type):
    """
    Decode the future_type from its various codifications (also used on the
    command line):
        - FATHA: (fatha, فتحة, ف, f)
        - DAMMA: (damma, ضمة, ض, d)
        - KASRA: (kasra, كسرة, ك, k)
    or the bab tasrif digits (باب التصريف):
        - Bab: past    future
        - 1  : FATHA   DAMMA
        - 2  : FATHA   KASRA
        - 3  : FATHA   FATHA
        - 4  : KASRA   FATHA
        - 5  : DAMMA   DAMMA
        - 6  : KASRA   KASRA
    @param future_type: the future_type codification.
    @type future_type: unicode
    @return: the future type mark (FATHA for anything unrecognized)
    @rtype: unicode char
    """
    key = u"" + future_type.lower()
    if key in (u'damma', u'ضمة', u'ض', u'd', u'1', u'5'):
        return DAMMA
    if key in (u'kasra', u'كسرة', u'ك', u'k', u'2', u'6'):
        return KASRA
    # fatha spellings (u'fatha', u'فتحة', u'ف', u'f', u'3', u'4') and the
    # default for any unrecognized value
    return FATHA
|
| 311 |
+
|
| 312 |
+
def get_transitive_entree(transitive):
    """
    Get the transitive value from different codifications.
    In different contexts the transitivity is codified as:
    - "t", "transitive",
    - u"متعدي", u"م", u"مشترك", u"ك"
    - True
    @param transitive: the transitive codification.
    @type transitive: unicode
    @return: True if is transitive
    @rtype: boolean
    """
    accepted_markers = (u"متعدي", u"م", u"مشترك",
                        u"ك", "t", "transitive", True)
    return any(transitive == marker for marker in accepted_markers)
|
| 326 |
+
|
| 327 |
+
#####################################
|
| 328 |
+
#{verb pretreatment functions
|
| 329 |
+
#####################################
|
| 330 |
+
def normalize_alef_madda(word):
    """
    Convert a word-initial Alef madda into its two-letter form
    (HAMZA + ALEF), using the dedicated verb table for known
    triliteral madda verbs.

    @param word: given word.
    @type word: unicode.
    @return: converted word (stripped of harakat when a madda was
        expanded); the word unchanged when it has no leading madda.
    @rtype: unicode.
    """
    if not word.startswith(ALEF_MADDA):
        # BUG FIX: the original returned the local ``word_nm`` here,
        # which is unbound on this path and raised NameError; a word
        # without a leading ALEF_MADDA is returned unchanged.
        return word
    word_nm = araby.strip_harakat(word)
    #print word, word_nm, len(word), len(word_nm)
    if len(word_nm) == 3 and word_nm in vconst.ALEF_MADDA_VERB_TABLE:
        #print word, "exists in madd table"
        # known triliteral madda verb: return the first (canonical)
        # expansion listed in the table
        return vconst.ALEF_MADDA_VERB_TABLE[word_nm][0]
    # all remaining cases (length 2, length 3 not in table, longer):
    # plain expansion of the madda into HAMZA + ALEF
    return word_nm.replace(ALEF_MADDA, HAMZA+ALEF)
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
def normalize(word, wordtype = "affix"):
    """
    Normalize the word, by uniforming hamzat, Alef madda, and lamalefs.

    The word is converted to its "theoretical" form: the imagined shape
    of the word before spelling rules are applied (notably the several
    hamza shapes, all written as a hamza on the line), so it can be
    conjugated independently of orthography; spelling rules are
    re-applied after conjugation.  Input is assumed fully vocalized.

    @param word: given word.
    @type word: unicode.
    @param wordtype: "affix" when the word is an affix.
    @type wordtype: unicode.
    @return: converted word.
    @rtype: unicode.
    """
    # strip tatweel, except in affixes where it is used on purpose to
    # carry a separate haraka
    if wordtype != "affix":
        word = araby.strip_tatweel(word)
    # a leading ALEF_MADDA becomes hamza + hamza/alef
    if word.startswith(ALEF_MADDA):
        word = normalize_alef_madda(word)

    # skip any short harakat at the very beginning of the word
    start = 0
    total = len(word)
    while start < total and araby.is_shortharaka(word[start]):
        start += 1
    word = word[start:]
    # unify every hamza shape into one form
    word = araby.normalize_hamza(word)
    # expand every LAM+ALEF ligature into its two separate letters
    ligature_pairs = (
        (LAM_ALEF, SIMPLE_LAM_ALEF),
        (LAM_ALEF_HAMZA_ABOVE, SIMPLE_LAM_ALEF_HAMZA_ABOVE),
        (LAM_ALEF_MADDA_ABOVE, SIMPLE_LAM_ALEF_MADDA_ABOVE),
    )
    for ligature, expanded in ligature_pairs:
        word = word.replace(ligature, expanded)
    return word
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
def uniformate_alef_origin(marks, word_nm, future_type = KASRA):
    """
    Convert an ALEF long-haraka mark back to its origin (YEH-based or
    WAW-based) according to the future-tense haraka of the verb.

    @param marks: given marks.
    @type marks: unicode.
    @param word_nm: given word unvocalized.
    @type word_nm: unicode.
    @param future_type: The future mark of the triliteral verb.
    @type future_type: unicode char, default KASRA.
    @return: converted marks.
    @rtype: unicode.
    """
    # only two-mark strings are treated; others are returned untouched
    if len(marks) != 2:
        return marks
    # the letter before the last is a weak letter (ALEF long haraka)
    elif marks[len(marks)-2] == vconst.ALEF_HARAKA:
        # KASRA future type: the ALEF originates from a YEH
        if future_type == KASRA:
            marks = marks[:-2]+vconst.ALEF_YEH_HARAKA+marks[-1:]
        # DAMMA future type: the ALEF originates from a WAW
        elif future_type == DAMMA:
            marks = marks[:-2]+vconst.ALEF_WAW_HARAKA+marks[-1:]
    # the last letter is a weak letter
    # NOTE(review): the three branches below rebind the *local*
    # ``word_nm`` only; the function returns just ``marks``, so these
    # assignments have no visible effect — looks like dead code, and the
    # last branch replaces ALEF_MAKSURA with itself. Confirm intent.
    if len(word_nm) == 3 and word_nm[-1:] == ALEF:
        word_nm = word_nm[:-1]+vconst.ALEF_MAMDUDA
    elif len(word_nm)>3 and word_nm[-1:] == ALEF:
        word_nm = word_nm[:-1]+YEH  # was: ALEF_MAKSURA
    elif word_nm[-1:] == ALEF_MAKSURA:
        word_nm = word_nm[:-1]+ALEF_MAKSURA
    return marks
|
| 434 |
+
|
| 435 |
+
#--------------------------------------
|
| 436 |
+
# Predecated function
|
| 437 |
+
#--------------------------------------
|
| 438 |
+
def normalize_affix(word):
    """
    Replace SHADDA by the explicit SUKUN + SHADDA pair, so the doubled
    letter carries its own (silent) haraka slot.
    @param word: given word.
    @type word: unicode.
    @return: converted word.
    @rtype: unicode.
    """
    return word.replace(SHADDA, SUKUN + SHADDA)
|
| 450 |
+
|
| 451 |
+
|
| 452 |
+
def uniformate_suffix(word):
    """ Separate the harakat and the letters of the given word.

    Returns a tuple (word without harakat, harakat string) of equal
    length: each letter in the first string is paired with exactly one
    haraka code in the second.  Weak long-vowel sequences are stripped
    from the word and represented as long-haraka codes
    (vconst.ALEF_HARAKA, vconst.WAW_HARAKA, vconst.YEH_HARAKA).
    A SHADDA is first expanded into SUKUN+SHADDA.  On any
    letters/harakat length mismatch, (u"", u"") is returned.
    """
    ## type : affix : uniformate affixes
    ## word = normalize_affix(word)
    # expand every SHADDA so the geminated letter has an explicit SUKUN
    word = word.replace(SHADDA, SUKUN+SHADDA)
    shakl = u""      # collected harakat, one code per kept letter
    word_nm = u""    # collected letters, harakat stripped
    i = 0
    len_word = len(word)
    # print "len word", len(word)
    while i < len_word:
        if not araby.is_shortharaka(word[i]): # not in HARAKAT:
            word_nm += word[i]
            # letter followed by a short haraka: decide between keeping
            # the short haraka and folding haraka+weak-letter into a
            # single long-haraka code
            if i+1 < len(word) and araby.is_shortharaka(word[i+1]):
                if word[i+1] == FATHA :
                    # FATHA + ALEF (ALEF not word-final) => long ALEF haraka
                    if i+2 < len(word) and word[i+2] == ALEF and \
                    i+3 < len(word):
                        shakl += vconst.ALEF_HARAKA
                        i += 3
                    else :
                        shakl += FATHA
                        i += 2
                elif word[i+1] == DAMMA and i+2 < len(word) and \
                word[i+2] == WAW:
                    # DAMMA + WAW not followed by a haraka => long WAW haraka
                    if i+3 >= len(word) or not araby.is_shortharaka(word[i+3]):
                        shakl += vconst.WAW_HARAKA
                        i += 3
                    else :
                        shakl += DAMMA
                        i += 2
                elif word[i+1] == KASRA and i+2 < len(word) and \
                word[i+2] == YEH:
                    # KASRA + YEH not followed by a haraka => long YEH haraka
                    if i+3 >= len(word) or not araby.is_shortharaka(word[i+3]):
                        shakl += vconst.YEH_HARAKA
                        i += 3
                    else :
                        shakl += KASRA
                        i += 2
                else :
                    # any other short haraka is kept as-is
                    shakl += word[i+1]
                    i += 2

            elif i+1 < len(word) and araby.is_haraka(word[i+1]):
                # NOTE(review): a haraka that is not a *short* haraka
                # (e.g. a tanwin) is appended without advancing ``i``;
                # this branch would loop forever if ever reached —
                # confirm such input cannot occur for verbs/affixes.
                shakl += word[i+1]
            else:
                # bare letter: pair it with the undefined-haraka marker
                shakl += vconst.NOT_DEF_HARAKA
                i += 1
        else: i += 1
    # sanity check: letters and harakat must pair one-to-one
    if len(word_nm) == len(shakl):
        return (word_nm, shakl)
    else: return (u"", u"")
|
| 507 |
+
|
| 508 |
+
|
| 509 |
+
def uniformate_verb(word):
    """
    Separate the harakat and the letters of the given verb.

    Returns a tuple (letters, harakat) of equal length.  Weak letters
    are represented as long harakat and stripped from the word.  For a
    triliteral verb the marks are guessed from the given (possibly
    partial) vocalization and from hamza/weak-letter positions; for
    longer verbs fixed mark patterns from vconst are used.

    @param word: given word.
    @type word: unicode.
    @return: (letters, harakat).
    @rtype: tuple of unicode.
    """
    if word == "":
        return ("", "")
    #normalize ALEF MADDA
    if word.startswith(ALEF_MADDA):
        word = normalize_alef_madda(word)
    else:
        word = word.replace(ALEF_MADDA, HAMZA+ALEF)

    word_nm = araby.strip_harakat(word)
    length = len(word_nm)
    if len(word_nm) != 3:
        # hamzat are used to guess the harakat of the triliteral verb,
        # so only non-triliteral verbs are hamza-normalized up front
        word_nm = vconst.HAMZAT_PATTERN.sub(HAMZA, word_nm)
    # length of word after normalization

    # hamzat are used to detect the harakat of the triliteral verb;
    # they are unified further down
    if length == 3:
        # a middle ALEF/hamza or a final weak/hamza letter fixes the marks
        if word_nm[1]in (ALEF, ALEF_HAMZA_ABOVE) or \
        word_nm[2] in (ALEF_MAKSURA, ALEF_HAMZA_ABOVE, ALEF):
            marks = FATHA+FATHA+FATHA
        # NOTE(review): this branch indexes the *vocalized* ``word``
        # while the branch above indexes ``word_nm`` — possibly intended
        # to be ``word_nm`` too; confirm before changing.
        elif word[1] == YEH_HAMZA or word[2] in (YEH, YEH_HAMZA):
            marks = FATHA+KASRA+FATHA
        else:
            # otherwise read the middle haraka off the vocalized input
            i = 0
            ## skip harakat at the beginning of the word
            while araby.is_shortharaka(word[i]):# in HARAKAT:
                i += 1
            # the first letter
            if not araby.is_shortharaka(word[i]):#not in HARAKAT:
                i += 1
            # the first haraka
            while araby.is_shortharaka(word[i]):#word[i] in HARAKAT:
                i += 1
            # the second letter
            if not araby.is_shortharaka(word[i]):#word[i] not in HARAKAT:
                i += 1
            # the second haraka
            if not araby.is_shortharaka(word[i]):#word[i] not in HARAKAT:
                # geminated past-tense verbs caused problems here;
                # default the second haraka to FATHA for now
                #ToDo: review this case
                secondharaka = FATHA
            else:
                secondharaka = word[i]
            marks = u''.join([FATHA, secondharaka, FATHA])
        # hamzat were needed above to detect the harakat of the
        # triliteral verb; unify them now
        word_nm = vconst.HAMZAT_PATTERN.sub(HAMZA, word_nm)

    elif length == 4:
        marks = vconst.UNIFORMATE_MARKS_4
    elif length == 5:
        if word_nm.startswith(TEH):
            marks = vconst.UNIFORMATE_MARKS_5TEH
        else :
            marks = vconst.UNIFORMATE_MARKS_5
    elif length == 6:
        marks = vconst.UNIFORMATE_MARKS_6
    else:
        marks = FATHA*len(word_nm)

    i = 1
    # first letter/mark added automatically
    new_word = word_nm[0]
    new_harakat = marks[0]
    # between the first and the last letter: an inner ALEF is folded
    # into a long haraka on the previous letter instead of being kept
    while i < length-1:
        if word_nm[i] == ALEF:
            new_harakat = new_harakat[:-1]+vconst.ALEF_HARAKA
        else:
            new_harakat += marks[i]
            new_word += word_nm[i]
        i += 1
    # the last letter
    ## verbs like عيا، أعيا، عيّا whose final ALEF goes back to YEH, not WAW
    if word_nm[i] == ALEF:
        if len(word_nm) == 3 and word_nm[1] != YEH:
            new_word += vconst.ALEF_MAMDUDA
        else:
            new_word += YEH
    else:
        new_word += word_nm[i]
    new_harakat += marks[i]
##    new_word += word_nm[i]
    return (new_word, new_harakat)
|
| 609 |
+
|
| 610 |
+
|
| 611 |
+
#####################################
|
| 612 |
+
#{verb conjugation output treatment functions
|
| 613 |
+
#####################################
|
| 614 |
+
def standard_harakat(word):
    """
    Treat harakat on the word before output.

    A weak letter (ALEF, WAW, YEH, ALEF_MAKSURA) that is not the first
    character and has no short haraka on either side gets the matching
    short haraka inserted before it (FATHA before ALEF, DAMMA before
    WAW, KASRA before YEH); every other character is copied unchanged.

    @param word: given vocalized word.
    @type word: unicode.
    @return: vocalized word with adjusted harakat.
    @rtype: unicode.
    """
    # Robustness fix: the original indexed word[0] unconditionally and
    # raised IndexError on an empty string.
    if not word:
        return word
    pieces = [word[0]]
    len_word = len(word)
    for k in range(1, len_word):
        char = word[k]
        # non-weak letters pass through; so does the first character
        if char not in (ALEF, YEH, WAW, ALEF_MAKSURA):
            pieces.append(char)
            continue
        # weak letter: treat it only when neither the previous nor the
        # next character is a short haraka (or the word ends here)
        bare_before = not araby.is_shortharaka(word[k-1])
        bare_after = k+1 >= len_word or not araby.is_shortharaka(word[k+1])
        if bare_before and bare_after:
            if char == ALEF:
                pieces.append(FATHA+ALEF)
            elif char == WAW:
                pieces.append(DAMMA+WAW)
            elif char == YEH:
                pieces.append(KASRA+YEH)
            else:
                # ALEF_MAKSURA is copied as-is
                pieces.append(char)
        else:
            pieces.append(char)
    return u"".join(pieces)
|
| 647 |
+
|
| 648 |
+
|
| 649 |
+
def geminating(word_nm, harakat):
    """ treat geminating cases

    Input is an unvocalized word paired, position by position, with its
    harakat string; a geminated letter is represented by SHADDA.  When
    the context requires ungemination, the SHADDA is replaced by a copy
    of the doubled letter; when gemination is kept, the first doubled
    letter's haraka becomes NOT_DEF_HARAKA, to be dropped later by the
    orthographic-rendering step.

    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: (letters, harakat).
    @rtype: tuple of unicode.
    """
    new_word = u""
    new_harakat = u""
    i = 0
    length = len(word_nm)
##    has_shadda = False
##    has_shadda = False
    # nothing to do without a SHADDA in the word
    if word_nm.find(SHADDA) < 0:
        return (word_nm, harakat)
    ##has_shadda and
    while i < length:
        # current letter followed by a SHADDA: decide whether gemination
        # must be kept or undone
        # NOTE(review): the trailing ``and harakat[i-1]`` only tests
        # that the previous haraka is a non-empty string — confirm the
        # intended condition.
        if (i > 0 and i+1 < length and word_nm[i+1] == SHADDA and \
        harakat[i] in (SUKUN, FATHA, KASRA, DAMMA)) and harakat[i-1]:
            # treat ungeminating case

            # first doubled letter vocalized while the second (the
            # SHADDA slot) is silent: the gemination is undone

            if harakat[i] != SUKUN and harakat[i+1] == SUKUN:
                #ungeminating: duplicate the letter in place of SHADDA
                new_word += word_nm[i]
                word_nm = replace_pos(word_nm, word_nm[i], i+1)
                new_harakat += harakat[i]
                i += 1

            elif harakat[i] == SUKUN and harakat[i+1] == SUKUN:
                #no geminating: expand and give the first copy a FATHA
                new_word += word_nm[i]
                word_nm = replace_pos(word_nm, word_nm[i], i+1)
                new_harakat += FATHA
                i += 1
            else:

                # a silent (SUKUN) previous letter can take over the
                # haraka of the first doubled letter
                if i-1 >= 0 and new_harakat[i-1] == SUKUN:
                    new_word += word_nm[i]+SHADDA
                    if harakat[i] != SUKUN:
                        new_harakat = new_harakat[:-1]+harakat[i]+ \
                        vconst.NOT_DEF_HARAKA+harakat[i+1]
                    else:
                        new_harakat = new_harakat[:-1]+FATHA+ \
                        vconst.NOT_DEF_HARAKA+harakat[i+1]
                ## gemination is kept when the previous letter carries
                ## a long haraka
                elif i-1 >= 0 and new_harakat[i-1] in \
                (vconst.ALEF_HARAKA, vconst.WAW_HARAKA, \
                vconst.YEH_HARAKA):
                    new_word += word_nm[i]+SHADDA
                    new_harakat += vconst.NOT_DEF_HARAKA+harakat[i+1]

                elif harakat[i] == SUKUN:
                    new_word += word_nm[i]+SHADDA
                    new_harakat += vconst.NOT_DEF_HARAKA+harakat[i+1]
                else:
                    ## temporary fallback until the remaining cases are
                    ## properly solved
                    new_word += word_nm[i]+SHADDA
                    new_harakat += vconst.NOT_DEF_HARAKA+harakat[i+1]
                ##TODO
                ## forbid gemination in the cases where it is impossible,
                ## e.g. سكتتا: both doubled letters vocalized with a
                ## vocalized letter before them;
                ## solved by the homogenization step between conjugations
                i += 2
        elif i > 0 and i+1 < length and word_nm[i+1] == word_nm[i] and \
        harakat[i] == SUKUN and harakat[i+1] in (FATHA, DAMMA, KASRA):
            # treat geminating case: a silent letter followed by the same
            # vocalized letter collapses into one geminated letter
            new_word += word_nm[i]+SHADDA
            new_harakat += vconst.NOT_DEF_HARAKA+harakat[i+1]
            i += 2
        else :
            new_word += word_nm[i]
            new_harakat += harakat[i]
            i += 1
    return (new_word, new_harakat)
|
| 738 |
+
|
| 739 |
+
|
| 740 |
+
def standard2(word_nm, harakat):
    """ join the harakat and the letters of the given word
    in the standard script; returns one string (the vocalized word).

    Applies, in order: gemination treatment, a prosthetic ALEF when the
    word would start with SUKUN, weak-letter homogenization, hamza
    spelling (tahmeez2), then interleaves letters and harakat and
    applies the standard orthographic replacements.  Returns u"*" (bad
    input) or u"" (internal mismatch) when letters and harakat lengths
    disagree.

    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: vocalized word.
    @rtype: unicode.
    """
    if len(word_nm) != len(harakat):
        # debug trace for mismatched input
        print(word_nm.encode('utf8'),len(word_nm), u"-".join([araby.name(x) for x in harakat]), len(harakat))
        return u"*"
    else:
        word = u""
        i = 0
        word_nm, harakat = geminating(word_nm, harakat)
        if len(word_nm) != len(harakat):
            return u""
        ## a word must not start with a silent letter: prepend a liaison
        ## ALEF whose haraka is DAMMA when the second haraka is a damma
        ## (short or long), KASRA otherwise
        if len(harakat) != 0 and harakat.startswith(SUKUN):
            word_nm = ALEF+word_nm
            if len(harakat) >= 2 and harakat[1] in \
            (DAMMA, vconst.WAW_HARAKA):
                harakat = DAMMA+harakat
            else:
                harakat = KASRA+harakat

##        word_nm = tahmeez2(word_nm, harakat)
        if len(word_nm) != len(harakat):
            return u""
        # keep copies for the debug trace below
        word_before = word_nm
        harakat_before = harakat
        word_nm, harakat = homogenize(word_nm, harakat)
        if len(word_nm) != len(harakat):
            # debug trace: homogenize broke the letters/harakat pairing
            print("len word: ", len(word_nm), word_nm.encode('utf8') )
            print("len harakat: ", len(harakat), repr(harakat))
            print(repr(harakat_before), word_before.encode('utf8'))
            return u""
        word_nm = tahmeez2(word_nm, harakat)

        len_word_nm = len(word_nm)
        while i < len_word_nm:
            # long harakat (which replaced the weak letters of defective
            # verbs) have an explicit written form; everything else is
            # emitted as letter + haraka
            if harakat[i] in vconst.WRITTEN_HARAKA:
                word += word_nm[i]+vconst.WRITTEN_HARAKA[harakat[i]]
            else:
                word += word_nm[i]+harakat[i]
            i += 1

        # orthographic post-processing, e.g. HAMZA-on-ALEF followed by
        # FATHA or SUKUN rewritten as ALEF MADDA
        for (pat, rep) in vconst.STANDARD_REPLACEMENT:
            word = word.replace( pat, rep)


        return word
|
| 802 |
+
|
| 803 |
+
|
| 804 |
+
def tahmeez2(word_nm, harakat):
    """ Transform hamza into the standard script.

    Takes the word without harakat and the harakat separately, and
    returns the word with the hamza written on its proper seat
    (on the line, on ALEF, on WAW, on YEH...), chosen from the
    INITIAL/MIDDLE/FINAL tahmeez tables according to the harakat
    surrounding each hamza.
    إعلال و إبدال الهمزة.
    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: the word with seated hamzas (u"" on length mismatch).
    @rtype: unicode.
    """
    # the harakat length != letters length
    if len(word_nm) != len(harakat):
        return u""
    # if no hamza, no tahmeez
    elif HAMZA not in word_nm:
        return word_nm
    else:
        ha2 = u""
        # reduce alternative long-ALEF codes to the standard ALEF haraka
        for hrk in harakat:
            if hrk == vconst.ALEF_YEH_HARAKA or \
            hrk == vconst.ALEF_WAW_HARAKA:
                hrk = vconst.ALEF_HARAKA
            ha2 += hrk
        harakat = ha2
        word = u""
        for i in range(len(word_nm)):
            if word_nm[i] != HAMZA and word_nm[i] != ALEF_HAMZA_ABOVE:
                word += word_nm[i]
            else:
                if i == 0:
                    # word-initial hamza: seat chosen by its own haraka
                    actual = harakat[i]
                    swap = vconst.INITIAL_TAHMEEZ_TABLE.get(actual, actual)
                else:
                    before = harakat[i-1]
                    actual = harakat[i]

                    if i+1 < len(word_nm):
                        # middle hamza
                        # if the hamza has shadda, it takes the haraka
                        # of the shadda slot
                        if actual == vconst.NOT_DEF_HARAKA or actual == SUKUN:
                            if word_nm[i+1] == SHADDA and harakat[i+1] != SUKUN:
                                actual = harakat[i+1]
                        # undefined harakat default to FATHA for lookup
                        if before == vconst.NOT_DEF_HARAKA:
                            before = FATHA
                        if actual == vconst.NOT_DEF_HARAKA:
                            actual = FATHA

                        if before in vconst.MIDDLE_TAHMEEZ_TABLE and\
                        actual in vconst.MIDDLE_TAHMEEZ_TABLE[before]:
                            swap = vconst.MIDDLE_TAHMEEZ_TABLE[before][actual]
                            #~ # if the actual haraka is FATHA
                            # special overrides after a silent or
                            # long-haraka previous letter
                            if before in (SUKUN, vconst.YEH_HARAKA, vconst.ALEF_HARAKA, vconst.WAW_HARAKA):
                                if actual == FATHA and word_nm[i-1] == araby.YEH:
                                    swap = araby.YEH_HAMZA
                                #~ #elif word_nm[i-1] in ( araby.WAW, araby.DAL,araby.THAL,
                                #~ # araby.REH, araby.ZAIN ):
                                #ZZZZ
                                elif word_nm[i-1] == araby.WAW and actual not in (KASRA, vconst.YEH_HARAKA):
                                    swap = araby.HAMZA
                        else :
                            swap = word_nm[i]

                    else :
                        # final hamza
                        if before == vconst.NOT_DEF_HARAKA:
                            before = FATHA
                        if actual == vconst.NOT_DEF_HARAKA:
                            actual = FATHA

                        if before in vconst.FINAL_TAHMEEZ_TABLE and \
                        actual in vconst.FINAL_TAHMEEZ_TABLE[before]:
                            # after WAW a vocalized final hamza stays on
                            # the line
                            if word_nm[i-1] in( araby.WAW, ) and actual in (FATHA, DAMMA):
                                #pass
                                swap = araby.HAMZA
                            else:
                                swap = vconst.FINAL_TAHMEEZ_TABLE[before][actual]
                        else :
                            swap = word_nm[i]
                word += swap
        return word
|
| 885 |
+
|
| 886 |
+
def treat_sukun2(word_nm, harakat):
    """ Treat the encounter of two sukuns.

    Takes the word without harakat and the harakat separately, and
    returns the new harakat sequence: when a haraka is followed by
    SUKUN, a long ALEF haraka shortens (to KASRA at word start, FATHA
    otherwise, and stays long before a geminated letter) and other
    harakat are converted through vconst.CONVERSION_TABLE.

    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: the adjusted harakat.
    @rtype: unicode.
    """
    # no sukun => nothing to treat
    if SUKUN not in harakat:
        return harakat
    # lengths must pair one-to-one, otherwise return untouched
    if len(word_nm) != len(harakat):
        return harakat

    len_harakat = len(harakat)
    adjusted = []
    for pos, mark in enumerate(harakat):
        next_is_sukun = pos + 1 < len_harakat and harakat[pos + 1] == SUKUN
        if not next_is_sukun:
            adjusted.append(mark)
        elif mark == vconst.ALEF_HARAKA:
            # a long ALEF haraka before a geminated letter is kept long
            if pos + 2 < len(word_nm) and word_nm[pos + 2] == SHADDA:
                adjusted.append(vconst.ALEF_HARAKA)
            # when the ALEF is the second letter (e.g. خاف) it becomes
            # KASRA; later in the word it becomes FATHA (e.g. لا تخف)
            elif pos == 0:
                adjusted.append(KASRA)
            else:
                adjusted.append(FATHA)
        elif mark in vconst.CONVERSION_TABLE:
            adjusted.append(vconst.CONVERSION_TABLE[mark])
        else:
            adjusted.append(mark)
    return u"".join(adjusted)
|
| 931 |
+
|
| 932 |
+
|
| 933 |
+
|
| 934 |
+
def homogenize(word_nm, harakat):
|
| 935 |
+
""" لإreat the jonction of WAW, YEH.
|
| 936 |
+
معالجة التحولات التي تطرا على الياء أو الوا في وسط الكلمة أو في اخرها
|
| 937 |
+
@param word_nm: given unvocalized word.
|
| 938 |
+
@type word_nm: unicode.
|
| 939 |
+
@param harakat: given harakat.
|
| 940 |
+
@type harakat: unicode.
|
| 941 |
+
@return: (letters, harakat)after treatment.
|
| 942 |
+
@rtype: tuple of unicode.
|
| 943 |
+
"""
|
| 944 |
+
# inequal length between letters and harakat
|
| 945 |
+
if len(word_nm) != len(harakat):
|
| 946 |
+
print("Homogenize:inequal length", len(word_nm), len(harakat))
|
| 947 |
+
return (word_nm, harakat)
|
| 948 |
+
# word without weak letters doesn't need treatment
|
| 949 |
+
#~ elif not re.search(ur'[%s%s%s%s]'%(ALEF_MAKSURA, vconst.ALEF_MAMDUDA, \
|
| 950 |
+
elif not re.search(u'[%s%s%s%s]'%(ALEF_MAKSURA, vconst.ALEF_MAMDUDA, \
|
| 951 |
+
YEH, WAW), word_nm):
|
| 952 |
+
return (word_nm, harakat)
|
| 953 |
+
# treatment
|
| 954 |
+
else:
|
| 955 |
+
new_harakat = harakat[0]
|
| 956 |
+
new_word = word_nm[0]
|
| 957 |
+
# نبدأ من الحرف الثاني لأن الحرف الأول لا يعالج
|
| 958 |
+
i = 1
|
| 959 |
+
## دراسة حالات الياء والواو قبل النهاية
|
| 960 |
+
len_word_nm = len(word_nm)
|
| 961 |
+
while i < len_word_nm-1:
|
| 962 |
+
actual_letter = word_nm[i] # Actual letter
|
| 963 |
+
actual_haraka = harakat[i] # Actual haraka
|
| 964 |
+
if i-1 >= 0 :
|
| 965 |
+
previous_letter = word_nm[i-1] # previous letter
|
| 966 |
+
previous_haraka = harakat[i-1] # previous letter
|
| 967 |
+
else:
|
| 968 |
+
previous_letter = ''
|
| 969 |
+
previous_haraka = ''
|
| 970 |
+
if i+1 < len_word_nm:
|
| 971 |
+
next_letter = word_nm[i+1] # next letter
|
| 972 |
+
next_haraka = harakat[i+1] # next haraka
|
| 973 |
+
else:
|
| 974 |
+
next_letter = ''
|
| 975 |
+
next_haraka = ''
|
| 976 |
+
# إذا كان الحرف التالي مضعف
|
| 977 |
+
if i+2 < len_word_nm and word_nm[i+2] == SHADDA:
|
| 978 |
+
shadda_in_next = True
|
| 979 |
+
else:
|
| 980 |
+
shadda_in_next = False
|
| 981 |
+
|
| 982 |
+
if actual_letter == ALEF_MAKSURA or actual_letter == YEH:
|
| 983 |
+
#إذا كانت الياء ساكنة أو مكسورة (كسرا قصيرا أو طويلا)،
|
| 984 |
+
# وكان ما قبلها مكسورا، يأخذ ماقبلها كسرة طويلة #مثال :
|
| 985 |
+
# بِ +يْ = > بِي
|
| 986 |
+
#بِ +يِ = > بِي
|
| 987 |
+
#بِ +يي = > بِي
|
| 988 |
+
|
| 989 |
+
if actual_letter == ALEF_MAKSURA and next_haraka == SUKUN:
|
| 990 |
+
new_harakat += ""
|
| 991 |
+
elif (actual_haraka in(SUKUN, KASRA, vconst.YEH_HARAKA)) and \
|
| 992 |
+
previous_haraka == KASRA and not shadda_in_next:
|
| 993 |
+
new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
|
| 994 |
+
elif (actual_haraka in(KASRA)) and previous_haraka == KASRA \
|
| 995 |
+
and shadda_in_next:
|
| 996 |
+
new_harakat += ''
|
| 997 |
+
# حالة هو تيسّر في المضارع المبني للمجهول
|
| 998 |
+
#~ elif actual_letter == YEH and previous_haraka == DAMMA and \
|
| 999 |
+
#~ actual_haraka == DAMMA and shadda_in_next:
|
| 1000 |
+
#~ #pass
|
| 1001 |
+
#~ new_harakat += DAMMA
|
| 1002 |
+
#~ new_word += YEH
|
| 1003 |
+
# # مثل تؤدّينّ
|
| 1004 |
+
# elif previous_haraka in (KASRA, FATHA) and
|
| 1005 |
+
# actual_haraka == DAMMA and shadda_in_next:
|
| 1006 |
+
# new_harakat += FATHA
|
| 1007 |
+
# new_word += YEH
|
| 1008 |
+
# ToDO review
|
| 1009 |
+
#سقّى، يُسقُّون
|
| 1010 |
+
elif actual_haraka == DAMMA and shadda_in_next:
|
| 1011 |
+
if previous_haraka in (DAMMA, KASRA):
|
| 1012 |
+
#~ if previous_haraka in DAMMA:
|
| 1013 |
+
new_harakat = new_harakat[:-1]+DAMMA
|
| 1014 |
+
else:
|
| 1015 |
+
new_harakat += DAMMA
|
| 1016 |
+
new_word += WAW
|
| 1017 |
+
#تحويل الياء إلى واو ساكنة
|
| 1018 |
+
#2 - إذا كانت الياء مضمومة (ضما قصيرا أو طويلا)،
|
| 1019 |
+
# وكان ما قبلها مفتوحا، تتحول الياء إلى واو ساكنة. #مثال :
|
| 1020 |
+
# بَ +يُ = > بَِوْ
|
| 1021 |
+
#بَ +يو = > بَوْ
|
| 1022 |
+
|
| 1023 |
+
elif (actual_haraka in (DAMMA, vconst.WAW_HARAKA))and\
|
| 1024 |
+
previous_haraka == FATHA and not shadda_in_next:
|
| 1025 |
+
new_harakat += SUKUN
|
| 1026 |
+
new_word += WAW
|
| 1027 |
+
elif (actual_haraka in (DAMMA, vconst.WAW_HARAKA))and \
|
| 1028 |
+
previous_haraka == FATHA and shadda_in_next:
|
| 1029 |
+
new_harakat += actual_haraka
|
| 1030 |
+
new_word += WAW
|
| 1031 |
+
#إذا كانت ساكنة، وماقبلها مضموما،
|
| 1032 |
+
# ولم يكن ما بعدها ياء، أخذ ما قبلها ضمة طويلة.
|
| 1033 |
+
#مثال :
|
| 1034 |
+
# بُ +يُت = >بُوت
|
| 1035 |
+
|
| 1036 |
+
|
| 1037 |
+
elif (actual_haraka == SUKUN) and previous_haraka == DAMMA \
|
| 1038 |
+
and next_letter != YEH and not shadda_in_next:
|
| 1039 |
+
new_harakat = new_harakat[:-1]+vconst.WAW_HARAKA
|
| 1040 |
+
|
| 1041 |
+
elif (actual_haraka == vconst.YEH_HARAKA)and \
|
| 1042 |
+
previous_haraka == FATHA:
|
| 1043 |
+
new_harakat += SUKUN
|
| 1044 |
+
new_word += YEH
|
| 1045 |
+
elif (actual_haraka == vconst.WAW_HARAKA) and previous_haraka == KASRA :
|
| 1046 |
+
new_harakat = new_harakat[:-1]+vconst.WAW_HARAKA
|
| 1047 |
+
|
| 1048 |
+
#~ if araby.is_hamza(previous_letter):
|
| 1049 |
+
#~ new_word = new_word[:-1] + araby.WAW_HAMZA
|
| 1050 |
+
|
| 1051 |
+
|
| 1052 |
+
else :
|
| 1053 |
+
new_harakat += actual_haraka
|
| 1054 |
+
new_word += YEH
|
| 1055 |
+
|
| 1056 |
+
elif actual_letter == vconst.ALEF_MAMDUDA or \
|
| 1057 |
+
actual_letter == WAW:
|
| 1058 |
+
if actual_letter == vconst.ALEF_MAMDUDA and \
|
| 1059 |
+
next_haraka == SUKUN:
|
| 1060 |
+
new_harakat += ""
|
| 1061 |
+
elif actual_letter == vconst.ALEF_MAMDUDA and \
|
| 1062 |
+
(actual_haraka in(SUKUN, DAMMA, vconst.WAW_HARAKA))and\
|
| 1063 |
+
(previous_haraka == DAMMA) and not shadda_in_next:
|
| 1064 |
+
new_harakat = new_harakat[:-1]+vconst.WAW_HARAKA
|
| 1065 |
+
|
| 1066 |
+
elif actual_letter == WAW and (actual_haraka in(SUKUN, DAMMA))\
|
| 1067 |
+
and (previous_haraka == DAMMA) and not shadda_in_next:
|
| 1068 |
+
new_harakat = new_harakat[:-1]+vconst.WAW_HARAKA
|
| 1069 |
+
|
| 1070 |
+
#تحويل الواو المضمومة أو الطويلة إلى واو ساكنة
|
| 1071 |
+
elif (actual_haraka in (DAMMA, vconst.WAW_HARAKA)) \
|
| 1072 |
+
and previous_haraka == FATHA :
|
| 1073 |
+
new_harakat += SUKUN
|
| 1074 |
+
new_word += WAW
|
| 1075 |
+
# حالة وجع ايجع
|
| 1076 |
+
elif (actual_haraka == (SUKUN))and \
|
| 1077 |
+
(previous_haraka == KASRA) and not shadda_in_next:
|
| 1078 |
+
new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
|
| 1079 |
+
elif (actual_haraka == KASRA)and shadda_in_next:
|
| 1080 |
+
new_harakat = new_harakat[:-1]+KASRA
|
| 1081 |
+
|
| 1082 |
+
elif actual_letter == vconst.ALEF_MAMDUDA and \
|
| 1083 |
+
(actual_haraka == DAMMA) and shadda_in_next:
|
| 1084 |
+
if previous_haraka == DAMMA:
|
| 1085 |
+
new_harakat = new_harakat[:-1]+DAMMA
|
| 1086 |
+
else:
|
| 1087 |
+
new_harakat += DAMMA
|
| 1088 |
+
new_word += WAW
|
| 1089 |
+
|
| 1090 |
+
|
| 1091 |
+
elif actual_letter == WAW and (actual_haraka == vconst.WAW_HARAKA):
|
| 1092 |
+
new_harakat = new_harakat[:-1]+ vconst.WAW_HARAKA
|
| 1093 |
+
elif actual_letter == WAW and (actual_haraka == DAMMA) and previous_haraka == DAMMA and shadda_in_next:
|
| 1094 |
+
new_harakat +=""
|
| 1095 |
+
|
| 1096 |
+
elif actual_letter == vconst.ALEF_MAMDUDA and \
|
| 1097 |
+
(actual_haraka == vconst.YEH_HARAKA) and \
|
| 1098 |
+
not shadda_in_next:
|
| 1099 |
+
new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
|
| 1100 |
+
elif (actual_letter == WAW ) and (actual_haraka == DAMMA) and previous_haraka in (FATHA,) and \
|
| 1101 |
+
shadda_in_next:
|
| 1102 |
+
new_harakat += DAMMA
|
| 1103 |
+
new_word += WAW
|
| 1104 |
+
elif actual_letter == WAW and (actual_haraka == DAMMA) and previous_haraka in (FATHA,) and \
|
| 1105 |
+
shadda_in_next:
|
| 1106 |
+
new_harakat += DAMMA
|
| 1107 |
+
new_word += WAW
|
| 1108 |
+
elif actual_letter == WAW and (actual_haraka == DAMMA) and\
|
| 1109 |
+
shadda_in_next:
|
| 1110 |
+
new_harakat += DAMMA
|
| 1111 |
+
new_word += WAW
|
| 1112 |
+
#elif actual_letter == WAW and actual_haraka == FATHA and (previous_haraka == FATHA):
|
| 1113 |
+
# new_harakat += "" #actual_haraka
|
| 1114 |
+
# new_word += ""#WAW
|
| 1115 |
+
else :
|
| 1116 |
+
new_harakat += actual_haraka
|
| 1117 |
+
new_word += WAW
|
| 1118 |
+
else:
|
| 1119 |
+
new_harakat += actual_haraka
|
| 1120 |
+
new_word += actual_letter
|
| 1121 |
+
i += 1
|
| 1122 |
+
# end of while
|
| 1123 |
+
# we have to treat the last letter
|
| 1124 |
+
## دراسة حالة الحرف الأخير
|
| 1125 |
+
# Actual letter
|
| 1126 |
+
last_letter = word_nm[i]
|
| 1127 |
+
# Actual haraka
|
| 1128 |
+
last_haraka = harakat[i]
|
| 1129 |
+
if i-1 >= 0 :
|
| 1130 |
+
# previous letter
|
| 1131 |
+
previous_letter = word_nm[i-1]
|
| 1132 |
+
# previous haraka
|
| 1133 |
+
previous_haraka = harakat[i-1]
|
| 1134 |
+
else:
|
| 1135 |
+
previous_letter = ''
|
| 1136 |
+
previous_haraka = ''
|
| 1137 |
+
|
| 1138 |
+
if last_letter == ALEF_MAKSURA or last_letter == YEH :
|
| 1139 |
+
if (last_haraka in(KASRA, DAMMA)) and previous_haraka == KASRA:
|
| 1140 |
+
new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
|
| 1141 |
+
elif (last_haraka in(vconst.YEH_HARAKA)) and\
|
| 1142 |
+
previous_haraka == KASRA :
|
| 1143 |
+
new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
|
| 1144 |
+
#حذف حركة الحرف الأخير إذا كان ساكنا
|
| 1145 |
+
elif (last_haraka == SUKUN):
|
| 1146 |
+
## pass
|
| 1147 |
+
new_harakat += ''
|
| 1148 |
+
new_word += ''
|
| 1149 |
+
elif previous_letter == YEH and \
|
| 1150 |
+
(last_haraka in(KASRA, DAMMA, FATHA)) and previous_haraka == FATHA:
|
| 1151 |
+
new_harakat += vconst.NOT_DEF_HARAKA
|
| 1152 |
+
new_word += ALEF
|
| 1153 |
+
elif previous_letter != YEH and \
|
| 1154 |
+
(last_haraka in(KASRA, DAMMA, FATHA)) and previous_haraka == FATHA:
|
| 1155 |
+
new_harakat += vconst.NOT_DEF_HARAKA
|
| 1156 |
+
new_word += ALEF_MAKSURA
|
| 1157 |
+
elif (last_haraka in(vconst.WAW_HARAKA)) and \
|
| 1158 |
+
previous_haraka == KASRA:
|
| 1159 |
+
new_harakat = new_harakat[:-1]+vconst.WAW_HARAKA
|
| 1160 |
+
#حالة تصريف الفعل الناقص في المضارع المجزوم مع أنت للمؤنث
|
| 1161 |
+
elif (last_haraka == vconst.YEH_HARAKA) and \
|
| 1162 |
+
previous_haraka == FATHA:
|
| 1163 |
+
new_harakat += SUKUN
|
| 1164 |
+
new_word += YEH
|
| 1165 |
+
else :
|
| 1166 |
+
new_harakat += last_haraka
|
| 1167 |
+
new_word += YEH
|
| 1168 |
+
|
| 1169 |
+
elif last_letter == vconst.ALEF_MAMDUDA :
|
| 1170 |
+
if (last_haraka in(DAMMA, KASRA, vconst.WAW_HARAKA)) and \
|
| 1171 |
+
previous_haraka == DAMMA :
|
| 1172 |
+
new_harakat = new_harakat[:-1]+vconst.WAW_HARAKA
|
| 1173 |
+
elif (last_haraka in(vconst.ALEF_HARAKA)) and \
|
| 1174 |
+
previous_haraka == DAMMA:
|
| 1175 |
+
## pass
|
| 1176 |
+
new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
|
| 1177 |
+
elif (last_haraka == vconst.YEH_HARAKA):
|
| 1178 |
+
new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
|
| 1179 |
+
new_word += ''
|
| 1180 |
+
elif (last_haraka == SUKUN) and previous_haraka == KASRA :
|
| 1181 |
+
pass
|
| 1182 |
+
|
| 1183 |
+
elif (last_haraka == SUKUN):
|
| 1184 |
+
new_harakat += ''
|
| 1185 |
+
new_word += ''
|
| 1186 |
+
elif (last_haraka == FATHA)and previous_haraka == FATHA:
|
| 1187 |
+
new_harakat += vconst.NOT_DEF_HARAKA
|
| 1188 |
+
new_word += vconst.ALEF_MAMDUDA
|
| 1189 |
+
else :
|
| 1190 |
+
new_harakat += last_haraka
|
| 1191 |
+
new_word += WAW
|
| 1192 |
+
#new_word += vconst.ALEF_MAMDUDA
|
| 1193 |
+
elif last_letter == WAW :
|
| 1194 |
+
if (last_haraka in(DAMMA, FATHA)) and previous_haraka == FATHA:
|
| 1195 |
+
new_harakat += vconst.NOT_DEF_HARAKA
|
| 1196 |
+
new_word += ALEF_MAKSURA
|
| 1197 |
+
elif (last_haraka in(FATHA,)) and previous_haraka == KASRA:
|
| 1198 |
+
new_harakat += vconst.FATHA
|
| 1199 |
+
new_word += YEH
|
| 1200 |
+
elif (last_haraka in (vconst.YEH_HARAKA,)) and previous_haraka in (KASRA, DAMMA):
|
| 1201 |
+
new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
|
| 1202 |
+
new_word += ''
|
| 1203 |
+
elif (last_haraka in(SUKUN,)) and previous_haraka in (DAMMA, FATHA):
|
| 1204 |
+
new_harakat += ""
|
| 1205 |
+
new_word += ""
|
| 1206 |
+
else:
|
| 1207 |
+
new_harakat += harakat[i]
|
| 1208 |
+
new_word += word_nm[i]
|
| 1209 |
+
else:
|
| 1210 |
+
new_harakat += harakat[i]
|
| 1211 |
+
new_word += word_nm[i]
|
| 1212 |
+
return (new_word, new_harakat)
|
| 1213 |
+
|
| 1214 |
+
|
| 1215 |
+
def is_triliteral_verb(verb):
    """ Test if the verb is triliteral,
    used in selection of verbs from the triliteral verb dictionary
    @param verb: given verb.
    @type verb: unicode.
    @return: True if the verb is triliteral.
    @rtype: Boolean.
    """
    verb_nm = araby.strip_harakat(verb)
    # ALEF MADDA stands for HAMZA + ALEF, i.e. two radicals, so expand
    # it before counting letters
    verb_nm = verb_nm.replace(ALEF_MADDA, HAMZA + ALEF)
    # triliteral means exactly three letters once diacritics are stripped
    return len(verb_nm) == 3
|
| 1228 |
+
|
| 1229 |
+
|
| 1230 |
+
|
| 1231 |
+
|
| 1232 |
+
|
libqutrub/arabic_const.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
# -*- coding=utf-8 -*-
#---
# $Id: arabic_const.py,v 1.6 2003/04/22 17:18:22 elzubeir Exp $
#
# ------------
# Description:
# ------------
#
# Arabic codes
#
# (C) Copyright 2003, Arabeyes, Mohammed Elzubeir
# -----------------
# Revision Details: (Updated by Revision Control System)
# -----------------
# $Date: 2003/04/22 17:18:22 $
# $Author: elzubeir $
# $Revision: 1.6 $
# $Source: /home/arabeyes/cvs/projects/duali/pyduali/pyduali/arabic.py,v $
#
# This program is written under the BSD License.
#---
"""Unicode code-point constants for Arabic punctuation, letters,
diacritics, digits and ligatures."""

# Arabic punctuation
COMMA = u'\u060C'
SEMICOLON = u'\u061B'
QUESTION = u'\u061F'
# Hamza and the hamza-carrying letter forms
HAMZA = u'\u0621'
ALEF_MADDA = u'\u0622'
ALEF_HAMZA_ABOVE = u'\u0623'
WAW_HAMZA = u'\u0624'
ALEF_HAMZA_BELOW = u'\u0625'
YEH_HAMZA = u'\u0626'
# Base letters of the Arabic alphabet
ALEF = u'\u0627'
BEH = u'\u0628'
TEH_MARBUTA = u'\u0629'
TEH = u'\u062a'
THEH = u'\u062b'
JEEM = u'\u062c'
HAH = u'\u062d'
KHAH = u'\u062e'
DAL = u'\u062f'
THAL = u'\u0630'
REH = u'\u0631'
ZAIN = u'\u0632'
SEEN = u'\u0633'
SHEEN = u'\u0634'
SAD = u'\u0635'
DAD = u'\u0636'
TAH = u'\u0637'
ZAH = u'\u0638'
AIN = u'\u0639'
GHAIN = u'\u063a'
TATWEEL = u'\u0640'
FEH = u'\u0641'
QAF = u'\u0642'
KAF = u'\u0643'
LAM = u'\u0644'
MEEM = u'\u0645'
NOON = u'\u0646'
HEH = u'\u0647'
WAW = u'\u0648'
ALEF_MAKSURA = u'\u0649'
YEH = u'\u064a'
# Combining madda / hamza marks
MADDA_ABOVE = u'\u0653'
HAMZA_ABOVE = u'\u0654'
HAMZA_BELOW = u'\u0655'
# Arabic-Indic digits
ZERO = u'\u0660'
ONE = u'\u0661'
TWO = u'\u0662'
THREE = u'\u0663'
FOUR = u'\u0664'
FIVE = u'\u0665'
SIX = u'\u0666'
SEVEN = u'\u0667'
EIGHT = u'\u0668'
NINE = u'\u0669'
# Arabic-specific signs and special characters
PERCENT = u'\u066a'
DECIMAL = u'\u066b'
THOUSANDS = u'\u066c'
STAR = u'\u066d'
MINI_ALEF = u'\u0670'
ALEF_WASLA = u'\u0671'
FULL_STOP = u'\u06d4'
BYTE_ORDER_MARK = u'\ufeff'

# Diacritics (harakat and tanwin)
FATHATAN = u'\u064b'
DAMMATAN = u'\u064c'
KASRATAN = u'\u064d'
FATHA = u'\u064e'
DAMMA = u'\u064f'
KASRA = u'\u0650'
SHADDA = u'\u0651'
SUKUN = u'\u0652'

# Small (dagger) vowel letters
SMALL_ALEF=u"\u0670"
SMALL_WAW=u"\u06E5"
SMALL_YEH=u"\u06E6"

#---------------------------------------------------------------------------
# Arabic ligatures
#---------------------------------------------------------------------------

# Presentation-form LAM-ALEF ligatures (single code points)
LAM_ALEF=u'\ufefb'
LAM_ALEF_HAMZA_ABOVE=u'\ufef7'
LAM_ALEF_HAMZA_BELOW=u'\ufef9'
LAM_ALEF_MADDA_ABOVE=u'\ufef5'
# Decomposed (multi-character) equivalents of the ligatures above
simple_LAM_ALEF=LAM+ALEF
simple_LAM_ALEF_HAMZA_ABOVE=LAM+ALEF_HAMZA_ABOVE
simple_LAM_ALEF_HAMZA_BELOW=LAM+ALEF_HAMZA_BELOW
simple_LAM_ALEF_MADDA_ABOVE=LAM+HAMZA+FATHA+ALEF
|
| 112 |
+
|
| 113 |
+
|
libqutrub/classnoun.py
ADDED
|
@@ -0,0 +1,368 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
|
| 2 |
+
# -*- coding=utf-8 -*-
|
| 3 |
+
|
| 4 |
+
#************************************************************************
|
| 5 |
+
# $Id: classnoun.py, v 0.1 2016/04/01 12:14:00 Taha Zerrouki $
|
| 6 |
+
#
|
| 7 |
+
# ------------
|
| 8 |
+
# Description:
|
| 9 |
+
# ------------
|
| 10 |
+
# Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
|
| 11 |
+
#
|
| 12 |
+
# The Main class to do the Noun derivation
|
| 13 |
+
#
|
| 14 |
+
# -----------------
|
| 15 |
+
# Revision Details: (Updated by Revision Control System)
|
| 16 |
+
# -----------------
|
| 17 |
+
# $Date: 2016/04/01 12:14:00 $
|
| 18 |
+
# $Author: Taha Zerrouki $
|
| 19 |
+
# $Revision: 0.1 $
|
| 20 |
+
# $Source: qutrub.sourceforge.net
|
| 21 |
+
#
|
| 22 |
+
#***********************************************************************/
|
| 23 |
+
"""
|
| 24 |
+
Noun Class for derivation
|
| 25 |
+
@author: Taha Zerrouki
|
| 26 |
+
@contact: taha dot zerrouki at gmail dot com
|
| 27 |
+
@copyright: Arabtechies, Arabeyes, Taha Zerrouki
|
| 28 |
+
@license: GPL
|
| 29 |
+
@date:2016/04/01
|
| 30 |
+
@version: 0.1
|
| 31 |
+
"""
|
| 32 |
+
import pyarabic.araby as araby
|
| 33 |
+
from pyarabic.araby import FATHA, DAMMA, KASRA, SHADDA, SUKUN, HAMZA, ALEF, \
|
| 34 |
+
NOON, YEH_HAMZA, WAW, TATWEEL, MEEM, MEEM, YEH, TEH, ALEF_MAKSURA, DAMMATAN
|
| 35 |
+
import libqutrub.classverb as classverb
|
| 36 |
+
import libqutrub.verb_const as vconst
|
| 37 |
+
import libqutrub.ar_verb as ar_verb
|
| 38 |
+
|
| 39 |
+
#~ class ConjugStem:
|
| 40 |
+
# Noun derivation
|
| 41 |
+
class NounClass(classverb.VerbClass):
    """
    Noun Class: represents nouns derived from a verb — the subject
    noun (اسم الفاعل) and the object noun (اسم المفعول).
    """
    def __init__(self, verb, transitive, future_type=FATHA):
        """
        init method
        @param verb: the given verb
        @type verb: unicode.
        @param transitive: the verb is transitive or not (ignored: the
            verb is always conjugated as transitive, see below)
        @type transitive: Boolean.
        @param future_type: The mark of the third radical letter in the verb,
            used for triliteral verb only. Default value is Fatha;
        @type future_type: unicode; one arabic letter (Fatha, Damma, Kasra).
        """
        # we force transitive to True, so the base verb conjugator
        # generates the passive voices needed for the object noun stem
        classverb.VerbClass.__init__(self, verb, True, future_type)
        self._prepare_subject_stem()
        self._prepare_object_stem()

    #####################################
    #{ Derivation stem preparation
    #####################################

    def _prepare_subject_stem(self):
        """
        Prepare the subject-noun (اسم الفاعل) derivation stem.
        The stem will be concatenated with conjugation affixes.
        The result is stored in self.tab_conjug_stem[vconst.SubjectNoun].

        Derivation rules (summarized from classical grammar):
        - Triliteral verb: pattern "faa'il" (فاعل). A hollow verb
          (weak middle radical, represented here by two letters since
          the long ALEF is carried as a haraka) gets HAMZA as its
          middle radical (e.g. قال -> قائل). A defective verb (weak
          last radical) takes YEH as its last radical.
        - Non-triliteral verb: built on the present-tense stem with
          MEEM as prefix. A five-letter verb starting with TEH takes
          KASRA before its last radical, unlike its present tense.
        """
        letters = self.word_letters
        marks = self.word_marks
        # default: keep the verb's own letters and marks
        derivation_subject_marks = marks
        derivation_subject_letters = letters
        # triliteral verb case
        if self.vlength == 3:
            # hollow verb: two letters only (the middle long ALEF is a
            # haraka), so insert HAMZA as the middle radical
            if len(letters) == 2:
                derivation_subject_letters = letters[0] + HAMZA + letters[1]
            elif letters.endswith(ALEF_MAKSURA) or letters.endswith(vconst.ALEF_MAMDUDA):
                # defective verb: the weak last radical becomes YEH
                derivation_subject_letters = letters[0] + letters[1] + YEH

            # marks of the "faa'il" pattern, common to all triliteral verbs
            derivation_subject_marks = vconst.ALEF_HARAKA + KASRA + DAMMA

        # non-triliteral verb
        else:
            # five-letter verb starting with TEH: its present tense has
            # FATHA before the last radical, but the subject noun needs
            # KASRA there, so build the marks explicitly
            if self.vlength == 5 and letters.startswith(TEH):
                derivation_subject_marks = FATHA + FATHA + SUKUN + KASRA + DAMMA
            else:
                # other non-triliteral verbs follow their present-tense stem
                derivation_subject_marks = self.tab_conjug_stem[vconst.TenseFuture].marks
                derivation_subject_letters = self.tab_conjug_stem[vconst.TenseFuture].letters

        # long-vowel handling: if the verb's original marks are shorter
        # than the derived marks, a long haraka is present in the verb;
        # convert the long haraka into its matching counterpart
        if len(marks) < len(derivation_subject_marks):
            derivation_subject_marks = self._homogenize_harakat(marks, derivation_subject_marks)
        # add the haraka of the MEEM prefix for non-triliteral verbs
        if self.vlength != 3:
            if self.vlength == 5 and letters.startswith(TEH):
                # add DAMMA for the MEEM prefix
                derivation_subject_marks = DAMMA + derivation_subject_marks
            else:
                # present-tense stems start with the prefix FATHA;
                # replace it by the DAMMA of the MEEM prefix
                if derivation_subject_marks.startswith(FATHA):
                    derivation_subject_marks = DAMMA + derivation_subject_marks[1:]
        ### store the subject-noun derivation stem
        self.tab_conjug_stem[vconst.SubjectNoun] = classverb.ConjugStem(
            vconst.SubjectNoun, derivation_subject_letters, derivation_subject_marks)

    def _prepare_object_stem(self):
        """
        Prepare the object-noun (اسم المفعول) derivation stem.
        The stem will be concatenated with conjugation affixes.
        The result is stored in self.tab_conjug_stem[vconst.ObjectNoun].

        Derivation rules (summarized from classical grammar):
        - Triliteral sound verb: pattern "maf'uul" (مفعول).
        - Triliteral hollow verb (weak middle radical): derived from
          the active present-tense stem (e.g. يقول -> مقول).
        - Triliteral defective verb (weak last radical): derived from
          the active present-tense stem; the final radical is doubled
          later (see derivate()).
        - Non-triliteral verb: derived from the passive present-tense
          stem with a MEEM prefix.
        """
        letters = self.word_letters
        marks = self.word_marks
        # default: keep the verb's own letters and marks
        derivation_object_marks = marks
        derivation_object_letters = letters
        # triliteral verb case
        if self.vlength == 3:
            # hollow verb: two letters only, derive from the active
            # present-tense stem
            if len(letters) == 2:
                derivation_object_marks = self.tab_conjug_stem[vconst.TenseFuture].marks
                derivation_object_letters = self.tab_conjug_stem[vconst.TenseFuture].letters

            elif (self.word_letters.endswith(ALEF_MAKSURA) or
                  self.word_letters.endswith(ALEF) or self.word_letters.endswith(YEH)):
                # defective verb: derive from the active present-tense
                # stem; a SHADDA is appended to the last radical later
                derivation_object_marks = self.tab_conjug_stem[vconst.TenseFuture].marks
                derivation_object_letters = self.tab_conjug_stem[vconst.TenseFuture].letters
            else:  # sound, doubled and assimilated verbs
                if self.word_letters.endswith(SHADDA):
                    # doubled verb: strip the trailing SHADDA and
                    # duplicate the second radical instead
                    derivation_object_letters = letters[0] + letters[1]*2
                else:
                    derivation_object_letters = letters
                # marks of the "maf'uul" pattern
                derivation_object_marks = FATHA + SUKUN + vconst.WAW_HARAKA + DAMMA
        # non-triliteral verb
        else:
            # derive from the passive present-tense stem
            derivation_object_marks = self.tab_conjug_stem[vconst.TensePassiveFuture].marks
            derivation_object_letters = self.tab_conjug_stem[vconst.TensePassiveFuture].letters

        ### store the object-noun derivation stem
        self.tab_conjug_stem[vconst.ObjectNoun] = classverb.ConjugStem(
            vconst.ObjectNoun, derivation_object_letters, derivation_object_marks)

    def conjugate_noun(self, noun_type):
        """
        Build a derived noun from its prepared stem by adding the
        prefix (MEEM where applicable) and the case-ending suffix.
        @param noun_type: the derived-noun type
            (vconst.SubjectNoun or vconst.ObjectNoun)
        @type noun_type: unicode
        @return: the vocalized derived noun
        @rtype: unicode;
        """
        if noun_type == vconst.SubjectNoun:
            # triliteral subject nouns (pattern faa'il) take no MEEM prefix
            if self.vlength == 3:
                pre_val = u""
            else:
                pre_val = MEEM
            # suffix: final DAMMA (nominative case ending)
            suf_val = DAMMA
        elif noun_type == vconst.ObjectNoun:
            # object nouns always take a MEEM prefix
            pre_val = MEEM
            suf_val = DAMMA
        else:
            # unknown noun type: no affixes
            pre_val = ""
            suf_val = ""

        stem_l = self.tab_conjug_stem[noun_type].letters
        stem_m = self.tab_conjug_stem[noun_type].marks
        # _m: marks, _l: letters
        if pre_val != u"":
            pre_val_l = pre_val
            # the prefix letter takes the first mark of the stem
            pre_val_m = stem_m[0]
            stem_m = stem_m[1:]
        else:
            pre_val_l = u""
            pre_val_m = u""

        # the suffix already starts with a HARAKA;
        # we add TATWEEL to ensure a valid word in the uniformate function
        suf_val = TATWEEL + suf_val
        # uniformate suffix; cached to avoid duplicated standardization
        if suf_val in self.cache_standard['suffix']:
            (suf_val_l, suf_val_m) = self.cache_standard['suffix'][suf_val]
        else:
            (suf_val_l, suf_val_m) = ar_verb.uniformate_suffix(suf_val)
            self.cache_standard['suffix'][suf_val] = (suf_val_l, suf_val_m)
        # add affixes to the stem:
        # the end of the stem marks takes the beginning of the suffix marks
        conj_m = pre_val_m + stem_m[:-1] + suf_val_m
        # the beginning of suffix letters is the TATWEEL; strip it
        conj_l = pre_val_l + stem_l + suf_val_l[1:]

        # Treat sukun; cached to avoid duplicated standardization
        key_cache = u'-'.join([conj_l, conj_m])
        if key_cache in self.cache_standard['sukun']:
            conj_m = self.cache_standard['sukun'][key_cache]
        else:
            conj_m = ar_verb.treat_sukun2(conj_l, conj_m)
            self.cache_standard['sukun'][key_cache] = conj_m
        # standard orthographic form; cached as well
        key_cache = u'-'.join([conj_l, conj_m])
        if key_cache in self.cache_standard['standard']:
            conj = self.cache_standard['standard'][key_cache]
        else:
            conj = ar_verb.standard2(conj_l, conj_m)
            self.cache_standard['standard'][key_cache] = conj
        return conj

    def derivate(self):
        """
        Derivate the subject and object nouns from the verb.
        @return: subject noun and object noun, joined by a tab
        @rtype: unicode;
        """
        subj = self.conjugate_noun(vconst.SubjectNoun)
        obj = self.conjugate_noun(vconst.ObjectNoun)

        # a final DAMMA becomes DAMMATAN (nominative tanwin)
        if subj.endswith(araby.DAMMA):
            subj = subj[:-1]+araby.DAMMATAN
        # triliteral defective verb: double the final YEH of the object
        # noun (e.g. مرمي -> مرميّ)
        if self.vlength == 3 and obj.endswith(araby.YEH):
            obj += SHADDA + DAMMATAN
        # triliteral object noun ending with a doubled WAW + DAMMA is
        # contracted into SHADDA + DAMMATAN (e.g. مدعوو -> مدعوّ)
        if self.vlength == 3 and obj.endswith(WAW*2+DAMMA):
            obj = obj[:-2] +SHADDA + DAMMATAN
        if obj.endswith(araby.DAMMA):
            obj = obj[:-1]+araby.DAMMATAN

        return u"\t".join([subj, obj])
|
libqutrub/classverb.py
ADDED
|
@@ -0,0 +1,1101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
|
| 2 |
+
# -*- coding=utf-8 -*-
|
| 3 |
+
|
| 4 |
+
#************************************************************************
|
| 5 |
+
# $Id: classverb.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
|
| 6 |
+
#
|
| 7 |
+
# ------------
|
| 8 |
+
# Description:
|
| 9 |
+
# ------------
|
| 10 |
+
# Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
|
| 11 |
+
#
|
| 12 |
+
# The Main class to do the conjugation
|
| 13 |
+
#
|
| 14 |
+
# -----------------
|
| 15 |
+
# Revision Details: (Updated by Revision Control System)
|
| 16 |
+
# -----------------
|
| 17 |
+
# $Date: 2009/06/02 01:10:00 $
|
| 18 |
+
# $Author: Taha Zerrouki $
|
| 19 |
+
# $Revision: 0.7 $
|
| 20 |
+
# $Source: arabtechies.sourceforge.net
|
| 21 |
+
#
|
| 22 |
+
#***********************************************************************/
|
| 23 |
+
"""
|
| 24 |
+
Verb Class for conjugation
|
| 25 |
+
@author: Taha Zerrouki
|
| 26 |
+
@contact: taha dot zerrouki at gmail dot com
|
| 27 |
+
@copyright: Arabtechies, Arabeyes, Taha Zerrouki
|
| 28 |
+
@license: GPL
|
| 29 |
+
@date:2009/06/02
|
| 30 |
+
@version: 0.9
|
| 31 |
+
"""
|
| 32 |
+
import copy
|
| 33 |
+
# from ar_ctype import *
|
| 34 |
+
#~ import sys
|
| 35 |
+
#~ import re
|
| 36 |
+
import pyarabic.araby as araby
|
| 37 |
+
from pyarabic.araby import FATHA, DAMMA, KASRA, SHADDA, SUKUN, HAMZA, ALEF, \
|
| 38 |
+
NOON, YEH_HAMZA, WAW, TATWEEL, MEEM, MEEM, YEH, TEH, ALEF_MAKSURA
|
| 39 |
+
#~ from libqutrub.ar_verb import *
|
| 40 |
+
import libqutrub.ar_verb as ar_verb
|
| 41 |
+
#~ from libqutrub.verb_const import *
|
| 42 |
+
import libqutrub.verb_const as vconst
|
| 43 |
+
import libqutrub.conjugatedisplay as conjugatedisplay
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class ConjugStem:
    """
    Holds one conjugation stem: the tense it belongs to together with
    the stem letters and their harakat (marks), stored as two separate
    strings.  Instances are kept in VerbClass.tab_conjug_stem, one
    entry per tense, and later concatenated with conjugation affixes.
    """
    # class-level defaults, always overridden per instance in __init__
    tense = u""
    letters = u""
    marks = u""

    def __init__(self, tense, letters, marks):
        """
        Build a stem record.

        @param tense: the tense this stem serves
        @type tense: unicode.
        @param letters: the stem letters
        @type letters: unicode.
        @param marks: the stem marks (harakat)
        @type marks: unicode.
        """
        self.marks = marks
        self.letters = letters
        self.tense = tense
|
| 72 |
+
# Module-level cache shared by every VerbClass instance (bound to
# self.cache_standard in __init__).  Memoizes the three expensive
# post-processing steps of conjugation so repeated forms are not
# recomputed across verbs:
#   'standard' - standard orthographic rendering (ar_verb.standard2)
#   'sukun'    - sukun sequence treatment (ar_verb.treat_sukun2)
#   'suffix'   - suffix uniformization (ar_verb.uniformate_suffix)
cache_standard = {'standard':{},
                  'sukun':{},
                  'suffix':{}}
|
| 76 |
+
class VerbClass:
|
| 77 |
+
"""
|
| 78 |
+
Verb Class: represent a verb, prepare it to be conjugated and store the conjugation result
|
| 79 |
+
"""
|
| 80 |
+
#~ verb = u""
|
| 81 |
+
#~ #" internl verb : is the normalized form of the verb"
|
| 82 |
+
#~ internal_verb = u""
|
| 83 |
+
#~ word_letters = u""
|
| 84 |
+
#~ word_marks = u""
|
| 85 |
+
#~ unvocalized = u""
|
| 86 |
+
#~ vlength = 0
|
| 87 |
+
#~ vtype = u""
|
| 88 |
+
#~ future_type = u''
|
| 89 |
+
#~ transitive = u""
|
| 90 |
+
#~ hamza_zaida = False
|
| 91 |
+
#~ #deprecated
|
| 92 |
+
#~ # teh_zaida=False
|
| 93 |
+
#~ future_form = u""
|
| 94 |
+
#~ conj_display = None
|
| 95 |
+
#~ tab_conjug_stem = None
|
| 96 |
+
def __init__(self, verb, transitive, future_type=FATHA):
    """
    Build a conjugatable verb object: analyze the given vocalized verb,
    precompute its conjugation stems for all tenses, and set up the
    display object that will collect conjugation results.

    @param verb: the given verb
    @type verb: unicode.
    @param transitive: the verb is transitive or not
    @type transitive: Boolean.
    @param future_type: The mark of the third radical letter in the verb,
    used for triliteral verbs only. Default value is Fatha;
    @type future_type: unicode; one arabic letter (Fatha, Damma, Kasra).
    """
    self.verb = verb
    # module-level cache shared between instances; avoids repeating
    # standardisation, treat_sukun, and suffix uniformization work
    self.cache_standard = cache_standard
    # normalized internal form of the verb
    self.internal_verb = ar_verb.normalize(verb)
    self.future_type = ar_verb.get_future_type_by_name(future_type)
    # split the vocalized verb into parallel letters/marks strings
    (self.word_letters, self.word_marks) = ar_verb.uniformate_verb(verb)
    # haraka before the last one in the past form
    self.past_haraka = araby.secondlast_char(self.word_marks)
    self.word_marks = ar_verb.uniformate_alef_origin(self.word_marks,
       self.internal_verb, self.future_type)

    self.transitive = transitive
    self.hamza_zaida = False
    # per-tense conjugation stems, filled by the _prepare_* methods
    self.tab_conjug_stem = {}
    verb = self.verb
    # verb-type label indexed by unvocalized length (3..9 letters)
    tab_type = [u"", u"", u"", u"فعل ثلاثي", u"فعل رباعي", u"فعل خماسي",
        u"فعل سداسي", u"فعل سباعي", u"فعل ثماني", u"فعل تساعي"]
    verb = ar_verb.normalize(verb)

    self.unvocalized = araby.strip_harakat(verb)
    verb_nm = self.unvocalized
    self.vlength = len(verb_nm)
    self.vtype = tab_type[self.vlength]

    # is the leading hamza an augment (non-radical) letter?
    self.hamza_zaida = self._is_hamza_zaida(verb_nm)

    # augmented Teh
    # deprecated
    #self.teh_zaida=self.is_teh_zaida(verb_nm)

    # Irregular verbs take their future/imperative stems from a special
    # table; regular verbs have them generated.  The past tense has no
    # irregular forms, so its stems are always generated directly.
    self.past_stem = ""
    self._prepare_past_stem()
    self._prepare_passive_past_stem()
    if self._is_irregular_verb():
        self._prepare_irregular_future_imperative_stem()

    else:
        self._prepare_future_imperative_stem()


    # display object collecting the conjugation results
    self.conj_display = conjugatedisplay.ConjugateDisplay(self.verb)
    if self.transitive :
        self.conj_display.add_attribut(u"اللزوم/التعدي", u"متعدي")
    else :
        self.conj_display.add_attribut(u"اللزوم/التعدي", u"لازم")
    self.conj_display.add_attribut(u"الفعل", self.verb)
    self.conj_display.add_attribut(u"نوع الفعل", self.vtype)
    # third-person masculine singular future form, used as the headline
    self.future_form = self.conjugate_tense_pronoun(vconst.TenseFuture,
        vconst.PronounHuwa)
    self.conj_display.set_future_form(self.future_form)
    if self.transitive :
        self.conj_display.settransitive()
    self.conj_display.setbab(self.future_type)
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def __del__(self):
    """
    Destructor: drop the references to the display object and the
    stem table so they can be reclaimed.
    """
    self.tab_conjug_stem = None
    self.conj_display = None
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
#####################################
|
| 180 |
+
#{ Attributes functions
|
| 181 |
+
#####################################
|
| 182 |
+
def set_display(self, mode = 'Text'):
    """
    Select the output format used by the conjugation display object.

    Supported modes: 'Text', 'HTML', 'HTMLColoredDiacritics', 'DICT',
    'CSV', 'GUI', 'TABLE', 'XML', 'TeX', 'ROWS'.

    @param mode: the display mode to use (default 'Text')
    """
    # delegate to the display object; it owns the rendering choice
    self.conj_display.setmode(mode)
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def get_conj_display(self):
    """
    Get the conjugation display object holding the results.

    @return: a shallow copy of the internal display object.
    @rtype: conjugatedisplay class
    """
    # return a shallow copy so callers cannot mutate our instance
    display_snapshot = copy.copy(self.conj_display)
    return display_snapshot
|
| 208 |
+
#####################################
|
| 209 |
+
#{ Extract information from verb functions
|
| 210 |
+
#####################################
|
| 211 |
+
def _is_hamza_zaida(self, verb_normalized_unvocalized):
    """
    Return True when the leading HAMZA of the verb is an augment
    (non-radical) letter.
    ترجع إذا كانت الهمزة الأولى في الفعل غير أصلية

    The augment hamza occurs only in 4-letter verbs of the 'af3ala
    pattern: the verb starts with HAMZA, its second letter is not ALEF
    (which would be the faa3ala pattern) and its third letter is not
    SHADDA (the fa33ala pattern).

    @param verb_normalized_unvocalized: the unvocalized form of the verb.
    @type verb_normalized_unvocalized: unicode
    @return: return True if the starting Hamza is not original
    @rtype: boolean;
    """
    verb = verb_normalized_unvocalized
    # Original code tested len/startswith twice (an `if not X: return
    # False` followed by `elif X and ...`); collapsed into one boolean
    # expression with identical behavior.
    return (len(verb) == 4 and verb.startswith(HAMZA)
            and verb[1] != ALEF and verb[2] != SHADDA)
|
| 237 |
+
|
| 238 |
+
def _homogenize_harakat(self, original_harakat, applied_harakat):
    """
    Adjust a harakat (marks) pattern so it stays aligned with the verb
    letters during conjugation.

    When the verb's original marks string is shorter than the pattern
    to apply, the verb contains a long vowel: one alef-type haraka
    occupies a single slot where the pattern supplies two marks.  The
    two pattern marks covering that slot are then collapsed into the
    single equivalent haraka looked up in
    vconst.HOMOGENIZE_ALEF_HARAKA_TABLE.

    @param original_harakat: given original harakat of the verb.
    @type original_harakat: unicode.
    @param applied_harakat: given harakat to be applied to the verb.
    @type applied_harakat: unicode.
    @return: new harakat to be applied to the verb.
    @rtype: unicode.
    """
    marks = original_harakat
    new_marks = applied_harakat
    # A shorter original marks string signals a long vowel; locate it:
    # plain alef haraka first, then the waw- and yeh-flavoured ones.
    if len(marks) < len(new_marks):
        alef_haraka_pos = marks.find(vconst.ALEF_HARAKA)
        if alef_haraka_pos < 0:
            alef_haraka_pos = marks.find(vconst.ALEF_WAW_HARAKA)
        if alef_haraka_pos < 0:
            alef_haraka_pos = marks.find(vconst.ALEF_YEH_HARAKA)
        # Collapse the two applied marks at that slot into the single
        # homogenized haraka from the lookup table (bounds-checked so a
        # trailing long vowel cannot index past the pattern).
        if alef_haraka_pos >= 0 and alef_haraka_pos + 1 < len(new_marks):
            first = new_marks[alef_haraka_pos]
            second = new_marks[alef_haraka_pos + 1]
            changed_haraka = \
                vconst.HOMOGENIZE_ALEF_HARAKA_TABLE[first][second]
            new_marks = new_marks[:alef_haraka_pos] + changed_haraka \
                + new_marks[alef_haraka_pos+2:]
    return new_marks
|
| 271 |
+
#####################################
|
| 272 |
+
#{ Preparing conjugation stems for every tense functions
|
| 273 |
+
#####################################
|
| 274 |
+
def _prepare_future_imperative_stem(self):
    """
    Prepare the conjugation stems for the future tenses
    (future, jussive, subjunctive) and the imperative tense.
    Those stems will be concatenated with conjugation affixes.
    This function stores results in self.tab_conjug_stem.
    This function prepares conjugation stems for the following tenses:
        - vconst.TenseFuture : present/future indicative
        - vconst.TenseJussiveFuture : jussive present
        - vconst.TenseSubjunctiveFuture : subjunctive present
        - vconst.TenseConfirmedFuture : emphatic (confirmed) present
        - vconst.TensePassiveFuture : passive present
        - vconst.TensePassiveJussiveFuture : passive jussive present
        - vconst.TensePassiveSubjunctiveFuture : passive subjunctive present
        - vconst.TensePassiveConfirmedFuture : passive emphatic present
        - vconst.TenseImperative : imperative
        - vconst.TenseConfirmedImperative : emphatic imperative.
    """
    letters = self.word_letters
    marks = self.word_marks
    future_letters = letters
    # Select the mark patterns by verb length.
    # NOTE(review): a vlength outside 3..6 leaves the pattern variables
    # unbound (NameError) — presumably rejected upstream; confirm.
    # triliteral verb
    if self.vlength == 3:
        first_future_mark = FATHA
        first_passive_future_mark = DAMMA
        future_marks = SUKUN + self.future_type + FATHA
        passive_future_marks = SUKUN + FATHA + FATHA
        # assimilated (waw-initial) verbs are handled further below
        #ToDO

    # quadriliteral verb
    elif self.vlength == 4:
        first_future_mark = DAMMA
        first_passive_future_mark = DAMMA
        future_marks = FATHA + SUKUN + KASRA + DAMMA
        passive_future_marks = FATHA + SUKUN + FATHA + DAMMA
    # five-letter verb
    elif self.vlength == 5:
        first_future_mark = FATHA
        first_passive_future_mark = DAMMA
        if letters.startswith(TEH):
            future_marks = FATHA + FATHA + SUKUN + FATHA + DAMMA
            passive_future_marks = FATHA + FATHA + SUKUN + FATHA + DAMMA
        else :
            future_marks = FATHA + SUKUN + FATHA + KASRA + DAMMA
            passive_future_marks = FATHA + SUKUN + FATHA + FATHA + DAMMA
    # six-letter verb
    elif self.vlength == 6:
        first_future_mark = FATHA
        first_passive_future_mark = DAMMA
        future_marks = FATHA + SUKUN + FATHA + SUKUN + KASRA + DAMMA
        passive_future_marks = FATHA + SUKUN + FATHA + SUKUN + FATHA + DAMMA
    # Long-vowel handling: if the verb's original marks string is
    # shorter than the applied pattern, a long vowel is present and
    # the pattern must be homogenized to match.
    if len(marks) < len(future_marks):
        future_marks = self._homogenize_harakat(marks, future_marks)
        passive_future_marks = self._homogenize_harakat(marks,
            passive_future_marks)
    # the imperative starts from the future pattern, before the
    # prefix-letter handling below
    imp_marks = future_marks
    imp_letters = future_letters
    # verbs beginning with hamzat-wasl (ALEF) or an augment hamza:
    # drop the first letter and its mark in the future stems
    if letters.startswith(ALEF) or self.hamza_zaida:
        future_letters = letters[1:]
        future_marks = future_marks[1:]
        passive_future_marks = passive_future_marks[1:]
        passive_letters = letters[1:]
    # assimilated verb (waw-initial triliteral): the waw is dropped in
    # the active future when the future mark is KASRA, or FATHA with an
    # all-FATHA past and no final SHADDA
    elif self.vlength == 3 and self.word_letters.startswith(WAW) and \
        (self.future_type == KASRA or (self.future_type==FATHA and \
        self.word_marks==FATHA+FATHA+FATHA and \
        not self.word_letters.endswith(SHADDA))):
        future_letters = letters[1:]
        future_marks = future_marks[1:]
        ## passive_future_marks=passive_future_marks[1:]
        passive_letters = letters
    else:
        future_letters = letters
        passive_letters = letters
    new_marks = first_future_mark + future_marks
    passive_marks = first_passive_future_mark + passive_future_marks

    # imperative: same first-letter handling as the active future
    if imp_letters.startswith(ALEF):
        imp_letters = letters[1:]
        imp_marks = imp_marks[1:]
    elif self.vlength == 3 and self.word_letters.startswith(WAW) and \
        (self.future_type == KASRA or (self.future_type==FATHA and \
        self.word_marks==FATHA+FATHA+FATHA)):
        imp_letters = letters[1:]
        imp_marks = imp_marks[1:]
    else:
        imp_letters = letters

    # Defective verbs in the passive: a triliteral ending in alef
    # mamduda gets its final letter replaced by ALEF MAKSURA.
    ## passive_letters=future_letters
    if self.vlength == 3 and passive_letters.endswith(vconst.ALEF_MAMDUDA):
        passive_letters = passive_letters[:-1]+ALEF_MAKSURA
    # The imperative takes the future marks without the prefix haraka.
    ## imp_marks=future_marks
    ## new_marks=first_future_mark+future_marks
    ## passive_marks=first_passive_future_mark+passive_future_marks
    # active future
    self.tab_conjug_stem[vconst.TenseFuture] = ConjugStem(
        vconst.TenseFuture, future_letters, new_marks)
    # jussive and subjunctive share the same stem; the affixes differ
    self.tab_conjug_stem[vconst.TenseJussiveFuture] = ConjugStem(
        vconst.TenseJussiveFuture, future_letters, new_marks)
    self.tab_conjug_stem[vconst.TenseSubjunctiveFuture] = ConjugStem(
        vconst.TenseSubjunctiveFuture, future_letters, new_marks)
    # emphatic (confirmed) present
    self.tab_conjug_stem[vconst.TenseConfirmedFuture] = ConjugStem(
        vconst.TenseConfirmedFuture, future_letters, new_marks)

    # passive voice stems
    self.tab_conjug_stem[vconst.TensePassiveFuture] = ConjugStem(
        vconst.TensePassiveFuture, passive_letters, passive_marks)
    self.tab_conjug_stem[vconst.TensePassiveJussiveFuture] = ConjugStem(
        vconst.TensePassiveJussiveFuture, passive_letters, passive_marks)
    self.tab_conjug_stem[vconst.TensePassiveSubjunctiveFuture] = \
        ConjugStem(vconst.TensePassiveSubjunctiveFuture, passive_letters,
        passive_marks)
    self.tab_conjug_stem[vconst.TensePassiveConfirmedFuture] = ConjugStem(
        vconst.TensePassiveConfirmedFuture, passive_letters, passive_marks)

    # imperative
    self.tab_conjug_stem[vconst.TenseImperative] = ConjugStem(
        vconst.TenseImperative, imp_letters, imp_marks)
    # emphatic imperative
    self.tab_conjug_stem[vconst.TenseConfirmedImperative] = ConjugStem(
        vconst.TenseConfirmedImperative, imp_letters, imp_marks)
|
| 410 |
+
|
| 411 |
+
def _prepare_past_stem(self):
    """
    Build the conjugation stem for the active past tense
    (vconst.TensePast) and record it in self.tab_conjug_stem.

    The past stem is the normalized verb itself; its letters and marks
    were already separated in __init__, so they are stored as-is for
    later concatenation with the past-tense affixes.
    """
    self.past_stem = self.internal_verb
    past_entry = ConjugStem(vconst.TensePast,
                            self.word_letters,
                            self.word_marks)
    self.tab_conjug_stem[vconst.TensePast] = past_entry
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
def _prepare_passive_past_stem(self):
    """
    Prepare the conjugation stem for the passive past tense.
    The stem will be concatenated with conjugation affixes.
    This function stores its result in self.tab_conjug_stem for:
        - vconst.TensePassivePast : passive past.
    """
    letters = self.word_letters
    marks = self.word_marks

    # Defective triliteral verbs ending in alef mamduda whose middle
    # mark is FATHA: the final letter becomes ALEF MAKSURA in the
    # passive.
    if len(letters) == 3 and letters.endswith(vconst.ALEF_MAMDUDA) \
       and marks[1] == FATHA:
        letters = letters[:-1] + ALEF_MAKSURA
    # Passive past mark patterns by verb length.
    # NOTE(review): a vlength outside 3..6 leaves passive_marks
    # unbound (NameError) — presumably rejected upstream; confirm.
    if self.vlength == 3:
        passive_marks = DAMMA + KASRA + FATHA
    elif self.vlength == 4:
        passive_marks = DAMMA + SUKUN + KASRA + FATHA
    elif self.vlength == 5:
        # five-letter verbs starting with TEH take a different pattern
        if letters.startswith(TEH):
            passive_marks = DAMMA + DAMMA + SUKUN + KASRA + FATHA
        else :
            passive_marks = DAMMA + SUKUN + DAMMA + KASRA + FATHA
    elif self.vlength == 6:
        passive_marks = DAMMA + SUKUN + DAMMA + SUKUN + KASRA + FATHA
    # If the verb's original marks string is shorter than the passive
    # pattern, a long vowel is present; homogenize the pattern so the
    # long haraka is collapsed to its single equivalent.
    if len(marks) < len(passive_marks):
        passive_marks = self._homogenize_harakat(marks, passive_marks)

    # Hollow verbs whose future mark is FATHA or KASRA take a yeh-type
    # haraka in the past on sukun clash; in the passive this must be
    # switched to the alternative (damma-side) haraka.
    if len(passive_marks) == 2 and passive_marks[0] == vconst.YEH_HARAKA \
       and self.future_type in (FATHA, KASRA):
        passive_marks = vconst.ALTERNATIVE_YEH_HARAKA + FATHA
    self.tab_conjug_stem[vconst.TensePassivePast] = ConjugStem(\
        vconst.TensePassivePast, letters, passive_marks)
|
| 464 |
+
|
| 465 |
+
def conjugate_tense_pronoun(self, tense, pronoun):
    """
    Conjugate the verb for one tense with one pronoun.

    Glues the tense's precomputed stem (letters and marks are stored
    separately) to the prefix/suffix pair defined for
    (tense, pronoun) in vconst.TableTensePronoun, then normalizes
    sukun sequences and renders the standard orthographic form.
    Intermediate results are memoized in self.cache_standard to avoid
    repeating the suffix-uniformization, sukun-treatment and
    standardization steps.

    Fix: the original computed ``pre_val_l + stem_l + suf_val_l`` and
    immediately overwrote it with the TATWEEL-stripped version; the
    dead store has been removed (no behavior change).

    @param tense: given tense
    @type tense: unicode name of the tense
    @param pronoun: given pronoun
    @type pronoun: unicode name of the pronoun
    @return: conjugated verb
    @rtype: unicode;
    """
    # prefix and suffix for this (tense, pronoun) cell
    pre_val = vconst.TableTensePronoun[tense][pronoun][0]
    suf_val = vconst.TableTensePronoun[tense][pronoun][1]
    # _l: letters, _m: marks
    stem_l = self.tab_conjug_stem[tense].letters
    stem_m = self.tab_conjug_stem[tense].marks
    if pre_val != u"":
        # the prefix letter consumes the first stem mark
        pre_val_l = pre_val
        pre_val_m = stem_m[0]
        stem_m = stem_m[1:]
    else:
        pre_val_l = u""
        pre_val_m = u""

    # the suffix already starts with a HARAKA; prepend TATWEEL so the
    # uniformate function sees a valid letter+mark word
    suf_val = TATWEEL + suf_val
    # uniformate suffix (cached to avoid duplicated work)
    if suf_val in self.cache_standard['suffix']:
        (suf_val_l, suf_val_m) = self.cache_standard['suffix'][suf_val]
    else:
        (suf_val_l, suf_val_m) = ar_verb.uniformate_suffix(suf_val)
        self.cache_standard['suffix'][suf_val] = (suf_val_l, suf_val_m)
    # add the affixes to the stem:
    # the end of the stem marks takes the beginning of the suffix marks
    conj_m = pre_val_m + stem_m[:-1] + suf_val_m
    # the leading TATWEEL of the suffix letters is stripped
    conj_l = pre_val_l + stem_l + suf_val_l[1:]

    # treat sukun sequences (cached to avoid duplicated work)
    key_cache = u'-'.join([conj_l, conj_m])
    if key_cache in self.cache_standard['sukun']:
        conj_m = self.cache_standard['sukun'][key_cache]
    else:
        #~ conj_m = ar_verb.treat_sukun2(conj_l, conj_m, self.future_type)
        conj_m = ar_verb.treat_sukun2(conj_l, conj_m)
        self.cache_standard['sukun'][key_cache] = conj_m
    # standard orthographic form (cached to avoid duplicated work)
    key_cache = u'-'.join([conj_l, conj_m])
    if key_cache in self.cache_standard['standard']:
        conj = self.cache_standard['standard'][key_cache]
    else:
        conj = ar_verb.standard2(conj_l, conj_m)
        self.cache_standard['standard'][key_cache] = conj
    return conj
|
| 528 |
+
|
| 529 |
+
|
| 530 |
+
#----------------------------------------------------------------
# Conjugation in the different tenses:
# when a list of selected tenses is provided, only those are
# conjugated; otherwise all tenses are conjugated.
#----------------------------------------------------------------
def conjugate_all_tenses(self, listtense = None):
    """
    Conjugate a verb with a list of tenses.

    Only a few "pivot" pronouns per tense are conjugated through
    conjugate_tense_pronoun(); the remaining pronoun forms are derived
    from those pivots by substituting the final mark or the suffix
    letters, which avoids a full conjugation per pronoun.

    @param listtense: given tenses (defaults to vconst.TABLE_TENSE)
    @type listtense: list of unicode
    @return: conjugated verb
    @rtype: the type is given according to the display mode;
    """
    if not listtense:
        listtense = vconst.TABLE_TENSE
    for tense in listtense:
        if tense == vconst.TensePast:
            # pivot conjugation: first person singular (Ana)
            conj_ana = self.conjugate_tense_pronoun(tense,
             vconst.PronounAna)
            self.conj_display.add(tense, vconst.PronounAna, conj_ana)
            # second-person forms share the Ana stem; only the trailing
            # mark / suffix differs
            conj_ana_without_last_mark = conj_ana[:-1]
            self.conj_display.add(tense, vconst.PronounAnta,
             conj_ana_without_last_mark+FATHA)
            self.conj_display.add(tense, vconst.PronounAnti,
             conj_ana_without_last_mark+KASRA)
            self.conj_display.add(tense, vconst.PronounAntuma,
             conj_ana+MEEM+FATHA+ALEF)
            self.conj_display.add(tense, vconst.PronounAntuma_f,
             conj_ana+MEEM+FATHA+ALEF)
            self.conj_display.add(tense, vconst.PronounAntum,
             conj_ana+MEEM)
            self.conj_display.add(tense, vconst.PronounAntunna,
             conj_ana+NOON+SHADDA+FATHA)
            # NOTE(review): duplicate of the Ana add() above -- redundant
            self.conj_display.add(tense, vconst.PronounAna, conj_ana)

            conj_nahnu = self.conjugate_tense_pronoun(tense,
             vconst.PronounNahnu)
            self.conj_display.add(tense, vconst.PronounNahnu, conj_nahnu)

            conj_hunna = self.conjugate_tense_pronoun(tense,
             vconst.PronounHunna)
            self.conj_display.add(tense, vconst.PronounHunna, conj_hunna)

            conj_huma = self.conjugate_tense_pronoun(tense,
             vconst.PronounHuma)
            self.conj_display.add(tense, vconst.PronounHuma, conj_huma)

            conj_hum = self.conjugate_tense_pronoun(tense,
             vconst.PronounHum)
            self.conj_display.add(tense, vconst.PronounHum, conj_hum)

            # NOTE(review): Hunna was already conjugated above -- this
            # repeats the same work and the same add()
            conj_hunna = self.conjugate_tense_pronoun(tense,
             vconst.PronounHunna)
            self.conj_display.add(tense, vconst.PronounHunna, conj_hunna)

            conj_huwa = self.conjugate_tense_pronoun(tense,
             vconst.PronounHuwa)
            self.conj_display.add(tense, vconst.PronounHuwa, conj_huwa)
            conj_hya = self.conjugate_tense_pronoun(tense,
             vconst.PronounHya)
            self.conj_display.add(tense, vconst.PronounHya, conj_hya)
            # feminine dual: Hya's form with final mark replaced by
            # FATHA+ALEF
            self.conj_display.add(tense, vconst.PronounHuma_f,
             conj_hya[:-1]+FATHA+ALEF)
        elif tense == vconst.TensePassivePast:
            # pivot conjugation: first person singular (Ana)
            conj_ana = self.conjugate_tense_pronoun(tense,
             vconst.PronounAna)
            self.conj_display.add(tense, vconst.PronounAna, conj_ana)
            # second-person forms derived from Ana by mark/suffix change
            conj_ana_without_last_mark = conj_ana[:-1]
            self.conj_display.add(tense, vconst.PronounAnta,
             conj_ana_without_last_mark+FATHA)
            self.conj_display.add(tense, vconst.PronounAnti,
             conj_ana_without_last_mark+KASRA)
            self.conj_display.add(tense, vconst.PronounAntuma,
             conj_ana+MEEM+FATHA+ALEF)
            self.conj_display.add(tense, vconst.PronounAntuma_f,
             conj_ana+MEEM+FATHA+ALEF)
            self.conj_display.add(tense, vconst.PronounAntum,
             conj_ana+MEEM)
            self.conj_display.add(tense, vconst.PronounAntunna,
             conj_ana+NOON+SHADDA+FATHA)
            # NOTE(review): duplicate of the Ana add() above -- redundant
            self.conj_display.add(tense, vconst.PronounAna, conj_ana)

            conj_nahnu = self.conjugate_tense_pronoun(tense,
             vconst.PronounNahnu)
            self.conj_display.add(tense, vconst.PronounNahnu,
             conj_nahnu)

            conj_hunna = self.conjugate_tense_pronoun(tense,
             vconst.PronounHunna)
            self.conj_display.add(tense, vconst.PronounHunna,
             conj_hunna)

            # NOTE(review): Hunna conjugated twice -- redundant
            conj_hunna = self.conjugate_tense_pronoun(tense,
             vconst.PronounHunna)
            self.conj_display.add(tense, vconst.PronounHunna,
             conj_hunna)

            conj_huwa = self.conjugate_tense_pronoun(tense,
             vconst.PronounHuwa)
            self.conj_display.add(tense, vconst.PronounHuwa, conj_huwa)
            conj_hum = self.conjugate_tense_pronoun(tense,
             vconst.PronounHum)
            self.conj_display.add(tense, vconst.PronounHum, conj_hum)
            # case of a verb whose final radical is a hamza
            if conj_huwa.endswith(YEH+HAMZA+FATHA) :
                self.conj_display.add(tense, vconst.PronounHya,
                 conj_huwa[:-2]+YEH_HAMZA+FATHA+TEH+SUKUN)
                self.conj_display.add(tense, vconst.PronounHuma_f,
                 conj_huwa[:-2]+YEH_HAMZA+FATHA+TEH+FATHA+ALEF)
                ## conj_huma=self.conjugate_tense_pronoun(tense,
                ## vconst.PronounHuma)
                self.conj_display.add(tense, vconst.PronounHuma,
                 conj_huwa[:-2]+YEH_HAMZA+FATHA+ALEF)

                ## conj_hum=self.conjugate_tense_pronoun(tense,
                # vconst.PronounHum)
                # NOTE(review): overwrites the Hum form stored just
                # above with a hamza-seat-corrected spelling
                self.conj_display.add(tense, vconst.PronounHum,
                 conj_huwa[:-2]+YEH_HAMZA+DAMMA+WAW+ALEF)

            else :
                self.conj_display.add(tense, vconst.PronounHya,
                 conj_huwa+TEH+SUKUN)
                self.conj_display.add(tense, vconst.PronounHuma_f,
                 conj_huwa+TEH+FATHA+ALEF)
                self.conj_display.add(tense, vconst.PronounHuma,
                 conj_huwa+ALEF)
                #~ if conj_huwa.endswith(KASRA+YEH+FATHA):
                    #~ self.conj_display.add(tense, vconst.PronounHum,
                     #~ conj_huwa[:-3]+DAMMA+WAW+ALEF)
                #~ else:
                    #~ self.conj_display.add(tense, vconst.PronounHum,
                     #~ conj_huwa[:-1]+DAMMA+WAW+ALEF)
        elif tense in (vconst.TenseFuture, vconst.TensePassiveFuture,
         vconst.TenseJussiveFuture, vconst.TenseSubjunctiveFuture,
         vconst.TenseConfirmedFuture, vconst.TensePassiveJussiveFuture,
         vconst.TensePassiveSubjunctiveFuture,
         vconst.TensePassiveConfirmedFuture):
            conj_ana = self.conjugate_tense_pronoun(tense,
             vconst.PronounAna)
            self.conj_display.add(tense, vconst.PronounAna,
             conj_ana)

            # pivot: Anta; other singular forms differ only in the
            # leading "future letter" (prefix)
            conj_anta = self.conjugate_tense_pronoun(tense,
             vconst.PronounAnta)
            self.conj_display.add(tense, vconst.PronounAnta,
             conj_anta)
            conj_anta_without_future_letter = conj_anta[1:]
            ## self.conj_display.add(tense, vconst.PronounAnta,
            ## TEH+conj_ana_without_future_letter)
            self.conj_display.add(tense, vconst.PronounNahnu,
             NOON+conj_anta_without_future_letter)
            self.conj_display.add(tense, vconst.PronounHuwa,
             YEH+conj_anta_without_future_letter)
            self.conj_display.add(tense, vconst.PronounHya,
             TEH+conj_anta_without_future_letter)

            conj_anti = self.conjugate_tense_pronoun(tense,
             vconst.PronounAnti)
            self.conj_display.add(tense, vconst.PronounAnti,
             conj_anti)

            # pivot: Antuma; dual forms share it (third person dual
            # swaps the future letter for YEH)
            conj_antuma = self.conjugate_tense_pronoun(tense,
             vconst.PronounAntuma)
            self.conj_display.add(tense, vconst.PronounAntuma,
             conj_antuma)
            self.conj_display.add(tense, vconst.PronounAntuma_f,
             conj_antuma)
            self.conj_display.add(tense, vconst.PronounHuma_f,
             conj_antuma)
            self.conj_display.add(tense, vconst.PronounHuma,
             YEH+conj_antuma[1:])

            conj_antum = self.conjugate_tense_pronoun(tense,
             vconst.PronounAntum)
            self.conj_display.add(tense, vconst.PronounAntum,
             conj_antum)
            self.conj_display.add(tense, vconst.PronounHum,
             YEH+conj_antum[1:])

            conj_antunna = self.conjugate_tense_pronoun(tense,
             vconst.PronounAntunna)
            self.conj_display.add(tense, vconst.PronounAntunna,
             conj_antunna)
            self.conj_display.add(tense, vconst.PronounHunna,
             YEH+conj_antunna[1:])
        elif tense == vconst.TenseImperative or \
          tense == vconst.TenseConfirmedImperative:
            # imperative exists only for the second-person pronouns
            for pron in vconst.ImperativePronouns:
                conj = self.conjugate_tense_pronoun(tense, pron)
                self.conj_display.add(tense, pron, conj)
    # intransitive verbs have no passive forms for most pronouns:
    # blank them out explicitly
    if not self.transitive:
        for tense in vconst.TablePassiveTense:
            for pron in vconst.PronounsTableNotPassiveForUntransitive:
                self.conj_display.add(tense, pron, u"")
    # if the result is not displayed directly on the screen, return it
    result = self.conj_display.display(self.conj_display.mode,
     listtense)
    if result:
        return result
|
| 729 |
+
|
| 730 |
+
def conjugate_tense_for_pronoun(self, tense, pronoun):
    """
    Conjugate a verb for a single pronoun in a specific tense,
    using a homogenized conjugation scheme.

    The idea is to derive some conjugations from others: for each
    tense there are two pronoun groups,
    - directly conjugated pronouns (pivots), and
    - indirectly conjugated pronouns, derived from a pivot by
      mark/affix substitution.
    Pivot results are cached in conj_display and reused.

    @param tense: given tense
    @type tense: unicode
    @param pronoun: given pronoun
    @type pronoun: unicode
    @return: conjugated verb
    @rtype: unicode;
    """
    if tense == vconst.TensePast:
        # directly conjugated pronouns
        # NOTE(review): vconst.PronounHunna appears twice in this
        # tuple -- harmless but redundant
        if pronoun in (vconst.PronounAna, vconst.PronounNahnu,
         vconst.PronounHunna, vconst.PronounHuma , vconst.PronounHum,
         vconst.PronounHunna, vconst.PronounHuwa, vconst.PronounHya):
            conj = self.conjugate_tense_pronoun( tense, pronoun)
            self.conj_display.add(tense, pronoun, conj)
        # indirect conjugation, derived from the Ana pronoun
        # NOTE(review): vconst.PronounAnta is duplicated in this tuple
        elif pronoun in (vconst.PronounAnta, vconst.PronounAnta,
         vconst.PronounAnti, vconst.PronounAntuma, vconst.PronounAntuma_f,
         vconst.PronounAntum, vconst.PronounAntunna):
            # test if the verb is already conjugated
            # NOTE(review): looks up `pronoun` rather than
            # vconst.PronounAna as the other branches do -- presumably
            # a typo; confirm against upstream qutrub sources
            conj_ana = self.conj_display.get_conj(tense, pronoun)
            if conj_ana == u"":
                conj_ana = self.conjugate_tense_pronoun(tense,
                 vconst.PronounAna)
            conj_ana_without_last_mark = conj_ana[:-1]
            if pronoun == vconst.PronounAnta:
                self.conj_display.add(tense, vconst.PronounAnta,
                 conj_ana_without_last_mark+FATHA)
            elif pronoun == vconst.PronounAnti:
                self.conj_display.add(tense, vconst.PronounAnti,
                 conj_ana_without_last_mark+KASRA)
            elif pronoun == vconst.PronounAntuma :
                self.conj_display.add(tense, vconst.PronounAntuma,
                 conj_ana+MEEM+FATHA+ALEF)
            elif pronoun == vconst.PronounAntuma_f:
                self.conj_display.add(tense, vconst.PronounAntuma_f,
                 conj_ana+MEEM+FATHA+ALEF)
            elif pronoun == vconst.PronounAntum:
                self.conj_display.add(tense, vconst.PronounAntum,
                 conj_ana+MEEM)
            elif pronoun == vconst.PronounAntunna:
                self.conj_display.add(tense, vconst.PronounAntunna,
                 conj_ana+NOON+SHADDA+FATHA)

        # indirect conjugation, derived from the Hya pronoun
        elif pronoun == vconst.PronounHuma_f:
            # test if the verb is already conjugated
            conj_hya = self.conj_display.get_conj(tense, vconst.PronounHya)
            if conj_hya == u"":
                conj_hya = self.conjugate_tense_pronoun(tense,
                 vconst.PronounHya)
            self.conj_display.add(tense, vconst.PronounHuma_f,
             conj_hya[:-1]+FATHA+ALEF)
    elif tense == vconst.TensePassivePast:
        # direct conjugation
        # NOTE(review): vconst.PronounHunna duplicated in this tuple
        if pronoun in (vconst.PronounAna, vconst.PronounNahnu,
         vconst.PronounHunna, vconst.PronounHunna, vconst.PronounHuwa, vconst.PronounHum):
            conj = self.conjugate_tense_pronoun(tense, pronoun)
            self.conj_display.add(tense, pronoun, conj)
        # indirect conjugation, Ana-pronoun-like
        elif pronoun in (vconst.PronounAnta, vconst.PronounAnti,
         vconst.PronounAntuma, vconst.PronounAntuma_f, vconst.PronounAntum,
         vconst.PronounAntunna):
            conj_ana = self.conj_display.get_conj(tense, vconst.PronounAna)
            if conj_ana == u"":
                conj_ana = self.conjugate_tense_pronoun(tense,
                 vconst.PronounAna)
                self.conj_display.add(tense, vconst.PronounAna,
                 conj_ana)
            conj_ana_without_last_mark = conj_ana[:-1]
            if pronoun == vconst.PronounAnta:
                self.conj_display.add(tense, vconst.PronounAnta,
                 conj_ana_without_last_mark+FATHA)
            elif pronoun == vconst.PronounAnti:
                self.conj_display.add(tense, vconst.PronounAnti,
                 conj_ana_without_last_mark+KASRA)
            elif pronoun == vconst.PronounAntuma:
                self.conj_display.add(tense, vconst.PronounAntuma,
                 conj_ana+MEEM+FATHA+ALEF)
            elif pronoun == vconst.PronounAntuma_f:
                self.conj_display.add(tense, vconst.PronounAntuma_f,
                 conj_ana+MEEM+FATHA+ALEF)
            elif pronoun == vconst.PronounAntum:
                self.conj_display.add(tense, vconst.PronounAntum,
                 conj_ana+MEEM)
            elif pronoun == vconst.PronounAntunna:
                self.conj_display.add(tense, vconst.PronounAntunna,
                 conj_ana+NOON+SHADDA+FATHA)
        # indirect conjugation, derived from the Huwa pronoun
        elif pronoun in ( vconst.PronounHya, vconst.PronounHuma_f,
         #~ vconst.PronounHuma, vconst.PronounHum):
         vconst.PronounHuma,):
            conj_huwa = self.conj_display.get_conj(tense,
             vconst.PronounHuwa)
            if conj_huwa == u"":
                conj_huwa = self.conjugate_tense_pronoun(tense,
                 vconst.PronounHuwa)
                self.conj_display.add(tense, vconst.PronounHuwa, conj_huwa)
            # case of a verb whose final radical is a hamza
            if conj_huwa.endswith(YEH+HAMZA+FATHA) :
                self.conj_display.add(tense, vconst.PronounHya,
                 conj_huwa[:-2]+YEH_HAMZA+FATHA+TEH+SUKUN)
                self.conj_display.add(tense, vconst.PronounHuma_f,
                 conj_huwa[:-2]+YEH_HAMZA+FATHA+TEH+FATHA+ALEF)
                self.conj_display.add(tense, vconst.PronounHuma,
                 conj_huwa[:-2]+YEH_HAMZA+FATHA+ALEF)

                #~ self.conj_display.add(tense, vconst.PronounHum,
                 #~ conj_huwa[:-2]+YEH_HAMZA+DAMMA+WAW+ALEF)

            else :
                self.conj_display.add(tense, vconst.PronounHya,
                 conj_huwa+TEH+SUKUN)
                self.conj_display.add(tense, vconst.PronounHuma_f,
                 conj_huwa+TEH+FATHA+ALEF)
                self.conj_display.add(tense, vconst.PronounHuma,
                 conj_huwa+ALEF)
                #~ if conj_huwa.endswith(KASRA+YEH+FATHA):
                    #~ self.conj_display.add(tense, vconst.PronounHum,
                     #~ conj_huwa[:-3]+DAMMA+WAW+ALEF)
                #~ else:
                    #~ self.conj_display.add(tense, vconst.PronounHum,
                     #~ conj_huwa[:-1]+DAMMA+WAW+ALEF)
    elif tense in (vconst.TenseFuture, vconst.TensePassiveFuture,
     vconst.TenseJussiveFuture, vconst.TenseSubjunctiveFuture,
     vconst.TenseConfirmedFuture, vconst.TensePassiveJussiveFuture,
     vconst.TensePassiveSubjunctiveFuture,
     vconst.TensePassiveConfirmedFuture):

        # direct pronouns conjugations
        if pronoun in (vconst.PronounAna, vconst.PronounAnta,
         vconst.PronounAnti, vconst.PronounAntuma, vconst.PronounAntum,
         vconst.PronounAntunna):
            conj = self.conjugate_tense_pronoun(tense, pronoun)
            self.conj_display.add(tense, pronoun, conj)
        # indirect pronouns, Anta-conjugation-like: swap the leading
        # future letter (prefix)
        elif pronoun in (vconst.PronounNahnu, vconst.PronounHuwa,
         vconst.PronounHya):
            conj_anta = self.conj_display.get_conj(tense,
             vconst.PronounAnta)
            if conj_anta == u"":
                conj_anta = self.conjugate_tense_pronoun(tense,
                 vconst.PronounAnta)
                self.conj_display.add(tense, vconst.PronounAnta,
                 conj_anta)

            conj_anta_without_future_letter = conj_anta[1:]
            if pronoun == vconst.PronounNahnu:
                self.conj_display.add(tense, vconst.PronounNahnu,
                 NOON+conj_anta_without_future_letter)
            elif pronoun == vconst.PronounHuwa:
                self.conj_display.add(tense, vconst.PronounHuwa,
                 YEH+conj_anta_without_future_letter)
            elif pronoun == vconst.PronounHya:
                self.conj_display.add(tense, vconst.PronounHya,
                 TEH+conj_anta_without_future_letter)
        # indirect pronouns, Antuma-conjugation-like (duals)
        elif pronoun in (vconst.PronounAntuma, vconst.PronounAntuma_f,
         vconst.PronounHuma, vconst.PronounHuma_f ):
            conj_antuma = self.conj_display.get_conj(tense,
             vconst.PronounAntuma)
            if conj_antuma == u"":
                conj_antuma = self.conjugate_tense_pronoun(tense,
                 vconst.PronounAntuma)
                self.conj_display.add(tense, vconst.PronounAntuma,
                 conj_antuma)
            if pronoun == vconst.PronounAntuma_f:
                self.conj_display.add(tense, vconst.PronounAntuma_f,
                 conj_antuma)
            if pronoun == vconst.PronounHuma_f:
                self.conj_display.add(tense, vconst.PronounHuma_f,
                 conj_antuma)
            if pronoun == vconst.PronounHuma:
                self.conj_display.add(tense, vconst.PronounHuma,
                 YEH+conj_antuma[1:])
        # indirect pronouns, Antum-conjugation-like
        elif pronoun == vconst.PronounHum:
            conj_antum = self.conj_display.get_conj(tense,
             vconst.PronounAntum)
            if conj_antum == u"":
                conj_antum = self.conjugate_tense_pronoun(tense,
                 vconst.PronounAntum)
                self.conj_display.add(tense, vconst.PronounAntum,
                 conj_antum)
            self.conj_display.add(tense, vconst.PronounHum,
             YEH+conj_antum[1:])
        # indirect pronouns, Antunna-conjugation-like
        elif pronoun == vconst.PronounHunna:
            conj_antunna = self.conj_display.get_conj(tense,
             vconst.PronounAntunna)
            if conj_antunna == u"":
                conj_antunna = self.conjugate_tense_pronoun(tense,
                 vconst.PronounAntunna)
                self.conj_display.add(tense, vconst.PronounAntunna,
                 conj_antunna)
            self.conj_display.add(tense, vconst.PronounHunna,
             YEH+conj_antunna[1:])
    elif tense == vconst.TenseImperative or \
      tense == vconst.TenseConfirmedImperative:
        # imperative pronouns are always conjugated directly
        conj = self.conjugate_tense_pronoun(tense, pronoun)
        self.conj_display.add(tense, pronoun, conj)
    # the conjugated form is stored in conj_display; read it back
    return self.conj_display.get_conj(tense, pronoun)
|
| 951 |
+
#####################################
#{ Irregular verbs functions
#####################################
def _is_irregular_verb(self):
    """
    Check whether the verb is irregular, i.e. present in the
    irregular verb table vconst.IRREGULAR_VERB_CONJUG.

    Special Arabic verbs handled this way include رأى, أكل, أمر, سأل,
    and assimilated verbs conjugated on the pattern (مَنَعَ يَمْنَعُ) whose
    initial Waw drops in the present (وَضَعَ يَضَعُ, وَهَبَ يَهَبُ, ...), except
    five verbs that keep it: (وَبَأ), (وَبَهَ), (وَجَعَ), (وَسَعَ), (وَهَلَ).

    Side effect: for the letters u"ءرى" the past haraka and the future
    type are normalized to FATHA/KASRA before building the lookup key.

    @return: True if irregular
    @rtype: Boolean
    """
    # only triliteral verbs can appear in the irregular table
    if len(self.word_letters) != 3:
        return False
    # special case: force the marks for the verb رأى
    if self.word_letters == u"ءرى":
        self.past_haraka = araby.FATHA
        self.future_type = araby.KASRA
        #~ self.vlength = 4
    # the key combines the letters with the past and future marks
    lookup_key = self.word_letters + self.past_haraka + self.future_type
    return lookup_key in vconst.IRREGULAR_VERB_CONJUG
|
| 985 |
+
|
| 986 |
+
|
| 987 |
+
def _get_irregular_future_stem(self):
    """
    Return the future-tense conjugation stem for an irregular verb.

    The stem is looked up in vconst.IRREGULAR_VERB_CONJUG by a key
    built from the verb letters plus the past and future marks; when
    the verb is not in the table, the raw letters are returned.

    @return: the future conjugation stem
    @rtype: unicode;
    """
    lookup_key = self.word_letters + self.past_haraka + self.future_type
    conjug_table = vconst.IRREGULAR_VERB_CONJUG
    if lookup_key not in conjug_table:
        # not irregular after all: fall back to the plain letters
        return self.word_letters
    return conjug_table[lookup_key][vconst.TenseFuture]
|
| 1000 |
+
|
| 1001 |
+
|
| 1002 |
+
def _get_irregular_passivefuture_stem(self):
    """
    Return the passive-future conjugation stem for an irregular verb.

    The stem is looked up in vconst.IRREGULAR_VERB_CONJUG by a key
    built from the verb letters plus the past and future marks; when
    the verb is not in the table, the raw letters are returned.

    @return: the passive future conjugation stem
    @rtype: unicode;
    """
    lookup_key = self.word_letters + self.past_haraka + self.future_type
    conjug_table = vconst.IRREGULAR_VERB_CONJUG
    if lookup_key not in conjug_table:
        # not irregular after all: fall back to the plain letters
        return self.word_letters
    return conjug_table[lookup_key][vconst.TensePassiveFuture]
|
| 1015 |
+
|
| 1016 |
+
|
| 1017 |
+
def _get_irregular_imperative_stem(self):
    """
    Return the imperative conjugation stem for an irregular verb.

    The stem is looked up in vconst.IRREGULAR_VERB_CONJUG by a key
    built from the verb letters plus the past and future marks; when
    the verb is not in the table, the raw letters are returned.

    @return: the imperative conjugation stem
    @rtype: unicode;
    """
    lookup_key = self.word_letters + self.past_haraka + self.future_type
    conjug_table = vconst.IRREGULAR_VERB_CONJUG
    if lookup_key not in conjug_table:
        # not irregular after all: fall back to the plain letters
        return self.word_letters
    return conjug_table[lookup_key][vconst.TenseImperative]
|
| 1030 |
+
|
| 1031 |
+
# prepare the irregular conjug for future and imperative
# (prepare the conjugation stems for present/future and imperative
# tenses of irregular verbs)
def _prepare_irregular_future_imperative_stem(self):
    """
    Prepare the conjugation stems for future tenses
    (future, jussive, subjective) and imperative tense.
    Those stems will be concatenated with conjugation affixes.

    Fills self.tab_conjug_stem for all active/passive future moods and
    both imperative tenses when the verb is irregular; does nothing
    otherwise.
    """
    ## if self.word_letters in vconst.IRREGULAR_VERB_CONJUG.keys():
    if self._is_irregular_verb():
        (letters, marks) = self._get_irregular_future_stem()
        #vconst.IRREGULAR_VERB_CONJUG[self.word_letters][vconst.TenseFuture]
        # the haraka of the present prefix letter is already included
        # in the stem used for the irregular verb
        self.tab_conjug_stem[vconst.TenseFuture] = ConjugStem(
         vconst.TenseFuture, letters, marks)
        self.tab_conjug_stem[vconst.TenseJussiveFuture] = ConjugStem(
         vconst.TenseJussiveFuture, letters, marks)
        self.tab_conjug_stem[vconst.TenseSubjunctiveFuture] = ConjugStem(
         vconst.TenseSubjunctiveFuture, letters, marks)
        self.tab_conjug_stem[vconst.TenseConfirmedFuture] = ConjugStem(
         vconst.TenseConfirmedFuture, letters, marks)

        (letters1, marks1) = self._get_irregular_passivefuture_stem()
        # the same passive stem is shared by all passive future moods
        self.tab_conjug_stem[vconst.TensePassiveFuture] = ConjugStem(
         vconst.TensePassiveFuture, letters1, marks1)
        self.tab_conjug_stem[vconst.TensePassiveJussiveFuture] = ConjugStem(
         vconst.TensePassiveJussiveFuture, letters1, marks1)
        self.tab_conjug_stem[vconst.TensePassiveSubjunctiveFuture] = \
        ConjugStem(vconst.TensePassiveSubjunctiveFuture, letters1, marks1)
        self.tab_conjug_stem[vconst.TensePassiveConfirmedFuture] = \
        ConjugStem(vconst.TensePassiveConfirmedFuture, letters1, marks1)

        (letters2, marks2) = self._get_irregular_imperative_stem()
        self.tab_conjug_stem[vconst.TenseImperative] = ConjugStem(
         vconst.TenseImperative, letters2, marks2)
        self.tab_conjug_stem[vconst.TenseConfirmedImperative] = \
        ConjugStem(vconst.TenseConfirmedImperative, letters2, marks2)
        # NOTE(review): returns False even after successfully preparing
        # the stems; callers appear to ignore the return value -- confirm
        return False
|
| 1070 |
+
|
| 1071 |
+
|
| 1072 |
+
def get_conj(self, tense, pronoun):
    """
    Fetch the stored conjugated form for a (tense, pronoun) pair.

    @param tense: tense of the added conjuagtion.
    @type tense: unicode
    @param pronoun: pronoun of the added conjuagtion.
    @type pronoun: unicode
    @return : conjugated form of verb if exists.
    @rtype : unicode
    """
    # delegate the lookup to the display/result store
    display = self.conj_display
    return display.get_conj(tense, pronoun)
|
| 1083 |
+
|
| 1084 |
+
def get_pronoun_features(self, pronoun):
    """
    Return the feature dictionary attached to a pronoun.

    @param pronoun: pronoun of conjuagtion.
    @type pronoun: unicode
    @return : dictionary of pronoun attributes, or None when unknown.
    @rtype : dictionary
    """
    # dict.get already defaults to None for unknown pronouns
    return vconst.PRONOUN_FEATURES.get(pronoun)
|
| 1093 |
+
def get_tense_features(self, tense):
    """
    Return the feature dictionary attached to a tense.

    @param tense: tense of the conjuagtion.
    @type tense: unicode
    @return : dictionary of tense attributes, or None when unknown.
    @rtype : dictionary
    """
    # dict.get already defaults to None for unknown tenses
    return vconst.TENSE_FEATURES.get(tense)
|
libqutrub/conjugate.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
|
| 2 |
+
# -*- coding=utf-8 -*-
|
| 3 |
+
#************************************************************************
|
| 4 |
+
# $Id: conjugate.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
|
| 5 |
+
#
|
| 6 |
+
# ------------
|
| 7 |
+
# Description:
|
| 8 |
+
# ------------
|
| 9 |
+
# Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
|
| 10 |
+
#
|
| 11 |
+
# This file is the main file to execute the application in the command line
|
| 12 |
+
#
|
| 13 |
+
# -----------------
|
| 14 |
+
# Revision Details: (Updated by Revision Control System)
|
| 15 |
+
# -----------------
|
| 16 |
+
# $Date: 2009/06/02 01:10:00 $
|
| 17 |
+
# $Author: Taha Zerrouki $
|
| 18 |
+
# $Revision: 0.7 $
|
| 19 |
+
# $Source: arabtechies.sourceforge.net
|
| 20 |
+
#
|
| 21 |
+
#***********************************************************************/
|
| 22 |
+
"""
|
| 23 |
+
Conjugate console
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
import sys
|
| 27 |
+
import getopt
|
| 28 |
+
import os
|
| 29 |
+
sys.path.append('../')
|
| 30 |
+
import libqutrub.mosaref_main as mosaref_main
|
| 31 |
+
import libqutrub.ar_verb as ar_verb
|
| 32 |
+
import libqutrub.verb_valid as verb_valid
|
| 33 |
+
|
| 34 |
+
SCRIPT_NAME = os.path.splitext(os.path.basename(sys.argv[0]))[0]
|
| 35 |
+
SCRIPT_VERSION = '0.1'
|
| 36 |
+
AUTHOR_NAME = "Taha Zerrouki"
|
| 37 |
+
def usage():
|
| 38 |
+
"""Display usage options"""
|
| 39 |
+
print "(C) CopyLeft 2009, %s" % AUTHOR_NAME
|
| 40 |
+
print "Usage: %s -f filename [OPTIONS]" % SCRIPT_NAME
|
| 41 |
+
#"Display usage options"
|
| 42 |
+
print "\t[-h | --help]\toutputs this usage message"
|
| 43 |
+
print "\t[-V | --version]\tprogram version"
|
| 44 |
+
print "\t[-f | --file=filename]\tinput file to %s" % SCRIPT_NAME
|
| 45 |
+
print "\t[-d | --display=format]\tdisplay format as html, csv, tex, xml"
|
| 46 |
+
print "\t[-a | --all ] \tConjugate in all tenses"
|
| 47 |
+
print "\t[-i | --imperative]\tConjugate in imperative"
|
| 48 |
+
print "\t[-F | --future]\tconjugate in the present and the future"
|
| 49 |
+
print "\t[-p | --past]\t conjugate in the past"
|
| 50 |
+
print "\t[-c | --confirmed] conjugate in confirmed (future or imperative)"
|
| 51 |
+
print """\t[-m | --moode]\tconjugate in future Subjunctive(mansoub)
|
| 52 |
+
or Jussive (majzoom)"""
|
| 53 |
+
print " \t[-v | --passive] passive form"
|
| 54 |
+
print "\r\nN.B. FILE FORMAT is descripted in README"
|
| 55 |
+
print "\r\nThis program is licensed under the GPL License\n"
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def grabargs():
    """Grab and parse command-line arguments.

    Parses sys.argv[1:] with getopt and returns the parsed settings.
    Calls usage() and exits when no arguments are given or on a
    getopt parse error.

    @return: tuple (fname, alltense, future, past, passive, imperative,
        confirmed, future_moode, display_format)
    @rtype: tuple
    """
    alltense = False
    future = False
    past = False
    passive = False
    imperative = False
    confirmed = False
    future_moode = False
    fname = ''
    display_format = 'csv'

    if not sys.argv[1:]:
        usage()
        sys.exit(0)
    try:
        # FIX: long option names must not contain spaces -- the original
        # "file = " / "display = " could never match --file=X / --display=X
        # (getopt raised GetoptError on the argument). Also dropped the
        # spurious "i:" from the short-option string: -i is a no-argument
        # flag, already declared by the earlier plain "i".
        opts, args = getopt.getopt(sys.argv[1:], "hVvcmaiFpd:f:",
         ["help", "version", "imperative", "passive",
          'confirmed', 'moode', "past", "all",
          "future", "file=", "display="], )
    except getopt.GetoptError:
        usage()
        sys.exit(0)
    for opt, val in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        if opt in ("-V", "--version"):
            # parenthesized form works under Python 2 and 3
            print(SCRIPT_VERSION)
            sys.exit(0)
        if opt in ("-v", "--passive"):
            passive = True
        if opt in ("-f", "--file"):
            fname = val
        if opt in ("-d", "--display"):
            display_format = val.upper()
        if opt in ("-F", "--future"):
            future = True
        if opt in ("-a", "--all"):
            alltense = True
        if opt in ("-p", "--past"):
            past = True
        if opt in ("-i", "--imperative"):
            imperative = True
        if opt in ("-c", "--confirmed"):
            confirmed = True
        if opt in ("-m", "--moode"):
            future_moode = True

    return (fname, alltense, future, past, passive, imperative, confirmed,
     future_moode, display_format)
|
| 109 |
+
|
| 110 |
+
def main():
    """Main function: read a tab-separated verb list from the file given on
    the command line and print the conjugation of each verb.

    Expected file format (see README): one verb per line,
    ``verb<TAB>future_type<TAB>transitivity``; lines starting with '#'
    are treated as comments.

    NOTE: this is Python-2 code (print statements, str.decode on file
    lines, u"" literals).
    """
    # Unpack the 9 options produced by grabargs().
    filename, alltense, future, past, passive, imperative, confirmed, \
        future_moode, display_format = grabargs()
    try:
        fle = open(filename)
    except IOError:
        print " Error :No such file or directory: %s" % filename
        sys.exit(0)

    # Echo the selected options (debug/trace output).
    print filename, alltense, future, past, passive, imperative, \
        confirmed, future_moode

    # "utf" is a Python codec alias for utf-8, so both decode calls below
    # use the same encoding despite the inconsistent spelling.
    line = fle.readline().decode("utf")
    text = u""
    verb_table = []
    # Minimum number of tab-separated fields for a usable entry
    # (verb + future type; transitivity is read as field 3 later).
    nb_field = 2
    while line :
        if not line.startswith("#"):

            text = text+" "+ line.strip()
            liste = line.split("\t")
            if len(liste) >= nb_field:
                verb_table.append(liste)

        line = fle.readline().decode("utf8")
    fle.close()

    for tuple_verb in verb_table:
        word = tuple_verb[0]

        if not verb_valid.is_valid_infinitive_verb(word):
            print u"is invalid verb ",
            print word.encode("utf8")
        else:
            # Field 2: mark of Ain in the future tense (haraka name).
            future_type = u""+tuple_verb[1]
            future_type = ar_verb.get_future_type_entree(future_type)
            # Field 3: transitivity — Arabic or English markers accepted.
            # NOTE(review): entries with exactly 2 fields would raise
            # IndexError here despite passing the nb_field >= 2 check
            # above — confirm the input format always has 3 fields.
            transitive = u""+tuple_verb[2]
            if transitive in (u"متعدي", u"م", u"مشترك", u"ك", "t",
                              "transitive"):
                transitive = True
            else :
                transitive = False
            text = mosaref_main.do_sarf(word, future_type, alltense, past,
                future, passive, imperative, future_moode, confirmed,
                transitive, display_format)
            print text.encode("utf8")

if __name__ == "__main__":
    main()
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
|
libqutrub/conjugatedisplay.py
ADDED
|
@@ -0,0 +1,568 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
|
| 2 |
+
# -*- coding=utf-8 -*-
|
| 3 |
+
#************************************************************************
|
| 4 |
+
# $Id: conjugateddisplay.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
|
| 5 |
+
#
|
| 6 |
+
# ------------
|
| 7 |
+
# Description:
|
| 8 |
+
# ------------
|
| 9 |
+
# Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
|
| 10 |
+
#
|
| 11 |
+
# The Class used to display information after conjugated
|
| 12 |
+
# All print and views and display are redirected to this class
|
| 13 |
+
#
|
| 14 |
+
# -----------------
|
| 15 |
+
# Revision Details: (Updated by Revision Control System)
|
| 16 |
+
# -----------------
|
| 17 |
+
# $Date: 2009/06/02 01:10:00 $
|
| 18 |
+
# $Author: Taha Zerrouki $
|
| 19 |
+
# $Revision: 0.7 $
|
| 20 |
+
# $Source: arabtechies.sourceforge.net
|
| 21 |
+
#
|
| 22 |
+
#***********************************************************************/
|
| 23 |
+
"""
|
| 24 |
+
The conjugation display class to manage different display format.
|
| 25 |
+
"""
|
| 26 |
+
#~ from libqutrub.verb_const import *
|
| 27 |
+
import libqutrub.verb_const as vconst
|
| 28 |
+
import pyarabic.araby as araby
|
| 29 |
+
|
| 30 |
+
# صف عرض التصريفات حسب الضمائر
|
| 31 |
+
# جدول عرض التصريفات حسب الأزمنة
|
| 32 |
+
# تعيينه متغيرا شاملا من أجل تقليل
|
| 33 |
+
#~ بناء جدول عرض التصريفات في كل عرض.
|
| 34 |
+
# Template row for one tense: an empty conjugation slot for each of the
# 14 Arabic personal pronouns. Copied (via .copy()) once per tense when
# ConjugateDisplay builds its conjugation table, to avoid rebuilding the
# dict on every display.
ONE_TENSE_PRONOUN = {u"أنا":"" , u"أنت":"" , u"أنتِ":"" , u"هو":"" ,
                     u"هي":"" , u"أنتما":"" , u"أنتما مؤ":"" , u"هما":"" ,
                     u"هما مؤ":"" ,
                     u"نحن":"" , u"أنتم":"" , u"أنتن":"" , u"هم":"" , u"هن":""}
| 39 |
+
# delete the global TableConj vvariable because it causes problem
|
| 40 |
+
# Numeric display codes for pronouns (1-14) and tenses (20-31), used by
# ConjugateDisplay.display_rows() to emit compact machine-readable rows.
# All values are plain ASCII digit strings.
TAB_DISPLAY = {
    vconst.PronounAna:u"1",
    vconst.PronounNahnu:u"2",
    vconst.PronounAnta:u"3",
    # Fix: the code for PronounAnti was u"4\u0650" — a stray Arabic kasra
    # had been appended to the digit; every other code is a bare digit.
    vconst.PronounAnti:u"4",
    vconst.PronounAntuma:u"5",
    vconst.PronounAntuma_f:u"6",
    vconst.PronounAntum:u"7",
    vconst.PronounAntunna:u"8",
    vconst.PronounHuwa:u"9",
    vconst.PronounHya:u"10",
    vconst.PronounHuma:u"11",
    vconst.PronounHuma_f:u"12",
    vconst.PronounHum:u"13",
    vconst.PronounHunna:u"14",

    # Codes for tense names.
    vconst.TensePast:u"20",
    vconst.TenseFuture:u"21",
    vconst.TenseImperative:u"22",
    vconst.TenseConfirmedImperative:u"23",
    vconst.TenseJussiveFuture:u"24",
    vconst.TenseSubjunctiveFuture:u"25",
    vconst.TenseConfirmedFuture:u"26",

    vconst.TensePassivePast:u"27",
    vconst.TensePassiveFuture:u"28",
    vconst.TensePassiveJussiveFuture:u"29",
    vconst.TensePassiveSubjunctiveFuture:u"30",
    vconst.TensePassiveConfirmedFuture:u"31",
    }
|
| 73 |
+
|
| 74 |
+
class ConjugateDisplay:
    """
    Collect and render verb conjugation results.

    The conjugation engine fills the table with ``add()``; the result is
    then rendered in one of several formats (Text, CSV, HTML, XML, TeX,
    ROWS, python dict/table) through ``display()``.
    """
    # Class-level defaults kept for backward compatibility with code that
    # reads them off the class; __init__ rebinds fresh mutable objects on
    # every instance, so instances do not share state.
    tab_conjug = {}
    pronouns = {}
    verb = u""
    mode = 'Text'
    future_form = u""
    text = {}
    transitive = False

    def __init__(self, verb):
        """
        Build an empty conjugation table for the given verb.

        @param verb: given verb.
        @type verb: unicode.
        """
        # One pronoun -> conjugation row per supported tense.
        self.tab_conjug = {
            vconst.TensePast: ONE_TENSE_PRONOUN.copy(),
            vconst.TensePassivePast: ONE_TENSE_PRONOUN.copy(),
            vconst.TenseFuture: ONE_TENSE_PRONOUN.copy(),
            vconst.TensePassiveFuture: ONE_TENSE_PRONOUN.copy(),
            vconst.TenseJussiveFuture: ONE_TENSE_PRONOUN.copy(),
            vconst.TensePassiveJussiveFuture: ONE_TENSE_PRONOUN.copy(),
            vconst.TenseSubjunctiveFuture: ONE_TENSE_PRONOUN.copy(),
            vconst.TensePassiveSubjunctiveFuture: ONE_TENSE_PRONOUN.copy(),
            vconst.TenseImperative: ONE_TENSE_PRONOUN.copy(),
            vconst.TenseConfirmedFuture: ONE_TENSE_PRONOUN.copy(),
            vconst.TenseConfirmedImperative: ONE_TENSE_PRONOUN.copy(),
            }
        self.verb = verb
        self.text = {}          # extra attributes to display (title -> value)
        self.mode = 'Text'      # default rendering mode
        self.future_form = u""  # e.g. the yaf3al form of the verb
        self.transitive = False
        self.bab = "0"          # sarf bab (conjugation class), kept as string

    def __del__(self):
        # Explicit teardown kept from the original implementation.
        self.tab_conjug = {}
        self.verb = ""
        self.text = {}
        self.mode = 'Text'
        self.future_form = u""
        self.transitive = False
        self.bab = "0"

    #####################################
    #{ Attributes functions
    #####################################
    def setmode(self, mode):
        """
        Set the display mode, one of:
        'Text', 'HTML', 'HTMLColoredDiacritics', 'DICT', 'CSV', 'GUI',
        'TABLE', 'XML', 'TeX', 'ROWS'.

        @param mode: the given mode to display result
        @type mode: unicode
        """
        self.mode = mode

    def settransitive(self):
        """Mark the verb as transitive (flag only goes one way)."""
        self.transitive = True

    def setbab(self, bab):
        """
        Set the sarf bab value.

        @param bab: the given sarf bab.
        @type bab: integer (1-6)
        """
        self.bab = bab

    #------------------------------------------------------------------
    def set_future_form(self, future_form):
        """
        Set the future form of the verb, e.g. ضرب -> يضرب.

        @param future_form: the future form.
        @type future_form: unicode
        """
        self.future_form = future_form

    def get_verb_attributs(self):
        """
        Get the displayed attributes (title -> value mapping).

        @return: attributes as a dict.
        @rtype: dict
        """
        return self.text

    def add_attribut(self, title, value):
        """
        Add an attribute to display, e.g. transitivity, root, future form.

        @param title: the title of the attribute to display.
        @type title: unicode
        @param value: the value of the attribute.
        @type value: unicode
        """
        if title != '':
            self.text[title] = value

    def get_conj(self, tense, pronoun):
        """
        Get the conjugated verb by tense and pronoun.

        @param tense: tense of the conjugation.
        @type tense: unicode
        @param pronoun: pronoun of the conjugation.
        @type pronoun: unicode
        @return: conjugated form of the verb, or u"" if absent.
        @rtype: unicode
        """
        if tense in self.tab_conjug:
            if pronoun in self.tab_conjug[tense]:
                return self.tab_conjug[tense][pronoun]
        return u""

    def add(self, tense, pronoun, verbconjugated):
        """
        Store one conjugated form.

        @param tense: tense of the added conjugation.
        @type tense: unicode
        @param pronoun: pronoun of the added conjugation.
        @type pronoun: unicode
        @param verbconjugated: the conjugated form.
        @type verbconjugated: unicode
        """
        # Tenses absent from the initial table (e.g. passive confirmed
        # future) get a row created on first use.
        if tense not in self.tab_conjug:
            self.tab_conjug[tense] = {}
        self.tab_conjug[tense][pronoun] = verbconjugated

    #####################################
    #{ Display functions
    #####################################
    def display(self, mode=None, listtense=None):
        """
        Render the conjugation result for a list of tenses.

        Fix: the class previously defined ``display`` twice; the second
        definition silently shadowed the first, so the documented
        no-argument form (using ``self.mode``) was dead code. The two are
        merged here: ``mode`` now defaults to ``self.mode`` when omitted,
        which is backward compatible with explicit ``display(mode, ...)``
        callers.

        Supported modes: 'Text', 'HTML', 'HTMLColoredDiacritics', 'DICT',
        'CSV', 'GUI', 'TABLE', 'XML', 'TeX', 'ROWS'; anything else falls
        back to 'Text'.

        @param mode: the display mode (default: ``self.mode``).
        @type mode: unicode
        @param listtense: the tenses to display (default: all table tenses).
        @type listtense: list of unicode
        @return: the result in the requested display mode.
        @rtype: according to display mode.
        """
        if mode is None:
            mode = self.mode
        if not listtense:
            listtense = vconst.TABLE_TENSE
        if mode == 'Text':
            return self.display_text(listtense)
        elif mode == 'HTML':
            return self.display_html(listtense)
        elif mode == 'HTMLColoredDiacritics':
            return self.display_html_colored_diacritics(listtense)
        elif mode == 'DICT':
            return self.display_dict(listtense)
        elif mode == 'CSV':
            return self.display_csv(listtense)
        elif mode == 'GUI':
            return self.display_table(listtense)
        elif mode == 'TABLE':
            return self.display_table(listtense)
        elif mode == 'XML':
            return self.display_xml(listtense)
        elif mode.upper() == 'TeX'.upper():
            return self.display_tex(listtense)
        elif mode == 'ROWS'.upper():
            return self.display_rows(listtense)
        else:
            return self.display_text(listtense)

    def display_text(self, listtense):
        """
        Render the conjugation result as tab-separated plain text.

        @param listtense: the tenses to display.
        @type listtense: list of unicode
        @return: the result as text.
        @rtype: unicode
        """
        text = u""
        # Attribute lines first (title: value).
        for title in self.text.keys():
            text += u"%s: %s\n" % (title, self.text[title])
        # Header row: tense names.
        text += u"\t"
        text += u"\t".join(listtense)
        # One row per pronoun.
        for pronoun in vconst.PronounsTable:
            text += u"\n%s" % (pronoun)
            for tense in listtense:
                if pronoun in self.tab_conjug[tense]:
                    text += u"\t%s" % (self.tab_conjug[tense][pronoun])
        return text

    def display_csv(self, listtense):
        """
        Render the conjugation result as line-oriented text: attribute
        lines, then a header line of tense names, then one line per
        pronoun.

        NOTE(review): despite the "CSV" name, fields are concatenated
        with no separator (u"".join and plain "%s" appends), matching the
        original behavior — confirm with consumers before adding commas.

        @param listtense: the tenses to display.
        @type listtense: list of unicode
        @return: the result as text.
        @rtype: unicode
        """
        text = u""
        for title in self.text.keys():
            text += u"%s: %s\n" % (title, self.text[title])
        text += u"".join(listtense)
        text += u"\n"
        for pronoun in vconst.PronounsTable:
            text += u"%s" % (pronoun)
            for tense in listtense:
                if pronoun in self.tab_conjug[tense]:
                    text += u"%s" % (self.tab_conjug[tense][pronoun])
            text += u"\n"
        return text

    def display_rows(self, listtense):
        """
        Render the conjugation result as tab-separated database-style
        rows. Every non-empty conjugation yields one row:
        unvocalized form, vocalized form, pronoun code, tense code
        (see TAB_DISPLAY), transitivity flag, original verb, sarf bab.

        @param listtense: the tenses to display.
        @type listtense: list of unicode
        @return: the result as text rows.
        @rtype: unicode
        """
        text = u""
        transitive = "0"
        if self.transitive:
            transitive = '1'
        for pronoun in vconst.PronounsTable:
            for tense in listtense:
                # Skip pronoun/tense combinations with no form
                # (e.g. imperative for third persons).
                if self.tab_conjug[tense][pronoun] != "":
                    text += "\t".join([
                        araby.strip_harakat(self.tab_conjug[tense][pronoun]),
                        self.tab_conjug[tense][pronoun],
                        TAB_DISPLAY[pronoun],
                        TAB_DISPLAY[tense],
                        transitive,
                        self.verb,
                        self.bab,
                        ])
                    text += u"\n"
        return text

    def display_html(self, listtense):
        """
        Render the conjugation result as HTML: a verb heading, an
        attribute list, then one table for indicative tenses and one for
        passive tenses.

        @param listtense: the tenses to display.
        @type listtense: list of unicode
        @return: the result as HTML.
        @rtype: unicode
        """
        # Split requested tenses into indicative vs passive groups so
        # each group gets its own table.
        indicative_tenses = []
        passive_tenses = []
        for tense in listtense:
            if tense in vconst.TableIndicativeTense:
                indicative_tenses.append(tense)
            else:
                passive_tenses.append(tense)
        text = u""
        text += u"<h3>%s : %s - %s</h3>\n" % (self.verb, self.verb,
                                              self.future_form)
        # Special attributes of the verb as a bullet list.
        text += u"<ul>\n"
        for title in self.text.keys():
            text += u"<li><b>%s:</b> %s</li>\n" % (title, self.text[title])
        text += u"</ul>\n\n"

        for mode in ("indicative", "passive"):
            if mode == "indicative":
                listtense_to_display = indicative_tenses
            else:
                listtense_to_display = passive_tenses
                text += "<br/>"
            if len(listtense_to_display) > 0:
                text += u"""<table class = 'resultarea' border = 1
             cellspacing = 0>\n"""
                text += u"<tr><th> </th>"
                for tense in listtense_to_display:
                    text += u"<th>%s</th>" % (tense)
                text += u"</tr>\n"
                for pronoun in vconst.PronounsTable:
                    text += u"<tr>"
                    text += u"<th>%s</th>" % (pronoun)
                    for tense in listtense_to_display:
                        text += u"<td> %s</td>" % (
                            self.tab_conjug[tense][pronoun])
                    text += u"</tr>\n"
                text += u"</table>\n"
        return text

    def display_html_colored_diacritics(self, listtense):
        """
        Render the conjugation result as HTML with highlighted
        vocalization marks.

        @param listtense: the tenses to display.
        @type listtense: list of unicode
        @return: the result as HTML.
        @rtype: unicode
        """
        text = self.display_html(listtense)
        text = self.highlight_diacritics_html(text)
        return text

    def highlight_diacritics_html(self, text):
        """
        Wrap each short-vowel diacritic (fatha, damma, kasra, sukun) in a
        ``<span class='tashkeel'>`` tag, padding with a tatweel or a space
        so the mark stays visible when styled.

        @param text: the given HTML text.
        @type text: unicode
        @return: the highlighted HTML.
        @rtype: unicode
        """
        hight_text = u""
        lefttag = u"<span class = 'tashkeel'>"
        righttag = u"</span>"
        for i in range(len(text)):
            if text[i] in (araby.FATHA, araby.DAMMA, araby.KASRA, araby.SUKUN):
                # A tatweel carrier is used only when the previous letter
                # connects (not in the non-joining letter set below) and
                # the next character is not a space or a tag opener.
                if (i > 0 and text[i-1] not in (araby.ALEF,
                    araby.ALEF_HAMZA_ABOVE, araby.WAW_HAMZA, araby.ALEF_MADDA,
                    araby.DAL, araby.THAL, araby.WAW, araby.REH, araby.ZAIN,
                    araby.SHADDA)) and (i+1 < len(text) and text[i+1] not in (" ", "<")):
                    hight_text += u"".join([lefttag, araby.TATWEEL,
                                            text[i], righttag])
                else:
                    hight_text += u"".join([lefttag, " ", text[i], righttag])
            else:
                hight_text += text[i]
        return hight_text

    def display_table(self, listtense):
        """
        Render the conjugation result as a nested dict indexed by row and
        column numbers; row 0 holds the header (pronoun column title then
        tense names), column 0 of each other row holds the pronoun.

        @param listtense: the tenses to display.
        @type listtense: list of unicode
        @return: the result as a table; table[0] contains the header.
        @rtype: dict of dict, integer-indexed.
        """
        table = {}

        j = 0
        table[0] = {0: u"الضمائر"}
        for j in range(len(listtense)):
            table[0][j+1] = listtense[j]
        i = 1
        for pronoun in vconst.PronounsTable:
            table[i] = {}
            table[i][0] = pronoun
            j = 1
            for tense in listtense:
                table[i][j] = self.tab_conjug[tense][pronoun]
                j = j+1
            i = i+1
        return table

    def display_dict(self, listtense):
        """
        Render the conjugation result as a python dict keyed by tense.

        @param listtense: the tenses to display.
        @type listtense: list of unicode
        @return: the result as a python dict (tense -> pronoun -> form).
        @rtype: dict
        """
        table = {}
        for tense in listtense:
            table[tense] = self.tab_conjug[tense]
        return table

    def display_xml(self, listtense):
        """
        Render the conjugation result as XML.

        @param listtense: the tenses to display.
        @type listtense: list of unicode
        @return: the result as XML.
        @rtype: unicode
        """
        text = u""
        text += u"<verb_conjugation>\n"
        text += u"\t<proprety name = 'verb' value = '%s'/>\n" % (self.verb)
        for title in self.text.keys():
            text += u"\t<proprety name = '%s' value = '%s'/>\n" % (title,
                self.text[title])
        for tense in listtense:
            text += u"\t<tense name = '%s'>\n" % (tense)
            for pronoun in vconst.PronounsTable:
                # Empty forms are omitted from the XML output.
                if self.tab_conjug[tense][pronoun] != "":
                    text += u"""\t\t<conjugation pronoun = '%s' value = '%s'
/>\n""" % (pronoun, self.tab_conjug[tense][pronoun])
            text += u"\t</tense>\n"
        text += u"</verb_conjugation>"
        return text

    def display_tex(self, listtense):
        """
        Render the conjugation result as a ConTeXt (TeX) document.

        @param listtense: the tenses to display.
        @type listtense: list of unicode
        @return: the result in TeX format.
        @rtype: unicode
        """
        text = u""
        text += u"\\environment qutrub-layout\n"
        text += u"\\starttext\n"

        text += u"\\Title{%s}\n" % (self.verb)

        text += u"\\startitemize\n"
        for title in self.text.keys():
            # The internal-spelling attribute needs \DeShape to disable
            # Arabic glyph shaping in the output.
            if title == u" الكتابة الداخلية للفعل ":
                text += u"\\item {\\bf %s} \\DeShape{%s}\n" % (title,
                    self.text[title])
            else:
                text += u"\\item {\\bf %s} %s\n" % (title, self.text[title])
        text += u"\\stopitemize\n"

        text += u"\\starttable[|lB|l|l|l|l|l|]\n"
        text += u"\\HL[3]\n\\NC"
        for tense in listtense:
            text += u"\\NC {\\bf %s}" % (tense)
        text += u"\\SR\n\\HL\n"
        for pronoun in vconst.PronounsTable:
            text += u"\\NC %s" % (pronoun)
            for tense in listtense:
                text += u"\\NC %s" % (self.tab_conjug[tense][pronoun])
            text += u"\\AR\n"
        text += u"\\LR\\HL[3]\n"
        text += u"\\stoptable\n"

        text += u"\\stoptext"
        return text
|
libqutrub/mosaref_main.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#************************************************************************
|
| 2 |
+
# $Id: mosaref_main.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
|
| 3 |
+
#
|
| 4 |
+
# ------------
|
| 5 |
+
# Description:
|
| 6 |
+
# ------------
|
| 7 |
+
# Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
|
| 8 |
+
#
|
| 9 |
+
# This file is used by the web interface to execute verb conjugation
|
| 10 |
+
#
|
| 11 |
+
# -----------------
|
| 12 |
+
# Revision Details: (Updated by Revision Control System)
|
| 13 |
+
# -----------------
|
| 14 |
+
# $Date: 2009/06/02 01:10:00 $
|
| 15 |
+
# $Author: Taha Zerrouki $
|
| 16 |
+
# $Revision: 0.7 $
|
| 17 |
+
# $Source: arabtechies.sourceforge.net
|
| 18 |
+
#
|
| 19 |
+
#***********************************************************************/
|
| 20 |
+
"""
|
| 21 |
+
The main function to call qutrub conjugation from other programs.
|
| 22 |
+
"""
|
| 23 |
+
import libqutrub.classverb as classverb
|
| 24 |
+
import libqutrub.ar_verb as ar_verb
|
| 25 |
+
import libqutrub.verb_valid as verb_valid
|
| 26 |
+
import libqutrub.verb_const as vconst
|
| 27 |
+
import pyarabic.araby as araby
|
| 28 |
+
import libqutrub.verb_db as verb_db
|
| 29 |
+
verb_db.create_index_triverbtable()
|
| 30 |
+
#~ """ you need to create the trileteral verb dictionary
|
| 31 |
+
# index to search within triverbs."""
|
| 32 |
+
|
| 33 |
+
def do_sarf(word, future_type, alltense = True, past = False, future = False,
    passive = False, imperative = False, future_moode = False, confirmed = False,
    transitive = False, display_format = "HTML"):
    """
    The main function to conjugate verbs.
    You must specify all parameters.
    Can be used as an example to call the conjugation class.
    @param word: the given verb. The given word must be vocalized
    if it's 3 letters long only; otherwise the verb can be unvocalized,
    but the Shadda must be given — it's considered as a letter.
    @type word: unicode.
    @param future_type: for triliteral verbs, the mark (haraka) of the
    Ain letter in the future tense, given as a haraka name
    (فتحة، ضمة، كسرة).
    @type future_type: unicode (فتحة، ضمة، كسرة).
    @param alltense: conjugate in all arabic tenses.
    @type alltense: Boolean, default(True)
    @param past: conjugate in past tense الماضي
    @type past: Boolean, default(False)
    @param future: conjugate in arabic present and future tenses المضارع
    @type future: Boolean, default(False)
    @param passive: conjugate in passive voice المبني للمجهول
    @type passive: Boolean, default(False)
    @param imperative: conjugate in imperative tense الأمر
    @type imperative: Boolean, default(False)
    @param future_moode: conjugate in future moode tenses المضارع المنصوب والمجزوم
    @type future_moode: Boolean, default(False)
    @param confirmed: conjugate in confirmed cases tense المؤكّد
    @type confirmed: Boolean, default(False)
    @param transitive: the verb transitivity التعدي واللزوم
    @type transitive: Boolean, default(False)
    @param display_format: Choose the display format:
        - 'Text':
        - 'HTML':
        - 'HTMLColoredDiacritics':
        - 'DICT':
        - 'CSV':
        - 'GUI':
        - 'TABLE':
        - 'XML':
        - 'TeX':
        - 'ROWS':
    @type display_format: string, default("HTML")
    @return: The conjugation result, or None if the verb is not a valid
    infinitive.
    @rtype: According to display_format.
    """
    # Reject anything that is not a well-formed infinitive verb.
    valid = verb_valid.is_valid_infinitive_verb(word)
    if valid:
        # Convert the haraka name (e.g. فتحة) into the internal code.
        future_type = ar_verb.get_future_type_by_name(future_type)
        #~ bab_sarf = 0
        #init the verb class to treat the verb
        vbc = classverb.VerbClass(word, transitive, future_type)
        vbc.set_display(display_format)

        if alltense :
            result = vbc.conjugate_all_tenses()
        else :
            # Build the exact tense list from the boolean switches;
            # passive variants are only added when the corresponding
            # active tense is also requested.
            listetenses = []
            if past :
                listetenses.append(vconst.TensePast)
            if (past and passive ) :
                listetenses.append(vconst.TensePassivePast)
            if future :
                listetenses.append(vconst.TenseFuture)
            if (future and passive ) :
                listetenses.append(vconst.TensePassiveFuture)
            if (future_moode) :
                listetenses.append(vconst.TenseSubjunctiveFuture)
                listetenses.append(vconst.TenseJussiveFuture)
            if (confirmed) :
                if (future):
                    listetenses.append(vconst.TenseConfirmedFuture)
                if (imperative):
                    listetenses.append(vconst.TenseConfirmedImperative)
            # NOTE(review): this condition tests `transitive`, not
            # `passive` — plausibly intentional (only transitive verbs
            # have a passive confirmed future), but confirm.
            if (future and transitive and confirmed) :
                listetenses.append(vconst.TensePassiveConfirmedFuture)
            if (passive and future_moode) :
                listetenses.append(vconst.TensePassiveSubjunctiveFuture)
                listetenses.append(vconst.TensePassiveJussiveFuture)
            if imperative :
                listetenses.append(vconst.TenseImperative)
            result = vbc.conjugate_all_tenses(listetenses)
        return result
    else: return None
|
| 117 |
+
|
| 118 |
+
def get_future_form(verb_vocalised, haraka = araby.FATHA):
    """
    Get the future (present-tense) form of a verb, conjugated with the
    pronoun huwa (third person masculine singular).

    For example, the verb qal with Damma as the future-stem haraka
    yields yaqulu.
    الحصول على صيغة الفعل في المضارع، فالفعل قال، وحركة عينه في المضارع ضمة، نحصل على يقول.

    @param verb_vocalised: the given verb.
    @type verb_vocalised: unicode.
    @param haraka: the future mark for triliteral verbs.
    @type haraka: unicode.
    @return: the conjugated form in the future tense.
    @rtype: unicode.
    """
    # Accept either a raw haraka mark or a haraka name; normalize a
    # name (فتحة، ضمة، كسرة) into the corresponding mark.
    future_mark = haraka
    if future_mark not in (araby.FATHA, araby.DAMMA, araby.KASRA):
        future_mark = ar_verb.get_future_type_by_name(future_mark)
    # Transitivity does not affect the active future form for huwa.
    conjugator = classverb.VerbClass(verb_vocalised, True, future_mark)
    return conjugator.conjugate_tense_pronoun(vconst.TenseFuture, vconst.PronounHuwa)
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
|
libqutrub/stack.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
|
| 2 |
+
# -*- coding=utf-8 -*-
|
| 3 |
+
#************************************************************************
|
| 4 |
+
# from arabic_const import *
|
| 5 |
+
from pyarabic.araby import *
|
| 6 |
+
from verb_const import *
|
| 7 |
+
class Stack:
    """A minimal LIFO stack of characters (or arbitrary items).

    The stack can be initialised from a string: each character of
    *text* becomes one item, with the last character on top.
    """

    def __init__(self, text=""):
        # One item per character; list("") yields an empty stack.
        self.items = list(text)

    def push(self, item):
        """Put *item* on top of the stack."""
        self.items.append(item)

    def pop(self):
        """Remove and return the top item, or None when the stack is empty."""
        if self.isEmpty():
            return None
        return self.items.pop()

    def isEmpty(self):
        """Return True when the stack holds no items."""
        # Truthiness check instead of the original `self.items == []`.
        return not self.items
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def separate(word):
    """
    Separate the letters from the vowel marks in an Arabic word.

    Every letter receives exactly one mark; a letter carrying no haraka
    gets NOT_DEF_HARAKA.  A SHADDA is treated as a letter: it forces
    SUKUN onto the previous letter and itself takes the undefined mark.

    @param word: a vocalized or partially vocalized word.
    @return: a (letters, marks) tuple of two equal-length strings.
    """
    # Reverse the word into a stack so pops yield characters in order.
    source = Stack(word)
    source.items.reverse()
    letters = Stack()
    marks = Stack()
    # NOTE(review): harakat appear to be encoded here as the latin
    # letters 'a' (fatha) and 'u' (damma) -- confirm against callers.
    vowels = ('a', 'u')
    current = source.pop()
    # An Arabic word cannot start with a haraka, so skip any leading
    # vowel marks (remember: the word sits reversed in the stack).
    while current in vowels:
        current = source.pop()
    while current is not None:
        if current in vowels:
            # Two consecutive harakat are impossible: the real mark
            # replaces the placeholder pushed along with its letter.
            marks.pop()
            marks.push(current)
        elif current == SHADDA:
            # The letter before a shadda carries a sukun, and the
            # shadda itself starts with the undefined mark.
            marks.pop()
            marks.push(SUKUN)
            marks.push(NOT_DEF_HARAKA)
            letters.push(SHADDA)
        else:
            marks.push(NOT_DEF_HARAKA)
            letters.push(current)
        current = source.pop()
    return (''.join(letters.items), ''.join(marks.items))
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def joint(letters, marks):
    """
    Join a string of letters with a string of marks (inverse of separate()).

    Each letter is emitted followed by its mark, except that
    NOT_DEF_HARAKA marks are dropped, and a SHADDA discards a vowel
    mark sitting directly below it on the output stack.

    @param letters: the bare letters.
    @param marks: one mark per letter.
    @return: the vocalized word; "" when the input lengths differ;
             False when the inputs were not fully consumed (kept for
             backward compatibility with existing callers).
    """
    # The two inputs must pair exactly one mark with every letter.
    if len(letters) != len(marks):
        return ""

    letter_stack = Stack(letters)
    letter_stack.items.reverse()
    mark_stack = Stack(marks)
    mark_stack.items.reverse()
    word_stack = Stack()
    letter = letter_stack.pop()
    mark = mark_stack.pop()

    # NOTE(review): marks seem to be encoded as latin 'a','u','o','i'
    # plus SUKUN here -- confirm against the rest of the module.
    vowels = ('a', 'u', 'o', 'i', SUKUN)
    while letter is not None and mark is not None:
        if letter == SHADDA:
            # A vowel directly under the shadda position is discarded;
            # any other top item is restored before pushing the shadda.
            top = word_stack.pop()
            if top not in vowels:
                word_stack.push(top)
            word_stack.push(letter)
            if mark != NOT_DEF_HARAKA:
                word_stack.push(mark)
        else:
            word_stack.push(letter)
            if mark != NOT_DEF_HARAKA:
                word_stack.push(mark)
        letter = letter_stack.pop()
        mark = mark_stack.pop()
    if not (letter_stack.isEmpty() and mark_stack.isEmpty()):
        # Inputs not fully consumed: keep the historical False return.
        return False
    return ''.join(word_stack.items)
|
| 103 |
+
|
| 104 |
+
def vocalizedlike(word1, word2):
    """
    Return True when the two words have the same letters and
    compatible harakat.

    The words may be fully or partially vocalized: a vowel present in
    only one of them is skipped; any other difference is a mismatch.
    Comparison proceeds from the end of both words (stack order).
    """
    # Fixed: the original used Python 2 `print` statements (a
    # SyntaxError on the Python 3 runtime this project targets), and
    # returned True on a mismatch that happened to empty both stacks
    # (e.g. "a" vs "b") or when one word had an unmatched final letter.
    debug = False
    stack1 = Stack(word1)
    stack2 = Stack(word2)
    last1 = stack1.pop()
    last2 = stack2.pop()
    if debug:
        print("+0", stack1, stack2)
    # NOTE(review): vowels encoded as latin 'a'/'u' -- confirm.
    vowels = ('a', 'u')
    while last1 is not None and last2 is not None:
        if last1 == last2:
            if debug:
                print("+2", stack1.items, last1, stack2.items, last2)
            last1 = stack1.pop()
            last2 = stack2.pop()
        elif last1 in vowels and last2 not in vowels:
            # word1 carries an extra vowel: skip it.
            if debug:
                print("+2", stack1.items, last1, stack2.items, last2)
            last1 = stack1.pop()
        elif last1 not in vowels and last2 in vowels:
            # word2 carries an extra vowel: skip it.
            if debug:
                print("+2", stack1.items, last1, stack2.items, last2)
            last2 = stack2.pop()
        else:
            # Real mismatch: the words cannot be alike.
            if debug:
                print("+2", stack1.items, last1, stack2.items, last2)
            return False
    # Both words must be fully consumed, including the last popped
    # characters, for the comparison to succeed.
    return (last1 is None and last2 is None
            and stack1.isEmpty() and stack2.isEmpty())
|
| 133 |
+
#-------------------------
|
| 134 |
+
# Function def vaznlike(word1,wazn):
|
| 135 |
+
#-------------------------
|
| 136 |
+
def waznlike(word1, wazn):
    """
    Return True when *word1* matches the given wazn (pattern).

    In the pattern, the generic letters FEH, AIN and LAM act as root
    slots; every other character must match literally.  Both strings
    may be fully or partially vocalized.
    """
    # Fixed: the original used Python 2 `print` statements (a
    # SyntaxError on the Python 3 runtime this project targets) and
    # printed debug output unconditionally ("t", "the root is ...");
    # those leftovers are now guarded by the debug flag.
    debug = False
    word_stack = Stack(word1)
    wazn_stack = Stack(wazn)
    root = Stack()  # collects the letters matched against root slots
    last1 = word_stack.pop()
    last2 = wazn_stack.pop()
    if debug:
        print("+0", word_stack, wazn_stack)
    # NOTE(review): vowels encoded as latin 'a'/'u' -- confirm.
    vowels = ('a', 'u')
    while last1 is not None and last2 is not None:
        if last1 == last2 and last2 not in (FEH, AIN, LAM):
            if debug:
                print("+2", word_stack.items, last1, wazn_stack.items, last2)
            last1 = word_stack.pop()
            last2 = wazn_stack.pop()
        elif last1 not in vowels and last2 in (FEH, AIN, LAM):
            # A root slot in the pattern: record the matching letter.
            if debug:
                print("+2", word_stack.items, last1, wazn_stack.items, last2)
            root.push(last1)
            last1 = word_stack.pop()
            last2 = wazn_stack.pop()
        elif last1 in vowels and last2 not in vowels:
            # The word carries an extra vowel: skip it.
            if debug:
                print("+2", word_stack.items, last1, wazn_stack.items, last2)
            last1 = word_stack.pop()
        elif last1 not in vowels and last2 in vowels:
            # The pattern carries an extra vowel: skip it.
            if debug:
                print("+2", word_stack.items, last1, wazn_stack.items, last2)
            last2 = wazn_stack.pop()
        else:
            # Real mismatch: stop comparing.
            if debug:
                print("+2", word_stack.items, last1, wazn_stack.items, last2)
            break
    # The root letters were stacked in reverse order.
    root.items.reverse()
    if debug:
        print(" the root is ", root.items)
    return word_stack.isEmpty() and wazn_stack.isEmpty()
|
libqutrub/triverbtable.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
libqutrub/verb_const.py
ADDED
|
@@ -0,0 +1,613 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
|
| 2 |
+
# -*- coding=utf-8 -*-
|
| 3 |
+
#---
|
| 4 |
+
#************************************************************************
|
| 5 |
+
# $Id: verb_const.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
|
| 6 |
+
#
|
| 7 |
+
# ------------
|
| 8 |
+
# Description:
|
| 9 |
+
# ------------
|
| 10 |
+
# Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
|
| 11 |
+
#
|
| 12 |
+
# List of constants used in the arabic verb conjugation
|
| 13 |
+
#
|
| 14 |
+
# -----------------
|
| 15 |
+
# Revision Details:
|
| 16 |
+
# -----------------
|
| 17 |
+
# $Date: 2009/06/02 01:10:00 $
|
| 18 |
+
# $Author: Taha Zerrouki $
|
| 19 |
+
# $Revision: 0.7 $
|
| 20 |
+
# $Source: arabtechies.sourceforge.net
|
| 21 |
+
#
|
| 22 |
+
#***********************************************************************/
|
| 23 |
+
"""
|
| 24 |
+
Arabic Qutrub verb conjugation, verb_const file
|
| 25 |
+
"""
|
| 26 |
+
from pyarabic.araby import FATHA, DAMMA, KASRA, SHADDA, SUKUN, HAMZA, ALEF, \
|
| 27 |
+
NOON, ALEF_WASLA, WAW, ALEF_HAMZA_ABOVE, ALEF_HAMZA_BELOW, ALEF_MADDA, \
|
| 28 |
+
YEH_HAMZA, WAW_HAMZA, TATWEEL, SMALL_ALEF, SMALL_YEH, SMALL_WAW, YEH, \
|
| 29 |
+
ALEF_MAKSURA
|
| 30 |
+
|
| 31 |
+
import re
|
| 32 |
+
|
| 33 |
+
PronounsTable = (u"أنا" , u"نحن" , u"أنت" , u"أنتِ" , u"أنتما" ,
|
| 34 |
+
u"أنتما مؤ" , u"أنتم" , u"أنتن" , u"هو" , u"هي" , u"هما" ,
|
| 35 |
+
u"هما مؤ" , u"هم" , u"هن")
|
| 36 |
+
PronounsTableNotPassiveForUntransitive = (u"أنا" , u"نحن" , u"أنت" ,
|
| 37 |
+
u"أنتِ" , u"أنتما" , u"أنتما مؤ" , u"أنتم" , u"أنتن" , u"هما" ,
|
| 38 |
+
u"هما مؤ" , u"هم" , u"هن")
|
| 39 |
+
PronounAna = u"أنا"
|
| 40 |
+
PronounNahnu = u"نحن"
|
| 41 |
+
PronounAnta = u"أنت"
|
| 42 |
+
PronounAnti = u"أنتِ"
|
| 43 |
+
PronounAntuma = u"أنتما"
|
| 44 |
+
PronounAntuma_f = u"أنتما مؤ"
|
| 45 |
+
PronounAntum = u"أنتم"
|
| 46 |
+
PronounAntunna = u"أنتن"
|
| 47 |
+
PronounHuwa = u"هو"
|
| 48 |
+
PronounHya = u"هي"
|
| 49 |
+
PronounHuma = u"هما"
|
| 50 |
+
PronounHuma_f = u"هما مؤ"
|
| 51 |
+
PronounHum = u"هم"
|
| 52 |
+
PronounHunna = u"هن"
|
| 53 |
+
PRONOUN_FEATURES = {
|
| 54 |
+
u"أنا" : {'person':u'متكلم', 'gender':u'', 'number': u'مفرد'}
|
| 55 |
+
, u"أنت" : {'person':u'مخاطب', 'gender':u'مذكر', 'number': u'مفرد'}
|
| 56 |
+
, u"أنتِ" : {'person':u'مخاطب', 'gender':u'مؤنث', 'number': u'مفرد'}
|
| 57 |
+
, u"هو" : {'person':u'غائب', 'gender':u'مذكر', 'number': u'مفرد'}
|
| 58 |
+
, u"هي" : {'person':u'غائب', 'gender':u'مؤنث', 'number': u'مفرد'}
|
| 59 |
+
, u"أنتما" : {'person':u'مخاطب', 'gender':u'مذكر', 'number': u'مثنى'}
|
| 60 |
+
, u"أنتما مؤ" : {'person':u'مخاطب', 'gender':u'مؤنث', 'number': u'مثنى'}
|
| 61 |
+
, u"هما" : {'person':u'غائب', 'gender':u'مذكر', 'number': u'مثنى'}
|
| 62 |
+
, u"هما مؤ" : {'person':u'غائب', 'gender':u'مؤنث', 'number': u'مثنى'}
|
| 63 |
+
, u"نحن" : {'person':u'متكلم', 'gender':u'', 'number': u'جمع'}
|
| 64 |
+
, u"أنتم" : {'person':u'مخاطب', 'gender':u'مذكر', 'number': u'جمع'}
|
| 65 |
+
, u"أنتن" : {'person':u'مخاطب', 'gender':u'مؤنث', 'number': u'جمع'}
|
| 66 |
+
, u"هم" : {'person':u'غائب', 'gender':u'مذكر', 'number': u'جمع'}
|
| 67 |
+
, u"هن" : {'person':u'غائب', 'gender':u'مؤنث', 'number': u'جمع'}
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
ImperativePronouns = (u"أنت" , u"أنتِ" , u"أنتما" , u"أنتما مؤ" , u"أنتم"
|
| 71 |
+
, u"أنتن" )
|
| 72 |
+
# const for Tense Name
|
| 73 |
+
TensePast = u"الماضي المعلوم"
|
| 74 |
+
TenseFuture = u"المضارع المعلوم"
|
| 75 |
+
TenseImperative = u"الأمر"
|
| 76 |
+
TenseConfirmedImperative = u"الأمر المؤكد"
|
| 77 |
+
TenseJussiveFuture = u"المضارع المجزوم"
|
| 78 |
+
TenseSubjunctiveFuture = u"المضارع المنصوب"
|
| 79 |
+
TenseConfirmedFuture = u"المضارع المؤكد الثقيل"
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
TensePassivePast = u"الماضي المجهول"
|
| 83 |
+
TensePassiveFuture = u"المضارع المجهول"
|
| 84 |
+
TensePassiveJussiveFuture = u"المضارع المجهول المجزوم"
|
| 85 |
+
TensePassiveSubjunctiveFuture = u"المضارع المجهول المنصوب"
|
| 86 |
+
TensePassiveConfirmedFuture = u"المضارع المؤكد الثقيل المجهول "
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
TABLE_TENSE = [TensePast, TenseFuture, TenseJussiveFuture,
|
| 90 |
+
TenseSubjunctiveFuture, TenseConfirmedFuture, TenseImperative,
|
| 91 |
+
TenseConfirmedImperative,
|
| 92 |
+
TensePassivePast, TensePassiveFuture,
|
| 93 |
+
TensePassiveJussiveFuture, TensePassiveSubjunctiveFuture,
|
| 94 |
+
TensePassiveConfirmedFuture]
|
| 95 |
+
TableIndicativeTense = [TensePast, TenseFuture, TenseJussiveFuture,
|
| 96 |
+
TenseSubjunctiveFuture, TenseConfirmedFuture, TenseImperative,
|
| 97 |
+
TenseConfirmedImperative]
|
| 98 |
+
TablePassiveTense = [TensePassivePast, TensePassiveFuture,
|
| 99 |
+
TensePassiveJussiveFuture, TensePassiveSubjunctiveFuture,
|
| 100 |
+
TensePassiveConfirmedFuture]
|
| 101 |
+
|
| 102 |
+
TENSE_FEATURES = {
|
| 103 |
+
TensePast : { 'tense':u'ماضي', 'voice':u'معلوم', 'mood':u'', 'confirmed':u'', },
|
| 104 |
+
TenseFuture : { 'tense':u'مضارع', 'voice':u'معلوم', 'mood':u'مرفوع', 'confirmed':u'', },
|
| 105 |
+
TenseImperative : { 'tense':u'أمر', 'voice':u'', 'mood':u'', 'confirmed':u'', },
|
| 106 |
+
TenseConfirmedImperative : { 'tense':u'أمر', 'voice':u'', 'mood':u'', 'confirmed':u'مؤكذ', },
|
| 107 |
+
TenseJussiveFuture : { 'tense':u'مضارع', 'voice':u'معلوم', 'mood':u'مجزوم', 'confirmed':u'', },
|
| 108 |
+
TenseSubjunctiveFuture : { 'tense':u'مضارع', 'voice':u'معلوم', 'mood':u'منص��ب', 'confirmed':u'', },
|
| 109 |
+
TenseConfirmedFuture : { 'tense':u'مضارع', 'voice':u'معلوم', 'mood':u'', 'confirmed':u'مؤكد', },
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
TensePassivePast : { 'tense':u'ماضي', 'voice':u'مجهول', 'mood':u'', 'confirmed':u'', },
|
| 113 |
+
TensePassiveFuture : { 'tense':u'مضارع', 'voice':u'مجهول', 'mood':u'مرفوع', 'confirmed':u'', },
|
| 114 |
+
TensePassiveJussiveFuture : { 'tense':u'مضارع', 'voice':u'مجهول', 'mood':u'مجزوم', 'confirmed':u'', },
|
| 115 |
+
TensePassiveSubjunctiveFuture : { 'tense':u'مضارع', 'voice':u'مجهول', 'mood':u'منصوب', 'confirmed':u'', },
|
| 116 |
+
TensePassiveConfirmedFuture : { 'tense':u'مضارع', 'voice':u'مجهول', 'mood':u'', 'confirmed':u'مؤكد', },
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
past = {
|
| 120 |
+
u"أنا" : [u"", u"ْتُ"]
|
| 121 |
+
, u"أنت" : [u"", u"ْتَ"]
|
| 122 |
+
, u"أنتِ" : [u"", u"ْتِ"]
|
| 123 |
+
, u"هو" : [u"", u"َ"]
|
| 124 |
+
, u"هي" : [u"", u"َتْ"]
|
| 125 |
+
, u"أنتما" : [u"", u"ْتُما"]
|
| 126 |
+
, u"أنتما مؤ" : [u"", u"ْتُما"]
|
| 127 |
+
, u"هما" : [u"", u"َا"]
|
| 128 |
+
, u"هما مؤ" : [u"", u"َتَا"]
|
| 129 |
+
, u"نحن" : [u"", u"ْنَا"]
|
| 130 |
+
, u"أنتم" : [u"", u"ْتُم"]
|
| 131 |
+
, u"أنتن" : [u"", u"ْتُنَّ"]
|
| 132 |
+
##, u"هم" : [u"", u"ُوا"]
|
| 133 |
+
, u"هم" : [u"", DAMMA + WAW + ALEF_WASLA]
|
| 134 |
+
, u"هن" : [u"", u"ْنَ"]
|
| 135 |
+
}
|
| 136 |
+
future = {
|
| 137 |
+
u"أنا" : [u"أ", u"ُ"]
|
| 138 |
+
, u"أنت" : [u"ت", u"ُ"]
|
| 139 |
+
, u"أنتِ" : [u"ت", u"ِينَ"]
|
| 140 |
+
, u"أنتم" : [u"ت", u"ُونَ"]
|
| 141 |
+
, u"أنتما" : [u"ت", FATHA + ALEF + NOON + KASRA]
|
| 142 |
+
, u"أنتما مؤ" : [u"ت", FATHA + ALEF + NOON + KASRA]
|
| 143 |
+
, u"أنتن" : [u"ت", SUKUN + NOON + FATHA]
|
| 144 |
+
, u"نحن" : [u"ن", u"ُ"]
|
| 145 |
+
, u"هم" : [u"ي", u"ُونَ"]
|
| 146 |
+
, u"هما" : [u"ي", u"َانِ"]
|
| 147 |
+
, u"هما مؤ" : [u"ت", u"َانِ"]
|
| 148 |
+
, u"هن" : [u"ي", u"ْنَ"]
|
| 149 |
+
, u"هو" : [u"ي", u"ُ"]
|
| 150 |
+
, u"هي" : [u"ت", u"ُ"]
|
| 151 |
+
}
|
| 152 |
+
future_majzoom = {
|
| 153 |
+
u"أنا" : [u"أ", u"ْ"]
|
| 154 |
+
, u"أنت" : [u"ت", u"ْ"]
|
| 155 |
+
, u"أنتِ" : [u"ت", u"ِي"]
|
| 156 |
+
, u"أنتم" : [u"ت", DAMMA + WAW + ALEF_WASLA]
|
| 157 |
+
##, u"أنتم" : [u"ت", DAMMA+WAW+ALEF]
|
| 158 |
+
, u"أنتما" : [u"ت", u"َا"]
|
| 159 |
+
, u"أنتما مؤ" : [u"ت", u"َا"]
|
| 160 |
+
, u"أنتن" : [u"ت", u"ْنَ"]
|
| 161 |
+
, u"نحن" : [u"ن", u"ْ"]
|
| 162 |
+
##, u"هم" : [u"ي", DAMMA+WAW+ALEF]
|
| 163 |
+
, u"هم" : [u"ي", DAMMA+WAW+ALEF_WASLA]
|
| 164 |
+
, u"هما" : [u"ي", u"َا"]
|
| 165 |
+
, u"هما مؤ" : [u"ت", u"َا"]
|
| 166 |
+
, u"هن" : [u"ي", u"ْنَ"]
|
| 167 |
+
, u"هو" : [u"ي", u"ْ"]
|
| 168 |
+
, u"هي" : [u"ت", u"ْ"]
|
| 169 |
+
}
|
| 170 |
+
future_mansoub = {
|
| 171 |
+
u"أنا" : [u"أ", u"َ"]
|
| 172 |
+
, u"أنت" : [u"ت", u"َ"]
|
| 173 |
+
, u"أنتِ" : [u"ت", u"ِي"]
|
| 174 |
+
, u"أنتم" : [u"ت", DAMMA+WAW+ALEF_WASLA]
|
| 175 |
+
##, u"أنتم" : [u"ت", DAMMA+WAW+ALEF]
|
| 176 |
+
, u"أنتما" : [u"ت", u"َا"]
|
| 177 |
+
, u"أنتما مؤ" : [u"ت", u"َا"]
|
| 178 |
+
, u"أنتن" : [u"ت", u"ْنَ"]
|
| 179 |
+
, u"نحن" : [u"ن", u"َ"]
|
| 180 |
+
##, u"هم" : [u"ي", DAMMA+WAW+ALEF]
|
| 181 |
+
, u"هم" : [u"ي", DAMMA+WAW+ALEF_WASLA]
|
| 182 |
+
, u"هما" : [u"ي", u"َا"]
|
| 183 |
+
, u"هما مؤ" : [u"ت", u"َا"]
|
| 184 |
+
, u"هن" : [u"ي", u"ْنَ"]
|
| 185 |
+
, u"هو" : [u"ي", u"َ"]
|
| 186 |
+
, u"هي" : [u"ت", u"َ"]
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
future_confirmed = {
|
| 190 |
+
u"أنا" : [u"أ", FATHA+NOON+SHADDA+FATHA]
|
| 191 |
+
, u"أنت" : [u"ت", FATHA+NOON+SHADDA+FATHA]
|
| 192 |
+
, u"أنتِ" : [u"ت", KASRA+NOON+SHADDA+FATHA]
|
| 193 |
+
, u"أنتما" : [u"ت", FATHA+ALEF+NOON+SHADDA+KASRA]
|
| 194 |
+
, u"أنتما مؤ" : [u"ت", FATHA+ALEF+NOON+SHADDA+KASRA]
|
| 195 |
+
, u"أنتم" : [u"ت", DAMMA+NOON+SHADDA+FATHA]
|
| 196 |
+
, u"أنتن" : [u"ت", SUKUN+NOON+FATHA+ALEF+NOON+SHADDA+KASRA]
|
| 197 |
+
, u"نحن" : [u"ن", FATHA+NOON+SHADDA+FATHA]
|
| 198 |
+
, u"هم" : [u"ي", DAMMA+NOON+SHADDA+FATHA]
|
| 199 |
+
, u"هما" : [u"ي", FATHA+ALEF+NOON+SHADDA+KASRA]
|
| 200 |
+
, u"هما مؤ" : [u"ت", FATHA+ALEF+NOON+SHADDA+KASRA]
|
| 201 |
+
, u"هن" : [u"ي", SUKUN+NOON+FATHA+ALEF+NOON+SHADDA+KASRA]
|
| 202 |
+
, u"هو" : [u"ي", FATHA+NOON+SHADDA+FATHA]
|
| 203 |
+
, u"هي" : [u"ت", FATHA+NOON+SHADDA+FATHA]
|
| 204 |
+
}
|
| 205 |
+
imperative = {
|
| 206 |
+
u"أنت" : [u"", u"ْ"]
|
| 207 |
+
, u"أنتِ" : [u"", u"ِي"]
|
| 208 |
+
, u"أنتم" : [u"", DAMMA+WAW+ALEF_WASLA]
|
| 209 |
+
, u"أنتما" : [u"", u"َا"]
|
| 210 |
+
, u"أنتما مؤ" : [u"", u"َا"]
|
| 211 |
+
, u"أنتن" : [u"", u"ْنَ"]
|
| 212 |
+
}
|
| 213 |
+
imperative_confirmed = {
|
| 214 |
+
u"أنت" : [u"", FATHA+NOON+SHADDA+FATHA]
|
| 215 |
+
, u"أنتِ" : [u"", KASRA+NOON+SHADDA+FATHA]
|
| 216 |
+
, u"أنتم" : [u"", DAMMA+NOON+SHADDA+FATHA]
|
| 217 |
+
, u"أنتما" : [u"", FATHA+ALEF+NOON+SHADDA+KASRA]
|
| 218 |
+
, u"أنتما مؤ" : [u"", FATHA+ALEF+NOON+SHADDA+KASRA]
|
| 219 |
+
, u"أنتن" : [u"", SUKUN+NOON+FATHA+ALEF+NOON+SHADDA+KASRA]
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
TableTensePronoun = {}
|
| 223 |
+
TableTensePronoun[TensePast] = past
|
| 224 |
+
TableTensePronoun[TenseFuture] = future
|
| 225 |
+
TableTensePronoun[TenseImperative] = imperative
|
| 226 |
+
TableTensePronoun[TenseJussiveFuture] = future_majzoom
|
| 227 |
+
TableTensePronoun[TenseSubjunctiveFuture] = future_mansoub
|
| 228 |
+
TableTensePronoun[TenseConfirmedFuture] = future_confirmed
|
| 229 |
+
TableTensePronoun[TenseConfirmedImperative] = imperative_confirmed
|
| 230 |
+
|
| 231 |
+
TableTensePronoun[TensePassivePast] = past
|
| 232 |
+
TableTensePronoun[TensePassiveFuture] = future
|
| 233 |
+
TableTensePronoun[TensePassiveJussiveFuture] = future_majzoom
|
| 234 |
+
TableTensePronoun[TensePassiveSubjunctiveFuture] = future_mansoub
|
| 235 |
+
TableTensePronoun[TensePassiveConfirmedFuture] = future_confirmed
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
TAB_SARF = {
|
| 239 |
+
#باب تصريف الفعل، الصفر لكل الأفعال عدا الثلاثي
|
| 240 |
+
0: {"past":FATHA, "future":KASRA},
|
| 241 |
+
# فَعَل يَفْعُل
|
| 242 |
+
|
| 243 |
+
1: {"past":FATHA, "future":DAMMA},
|
| 244 |
+
# فَعَل يَفْعِل
|
| 245 |
+
2: {"past":FATHA, "future":KASRA},
|
| 246 |
+
# فَعَل يَفْعَل
|
| 247 |
+
3: {"past":FATHA, "future":FATHA},
|
| 248 |
+
# فَعِل يَفْعَل
|
| 249 |
+
4: {"past":KASRA, "future":FATHA},
|
| 250 |
+
# فَعِل يَفْعِل
|
| 251 |
+
5: {"past":KASRA, "future":KASRA},
|
| 252 |
+
# فَعُل يَفْعُل
|
| 253 |
+
6: {"past":DAMMA, "future":DAMMA},
|
| 254 |
+
}
|
| 255 |
+
|
| 256 |
+
NOT_DEF_HARAKA = TATWEEL
|
| 257 |
+
##NOT_DEF_HARAKA = FATHA
|
| 258 |
+
|
| 259 |
+
STRIP_HARAKA = u"i"
|
| 260 |
+
ALEF_HARAKA = SMALL_ALEF
|
| 261 |
+
ALEF4_HARAKA = u"y"
|
| 262 |
+
ALEF_YEH_HARAKA = u"#"
|
| 263 |
+
ALEF_WAW_HARAKA = u"*"
|
| 264 |
+
|
| 265 |
+
YEH_HARAKA = SMALL_YEH
|
| 266 |
+
|
| 267 |
+
ALTERNATIVE_YEH_HARAKA = u"t"
|
| 268 |
+
ALEF_YEH_ALTERNATIVE = u"x"
|
| 269 |
+
WAW_HARAKA = SMALL_WAW
|
| 270 |
+
ALEF_MAMDUDA = "9"
|
| 271 |
+
YEH_NAKISA = "5"
|
| 272 |
+
|
| 273 |
+
WRITTEN_HARAKA = {
|
| 274 |
+
ALEF_HARAKA:FATHA+ALEF,
|
| 275 |
+
ALEF_WAW_HARAKA:FATHA+ALEF,
|
| 276 |
+
ALEF_YEH_HARAKA:FATHA+ALEF,
|
| 277 |
+
WAW_HARAKA:DAMMA+WAW,
|
| 278 |
+
YEH_HARAKA:KASRA+YEH,
|
| 279 |
+
ALTERNATIVE_YEH_HARAKA:KASRA+YEH,
|
| 280 |
+
NOT_DEF_HARAKA:'',
|
| 281 |
+
FATHA: FATHA,
|
| 282 |
+
DAMMA:DAMMA,
|
| 283 |
+
KASRA:KASRA,
|
| 284 |
+
SUKUN:SUKUN,
|
| 285 |
+
SHADDA:SHADDA
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
# table of conversion if التقاء الساكنين
|
| 289 |
+
CONVERSION_TABLE = {
|
| 290 |
+
ALEF_YEH_HARAKA: KASRA,
|
| 291 |
+
ALEF_WAW_HARAKA: DAMMA,
|
| 292 |
+
WAW_HARAKA: DAMMA,
|
| 293 |
+
YEH_HARAKA : KASRA,
|
| 294 |
+
ALTERNATIVE_YEH_HARAKA: DAMMA,
|
| 295 |
+
}
|
| 296 |
+
##WAW_MAKSURA = WAW
|
| 297 |
+
|
| 298 |
+
#HARAKAT = u"%s%s%s%s%s"%(SUKUN, FATHA, DAMMA, KASRA, SHADDA)
|
| 299 |
+
HARAKAT = (SUKUN, FATHA, DAMMA, KASRA)
|
| 300 |
+
HARAKAT2 = u"".join([ALEF_HARAKA, WAW_HARAKA, YEH_HARAKA, SUKUN,
|
| 301 |
+
FATHA, DAMMA, KASRA])
|
| 302 |
+
HAMZAT_PATTERN = re.compile(u"[%s%s%s%s%s]"%(ALEF_HAMZA_ABOVE, WAW_HAMZA,
|
| 303 |
+
YEH_HAMZA , HAMZA, ALEF_HAMZA_BELOW), re.UNICODE)
|
| 304 |
+
HAMZAT = (ALEF_HAMZA_ABOVE, WAW_HAMZA, YEH_HAMZA , HAMZA, ALEF_HAMZA_BELOW)
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
LAM_ALEF_PAT = re.compile(u'[\ufef7\ufef9\ufef5]', re.UNICODE)
|
| 308 |
+
|
| 309 |
+
#uniformate harkat
|
| 310 |
+
UNIFORMATE_MARKS_4 = FATHA+SUKUN+FATHA+FATHA
|
| 311 |
+
UNIFORMATE_MARKS_5TEH = FATHA+FATHA+SUKUN+FATHA+FATHA
|
| 312 |
+
UNIFORMATE_MARKS_5 = KASRA+SUKUN+FATHA+FATHA+FATHA
|
| 313 |
+
UNIFORMATE_MARKS_6 = KASRA+SUKUN+FATHA+SUKUN+FATHA+FATHA
|
| 314 |
+
|
| 315 |
+
BEGIN_WORD = u"^"
|
| 316 |
+
END_WORD = u"$"
|
| 317 |
+
|
| 318 |
+
LONG_HARAKAT = (ALEF_HARAKA, YEH_HARAKA, WAW_HARAKA, ALEF_YEH_HARAKA,
|
| 319 |
+
ALEF_WAW_HARAKA)
|
| 320 |
+
_F = FATHA
|
| 321 |
+
_D = DAMMA
|
| 322 |
+
_K = KASRA
|
| 323 |
+
_S = SUKUN
|
| 324 |
+
_A = ALEF_HARAKA
|
| 325 |
+
_W = WAW_HARAKA
|
| 326 |
+
_Y = YEH_HARAKA
|
| 327 |
+
|
| 328 |
+
_AH = ALEF_HARAKA
|
| 329 |
+
_YH = YEH_HARAKA
|
| 330 |
+
_WH = WAW_HARAKA
|
| 331 |
+
_AYH = ALEF_YEH_HARAKA
|
| 332 |
+
_AWH = ALEF_WAW_HARAKA
|
| 333 |
+
_YHALT = ALTERNATIVE_YEH_HARAKA
|
| 334 |
+
#HAMZAT
|
| 335 |
+
_AHA = ALEF_HAMZA_ABOVE
|
| 336 |
+
_AHB = ALEF_HAMZA_BELOW
|
| 337 |
+
_AM = ALEF_MADDA
|
| 338 |
+
_YHA = YEH_HAMZA
|
| 339 |
+
_WHA = WAW_HAMZA
|
| 340 |
+
_HZ = HAMZA
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
INITIAL_TAHMEEZ_TABLE = {_S:_HZ, _F:_AHA, _D:_AHA, _K:_AHB, _AH:_AM ,
|
| 344 |
+
_WH:_AHA, _YH:_AHB, _YHALT:_AHB}
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
MIDDLE_TAHMEEZ_TABLE = {
|
| 348 |
+
_S: {_S:_HZ, _F:_AHA, _D:_WHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA },
|
| 349 |
+
_F: {_S:_AHA, _F:_AHA, _D:_WHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA },
|
| 350 |
+
_D: {_S:_WHA, _F:_WHA, _D:_WHA, _K:_YHA, _AH:_WHA, _WH:_WHA, _YH:_YHA },
|
| 351 |
+
_K: {_S:_YHA, _F:_YHA, _D:_YHA, _K:_YHA, _AH:_YHA, _WH:_YHA, _YH:_YHA },
|
| 352 |
+
#_AH: {_S:_HZ, _F:_HZ, _D:_WHA, _K:_YHA, _AH:_HZ, _WH:_WHA, _YH:_YHA },
|
| 353 |
+
_AH: {_S:_HZ, _F:_HZ, _D:_WHA, _K:_YHA, _AH:_HZ, _WH:_WHA, _YH:_YHA },
|
| 354 |
+
#_WH: {_S:_HZ, _F:_HZ, _D:_WHA, _K:_YHA, _AH:_HZ, _WH:_WHA, _YH:_YHA },
|
| 355 |
+
_WH: {_S:_HZ, _F:_HZ, _D:_HZ, _K:_YHA, _AH:_HZ, _WH:_HZ, _YH:_YHA },
|
| 356 |
+
_YH: {_S:_YHA, _F:_YHA, _D:_YHA, _K:_YHA, _AH:_YHA, _WH:_YHA, _YH:_YHA },
|
| 357 |
+
}
|
| 358 |
+
|
| 359 |
+
FINAL_TAHMEEZ_TABLE = {
|
| 360 |
+
u"%s" % BEGIN_WORD :{_S:_HZ, _F:_AHA, _D:_AHA, _K:_YHA, _AH:_AM, _WH:_AHA,
|
| 361 |
+
_YH:_AHA},
|
| 362 |
+
#~ _S: {_S:_HZ, _F:_AHA, _D:_WHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA },
|
| 363 |
+
_S: {_S:_HZ, _F:_AHA, _D:_WHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA },
|
| 364 |
+
_F: {_S:_AHA, _F:_AHA, _D:_AHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA },
|
| 365 |
+
_D: {_S:_WHA, _F:_WHA, _D:_WHA, _K:_YHA, _AH:_WHA, _WH:_WHA, _YH:_YHA },
|
| 366 |
+
_K: {_S:_YHA, _F:_YHA, _D:_YHA, _K:_YHA, _AH:_WHA, _WH:_WHA, _YH:_YHA },
|
| 367 |
+
_AH: {_S:_HZ, _F:_HZ, _D:_HZ, _K:_HZ, _AH:_HZ, _WH:_WHA, _YH:_YHA },
|
| 368 |
+
_WH: {_S:_HZ, _F:_HZ, _D:_HZ, _K:_HZ, _AH:_WHA, _WH:_WHA, _YH:_YHA},
|
| 369 |
+
_YH: {_S:_HZ, _F:_HZ, _D:_HZ, _K:_HZ, _AH:_WHA, _WH:_WHA, _YH:_YHA}
|
| 370 |
+
}
|
| 371 |
+
|
| 372 |
+
# جدول تحويل الألف الفتحة الطويلة إلى حركات أخرى حسب سياقها
|
| 373 |
+
HOMOGENIZE_ALEF_HARAKA_TABLE = {
|
| 374 |
+
_S:{_S:'*' , _F:ALEF_HARAKA, _D:WAW_HARAKA, _K:YEH_HARAKA },
|
| 375 |
+
_F:{_S:ALEF_HARAKA, _F:ALEF_HARAKA, _D:ALEF_HARAKA, _K:ALEF_HARAKA },
|
| 376 |
+
_D:{_S:WAW_HARAKA, _F:ALEF_HARAKA, _D:ALEF_HARAKA, _K:YEH_HARAKA },
|
| 377 |
+
_K:{_S:YEH_HARAKA, _F:ALEF_HARAKA, _D:YEH_HARAKA, _K:ALEF_HARAKA},
|
| 378 |
+
}
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
# Table of irregular verbs
|
| 382 |
+
# irregular verbs have common forms
|
| 383 |
+
# جدول الأفعال عربية الشاذة،
|
| 384 |
+
# مثل الفعل رأى، أرى، أخذ أكل، سأل
|
| 385 |
+
#الأفعال المثال
|
| 386 |
+
# كل سطر يحتوي على جذوع تصريف الفعل
|
| 387 |
+
# في زمن معين
|
| 388 |
+
IRREGULAR_VERB_CONJUG = {}
|
| 389 |
+
CONJUG_BAB = u"باب التصريف"
|
| 390 |
+
|
| 391 |
+
# في الحركات، الحركة الأولى هي لحركة حرف المضارعة
|
| 392 |
+
IRREGULAR_VERB_CONJUG[u"رءى"+FATHA+FATHA] = {
|
| 393 |
+
CONJUG_BAB:(FATHA, FATHA),
|
| 394 |
+
TenseFuture:(u"رى", FATHA+FATHA+FATHA),
|
| 395 |
+
TensePassiveFuture:(u"رى", DAMMA+FATHA+FATHA),
|
| 396 |
+
TenseImperative:(u"رى", FATHA+FATHA),
|
| 397 |
+
}
|
| 398 |
+
#ToDO
|
| 399 |
+
# الفعل أرى مشكلة كبيرة
|
| 400 |
+
# لأنه الفعل الشاذ الوحيد الرباعي
|
| 401 |
+
|
| 402 |
+
IRREGULAR_VERB_CONJUG[u"ءرى"+FATHA+KASRA] = {
|
| 403 |
+
CONJUG_BAB:(KASRA, FATHA),
|
| 404 |
+
TenseFuture:(u"ري", DAMMA+KASRA+FATHA),
|
| 405 |
+
TensePassiveFuture:(u"ري", DAMMA+FATHA+FATHA),
|
| 406 |
+
TenseImperative:(u"ءري", FATHA+KASRA+FATHA),
|
| 407 |
+
}
|
| 408 |
+
#~ ان يتصرف من باب (عَلِمَ يَعْلَمُ)،
|
| 409 |
+
#~ لا تحذف واوه؛ نحو: وَجِلَ، يَوْجَلُ،
|
| 410 |
+
#~ عدا ثلاثة أفعال هي: (وذر), و(وسع)، و(وطأ)،
|
| 411 |
+
#~ تحذف واوها؛ فنقول: وَذِرَ، يَذَرُ،
|
| 412 |
+
# ونقول: وَسِعَ، يَسَعُ، ونقول: وَطِئَ، يَطَأُ.
|
| 413 |
+
#إذا ك# الفعل وذر يذر
|
| 414 |
+
# KASRA FATHA
|
| 415 |
+
IRREGULAR_VERB_CONJUG[u"وذر"+KASRA+FATHA] = {
|
| 416 |
+
CONJUG_BAB:(KASRA, FATHA),
|
| 417 |
+
TenseFuture:(u"ذر", FATHA+FATHA+DAMMA),
|
| 418 |
+
TensePassiveFuture:(u"ذر", DAMMA+FATHA+DAMMA),
|
| 419 |
+
TenseImperative:(u"ذر", FATHA+SUKUN),
|
| 420 |
+
}
|
| 421 |
+
# الفعل وَسِعَ يسع
|
| 422 |
+
# KASRA FATHA
|
| 423 |
+
IRREGULAR_VERB_CONJUG[u"وسع"+KASRA+FATHA] = {
|
| 424 |
+
CONJUG_BAB:(KASRA, FATHA),
|
| 425 |
+
TenseFuture:(u"سع", FATHA+FATHA+DAMMA),
|
| 426 |
+
TensePassiveFuture:(u"سع", DAMMA+FATHA+DAMMA),
|
| 427 |
+
TenseImperative:(u"سع", FATHA+SUKUN),
|
| 428 |
+
}
|
| 429 |
+
# الفعل وطئ يطأ
|
| 430 |
+
# KASRA FATHA
|
| 431 |
+
IRREGULAR_VERB_CONJUG[u"وطء"+KASRA+FATHA] = {
|
| 432 |
+
CONJUG_BAB:(KASRA, FATHA),
|
| 433 |
+
TenseFuture:(u"طء", FATHA+FATHA+DAMMA),
|
| 434 |
+
TensePassiveFuture:(u"وطء", DAMMA+SUKUN+FATHA+DAMMA),
|
| 435 |
+
TenseImperative:(u"طء", FATHA+SUKUN),
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
# الأفعال التي يتغير أمرها بحذف الهمزة وجوبا، مثل أكل، أخذ
|
| 441 |
+
# أما ما لا تحذف همزته وجوبا مثل سأل وأمر، فلا تعتبر شاذة
|
| 442 |
+
|
| 443 |
+
# الفعل أكَل يأكُل، كُل
|
| 444 |
+
#FATHA, DAMMA
|
| 445 |
+
IRREGULAR_VERB_CONJUG[u"ءكل"+FATHA+DAMMA] = {
|
| 446 |
+
CONJUG_BAB:(FATHA, DAMMA),
|
| 447 |
+
TenseFuture:(u"ءكل", FATHA+SUKUN+DAMMA+DAMMA),
|
| 448 |
+
TensePassiveFuture:(u"ءكل", DAMMA+SUKUN+FATHA+FATHA),
|
| 449 |
+
TenseImperative:(u"كل", DAMMA+SUKUN),
|
| 450 |
+
}
|
| 451 |
+
#الفعل أخَذَ يأخُذُ، خُذ
|
| 452 |
+
#FATHA, DAMMA
|
| 453 |
+
IRREGULAR_VERB_CONJUG[u"ءخذ"+FATHA+DAMMA] = {
|
| 454 |
+
CONJUG_BAB:(FATHA, DAMMA),
|
| 455 |
+
TenseFuture:(u"ءخذ", FATHA+SUKUN+DAMMA+DAMMA),
|
| 456 |
+
TensePassiveFuture:(u"ءخذ", DAMMA+SUKUN+FATHA+FATHA),
|
| 457 |
+
TenseImperative:(u"خذ", DAMMA+SUKUN),
|
| 458 |
+
}
|
| 459 |
+
#ج- إذا كان يتصرف من باب (مَنَعَ يَمْنَعُ)،
|
| 460 |
+
#~ تحذف واوه, نحو: وَضَعَ، يَضَعُ، وَجَأَ يَجَأُ، وَدَعَ يَدَعُ، وَزَعَ يَزَعُ،
|
| 461 |
+
#~ وَضَأَ يَضَأُ، وَطَأَ يَطَأُ، وَقَعَ يَقَعُ، وَلَغَ يَلَغُ، وَهَبَ يَهَبُ،
|
| 462 |
+
#~ عدا خمسة أفعال هي:
|
| 463 |
+
#~ (وَبَأ)، و(وَبَهَ)، و(وَجَعَ)، و(وَسَعَ)، و(وَهَلَ)،
|
| 464 |
+
#~ فلا تحذف منها الواو؛ فنقول: يَوْبَأُ، يَوْبَهُ، يَوْجَعُ، يَوْسَعُ، يَوْهَلُ.
|
| 465 |
+
# الأفعال (وَبَأ)، و(وَبَهَ)، و(وَجَعَ)، و(وَسَعَ)، و(وَهَلَ)،#الفعل وبَأ يوبأ
|
| 466 |
+
#FATHA FATHA
|
| 467 |
+
IRREGULAR_VERB_CONJUG[u"وبء"+FATHA+FATHA] = {
|
| 468 |
+
CONJUG_BAB:(FATHA, FATHA),
|
| 469 |
+
TenseFuture:(u"وبء", FATHA+SUKUN+FATHA+DAMMA),
|
| 470 |
+
TensePassiveFuture:(u"وبء", DAMMA+SUKUN+FATHA+DAMMA),
|
| 471 |
+
TenseImperative:(u"وبء", SUKUN+FATHA+SUKUN),
|
| 472 |
+
}
|
| 473 |
+
# الفعل وبه يوبه
|
| 474 |
+
#FATHA FATHA
|
| 475 |
+
IRREGULAR_VERB_CONJUG[u"وبه"+FATHA+FATHA] = {
|
| 476 |
+
CONJUG_BAB:(FATHA, FATHA),
|
| 477 |
+
TenseFuture:(u"وبه", FATHA+SUKUN+FATHA+DAMMA),
|
| 478 |
+
TensePassiveFuture:(u"وبه", DAMMA+SUKUN+FATHA+DAMMA),
|
| 479 |
+
TenseImperative:(u"وبه", SUKUN+FATHA+SUKUN),
|
| 480 |
+
}
|
| 481 |
+
# الفعل وجع يوجع
|
| 482 |
+
#FATHA FATHA
|
| 483 |
+
IRREGULAR_VERB_CONJUG[u"وجع"+FATHA+FATHA] = {
|
| 484 |
+
CONJUG_BAB: (FATHA, FATHA),
|
| 485 |
+
TenseFuture: (u"وجع", FATHA+SUKUN+FATHA+DAMMA),
|
| 486 |
+
TensePassiveFuture: (u"وجع", DAMMA+SUKUN+FATHA+DAMMA),
|
| 487 |
+
TenseImperative: (u"وجع", SUKUN+FATHA+SUKUN),
|
| 488 |
+
}
|
| 489 |
+
#الفعل وسع يوسع
|
| 490 |
+
#FATHA FATHA
|
| 491 |
+
IRREGULAR_VERB_CONJUG[u"وسع"+FATHA+FATHA] = {
|
| 492 |
+
CONJUG_BAB: (FATHA, FATHA),
|
| 493 |
+
TenseFuture: (u"وسع", FATHA+SUKUN+FATHA+DAMMA),
|
| 494 |
+
TensePassiveFuture: (u"وسع", DAMMA+SUKUN+FATHA+DAMMA),
|
| 495 |
+
TenseImperative: (u"وسع", SUKUN+FATHA+SUKUN),
|
| 496 |
+
}
|
| 497 |
+
|
| 498 |
+
# الفعل وهل يوهل
|
| 499 |
+
#FATHA FATHA
|
| 500 |
+
IRREGULAR_VERB_CONJUG[u"وهل"+FATHA+FATHA] = {
|
| 501 |
+
CONJUG_BAB: (FATHA, FATHA),
|
| 502 |
+
TenseFuture: (u"وهل", FATHA+SUKUN+FATHA+DAMMA),
|
| 503 |
+
TensePassiveFuture: (u"وهل", DAMMA+SUKUN+FATHA+DAMMA),
|
| 504 |
+
TenseImperative: (u"وهل", SUKUN+FATHA+SUKUN),
|
| 505 |
+
}
|
| 506 |
+
|
| 507 |
+
|
| 508 |
+
|
| 509 |
+
ALEF_MADDA_VERB_TABLE = {
|
| 510 |
+
u'آبل':[u'أءبل'],
|
| 511 |
+
u'آبه':[u'أءبه'],
|
| 512 |
+
u'آبى':[u'أءبى'],
|
| 513 |
+
u'آتم':[u'أءتم'],
|
| 514 |
+
u'آتن':[u'أءتن'],
|
| 515 |
+
u'آتى':[u'أءتى'],
|
| 516 |
+
#~ u'آتى':[u'أءتى'],
|
| 517 |
+
u'آثر':[u'أءثر'],
|
| 518 |
+
u'آثف':[u'أءثف'],
|
| 519 |
+
u'آثم':[u'أءثم'],
|
| 520 |
+
u'آثى':[u'ءاثى'],
|
| 521 |
+
u'آجد':[u'أءجد'],
|
| 522 |
+
u'آجر':[u'أءجر', u'ءاجر'],
|
| 523 |
+
u'آجل':[u'أءجل'],
|
| 524 |
+
u'آجم':[u'أءجم'],
|
| 525 |
+
u'آحن':[u'ءاحن'],
|
| 526 |
+
u'آخذ':[u'ءاخذ'],
|
| 527 |
+
u'آخى':[u'أءخى', u'ءاخى'],
|
| 528 |
+
u'آدب':[u'أءدب'],
|
| 529 |
+
u'آدم':[u'أءدم'],
|
| 530 |
+
u'آدى':[u'أءدى'],
|
| 531 |
+
u'آذن':[u'أءذن'],
|
| 532 |
+
u'آذى':[u'أءذى'],
|
| 533 |
+
u'آرب':[u'أءرب', u'ءارب'],
|
| 534 |
+
u'آرخ':[u'أءرخ'],
|
| 535 |
+
u'آرس':[u'أءرس'],
|
| 536 |
+
u'آرض':[u'أءرض'],
|
| 537 |
+
u'آرط':[u'أءرط'],
|
| 538 |
+
u'آرف':[u'ءارف'],
|
| 539 |
+
u'آرق':[u'أءرق'],
|
| 540 |
+
u'آرك':[u'أءرك'],
|
| 541 |
+
u'آرم':[u'ءارم'],
|
| 542 |
+
u'آرن':[u'أءرن', u'ءارن'],
|
| 543 |
+
u'آرى':[u'أءرى'],
|
| 544 |
+
u'آزر':[u'ءازر'],
|
| 545 |
+
u'آزف':[u'أءزف'],
|
| 546 |
+
u'آزل':[u'أءزل'],
|
| 547 |
+
u'آزى':[u'أءزى', u'ءازى'],
|
| 548 |
+
u'آسب':[u'أءسب'],
|
| 549 |
+
u'آسد':[u'أءسد'],
|
| 550 |
+
u'آسف':[u'أءسف'],
|
| 551 |
+
u'آسن':[u'أءسن'],
|
| 552 |
+
#~ u'آسى':[u'ءاسى'],
|
| 553 |
+
u'آسى':[u'أءسى', u'ءاسى'],
|
| 554 |
+
u'آشى':[u'أءشى'],
|
| 555 |
+
u'آصد':[u'أءصد'],
|
| 556 |
+
u'آصر':[u'ءاصر'],
|
| 557 |
+
u'آصل':[u'أءصل'],
|
| 558 |
+
u'آضّ':[u'ءاضّ'],
|
| 559 |
+
u'آض':[u'ءاضّ'],
|
| 560 |
+
u'آطم':[u'أءطم'],
|
| 561 |
+
u'آفك':[u'أءفك'],
|
| 562 |
+
u'آفى':[u'أءفى'],
|
| 563 |
+
u'آقط':[u'أءقط'],
|
| 564 |
+
u'آكد':[u'أءكد'],
|
| 565 |
+
u'آكر':[u'ءاكر'],
|
| 566 |
+
u'آكف':[u'أءكف'],
|
| 567 |
+
u'آكل':[u'أءكل', u'ءاكل'],
|
| 568 |
+
u'آلت':[u'أءلت'],
|
| 569 |
+
u'آلس':[u'ءالس'],
|
| 570 |
+
u'آلف':[u'أءلف', u'ءالف'],
|
| 571 |
+
u'آلم':[u'أءلم'],
|
| 572 |
+
u'آلى':[u'أءلى'],
|
| 573 |
+
u'آمر':[u'أءمر', u'ءامر'],
|
| 574 |
+
u'آمن':[u'أءمن'],
|
| 575 |
+
u'آنث':[u'أءنث'],
|
| 576 |
+
u'آنس':[u'أءنس', u'ءانس'],
|
| 577 |
+
u'آنض':[u'أءنض'],
|
| 578 |
+
u'آنف':[u'أءنف'],
|
| 579 |
+
u'آنق':[u'أءنق'],
|
| 580 |
+
u'آنى':[u'أءنى'],
|
| 581 |
+
u'آهل':[u'أءهل'],
|
| 582 |
+
u'آوب':[u'ءاوب'],
|
| 583 |
+
u'آوى':[u'أءوى'],
|
| 584 |
+
u'آيد':[u'ءايد'],
|
| 585 |
+
u'آيس':[u'أءيس'],
|
| 586 |
+
}
|
| 587 |
+
|
| 588 |
+
# Orthographic normalization rules as (pattern, replacement) pairs;
# presumably applied to conjugated forms after generation -- see ar_verb.
STANDARD_REPLACEMENT=[
    # convert hamza-on-alef followed by fatha+alef,
    # and hamza-on-alef followed by a sukun hamza, into alef madda
    ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF]), ALEF_MADDA)
    , ( u"".join([ALEF_MADDA, FATHA]), ALEF_MADDA)
    , ( u"".join([ALEF_MADDA, ALEF]), ALEF_MADDA)
    , ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, SUKUN]), ALEF_MADDA)
    , ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, FATHA]), ALEF_MADDA)
    , ( u"".join([ALEF_HAMZA_ABOVE, DAMMA, WAW_HAMZA, SUKUN]), ALEF_HAMZA_ABOVE+DAMMA+WAW)
    , ( u"".join([YEH, SHADDA, FATHA, ALEF_MAKSURA]), YEH+SHADDA+FATHA+ALEF)
    # assimilation of noon sakina: NOON + SUKUN + NOON -> NOON + SHADDA
    , ( u"".join([NOON, SUKUN, NOON]), NOON+SHADDA)
    # when a sukun is immediately followed by a shadda, drop the sukun
    , ( u"".join([SUKUN, SHADDA]), SHADDA)
    ## normalize alef wasla into a plain alef
    , ( ALEF_WASLA, ALEF)
    ## normalize alef mamduda into a plain alef
    , ( ALEF_MAMDUDA, ALEF)

    ## handling of the extra wasl alef when prefixed to an assimilated verb
    ## word = word.replace( u"%s%s%s%s"%(ALEF, DAMMA, YEH, SUKUN), ALEF+DAMMA+WAW)
]
|
libqutrub/verb_db.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
|
| 2 |
+
# -*- coding = utf-8 -*-
|
| 3 |
+
#************************************************************************
|
| 4 |
+
# $Id: ar_verb.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
|
| 5 |
+
#
|
| 6 |
+
# ------------
|
| 7 |
+
# Description:
|
| 8 |
+
# ------------
|
| 9 |
+
# Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
|
| 10 |
+
#
|
| 11 |
+
# Elementary function to manipulate arabic texte
|
| 12 |
+
#
|
| 13 |
+
# -----------------
|
| 14 |
+
# Revision Details: (Updated by Revision Control System)
|
| 15 |
+
# -----------------
|
| 16 |
+
# $Date: 2009/06/02 01:10:00 $
|
| 17 |
+
# $Author: Taha Zerrouki $
|
| 18 |
+
# $Revision: 0.7 $
|
| 19 |
+
# $Source: arabtechies.sourceforge.net
|
| 20 |
+
#
|
| 21 |
+
#***********************************************************************/
|
| 22 |
+
"""
|
| 23 |
+
Basic routines to treat verbs
|
| 24 |
+
ar_verb
|
| 25 |
+
"""
|
| 26 |
+
import os
|
| 27 |
+
# the db file
|
| 28 |
+
db_path = os.path.join(os.path.dirname(__file__), "data/verbdict.db")
|
| 29 |
+
|
| 30 |
+
import pyarabic.araby as araby
|
| 31 |
+
import libqutrub.triverbtable as triverbtable
|
| 32 |
+
TRIVERBTABLE_INDEX = {}
|
| 33 |
+
|
| 34 |
+
def create_index_triverbtable():
    """ Create index from the verb dictionary
    to accelerate the search in the dictionary for verbs.
    The index key is the unvocalized, hamza-normalized verb; the value is
    the list of TriVerbTable keys sharing that normalized form.
    @return: create the TRIVERBTABLE_INDEX
    @rtype: None
    """
    # the key is the vocverb + the bab number
    for table_key, entry in triverbtable.TriVerbTable.items():
        # strip harakat, then normalize hamza variants to a single form
        stripped = araby.strip_harakat(entry['verb'])
        normalized = araby.normalize_hamza(stripped)
        # group all table keys under their normalized verb form
        TRIVERBTABLE_INDEX.setdefault(normalized, []).append(table_key)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def find_alltriverb(triverb, givenharaka = araby.FATHA,
            vocalised_entree = False):
    """
    Find the triliteral verb in the dictionary (TriVerbTable)
    return a list of possible verb forms
    each item contains:
        - 'verb': the vocalized verb form
        - 'haraka': the haraka of the future (present) form
        - 'bab': the conjugation bab
        - 'transitive': transitivity flag
    The entry matching both the given vocalization and haraka, if any,
    is placed first in the list so it is treated in priority.
    @param triverb: given verb.
    @type triverb: unicode.
    @param givenharaka: given haraka of future type of the verb,
    default(FATHA).
    @type givenharaka: unicode.
    @param vocalised_entree: True if the given verb is vocalized,
    default False.
    @type vocalised_entree: Boolean.
    @return: list of triliteral verbs.
    @rtype: list of dicts.
    """
    liste = []

    if vocalised_entree:
        verb_nm = araby.strip_harakat(triverb)
    else:
        verb_nm = triverb

    normalized = araby.normalize_hamza(verb_nm)
    # Bug fix: dict.has_key() was removed in Python 3; use the `in` operator.
    if normalized in TRIVERBTABLE_INDEX:
        for verb_voc_id in TRIVERBTABLE_INDEX[normalized]:
            entry = triverbtable.TriVerbTable[verb_voc_id]
            # the exact given form (same vocalization and haraka) goes first
            if triverb == entry['verb'] and \
               givenharaka == entry['haraka']:
                liste.insert(0, entry)
            else:
                liste.append(entry)
    else:
        print("triverb has no verb")
    return liste
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def find_triliteral_verb(db_base_path, triliteralverb, givenharaka):
    """
    Find the triliteral verb in the sqlite dictionary,
    return a list of possible verb forms.
    Each item is a dict with keys "verb", "haraka" and "transitive".
    The entry matching both the given vocalization and haraka, if any,
    is placed first so it is treated in priority.
    @param db_base_path: the database path.
        NOTE(review): currently unused -- the module-level ``db_path`` is
        used instead (as in the original code); kept for API compatibility.
    @type db_base_path: path string.
    @param triliteralverb: given verb.
    @type triliteralverb: unicode.
    @param givenharaka: given haraka of future type of the verb.
    @type givenharaka: unicode.
    @return: list of triliteral verbs, or None when the database cannot
        be opened or queried.
    @rtype: list of dicts, or None.
    """
    import sqlite3 as sqlite
    liste = []
    try:
        conn = sqlite.connect(db_path)
        try:
            cursor = conn.cursor()
            verb_nm = araby.strip_harakat(triliteralverb)
            cursor.execute("""select verb_vocalised, haraka, transitive
                           from verbdict
                           where verb_unvocalised = ?""", (verb_nm, ))
            for verb_vocalised, haraka, transitive_code in cursor:
                # Transitivity coding in the database:
                # MEEM is transitive,
                # KAF is common (transitive and intransitive),
                # LAM is intransitive.
                transitive = transitive_code in (araby.KAF, araby.MEEM)
                entry = {"verb": verb_vocalised,
                         "haraka": haraka,
                         "transitive": transitive}
                if triliteralverb == verb_vocalised and givenharaka == haraka:
                    liste.insert(0, entry)
                else:
                    liste.append(entry)
        finally:
            # Bug fix: the connection was never closed (resource leak).
            conn.close()
        return liste
    except (sqlite.Error, IOError):
        # Bug fix: the original caught only IOError, so sqlite errors
        # (missing/corrupt database, missing table) propagated instead of
        # returning None as documented.
        return None
|
libqutrub/verb_valid.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
|
| 2 |
+
# -*- coding = utf-8 -*-
|
| 3 |
+
#************************************************************************
|
| 4 |
+
# $Id: verb_valid.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
|
| 5 |
+
#
|
| 6 |
+
# ------------
|
| 7 |
+
# Description:
|
| 8 |
+
# ------------
|
| 9 |
+
# Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
|
| 10 |
+
#
|
| 11 |
+
# Elementary function to validate verbs
|
| 12 |
+
#
|
| 13 |
+
# -----------------
|
| 14 |
+
# Revision Details: (Updated by Revision Control System)
|
| 15 |
+
# -----------------
|
| 16 |
+
# $Date: 2009/06/02 01:10:00 $
|
| 17 |
+
# $Author: Taha Zerrouki $
|
| 18 |
+
# $Revision: 0.7 $
|
| 19 |
+
# $Source: arabtechies.sourceforge.net
|
| 20 |
+
#
|
| 21 |
+
#***********************************************************************/
|
| 22 |
+
"""
|
| 23 |
+
Basic routines to validate verbs
|
| 24 |
+
ar_verb
|
| 25 |
+
"""
|
| 26 |
+
import re
|
| 27 |
+
# import string
|
| 28 |
+
# import sys
|
| 29 |
+
# import os
|
| 30 |
+
# import types
|
| 31 |
+
# from arabic_const import *
|
| 32 |
+
import libqutrub.verb_const as vconst #~ from verb_const import *
|
| 33 |
+
# import ar_ctype
|
| 34 |
+
import pyarabic.araby as araby
|
| 35 |
+
from pyarabic.araby import FATHA, SHADDA, HAMZA, ALEF, \
|
| 36 |
+
NOON, ALEF_HAMZA_ABOVE, ALEF_HAMZA_BELOW, ALEF_MADDA, \
|
| 37 |
+
ALEF_MAKSURA, BEH, DAD, DAL, DAMMATAN, FATHATAN, FEH, GHAIN, HAH, \
|
| 38 |
+
HEH, JEEM, KAF, KASRATAN, KHAH, LAM, REH, SAD, SHEEN, TAH, TEH, \
|
| 39 |
+
TEH_MARBUTA, THAL, THEH, YEH, ZAH, ZAIN
|
| 40 |
+
#used to
|
| 41 |
+
# Accepted 6-letter infinitive verb patterns (istaf3al, if3anlal, ...).
VALID_INFINITIVE_VERB6_PATTERN = \
re.compile(u"^است...|ا..ن..|ا..و..|ا..ا.ّ|ا....ّ|ا.ّ.ّ.|ا.ّا..$", re.UNICODE)

# Accepted 4-letter infinitive verb patterns:
# 'af3al, fa3al (alef in 2nd position), fa33al, fa3lal.
VALID_INFINITIVE_VERB4_PATTERN = re.compile(\
u"^([%s%s][^%s]{2}.|[^%s%s]%s[^%s%s].|[^%s%s]{2}%s[^%s]|[^%s%s]{4})$"\
%(ALEF_HAMZA_ABOVE, HAMZA, SHADDA, ALEF, SHADDA, ALEF, ALEF, SHADDA, ALEF,
SHADDA, SHADDA, SHADDA, ALEF, SHADDA), re.UNICODE)

# Accepted 5-letter infinitive verb patterns.
VALID_INFINITIVE_VERB5_PATTERN = re.compile( u"|".join([
    u"^ا...ّ$",
    # case of assimilated ifta3al forms like ittakhadha/idhdhakkara/ittala3a
    u"^%s[%s%s%s]%s..$"%(ALEF, TEH, THAL, TAH, SHADDA),
    # same case, literal form
    u"^ا[تذط]ّ[^اّ][^اّ]$",
    # infa3al
    u"^ان...$",
    # ifta3al (assimilated variants izd-, ist-, idt-)
    # Bug fix: a missing comma after this pattern concatenated it with the
    # next string literal into the unmatchable regex "...$^...", so neither
    # alternative could ever match.
    u"^(ازد|اصط|اضط)..$",
    u"^ا[^صضطظد]ت..$",
    u"^ا...ّ$",
    # ifta3al with shadda on the second letter
    u"^ا.ّ..$",
    u"^ا...ى$",
    ]) , re.UNICODE)
|
| 65 |
+
|
| 66 |
+
#####################################
|
| 67 |
+
#{validation functions
|
| 68 |
+
#####################################
|
| 69 |
+
def is_valid_infinitive_verb(word, vocalized = True):
    """
    Determine if the given word is a valid infinitive form of an arabic verb.
    A word is not a valid infinitive if:
    - its length (alef madda counted as two letters) is < 3 or > 6,
    - it contains TEH_MARBUTA, tanwin marks or ALEF_HAMZA_BELOW,
    - it contains non arabic letters,
    - it contains an invalid SHADDA/ALEF/ALEF_MAKSURA sequence,
    - it contains a letter sequence that does not occur in arabic roots,
    - its pattern (wazn) is not an accepted verb pattern for its length.
    @param word: given word.
    @type word: unicode.
    @param vocalized: if the given word is vocalized.
    @type vocalized: Boolean, default(True).
    @return: True if the word is a valid infinitive form of verb.
    @rtype: Boolean.
    """
    # test if the word is an arabic valid word,
    if not araby.is_arabicword(word):
        return False
    if vocalized :
        word_nm = araby.strip_harakat(word)
    else:
        word_nm = word
    # the alef_madda is considered as 2 letters

    word_nm = word_nm.replace(ALEF_MADDA, HAMZA+ALEF)
    length = len(word_nm)

    # length with shadda must be between 3 and 6
    if length < 3 or length >= 7:
        return False
    # a 3 length verb can't start by Alef or Shadda,
    #and the second letter can't be shadda
    elif length == 3 and (word_nm[0] == ALEF or word_nm[0] == SHADDA \
       or word_nm[1] == SHADDA):
        return False

    # a 5 length verb must start by ALEF or TEH
    elif length == 5 and word_nm[0] not in (TEH, ALEF):
        return False
    # a 6 length verb must start by ALEF
    elif length == 6 and word_nm[0] != ALEF:
        return False

    # contains some letters invalid in a verb:
    # ALEF_HAMZA_BELOW, TEH_MARBUTA, or a tanwin mark
    elif re.search(u"[%s%s%s%s%s]"%(ALEF_HAMZA_BELOW, TEH_MARBUTA,
       DAMMATAN, KASRATAN, FATHATAN), word):
        return False
    # contains an invalid SHADDA sequence in a verb:
    # shadda after shadda, shadda on alef or alef maksura, a word starting
    # with shadda, ALEF followed by (ALEF, ALEF_MAKSURA), ALEF followed by
    # a letter then ALEF, or ending with ALEF then (YEH, ALEF_MAKSURA)
    elif re.search(u"([%s%s%s]%s|^%s|^%s..%s|^.%s|%s.%s|%s%s|%s[%s%s]$)"%(
       ALEF, ALEF_MAKSURA, SHADDA, SHADDA, SHADDA, ALEF, SHADDA, SHADDA,
       ALEF, ALEF, ALEF, ALEF, ALEF, ALEF_MAKSURA, YEH), word_nm):
        return False


    # Invalid root form: an initial YEH followed by one of
    # (THEH, JEEM, HAH, KHAH, THAL, ZAIN, SHEEN, SAD, DAD,
    #  TAH, ZAH, GHAIN, KAF, HEH, YEH) does not occur in arabic roots
    elif re.search(u"^%s[%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s]"%(
       YEH, THEH, JEEM, HAH, KHAH, THAL, ZAIN, SHEEN, SAD, DAD,
       TAH, ZAH, GHAIN, KAF, HEH, YEH), word_nm):
        return False


    # TEH after (DAL, THAL, TAH, ZAH, DAD) does not occur in roots
    elif re.search(u"[%s%s%s%s%s]%s"%(DAL, THAL, DAD, TAH, ZAH, TEH), word_nm):
        return False
    # contains an invalid root sequence of phonetically close letters,
    # like BEH and FEH, LAM and REH
    elif re.search(u"%s%s|%s%s|%s%s|%s%s|%s%s|%s%s|%s%s"%(
       LAM, REH, REH, LAM, FEH, BEH, BEH, FEH, NOON,
       LAM, HEH, HAH, HAH, HEH), word_nm):
        return False


    # in non 5-letter verbs: an initial TEH followed by
    # (THEH, DAL, THAL, ZAIN, SHEEN, SAD, DAD, TAH, ZAH) is invalid
    elif length != 5 and word_nm.startswith(TEH) and word_nm[1] in (
       TEH, THEH, DAL, THAL, ZAIN, SHEEN, SAD, DAD, TAH, ZAH):
        return False
    # a word starting with the same letter doubled is rejected,
    # except when it starts with TEH
    # NOTE(review): the second operand tests word[0] (the possibly vocalized
    # input) while the first tests word_nm -- word_nm[0] was probably
    # intended; confirm before changing.
    elif word_nm[0] == word_nm[1] and word[0] != TEH:
        return False

    #verify the wazn of the verb
    elif length == 3:
        # accepted 3-letter wazns: fa3al and fa33 (shadda on the last letter)
        # rejected: starting with ALEF, or shadda on the second letter
        if re.match("^[^%s][^%s].$"%(ALEF, SHADDA), word_nm):
            return True
        else: return False
    elif length == 4:
        # accepted 4-letter wazns:
        # 1- 'af3al  2- fa3al (alef 2nd)  3- fa33al  4- fa3lal
        if re.match(\
           "^([%s%s][^%s]{2}.|[^%s%s]%s[^%s%s].|[^%s%s]{2}%s[^%s]|[^%s%s]{4})$"\
           %(ALEF_HAMZA_ABOVE, HAMZA, SHADDA, ALEF, SHADDA, ALEF, ALEF, SHADDA,
           ALEF, SHADDA, SHADDA, SHADDA, ALEF, SHADDA), word_nm):

            return True
        # rejected: initial alef without hamza (the hamza must be written),
        # or shadda/alef in another position
        else: return False
    elif length == 5:

        if word_nm.startswith(ALEF):
            if re.match(u"^ا...ّ$", word_nm):
                return True
            # case of assimilated ifta3al forms like
            # ittakhadha, idhdhakkara, ittala3a
            if re.match(u"^%s[%s%s%s]%s..$"%(ALEF, TEH, THAL, TAH, SHADDA), \
               word_nm):
                return True

            # infa3al
            elif re.match(u"^ان...$", word_nm):
                return True
            # ifta3al (assimilated variants izd-, ist-, idt-)
            elif re.match(u"^(ازد|اصط|اضط)..$", word_nm):
                return True
            elif re.match(u"^ا[^صضطظد]ت..$", word_nm):
                return True
            elif re.match(u"^ا...ّ$", word_nm):
                return True
            # ifta3al with shadda on the second letter
            elif re.match(u"^ا.ّ..$", word_nm):
                return True
            elif re.match(u"^ا...ى$", word_nm):
                return True
            else: return False
        elif word_nm.startswith(TEH):
            return True
        else:
            return False

    # 6-letter verbs must start with ALEF or TEH and match one of the
    # accepted 6-letter wazns; shadda and alef have fixed positions there
    elif length == 6:
        if not (word_nm.startswith(ALEF) or word_nm.startswith(TEH)):
            return False
        if VALID_INFINITIVE_VERB6_PATTERN.match(word_nm):
            return True
        else: return False
    return True
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def suggest_verb(verb):
    """
    Generate a list of valid infinitive verb forms for an invalid
    infinitive form.
    @param verb: given verb, of invalid infinitive form.
    @type verb: unicode.
    @return: a list of suggested infinitive verb forms.
    @rtype: list of unicode.
    """
    # the verb is invalid
    list_suggest = []
    # first strip harakat; shadda is not stripped
    verb = araby.strip_harakat(verb)
    # second strip all letters unacceptable in an infinitive form
    verb = re.sub(u"[%s%s%s%s]"%( TEH_MARBUTA, DAMMATAN, KASRATAN, FATHATAN), \
    '', verb)
    # test the resulting verb; if it is valid,
    # add it to the suggestion list.
    if is_valid_infinitive_verb(verb):
        list_suggest.append(verb)
        return list_suggest
    # if the verb starts with ALEF_HAMZA_BELOW like إستعمل,
    # replace it by an ALEF, because it's a common error.
    # if the result is valid add it to the suggestions list
    elif verb.startswith(ALEF_HAMZA_BELOW):
        verb = re.sub(ALEF_HAMZA_BELOW, ALEF, verb)
        if is_valid_infinitive_verb(verb):
            list_suggest.append(verb)
            return list_suggest
    # if the verb starts with ALEF like اضرب,
    # replace it by ALEF_HAMZA_ABOVE + FATHA, a common error.
    # if the result is valid add it to the suggestions list
    elif verb.startswith(ALEF):
        verb_one = re.sub(ALEF, ALEF_HAMZA_ABOVE+FATHA, verb, 1)
        if is_valid_infinitive_verb(verb_one):
            list_suggest.append(verb_one)
            return list_suggest
    # if the verb is 2 letters long,
    # suggest adding the third letter as:
    # SHADDA, ALEF, ALEF_MAKSURA at the end, or ALEF in the middle.
    # each valid result is added to the suggestions list
    elif len(verb) == 2:
        verb = re.sub(ALEF, ALEF_HAMZA_ABOVE, verb, 1)
        # suggest the third letter as a SHADDA at the end
        verb_one = verb+SHADDA
        if is_valid_infinitive_verb(verb_one):
            list_suggest.append(verb_one)
        # suggest the third letter as an ALEF_MAKSURA at the end
        verb_one = verb+ALEF_MAKSURA
        if is_valid_infinitive_verb(verb_one):
            list_suggest.append(verb_one)
        # suggest the third letter as an ALEF at the end
        verb_one = verb+ALEF
        if is_valid_infinitive_verb(verb_one):
            list_suggest.append(verb_one)
        # suggest the third letter as an ALEF in the middle
        verb_one = verb[0]+ALEF+verb[1]
        if is_valid_infinitive_verb(verb_one):
            list_suggest.append(verb_one)
        return list_suggest
    elif len(verb) >= 6:
        # if the verb is more than 6 letters long,
        # suggest ALEF + a 5-letter window of the verb.
        # NOTE(review): range(len(verb)-6) is empty when len(verb) == 6, so
        # a verb of exactly 6 letters gets no suggestion here -- possibly an
        # off-by-one; confirm the intent before changing.
        for i in range(len(verb)-6):
            verb_one = ALEF+verb[i:i+5]
            if is_valid_infinitive_verb(verb_one):
                list_suggest.append(verb_one)
    elif len(verb) == 5:
        # NOTE(review): range(len(verb)-5) is always range(0) here, so this
        # loop never runs (dead code; see the original ToDo).
        for i in range(len(verb)-5):
            verb_one = ALEF+verb[i:i+4]
            if is_valid_infinitive_verb(verb_one):
                list_suggest.append(verb_one)
    elif len(verb) == 4:

        # if the verb is 4 letters long,
        # suggest swapping a misplaced ALEF or SHADDA:
        # فعال => فاعل   and   فّعل => فعّل
        if verb[2] == ALEF or verb[1] == SHADDA:
            verb_one = verb[0]+verb[2]+verb[1]+verb[3]
            if is_valid_infinitive_verb(verb_one):
                list_suggest.append(verb_one)
        if verb.endswith(SHADDA):
            # suggest moving a final SHADDA before the last letter:
            # فعلّ => فعّل
            verb_one = verb[0]+verb[1]+verb[3]+verb[2]
            if is_valid_infinitive_verb(verb_one):
                list_suggest.append(verb_one)
        return list_suggest
    else:
        # else suggest conjugating a default verb instead
        list_suggest.append(u"كتب")
        return list_suggest
    return list_suggest
|
| 331 |
+
|
| 332 |
+
#####################################
|
| 333 |
+
#{verb pretreatment functions
|
| 334 |
+
#####################################
|
| 335 |
+
def normalize_alef_madda(word):
    """
    Convert an initial Alef madda into two letters (HAMZA + ALEF),
    using ALEF_MADDA_VERB_TABLE to disambiguate 3-letter verbs whose
    madda may stand for different hamza sequences.
    Words not starting with Alef madda are returned unchanged.
    @param word: given word.
    @type word: unicode.
    @return: converted word.
    @rtype: unicode.
    """
    if word.startswith(ALEF_MADDA):
        word_nm = araby.strip_harakat(word)
        if len(word_nm) == 3:
            # Bug fix: dict.has_key() was removed in Python 3;
            # use the `in` operator instead.
            if word_nm in vconst.ALEF_MADDA_VERB_TABLE:
                # return the first listed expansion only
                return vconst.ALEF_MADDA_VERB_TABLE[word_nm][0]
            else:
                return word_nm.replace(ALEF_MADDA, HAMZA+ALEF)
        else:
            # covers the 2-letter case and any other length alike
            return word_nm.replace(ALEF_MADDA, HAMZA+ALEF)
    else:
        # Bug fix: the original returned the unbound name word_nm here,
        # raising NameError; return the word unchanged instead.
        return word
|
| 358 |
+
|
| 359 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Flask==3.1.0
|
| 2 |
+
PyArabic==0.6.15
|
templates/index.html
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="ar">
<head>
    <meta charset="UTF-8">
    <title>المصرّف</title>

    <style>
        body {
            direction: rtl;
            text-align: right;
            font-family: Arial;
            padding: 30px;
            background: #f5f5f5;
        }

        .container {
            background: white;
            padding: 20px;
            border-radius: 10px;
        }

        input, select, button {
            padding: 10px;
            margin: 10px 0;
            width: 100%;
            font-size: 16px;
        }

        button {
            background: #2c7be5;
            color: white;
            border: none;
            cursor: pointer;
        }

        #result {
            margin-top: 20px;
        }
    </style>
</head>

<body>

<div class="container">
    <h1>المصرّف</h1>

    <input id="word" placeholder="أدخل الفعل (مثال: كتب)">

    <select id="tense">
        <option value="past">الماضي</option>
        <option value="future">المضارع</option>
        <option value="jussive">المجزوم</option>
        <option value="subjunctive">المنصوب</option>
        <option value="confirmed">المؤكد</option>
        <option value="imperative">الأمر</option>
    </select>

    <select id="future_type">
        <option value="فتحة">فتحة</option>
        <option value="ضمة">ضمة</option>
        <option value="كسرة">كسرة</option>
    </select>

    <button onclick="generate()">توليد التصريف</button>

    <div id="result"></div>
</div>

<script>
    // Collect the form values, POST them to the backend as JSON,
    // and render the returned conjugation table.
    async function generate() {
        const word = document.getElementById("word").value;
        const tense = document.getElementById("tense").value;
        const future_type = document.getElementById("future_type").value;

        // FIX: use a relative URL so the page works wherever the app is
        // hosted. The original hard-coded "http://127.0.0.1:5000/conjugate",
        // which breaks any non-local deployment (e.g. the Docker container
        // this repo ships) and triggers CORS failures.
        const response = await fetch("/conjugate", {
            method: "POST",
            headers: {
                "Content-Type": "application/json"
            },
            body: JSON.stringify({
                word,
                tense,
                future_type
            })
        });

        const data = await response.json();

        // NOTE(review): the response HTML is inserted verbatim; assumes the
        // backend returns trusted markup — confirm the server escapes user
        // input before echoing it into `result`.
        document.getElementById("result").innerHTML = data.result;
    }
</script>

</body>
</html>
|