Boulbaba commited on
Commit
e007b04
·
verified ·
1 Parent(s): 5dfa188

Upload 29 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ libqutrub/__pycache__/triverbtable.cpython-313.pyc filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY requirements.txt .
6
+ RUN pip install --no-cache-dir -r requirements.txt
7
+
8
+ COPY . .
9
+
10
+ ENV PYTHONUNBUFFERED=1
11
+
12
+ EXPOSE 7860
13
+
14
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +1,14 @@
1
  ---
2
- title: Mosaref
3
- emoji: 🌖
4
- colorFrom: yellow
5
- colorTo: red
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
- short_description: arab verbs conjugator
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: Al Mosaref
3
+ emoji: 📚
4
+ colorFrom: green
5
+ colorTo: blue
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
+ short_description: Arabic verb conjugation with Qutrub
10
  ---
11
 
12
+ # المصرّف
13
+
14
+ Application de conjugaison des verbes arabes avec Qutrub + Flask.
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from flask import Flask, request, jsonify, render_template
4
+ from libqutrub.mosaref_main import do_sarf
5
+
6
+ app = Flask(__name__, template_folder="templates")
7
+
8
+
9
@app.route("/")
def home():
    """Serve the conjugation front-end page (the ``index.html`` template)."""
    page = render_template("index.html")
    return page
12
+
13
+
14
# Flags passed to do_sarf; every request starts from these defaults.
_DEFAULT_SARF_PARAMS = {
    "alltense": False,
    "past": False,
    "future": False,
    "imperative": False,
    "future_moode": False,
    "confirmed": False,
    "passive": False,
    "transitive": True,
}

# Maps the ``tense`` value sent by the client to the flags it switches on.
# NOTE(review): "jussive" and "subjunctive" enable exactly the same flags, so
# both produce the same do_sarf call -- confirm how the two moods are meant to
# be told apart.
_TENSE_FLAGS = {
    "past": ("past",),
    "future": ("future",),
    "jussive": ("future", "future_moode"),
    "subjunctive": ("future", "future_moode"),
    "confirmed": ("future", "confirmed"),
    "imperative": ("imperative",),
    "confirmed_imperative": ("imperative", "confirmed"),
}

# Message returned to the client whenever no conjugation can be produced.
_CONJUGATION_ERROR = "<p>تعذر تصريف الفعل. تأكد من صحة الإدخال.</p>"


@app.route("/conjugate", methods=["POST"])
def conjugate():
    """
    Conjugate the verb described by the JSON request body.

    Expected JSON keys:
      - ``word``: the verb to conjugate (surrounding whitespace is removed).
      - ``future_type``: mark of the future form, defaults to "فتحة".
      - ``tense``: one of the keys of ``_TENSE_FLAGS``, defaults to "past";
        an unknown value leaves every tense flag off, as before.

    @return: JSON object ``{"result": <html>}``; the result is an error
        paragraph when the input is unusable or do_sarf returns None.
        A body that is not a JSON object, or a non-string ``word``,
        is answered with HTTP 400 instead of an unhandled exception.
    """
    # silent=True: malformed or missing JSON yields None instead of raising.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"result": _CONJUGATION_ERROR}), 400

    word = data.get("word", "")
    if not isinstance(word, str):
        return jsonify({"result": _CONJUGATION_ERROR}), 400
    word = word.strip()
    if not word:
        # Nothing to conjugate; answer with the usual error message.
        return jsonify({"result": _CONJUGATION_ERROR})

    future_type = data.get("future_type", "فتحة")
    if not isinstance(future_type, str):
        future_type = "فتحة"

    tense = data.get("tense", "past")
    enabled = _TENSE_FLAGS.get(tense, ()) if isinstance(tense, str) else ()

    params = dict(_DEFAULT_SARF_PARAMS)
    for flag in enabled:
        params[flag] = True

    result = do_sarf(
        word=word,
        future_type=future_type,
        display_format="HTML",
        **params
    )

    if result is None:
        return jsonify({"result": _CONJUGATION_ERROR})

    return jsonify({"result": result})
63
+
64
+
65
# Script entry point: start Flask's built-in server on all interfaces,
# port 7860 (the port the Dockerfile exposes), with debug mode off.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860, debug=False)
libqutrub/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ 
libqutrub/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (171 Bytes). View file
 
libqutrub/__pycache__/ar_verb.cpython-313.pyc ADDED
Binary file (39.3 kB). View file
 
libqutrub/__pycache__/classverb.cpython-313.pyc ADDED
Binary file (50.4 kB). View file
 
libqutrub/__pycache__/conjugatedisplay.cpython-313.pyc ADDED
Binary file (22.1 kB). View file
 
libqutrub/__pycache__/mosaref_main.cpython-313.pyc ADDED
Binary file (5.9 kB). View file
 
libqutrub/__pycache__/triverbtable.cpython-313.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17ddedc943edf2b9176f73818945e6414f5ad919c2b4c43a517d221b28382c14
3
+ size 737469
libqutrub/__pycache__/verb_const.cpython-313.pyc ADDED
Binary file (16.9 kB). View file
 
libqutrub/__pycache__/verb_db.cpython-313.pyc ADDED
Binary file (4.49 kB). View file
 
libqutrub/__pycache__/verb_valid.cpython-313.pyc ADDED
Binary file (11.3 kB). View file
 
libqutrub/alefmaddaverbtable.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Lookup table for verbs written with an initial ALEF MADDA: each key maps to
# the list of its spelled-out forms (hamza written as separate letters).
# The original literal listed the keys u'آتى' and u'آسى' twice; in a dict
# literal the later entry silently replaces the earlier one, so only the
# winning entries are kept here (the resulting dict is unchanged).
AlefMaddaVerbTable = {
    u'آبل': [u'أءبل'],
    u'آبه': [u'أءبه'],
    u'آبى': [u'أءبى'],
    u'آتم': [u'أءتم'],
    u'آتن': [u'أءتن'],
    u'آتى': [u'أءتى'],
    u'آثر': [u'أءثر'],
    u'آثف': [u'أءثف'],
    u'آثم': [u'أءثم'],
    u'آثى': [u'ءاثى'],
    u'آجد': [u'أءجد'],
    u'آجر': [u'أءجر', u'ءاجر'],
    u'آجل': [u'أءجل'],
    u'آجم': [u'أءجم'],
    u'آحن': [u'ءاحن'],
    u'آخذ': [u'ءاخذ'],
    u'آخى': [u'أءخى', u'ءاخى'],
    u'آدب': [u'أءدب'],
    u'آدم': [u'أءدم'],
    u'آدى': [u'أءدى'],
    u'آذن': [u'أءذن'],
    u'آذى': [u'أءذى'],
    u'آرب': [u'أءرب', u'ءارب'],
    u'آرخ': [u'أءرخ'],
    u'آرس': [u'أءرس'],
    u'آرض': [u'أءرض'],
    u'آرط': [u'أءرط'],
    u'آرف': [u'ءارف'],
    u'آرق': [u'أءرق'],
    u'آرك': [u'أءرك'],
    u'آرم': [u'ءارم'],
    u'آرن': [u'أءرن', u'ءارن'],
    u'آرى': [u'أءرى'],
    u'آزر': [u'ءازر'],
    u'آزف': [u'أءزف'],
    u'آزل': [u'أءزل'],
    u'آزى': [u'أءزى', u'ءازى'],
    u'آسب': [u'أءسب'],
    u'آسد': [u'أءسد'],
    u'آسف': [u'أءسف'],
    u'آسن': [u'أءسن'],
    u'آسى': [u'أءسى', u'ءاسى'],
    u'آشى': [u'أءشى'],
    u'آصد': [u'أءصد'],
    u'آصر': [u'ءاصر'],
    u'آصل': [u'أءصل'],
    u'آضّ': [u'ءاضّ'],
    u'آطم': [u'أءطم'],
    u'آفك': [u'أءفك'],
    u'آفى': [u'أءفى'],
    u'آقط': [u'أءقط'],
    u'آكد': [u'أءكد'],
    u'آكر': [u'ءاكر'],
    u'آكف': [u'أءكف'],
    u'آكل': [u'أءكل', u'ءاكل'],
    u'آلت': [u'أءلت'],
    u'آلس': [u'ءالس'],
    u'آلف': [u'أءلف', u'ءالف'],
    u'آلم': [u'أءلم'],
    u'آلى': [u'أءلى'],
    u'آمر': [u'أءمر', u'ءامر'],
    u'آمن': [u'أءمن'],
    u'آنث': [u'أءنث'],
    u'آنس': [u'أءنس', u'ءانس'],
    u'آنض': [u'أءنض'],
    u'آنف': [u'أءنف'],
    u'آنق': [u'أءنق'],
    u'آنى': [u'أءنى'],
    u'آهل': [u'أءهل'],
    u'آوب': [u'ءاوب'],
    u'آوى': [u'أءوى'],
    u'آيد': [u'ءايد'],
    u'آيس': [u'أءيس'],
}
libqutrub/ar_ctype.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ # -*- coding=utf-8 -*-
3
+ #************************************************************************
4
+ # $Id: ar_ctype.py,v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
5
+ #
6
+ # ------------
7
+ # Description:
8
+ # ------------
9
+ # Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
10
+ #
11
+ # Elementary function to manipulate arabic texte
12
+ #
13
+ # -----------------
14
+ # Revision Details: (Updated by Revision Control System)
15
+ # -----------------
16
+ # $Date: 2009/06/02 01:10:00 $
17
+ # $Author: Taha Zerrouki $
18
+ # $Revision: 0.7 $
19
+ # $Source: arabtechies.sourceforge.net
20
+ #
21
+ #***********************************************************************/
22
+
23
+ import re#, string,sys
24
+ from arabic_const import *
25
# Character classes built from the diacritic constants of arabic_const.
# The patterns contain no backslashes, so a plain unicode literal is
# equivalent to the former ``ur"..."`` literal, whose prefix is a
# SyntaxError on Python 3.
# Matches any single haraka, tanwin, sukun or shadda.
HARAKAT_pat = re.compile(
    u"[%s%s%s%s%s%s%s%s]"
    % (FATHATAN, DAMMATAN, KASRATAN, FATHA, DAMMA, KASRA, SUKUN, SHADDA)
)
# Same class without SHADDA.
HARAKAT_NO_SHADDA_pat = re.compile(
    u"[%s%s%s%s%s%s%s]"
    % (FATHATAN, DAMMATAN, KASRATAN, FATHA, DAMMA, KASRA, SUKUN)
)
27
+
28
+
29
+
30
+ #strip tatweel from a word and return a result word
31
+ #--------------------------------------
32
def ar_strip_tatweel(w):
    """Return ``w`` with every TATWEEL character removed."""
    pieces = w.split(TATWEEL)
    return ''.join(pieces)
35
+
36
+ #strip tatweel and vowel from a word and return a result word but keep shadda
37
+ #--------------------------------------
38
def ar_strip_marks_keepshadda(w):
    """
    Return ``w`` without the marks matched by HARAKAT_NO_SHADDA_pat
    (tanwin, short vowels and sukun); shadda is left in place.
    """
    stripped = re.sub(HARAKAT_NO_SHADDA_pat, '', w)
    return stripped
42
+
43
+
44
+ #strip tatweel and vowel from a word and return a result word
45
+ #--------------------------------------
46
def ar_strip_marks(w):
    """
    Return ``w`` without the marks matched by HARAKAT_pat
    (tanwin, short vowels, sukun and shadda).
    """
    stripped = re.sub(HARAKAT_pat, '', w)
    return stripped
51
+
52
+
53
+
54
+ #strip pounctuation from the text
55
+ #--------------------------------------
56
def ar_strip_punct(w):
    """
    Replace ASCII punctuation, digits, letters and whitespace by a space.

    Each matched character (backslash included, as it is part of
    ``string.punctuation``) is replaced by exactly one space, so the length
    of the text is preserved.

    @param w: text to clean.
    @type w: unicode.
    @return: text with the same length as ``w``.
    @rtype: unicode.
    """
    # Local import: the module-level import line only brings in ``re``,
    # so ``string`` was previously an undefined name here.
    import string
    unwanted = (string.punctuation + string.digits
                + string.ascii_letters + string.whitespace)
    # re.escape keeps ']', '^', '-' and '\\' literal inside the class.
    return re.sub(u'[%s]' % re.escape(unwanted), ' ', w)
60
+
61
+
62
+ #--------------------------------------
63
def replace_pos(word, rep, pos):
    """Return ``word`` with the character at index ``pos`` replaced by ``rep``."""
    head = word[:pos]
    tail = word[pos + 1:]
    return head + rep + tail
65
+
66
def is_valid_arabic_word(word):
    """
    Tell whether ``word`` passes the basic spelling checks below.

    The word is rejected when it is empty, contains a character outside
    U+0621..U+0652 and the three lam-alef ligatures, starts with a digit,
    has an ALEF MAKSURA followed by further characters, or has a
    TEH MARBUTA followed by a character other than DAMMA/KASRA/FATHA and
    at least one more character.

    @param word: word to check.
    @type word: unicode.
    @return: True if no check rejects the word.
    @rtype: boolean.
    """
    if word == "":
        return False
    # Any character outside the accepted ranges invalidates the word.
    if re.search(u"([^\u0621-\u0652\ufefb\ufef7\ufef5])", word):
        return False
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    elif re.match(r"([\d])+", word):
        return False
    # ALEF MAKSURA somewhere other than the end of the verb.
    elif re.match(u"^(.)*[%s](.)+$" % ALEF_MAKSURA, word):
        return False
    elif re.match(u"^(.)*[%s]([^%s%s%s])(.)+$"
                  % (TEH_MARBUTA, DAMMA, KASRA, FATHA), word):
        return False
    return True
87
+
88
+
libqutrub/ar_verb.py ADDED
@@ -0,0 +1,1232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+  #!/usr/bin/python
2
+ # -*- coding = utf-8 -*-
3
+ #************************************************************************
4
+ # $Id: ar_verb.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
5
+ #
6
+ # ------------
7
+ # Description:
8
+ # ------------
9
+ # Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
10
+ #
11
+ # Elementary function to manipulate arabic texte
12
+ #
13
+ # -----------------
14
+ # Revision Details: (Updated by Revision Control System)
15
+ # -----------------
16
+ # $Date: 2009/06/02 01:10:00 $
17
+ # $Author: Taha Zerrouki $
18
+ # $Revision: 0.7 $
19
+ # $Source: arabtechies.sourceforge.net
20
+ #
21
+ #***********************************************************************/
22
+ """
23
+ Basic routines to treat verbs
24
+ ar_verb
25
+ """
26
+ from __future__ import (
27
+ #~ absolute_import,
28
+ print_function,
29
+ #~ unicode_literals,
30
+ #~ division,
31
+ )
32
+ import re
33
+ import libqutrub.verb_const as vconst
34
+ # import ar_ctype
35
+ import pyarabic.araby as araby
36
+ #~ from pyarabic.araby import *
37
+ from pyarabic.araby import FATHA, DAMMA, KASRA, SHADDA, SUKUN, HAMZA, ALEF, \
38
+ WAW, ALEF_HAMZA_ABOVE, ALEF_MADDA, \
39
+ YEH_HAMZA, ALEF_MAKSURA, YEH, TEH, \
40
+ LAM_ALEF, SIMPLE_LAM_ALEF, LAM_ALEF_HAMZA_ABOVE, \
41
+ SIMPLE_LAM_ALEF_HAMZA_ABOVE, LAM_ALEF_MADDA_ABOVE ,SIMPLE_LAM_ALEF_MADDA_ABOVE
42
+ #~ import libqutrub.verb_valid as verb_valid
43
+
44
def replace_pos(word, rep, pos):
    """
    Substitute the character at a given index of a string.
    @param word: given string
    @type word: unicode
    @param rep: replacement text
    @type rep: unicode char
    @param pos: index of the character to replace
    @type pos: int
    @return: modified string
    @rtype: unicode string
    """
    before = word[:pos]
    after = word[pos + 1:]
    return before + rep + after
57
+ #####################################
58
+ #{ verb attributes conversion functions
59
+ #####################################
60
+
61
def get_bab_sarf_harakat(number):
    """
    Look up the (past, future) marks of a bab sarf by its number.
        - Bab: past future
        - 1  : FATHA DAMMA
        - 2  : FATHA KASRA
        - 3  : FATHA FATHA
        - 4  : KASRA FATHA
        - 5  : DAMMA DAMMA
        - 6  : KASRA KASRA
    @param number: Bab sarf number (1-6).
    @type number: integer(1-6)
    @return: a tuple of (past_mark, future_mark), or None when the
        number is not one of 1..6.
    @rtype: tuple
    """
    if number < 1 or number > 6:
        return None
    babs = (
        (FATHA, DAMMA),
        (FATHA, KASRA),
        (FATHA, FATHA),
        (KASRA, FATHA),
        (DAMMA, DAMMA),
        (KASRA, KASRA),
    )
    for bab_number, bab_marks in enumerate(babs, 1):
        if number == bab_number:
            return bab_marks
    return None
92
+
93
+
94
def get_bab_sarf_number(past_haraka, future_haraka):
    """
    Find the bab sarf number matching a pair of past and future marks.
        - Bab: past future
        - 1  : FATHA DAMMA
        - 2  : FATHA KASRA
        - 3  : FATHA FATHA
        - 4  : KASRA FATHA
        - 5  : DAMMA DAMMA
        - 6  : KASRA KASRA
    @param past_haraka: past haraka of the verb.
    @type past_haraka: unicode
    @param future_haraka: future haraka of the verb.
    @type future_haraka: unicode
    @return: Bab sarf number (1-6), or 0 when the pair matches no bab.
    @rtype: integer
    """
    babs = (
        (FATHA, DAMMA),
        (FATHA, KASRA),
        (FATHA, FATHA),
        (KASRA, FATHA),
        (DAMMA, DAMMA),
        (KASRA, KASRA),
    )
    for bab_number, (past_mark, future_mark) in enumerate(babs, 1):
        if past_haraka == past_mark and future_haraka == future_mark:
            return bab_number
    return 0
125
+
126
def write_harakat_in_full(harakat):
    """
    Spell out a sequence of harakat using their Arabic names.

    Each known haraka contributes u'-' followed by its name; any other
    character contributes u"*".
    @param harakat: list of harakat chars.
    @type harakat: unicode string
    @return: harakat in full
    @rtype: unicode
    """
    names = {
        FATHA: u"فتحة",
        DAMMA: u"ضمة",
        KASRA: u"كسرة",
        SUKUN: u"سكون",
        vconst.ALEF_HARAKA: u"ألف",
        vconst.WAW_HARAKA: u"واو",
        vconst.YEH_HARAKA: u"ياء",
        vconst.ALEF_YEH_HARAKA: u"ى",
        vconst.ALEF_WAW_HARAKA: u"و",
        vconst.ALEF_YEH_ALTERNATIVE: u"ئ",
    }
    return u"".join(
        u'-' + names[mark] if mark in names else u"*"
        for mark in harakat
    )
153
+
154
+
155
def get_past_harakat_by_babsarf(vtype):
    """
    Build the three past-tense harakat of a trilateral verb from its bab sarf.
        - Bab: past future
        - 1  : FATHA DAMMA
        - 2  : FATHA KASRA
        - 3  : FATHA FATHA
        - 4  : KASRA FATHA
        - 5  : DAMMA DAMMA
        - 6  : KASRA KASRA
    @param vtype: the bab sarf codification ('1'..'6').
    @type vtype: unicode a string of number
    @return: three harakat; three KASRA for any other value.
    @rtype: unicode
    """
    if vtype in ('1', '2', '3'):
        middle = FATHA
    elif vtype in ('4', '6'):
        middle = KASRA
    elif vtype == '5':
        middle = DAMMA
    else:
        # unknown codification: three kasraat by default
        return KASRA + KASRA + KASRA
    return FATHA + middle + FATHA
178
+
179
def get_future_harakat_by_babsarf(vtype):
    """
    Build the three harakat returned for the future form of a trilateral
    verb from its bab sarf.
        - Bab: past future
        - 1  : FATHA DAMMA
        - 2  : FATHA KASRA
        - 3  : FATHA FATHA
        - 4  : KASRA FATHA
        - 5  : DAMMA DAMMA
        - 6  : KASRA KASRA
    @param vtype: the bab sarf codification ('1'..'6').
    @type vtype: unicode a string of number
    @return: three harakat; three KASRA for any other value.
    @rtype: unicode
    """
    # TODO: review -- the original author marked this mapping for review.
    by_codes = (
        (('1', '2', '3'), FATHA * 3),
        (('4', '6'), u"".join([FATHA, KASRA, FATHA])),
        (('5',), u"".join([FATHA, DAMMA, FATHA])),
    )
    for codes, marks in by_codes:
        if vtype in codes:
            return marks
    return KASRA * 3
203
+
204
def get_future_haraka_by_babsarf(vtype):
    """
    Get the future mark of a trilateral verb from its conjugation mode
    (Bab Tasrif باب التصريف).
        - Bab: past future
        - 1  : FATHA DAMMA
        - 2  : FATHA KASRA
        - 3  : FATHA FATHA
        - 4  : KASRA FATHA
        - 5  : DAMMA DAMMA
        - 6  : KASRA KASRA
    @param vtype: the bab sarf codification ('1'..'6').
    @type vtype: unicode a string of number
    @return: the future haraka, or an empty string for any other value.
    @rtype: unicode char
    """
    # '1' used to be tested twice; the second test could never match.
    if vtype in ('1', '5'):
        return DAMMA
    if vtype in ('2', '6'):
        return KASRA
    if vtype in ('3', '4'):
        return FATHA
    return ""
234
+
235
+
236
def get_haraka_by_name(haraka_name):
    """
    Convert the Arabic name of a haraka into the haraka character.
    Recognised names:
        - FATHA:(فتحة)
        - DAMMA:(ضمة)
        - KASRA:(كسرة)
        - SUKUN:(سكون)
    A value that araby.is_shortharaka accepts is returned unchanged.
    @param haraka_name: the arabic name of haraka.
    @type haraka_name: unicode
    @return: the haraka character, or False when the name is unknown.
    @rtype: unicode char
    """
    if araby.is_shortharaka(haraka_name):
        return haraka_name
    named_marks = (
        (u"فتحة", FATHA),
        (u"ضمة", DAMMA),
        (u"كسرة", KASRA),
        (u"سكون", SUKUN),
    )
    for arabic_name, mark in named_marks:
        if haraka_name == arabic_name:
            return mark
    return False
260
+
261
+
262
def get_future_type_by_name(haraka_name):
    """
    Resolve the future_type mark from a haraka Arabic name.
    values
        - FATHA:(فتحة)
        - DAMMA:(ضمة)
        - KASRA:(كسرة)
    @param haraka_name: the arabic name of haraka.
    @type haraka_name: unicode
    @return: the haraka found by get_haraka_by_name, FATHA when that
        lookup yields a false value.
    @rtype: unicode char
    """
    return get_haraka_by_name(haraka_name) or FATHA
279
+
280
+
281
def get_future_type_entree(future_type):
    """
    Decode the future_type mark from the codifications used by callers
    (also used on the command line).
    Accepted values, compared after lower-casing:
        - FATHA:(fatha, فتحة, ف, f)
        - DAMMA:(damma, ضمة, ض, d)
        - KASRA:(kasra, كسرة, ك, k)
    or the conjugation mode number ( Bab Tasrif باب التصريف)
        - Bab: past future
        - 1  : FATHA DAMMA
        - 2  : FATHA KASRA
        - 3  : FATHA FATHA
        - 4  : KASRA FATHA
        - 5  : DAMMA DAMMA
        - 6  : KASRA KASRA
    @param future_type: the future_type codification.
    @type future_type: unicode
    @return: the future type mark; FATHA for any unrecognised value.
    @rtype: unicode char
    """
    code = u"" + future_type.lower()
    codifications = (
        ((u'fatha', u'فتحة', u'ف', u'f', u'3', u'4'), FATHA),
        ((u'damma', u'ضمة', u'ض', u'd', u'1', u'5'), DAMMA),
        ((u'kasra', u'كسرة', u'ك', u'k', u'2', u'6'), KASRA),
    )
    for accepted, mark in codifications:
        if code in accepted:
            return mark
    return FATHA
311
+
312
def get_transitive_entree(transitive):
    """
    Decode the transitivity flag from the codifications used by callers.
    The value counts as transitive when it is one of:
        - "t", "transitive",
        - u"متعدي", u"م", u"مشترك", u"ك"
        - True
    @param transitive: the transitive codification.
    @type transitive: unicode
    @return: True if is transitive
    @rtype: boolean
    """
    transitive_codes = (
        u"متعدي", u"م", u"مشترك", u"ك",
        "t", "transitive", True,
    )
    is_transitive = transitive in transitive_codes
    return is_transitive
326
+
327
+ #####################################
328
+ #{verb pretreatment functions
329
+ #####################################
330
def normalize_alef_madda(word):
    """
    Convert a leading Alef madda into two letters.

    The word is first stripped with araby.strip_harakat. A three-character
    result found in vconst.ALEF_MADDA_VERB_TABLE is replaced by the first
    form listed there; otherwise ALEF_MADDA is replaced by HAMZA+ALEF.
    A word that does not start with ALEF_MADDA is returned stripped only
    (this path used to fail with UnboundLocalError because ``word_nm`` was
    assigned only inside the other branch).
    @param word: given word.
    @type word: unicode.
    @return: converted word.
    @rtype: unicode.
    """
    word_nm = araby.strip_harakat(word)
    if not word.startswith(ALEF_MADDA):
        return word_nm
    if len(word_nm) == 3 and word_nm in vconst.ALEF_MADDA_VERB_TABLE:
        # several forms may be listed; only the first one is used
        return vconst.ALEF_MADDA_VERB_TABLE[word_nm][0]
    return word_nm.replace(ALEF_MADDA, HAMZA+ALEF)
355
+
356
+
357
def normalize(word, wordtype = "affix"):
    """
    Normalize the word by unifying hamzat, Alef madda and lam-alefs.

    Original author's notes (translated): the word is converted from its
    orthographic form (how spelling rules write it) to a "theoretical"
    form that ignores those rules -- mainly the many shapes of hamza,
    which become a hamza on the line -- so that it can be conjugated
    independently of spelling; spelling rules are applied again after
    conjugation. Input words are assumed to be fully vocalized.
    @param word: given word.
    @type word: unicode.
    @param wordtype: "affix" when the word is an affix (tatweel is kept,
        since it is used to carry a separate haraka in affixes).
    @type wordtype: unicode.
    @return: converted word.
    @rtype: unicode.
    """
    if wordtype != "affix":
        word = araby.strip_tatweel(word)
    # a leading alef madda is expanded into separate letters
    if word.startswith(ALEF_MADDA):
        word = normalize_alef_madda(word)

    # skip the short harakat found at the beginning of the word
    start = 0
    for char in word:
        if not araby.is_shortharaka(char):
            break
        start += 1
    word = word[start:]

    # bring every hamza shape to a single form
    word = araby.normalize_hamza(word)

    # split the LAM ALEF ligatures into separate letters
    ligatures = (
        (LAM_ALEF, SIMPLE_LAM_ALEF),
        (LAM_ALEF_HAMZA_ABOVE, SIMPLE_LAM_ALEF_HAMZA_ABOVE),
        (LAM_ALEF_MADDA_ABOVE, SIMPLE_LAM_ALEF_MADDA_ABOVE),
    )
    for ligature, letters in ligatures:
        word = word.replace(ligature, letters)
    return word
404
+
405
+
406
def uniformate_alef_origin(marks, word_nm, future_type = KASRA):
    """
    Convert the alef haraka to its origin according to the future type haraka.

    Only a two-character ``marks`` whose first character is
    vconst.ALEF_HARAKA is changed: that character becomes
    vconst.ALEF_YEH_HARAKA when future_type is KASRA and
    vconst.ALEF_WAW_HARAKA when it is DAMMA.
    @param marks: given marks.
    @type marks: unicode.
    @param word_nm: given word unvocalized. It is kept in the signature
        for callers but is not used: the former rewrites of this local
        name never affected the returned value and were removed.
    @type word_nm: unicode.
    @param future_type: The future mark of the triletiral verb.
    @type future_type: unicode char, default KASRA.
    @return: converted marks.
    @rtype: unicode.
    """
    if len(marks) != 2:
        return marks
    # the letter before the last one is a weak letter
    if marks[-2] == vconst.ALEF_HARAKA:
        if future_type == KASRA:
            return marks[:-2] + vconst.ALEF_YEH_HARAKA + marks[-1:]
        if future_type == DAMMA:
            return marks[:-2] + vconst.ALEF_WAW_HARAKA + marks[-1:]
    return marks
434
+
435
+ #--------------------------------------
436
+ # Predecated function
437
+ #--------------------------------------
438
def normalize_affix(word):
    """
    Insert a SUKUN before every SHADDA of the word.
    @param word: given word.
    @type word: unicode.
    @return: converted word.
    @rtype: unicode.
    """
    sukun_shadda = SUKUN + SHADDA
    return sukun_shadda.join(word.split(SHADDA))
450
+
451
+
452
def uniformate_suffix(word):
    """
    Separate the harakat and the letters of the given affix.

    Every SHADDA is first expanded to SUKUN+SHADDA. The word is then
    walked letter by letter: each character that is not a short haraka is
    appended to the letters string, and one mark is appended to the
    harakat string for it. A FATHA+ALEF, DAMMA+WAW or KASRA+YEH sequence
    is, under the conditions tested below, folded into the long-haraka
    codes of vconst and the weak letter is dropped from the letters.
    @param word: given affix.
    @type word: unicode.
    @return: (letters, harakat); (u"", u"") when both strings do not end
        up with the same length.
    @rtype: tuple of unicode.
    """
    # NOTE(review): the nesting below is reconstructed from the logic, as
    # the indentation was not available -- confirm against the real file.
    word = word.replace(SHADDA, SUKUN+SHADDA)
    shakl = u""      # collected harakat, one per kept letter
    word_nm = u""    # collected letters
    i = 0
    len_word = len(word)
    while i < len_word:
        if not araby.is_shortharaka(word[i]):  # current char is a letter
            word_nm += word[i]
            if i+1 < len(word) and araby.is_shortharaka(word[i+1]):
                if word[i+1] == FATHA:
                    # FATHA + ALEF followed by at least one more char
                    if i+2 < len(word) and word[i+2] == ALEF and \
                            i+3 < len(word):
                        shakl += vconst.ALEF_HARAKA
                        i += 3
                    else:
                        shakl += FATHA
                        i += 2
                elif word[i+1] == DAMMA and i+2 < len(word) and \
                        word[i+2] == WAW:
                    # the WAW is a long vowel only if it carries no haraka
                    if i+3 >= len(word) or not araby.is_shortharaka(word[i+3]):
                        shakl += vconst.WAW_HARAKA
                        i += 3
                    else:
                        shakl += DAMMA
                        i += 2
                elif word[i+1] == KASRA and i+2 < len(word) and \
                        word[i+2] == YEH:
                    # the YEH is a long vowel only if it carries no haraka
                    if i+3 >= len(word) or not araby.is_shortharaka(word[i+3]):
                        shakl += vconst.YEH_HARAKA
                        i += 3
                    else:
                        shakl += KASRA
                        i += 2
                else:
                    shakl += word[i+1]
                    i += 2

            elif i+1 < len(word) and araby.is_haraka(word[i+1]):
                # NOTE(review): as reconstructed, this branch does not
                # advance i, so the loop would not terminate when a letter
                # is followed by a haraka that is not a short haraka --
                # confirm against the real indentation.
                shakl += word[i+1]
            else:
                # letter without any haraka
                shakl += vconst.NOT_DEF_HARAKA
                i += 1
        else:
            i += 1
    if len(word_nm) == len(shakl):
        return (word_nm, shakl)
    else:
        return (u"", u"")
507
+
508
+
509
def uniformate_verb(word):
    """
    Separate the harakat and the letters of the given verb.

    The marks are not read from the word except for the second haraka of
    a three-letter verb; for the other lengths they come from the
    vconst.UNIFORMATE_MARKS_* constants. A medial ALEF is dropped from the
    letters and recorded as vconst.ALEF_HARAKA on the previous letter.
    @param word: given word.
    @type word: unicode.
    @return: (letters, harakat); ("", "") for an empty word.
    @rtype: tuple of unicode.
    """
    # NOTE(review): the nesting below is reconstructed from the logic, as
    # the indentation was not available -- confirm against the real file.
    if word == "":
        return ("", "")
    # normalize ALEF MADDA
    if word.startswith(ALEF_MADDA):
        word = normalize_alef_madda(word)
    else:
        word = word.replace(ALEF_MADDA, HAMZA+ALEF)

    word_nm = araby.strip_harakat(word)
    length = len(word_nm)
    if len(word_nm) != 3:
        # Hamza shapes are used to guess the harakat of a trilateral verb,
        # so only the other lengths are normalized at this point.
        word_nm = vconst.HAMZAT_PATTERN.sub(HAMZA, word_nm)

    # Hamza shapes help detect the verb vocalization; unified afterwards.
    if length == 3:
        if word_nm[1]in (ALEF, ALEF_HAMZA_ABOVE) or \
                word_nm[2] in (ALEF_MAKSURA, ALEF_HAMZA_ABOVE, ALEF):
            marks = FATHA+FATHA+FATHA
        # NOTE(review): this test indexes ``word`` (which may still carry
        # harakat) while the one above indexes ``word_nm`` -- confirm
        # this is intended.
        elif word[1] == YEH_HAMZA or word[2] in (YEH, YEH_HAMZA):
            marks = FATHA+KASRA+FATHA
        else:
            # read the second haraka from the vocalized word
            i = 0
            # ignore harakat at the beginning of the word
            while araby.is_shortharaka(word[i]):
                i += 1
            # first letter
            if not araby.is_shortharaka(word[i]):
                i += 1
            # first haraka
            while araby.is_shortharaka(word[i]):
                i += 1
            # second letter
            if not araby.is_shortharaka(word[i]):
                i += 1
            # second haraka
            if not araby.is_shortharaka(word[i]):
                # The author met problems conjugating the doubled verb in
                # the past tense; the second haraka is set to FATHA for now.
                # TODO: review this case
                secondharaka = FATHA
            else:
                secondharaka = word[i]
            marks = u''.join([FATHA, secondharaka, FATHA])
        # hamza shapes were needed above; normalize them now
        word_nm = vconst.HAMZAT_PATTERN.sub(HAMZA, word_nm)

    elif length == 4:
        marks = vconst.UNIFORMATE_MARKS_4
    elif length == 5:
        if word_nm.startswith(TEH):
            marks = vconst.UNIFORMATE_MARKS_5TEH
        else:
            marks = vconst.UNIFORMATE_MARKS_5
    elif length == 6:
        marks = vconst.UNIFORMATE_MARKS_6
    else:
        marks = FATHA*len(word_nm)

    i = 1
    # the first letter and its mark are added automatically
    new_word = word_nm[0]
    new_harakat = marks[0]
    # letters between the first and the last
    while i < length-1:
        if word_nm[i] == ALEF:
            # a medial ALEF turns the previous mark into the long alef code
            new_harakat = new_harakat[:-1]+vconst.ALEF_HARAKA
        else:
            new_harakat += marks[i]
            new_word += word_nm[i]
        i += 1
    # the last letter
    # Case of the verbs عيا، أعيا، عيّا, where the final alef becomes
    # YEH instead of WAW.
    if word_nm[i] == ALEF:
        if len(word_nm) == 3 and word_nm[1] != YEH:
            new_word += vconst.ALEF_MAMDUDA
        else:
            new_word += YEH
    else:
        new_word += word_nm[i]
    new_harakat += marks[i]
    return (new_word, new_harakat)
609
+
610
+
611
+ #####################################
612
+ #{verb conjugation output treatment functions
613
+ #####################################
614
def standard_harakat(word):
    """
    Treat Harakat on the word before output.

    A weak letter (ALEF, WAW or YEH) that is not the first character,
    is not preceded by a short haraka and is not followed by one (or ends
    the word) gets its matching haraka written before it: FATHA before
    ALEF, DAMMA before WAW, KASRA before YEH. Everything else is copied
    unchanged. An empty word is returned as is instead of raising
    IndexError.
    @param word: given vocalized word.
    @type word: unicode.
    @return: vocalized word with adjusted harakat.
    @rtype: unicode.
    """
    if not word:
        return word
    weak_letters = (ALEF, YEH, WAW, ALEF_MAKSURA)
    with_haraka = {ALEF: FATHA+ALEF, WAW: DAMMA+WAW, YEH: KASRA+YEH}
    last = len(word) - 1
    # the first character is never considered, even when it is weak
    pieces = [word[0]]
    for k in range(1, len(word)):
        char = word[k]
        is_bare_weak = (
            char in weak_letters
            and not araby.is_shortharaka(word[k-1])
            and (k == last or not araby.is_shortharaka(word[k+1]))
        )
        if is_bare_weak:
            # ALEF_MAKSURA has no entry and is copied unchanged
            pieces.append(with_haraka.get(char, char))
        else:
            pieces.append(char)
    return u"".join(pieces)
647
+
648
+
649
def geminating(word_nm, harakat):
    """ Treat geminating (shadda) cases.

    The input is an unvocalized word together with its matching harakat;
    a doubled letter is represented by a SHADDA character in the word.
    When the case requires un-gemination, the SHADDA is replaced by the
    doubled letter itself. Otherwise the haraka of the first doubled
    letter is set to vconst.NOT_DEF_HARAKA (a "cancelled" haraka that,
    according to the original author's note, is removed later by the
    orthography step).

    A word without SHADDA is returned unchanged.

    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: (letters, harakat).
    @rtype: tuple of unicode.
    """
    new_word = u""
    new_harakat = u""
    i = 0
    length = len(word_nm)
    # Nothing to treat when the word holds no shadda.
    if word_nm.find(SHADDA) < 0:
        return (word_nm, harakat)
    while i < length:
        # Handle the cases where the current letter is followed by a
        # shadda, and decide whether the gemination must be undone.
        # NOTE(review): the trailing "and harakat[i-1]" only tests that the
        # previous haraka is a non-empty string.
        if (i > 0 and i+1 < length and word_nm[i+1] == SHADDA and \
         harakat[i] in (SUKUN, FATHA, KASRA, DAMMA)) and harakat[i-1]:
            # treat ungeminating case

            # If the first doubled letter is vowelled and the second one
            # (represented by the shadda) carries a sukun, the gemination
            # is undone.
            if harakat[i] != SUKUN and harakat[i+1] == SUKUN:
                # ungeminating
                new_word += word_nm[i]
                # presumably writes the current letter over the shadda at
                # position i+1 - confirm against replace_pos
                word_nm = replace_pos(word_nm, word_nm[i], i+1)
                new_harakat += harakat[i]
                i += 1

            elif harakat[i] == SUKUN and harakat[i+1] == SUKUN:
                # no geminating: both are quiescent, the first gets FATHA
                new_word += word_nm[i]
                word_nm = replace_pos(word_nm, word_nm[i], i+1)
                new_harakat += FATHA
                i += 1
            else:

                # When the previous letter carries a sukun, its haraka is
                # replaced by the haraka of the first doubled letter.
                if i-1 >= 0 and new_harakat[i-1] == SUKUN:
                    new_word += word_nm[i]+SHADDA
                    if harakat[i] != SUKUN:
                        new_harakat = new_harakat[:-1]+harakat[i]+ \
                         vconst.NOT_DEF_HARAKA+harakat[i+1]
                    else:
                        new_harakat = new_harakat[:-1]+FATHA+ \
                         vconst.NOT_DEF_HARAKA+harakat[i+1]
                # Gemination is kept when the previous letter carries a
                # long haraka.
                elif i-1 >= 0 and new_harakat[i-1] in \
                 (vconst.ALEF_HARAKA, vconst.WAW_HARAKA, \
                 vconst.YEH_HARAKA):
                    new_word += word_nm[i]+SHADDA
                    new_harakat += vconst.NOT_DEF_HARAKA+harakat[i+1]

                elif harakat[i] == SUKUN:
                    new_word += word_nm[i]+SHADDA
                    new_harakat += vconst.NOT_DEF_HARAKA+harakat[i+1]
                else:
                    # Temporary, until the problem is solved (original
                    # author's note).
                    new_word += word_nm[i]+SHADDA
                    new_harakat += vconst.NOT_DEF_HARAKA+harakat[i+1]
                    # TODO (original author): prevent gemination in cases
                    # where it is not possible, e.g. when both letters are
                    # vowelled and the preceding letter is vowelled too.
                    # The author notes this was solved by the
                    # homogenization algorithm between conjugations.
                # the letter and its shadda were both consumed
                i += 2
        elif i > 0 and i+1 < length and word_nm[i+1] == word_nm[i] and \
         harakat[i] == SUKUN and harakat[i+1] in (FATHA, DAMMA, KASRA):
            # treat geminating case: a quiescent letter followed by the same
            # vowelled letter is merged into letter + shadda
            new_word += word_nm[i]+SHADDA
            new_harakat += vconst.NOT_DEF_HARAKA+harakat[i+1]
            i += 2
        else :
            # ordinary letter: copy letter and haraka unchanged
            new_word += word_nm[i]
            new_harakat += harakat[i]
            i += 1
    return (new_word, new_harakat)
738
+
739
+
740
def standard2(word_nm, harakat):
    """ Join the letters and their harakat into one word in standard script.

    The pair goes through geminating(), an optional leading ALEF when the
    word would start with a sukun, homogenize() and tahmeez2(); letters and
    harakat are then interleaved and the vconst.STANDARD_REPLACEMENT
    substitutions are applied.

    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: vocalized word; u"*" when the inputs differ in length, and
        u"" when an intermediate step leaves letters and harakat misaligned.
    @rtype: unicode.
    """
    # Misaligned input: report it and give up.
    if len(word_nm) != len(harakat):
        print(word_nm.encode('utf8'),len(word_nm), u"-".join([araby.name(x) for x in harakat]), len(harakat))
        return u"*"

    word_nm, harakat = geminating(word_nm, harakat)
    if len(word_nm) != len(harakat):
        return u""

    # A word may not begin with a sukun: prepend an ALEF whose haraka is
    # DAMMA when the second haraka is a damma (short or long), else KASRA.
    if harakat and harakat.startswith(SUKUN):
        word_nm = ALEF+word_nm
        second_is_damma = len(harakat) >= 2 and harakat[1] in \
            (DAMMA, vconst.WAW_HARAKA)
        harakat = (DAMMA if second_is_damma else KASRA)+harakat

    if len(word_nm) != len(harakat):
        return u""

    # Keep the state before homogenization for the diagnostic output below.
    word_before, harakat_before = word_nm, harakat
    word_nm, harakat = homogenize(word_nm, harakat)
    if len(word_nm) != len(harakat):
        print("len word: ", len(word_nm), word_nm.encode('utf8') )
        print("len harakat: ", len(harakat), repr(harakat))
        print(repr(harakat_before), word_before.encode('utf8'))
        return u""
    word_nm = tahmeez2(word_nm, harakat)

    # Interleave each letter with its haraka, writing special harakat in
    # the form given by vconst.WRITTEN_HARAKA.
    written = vconst.WRITTEN_HARAKA
    chunks = []
    for idx in range(len(word_nm)):
        mark = harakat[idx]
        if mark in written:
            mark = written[mark]
        chunks.append(word_nm[idx]+mark)
    word = u"".join(chunks)

    # Final orthographic substitutions, applied in table order.
    for (pat, rep) in vconst.STANDARD_REPLACEMENT:
        word = word.replace( pat, rep)
    return word
802
+
803
+
804
def tahmeez2(word_nm, harakat):
    """ Transform hamza into its standard-script seat.

    Takes the word without harakat and the harakat separately, and returns
    the word with every HAMZA / ALEF_HAMZA_ABOVE replaced by the hamza form
    chosen from the tahmeez tables in vconst (initial, middle or final
    position), based on the haraka of the hamza and of the previous letter.

    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: the letters after treatment; u"" when the two inputs differ
        in length; the word unchanged when it contains no HAMZA.
    @rtype: unicode.
    """
    # the harakat length != letters length
    if len(word_nm) != len(harakat):
        return u""
    # if no hamza, no tahmeez
    elif HAMZA not in word_nm:
        return word_nm
    else:
        ha2 = u""
        # reduce the alternative long-alef harakat to the standard one.
        for hrk in harakat:
            if hrk == vconst.ALEF_YEH_HARAKA or \
             hrk == vconst.ALEF_WAW_HARAKA:
                hrk = vconst.ALEF_HARAKA
            ha2 += hrk
        harakat = ha2
        word = u""
        for i in range(len(word_nm)):
            if word_nm[i] != HAMZA and word_nm[i] != ALEF_HAMZA_ABOVE:
                # not a hamza: copy unchanged
                word += word_nm[i]
            else:
                if i == 0:
                    # initial hamza: seat depends on its own haraka only
                    actual = harakat[i]
                    swap = vconst.INITIAL_TAHMEEZ_TABLE.get(actual, actual)
                else:
                    before = harakat[i-1]
                    actual = harakat[i]

                    if i+1 < len(word_nm):
                        # middle hamza
                        # if the hamza have shadda, it will take the harakat of shadda.
                        if actual == vconst.NOT_DEF_HARAKA or actual == SUKUN:
                            if word_nm[i+1] == SHADDA and harakat[i+1] != SUKUN:
                                actual = harakat[i+1]
                        # an undefined haraka is treated as FATHA for lookup
                        if before == vconst.NOT_DEF_HARAKA:
                            before = FATHA
                        if actual == vconst.NOT_DEF_HARAKA:
                            actual = FATHA

                        if before in vconst.MIDDLE_TAHMEEZ_TABLE and\
                         actual in vconst.MIDDLE_TAHMEEZ_TABLE[before]:
                            swap = vconst.MIDDLE_TAHMEEZ_TABLE[before][actual]
                            # Overrides of the table result when the
                            # previous haraka is a sukun or a long vowel.
                            if before in (SUKUN, vconst.YEH_HARAKA, vconst.ALEF_HARAKA, vconst.WAW_HARAKA):
                                if actual == FATHA and word_nm[i-1] == araby.YEH:
                                    swap = araby.YEH_HAMZA
                                # (a wider commented-out variant also tested
                                # for DAL, THAL, REH and ZAIN here)
                                elif word_nm[i-1] == araby.WAW and actual not in (KASRA, vconst.YEH_HARAKA):
                                    swap = araby.HAMZA
                        else :
                            # combination not in the table: keep the letter
                            swap = word_nm[i]

                    else :
                        # final hamza
                        if before == vconst.NOT_DEF_HARAKA:
                            before = FATHA
                        if actual == vconst.NOT_DEF_HARAKA:
                            actual = FATHA

                        if before in vconst.FINAL_TAHMEEZ_TABLE and \
                         actual in vconst.FINAL_TAHMEEZ_TABLE[before]:
                            # after a WAW, a hamza with FATHA or DAMMA is
                            # written as a bare hamza
                            if word_nm[i-1] in( araby.WAW, ) and actual in (FATHA, DAMMA):
                                swap = araby.HAMZA
                            else:
                                swap = vconst.FINAL_TAHMEEZ_TABLE[before][actual]
                        else :
                            swap = word_nm[i]
                word += swap
        return word
885
+
886
def treat_sukun2(word_nm, harakat):
    """ Treat the meeting of a haraka with a following sukun.

    Takes the word without harakat and the harakat separately, and
    rewrites every haraka that is directly followed by a SUKUN: a long
    ALEF haraka is kept before a doubled letter, becomes KASRA at position
    0 and FATHA elsewhere; other harakat are mapped through
    vconst.CONVERSION_TABLE when listed there.

    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: the new sequence of harakat (the input harakat unchanged when
        it holds no sukun or when the two inputs differ in length).
    @rtype: unicode.
    """
    # Nothing to treat without a sukun, or when the inputs are misaligned.
    if SUKUN not in harakat or len(word_nm) != len(harakat):
        return harakat

    total = len(harakat)
    adjusted = []
    for pos, mark in enumerate(harakat):
        before_sukun = pos+1 < total and harakat[pos+1] == SUKUN
        if not before_sukun:
            adjusted.append(mark)
        elif mark == vconst.ALEF_HARAKA:
            if pos+2 < total and word_nm[pos+2] == SHADDA:
                # long alef followed by a doubled letter stays long
                adjusted.append(vconst.ALEF_HARAKA)
            elif pos == 0:
                # long alef on the first letter shortens to kasra
                adjusted.append(KASRA)
            else:
                # later positions shorten to fatha
                adjusted.append(FATHA)
        elif mark in vconst.CONVERSION_TABLE:
            # if the actual haraka is in table use table conversion
            adjusted.append(vconst.CONVERSION_TABLE[mark])
        else:
            adjusted.append(mark)
    return u"".join(adjusted)
931
+
932
+
933
+
934
def homogenize(word_nm, harakat):
    """ Treat the junction of WAW and YEH with the neighbouring harakat.

    Handles the transformations that affect a YEH / ALEF MAKSURA or a
    WAW / vconst.ALEF_MAMDUDA in the middle of the word and at its end:
    depending on the haraka of the weak letter, the haraka of the previous
    letter and whether the following letter is doubled, the weak letter is
    kept, replaced by another letter, or dropped while the previous haraka
    is turned into a long one.

    The first letter is never treated. The inputs are returned unchanged
    when their lengths differ (a message is printed) or when the word
    holds no weak letter.

    @param word_nm: given unvocalized word.
    @type word_nm: unicode.
    @param harakat: given harakat.
    @type harakat: unicode.
    @return: (letters, harakat) after treatment.
    @rtype: tuple of unicode.
    """
    # inequal length between letters and harakat
    if len(word_nm) != len(harakat):
        print("Homogenize:inequal length", len(word_nm), len(harakat))
        return (word_nm, harakat)
    # word without weak letters doesn't need treatment
    elif not re.search(u'[%s%s%s%s]'%(ALEF_MAKSURA, vconst.ALEF_MAMDUDA, \
     YEH, WAW), word_nm):
        return (word_nm, harakat)
    # treatment
    else:
        new_harakat = harakat[0]
        new_word = word_nm[0]
        # start from the second letter: the first one is not treated
        i = 1
        # study the YEH and WAW cases before the last letter
        len_word_nm = len(word_nm)
        while i < len_word_nm-1:
            actual_letter = word_nm[i] # Actual letter
            actual_haraka = harakat[i] # Actual haraka
            if i-1 >= 0 :
                previous_letter = word_nm[i-1] # previous letter
                previous_haraka = harakat[i-1] # previous haraka
            else:
                previous_letter = ''
                previous_haraka = ''
            if i+1 < len_word_nm:
                next_letter = word_nm[i+1] # next letter
                next_haraka = harakat[i+1] # next haraka
            else:
                next_letter = ''
                next_haraka = ''
            # is the next letter doubled (followed by a shadda)?
            if i+2 < len_word_nm and word_nm[i+2] == SHADDA:
                shadda_in_next = True
            else:
                shadda_in_next = False

            if actual_letter == ALEF_MAKSURA or actual_letter == YEH:
                # If the yeh is quiescent or carries a kasra (short or
                # long) and the previous letter carries a kasra, the
                # previous letter takes a long kasra, e.g.
                # بِ + يْ => بِي
                # بِ + يِ => بِي

                if actual_letter == ALEF_MAKSURA and next_haraka == SUKUN:
                    # weak letter dropped, nothing added
                    new_harakat += ""
                elif (actual_haraka in(SUKUN, KASRA, vconst.YEH_HARAKA)) and \
                 previous_haraka == KASRA and not shadda_in_next:
                    new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
                # NOTE(review): "in(KASRA)" is a substring test on a
                # string, not tuple membership.
                elif (actual_haraka in(KASRA)) and previous_haraka == KASRA \
                 and shadda_in_next:
                    new_harakat += ''
                # (commented-out variants for the passive present of
                # تيسّر and for forms like تؤدّينّ were here - ToDo review)
                # e.g. سقّى، يُسقُّون
                elif actual_haraka == DAMMA and shadda_in_next:
                    if previous_haraka in (DAMMA, KASRA):
                        new_harakat = new_harakat[:-1]+DAMMA
                    else:
                        new_harakat += DAMMA
                        new_word += WAW
                # Convert the yeh into a quiescent waw: when the yeh
                # carries a damma (short or long) and the previous letter
                # carries a fatha, e.g.
                # بَ + يُ => بَوْ
                # بَ + يو => بَوْ

                elif (actual_haraka in (DAMMA, vconst.WAW_HARAKA))and\
                 previous_haraka == FATHA and not shadda_in_next:
                    new_harakat += SUKUN
                    new_word += WAW
                elif (actual_haraka in (DAMMA, vconst.WAW_HARAKA))and \
                 previous_haraka == FATHA and shadda_in_next:
                    new_harakat += actual_haraka
                    new_word += WAW
                # When the yeh is quiescent, the previous letter carries a
                # damma and the next letter is not a yeh, the previous
                # letter takes a long damma, e.g. بُ + يْت => بُوت

                elif (actual_haraka == SUKUN) and previous_haraka == DAMMA \
                 and next_letter != YEH and not shadda_in_next:
                    new_harakat = new_harakat[:-1]+vconst.WAW_HARAKA

                elif (actual_haraka == vconst.YEH_HARAKA)and \
                 previous_haraka == FATHA:
                    new_harakat += SUKUN
                    new_word += YEH
                elif (actual_haraka == vconst.WAW_HARAKA) and previous_haraka == KASRA :
                    new_harakat = new_harakat[:-1]+vconst.WAW_HARAKA

                else :
                    # default: keep the haraka, write the letter as YEH
                    new_harakat += actual_haraka
                    new_word += YEH

            elif actual_letter == vconst.ALEF_MAMDUDA or \
             actual_letter == WAW:
                if actual_letter == vconst.ALEF_MAMDUDA and \
                 next_haraka == SUKUN:
                    # weak letter dropped, nothing added
                    new_harakat += ""
                elif actual_letter == vconst.ALEF_MAMDUDA and \
                 (actual_haraka in(SUKUN, DAMMA, vconst.WAW_HARAKA))and\
                 (previous_haraka == DAMMA) and not shadda_in_next:
                    new_harakat = new_harakat[:-1]+vconst.WAW_HARAKA

                elif actual_letter == WAW and (actual_haraka in(SUKUN, DAMMA))\
                 and (previous_haraka == DAMMA) and not shadda_in_next:
                    new_harakat = new_harakat[:-1]+vconst.WAW_HARAKA

                # convert a waw with damma (short or long) after a fatha
                # into a quiescent waw
                elif (actual_haraka in (DAMMA, vconst.WAW_HARAKA)) \
                 and previous_haraka == FATHA :
                    new_harakat += SUKUN
                    new_word += WAW
                # case of وجع => ايجع
                elif (actual_haraka == (SUKUN))and \
                 (previous_haraka == KASRA) and not shadda_in_next:
                    new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
                elif (actual_haraka == KASRA)and shadda_in_next:
                    new_harakat = new_harakat[:-1]+KASRA

                elif actual_letter == vconst.ALEF_MAMDUDA and \
                 (actual_haraka == DAMMA) and shadda_in_next:
                    if previous_haraka == DAMMA:
                        new_harakat = new_harakat[:-1]+DAMMA
                    else:
                        new_harakat += DAMMA
                        new_word += WAW

                elif actual_letter == WAW and (actual_haraka == vconst.WAW_HARAKA):
                    new_harakat = new_harakat[:-1]+ vconst.WAW_HARAKA
                elif actual_letter == WAW and (actual_haraka == DAMMA) and previous_haraka == DAMMA and shadda_in_next:
                    new_harakat +=""

                elif actual_letter == vconst.ALEF_MAMDUDA and \
                 (actual_haraka == vconst.YEH_HARAKA) and \
                 not shadda_in_next:
                    new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
                # NOTE(review): the next two branches are identical to each
                # other and look unreachable, because DAMMA after FATHA is
                # already caught by the "quiescent waw" branch above.
                elif (actual_letter == WAW ) and (actual_haraka == DAMMA) and previous_haraka in (FATHA,) and \
                 shadda_in_next:
                    new_harakat += DAMMA
                    new_word += WAW
                elif actual_letter == WAW and (actual_haraka == DAMMA) and previous_haraka in (FATHA,) and \
                 shadda_in_next:
                    new_harakat += DAMMA
                    new_word += WAW
                elif actual_letter == WAW and (actual_haraka == DAMMA) and\
                 shadda_in_next:
                    new_harakat += DAMMA
                    new_word += WAW
                else :
                    # default: keep the haraka, write the letter as WAW
                    new_harakat += actual_haraka
                    new_word += WAW
            else:
                # not a weak letter: copy unchanged
                new_harakat += actual_haraka
                new_word += actual_letter
            i += 1
        # end of while
        # we have to treat the last letter
        # NOTE(review): for a one-letter word i is still 1 here, so
        # word_nm[i] would raise IndexError - confirm callers never pass one.
        # Actual letter
        last_letter = word_nm[i]
        # Actual haraka
        last_haraka = harakat[i]
        if i-1 >= 0 :
            # previous letter
            previous_letter = word_nm[i-1]
            # previous haraka
            previous_haraka = harakat[i-1]
        else:
            previous_letter = ''
            previous_haraka = ''

        if last_letter == ALEF_MAKSURA or last_letter == YEH :
            if (last_haraka in(KASRA, DAMMA)) and previous_haraka == KASRA:
                new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
            elif (last_haraka in(vconst.YEH_HARAKA)) and\
             previous_haraka == KASRA :
                new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
            # drop the last letter and its haraka when it is quiescent
            elif (last_haraka == SUKUN):
                new_harakat += ''
                new_word += ''
            elif previous_letter == YEH and \
             (last_haraka in(KASRA, DAMMA, FATHA)) and previous_haraka == FATHA:
                # after a YEH the final weak letter is written as ALEF
                new_harakat += vconst.NOT_DEF_HARAKA
                new_word += ALEF
            elif previous_letter != YEH and \
             (last_haraka in(KASRA, DAMMA, FATHA)) and previous_haraka == FATHA:
                new_harakat += vconst.NOT_DEF_HARAKA
                new_word += ALEF_MAKSURA
            elif (last_haraka in(vconst.WAW_HARAKA)) and \
             previous_haraka == KASRA:
                new_harakat = new_harakat[:-1]+vconst.WAW_HARAKA
            # defective verb in the jussive present with feminine "you"
            elif (last_haraka == vconst.YEH_HARAKA) and \
             previous_haraka == FATHA:
                new_harakat += SUKUN
                new_word += YEH
            else :
                new_harakat += last_haraka
                new_word += YEH

        elif last_letter == vconst.ALEF_MAMDUDA :
            if (last_haraka in(DAMMA, KASRA, vconst.WAW_HARAKA)) and \
             previous_haraka == DAMMA :
                new_harakat = new_harakat[:-1]+vconst.WAW_HARAKA
            elif (last_haraka in(vconst.ALEF_HARAKA)) and \
             previous_haraka == DAMMA:
                new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
            elif (last_haraka == vconst.YEH_HARAKA):
                new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
                new_word += ''
            elif (last_haraka == SUKUN) and previous_haraka == KASRA :
                # letter and haraka are both dropped
                pass

            elif (last_haraka == SUKUN):
                new_harakat += ''
                new_word += ''
            elif (last_haraka == FATHA)and previous_haraka == FATHA:
                new_harakat += vconst.NOT_DEF_HARAKA
                new_word += vconst.ALEF_MAMDUDA
            else :
                new_harakat += last_haraka
                new_word += WAW
        elif last_letter == WAW :
            if (last_haraka in(DAMMA, FATHA)) and previous_haraka == FATHA:
                new_harakat += vconst.NOT_DEF_HARAKA
                new_word += ALEF_MAKSURA
            elif (last_haraka in(FATHA,)) and previous_haraka == KASRA:
                new_harakat += vconst.FATHA
                new_word += YEH
            elif (last_haraka in (vconst.YEH_HARAKA,)) and previous_haraka in (KASRA, DAMMA):
                new_harakat = new_harakat[:-1]+vconst.YEH_HARAKA
                new_word += ''
            elif (last_haraka in(SUKUN,)) and previous_haraka in (DAMMA, FATHA):
                new_harakat += ""
                new_word += ""
            else:
                new_harakat += harakat[i]
                new_word += word_nm[i]
        else:
            # last letter is not weak: copy unchanged
            new_harakat += harakat[i]
            new_word += word_nm[i]
        return (new_word, new_harakat)
1213
+
1214
+
1215
def is_triliteral_verb(verb):
    """ Tell whether the verb is triliteral.

    Used when selecting verbs from the triliteral verb dictionary. The
    harakat are stripped and ALEF MADDA is expanded to HAMZA + ALEF before
    the letters are counted.

    @param verb: given verb.
    @type verb: unicode.
    @return: True if the verb is triliteral.
    @rtype: Boolean.
    """
    bare_letters = araby.strip_harakat(verb).replace(ALEF_MADDA, HAMZA+ALEF)
    return len(bare_letters) == 3
1228
+
1229
+
1230
+
1231
+
1232
+
libqutrub/arabic_const.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ # -*- coding=utf-8 -*-
3
+ #---
4
+ # $Id: arabic_const.py,v 1.6 2003/04/22 17:18:22 elzubeir Exp $
5
+ #
6
+ # ------------
7
+ # Description:
8
+ # ------------
9
+ #
10
+ # Arabic codes
11
+ #
12
+ # (C) Copyright 2003, Arabeyes, Mohammed Elzubeir
13
+ # -----------------
14
+ # Revision Details: (Updated by Revision Control System)
15
+ # -----------------
16
+ # $Date: 2003/04/22 17:18:22 $
17
+ # $Author: elzubeir $
18
+ # $Revision: 1.6 $
19
+ # $Source: /home/arabeyes/cvs/projects/duali/pyduali/pyduali/arabic.py,v $
20
+ #
21
+ # This program is written under the BSD License.
22
+ #---
23
+
24
# Named constants for Arabic code points; each value is a one-character
# unicode string unless built by concatenation below.

# Punctuation
COMMA = u'\u060C'
SEMICOLON = u'\u061B'
QUESTION = u'\u061F'

# Hamza forms and letters, in code point order
HAMZA = u'\u0621'
ALEF_MADDA = u'\u0622'
ALEF_HAMZA_ABOVE = u'\u0623'
WAW_HAMZA = u'\u0624'
ALEF_HAMZA_BELOW = u'\u0625'
YEH_HAMZA = u'\u0626'
ALEF = u'\u0627'
BEH = u'\u0628'
TEH_MARBUTA = u'\u0629'
TEH = u'\u062a'
THEH = u'\u062b'
JEEM = u'\u062c'
HAH = u'\u062d'
KHAH = u'\u062e'
DAL = u'\u062f'
THAL = u'\u0630'
REH = u'\u0631'
ZAIN = u'\u0632'
SEEN = u'\u0633'
SHEEN = u'\u0634'
SAD = u'\u0635'
DAD = u'\u0636'
TAH = u'\u0637'
ZAH = u'\u0638'
AIN = u'\u0639'
GHAIN = u'\u063a'
TATWEEL = u'\u0640'
FEH = u'\u0641'
QAF = u'\u0642'
KAF = u'\u0643'
LAM = u'\u0644'
MEEM = u'\u0645'
NOON = u'\u0646'
HEH = u'\u0647'
WAW = u'\u0648'
ALEF_MAKSURA = u'\u0649'
YEH = u'\u064a'

# Combining madda and hamza marks
MADDA_ABOVE = u'\u0653'
HAMZA_ABOVE = u'\u0654'
HAMZA_BELOW = u'\u0655'

# Arabic-Indic digits
ZERO = u'\u0660'
ONE = u'\u0661'
TWO = u'\u0662'
THREE = u'\u0663'
FOUR = u'\u0664'
FIVE = u'\u0665'
SIX = u'\u0666'
SEVEN = u'\u0667'
EIGHT = u'\u0668'
NINE = u'\u0669'

# Numeric punctuation and other signs
PERCENT = u'\u066a'
DECIMAL = u'\u066b'
THOUSANDS = u'\u066c'
STAR = u'\u066d'
MINI_ALEF = u'\u0670'
ALEF_WASLA = u'\u0671'
FULL_STOP = u'\u06d4'
BYTE_ORDER_MARK = u'\ufeff'

# Diacritics
FATHATAN = u'\u064b'
DAMMATAN = u'\u064c'
KASRATAN = u'\u064d'
FATHA = u'\u064e'
DAMMA = u'\u064f'
KASRA = u'\u0650'
SHADDA = u'\u0651'
SUKUN = u'\u0652'

# Small (superscript) letters; SMALL_ALEF is the same code point as
# MINI_ALEF above.
SMALL_ALEF=u"\u0670"
SMALL_WAW=u"\u06E5"
SMALL_YEH=u"\u06E6"

#---------------------------------------------------------------------------
# Arabic ligatures
#---------------------------------------------------------------------------

# Single-code-point LAM-ALEF ligatures (presentation forms)
LAM_ALEF=u'\ufefb'
LAM_ALEF_HAMZA_ABOVE=u'\ufef7'
LAM_ALEF_HAMZA_BELOW=u'\ufef9'
LAM_ALEF_MADDA_ABOVE=u'\ufef5'
# The same ligatures spelled out as separate letters.
simple_LAM_ALEF=LAM+ALEF
simple_LAM_ALEF_HAMZA_ABOVE=LAM+ALEF_HAMZA_ABOVE
simple_LAM_ALEF_HAMZA_BELOW=LAM+ALEF_HAMZA_BELOW
# NOTE(review): the madda form is decomposed to HAMZA+FATHA+ALEF rather
# than using ALEF_MADDA - presumably deliberate; confirm against callers.
simple_LAM_ALEF_MADDA_ABOVE=LAM+HAMZA+FATHA+ALEF
112
+
113
+
libqutrub/classnoun.py ADDED
@@ -0,0 +1,368 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ # -*- coding=utf-8 -*-
3
+
4
+ #************************************************************************
5
+ # $Id: classnoun.py, v 0.1 2016/04/01 12:14:00 Taha Zerrouki $
6
+ #
7
+ # ------------
8
+ # Description:
9
+ # ------------
10
+ # Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
11
+ #
12
+ # The Main class to do the Noun derivation
13
+ #
14
+ # -----------------
15
+ # Revision Details: (Updated by Revision Control System)
16
+ # -----------------
17
+ # $Date: 2016/04/01 12:14:00 $
18
+ # $Author: Taha Zerrouki $
19
+ # $Revision: 0.1 $
20
+ # $Source: qutrub.sourceforge.net
21
+ #
22
+ #***********************************************************************/
23
+ """
24
+ Noun Class for derivation
25
+ @author: Taha Zerrouki
26
+ @contact: taha dot zerrouki at gmail dot com
27
+ @copyright: Arabtechies, Arabeyes, Taha Zerrouki
28
+ @license: GPL
29
+ @date:2016/04/01
30
+ @version: 0.1
31
+ """
32
+ import pyarabic.araby as araby
33
+ from pyarabic.araby import FATHA, DAMMA, KASRA, SHADDA, SUKUN, HAMZA, ALEF, \
34
+ NOON, YEH_HAMZA, WAW, TATWEEL, MEEM, MEEM, YEH, TEH, ALEF_MAKSURA, DAMMATAN
35
+ import libqutrub.classverb as classverb
36
+ import libqutrub.verb_const as vconst
37
+ import libqutrub.ar_verb as ar_verb
38
+
39
+ #~ class ConjugStem:
40
+ # Noun derivation
41
+ class NounClass(classverb.VerbClass):
42
+ """
43
+ Noun Class: represent a derived noun from a verb or a root
44
+ """
45
    def __init__(self, verb, transitive, future_type=FATHA):
        """
        Initialise the underlying verb, then prepare the noun stems.

        Runs the VerbClass initialiser and then builds the subject-noun and
        object-noun derivation stems.

        @param verb: the given verb
        @type verb: unicode.
        @param transitive: the verb is transitive or not. This value is not
            forwarded: True is always passed to VerbClass (see below).
        @type transitive: Boolean.
        @param future_type: The mark of the third radical letter in the verb,
            used for triliteral verb only. Default value is Fatha;
        @type future_type: unicode; one arabic letter (Fatha, Damma, Kasra).
        """
        # we make transitive as True, to force the verb conjugator
        # to generate passive voices
        classverb.VerbClass.__init__(self, verb, True, future_type)
        self._prepare_subject_stem()
        self._prepare_object_stem()
61
+
62
+
63
+ #####################################
64
+ #{ Attributes functions
65
+ #####################################
66
+ #####################################
67
+ #{ Extract information from verb functions
68
+ #####################################
69
+
70
    def _prepare_subject_stem(self):
        """
        Prepare the derivation stem of the subject noun.

        The stem will be concatenated with conjugation affixes.
        This function stores its result in
        self.tab_conjug_stem[vconst.SubjectNoun].
        It prepares the stem for the following noun type:
        - the active participle (ism al-fa'il)
        """
        # Background (translated from the original author's notes):
        # The active participle is a noun derived from the active-voice
        # verb that describes the one performing the action.
        # 1 - From a triliteral verb it follows the pattern "fa'il".
        #     A hollow verb (middle alef) turns its alef into a hamza;
        #     a middle waw or yeh stays unchanged. A defective verb
        #     (weak last letter) behaves like a defective noun: its final
        #     yeh is dropped in the nominative and genitive and kept in
        #     the accusative.
        # 2 - From an augmented (non-triliteral) verb it follows the
        #     present-tense form, with the present-tense prefix replaced
        #     by a MEEM carrying damma and a kasra on the letter before
        #     the last.
        #     A five-letter verb on the pattern "tafa'ala" has a fatha on
        #     that letter in the present tense but a kasra in the active
        #     participle.
        #
        # Algorithm:
        # 1 - triliteral: if hollow, change the middle letter to hamza;
        #     derive on the pattern "fa'il".
        # 2 - otherwise: if five letters starting with TEH, take the
        #     present tense and put a kasra before the last letter;
        #     else take the present tense as it is.
        letters = self.word_letters
        marks = self.word_marks
        # marks of the derived active participle
        derivation_subject_marks = marks
        derivation_subject_letters = letters
        # triliteral verb
        if self.vlength == 3:
            # A hollow verb is represented by two letters, because its
            # alef is treated as a long haraka; put a hamza in the middle.
            if len(letters) == 2:
                # active participle
                derivation_subject_letters = letters[0] + HAMZA + letters[1]
            elif letters.endswith(ALEF_MAKSURA) or letters.endswith(vconst.ALEF_MAMDUDA):
                derivation_subject_letters = letters[0] + letters[1] + YEH

            # marks of the active participle for every triliteral verb
            derivation_subject_marks = vconst.ALEF_HARAKA + KASRA + DAMMA

        # non-triliteral verb
        else:
            # A five-letter verb starting with TEH has a fatha on its
            # second radical in the present tense, while a kasra is needed
            # here, so the marks are set explicitly.
            if self.vlength == 5 and letters.startswith(TEH):
                derivation_subject_marks = FATHA + FATHA + SUKUN + KASRA + DAMMA

            else :
                # other non-triliteral verbs follow their present tense
                derivation_subject_marks = self.tab_conjug_stem[vconst.TenseFuture].marks
                # NOTE(review): the source this block was recovered from had
                # lost its indentation; this assignment is assumed to belong
                # to this else branch - confirm against upstream.
                derivation_subject_letters = self.tab_conjug_stem[vconst.TenseFuture].letters

        # Handle alefs and long harakat: when the original marks are
        # shorter than the derived marks, a long haraka is present and is
        # converted to the matching form.
        if len(marks) < len(derivation_subject_marks):
            derivation_subject_marks = self._homogenize_harakat(marks, derivation_subject_marks)
        # Add Meem Haraka
        if self.vlength != 3:
            if self.vlength == 5 and letters.startswith(TEH):
                # add Damma for MEEM
                derivation_subject_marks = DAMMA + derivation_subject_marks
            else :
                # present-tense based stems: a leading FATHA is replaced by
                # a DAMMA
                if derivation_subject_marks.startswith(FATHA):
                    derivation_subject_marks = DAMMA + derivation_subject_marks[1:]
        # store the derived active-participle stem
        self.tab_conjug_stem[vconst.SubjectNoun] = classverb.ConjugStem(
            vconst.SubjectNoun, derivation_subject_letters, derivation_subject_marks)
170
+
171
def _prepare_object_stem(self):
    """
    Prepare the derivation stem of the object noun (passive participle).

    The stem is stored in self.tab_conjug_stem[vconst.ObjectNoun]; the
    affixes are attached later, when the noun is conjugated.

    Summary of the grammar rules this method follows:
        - The object noun is derived from the passive verb and names
          whoever undergoes the action (ضُرب -> مضروب, أُكل -> مأكول).
        - Triliteral verbs use the pattern مفعول.
        - Hollow triliteral verbs take the future form with its prefix
          replaced by a meem (قال -> يقول -> مقول, باع -> يبيع -> مبيع).
        - Defective triliteral verbs take the future form with a meem
          prefix and a doubled last letter (دعا -> يدعو -> مدعوّ,
          رمى -> يرمي -> مرميّ).
        - Non-triliteral verbs follow the passive future form with the
          prefix replaced by a meem (أنزل -> يُنزل -> مُنزَل).
    """
    verb_letters = self.word_letters
    stems = self.tab_conjug_stem

    if self.vlength != 3:
        # Non-triliteral verb: reuse the passive future stem.
        model = stems[vconst.TensePassiveFuture]
        object_marks = model.marks
        object_letters = model.letters
    elif len(verb_letters) == 2:
        # Hollow verb: reuse the active future stem.
        model = stems[vconst.TenseFuture]
        object_marks = model.marks
        object_letters = model.letters
    elif (verb_letters.endswith(ALEF_MAKSURA) or verb_letters.endswith(ALEF)
          or verb_letters.endswith(YEH)):
        # Defective verb: reuse the active future stem; a shadda is meant
        # to be added on its last letter afterwards.
        model = stems[vconst.TenseFuture]
        object_marks = model.marks
        object_letters = model.letters
    else:
        # Sound, doubled and assimilated verbs.
        if verb_letters.endswith(SHADDA):
            # Drop the trailing shadda and write the second letter twice.
            object_letters = verb_letters[0] + verb_letters[1] * 2
        else:
            object_letters = verb_letters
        object_marks = FATHA + SUKUN + vconst.WAW_HARAKA + DAMMA

    # NOTE: homogenizing long harakat (as done for the other stems) is
    # deliberately not applied to this stem.
    self.tab_conjug_stem[vconst.ObjectNoun] = classverb.ConjugStem(
        vconst.ObjectNoun, object_letters, object_marks)
264
+
265
def conjugate_noun(self, noun_type):
    """
    Build the vocalized derived noun of the given type.

    The stem prepared for noun_type is read from self.tab_conjug_stem,
    combined with its prefix and suffix, then passed through the sukun
    treatment and the orthographic standardization of ar_verb. The
    results of those steps are memoized in self.cache_standard.

    @param noun_type: the noun to build: vconst.SubjectNoun or
        vconst.ObjectNoun. Any other value is given no prefix and an
        empty suffix, but must still be a key of self.tab_conjug_stem.
    @type noun_type: unicode
    @return: the derived noun.
    @rtype: unicode;
    """
    if noun_type == vconst.SubjectNoun:
        # the subject noun of a triliteral verb takes no prefix
        if self.vlength == 3:
            pre_val = u""
        else:
            pre_val = MEEM
        suf_val = DAMMA
    elif noun_type == vconst.ObjectNoun:
        pre_val = MEEM
        suf_val = DAMMA
    else:
        pre_val = u""
        suf_val = u""

    stem_l = self.tab_conjug_stem[noun_type].letters
    stem_m = self.tab_conjug_stem[noun_type].marks
    # _m : marks
    # _l : letters
    if pre_val != u"":
        # the first stem mark belongs to the prefix letter
        pre_val_l = pre_val
        pre_val_m = stem_m[0]
        stem_m = stem_m[1:]
    else:
        pre_val_l = u""
        pre_val_m = u""

    # the suffix already starts with a haraka,
    # Tatweel is added so that the uniformate function gets a valid word
    suf_val = TATWEEL + suf_val
    # uniformate the suffix; the cache avoids repeating the work.
    # `in` replaces dict.has_key(), which does not exist in Python 3.
    if suf_val in self.cache_standard['suffix']:
        (suf_val_l, suf_val_m) = self.cache_standard['suffix'][suf_val]
    else:
        (suf_val_l, suf_val_m) = ar_verb.uniformate_suffix(suf_val)
        self.cache_standard['suffix'][suf_val] = (suf_val_l, suf_val_m)

    # the last stem mark is replaced by the first suffix mark
    conj_m = pre_val_m + stem_m[:-1] + suf_val_m
    # the suffix letters start with the Tatweel added above: strip it
    conj_l = pre_val_l + stem_l + suf_val_l[1:]

    # treat sukun (cached)
    key_cache = u'-'.join([conj_l, conj_m])
    if key_cache in self.cache_standard['sukun']:
        conj_m = self.cache_standard['sukun'][key_cache]
    else:
        conj_m = ar_verb.treat_sukun2(conj_l, conj_m)
        self.cache_standard['sukun'][key_cache] = conj_m

    # standard orthographic form (cached); the key uses the treated marks
    key_cache = u'-'.join([conj_l, conj_m])
    if key_cache in self.cache_standard['standard']:
        conj = self.cache_standard['standard'][key_cache]
    else:
        conj = ar_verb.standard2(conj_l, conj_m)
        self.cache_standard['standard'][key_cache] = conj
    return conj
342
+
343
def derivate(self):
    """
    Derive the subject noun and the object noun of the verb.

    Both nouns are built by conjugate_noun, then their final damma is
    turned into dammatan. For a triliteral verb, an object noun ending
    with yeh, or with a double waw followed by damma, receives a shadda
    before the dammatan.

    @return: the subject noun and the object noun, separated by a tab.
    @rtype: unicode;
    """
    subject_noun = self.conjugate_noun(vconst.SubjectNoun)
    object_noun = self.conjugate_noun(vconst.ObjectNoun)
    triliteral = self.vlength == 3

    if subject_noun.endswith(araby.DAMMA):
        subject_noun = subject_noun[:-1] + araby.DAMMATAN

    # The three cases below are mutually exclusive: the first two leave
    # the noun ending with dammatan, so no later case can match.
    if triliteral and object_noun.endswith(araby.YEH):
        object_noun = object_noun + SHADDA + DAMMATAN
    elif triliteral and object_noun.endswith(WAW * 2 + DAMMA):
        # drop the second waw and the damma, keep a single doubled waw
        object_noun = object_noun[:-2] + SHADDA + DAMMATAN
    elif object_noun.endswith(araby.DAMMA):
        object_noun = object_noun[:-1] + araby.DAMMATAN

    return u"\t".join([subject_noun, object_noun])
libqutrub/classverb.py ADDED
@@ -0,0 +1,1101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ # -*- coding=utf-8 -*-
3
+
4
+ #************************************************************************
5
+ # $Id: classverb.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
6
+ #
7
+ # ------------
8
+ # Description:
9
+ # ------------
10
+ # Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
11
+ #
12
+ # The Main class to do the conjugation
13
+ #
14
+ # -----------------
15
+ # Revision Details: (Updated by Revision Control System)
16
+ # -----------------
17
+ # $Date: 2009/06/02 01:10:00 $
18
+ # $Author: Taha Zerrouki $
19
+ # $Revision: 0.7 $
20
+ # $Source: arabtechies.sourceforge.net
21
+ #
22
+ #***********************************************************************/
23
+ """
24
+ Verb Class for conjugation
25
+ @author: Taha Zerrouki
26
+ @contact: taha dot zerrouki at gmail dot com
27
+ @copyright: Arabtechies, Arabeyes, Taha Zerrouki
28
+ @license: GPL
29
+ @date:2009/06/02
30
+ @version: 0.9
31
+ """
32
+ import copy
33
+ # from ar_ctype import *
34
+ #~ import sys
35
+ #~ import re
36
+ import pyarabic.araby as araby
37
+ from pyarabic.araby import FATHA, DAMMA, KASRA, SHADDA, SUKUN, HAMZA, ALEF, \
38
+ NOON, YEH_HAMZA, WAW, TATWEEL, MEEM, MEEM, YEH, TEH, ALEF_MAKSURA
39
+ #~ from libqutrub.ar_verb import *
40
+ import libqutrub.ar_verb as ar_verb
41
+ #~ from libqutrub.verb_const import *
42
+ import libqutrub.verb_const as vconst
43
+ import libqutrub.conjugatedisplay as conjugatedisplay
44
+
45
+
46
class ConjugStem:
    """
    Storage structure for one conjugation stem.

    A stem is made of the tense it belongs to, its letters and its
    marks; instances are used to keep the prepared conjugation stems.
    """
    # Class-level defaults, overridden on every instance by __init__.
    tense = u""     # the tense of the stem
    letters = u""   # letters of the conjugated stem
    marks = u""     # marks of the conjugated stem

    def __init__(self, tense, letters, marks):
        """
        Store the three components of the stem.

        @param tense: the given tense
        @type tense: unicode.
        @param letters: the word letters
        @type letters: unicode.
        @param marks: the word marks;
        @type marks: unicode.
        """
        self.tense, self.letters, self.marks = tense, letters, marks
72
# Module-level cache for verb conjugation: one dictionary per kind of
# memoized result (standard form, sukun treatment, uniformated suffix).
cache_standard = {name: {} for name in ('standard', 'sukun', 'suffix')}
76
+ class VerbClass:
77
+ """
78
+ Verb Class: represent a verb, prepare it to be conjugated and store the conjugation result
79
+ """
80
+ #~ verb = u""
81
+ #~ #" internl verb : is the normalized form of the verb"
82
+ #~ internal_verb = u""
83
+ #~ word_letters = u""
84
+ #~ word_marks = u""
85
+ #~ unvocalized = u""
86
+ #~ vlength = 0
87
+ #~ vtype = u""
88
+ #~ future_type = u''
89
+ #~ transitive = u""
90
+ #~ hamza_zaida = False
91
+ #~ #deprecated
92
+ #~ # teh_zaida=False
93
+ #~ future_form = u""
94
+ #~ conj_display = None
95
+ #~ tab_conjug_stem = None
96
def __init__(self, verb, transitive, future_type=FATHA):
    """
    Analyse the verb, prepare its conjugation stems and its display.

    @param verb: the given verb
    @type verb: unicode.
    @param transitive: the verb is transitive or not
    @type transitive: Boolean.
    @param future_type: the mark used to build the future form of a
        triliteral verb; it is converted with
        ar_verb.get_future_type_by_name. Default value is Fatha;
    @type future_type: unicode; one arabic letter (Fatha, Damma, Kasra).
    """
    self.verb = verb
    # shared cache avoiding repeated work in standardisation,
    # treat_sukun and uniformate suffix
    self.cache_standard = cache_standard
    self.internal_verb = ar_verb.normalize(verb)
    self.future_type = ar_verb.get_future_type_by_name(future_type)
    letters_marks = ar_verb.uniformate_verb(verb)
    (self.word_letters, self.word_marks) = letters_marks
    # haraka before the last one in the past form; it must be read
    # before the marks are rewritten on the next statement
    self.past_haraka = araby.secondlast_char(self.word_marks)
    self.word_marks = ar_verb.uniformate_alef_origin(
        self.word_marks, self.internal_verb, self.future_type)

    self.transitive = transitive
    self.hamza_zaida = False  # real value computed below
    self.tab_conjug_stem = {}

    # verb type label, indexed by the number of unvocalized letters
    type_names = (u"", u"", u"", u"فعل ثلاثي", u"فعل رباعي", u"فعل خماسي",
                  u"فعل سداسي", u"فعل سباعي", u"فعل ثماني", u"فعل تساعي")
    normalized = ar_verb.normalize(self.verb)
    self.unvocalized = araby.strip_harakat(normalized)
    bare_verb = self.unvocalized
    self.vlength = len(bare_verb)
    self.vtype = type_names[self.vlength]

    # is the leading hamza an added (non radical) letter?
    self.hamza_zaida = self._is_hamza_zaida(bare_verb)

    # Irregular verbs take their future and imperative stems from a
    # dedicated table; the stems of the other verbs are generated.
    # The past tense has no irregular forms.
    self.past_stem = ""
    self._prepare_past_stem()
    self._prepare_passive_past_stem()
    if self._is_irregular_verb():
        self._prepare_irregular_future_imperative_stem()
    else:
        self._prepare_future_imperative_stem()

    # display object
    display = conjugatedisplay.ConjugateDisplay(self.verb)
    self.conj_display = display
    transitivity = u"متعدي" if self.transitive else u"لازم"
    display.add_attribut(u"اللزوم/التعدي", transitivity)
    display.add_attribut(u"الفعل", self.verb)
    display.add_attribut(u"نوع الفعل", self.vtype)
    self.future_form = self.conjugate_tense_pronoun(
        vconst.TenseFuture, vconst.PronounHuwa)
    display.set_future_form(self.future_form)
    if self.transitive:
        display.settransitive()
    display.setbab(self.future_type)
168
+
169
+
170
+ def __del__(self):
171
+ """
172
+ Delete instance
173
+
174
+ """
175
+ self.conj_display = None
176
+ self.tab_conjug_stem = None
177
+
178
+
179
+ #####################################
180
+ #{ Attributes functions
181
+ #####################################
182
def set_display(self, mode='Text'):
    """
    Select how the conjugation result is displayed.

    The mode is forwarded unchanged to the display object. The modes
    listed for this method are:
        - 'Text'
        - 'HTML'
        - 'HTMLColoredDiacritics'
        - 'DICT'
        - 'CSV'
        - 'GUI'
        - 'TABLE'
        - 'XML'
        - 'TeX'
        - 'ROWS'
    @param mode: the given mode to display result
    """
    display = self.conj_display
    display.setmode(mode)
198
+
199
+
200
+
201
def get_conj_display(self):
    """
    Give back a shallow copy of the conjugation display object.

    @return: an object with result.
    @rtype: conjugatedisplay class
    """
    display = self.conj_display
    return copy.copy(display)
208
+ #####################################
209
+ #{ Extract information from verb functions
210
+ #####################################
211
def _is_hamza_zaida(self, verb_normalized_unvocalized):
    """
    Tell whether the first HAMZA of the verb is not an original letter.

    The targeted pattern is أفعل: the verb has exactly four letters and
    starts with a hamza. The patterns فاعل and فعّل are excluded by
    requiring that the second letter is not an alef and that the third
    one is not a shadda.

    @param verb_normalized_unvocalized: the unvocalized form of the verb.
    @type verb_normalized_unvocalized: unicode
    @return: True if the leading hamza is not original
    @rtype: boolean;
    """
    word = verb_normalized_unvocalized
    return (len(word) == 4
            and word.startswith(HAMZA)
            and word[1] != ALEF
            and word[2] != SHADDA)
237
+
238
def _homogenize_harakat(self, original_harakat, applied_harakat):
    """
    Adapt the harakat to apply so that they fit the letters of the verb.

    When the verb has fewer original harakat than the pattern to apply,
    one of them is a long haraka. The pattern marks at the position of
    that long haraka and at the following position are then merged into
    the single mark given by vconst.HOMOGENIZE_ALEF_HARAKA_TABLE.

    @param original_harakat: given original harakat of the verb.
    @type original_harakat: unicode.
    @param applied_harakat: given harakat to be applied to verb.
    @type applied_harakat: unicode.
    @return: new harakat to be applied to the verb.
    @rtype: unicode.
    """
    if len(original_harakat) >= len(applied_harakat):
        # no long haraka to absorb: the pattern is used as it is
        return applied_harakat

    # position of the first kind of long haraka found, tried in order
    for long_haraka in (vconst.ALEF_HARAKA, vconst.ALEF_WAW_HARAKA,
                        vconst.ALEF_YEH_HARAKA):
        position = original_harakat.find(long_haraka)
        if position >= 0:
            break

    if position < 0 or position + 1 >= len(applied_harakat):
        return applied_harakat

    first = applied_harakat[position]
    second = applied_harakat[position + 1]
    merged = vconst.HOMOGENIZE_ALEF_HARAKA_TABLE[first][second]
    return applied_harakat[:position] + merged + applied_harakat[position + 2:]
271
+ #####################################
272
+ #{ Preparing conjugation stems for every tense functions
273
+ #####################################
274
def _prepare_future_imperative_stem(self):
    """
    Prepare the conjugation stems of the future and imperative tenses.

    Those stems will be concatenated with conjugation affixes.
    The results are stored in self.tab_conjug_stem for these tenses:
        - vconst.TenseFuture: future
        - vconst.TenseJussiveFuture: jussive future
        - vconst.TenseSubjunctiveFuture: subjunctive future
        - vconst.TenseConfirmedFuture: confirmed (heavy) future
        - vconst.TensePassiveFuture: passive future
        - vconst.TensePassiveJussiveFuture: passive jussive future
        - vconst.TensePassiveSubjunctiveFuture: passive subjunctive future
        - vconst.TensePassiveConfirmedFuture: passive confirmed future
        - vconst.TenseImperative: imperative
        - vconst.TenseConfirmedImperative: confirmed imperative.
    """
    letters = self.word_letters
    marks = self.word_marks

    # Marks of the stem for each verb length; the mark of the tense
    # prefix is kept apart and joined again further down.
    if self.vlength == 3:
        prefix_mark = FATHA
        passive_prefix_mark = DAMMA
        active_marks = SUKUN + self.future_type + FATHA
        passive_body_marks = SUKUN + FATHA + FATHA
        # TODO: handling of the waw-initial (assimilated) verb
    elif self.vlength == 4:
        prefix_mark = DAMMA
        passive_prefix_mark = DAMMA
        active_marks = FATHA + SUKUN + KASRA + DAMMA
        passive_body_marks = FATHA + SUKUN + FATHA + DAMMA
    elif self.vlength == 5:
        prefix_mark = FATHA
        passive_prefix_mark = DAMMA
        if letters.startswith(TEH):
            active_marks = FATHA + FATHA + SUKUN + FATHA + DAMMA
            passive_body_marks = FATHA + FATHA + SUKUN + FATHA + DAMMA
        else:
            active_marks = FATHA + SUKUN + FATHA + KASRA + DAMMA
            passive_body_marks = FATHA + SUKUN + FATHA + FATHA + DAMMA
    elif self.vlength == 6:
        prefix_mark = FATHA
        passive_prefix_mark = DAMMA
        active_marks = FATHA + SUKUN + FATHA + SUKUN + KASRA + DAMMA
        passive_body_marks = FATHA + SUKUN + FATHA + SUKUN + FATHA + DAMMA

    # Fewer original marks than pattern marks means that the verb holds
    # a long haraka: convert the pattern accordingly.
    if len(marks) < len(active_marks):
        active_marks = self._homogenize_harakat(marks, active_marks)
        passive_body_marks = self._homogenize_harakat(marks,
                                                      passive_body_marks)

    # The imperative starts from the future marks, taken before the
    # future stem is shortened below.
    imperative_marks = active_marks

    # Waw-initial triliteral verbs may lose their first letter.
    waw_first = self.vlength == 3 and letters.startswith(WAW)
    all_fatha = (self.future_type == FATHA
                 and marks == FATHA + FATHA + FATHA)
    future_drops_waw = waw_first and (
        self.future_type == KASRA
        or (all_fatha and not letters.endswith(SHADDA)))
    imperative_drops_waw = waw_first and (
        self.future_type == KASRA or all_fatha)

    if letters.startswith(ALEF) or self.hamza_zaida:
        # verbs starting with a connecting alef (or an added hamza)
        future_letters = letters[1:]
        active_marks = active_marks[1:]
        passive_body_marks = passive_body_marks[1:]
        passive_letters = letters[1:]
    elif future_drops_waw:
        # the passive keeps the first letter and all its marks
        future_letters = letters[1:]
        active_marks = active_marks[1:]
        passive_letters = letters
    else:
        future_letters = letters
        passive_letters = letters
    future_full_marks = prefix_mark + active_marks
    passive_full_marks = passive_prefix_mark + passive_body_marks

    # Imperative: same marks as the future, without the prefix mark.
    if letters.startswith(ALEF) or imperative_drops_waw:
        imperative_letters = letters[1:]
        imperative_marks = imperative_marks[1:]
    else:
        imperative_letters = letters

    # Defective triliteral verb in the passive voice: the final
    # vconst.ALEF_MAMDUDA is replaced by ALEF_MAKSURA.
    if self.vlength == 3 and passive_letters.endswith(vconst.ALEF_MAMDUDA):
        passive_letters = passive_letters[:-1] + ALEF_MAKSURA

    # active future tenses (indicative, jussive, subjunctive, confirmed)
    for tense in (vconst.TenseFuture, vconst.TenseJussiveFuture,
                  vconst.TenseSubjunctiveFuture,
                  vconst.TenseConfirmedFuture):
        self.tab_conjug_stem[tense] = ConjugStem(
            tense, future_letters, future_full_marks)

    # passive future tenses
    for tense in (vconst.TensePassiveFuture,
                  vconst.TensePassiveJussiveFuture,
                  vconst.TensePassiveSubjunctiveFuture,
                  vconst.TensePassiveConfirmedFuture):
        self.tab_conjug_stem[tense] = ConjugStem(
            tense, passive_letters, passive_full_marks)

    # imperative and confirmed imperative
    for tense in (vconst.TenseImperative, vconst.TenseConfirmedImperative):
        self.tab_conjug_stem[tense] = ConjugStem(
            tense, imperative_letters, imperative_marks)
410
+
411
def _prepare_past_stem(self):
    """
    Prepare the conjugation stem of the past tense.

    The stem will be concatenated with conjugation affixes.
    The normalized verb is kept in self.past_stem, and the stem built
    from the verb letters and marks is stored in self.tab_conjug_stem
    for the tense:
        - vconst.TensePast: the past tense.
    """
    self.past_stem = self.internal_verb
    past = ConjugStem(vconst.TensePast, self.word_letters, self.word_marks)
    self.tab_conjug_stem[vconst.TensePast] = past
422
+
423
+
424
def _prepare_passive_past_stem(self):
    """
    Prepare the conjugation stem of the passive past tense.

    The stem will be concatenated with conjugation affixes.
    The result is stored in self.tab_conjug_stem for the tense:
        - vconst.TensePassivePast: the passive past tense.
    """
    stem_letters = self.word_letters
    verb_marks = self.word_marks

    # three-letter stem ending with vconst.ALEF_MAMDUDA whose second
    # mark is a fatha: the last letter becomes ALEF_MAKSURA
    if (len(stem_letters) == 3
            and stem_letters.endswith(vconst.ALEF_MAMDUDA)
            and verb_marks[1] == FATHA):
        stem_letters = stem_letters[:-1] + ALEF_MAKSURA

    # passive pattern for each verb length
    length = self.vlength
    if length == 3:
        passive_marks = DAMMA + KASRA + FATHA
    elif length == 4:
        passive_marks = DAMMA + SUKUN + KASRA + FATHA
    elif length == 5:
        if stem_letters.startswith(TEH):
            passive_marks = DAMMA + DAMMA + SUKUN + KASRA + FATHA
        else:
            passive_marks = DAMMA + SUKUN + DAMMA + KASRA + FATHA
    elif length == 6:
        passive_marks = DAMMA + SUKUN + DAMMA + SUKUN + KASRA + FATHA

    # Fewer original marks than pattern marks means that the verb holds
    # a long haraka: convert the pattern accordingly.
    if len(verb_marks) < len(passive_marks):
        passive_marks = self._homogenize_harakat(verb_marks, passive_marks)

    # Hollow verb whose future mark is fatha or kasra: its long haraka
    # would turn into a kasra where two sukuns meet; the alternative
    # haraka is used so that it turns into a damma in the passive past.
    hollow_with_yeh = (len(passive_marks) == 2
                       and passive_marks[0] == vconst.YEH_HARAKA)
    if hollow_with_yeh and self.future_type in (FATHA, KASRA):
        passive_marks = vconst.ALTERNATIVE_YEH_HARAKA + FATHA

    self.tab_conjug_stem[vconst.TensePassivePast] = ConjugStem(
        vconst.TensePassivePast, stem_letters, passive_marks)
464
+
465
def conjugate_tense_pronoun(self, tense, pronoun):
    """
    Conjugate the verb in one tense with one pronoun.

    The affixes of the (tense, pronoun) pair are joined to the stem
    prepared for the tense; the marks then go through the sukun
    treatment and the word is put in its standard orthographic form.
    Intermediate results are memoized in self.cache_standard.

    @param tense: given tense
    @type tense: unicode name of the tense
    @param pronoun: given pronoun
    @type pronoun: unicode name of the pronoun
    @return: conjugated verb
    @rtype: unicode;
    """
    affixes = vconst.TableTensePronoun[tense][pronoun]
    prefix = affixes[0]
    suffix = affixes[1]
    stem = self.tab_conjug_stem[tense]
    stem_letters = stem.letters
    stem_marks = stem.marks

    # With a prefix, the first stem mark is the mark of the prefix.
    if prefix != u"":
        prefix_marks = stem_marks[0]
        stem_marks = stem_marks[1:]
    else:
        prefix_marks = u""

    # The suffix already starts with a haraka: a Tatweel is put in front
    # of it so that the uniformate function receives a valid word.
    suffix = TATWEEL + suffix
    suffix_cache = self.cache_standard['suffix']
    try:
        (suffix_letters, suffix_marks) = suffix_cache[suffix]
    except KeyError:
        (suffix_letters, suffix_marks) = ar_verb.uniformate_suffix(suffix)
        suffix_cache[suffix] = (suffix_letters, suffix_marks)

    # The last stem mark gives way to the first suffix mark, and the
    # leading Tatweel of the suffix letters is stripped.
    word_marks = prefix_marks + stem_marks[:-1] + suffix_marks
    word_letters = prefix + stem_letters + suffix_letters[1:]

    # sukun treatment, cached by letters and marks
    sukun_cache = self.cache_standard['sukun']
    sukun_key = u'-'.join([word_letters, word_marks])
    try:
        word_marks = sukun_cache[sukun_key]
    except KeyError:
        word_marks = ar_verb.treat_sukun2(word_letters, word_marks)
        sukun_cache[sukun_key] = word_marks

    # standard orthographic form, cached with the treated marks
    standard_cache = self.cache_standard['standard']
    standard_key = u'-'.join([word_letters, word_marks])
    try:
        conjugated = standard_cache[standard_key]
    except KeyError:
        conjugated = ar_verb.standard2(word_letters, word_marks)
        standard_cache[standard_key] = conjugated
    return conjugated
528
+
529
+
530
+ #----------------------------------------------------------------
531
+ # التصريف في الأزمنة المختلفة،
532
+ # عند وضع قائمة خاصة بالأزمنة المختارة،
533
+ # تلقائيا كافة الأزمنة
534
+ #----------------------------------------------------------------
535
def conjugate_all_tenses(self, listtense = None):
    """
    Conjugate the verb for all pronouns in each of the given tenses.

    Only some forms of each tense are produced by
    conjugate_tense_pronoun(); the other forms are derived from them by
    replacing the first letter or the ending.  Every form is stored in
    self.conj_display.

    @param listtense: tenses to conjugate; when empty or None,
        vconst.TABLE_TENSE is used.
    @type listtense: list of unicode
    @return: the value returned by self.conj_display.display() when it
        is truthy, otherwise None (implicit).
    @rtype: the type is given according to the display mode;
    """
    if not listtense:
        listtense = vconst.TABLE_TENSE
    for tense in listtense:
        if tense == vconst.TensePast:
            # The Ana form is the base of all second person forms.
            conj_ana = self.conjugate_tense_pronoun(tense,
                vconst.PronounAna)
            self.conj_display.add(tense, vconst.PronounAna, conj_ana)
            # Anta / Anti: replace the last character of the Ana form.
            conj_ana_without_last_mark = conj_ana[:-1]
            self.conj_display.add(tense, vconst.PronounAnta,
                conj_ana_without_last_mark+FATHA)
            self.conj_display.add(tense, vconst.PronounAnti,
                conj_ana_without_last_mark+KASRA)
            # Dual and plural second person: append to the full Ana form.
            self.conj_display.add(tense, vconst.PronounAntuma,
                conj_ana+MEEM+FATHA+ALEF)
            self.conj_display.add(tense, vconst.PronounAntuma_f,
                conj_ana+MEEM+FATHA+ALEF)
            self.conj_display.add(tense, vconst.PronounAntum,
                conj_ana+MEEM)
            self.conj_display.add(tense, vconst.PronounAntunna,
                conj_ana+NOON+SHADDA+FATHA)
            # NOTE(review): same call as the first add() for Ana above;
            # looks redundant - confirm add() has no cumulative effect.
            self.conj_display.add(tense, vconst.PronounAna, conj_ana)

            conj_nahnu = self.conjugate_tense_pronoun(tense,
                vconst.PronounNahnu)
            self.conj_display.add(tense, vconst.PronounNahnu, conj_nahnu)

            conj_hunna = self.conjugate_tense_pronoun(tense,
                vconst.PronounHunna)
            self.conj_display.add(tense, vconst.PronounHunna, conj_hunna)

            conj_huma = self.conjugate_tense_pronoun(tense,
                vconst.PronounHuma)
            self.conj_display.add(tense, vconst.PronounHuma, conj_huma)

            conj_hum = self.conjugate_tense_pronoun(tense,
                vconst.PronounHum)
            self.conj_display.add(tense, vconst.PronounHum, conj_hum)

            # NOTE(review): Hunna is conjugated and stored a second time
            # here with the same arguments as above.
            conj_hunna = self.conjugate_tense_pronoun(tense,
                vconst.PronounHunna)
            self.conj_display.add(tense, vconst.PronounHunna, conj_hunna)

            conj_huwa = self.conjugate_tense_pronoun(tense,
                vconst.PronounHuwa)
            self.conj_display.add(tense, vconst.PronounHuwa, conj_huwa)
            conj_hya = self.conjugate_tense_pronoun(tense,
                vconst.PronounHya)
            self.conj_display.add(tense, vconst.PronounHya, conj_hya)
            # Huma (feminine) is derived from the Hya form: its last
            # character is replaced by FATHA + ALEF.
            self.conj_display.add(tense, vconst.PronounHuma_f,
                conj_hya[:-1]+FATHA+ALEF)
        elif tense == vconst.TensePassivePast:
            # Second person forms are derived from Ana exactly as in the
            # active past branch.
            conj_ana = self.conjugate_tense_pronoun(tense,
                vconst.PronounAna)
            self.conj_display.add(tense, vconst.PronounAna, conj_ana)
            conj_ana_without_last_mark = conj_ana[:-1]
            self.conj_display.add(tense, vconst.PronounAnta,
                conj_ana_without_last_mark+FATHA)
            self.conj_display.add(tense, vconst.PronounAnti,
                conj_ana_without_last_mark+KASRA)
            self.conj_display.add(tense, vconst.PronounAntuma,
                conj_ana+MEEM+FATHA+ALEF)
            self.conj_display.add(tense, vconst.PronounAntuma_f,
                conj_ana+MEEM+FATHA+ALEF)
            self.conj_display.add(tense, vconst.PronounAntum,
                conj_ana+MEEM)
            self.conj_display.add(tense, vconst.PronounAntunna,
                conj_ana+NOON+SHADDA+FATHA)
            # NOTE(review): repeated add() for Ana, as in the past branch.
            self.conj_display.add(tense, vconst.PronounAna, conj_ana)

            conj_nahnu = self.conjugate_tense_pronoun(tense,
                vconst.PronounNahnu)
            self.conj_display.add(tense, vconst.PronounNahnu,
                conj_nahnu)

            conj_hunna = self.conjugate_tense_pronoun(tense,
                vconst.PronounHunna)
            self.conj_display.add(tense, vconst.PronounHunna,
                conj_hunna)

            # NOTE(review): Hunna is conjugated and stored twice here too.
            conj_hunna = self.conjugate_tense_pronoun(tense,
                vconst.PronounHunna)
            self.conj_display.add(tense, vconst.PronounHunna,
                conj_hunna)

            conj_huwa = self.conjugate_tense_pronoun(tense,
                vconst.PronounHuwa)
            self.conj_display.add(tense, vconst.PronounHuwa, conj_huwa)
            conj_hum = self.conjugate_tense_pronoun(tense,
                vconst.PronounHum)
            self.conj_display.add(tense, vconst.PronounHum, conj_hum)
            # Case of a verb whose last letter is a hamza (translated
            # from the original comment): the trailing HAMZA + FATHA of
            # the Huwa form is replaced by YEH_HAMZA plus the new ending.
            if conj_huwa.endswith(YEH+HAMZA+FATHA) :
                self.conj_display.add(tense, vconst.PronounHya,
                    conj_huwa[:-2]+YEH_HAMZA+FATHA+TEH+SUKUN)
                self.conj_display.add(tense, vconst.PronounHuma_f,
                    conj_huwa[:-2]+YEH_HAMZA+FATHA+TEH+FATHA+ALEF)
                # Huma is derived from Huwa instead of being conjugated
                # directly (a direct call was disabled in the original).
                self.conj_display.add(tense, vconst.PronounHuma,
                    conj_huwa[:-2]+YEH_HAMZA+FATHA+ALEF)

                # This overrides the Hum form stored just above.
                # NOTE(review): conjugate_tense_for_pronoun() has this
                # override disabled - confirm which behaviour is intended.
                self.conj_display.add(tense, vconst.PronounHum,
                    conj_huwa[:-2]+YEH_HAMZA+DAMMA+WAW+ALEF)

            else :
                # General case: endings are appended to the Huwa form.
                self.conj_display.add(tense, vconst.PronounHya,
                    conj_huwa+TEH+SUKUN)
                self.conj_display.add(tense, vconst.PronounHuma_f,
                    conj_huwa+TEH+FATHA+ALEF)
                self.conj_display.add(tense, vconst.PronounHuma,
                    conj_huwa+ALEF)
                # A derivation of the Hum form from Huwa used to follow
                # here; it is disabled, the directly conjugated Hum form
                # stored above is kept.
        elif tense in (vconst.TenseFuture, vconst.TensePassiveFuture,
            vconst.TenseJussiveFuture, vconst.TenseSubjunctiveFuture,
            vconst.TenseConfirmedFuture, vconst.TensePassiveJussiveFuture,
            vconst.TensePassiveSubjunctiveFuture,
            vconst.TensePassiveConfirmedFuture):
            conj_ana = self.conjugate_tense_pronoun(tense,
                vconst.PronounAna)
            self.conj_display.add(tense, vconst.PronounAna,
                conj_ana)

            conj_anta = self.conjugate_tense_pronoun(tense,
                vconst.PronounAnta)
            self.conj_display.add(tense, vconst.PronounAnta,
                conj_anta)
            # Nahnu, Huwa and Hya reuse the Anta form with a different
            # first character.
            conj_anta_without_future_letter = conj_anta[1:]
            self.conj_display.add(tense, vconst.PronounNahnu,
                NOON+conj_anta_without_future_letter)
            self.conj_display.add(tense, vconst.PronounHuwa,
                YEH+conj_anta_without_future_letter)
            self.conj_display.add(tense, vconst.PronounHya,
                TEH+conj_anta_without_future_letter)

            conj_anti = self.conjugate_tense_pronoun(tense,
                vconst.PronounAnti)
            self.conj_display.add(tense, vconst.PronounAnti,
                conj_anti)

            # The Antuma form is stored unchanged for Antuma (feminine)
            # and Huma (feminine); Huma gets YEH as first character.
            conj_antuma = self.conjugate_tense_pronoun(tense,
                vconst.PronounAntuma)
            self.conj_display.add(tense, vconst.PronounAntuma,
                conj_antuma)
            self.conj_display.add(tense, vconst.PronounAntuma_f,
                conj_antuma)
            self.conj_display.add(tense, vconst.PronounHuma_f,
                conj_antuma)
            self.conj_display.add(tense, vconst.PronounHuma,
                YEH+conj_antuma[1:])

            # Hum is the Antum form with YEH as first character.
            conj_antum = self.conjugate_tense_pronoun(tense,
                vconst.PronounAntum)
            self.conj_display.add(tense, vconst.PronounAntum,
                conj_antum)
            self.conj_display.add(tense, vconst.PronounHum,
                YEH+conj_antum[1:])

            # Hunna is the Antunna form with YEH as first character.
            conj_antunna = self.conjugate_tense_pronoun(tense,
                vconst.PronounAntunna)
            self.conj_display.add(tense, vconst.PronounAntunna,
                conj_antunna)
            self.conj_display.add(tense, vconst.PronounHunna,
                YEH+conj_antunna[1:])
        elif tense == vconst.TenseImperative or \
            tense == vconst.TenseConfirmedImperative:
            # Imperative tenses are conjugated directly, and only for
            # the pronouns listed in vconst.ImperativePronouns.
            for pron in vconst.ImperativePronouns:
                conj = self.conjugate_tense_pronoun(tense, pron)
                self.conj_display.add(tense, pron, conj)
    # NOTE(review): the nesting of the statements below was reconstructed;
    # they are assumed to run once, after the loop over listtense.
    # For a verb that is not transitive, blank the passive forms of the
    # pronouns listed in vconst.PronounsTableNotPassiveForUntransitive.
    if not self.transitive:
        for tense in vconst.TablePassiveTense:
            for pron in vconst.PronounsTableNotPassiveForUntransitive:
                self.conj_display.add(tense, pron, u"")
    # if the result is not displayed directly on the screen, we return it
    result = self.conj_display.display(self.conj_display.mode,
        listtense)
    if result:
        return result
729
+
730
def conjugate_tense_for_pronoun(self, tense, pronoun):
    """
    Conjugate the verb for one pronoun in one tense.

    Some pronouns are conjugated directly with
    conjugate_tense_pronoun(); the others are derived from the form of
    a base pronoun (Ana, Huwa, Hya, Anta, Antuma, Antum or Antunna).
    A base form already stored in self.conj_display is reused,
    otherwise it is conjugated first.  The result is stored in
    self.conj_display and read back from it.

    @param tense: given tense
    @type tense: unicode
    @param pronoun: given pronoun
    @type pronoun: unicode
    @return: conjugated verb as stored in self.conj_display for
        (tense, pronoun); for an unhandled tense nothing is added and
        the currently stored value is returned.
    @rtype: unicode;
    """
    if tense == vconst.TensePast:
        # directly conjugated pronouns
        if pronoun in (vconst.PronounAna, vconst.PronounNahnu,
                       vconst.PronounHunna, vconst.PronounHuma,
                       vconst.PronounHum, vconst.PronounHuwa,
                       vconst.PronounHya):
            conj = self.conjugate_tense_pronoun(tense, pronoun)
            self.conj_display.add(tense, pronoun, conj)
        # indirect conjugation, derived from the Ana form
        elif pronoun in (vconst.PronounAnta, vconst.PronounAnti,
                         vconst.PronounAntuma, vconst.PronounAntuma_f,
                         vconst.PronounAntum, vconst.PronounAntunna):
            # Reuse the stored Ana form when there is one.  The lookup
            # must be done on Ana, not on the requested pronoun:
            # looking up the requested pronoun made a second call append
            # the ending to the already derived form.
            conj_ana = self.conj_display.get_conj(tense,
                                                  vconst.PronounAna)
            if conj_ana == u"":
                conj_ana = self.conjugate_tense_pronoun(
                    tense, vconst.PronounAna)
            conj_ana_without_last_mark = conj_ana[:-1]
            if pronoun == vconst.PronounAnta:
                self.conj_display.add(tense, vconst.PronounAnta,
                                      conj_ana_without_last_mark+FATHA)
            elif pronoun == vconst.PronounAnti:
                self.conj_display.add(tense, vconst.PronounAnti,
                                      conj_ana_without_last_mark+KASRA)
            elif pronoun == vconst.PronounAntuma:
                self.conj_display.add(tense, vconst.PronounAntuma,
                                      conj_ana+MEEM+FATHA+ALEF)
            elif pronoun == vconst.PronounAntuma_f:
                self.conj_display.add(tense, vconst.PronounAntuma_f,
                                      conj_ana+MEEM+FATHA+ALEF)
            elif pronoun == vconst.PronounAntum:
                self.conj_display.add(tense, vconst.PronounAntum,
                                      conj_ana+MEEM)
            elif pronoun == vconst.PronounAntunna:
                self.conj_display.add(tense, vconst.PronounAntunna,
                                      conj_ana+NOON+SHADDA+FATHA)
        # indirect conjugation, derived from the Hya form
        elif pronoun == vconst.PronounHuma_f:
            conj_hya = self.conj_display.get_conj(tense,
                                                  vconst.PronounHya)
            if conj_hya == u"":
                conj_hya = self.conjugate_tense_pronoun(
                    tense, vconst.PronounHya)
            self.conj_display.add(tense, vconst.PronounHuma_f,
                                  conj_hya[:-1]+FATHA+ALEF)
    elif tense == vconst.TensePassivePast:
        # directly conjugated pronouns
        if pronoun in (vconst.PronounAna, vconst.PronounNahnu,
                       vconst.PronounHunna, vconst.PronounHuwa,
                       vconst.PronounHum):
            conj = self.conjugate_tense_pronoun(tense, pronoun)
            self.conj_display.add(tense, pronoun, conj)
        # indirect conjugation, derived from the Ana form
        elif pronoun in (vconst.PronounAnta, vconst.PronounAnti,
                         vconst.PronounAntuma, vconst.PronounAntuma_f,
                         vconst.PronounAntum, vconst.PronounAntunna):
            conj_ana = self.conj_display.get_conj(tense,
                                                  vconst.PronounAna)
            if conj_ana == u"":
                conj_ana = self.conjugate_tense_pronoun(
                    tense, vconst.PronounAna)
                self.conj_display.add(tense, vconst.PronounAna,
                                      conj_ana)
            conj_ana_without_last_mark = conj_ana[:-1]
            if pronoun == vconst.PronounAnta:
                self.conj_display.add(tense, vconst.PronounAnta,
                                      conj_ana_without_last_mark+FATHA)
            elif pronoun == vconst.PronounAnti:
                self.conj_display.add(tense, vconst.PronounAnti,
                                      conj_ana_without_last_mark+KASRA)
            elif pronoun == vconst.PronounAntuma:
                self.conj_display.add(tense, vconst.PronounAntuma,
                                      conj_ana+MEEM+FATHA+ALEF)
            elif pronoun == vconst.PronounAntuma_f:
                self.conj_display.add(tense, vconst.PronounAntuma_f,
                                      conj_ana+MEEM+FATHA+ALEF)
            elif pronoun == vconst.PronounAntum:
                self.conj_display.add(tense, vconst.PronounAntum,
                                      conj_ana+MEEM)
            elif pronoun == vconst.PronounAntunna:
                self.conj_display.add(tense, vconst.PronounAntunna,
                                      conj_ana+NOON+SHADDA+FATHA)
        # indirect conjugation, derived from the Huwa form
        elif pronoun in (vconst.PronounHya, vconst.PronounHuma_f,
                         vconst.PronounHuma):
            conj_huwa = self.conj_display.get_conj(tense,
                                                   vconst.PronounHuwa)
            if conj_huwa == u"":
                conj_huwa = self.conjugate_tense_pronoun(
                    tense, vconst.PronounHuwa)
                self.conj_display.add(tense, vconst.PronounHuwa,
                                      conj_huwa)
            # The three Huwa-derived forms are stored together,
            # whichever of them was requested.
            # Case of a verb whose last letter is a hamza: the trailing
            # HAMZA + FATHA is replaced by YEH_HAMZA plus the ending.
            if conj_huwa.endswith(YEH+HAMZA+FATHA):
                self.conj_display.add(
                    tense, vconst.PronounHya,
                    conj_huwa[:-2]+YEH_HAMZA+FATHA+TEH+SUKUN)
                self.conj_display.add(
                    tense, vconst.PronounHuma_f,
                    conj_huwa[:-2]+YEH_HAMZA+FATHA+TEH+FATHA+ALEF)
                self.conj_display.add(
                    tense, vconst.PronounHuma,
                    conj_huwa[:-2]+YEH_HAMZA+FATHA+ALEF)
            else:
                self.conj_display.add(tense, vconst.PronounHya,
                                      conj_huwa+TEH+SUKUN)
                self.conj_display.add(tense, vconst.PronounHuma_f,
                                      conj_huwa+TEH+FATHA+ALEF)
                self.conj_display.add(tense, vconst.PronounHuma,
                                      conj_huwa+ALEF)
    elif tense in (vconst.TenseFuture, vconst.TensePassiveFuture,
                   vconst.TenseJussiveFuture,
                   vconst.TenseSubjunctiveFuture,
                   vconst.TenseConfirmedFuture,
                   vconst.TensePassiveJussiveFuture,
                   vconst.TensePassiveSubjunctiveFuture,
                   vconst.TensePassiveConfirmedFuture):
        # directly conjugated pronouns
        if pronoun in (vconst.PronounAna, vconst.PronounAnta,
                       vconst.PronounAnti, vconst.PronounAntuma,
                       vconst.PronounAntum, vconst.PronounAntunna):
            conj = self.conjugate_tense_pronoun(tense, pronoun)
            self.conj_display.add(tense, pronoun, conj)
        # indirect pronouns: the Anta form with another first letter
        elif pronoun in (vconst.PronounNahnu, vconst.PronounHuwa,
                         vconst.PronounHya):
            conj_anta = self.conj_display.get_conj(tense,
                                                   vconst.PronounAnta)
            if conj_anta == u"":
                conj_anta = self.conjugate_tense_pronoun(
                    tense, vconst.PronounAnta)
                self.conj_display.add(tense, vconst.PronounAnta,
                                      conj_anta)
            conj_anta_without_future_letter = conj_anta[1:]
            if pronoun == vconst.PronounNahnu:
                self.conj_display.add(
                    tense, vconst.PronounNahnu,
                    NOON+conj_anta_without_future_letter)
            elif pronoun == vconst.PronounHuwa:
                self.conj_display.add(
                    tense, vconst.PronounHuwa,
                    YEH+conj_anta_without_future_letter)
            elif pronoun == vconst.PronounHya:
                self.conj_display.add(
                    tense, vconst.PronounHya,
                    TEH+conj_anta_without_future_letter)
        # indirect pronouns: derived from the Antuma form
        # (Antuma itself is handled by the direct branch above)
        elif pronoun in (vconst.PronounAntuma_f, vconst.PronounHuma,
                         vconst.PronounHuma_f):
            conj_antuma = self.conj_display.get_conj(
                tense, vconst.PronounAntuma)
            if conj_antuma == u"":
                conj_antuma = self.conjugate_tense_pronoun(
                    tense, vconst.PronounAntuma)
                self.conj_display.add(tense, vconst.PronounAntuma,
                                      conj_antuma)
            if pronoun == vconst.PronounAntuma_f:
                self.conj_display.add(tense, vconst.PronounAntuma_f,
                                      conj_antuma)
            if pronoun == vconst.PronounHuma_f:
                self.conj_display.add(tense, vconst.PronounHuma_f,
                                      conj_antuma)
            if pronoun == vconst.PronounHuma:
                self.conj_display.add(tense, vconst.PronounHuma,
                                      YEH+conj_antuma[1:])
        # indirect pronoun: the Antum form with YEH as first letter
        elif pronoun == vconst.PronounHum:
            conj_antum = self.conj_display.get_conj(tense,
                                                    vconst.PronounAntum)
            if conj_antum == u"":
                conj_antum = self.conjugate_tense_pronoun(
                    tense, vconst.PronounAntum)
                self.conj_display.add(tense, vconst.PronounAntum,
                                      conj_antum)
            self.conj_display.add(tense, vconst.PronounHum,
                                  YEH+conj_antum[1:])
        # indirect pronoun: the Antunna form with YEH as first letter
        elif pronoun == vconst.PronounHunna:
            conj_antunna = self.conj_display.get_conj(
                tense, vconst.PronounAntunna)
            if conj_antunna == u"":
                conj_antunna = self.conjugate_tense_pronoun(
                    tense, vconst.PronounAntunna)
                self.conj_display.add(tense, vconst.PronounAntunna,
                                      conj_antunna)
            self.conj_display.add(tense, vconst.PronounHunna,
                                  YEH+conj_antunna[1:])
    elif tense == vconst.TenseImperative or \
            tense == vconst.TenseConfirmedImperative:
        conj = self.conjugate_tense_pronoun(tense, pronoun)
        self.conj_display.add(tense, pronoun, conj)
    # the conjugated form is stored in conj_display
    return self.conj_display.get_conj(tense, pronoun)
951
+ #####################################
952
+ #{ Irregular verbs functions
953
+ #####################################
954
def _is_irregular_verb(self):
    """
    Tell whether the verb is listed in the irregular verb table.

    Only verbs with exactly three letters can be irregular.  The lookup
    key is the verb letters followed by the past mark and the future
    mark.  For the letters u"ءرى" both marks are first forced on the
    instance (past FATHA, future KASRA).

    Background given by the original (Arabic) note: the special verbs
    are رأى، أكل، أمر، سأل; verbs of the pattern مَنَعَ يَمْنَعُ starting
    with waw drop it in the future (e.g. وَضَعَ يَضَعُ), except for
    وَبَأ، وَبَهَ، وَجَعَ، وَسَعَ، وَهَلَ which keep it.

    @return: True if irregular
    @rtype: Boolean
    """
    letters = self.word_letters
    if len(letters) != 3:
        return False
    if letters == u"ءرى":
        # this verb always gets the same past and future marks
        self.past_haraka = araby.FATHA
        self.future_type = araby.KASRA
    # letters + past mark + future mark identify an irregular verb
    lookup_key = letters + self.past_haraka + self.future_type
    return lookup_key in vconst.IRREGULAR_VERB_CONJUG
985
+
986
+
987
def _get_irregular_future_stem(self):
    """
    Look up the future stem of an irregular verb.

    @return: the vconst.TenseFuture entry of the irregular verb table
        for this verb, or self.word_letters when the verb is not in
        the table.
    @rtype: unicode;
    """
    # letters + past mark + future mark identify an irregular verb
    lookup_key = self.word_letters + self.past_haraka + self.future_type
    if lookup_key not in vconst.IRREGULAR_VERB_CONJUG:
        return self.word_letters
    stems = vconst.IRREGULAR_VERB_CONJUG[lookup_key]
    return stems[vconst.TenseFuture]
1000
+
1001
+
1002
def _get_irregular_passivefuture_stem(self):
    """
    Look up the passive future stem of an irregular verb.

    @return: the vconst.TensePassiveFuture entry of the irregular verb
        table for this verb, or self.word_letters when the verb is not
        in the table.
    @rtype: unicode;
    """
    # letters + past mark + future mark identify an irregular verb
    lookup_key = self.word_letters + self.past_haraka + self.future_type
    if lookup_key not in vconst.IRREGULAR_VERB_CONJUG:
        return self.word_letters
    stems = vconst.IRREGULAR_VERB_CONJUG[lookup_key]
    return stems[vconst.TensePassiveFuture]
1015
+
1016
+
1017
def _get_irregular_imperative_stem(self):
    """
    Look up the imperative stem of an irregular verb.

    @return: the vconst.TenseImperative entry of the irregular verb
        table for this verb, or self.word_letters when the verb is not
        in the table.
    @rtype: unicode;
    """
    # letters + past mark + future mark identify an irregular verb
    lookup_key = self.word_letters + self.past_haraka + self.future_type
    if lookup_key not in vconst.IRREGULAR_VERB_CONJUG:
        return self.word_letters
    stems = vconst.IRREGULAR_VERB_CONJUG[lookup_key]
    return stems[vconst.TenseImperative]
1030
+
1031
+ # prepare the irregular conjug for future and imperative
1032
+ # تحضير جذوع التصريف في المضارع والأمر للأفعال الضاذة
1033
def _prepare_irregular_future_imperative_stem(self):
    """
    Fill self.tab_conjug_stem for an irregular verb.

    When the verb is irregular, one ConjugStem is stored per tense:
    the four active future tenses share the future stem, the four
    passive future tenses share the passive future stem, and the two
    imperative tenses share the imperative stem.  Nothing is stored
    for a regular verb.

    @return: always False.
    """
    if self._is_irregular_verb():
        # each stem getter is paired with the tenses built from it;
        # getters return a (letters, marks) pair
        stem_groups = (
            (self._get_irregular_future_stem,
             (vconst.TenseFuture,
              vconst.TenseJussiveFuture,
              vconst.TenseSubjunctiveFuture,
              vconst.TenseConfirmedFuture)),
            (self._get_irregular_passivefuture_stem,
             (vconst.TensePassiveFuture,
              vconst.TensePassiveJussiveFuture,
              vconst.TensePassiveSubjunctiveFuture,
              vconst.TensePassiveConfirmedFuture)),
            (self._get_irregular_imperative_stem,
             (vconst.TenseImperative,
              vconst.TenseConfirmedImperative)),
        )
        for fetch_stem, tenses in stem_groups:
            stem_letters, stem_marks = fetch_stem()
            for one_tense in tenses:
                self.tab_conjug_stem[one_tense] = ConjugStem(
                    one_tense, stem_letters, stem_marks)
    return False
1070
+
1071
+
1072
def get_conj(self, tense, pronoun):
    """
    Return the stored conjugation for a tense and a pronoun.

    The lookup is delegated to self.conj_display.

    @param tense: tense of the wanted conjugation.
    @type tense: unicode
    @param pronoun: pronoun of the wanted conjugation.
    @type pronoun: unicode
    @return : conjugated form of verb if exists.
    @rtype : unicode
    """
    display = self.conj_display
    stored_form = display.get_conj(tense, pronoun)
    return stored_form
1083
+
1084
def get_pronoun_features(self, pronoun):
    """
    Return the entry of vconst.PRONOUN_FEATURES for a pronoun.

    @param pronoun: pronoun of conjugation.
    @type pronoun: unicode
    @return : dictionary of pronoun attributes, None when the pronoun
        has no entry.
    @rtype : dictionary
    """
    features_table = vconst.PRONOUN_FEATURES
    return features_table.get(pronoun)
1093
def get_tense_features(self, tense):
    """
    Return the entry of vconst.TENSE_FEATURES for a tense.

    @param tense: tense of the conjugation.
    @type tense: unicode
    @return : dictionary of tense attributes, None when the tense has
        no entry.
    @rtype : dictionary
    """
    features_table = vconst.TENSE_FEATURES
    return features_table.get(tense)
libqutrub/conjugate.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ # -*- coding=utf-8 -*-
3
+ #************************************************************************
4
+ # $Id: conjugate.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
5
+ #
6
+ # ------------
7
+ # Description:
8
+ # ------------
9
+ # Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
10
+ #
11
+ # This file is the main file to execute the application in the command line
12
+ #
13
+ # -----------------
14
+ # Revision Details: (Updated by Revision Control System)
15
+ # -----------------
16
+ # $Date: 2009/06/02 01:10:00 $
17
+ # $Author: Taha Zerrouki $
18
+ # $Revision: 0.7 $
19
+ # $Source: arabtechies.sourceforge.net
20
+ #
21
+ #***********************************************************************/
22
+ """
23
+ Conjugate console
24
+ """
25
+
26
+ import sys
27
+ import getopt
28
+ import os
29
+ sys.path.append('../')
30
+ import libqutrub.mosaref_main as mosaref_main
31
+ import libqutrub.ar_verb as ar_verb
32
+ import libqutrub.verb_valid as verb_valid
33
+
34
# Base name of the running script (sys.argv[0]) without its extension;
# shown in the usage message.
SCRIPT_NAME = os.path.splitext(os.path.basename(sys.argv[0]))[0]
# Version string printed by the -V / --version option.
SCRIPT_VERSION = '0.1'
# Author name shown in the first line of the usage message.
AUTHOR_NAME = "Taha Zerrouki"
37
def usage():
    """Print the command line help to standard output.

    Uses print() calls with a single argument, so the function runs on
    Python 3 (the print statement is a syntax error there) and prints
    the same text on Python 2.
    """
    print("(C) CopyLeft 2009, %s" % AUTHOR_NAME)
    print("Usage: %s -f filename [OPTIONS]" % SCRIPT_NAME)
    print("\t[-h | --help]\toutputs this usage message")
    print("\t[-V | --version]\tprogram version")
    print("\t[-f | --file=filename]\tinput file to %s" % SCRIPT_NAME)
    print("\t[-d | --display=format]\tdisplay format as html, csv, tex, xml")
    print("\t[-a | --all ] \tConjugate in all tenses")
    print("\t[-i | --imperative]\tConjugate in imperative")
    print("\t[-F | --future]\tconjugate in the present and the future")
    print("\t[-p | --past]\t conjugate in the past")
    print("\t[-c | --confirmed] conjugate in confirmed (future or imperative)")
    # one message spread over two output lines
    print("\t[-m | --moode]\tconjugate in future Subjunctive(mansoub)\n"
          "or Jussive (majzoom)")
    print(" \t[-v | --passive] passive form")
    print("\r\nN.B. FILE FORMAT is descripted in README")
    print("\r\nThis program is licensed under the GPL License\n")
56
+
57
+
58
def grabargs():
    """Parse the command line options found in sys.argv.

    Without any argument, on -h/--help and on an invalid option the
    usage message is printed and the program exits with status 0.
    -V/--version prints SCRIPT_VERSION and exits with status 0.

    Returns a tuple (fname, alltense, future, past, passive,
    imperative, confirmed, future_moode, display_format).  fname
    defaults to '' and display_format to 'csv'; a display format given
    on the command line is upper-cased.
    NOTE(review): the default format is lower case while a given one is
    upper-cased - confirm that the display code accepts both.
    """
    alltense = False
    future = False
    past = False
    passive = False
    imperative = False
    confirmed = False
    future_moode = False
    fname = ''
    display_format = 'csv'

    if not sys.argv[1:]:
        usage()
        sys.exit(0)
    try:
        # Long options that take a value must end with "=" and contain
        # no space, otherwise getopt rejects "--file=name".
        # "-i" is a plain flag; the former extra "i:" entry was never
        # taken into account by getopt and is dropped.
        opts, _args = getopt.getopt(
            sys.argv[1:], "hVvcmaiFpd:f:",
            ["help", "version", "imperative", "passive",
             "confirmed", "moode", "past", "all",
             "future", "file=", "display="])
    except getopt.GetoptError:
        usage()
        sys.exit(0)
    for opt, val in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        if opt in ("-V", "--version"):
            print(SCRIPT_VERSION)
            sys.exit(0)
        if opt in ("-v", "--passive"):
            passive = True
        if opt in ("-f", "--file"):
            fname = val
        if opt in ("-d", "--display"):
            display_format = val.upper()
        if opt in ("-F", "--future"):
            future = True
        if opt in ("-a", "--all"):
            alltense = True
        if opt in ("-p", "--past"):
            past = True
        if opt in ("-i", "--imperative"):
            imperative = True
        if opt in ("-c", "--confirmed"):
            confirmed = True
        if opt in ("-m", "--moode"):
            future_moode = True

    return (fname, alltense, future, past, passive, imperative, confirmed,
            future_moode, display_format)
109
+
110
def main():
    """Conjugate every verb listed in the input file and print the result.

    The options come from grabargs().  The input file is read as UTF-8;
    lines starting with "#" are skipped and the others are split on
    tabs.  A line needs at least two fields: the verb and its future
    type; an optional third field gives the transitivity.  A verb
    rejected by verb_valid.is_valid_infinitive_verb() is reported, the
    others are conjugated with mosaref_main.do_sarf().

    Exits with status 0 when the input file cannot be opened.
    """
    filename, alltense, future, past, passive, imperative, confirmed, \
        future_moode, display_format = grabargs()
    try:
        # text mode with an explicit encoding: lines are already decoded
        fle = open(filename, encoding="utf8")
    except IOError:
        print(" Error :No such file or directory: %s" % filename)
        sys.exit(0)

    print(filename, alltense, future, past, passive, imperative,
          confirmed, future_moode)

    nb_field = 2
    verb_table = []
    # the context manager closes the file even if reading fails
    with fle:
        for line in fle:
            if line.startswith("#"):
                continue
            # strip every field so that the last one does not keep the
            # end of line, which made the transitivity test fail
            fields = [field.strip() for field in line.split("\t")]
            if len(fields) >= nb_field:
                verb_table.append(fields)

    for tuple_verb in verb_table:
        word = tuple_verb[0]

        if not verb_valid.is_valid_infinitive_verb(word):
            print(u"is invalid verb ", word)
        else:
            future_type = ar_verb.get_future_type_entree(tuple_verb[1])
            # the third field is optional: a line with two fields is
            # accepted above, so it must not raise IndexError here
            transitive_field = tuple_verb[2] if len(tuple_verb) > 2 else u""
            transitive = transitive_field in (
                u"متعدي", u"م", u"مشترك", u"ك", "t", "transitive")
            text = mosaref_main.do_sarf(word, future_type, alltense, past,
                                        future, passive, imperative,
                                        future_moode, confirmed,
                                        transitive, display_format)
            print(text)
157
+
158
+ if __name__ == "__main__":
159
+ main()
160
+
161
+
162
+
163
+
164
+
165
+
166
+
libqutrub/conjugatedisplay.py ADDED
@@ -0,0 +1,568 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ # -*- coding=utf-8 -*-
3
+ #************************************************************************
4
+ # $Id: conjugateddisplay.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
5
+ #
6
+ # ------------
7
+ # Description:
8
+ # ------------
9
+ # Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
10
+ #
11
+ # The Class used to display information after conjugated
12
+ # All print and views and display are redirected to this class
13
+ #
14
+ # -----------------
15
+ # Revision Details: (Updated by Revision Control System)
16
+ # -----------------
17
+ # $Date: 2009/06/02 01:10:00 $
18
+ # $Author: Taha Zerrouki $
19
+ # $Revision: 0.7 $
20
+ # $Source: arabtechies.sourceforge.net
21
+ #
22
+ #***********************************************************************/
23
+ """
24
+ The conjugation display class to manage different display format.
25
+ """
26
+ #~ from libqutrub.verb_const import *
27
+ import libqutrub.verb_const as vconst
28
+ import pyarabic.araby as araby
29
+
30
# Row of the conjugation display table: one empty entry per pronoun.
# (The original note also mentions the per-tense display table.)
# Defined once as a module-level variable to reduce the cost of
# rebuilding the display table for every display; it is copied for
# each tense when a display table is built.
ONE_TENSE_PRONOUN = {u"أنا":"" , u"أنت":"" , u"أنتِ":"" , u"هو":"" ,
    u"هي":"" , u"أنتما":"" , u"أنتما مؤ":"" , u"هما":"" ,
    u"هما مؤ":"" ,
    u"نحن":"" , u"أنتم":"" , u"أنتن":"" , u"هم":"" , u"هن":""}
38
+
39
# The global TableConj variable was removed because it caused problems.
# TAB_DISPLAY maps each pronoun constant and each tense constant to a
# short numeric code given as a string.
# NOTE(review): where these codes are consumed is not visible here.
TAB_DISPLAY = {
    # codes for pronouns
    vconst.PronounAna:u"1",
    vconst.PronounNahnu:u"2",
    vconst.PronounAnta:u"3",
    # NOTE(review): this value is "4" followed by an Arabic kasra mark,
    # unlike the other codes; looks accidental - confirm before changing.
    vconst.PronounAnti:u"4ِ",
    vconst.PronounAntuma:u"5",
    vconst.PronounAntuma_f:u"6",
    vconst.PronounAntum:u"7",
    vconst.PronounAntunna:u"8",
    vconst.PronounHuwa:u"9",
    vconst.PronounHya:u"10",
    vconst.PronounHuma:u"11",
    vconst.PronounHuma_f:u"12",
    vconst.PronounHum:u"13",
    vconst.PronounHunna:u"14",


    # codes for tense names (active tenses)
    vconst.TensePast:u"20",
    vconst.TenseFuture:u"21",
    vconst.TenseImperative:u"22",
    vconst.TenseConfirmedImperative:u"23",
    vconst.TenseJussiveFuture:u"24",
    vconst.TenseSubjunctiveFuture:u"25",
    vconst.TenseConfirmedFuture:u"26",


    # codes for passive tenses
    vconst.TensePassivePast:u"27",
    vconst.TensePassiveFuture:u"28",
    vconst.TensePassiveJussiveFuture:u"29",
    vconst.TensePassiveSubjunctiveFuture:u"30",
    vconst.TensePassiveConfirmedFuture:u"31",
    }
73
+
74
class ConjugateDisplay:
    """
    Collect the conjugated forms of one verb and render them in several
    output formats (text, HTML, CSV, XML, TeX, dict, table, rows).

    Conjugations are stored in ``tab_conjug`` as a dict of dicts:
    ``tab_conjug[tense][pronoun] -> conjugated form``.
    """
    # Class-level defaults; __init__ rebinds every one of them per instance
    # (except ``pronouns``, which is not used by this class).
    tab_conjug = {}
    pronouns = {}
    verb = u""
    mode = 'Text'
    future_form = u""
    text = {}
    transitive = False

    def __init__(self, verb):
        """
        Create the display instance for the verb.
        @param verb: given verb.
        @type verb: unicode.
        """
        prefilled_tenses = (
            vconst.TensePast,
            vconst.TensePassivePast,
            vconst.TenseFuture,
            vconst.TensePassiveFuture,
            vconst.TenseJussiveFuture,
            vconst.TensePassiveJussiveFuture,
            vconst.TenseSubjunctiveFuture,
            vconst.TensePassiveSubjunctiveFuture,
            vconst.TenseImperative,
            vconst.TenseConfirmedFuture,
            vconst.TenseConfirmedImperative,
            # This tense is part of the default vconst.TABLE_TENSE but used
            # not to be pre-filled, so displaying the default tense list
            # raised KeyError when nothing had been added for it.
            vconst.TensePassiveConfirmedFuture,
        )
        # Every tense gets its own copy of the empty pronoun row.
        self.tab_conjug = {
            tense: ONE_TENSE_PRONOUN.copy() for tense in prefilled_tenses
        }
        self.verb = verb
        self.text = {}
        self.mode = 'Text'
        self.future_form = u""
        self.transitive = False
        self.bab = "0"

    def __del__(self):
        """Reset the instance attributes when the object is destroyed."""
        self.tab_conjug = {}
        self.verb = ""
        self.text = {}
        self.mode = 'Text'
        self.future_form = u""
        self.transitive = False
        self.bab = "0"

    #####################################
    #{ Attributes functions
    #####################################
    def setmode(self, mode):
        """
        Set the default display mode, one of: 'Text', 'HTML',
        'HTMLColoredDiacritics', 'DICT', 'CSV', 'GUI', 'TABLE', 'XML',
        'TeX', 'ROWS'.
        @param mode: the given mode to display result
        @type mode: unicode
        """
        self.mode = mode

    def settransitive(self):
        """
        Set the transitivity value to True.
        """
        self.transitive = True

    def setbab(self, bab):
        """
        Set the bab sarf (conjugation class) value.
        @param bab: the given sarf bab.
        @type bab: integer (1-6)
        """
        self.bab = bab

    def set_future_form(self, future_form):
        """
        Set the future form of the verb.
        @param future_form: the future form.
        @type future_form: unicode
        """
        self.future_form = future_form

    def get_verb_attributs(self):
        """
        Get the extra attributes registered with add_attribut().
        @return: mapping of attribute title to value.
        @rtype: dict
        """
        return self.text

    def add_attribut(self, title, value):
        """
        Add a new attribute to display, like the transitivity,
        the root and the future form. Empty titles are ignored.
        @param title: the title of the attribute to display.
        @type title: unicode
        @param value: the value of the attribute.
        @type value: unicode
        """
        if title != '':
            self.text[title] = value

    def get_conj(self, tense, pronoun):
        """
        Get the conjugated verb by tense and pronoun.
        @param tense: tense of the conjugation.
        @type tense: unicode
        @param pronoun: pronoun of the conjugation.
        @type pronoun: unicode
        @return: conjugated form of verb if it exists, else an empty string.
        @rtype: unicode
        """
        if tense in self.tab_conjug:
            if pronoun in self.tab_conjug[tense]:
                return self.tab_conjug[tense][pronoun]
        return u""

    def add(self, tense, pronoun, verbconjugated):
        """
        Add a new conjugation to display.
        @param tense: tense of the added conjugation.
        @type tense: unicode
        @param pronoun: pronoun of the added conjugation.
        @type pronoun: unicode
        @param verbconjugated: added conjugation.
        @type verbconjugated: unicode
        """
        if tense not in self.tab_conjug:
            self.tab_conjug[tense] = {}
        self.tab_conjug[tense][pronoun] = verbconjugated

    #####################################
    #{ Display functions
    #####################################
    def display(self, mode=None, listtense=None):
        """
        Display the conjugation result for a list of tenses.

        This single method replaces two same-named definitions; the second
        one silently overrode the first, so ``display()`` and
        ``display(listtense)`` used to fail. All three call shapes work now:
        ``display()``, ``display(listtense)`` and ``display(mode, listtense)``.

        Supported modes: 'Text', 'HTML', 'HTMLColoredDiacritics', 'DICT',
        'CSV', 'GUI', 'TABLE', 'XML', 'TeX' (case-insensitive), 'ROWS'.
        Any other mode falls back to the text display.
        @param mode: the display mode; None means the instance mode.
        @type mode: unicode
        @param listtense: the tenses to display; empty or None means
        vconst.TABLE_TENSE.
        @type listtense: list of unicode
        @return: the result in the specified display mode.
        @rtype: according to display mode.
        """
        # display(listtense): a tense list given in the first position.
        if isinstance(mode, (list, tuple)) and listtense is None:
            listtense, mode = mode, None
        if mode is None:
            mode = self.mode
        if not listtense:
            listtense = vconst.TABLE_TENSE
        renderers = {
            'Text': self.display_text,
            'HTML': self.display_html,
            'HTMLColoredDiacritics': self.display_html_colored_diacritics,
            'DICT': self.display_dict,
            'CSV': self.display_csv,
            'GUI': self.display_table,
            'TABLE': self.display_table,
            'XML': self.display_xml,
            'ROWS': self.display_rows,
        }
        renderer = renderers.get(mode)
        if renderer is None:
            # 'TeX' is the only mode matched case-insensitively.
            if mode.upper() == 'TEX':
                renderer = self.display_tex
            else:
                renderer = self.display_text
        return renderer(listtense)

    def display_text(self, listtense):
        """
        Display the conjugation result for a list of tenses, as
        tab-separated text: attribute lines, a header row starting with an
        empty cell, then one line per pronoun.
        @param listtense: the given tenses list to display result
        @type listtense: list of unicode
        @return: the result as text.
        @rtype: unicode.
        """
        parts = [u"%s: %s\n" % (title, value)
                 for title, value in self.text.items()]
        parts.append(u"\t")
        parts.append(u"\t".join(listtense))
        for pronoun in vconst.PronounsTable:
            parts.append(u"\n%s" % (pronoun))
            for tense in listtense:
                row = self.tab_conjug.get(tense, {})
                if pronoun in row:
                    parts.append(u"\t%s" % (row[pronoun]))
        return u"".join(parts)

    def display_csv(self, listtense):
        """
        Display the conjugation result for a list of tenses, as
        tab-separated values: attribute lines, one header line with the
        tense names, then one newline-terminated line per pronoun holding
        the pronoun followed by its conjugations.

        The cells used to be concatenated with an empty separator, which
        made the output impossible to split; they are tab-delimited now.
        @param listtense: the given tenses list to display result
        @type listtense: list of unicode
        @return: the result as delimited text.
        @rtype: unicode.
        """
        parts = [u"%s: %s\n" % (title, value)
                 for title, value in self.text.items()]
        parts.append(u"\t".join(listtense))
        parts.append(u"\n")
        for pronoun in vconst.PronounsTable:
            parts.append(u"%s" % (pronoun))
            for tense in listtense:
                row = self.tab_conjug.get(tense, {})
                if pronoun in row:
                    parts.append(u"\t%s" % (row[pronoun]))
            parts.append(u"\n")
        return u"".join(parts)

    def display_rows(self, listtense):
        """
        Display the conjugation result for a list of tenses, as text rows.
        Every non-empty conjugation yields one tab-separated row with:
            - unvocalized conjugation,
            - vocalized conjugation,
            - pronoun code (see TAB_DISPLAY),
            - tense code (see TAB_DISPLAY),
            - transitive flag ('1' or '0'),
            - original verb,
            - tasrif bab
        @param listtense: the given tenses list to display result
        @type listtense: list of unicode
        @return: the result as text in rows.
        @rtype: unicode.
        """
        transitive = '1' if self.transitive else "0"
        rows = []
        for pronoun in vconst.PronounsTable:
            for tense in listtense:
                conjugated = self.get_conj(tense, pronoun)
                if conjugated != "":
                    rows.append("\t".join([
                        araby.strip_harakat(conjugated),
                        conjugated,
                        TAB_DISPLAY[pronoun],
                        TAB_DISPLAY[tense],
                        transitive,
                        self.verb,
                        # setbab() documents an integer; join needs text.
                        str(self.bab),
                    ]))
                    rows.append(u"\n")
        return u"".join(rows)

    def display_html(self, listtense):
        """
        Display the conjugation result for a list of tenses, as HTML:
        a heading, the attribute list, then one table for the tenses found
        in vconst.TableIndicativeTense and one for the remaining tenses.
        @param listtense: the given tenses list to display result
        @type listtense: list of unicode
        @return: the result as HTML.
        @rtype: unicode.
        """
        indicative_tenses = []
        passive_tenses = []
        for tense in listtense:
            if tense in vconst.TableIndicativeTense:
                indicative_tenses.append(tense)
            else:
                passive_tenses.append(tense)
        parts = [u"<h3>%s : %s - %s</h3>\n" % (self.verb, self.verb,
                                               self.future_form)]
        # special attributes of the verb
        parts.append(u"<ul>\n")
        for title, value in self.text.items():
            parts.append(u"<li><b>%s:</b> %s</li>\n" % (title, value))
        parts.append(u"</ul>\n\n")

        for group in ("indicative", "passive"):
            if group == "indicative":
                tenses_to_display = indicative_tenses
            else:
                tenses_to_display = passive_tenses
                # NOTE(review): the line break is assumed to belong to the
                # passive branch only -- confirm against the original file.
                parts.append("<br/>")
            if not tenses_to_display:
                continue
            parts.append(
                u"<table class = 'resultarea' border = 1 cellspacing = 0>\n")
            parts.append(u"<tr><th>&nbsp</th>")
            for tense in tenses_to_display:
                parts.append(u"<th>%s</th>" % (tense))
            parts.append(u"</tr>\n")
            for pronoun in vconst.PronounsTable:
                parts.append(u"<tr>")
                parts.append(u"<th>%s</th>" % (pronoun))
                for tense in tenses_to_display:
                    parts.append(u"<td>&nbsp%s</td>" % (
                        self.get_conj(tense, pronoun)))
                parts.append(u"</tr>\n")
            parts.append(u"</table>\n")
        return u"".join(parts)

    def display_html_colored_diacritics(self, listtense):
        """
        Display the conjugation result for a list of tenses,
        as HTML with the short vowels wrapped in highlighting spans.
        @param listtense: the given tenses list to display result
        @type listtense: list of unicode
        @return: the result as HTML.
        @rtype: unicode.
        """
        return self.highlight_diacritics_html(self.display_html(listtense))

    def highlight_diacritics_html(self, text):
        """
        Highlight diacritics in the HTML text: every FATHA, DAMMA, KASRA
        or SUKUN is wrapped in ``<span class = 'tashkeel'>``. Inside the span
        the mark is carried by a TATWEEL when the preceding character is not
        in the exclusion list below and the mark is not followed by a space
        or a tag; otherwise it is carried by a plain space.
        @param text: the given text
        @type text: unicode.
        @return: the result as HTML.
        @rtype: unicode.
        """
        lefttag = u"<span class = 'tashkeel'>"
        righttag = u"</span>"
        marks = (araby.FATHA, araby.DAMMA, araby.KASRA, araby.SUKUN)
        no_tatweel_after = (
            araby.ALEF, araby.ALEF_HAMZA_ABOVE, araby.WAW_HAMZA,
            araby.ALEF_MADDA, araby.DAL, araby.THAL, araby.WAW, araby.REH,
            araby.ZAIN, araby.SHADDA)
        size = len(text)
        pieces = []
        for i, char in enumerate(text):
            if char not in marks:
                pieces.append(char)
                continue
            joins_previous = i > 0 and text[i-1] not in no_tatweel_after
            inside_word = i+1 < size and text[i+1] not in (" ", "<")
            carrier = araby.TATWEEL if joins_previous and inside_word else " "
            pieces.append(u"".join([lefttag, carrier, char, righttag]))
        return u"".join(pieces)

    def display_table(self, listtense):
        """
        Display the conjugation result for a list of tenses, as a table.
        @param listtense: the given tenses list to display result
        @type listtense: list of unicode
        @return: the result as table; table[0] is the header row (a label
        followed by the tense names), the following rows start with the
        pronoun.
        @rtype: dict of dicts with integer indices.
        """
        header = {0: u"الضمائر"}
        for column, tense in enumerate(listtense, 1):
            header[column] = tense
        table = {0: header}
        for line, pronoun in enumerate(vconst.PronounsTable, 1):
            row = {0: pronoun}
            for column, tense in enumerate(listtense, 1):
                row[column] = self.get_conj(tense, pronoun)
            table[line] = row
        return table

    def display_dict(self, listtense):
        """
        Display the conjugation result for a list of tenses, as python dict.
        The inner pronoun dicts are the stored ones, not copies.
        @param listtense: the given tenses list to display result
        @type listtense: list of unicode
        @return: the result as python dict.
        @rtype: dict.
        """
        return {tense: self.tab_conjug[tense] for tense in listtense}

    def display_xml(self, listtense):
        """
        Display the conjugation result for a list of tenses, as XML.
        Empty conjugations are left out.
        @param listtense: the given tenses list to display result
        @type listtense: list of unicode
        @return: the result as XML.
        @rtype: unicode.
        """
        parts = [u"<verb_conjugation>\n"]
        parts.append(
            u"\t<proprety name = 'verb' value = '%s'/>\n" % (self.verb))
        for title, value in self.text.items():
            parts.append(
                u"\t<proprety name = '%s' value = '%s'/>\n" % (title, value))
        for tense in listtense:
            parts.append(u"\t<tense name = '%s'>\n" % (tense))
            for pronoun in vconst.PronounsTable:
                conjugated = self.get_conj(tense, pronoun)
                if conjugated != "":
                    parts.append(
                        u"\t\t<conjugation pronoun = '%s' value = '%s' />\n"
                        % (pronoun, conjugated))
            parts.append(u"\t</tense>\n")
        parts.append(u"</verb_conjugation>")
        return u"".join(parts)

    def display_tex(self, listtense):
        """
        Display the conjugation result for a list of tenses, as TeX.
        @param listtense: the given tenses list to display result
        @type listtense: list of unicode
        @return: the result as TeX format.
        @rtype: unicode.
        """
        parts = [
            u"\\environment qutrub-layout\n",
            u"\\starttext\n",
            u"\\Title{%s}\n" % (self.verb),
            u"\\startitemize\n",
        ]
        for title, value in self.text.items():
            # Only the internal spelling of the verb is wrapped in \DeShape.
            if title == u" الكتابة الداخلية للفعل ":
                parts.append(
                    u"\\item {\\bf %s} \\DeShape{%s}\n" % (title, value))
            else:
                parts.append(u"\\item {\\bf %s} %s\n" % (title, value))
        parts.append(u"\\stopitemize\n")

        parts.append(u"\\starttable[|lB|l|l|l|l|l|]\n")
        parts.append(u"\\HL[3]\n\\NC")
        for tense in listtense:
            parts.append(u"\\NC {\\bf %s}" % (tense))
        parts.append(u"\\SR\n\\HL\n")
        for pronoun in vconst.PronounsTable:
            parts.append(u"\\NC %s" % (pronoun))
            for tense in listtense:
                parts.append(u"\\NC %s" % (self.get_conj(tense, pronoun)))
            parts.append(u"\\AR\n")
        parts.append(u"\\LR\\HL[3]\n")
        parts.append(u"\\stoptable\n")

        parts.append(u"\\stoptext")
        return u"".join(parts)
libqutrub/mosaref_main.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #************************************************************************
2
+ # $Id: mosaref_main.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
3
+ #
4
+ # ------------
5
+ # Description:
6
+ # ------------
7
+ # Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
8
+ #
9
+ # This file is used by the web interface to execute verb conjugation
10
+ #
11
+ # -----------------
12
+ # Revision Details: (Updated by Revision Control System)
13
+ # -----------------
14
+ # $Date: 2009/06/02 01:10:00 $
15
+ # $Author: Taha Zerrouki $
16
+ # $Revision: 0.7 $
17
+ # $Source: arabtechies.sourceforge.net
18
+ #
19
+ #***********************************************************************/
20
+ """
21
+ The main function to call qutrub conjugation from other programs.
22
+ """
23
+ import libqutrub.classverb as classverb
24
+ import libqutrub.ar_verb as ar_verb
25
+ import libqutrub.verb_valid as verb_valid
26
+ import libqutrub.verb_const as vconst
27
+ import pyarabic.araby as araby
28
+ import libqutrub.verb_db as verb_db
29
+ verb_db.create_index_triverbtable()
30
+ #~ """ you need to create the trileteral verb dictionary
31
+ # index to search within triverbs."""
32
+
33
def do_sarf(word, future_type, alltense = True, past = False, future = False,
            passive = False, imperative = False, future_moode = False,
            confirmed = False, transitive = False, display_format = "HTML"):
    """
    Conjugate a verb and return the result in the requested display format.

    @param word: the given verb, checked with
    verb_valid.is_valid_infinitive_verb before any conjugation.
    @type word: unicode.
    @param future_type: the name of the haraka carried by the second root
    letter in the future tense; it is resolved with
    ar_verb.get_future_type_by_name.
    @type future_type: unicode.
    @param alltense: conjugate in all tenses; when True the other tense
    flags are ignored.
    @type alltense: Boolean, default(True)
    @param past: select the past tense.
    @type past: Boolean, default(False)
    @param future: select the present/future tense.
    @type future: Boolean, default(False)
    @param passive: add the passive counterpart of the selected past,
    future and future-mood tenses.
    @type passive: Boolean, default(False)
    @param imperative: select the imperative tense.
    @type imperative: Boolean, default(False)
    @param future_moode: select the subjunctive and jussive future tenses.
    @type future_moode: Boolean, default(False)
    @param confirmed: add the confirmed form of the selected future and
    imperative tenses.
    @type confirmed: Boolean, default(False)
    @param transitive: the verb transitivity.
    @type transitive: Boolean, default(False)
    @param display_format: one of 'Text', 'HTML', 'HTMLColoredDiacritics',
    'DICT', 'CSV', 'GUI', 'TABLE', 'XML', 'TeX', 'ROWS'.
    @type display_format: string, default("HTML")
    @return: the conjugation result, or None when the verb is not valid.
    @rtype: according to display_format.
    """
    if not verb_valid.is_valid_infinitive_verb(word):
        return None

    haraka = ar_verb.get_future_type_by_name(future_type)
    conjugator = classverb.VerbClass(word, transitive, haraka)
    conjugator.set_display(display_format)

    if alltense:
        return conjugator.conjugate_all_tenses()

    # (condition, tenses) pairs, in the order the tenses must be listed.
    selection = (
        (past, (vconst.TensePast,)),
        (past and passive, (vconst.TensePassivePast,)),
        (future, (vconst.TenseFuture,)),
        (future and passive, (vconst.TensePassiveFuture,)),
        (future_moode, (vconst.TenseSubjunctiveFuture,
                        vconst.TenseJussiveFuture)),
        (confirmed and future, (vconst.TenseConfirmedFuture,)),
        (confirmed and imperative, (vconst.TenseConfirmedImperative,)),
        # This one is keyed on ``transitive``, not on ``passive``.
        (future and transitive and confirmed,
         (vconst.TensePassiveConfirmedFuture,)),
        (passive and future_moode, (vconst.TensePassiveSubjunctiveFuture,
                                    vconst.TensePassiveJussiveFuture)),
        (imperative, (vconst.TenseImperative,)),
    )
    wanted_tenses = []
    for requested, tenses in selection:
        if requested:
            wanted_tenses.extend(tenses)
    return conjugator.conjugate_all_tenses(wanted_tenses)
117
+
118
def get_future_form(verb_vocalised, haraka = araby.FATHA):
    """
    Get the future form of a verb, i.e. its future-tense conjugation with
    the pronoun vconst.PronounHuwa. The verb is always conjugated as
    transitive.
    @param verb_vocalised: given verb.
    @type verb_vocalised: unicode.
    @param haraka: the future mark for triliteral verbs, either one of
    araby.FATHA / araby.DAMMA / araby.KASRA or a value that
    ar_verb.get_future_type_by_name can resolve.
    @type haraka: unicode.
    @return: The conjugated form in the future tense.
    @rtype: unicode.
    """
    known_marks = (araby.FATHA, araby.DAMMA, araby.KASRA)
    if haraka in known_marks:
        future_mark = haraka
    else:
        # Not a haraka character: resolve it by name.
        future_mark = ar_verb.get_future_type_by_name(haraka)
    conjugator = classverb.VerbClass(verb_vocalised, True, future_mark)
    return conjugator.conjugate_tense_pronoun(vconst.TenseFuture,
                                              vconst.PronounHuwa)
138
+
139
+
140
+
libqutrub/stack.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ # -*- coding=utf-8 -*-
3
+ #************************************************************************
4
+ # from arabic_const import *
5
+ from pyarabic.araby import *
6
+ from verb_const import *
7
class Stack:
    """A minimal LIFO stack whose items live in the public list ``items``."""

    def __init__(self, text=""):
        """Fill the stack with the elements of text; the last one is on top."""
        self.items = list(text)

    def push(self, item):
        """Put item on top of the stack."""
        self.items.append(item)

    def pop(self):
        """Remove and return the top item, or None when the stack is empty."""
        if self.isEmpty():
            return None
        return self.items.pop()

    def isEmpty(self):
        """Return True when the stack holds no item."""
        return not self.items
22
+
23
+
24
def separate(word):
    """
    Split a word into its letters and its marks.

    The word is read from its first character. Leading vowels are dropped.
    Every letter receives NOT_DEF_HARAKA as mark until a following vowel
    replaces it. A SHADDA is kept as a letter: the mark of the previous
    letter becomes SUKUN and the SHADDA itself receives NOT_DEF_HARAKA.
    return (letters, marks) as two strings.
    """
    vowels = ('a', 'u')
    pending = iter(word)
    current = next(pending, None)
    # a word cannot start with a haraka: skip any leading vowel
    while current in vowels:
        current = next(pending, None)

    letters = []
    marks = []
    while current is not None:
        if current in vowels:
            # the vowel replaces the mark given to the previous letter
            if marks:
                marks.pop()
            marks.append(current)
        elif current == SHADDA:
            # the previous letter takes a sukun, the shadda stays undefined
            if marks:
                marks.pop()
            marks.append(SUKUN)
            marks.append(NOT_DEF_HARAKA)
            letters.append(SHADDA)
        else:
            marks.append(NOT_DEF_HARAKA)
            letters.append(current)
        current = next(pending, None)
    return (''.join(letters), ''.join(marks))
61
+
62
+
63
def joint(letters, marks):
    """
    Join the letters with their marks into one word.

    Each letter is followed by its mark, except when the mark is
    NOT_DEF_HARAKA. Before a SHADDA is written, a vowel left at the end of
    the word by the previous letter is removed.
    The lengths of letters and marks must be equal.
    return the word, or "" when the lengths differ.
    """
    if len(letters) != len(marks):
        return ""

    vowels = ('a', 'u', 'o', 'i', SUKUN)
    word = []
    for letter, mark in zip(letters, marks):
        if letter == SHADDA:
            # Drop the mark written for the previous letter. The emptiness
            # guard matters: a leading SHADDA used to push None onto the
            # output and make the final join raise TypeError.
            if word and word[-1] in vowels:
                word.pop()
        word.append(letter)
        if mark != NOT_DEF_HARAKA:
            word.append(mark)
    return ''.join(word)
103
+
104
def vocalizedlike(word1, word2):
    """
    Return True when the two words have the same letters and compatible
    vowels. The words can be fully or partially vocalized: a vowel that
    appears in only one of them is skipped.

    Changes from the previous version:
    - the Python 2 ``print`` debug statements are gone; they were dead code
      (debug was always False) and a syntax error under Python 3;
    - a mismatch now returns False; it used to break out of the loop and
      return True whenever both stacks happened to be empty
      (for example "x" against "y");
    - what is left of the longer word after the comparison must consist of
      vowels only.
    """
    vowels = ('a', 'u')
    # Both words are compared from their last character backwards.
    i = len(word1) - 1
    j = len(word2) - 1
    while i >= 0 and j >= 0:
        char1 = word1[i]
        char2 = word2[j]
        if char1 == char2:
            i -= 1
            j -= 1
        elif char1 in vowels and char2 not in vowels:
            # vowel present only in word1
            i -= 1
        elif char1 not in vowels and char2 in vowels:
            # vowel present only in word2
            j -= 1
        else:
            # two different letters, or two different vowels
            return False
    leftover = word1[:i + 1] if i >= 0 else word2[:j + 1]
    return all(char in vowels for char in leftover)
133
+ #-------------------------
134
+ # Function def vaznlike(word1,wazn):
135
+ #-------------------------
136
def waznlike(word1, wazn):
    """
    Return True when word1 matches the wazn (pattern).

    In the wazn, the letters FEH, AIN and LAM are generic: each one matches
    any non-vowel character of the word. Every other letter must be equal
    in both. The two words can be fully or partially vocalized: a vowel
    that appears in only one of them is skipped.

    Changes from the previous version:
    - the Python 2 ``print`` statements (a syntax error under Python 3) are
      replaced by a single ``print()`` call, and the trace that was printed
      on every call is now emitted only when ``debug`` is True;
    - a mismatch now returns False; it used to break out of the loop and
      return True whenever both stacks happened to be empty;
    - what is left of the longer string after the comparison must consist
      of vowels only.
    """
    debug = False
    vowels = ('a', 'u')
    generic = (FEH, AIN, LAM)
    root = []
    # Both strings are compared from their last character backwards.
    i = len(word1) - 1
    j = len(wazn) - 1
    while i >= 0 and j >= 0:
        char1 = word1[i]
        char2 = wazn[j]
        if char1 == char2 and char2 not in generic:
            i -= 1
            j -= 1
        elif char1 not in vowels and char2 in generic:
            # a generic pattern letter captures one root letter
            root.append(char1)
            i -= 1
            j -= 1
        elif char1 in vowels and char2 not in vowels:
            i -= 1
        elif char1 not in vowels and char2 in vowels:
            j -= 1
        else:
            return False
    if debug:
        # root letters were collected last to first
        root.reverse()
        print(" the root is ", root)
    leftover = word1[:i + 1] if i >= 0 else wazn[:j + 1]
    return all(char in vowels for char in leftover)
libqutrub/triverbtable.py ADDED
The diff for this file is too large to render. See raw diff
 
libqutrub/verb_const.py ADDED
@@ -0,0 +1,613 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ # -*- coding=utf-8 -*-
3
+ #---
4
+ #************************************************************************
5
+ # $Id: verb_const.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
6
+ #
7
+ # ------------
8
+ # Description:
9
+ # ------------
10
+ # Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
11
+ #
12
+ # List of constants used in the arabic verb conjugation
13
+ #
14
+ # -----------------
15
+ # Revision Details:
16
+ # -----------------
17
+ # $Date: 2009/06/02 01:10:00 $
18
+ # $Author: Taha Zerrouki $
19
+ # $Revision: 0.7 $
20
+ # $Source: arabtechies.sourceforge.net
21
+ #
22
+ #***********************************************************************/
23
+ """
24
+ Arabic Qutrub verb conjugation, verb_const file
25
+ """
26
+ from pyarabic.araby import FATHA, DAMMA, KASRA, SHADDA, SUKUN, HAMZA, ALEF, \
27
+ NOON, ALEF_WASLA, WAW, ALEF_HAMZA_ABOVE, ALEF_HAMZA_BELOW, ALEF_MADDA, \
28
+ YEH_HAMZA, WAW_HAMZA, TATWEEL, SMALL_ALEF, SMALL_YEH, SMALL_WAW, YEH, \
29
+ ALEF_MAKSURA
30
+
31
+ import re
32
+
33
# One named constant per pronoun.
PronounAna = u"أنا"
PronounNahnu = u"نحن"
PronounAnta = u"أنت"
PronounAnti = u"أنتِ"
PronounAntuma = u"أنتما"
PronounAntuma_f = u"أنتما مؤ"
PronounAntum = u"أنتم"
PronounAntunna = u"أنتن"
PronounHuwa = u"هو"
PronounHya = u"هي"
PronounHuma = u"هما"
PronounHuma_f = u"هما مؤ"
PronounHum = u"هم"
PronounHunna = u"هن"

# All pronouns, in display order.
PronounsTable = (
    PronounAna, PronounNahnu,
    PronounAnta, PronounAnti, PronounAntuma, PronounAntuma_f,
    PronounAntum, PronounAntunna,
    PronounHuwa, PronounHya, PronounHuma, PronounHuma_f,
    PronounHum, PronounHunna,
)

# The same table without PronounHuwa and PronounHya.
PronounsTableNotPassiveForUntransitive = (
    PronounAna, PronounNahnu,
    PronounAnta, PronounAnti, PronounAntuma, PronounAntuma_f,
    PronounAntum, PronounAntunna,
    PronounHuma, PronounHuma_f,
    PronounHum, PronounHunna,
)

# Grammatical person, gender and number of every pronoun.
PRONOUN_FEATURES = {
    PronounAna: {'person': u'متكلم', 'gender': u'', 'number': u'مفرد'},
    PronounAnta: {'person': u'مخاطب', 'gender': u'مذكر', 'number': u'مفرد'},
    PronounAnti: {'person': u'مخاطب', 'gender': u'مؤنث', 'number': u'مفرد'},
    PronounHuwa: {'person': u'غائب', 'gender': u'مذكر', 'number': u'مفرد'},
    PronounHya: {'person': u'غائب', 'gender': u'مؤنث', 'number': u'مفرد'},
    PronounAntuma: {'person': u'مخاطب', 'gender': u'مذكر',
                    'number': u'مثنى'},
    PronounAntuma_f: {'person': u'مخاطب', 'gender': u'مؤنث',
                      'number': u'مثنى'},
    PronounHuma: {'person': u'غائب', 'gender': u'مذكر', 'number': u'مثنى'},
    PronounHuma_f: {'person': u'غائب', 'gender': u'مؤنث', 'number': u'مثنى'},
    PronounNahnu: {'person': u'متكلم', 'gender': u'', 'number': u'جمع'},
    PronounAntum: {'person': u'مخاطب', 'gender': u'مذكر', 'number': u'جمع'},
    PronounAntunna: {'person': u'مخاطب', 'gender': u'مؤنث',
                     'number': u'جمع'},
    PronounHum: {'person': u'غائب', 'gender': u'مذكر', 'number': u'جمع'},
    PronounHunna: {'person': u'غائب', 'gender': u'مؤنث', 'number': u'جمع'},
}

# Pronouns listed for the imperative.
ImperativePronouns = (
    PronounAnta, PronounAnti, PronounAntuma, PronounAntuma_f,
    PronounAntum, PronounAntunna,
)
72
# Display names of the conjugation tenses.
# These strings are used as dictionary keys throughout the tables below,
# so they must be reproduced exactly (including the trailing space in
# TensePassiveConfirmedFuture).
TensePast = u"الماضي المعلوم"
TenseFuture = u"المضارع المعلوم"
TenseImperative = u"الأمر"
TenseConfirmedImperative = u"الأمر المؤكد"
TenseJussiveFuture = u"المضارع المجزوم"
TenseSubjunctiveFuture = u"المضارع المنصوب"
TenseConfirmedFuture = u"المضارع المؤكد الثقيل"


TensePassivePast = u"الماضي المجهول"
TensePassiveFuture = u"المضارع المجهول"
TensePassiveJussiveFuture = u"المضارع المجهول المجزوم"
TensePassiveSubjunctiveFuture = u"المضارع المجهول المنصوب"
TensePassiveConfirmedFuture = u"المضارع المؤكد الثقيل المجهول "


# All tenses: the active-voice tenses first, then the passive-voice ones.
TABLE_TENSE = [TensePast, TenseFuture, TenseJussiveFuture,
    TenseSubjunctiveFuture, TenseConfirmedFuture, TenseImperative,
    TenseConfirmedImperative,
    TensePassivePast, TensePassiveFuture,
    TensePassiveJussiveFuture, TensePassiveSubjunctiveFuture,
    TensePassiveConfirmedFuture]
# Active-voice tenses only (same order as the head of TABLE_TENSE).
TableIndicativeTense = [TensePast, TenseFuture, TenseJussiveFuture,
    TenseSubjunctiveFuture, TenseConfirmedFuture, TenseImperative,
    TenseConfirmedImperative]
# Passive-voice tenses only (same order as the tail of TABLE_TENSE).
TablePassiveTense = [TensePassivePast, TensePassiveFuture,
    TensePassiveJussiveFuture, TensePassiveSubjunctiveFuture,
    TensePassiveConfirmedFuture]

# Grammatical features attached to each tense name:
# 'tense', 'voice', 'mood' and 'confirmed' (empty string when not applicable).
# The 'confirmed' marker is spelled identically for every confirmed tense,
# and the subjunctive mood is spelled identically for active and passive.
TENSE_FEATURES = {
    TensePast : { 'tense':u'ماضي', 'voice':u'معلوم', 'mood':u'', 'confirmed':u'', },
    TenseFuture : { 'tense':u'مضارع', 'voice':u'معلوم', 'mood':u'مرفوع', 'confirmed':u'', },
    TenseImperative : { 'tense':u'أمر', 'voice':u'', 'mood':u'', 'confirmed':u'', },
    TenseConfirmedImperative : { 'tense':u'أمر', 'voice':u'', 'mood':u'', 'confirmed':u'مؤكد', },
    TenseJussiveFuture : { 'tense':u'مضارع', 'voice':u'معلوم', 'mood':u'مجزوم', 'confirmed':u'', },
    TenseSubjunctiveFuture : { 'tense':u'مضارع', 'voice':u'معلوم', 'mood':u'منصوب', 'confirmed':u'', },
    TenseConfirmedFuture : { 'tense':u'مضارع', 'voice':u'معلوم', 'mood':u'', 'confirmed':u'مؤكد', },


    TensePassivePast : { 'tense':u'ماضي', 'voice':u'مجهول', 'mood':u'', 'confirmed':u'', },
    TensePassiveFuture : { 'tense':u'مضارع', 'voice':u'مجهول', 'mood':u'مرفوع', 'confirmed':u'', },
    TensePassiveJussiveFuture : { 'tense':u'مضارع', 'voice':u'مجهول', 'mood':u'مجزوم', 'confirmed':u'', },
    TensePassiveSubjunctiveFuture : { 'tense':u'مضارع', 'voice':u'مجهول', 'mood':u'منصوب', 'confirmed':u'', },
    TensePassiveConfirmedFuture : { 'tense':u'مضارع', 'voice':u'مجهول', 'mood':u'', 'confirmed':u'مؤكد', },
}
118
+
119
+ past = {
120
+ u"أنا" : [u"", u"ْتُ"]
121
+ , u"أنت" : [u"", u"ْتَ"]
122
+ , u"أنتِ" : [u"", u"ْتِ"]
123
+ , u"هو" : [u"", u"َ"]
124
+ , u"هي" : [u"", u"َتْ"]
125
+ , u"أنتما" : [u"", u"ْتُما"]
126
+ , u"أنتما مؤ" : [u"", u"ْتُما"]
127
+ , u"هما" : [u"", u"َا"]
128
+ , u"هما مؤ" : [u"", u"َتَا"]
129
+ , u"نحن" : [u"", u"ْنَا"]
130
+ , u"أنتم" : [u"", u"ْتُم"]
131
+ , u"أنتن" : [u"", u"ْتُنَّ"]
132
+ ##, u"هم" : [u"", u"ُوا"]
133
+ , u"هم" : [u"", DAMMA + WAW + ALEF_WASLA]
134
+ , u"هن" : [u"", u"ْنَ"]
135
+ }
136
+ future = {
137
+ u"أنا" : [u"أ", u"ُ"]
138
+ , u"أنت" : [u"ت", u"ُ"]
139
+ , u"أنتِ" : [u"ت", u"ِينَ"]
140
+ , u"أنتم" : [u"ت", u"ُونَ"]
141
+ , u"أنتما" : [u"ت", FATHA + ALEF + NOON + KASRA]
142
+ , u"أنتما مؤ" : [u"ت", FATHA + ALEF + NOON + KASRA]
143
+ , u"أنتن" : [u"ت", SUKUN + NOON + FATHA]
144
+ , u"نحن" : [u"ن", u"ُ"]
145
+ , u"هم" : [u"ي", u"ُونَ"]
146
+ , u"هما" : [u"ي", u"َانِ"]
147
+ , u"هما مؤ" : [u"ت", u"َانِ"]
148
+ , u"هن" : [u"ي", u"ْنَ"]
149
+ , u"هو" : [u"ي", u"ُ"]
150
+ , u"هي" : [u"ت", u"ُ"]
151
+ }
152
+ future_majzoom = {
153
+ u"أنا" : [u"أ", u"ْ"]
154
+ , u"أنت" : [u"ت", u"ْ"]
155
+ , u"أنتِ" : [u"ت", u"ِي"]
156
+ , u"أنتم" : [u"ت", DAMMA + WAW + ALEF_WASLA]
157
+ ##, u"أنتم" : [u"ت", DAMMA+WAW+ALEF]
158
+ , u"أنتما" : [u"ت", u"َا"]
159
+ , u"أنتما مؤ" : [u"ت", u"َا"]
160
+ , u"أنتن" : [u"ت", u"ْنَ"]
161
+ , u"نحن" : [u"ن", u"ْ"]
162
+ ##, u"هم" : [u"ي", DAMMA+WAW+ALEF]
163
+ , u"هم" : [u"ي", DAMMA+WAW+ALEF_WASLA]
164
+ , u"هما" : [u"ي", u"َا"]
165
+ , u"هما مؤ" : [u"ت", u"َا"]
166
+ , u"هن" : [u"ي", u"ْنَ"]
167
+ , u"هو" : [u"ي", u"ْ"]
168
+ , u"هي" : [u"ت", u"ْ"]
169
+ }
170
+ future_mansoub = {
171
+ u"أنا" : [u"أ", u"َ"]
172
+ , u"أنت" : [u"ت", u"َ"]
173
+ , u"أنتِ" : [u"ت", u"ِي"]
174
+ , u"أنتم" : [u"ت", DAMMA+WAW+ALEF_WASLA]
175
+ ##, u"أنتم" : [u"ت", DAMMA+WAW+ALEF]
176
+ , u"أنتما" : [u"ت", u"َا"]
177
+ , u"أنتما مؤ" : [u"ت", u"َا"]
178
+ , u"أنتن" : [u"ت", u"ْنَ"]
179
+ , u"نحن" : [u"ن", u"َ"]
180
+ ##, u"هم" : [u"ي", DAMMA+WAW+ALEF]
181
+ , u"هم" : [u"ي", DAMMA+WAW+ALEF_WASLA]
182
+ , u"هما" : [u"ي", u"َا"]
183
+ , u"هما مؤ" : [u"ت", u"َا"]
184
+ , u"هن" : [u"ي", u"ْنَ"]
185
+ , u"هو" : [u"ي", u"َ"]
186
+ , u"هي" : [u"ت", u"َ"]
187
+ }
188
+
189
+ future_confirmed = {
190
+ u"أنا" : [u"أ", FATHA+NOON+SHADDA+FATHA]
191
+ , u"أنت" : [u"ت", FATHA+NOON+SHADDA+FATHA]
192
+ , u"أنتِ" : [u"ت", KASRA+NOON+SHADDA+FATHA]
193
+ , u"أنتما" : [u"ت", FATHA+ALEF+NOON+SHADDA+KASRA]
194
+ , u"أنتما مؤ" : [u"ت", FATHA+ALEF+NOON+SHADDA+KASRA]
195
+ , u"أنتم" : [u"ت", DAMMA+NOON+SHADDA+FATHA]
196
+ , u"أنتن" : [u"ت", SUKUN+NOON+FATHA+ALEF+NOON+SHADDA+KASRA]
197
+ , u"نحن" : [u"ن", FATHA+NOON+SHADDA+FATHA]
198
+ , u"هم" : [u"ي", DAMMA+NOON+SHADDA+FATHA]
199
+ , u"هما" : [u"ي", FATHA+ALEF+NOON+SHADDA+KASRA]
200
+ , u"هما مؤ" : [u"ت", FATHA+ALEF+NOON+SHADDA+KASRA]
201
+ , u"هن" : [u"ي", SUKUN+NOON+FATHA+ALEF+NOON+SHADDA+KASRA]
202
+ , u"هو" : [u"ي", FATHA+NOON+SHADDA+FATHA]
203
+ , u"هي" : [u"ت", FATHA+NOON+SHADDA+FATHA]
204
+ }
205
+ imperative = {
206
+ u"أنت" : [u"", u"ْ"]
207
+ , u"أنتِ" : [u"", u"ِي"]
208
+ , u"أنتم" : [u"", DAMMA+WAW+ALEF_WASLA]
209
+ , u"أنتما" : [u"", u"َا"]
210
+ , u"أنتما مؤ" : [u"", u"َا"]
211
+ , u"أنتن" : [u"", u"ْنَ"]
212
+ }
213
+ imperative_confirmed = {
214
+ u"أنت" : [u"", FATHA+NOON+SHADDA+FATHA]
215
+ , u"أنتِ" : [u"", KASRA+NOON+SHADDA+FATHA]
216
+ , u"أنتم" : [u"", DAMMA+NOON+SHADDA+FATHA]
217
+ , u"أنتما" : [u"", FATHA+ALEF+NOON+SHADDA+KASRA]
218
+ , u"أنتما مؤ" : [u"", FATHA+ALEF+NOON+SHADDA+KASRA]
219
+ , u"أنتن" : [u"", SUKUN+NOON+FATHA+ALEF+NOON+SHADDA+KASRA]
220
+ }
221
+
222
+ TableTensePronoun = {}
223
+ TableTensePronoun[TensePast] = past
224
+ TableTensePronoun[TenseFuture] = future
225
+ TableTensePronoun[TenseImperative] = imperative
226
+ TableTensePronoun[TenseJussiveFuture] = future_majzoom
227
+ TableTensePronoun[TenseSubjunctiveFuture] = future_mansoub
228
+ TableTensePronoun[TenseConfirmedFuture] = future_confirmed
229
+ TableTensePronoun[TenseConfirmedImperative] = imperative_confirmed
230
+
231
+ TableTensePronoun[TensePassivePast] = past
232
+ TableTensePronoun[TensePassiveFuture] = future
233
+ TableTensePronoun[TensePassiveJussiveFuture] = future_majzoom
234
+ TableTensePronoun[TensePassiveSubjunctiveFuture] = future_mansoub
235
+ TableTensePronoun[TensePassiveConfirmedFuture] = future_confirmed
236
+
237
+
238
+ TAB_SARF = {
239
+ #باب تصريف الفعل، الصفر لكل الأفعال عدا الثلاثي
240
+ 0: {"past":FATHA, "future":KASRA},
241
+ # فَعَل يَفْعُل
242
+
243
+ 1: {"past":FATHA, "future":DAMMA},
244
+ # فَعَل يَفْعِل
245
+ 2: {"past":FATHA, "future":KASRA},
246
+ # فَعَل يَفْعَل
247
+ 3: {"past":FATHA, "future":FATHA},
248
+ # فَعِل يَفْعَل
249
+ 4: {"past":KASRA, "future":FATHA},
250
+ # فَعِل يَفْعِل
251
+ 5: {"past":KASRA, "future":KASRA},
252
+ # فَعُل يَفْعُل
253
+ 6: {"past":DAMMA, "future":DAMMA},
254
+ }
255
+
256
+ NOT_DEF_HARAKA = TATWEEL
257
+ ##NOT_DEF_HARAKA = FATHA
258
+
259
+ STRIP_HARAKA = u"i"
260
+ ALEF_HARAKA = SMALL_ALEF
261
+ ALEF4_HARAKA = u"y"
262
+ ALEF_YEH_HARAKA = u"#"
263
+ ALEF_WAW_HARAKA = u"*"
264
+
265
+ YEH_HARAKA = SMALL_YEH
266
+
267
+ ALTERNATIVE_YEH_HARAKA = u"t"
268
+ ALEF_YEH_ALTERNATIVE = u"x"
269
+ WAW_HARAKA = SMALL_WAW
270
+ ALEF_MAMDUDA = "9"
271
+ YEH_NAKISA = "5"
272
+
273
+ WRITTEN_HARAKA = {
274
+ ALEF_HARAKA:FATHA+ALEF,
275
+ ALEF_WAW_HARAKA:FATHA+ALEF,
276
+ ALEF_YEH_HARAKA:FATHA+ALEF,
277
+ WAW_HARAKA:DAMMA+WAW,
278
+ YEH_HARAKA:KASRA+YEH,
279
+ ALTERNATIVE_YEH_HARAKA:KASRA+YEH,
280
+ NOT_DEF_HARAKA:'',
281
+ FATHA: FATHA,
282
+ DAMMA:DAMMA,
283
+ KASRA:KASRA,
284
+ SUKUN:SUKUN,
285
+ SHADDA:SHADDA
286
+ }
287
+
288
+ # table of conversion if التقاء الساكنين
289
+ CONVERSION_TABLE = {
290
+ ALEF_YEH_HARAKA: KASRA,
291
+ ALEF_WAW_HARAKA: DAMMA,
292
+ WAW_HARAKA: DAMMA,
293
+ YEH_HARAKA : KASRA,
294
+ ALTERNATIVE_YEH_HARAKA: DAMMA,
295
+ }
296
+ ##WAW_MAKSURA = WAW
297
+
298
+ #HARAKAT = u"%s%s%s%s%s"%(SUKUN, FATHA, DAMMA, KASRA, SHADDA)
299
+ HARAKAT = (SUKUN, FATHA, DAMMA, KASRA)
300
+ HARAKAT2 = u"".join([ALEF_HARAKA, WAW_HARAKA, YEH_HARAKA, SUKUN,
301
+ FATHA, DAMMA, KASRA])
302
+ HAMZAT_PATTERN = re.compile(u"[%s%s%s%s%s]"%(ALEF_HAMZA_ABOVE, WAW_HAMZA,
303
+ YEH_HAMZA , HAMZA, ALEF_HAMZA_BELOW), re.UNICODE)
304
+ HAMZAT = (ALEF_HAMZA_ABOVE, WAW_HAMZA, YEH_HAMZA , HAMZA, ALEF_HAMZA_BELOW)
305
+
306
+
307
+ LAM_ALEF_PAT = re.compile(u'[\ufef7\ufef9\ufef5]', re.UNICODE)
308
+
309
+ #uniformate harkat
310
+ UNIFORMATE_MARKS_4 = FATHA+SUKUN+FATHA+FATHA
311
+ UNIFORMATE_MARKS_5TEH = FATHA+FATHA+SUKUN+FATHA+FATHA
312
+ UNIFORMATE_MARKS_5 = KASRA+SUKUN+FATHA+FATHA+FATHA
313
+ UNIFORMATE_MARKS_6 = KASRA+SUKUN+FATHA+SUKUN+FATHA+FATHA
314
+
315
+ BEGIN_WORD = u"^"
316
+ END_WORD = u"$"
317
+
318
+ LONG_HARAKAT = (ALEF_HARAKA, YEH_HARAKA, WAW_HARAKA, ALEF_YEH_HARAKA,
319
+ ALEF_WAW_HARAKA)
320
+ _F = FATHA
321
+ _D = DAMMA
322
+ _K = KASRA
323
+ _S = SUKUN
324
+ _A = ALEF_HARAKA
325
+ _W = WAW_HARAKA
326
+ _Y = YEH_HARAKA
327
+
328
+ _AH = ALEF_HARAKA
329
+ _YH = YEH_HARAKA
330
+ _WH = WAW_HARAKA
331
+ _AYH = ALEF_YEH_HARAKA
332
+ _AWH = ALEF_WAW_HARAKA
333
+ _YHALT = ALTERNATIVE_YEH_HARAKA
334
+ #HAMZAT
335
+ _AHA = ALEF_HAMZA_ABOVE
336
+ _AHB = ALEF_HAMZA_BELOW
337
+ _AM = ALEF_MADDA
338
+ _YHA = YEH_HAMZA
339
+ _WHA = WAW_HAMZA
340
+ _HZ = HAMZA
341
+
342
+
343
+ INITIAL_TAHMEEZ_TABLE = {_S:_HZ, _F:_AHA, _D:_AHA, _K:_AHB, _AH:_AM ,
344
+ _WH:_AHA, _YH:_AHB, _YHALT:_AHB}
345
+
346
+
347
+ MIDDLE_TAHMEEZ_TABLE = {
348
+ _S: {_S:_HZ, _F:_AHA, _D:_WHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA },
349
+ _F: {_S:_AHA, _F:_AHA, _D:_WHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA },
350
+ _D: {_S:_WHA, _F:_WHA, _D:_WHA, _K:_YHA, _AH:_WHA, _WH:_WHA, _YH:_YHA },
351
+ _K: {_S:_YHA, _F:_YHA, _D:_YHA, _K:_YHA, _AH:_YHA, _WH:_YHA, _YH:_YHA },
352
+ #_AH: {_S:_HZ, _F:_HZ, _D:_WHA, _K:_YHA, _AH:_HZ, _WH:_WHA, _YH:_YHA },
353
+ _AH: {_S:_HZ, _F:_HZ, _D:_WHA, _K:_YHA, _AH:_HZ, _WH:_WHA, _YH:_YHA },
354
+ #_WH: {_S:_HZ, _F:_HZ, _D:_WHA, _K:_YHA, _AH:_HZ, _WH:_WHA, _YH:_YHA },
355
+ _WH: {_S:_HZ, _F:_HZ, _D:_HZ, _K:_YHA, _AH:_HZ, _WH:_HZ, _YH:_YHA },
356
+ _YH: {_S:_YHA, _F:_YHA, _D:_YHA, _K:_YHA, _AH:_YHA, _WH:_YHA, _YH:_YHA },
357
+ }
358
+
359
+ FINAL_TAHMEEZ_TABLE = {
360
+ u"%s" % BEGIN_WORD :{_S:_HZ, _F:_AHA, _D:_AHA, _K:_YHA, _AH:_AM, _WH:_AHA,
361
+ _YH:_AHA},
362
+ #~ _S: {_S:_HZ, _F:_AHA, _D:_WHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA },
363
+ _S: {_S:_HZ, _F:_AHA, _D:_WHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA },
364
+ _F: {_S:_AHA, _F:_AHA, _D:_AHA, _K:_YHA, _AH:_AHA, _WH:_WHA, _YH:_YHA },
365
+ _D: {_S:_WHA, _F:_WHA, _D:_WHA, _K:_YHA, _AH:_WHA, _WH:_WHA, _YH:_YHA },
366
+ _K: {_S:_YHA, _F:_YHA, _D:_YHA, _K:_YHA, _AH:_WHA, _WH:_WHA, _YH:_YHA },
367
+ _AH: {_S:_HZ, _F:_HZ, _D:_HZ, _K:_HZ, _AH:_HZ, _WH:_WHA, _YH:_YHA },
368
+ _WH: {_S:_HZ, _F:_HZ, _D:_HZ, _K:_HZ, _AH:_WHA, _WH:_WHA, _YH:_YHA},
369
+ _YH: {_S:_HZ, _F:_HZ, _D:_HZ, _K:_HZ, _AH:_WHA, _WH:_WHA, _YH:_YHA}
370
+ }
371
+
372
+ # جدول تحويل الألف الفتحة الطويلة إلى حركات أخرى حسب سياقها
373
+ HOMOGENIZE_ALEF_HARAKA_TABLE = {
374
+ _S:{_S:'*' , _F:ALEF_HARAKA, _D:WAW_HARAKA, _K:YEH_HARAKA },
375
+ _F:{_S:ALEF_HARAKA, _F:ALEF_HARAKA, _D:ALEF_HARAKA, _K:ALEF_HARAKA },
376
+ _D:{_S:WAW_HARAKA, _F:ALEF_HARAKA, _D:ALEF_HARAKA, _K:YEH_HARAKA },
377
+ _K:{_S:YEH_HARAKA, _F:ALEF_HARAKA, _D:YEH_HARAKA, _K:ALEF_HARAKA},
378
+ }
379
+
380
+
381
+ # Table of irregular verbs
382
+ # irregular verbs have common forms
383
+ # جدول الأفعال عربية الشاذة،
384
+ # مثل الفعل رأى، أرى، أخذ أكل، سأل
385
+ #الأفعال المثال
386
+ # كل سطر يحتوي على جذوع تصريف الفعل
387
+ # في زمن معين
388
+ IRREGULAR_VERB_CONJUG = {}
389
+ CONJUG_BAB = u"باب التصريف"
390
+
391
+ # في الحركات، الحركة الأولى هي لحركة حرف المضارعة
392
+ IRREGULAR_VERB_CONJUG[u"رءى"+FATHA+FATHA] = {
393
+ CONJUG_BAB:(FATHA, FATHA),
394
+ TenseFuture:(u"رى", FATHA+FATHA+FATHA),
395
+ TensePassiveFuture:(u"رى", DAMMA+FATHA+FATHA),
396
+ TenseImperative:(u"رى", FATHA+FATHA),
397
+ }
398
+ #ToDO
399
+ # الفعل أرى مشكلة كبيرة
400
+ # لأنه الفعل الشاذ الوحيد الرباعي
401
+
402
+ IRREGULAR_VERB_CONJUG[u"ءرى"+FATHA+KASRA] = {
403
+ CONJUG_BAB:(KASRA, FATHA),
404
+ TenseFuture:(u"ري", DAMMA+KASRA+FATHA),
405
+ TensePassiveFuture:(u"ري", DAMMA+FATHA+FATHA),
406
+ TenseImperative:(u"ءري", FATHA+KASRA+FATHA),
407
+ }
408
+ #~ ان يتصرف من باب (عَلِمَ يَعْلَمُ)،
409
+ #~ لا تحذف واوه؛ نحو: وَجِلَ، يَوْجَلُ،
410
+ #~ عدا ثلاثة أفعال هي: (وذر), و(وسع)، و(وطأ)،
411
+ #~ تحذف واوها؛ فنقول: وَذِرَ، يَذَرُ،
412
+ # ونقول: وَسِعَ، يَسَعُ، ونقول: وَطِئَ، يَطَأُ.
413
+ #إذا ك# الفعل وذر يذر
414
+ # KASRA FATHA
415
+ IRREGULAR_VERB_CONJUG[u"وذر"+KASRA+FATHA] = {
416
+ CONJUG_BAB:(KASRA, FATHA),
417
+ TenseFuture:(u"ذر", FATHA+FATHA+DAMMA),
418
+ TensePassiveFuture:(u"ذر", DAMMA+FATHA+DAMMA),
419
+ TenseImperative:(u"ذر", FATHA+SUKUN),
420
+ }
421
+ # الفعل وَسِعَ يسع
422
+ # KASRA FATHA
423
+ IRREGULAR_VERB_CONJUG[u"وسع"+KASRA+FATHA] = {
424
+ CONJUG_BAB:(KASRA, FATHA),
425
+ TenseFuture:(u"سع", FATHA+FATHA+DAMMA),
426
+ TensePassiveFuture:(u"سع", DAMMA+FATHA+DAMMA),
427
+ TenseImperative:(u"سع", FATHA+SUKUN),
428
+ }
429
+ # الفعل وطئ يطأ
430
+ # KASRA FATHA
431
+ IRREGULAR_VERB_CONJUG[u"وطء"+KASRA+FATHA] = {
432
+ CONJUG_BAB:(KASRA, FATHA),
433
+ TenseFuture:(u"طء", FATHA+FATHA+DAMMA),
434
+ TensePassiveFuture:(u"وطء", DAMMA+SUKUN+FATHA+DAMMA),
435
+ TenseImperative:(u"طء", FATHA+SUKUN),
436
+ }
437
+
438
+
439
+
440
+ # الأفعال التي يتغير أمرها بحذف الهمزة وجوبا، مثل أكل، أخذ
441
+ # أما ما لا تحذف همزته وجوبا مثل سأل وأمر، فلا تعتبر شاذة
442
+
443
+ # الفعل أكَل يأكُل، كُل
444
+ #FATHA, DAMMA
445
+ IRREGULAR_VERB_CONJUG[u"ءكل"+FATHA+DAMMA] = {
446
+ CONJUG_BAB:(FATHA, DAMMA),
447
+ TenseFuture:(u"ءكل", FATHA+SUKUN+DAMMA+DAMMA),
448
+ TensePassiveFuture:(u"ءكل", DAMMA+SUKUN+FATHA+FATHA),
449
+ TenseImperative:(u"كل", DAMMA+SUKUN),
450
+ }
451
+ #الفعل أخَذَ يأخُذُ، خُذ
452
+ #FATHA, DAMMA
453
+ IRREGULAR_VERB_CONJUG[u"ءخذ"+FATHA+DAMMA] = {
454
+ CONJUG_BAB:(FATHA, DAMMA),
455
+ TenseFuture:(u"ءخذ", FATHA+SUKUN+DAMMA+DAMMA),
456
+ TensePassiveFuture:(u"ءخذ", DAMMA+SUKUN+FATHA+FATHA),
457
+ TenseImperative:(u"خذ", DAMMA+SUKUN),
458
+ }
459
+ #ج- إذا كان يتصرف من باب (مَنَعَ يَمْنَعُ)،
460
+ #~ تحذف واوه, نحو: وَضَعَ، يَضَعُ، وَجَأَ يَجَأُ، وَدَعَ يَدَعُ، وَزَعَ يَزَعُ،
461
+ #~ وَضَأَ يَضَأُ، وَطَأَ يَطَأُ، وَقَعَ يَقَعُ، وَلَغَ يَلَغُ، وَهَبَ يَهَبُ،
462
+ #~ عدا خمسة أفعال هي:
463
+ #~ (وَبَأ)، و(وَبَهَ)، و(وَجَعَ)، و(وَسَعَ)، و(وَهَلَ)،
464
+ #~ فلا تحذف منها الواو؛ فنقول: يَوْبَأُ، يَوْبَهُ، يَوْجَعُ، يَوْسَعُ، يَوْهَلُ.
465
+ # الأفعال (وَبَأ)، و(وَبَهَ)، و(وَجَعَ)، و(وَسَعَ)، و(وَهَلَ)،#الفعل وبَأ يوبأ
466
+ #FATHA FATHA
467
+ IRREGULAR_VERB_CONJUG[u"وبء"+FATHA+FATHA] = {
468
+ CONJUG_BAB:(FATHA, FATHA),
469
+ TenseFuture:(u"وبء", FATHA+SUKUN+FATHA+DAMMA),
470
+ TensePassiveFuture:(u"وبء", DAMMA+SUKUN+FATHA+DAMMA),
471
+ TenseImperative:(u"وبء", SUKUN+FATHA+SUKUN),
472
+ }
473
+ # الفعل وبه يوبه
474
+ #FATHA FATHA
475
+ IRREGULAR_VERB_CONJUG[u"وبه"+FATHA+FATHA] = {
476
+ CONJUG_BAB:(FATHA, FATHA),
477
+ TenseFuture:(u"وبه", FATHA+SUKUN+FATHA+DAMMA),
478
+ TensePassiveFuture:(u"وبه", DAMMA+SUKUN+FATHA+DAMMA),
479
+ TenseImperative:(u"وبه", SUKUN+FATHA+SUKUN),
480
+ }
481
+ # الفعل وجع يوجع
482
+ #FATHA FATHA
483
+ IRREGULAR_VERB_CONJUG[u"وجع"+FATHA+FATHA] = {
484
+ CONJUG_BAB: (FATHA, FATHA),
485
+ TenseFuture: (u"وجع", FATHA+SUKUN+FATHA+DAMMA),
486
+ TensePassiveFuture: (u"وجع", DAMMA+SUKUN+FATHA+DAMMA),
487
+ TenseImperative: (u"وجع", SUKUN+FATHA+SUKUN),
488
+ }
489
+ #الفعل وسع يوسع
490
+ #FATHA FATHA
491
+ IRREGULAR_VERB_CONJUG[u"وسع"+FATHA+FATHA] = {
492
+ CONJUG_BAB: (FATHA, FATHA),
493
+ TenseFuture: (u"وسع", FATHA+SUKUN+FATHA+DAMMA),
494
+ TensePassiveFuture: (u"وسع", DAMMA+SUKUN+FATHA+DAMMA),
495
+ TenseImperative: (u"وسع", SUKUN+FATHA+SUKUN),
496
+ }
497
+
498
+ # الفعل وهل يوهل
499
+ #FATHA FATHA
500
+ IRREGULAR_VERB_CONJUG[u"وهل"+FATHA+FATHA] = {
501
+ CONJUG_BAB: (FATHA, FATHA),
502
+ TenseFuture: (u"وهل", FATHA+SUKUN+FATHA+DAMMA),
503
+ TensePassiveFuture: (u"وهل", DAMMA+SUKUN+FATHA+DAMMA),
504
+ TenseImperative: (u"وهل", SUKUN+FATHA+SUKUN),
505
+ }
506
+
507
+
508
+
509
+ ALEF_MADDA_VERB_TABLE = {
510
+ u'آبل':[u'أءبل'],
511
+ u'آبه':[u'أءبه'],
512
+ u'آبى':[u'أءبى'],
513
+ u'آتم':[u'أءتم'],
514
+ u'آتن':[u'أءتن'],
515
+ u'آتى':[u'أءتى'],
516
+ #~ u'آتى':[u'أءتى'],
517
+ u'آثر':[u'أءثر'],
518
+ u'آثف':[u'أءثف'],
519
+ u'آثم':[u'أءثم'],
520
+ u'آثى':[u'ءاثى'],
521
+ u'آجد':[u'أءجد'],
522
+ u'آجر':[u'أءجر', u'ءاجر'],
523
+ u'آجل':[u'أءجل'],
524
+ u'آجم':[u'أءجم'],
525
+ u'آحن':[u'ءاحن'],
526
+ u'آخذ':[u'ءاخذ'],
527
+ u'آخى':[u'أءخى', u'ءاخى'],
528
+ u'آدب':[u'أءدب'],
529
+ u'آدم':[u'أءدم'],
530
+ u'آدى':[u'أءدى'],
531
+ u'آذن':[u'أءذن'],
532
+ u'آذى':[u'أءذى'],
533
+ u'آرب':[u'أءرب', u'ءارب'],
534
+ u'آرخ':[u'أءرخ'],
535
+ u'آرس':[u'أءرس'],
536
+ u'آرض':[u'أءرض'],
537
+ u'آرط':[u'أءرط'],
538
+ u'آرف':[u'ءارف'],
539
+ u'آرق':[u'أءرق'],
540
+ u'آرك':[u'أءرك'],
541
+ u'آرم':[u'ءارم'],
542
+ u'آرن':[u'أءرن', u'ءارن'],
543
+ u'آرى':[u'أءرى'],
544
+ u'آزر':[u'ءازر'],
545
+ u'آزف':[u'أءزف'],
546
+ u'آزل':[u'أءزل'],
547
+ u'آزى':[u'أءزى', u'ءازى'],
548
+ u'آسب':[u'أءسب'],
549
+ u'آسد':[u'أءسد'],
550
+ u'آسف':[u'أءسف'],
551
+ u'آسن':[u'أءسن'],
552
+ #~ u'آسى':[u'ءاسى'],
553
+ u'آسى':[u'أءسى', u'ءاسى'],
554
+ u'آشى':[u'أءشى'],
555
+ u'آصد':[u'أءصد'],
556
+ u'آصر':[u'ءاصر'],
557
+ u'آصل':[u'أءصل'],
558
+ u'آضّ':[u'ءاضّ'],
559
+ u'آض':[u'ءاضّ'],
560
+ u'آطم':[u'أءطم'],
561
+ u'آفك':[u'أءفك'],
562
+ u'آفى':[u'أءفى'],
563
+ u'آقط':[u'أءقط'],
564
+ u'آكد':[u'أءكد'],
565
+ u'آكر':[u'ءاكر'],
566
+ u'آكف':[u'أءكف'],
567
+ u'آكل':[u'أءكل', u'ءاكل'],
568
+ u'آلت':[u'أءلت'],
569
+ u'آلس':[u'ءالس'],
570
+ u'آلف':[u'أءلف', u'ءالف'],
571
+ u'آلم':[u'أءلم'],
572
+ u'آلى':[u'أءلى'],
573
+ u'آمر':[u'أءمر', u'ءامر'],
574
+ u'آمن':[u'أءمن'],
575
+ u'آنث':[u'أءنث'],
576
+ u'آنس':[u'أءنس', u'ءانس'],
577
+ u'آنض':[u'أءنض'],
578
+ u'آنف':[u'أءنف'],
579
+ u'آنق':[u'أءنق'],
580
+ u'آنى':[u'أءنى'],
581
+ u'آهل':[u'أءهل'],
582
+ u'آوب':[u'ءاوب'],
583
+ u'آوى':[u'أءوى'],
584
+ u'آيد':[u'ءايد'],
585
+ u'آيس':[u'أءيس'],
586
+ }
587
+
588
+ STANDARD_REPLACEMENT=[
589
+ #-تحويل همزة القطع على الألف بعدها فتحة
590
+ #وهمزة القطع على الألف بعدها سكون إلى ألف ممدودة
591
+ ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF]), ALEF_MADDA)
592
+ , ( u"".join([ALEF_MADDA, FATHA]), ALEF_MADDA)
593
+ , ( u"".join([ALEF_MADDA, ALEF]), ALEF_MADDA)
594
+ , ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, SUKUN]), ALEF_MADDA)
595
+ , ( u"".join([ALEF_HAMZA_ABOVE, FATHA, ALEF_HAMZA_ABOVE, FATHA]), ALEF_MADDA)
596
+ , ( u"".join([ALEF_HAMZA_ABOVE, DAMMA, WAW_HAMZA, SUKUN]), ALEF_HAMZA_ABOVE+DAMMA+WAW)
597
+ , ( u"".join([YEH, SHADDA, FATHA, ALEF_MAKSURA]), YEH+SHADDA+FATHA+ALEF)
598
+ # إدغام النون الساكنة
599
+ , ( u"".join([NOON, SUKUN, NOON]), NOON+SHADDA)
600
+ # إذا كان الحرف الأول ساكنا وبعده شدة، ثم أضيفت إليه الألف
601
+ , ( u"".join([SUKUN, SHADDA]), SHADDA)
602
+ ## معالجة ألف التفريق
603
+ , ( ALEF_WASLA, ALEF)
604
+ ## معالجة ألف التفريق
605
+ , ( ALEF_MAMDUDA, ALEF)
606
+
607
+ ## معالجة ألف الوصل الزائدة عند إضافتها إلى أول الفعل المثال
608
+ ## word = word.replace( u"%s%s%s%s"%(ALEF, DAMMA, YEH, SUKUN), ALEF+DAMMA+WAW)
609
+
610
+
611
+
612
+
613
+ ]
libqutrub/verb_db.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ # -*- coding = utf-8 -*-
3
+ #************************************************************************
4
+ # $Id: ar_verb.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
5
+ #
6
+ # ------------
7
+ # Description:
8
+ # ------------
9
+ # Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
10
+ #
11
+ # Elementary functions to manipulate Arabic text
12
+ #
13
+ # -----------------
14
+ # Revision Details: (Updated by Revision Control System)
15
+ # -----------------
16
+ # $Date: 2009/06/02 01:10:00 $
17
+ # $Author: Taha Zerrouki $
18
+ # $Revision: 0.7 $
19
+ # $Source: arabtechies.sourceforge.net
20
+ #
21
+ #***********************************************************************/
22
+ """
23
+ Basic routines to treat verbs
24
+ ar_verb
25
+ """
26
+ import os
27
+ # the db file
28
+ db_path = os.path.join(os.path.dirname(__file__), "data/verbdict.db")
29
+
30
+ import pyarabic.araby as araby
31
+ import libqutrub.triverbtable as triverbtable
32
+ TRIVERBTABLE_INDEX = {}
33
+
34
def create_index_triverbtable():
    """Populate TRIVERBTABLE_INDEX from the triliteral verb dictionary.

    Every entry of triverbtable.TriVerbTable is filed under its verb with
    the harakat stripped and the hamza normalized, so lookups can go
    straight to the candidate entries instead of scanning the whole table.

    @return: nothing; the module-level TRIVERBTABLE_INDEX is filled in place.
    @rtype: None
    """
    # Index key: normalized unvocalized verb. Value: list of TriVerbTable keys.
    for table_key, entry in triverbtable.TriVerbTable.items():
        bare_verb = araby.strip_harakat(entry['verb'])
        index_key = araby.normalize_hamza(bare_verb)
        TRIVERBTABLE_INDEX.setdefault(index_key, []).append(table_key)
49
+
50
+
51
+
52
def find_alltriverb(triverb, givenharaka = araby.FATHA,
    vocalised_entree = False):
    """
    Find the triliteral verb in the dictionary (TriVerbTable).

    The verb is looked up in TRIVERBTABLE_INDEX by its hamza-normalized
    form. Every TriVerbTable entry filed under that form is returned; an
    entry whose 'verb' equals the given verb and whose 'haraka' equals the
    given haraka is put at the head of the list so it is treated first.

    Each returned item is a TriVerbTable record; this function reads its
    'verb' and 'haraka' keys (the original documentation also lists 'root',
    'bab' and 'transitive' -- confirm against triverbtable).

    @param triverb: given verb.
    @type triverb: unicode.
    @param givenharaka: given haraka of the future form of the verb,
    default(FATHA).
    @type givenharaka: unicode.
    @param vocalised_entree: True if the given verb is vocalized,
    default False.
    @type vocalised_entree: Boolean.
    @return: list of triliteral verbs (empty if the verb is not indexed).
    @rtype: list of dicts.
    """
    liste = []

    # The index is keyed on unvocalized verbs, so strip harakat if needed.
    if vocalised_entree:
        verb_nm = araby.strip_harakat(triverb)
    else:
        verb_nm = triverb

    normalized = araby.normalize_hamza(verb_nm)
    # dict.has_key() was removed in Python 3; use the `in` operator.
    if normalized in TRIVERBTABLE_INDEX:
        for verb_voc_id in TRIVERBTABLE_INDEX[normalized]:
            entry = triverbtable.TriVerbTable[verb_voc_id]
            if triverb == entry['verb'] and givenharaka == entry['haraka']:
                # exact match: give it priority
                liste.insert(0, entry)
            else:
                liste.append(entry)
    else:
        print("triverb has no verb")
    return liste
94
+
95
+
96
+
97
def find_triliteral_verb(db_base_path, triliteralverb, givenharaka):
    """
    Find the triliteral verb in the sqlite dictionary,
    return a list of possible verb forms.

    The database queried is the module-level db_path; db_base_path is kept
    for interface compatibility and is not used.

    @param db_base_path: the database path (unused).
    @type db_base_path: path string.
    @param triliteralverb: given verb.
    @type triliteralverb: unicode.
    @param givenharaka: given haraka of the future form of the verb.
    @type givenharaka: unicode.
    @return: list of dicts with the keys "verb", "haraka" and "transitive";
    None if the database cannot be opened or queried.
    @rtype: list of dicts, or None.
    """
    import sqlite3 as sqlite

    # The table is searched by the unvocalized form of the verb.
    verb_nm = araby.strip_harakat(triliteralverb)
    liste = []
    try:
        conn = sqlite.connect(db_path)
        try:
            cursor = conn.cursor()
            cursor.execute("""select verb_vocalised, haraka, transitive
                        from verbdict
                        where verb_unvocalised = ?""", (verb_nm, ))
            for verb_vocalised, haraka, transitive in cursor:
                # Transitivity flag stored in the database:
                # MEEM is transitive,
                # KAF is common (transitive and intransitive),
                # LAM is intransitive.
                is_transitive = transitive in (araby.KAF, araby.MEEM)
                item = {"verb":verb_vocalised,
                    "haraka":haraka, "transitive":is_transitive}
                # If the given verb is in the list, it is inserted at the
                # top of the list to be treated first.
                if triliteralverb == verb_vocalised and givenharaka == haraka:
                    liste.insert(0, item)
                # Otherwise the verb is appended to the list.
                else:
                    liste.append(item)
            cursor.close()
        finally:
            # Always release the connection, even when the query fails.
            conn.close()
    except (IOError, sqlite.Error):
        # sqlite reports a missing/unreadable database or table through
        # sqlite3.Error, not IOError; both mean "no dictionary available".
        return None
    return liste
libqutrub/verb_valid.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ # -*- coding = utf-8 -*-
3
+ #************************************************************************
4
+ # $Id: verb_valid.py, v 0.7 2009/06/02 01:10:00 Taha Zerrouki $
5
+ #
6
+ # ------------
7
+ # Description:
8
+ # ------------
9
+ # Copyright (c) 2009, Arabtechies, Arabeyes Taha Zerrouki
10
+ #
11
+ # Elementary function to validate verbs
12
+ #
13
+ # -----------------
14
+ # Revision Details: (Updated by Revision Control System)
15
+ # -----------------
16
+ # $Date: 2009/06/02 01:10:00 $
17
+ # $Author: Taha Zerrouki $
18
+ # $Revision: 0.7 $
19
+ # $Source: arabtechies.sourceforge.net
20
+ #
21
+ #***********************************************************************/
22
+ """
23
+ Basic routines to validate verbs
24
+ ar_verb
25
+ """
26
+ import re
27
+ # import string
28
+ # import sys
29
+ # import os
30
+ # import types
31
+ # from arabic_const import *
32
+ import libqutrub.verb_const as vconst #~ from verb_const import *
33
+ # import ar_ctype
34
+ import pyarabic.araby as araby
35
+ from pyarabic.araby import FATHA, SHADDA, HAMZA, ALEF, \
36
+ NOON, ALEF_HAMZA_ABOVE, ALEF_HAMZA_BELOW, ALEF_MADDA, \
37
+ ALEF_MAKSURA, BEH, DAD, DAL, DAMMATAN, FATHATAN, FEH, GHAIN, HAH, \
38
+ HEH, JEEM, KAF, KASRATAN, KHAH, LAM, REH, SAD, SHEEN, TAH, TEH, \
39
+ TEH_MARBUTA, THAL, THEH, YEH, ZAH, ZAIN
40
+ #used to
41
+ VALID_INFINITIVE_VERB6_PATTERN = \
42
+ re.compile(u"^است...|ا..ن..|ا..و..|ا..ا.ّ|ا....ّ|ا.ّ.ّ.|ا.ّا..$", re.UNICODE)
43
+
44
+ VALID_INFINITIVE_VERB4_PATTERN = re.compile(\
45
+ u"^([%s%s][^%s]{2}.|[^%s%s]%s[^%s%s].|[^%s%s]{2}%s[^%s]|[^%s%s]{4})$"\
46
+ %(ALEF_HAMZA_ABOVE, HAMZA, SHADDA, ALEF, SHADDA, ALEF, ALEF, SHADDA, ALEF,
47
+ SHADDA, SHADDA, SHADDA, ALEF, SHADDA), re.UNICODE)
48
+
49
+ VALID_INFINITIVE_VERB5_PATTERN = re.compile( u"|".join([
50
+ u"^ا...ّ$",
51
+ # حالة اتخذ أو اذّكر أو اطّلع
52
+ u"^%s[%s%s%s]%s..$"%(ALEF, TEH, THAL, TAH, SHADDA),
53
+ # حالة اتخذ أو اذّكر أو اطّلع
54
+ u"^ا[تذط]ّ[^اّ][^اّ]$",
55
+ # انفعل
56
+ u"^ان...$",
57
+ #افتعل
58
+ u"^(ازد|اصط|اضط)..$"
59
+ u"^ا[^صضطظد]ت..$",
60
+ u"^ا...ّ$",
61
+ # حالة اتخذ أو اذّكر أو اطّلع
62
+ u"^ا.ّ..$",
63
+ u"^ا...ى$",
64
+ ]) , re.UNICODE)
65
+
66
+ #####################################
67
+ #{validation functions
68
+ #####################################
69
+ def is_valid_infinitive_verb(word, vocalized = True):
70
+ """
71
+ Determine if the given word is a valid infinitive form of an arabic verb.
72
+ A word is not valid infinitive if
73
+ - lenght < 3 letters.
74
+ - starts with : ALEF_MAKSURA, WAW_HAMZA, YEH_HAMZA, HARAKAT
75
+ - contains TEH_MARBUTA, Tanwin
76
+ - contains non arabic letters.
77
+ - contains ALEF_MAKSURA not in the end.
78
+ - contains double haraka : a warning
79
+ @param word: given word.
80
+ @type word: unicode.
81
+ @param is_vocalized: if the given word is vocalized.
82
+ @type is_vocalized:Boolean, default(True).
83
+ @return: True if the word is a valid infinitive form of verb.
84
+ @rtype: Boolean.
85
+ """
86
+ # test if the word is an arabic valid word,
87
+ if not araby.is_arabicword(word):
88
+ return False
89
+ if vocalized :
90
+ word_nm = araby.strip_harakat(word)
91
+ else:
92
+ word_nm = word
93
+ # the alef_madda is considered as 2 letters
94
+
95
+ word_nm = word_nm.replace(ALEF_MADDA, HAMZA+ALEF)
96
+ length = len(word_nm)
97
+
98
+ # lenght with shadda must be between 3 and 6
99
+ if length < 3 or length >= 7:
100
+ return False
101
+ # a 3 length verb can't start by Alef or Shadda,
102
+ #and the second letter can't be shadda
103
+ elif length == 3 and (word_nm[0] == ALEF or word_nm[0] == SHADDA \
104
+ or word_nm[1] == SHADDA):
105
+ return False
106
+
107
+ # a 5 length verb must start by ALEF or TEH
108
+ elif length == 5 and word_nm[0] not in (TEH, ALEF):
109
+ return False
110
+ # a 6 length verb must start by ALEF
111
+ elif length == 6 and word_nm[0] != ALEF:
112
+ return False
113
+
114
+ # contains some invalide letters in verb
115
+ elif re.search(u"[%s%s%s%s%s]"%(ALEF_HAMZA_BELOW, TEH_MARBUTA,
116
+ DAMMATAN, KASRATAN, FATHATAN), word):
117
+ return False
118
+ # contains some SHADDA sequence letters in verb
119
+ # Like shadda shadda, shadda on alef, start
120
+ # by shadda, shadda on alef_ maksura,
121
+ # ALEF folowed by (ALEF, ALEF_MAKSURA)
122
+ # ALEF Folowed by a letter and ALEF
123
+ # end with ALEF folowed by (YEH, ALEF_MAKSURA)
124
+ # first letter is alef and ALLw alef and two letters aand shadda
125
+ elif re.search(u"([%s%s%s]%s|^%s|^%s..%s|^.%s|%s.%s|%s%s|%s[%s%s]$)"%(
126
+ ALEF, ALEF_MAKSURA, SHADDA, SHADDA, SHADDA, ALEF, SHADDA, SHADDA,
127
+ ALEF, ALEF, ALEF, ALEF, ALEF, ALEF_MAKSURA, YEH), word_nm):
128
+ return False
129
+
130
+
131
+ # Invalid root form some letters :
132
+ #~ # initial YEH folowed by
133
+ #~ ((THEH, JEEM, HAH, KHAH, THAL, ZAIN, SHEEN, SAD, DAD,
134
+ #~ TAH, ZAH, GHAIN, KAF, HEH, YEH))
135
+ elif re.search(u"^%s[%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s]"%(
136
+ YEH, THEH, JEEM, HAH, KHAH, THAL, ZAIN, SHEEN, SAD, DAD,
137
+ TAH, ZAH, GHAIN, KAF, HEH, YEH), word_nm):
138
+ return False
139
+
140
+
141
+ # TEH After (DAL, THAL, TAH, ZAH, DAD)
142
+ elif re.search(u"[%s%s%s%s%s]%s"%(DAL, THAL, DAD, TAH, ZAH, TEH), word_nm):
143
+ return False
144
+ # Contains invalid root sequence in arabic, near in phonetic
145
+ # like BEH and FEH, LAM And REH
146
+ elif re.search(u"%s%s|%s%s|%s%s|%s%s|%s%s|%s%s|%s%s"%(
147
+ LAM, REH, REH, LAM, FEH, BEH, BEH, FEH, NOON,
148
+ LAM, HEH, HAH, HAH, HEH), word_nm):
149
+ return False
150
+
151
+
152
+ # in non 5 letters verbs :initial TEH followed by
153
+ # (THEH, DAL, THAL, ZAIN, SHEEN, SAD, DAD, TAH, ZAH)
154
+ elif length != 5 and word_nm.startswith(TEH) and word_nm[1] in (
155
+ TEH, THEH, DAL, THAL, ZAIN, SHEEN, SAD, DAD, TAH, ZAH):
156
+ return False
157
+ # if word start by the same letter doubled
158
+ elif word_nm[0] == word_nm[1] and word[0] != TEH:
159
+ return False
160
+
161
+ #verify the wazn of the verb
162
+ elif length == 3:
163
+ if re.match("^[^%s][^%s].$"%(ALEF, SHADDA), word_nm):
164
+ return True
165
+ # الأوزان المقبولة هي فعل، فعّ،
166
+ # الأوزان غير المقبولة
167
+ # اعل، فّل
168
+ else: return False
169
+ elif length == 4:
170
+ #1- أفعل، 2- فاعل، 3 فعّل 4 فعلل
171
+ if re.match(\
172
+ "^([%s%s][^%s]{2}.|[^%s%s]%s[^%s%s].|[^%s%s]{2}%s[^%s]|[^%s%s]{4})$"\
173
+ %(ALEF_HAMZA_ABOVE, HAMZA, SHADDA, ALEF, SHADDA, ALEF, ALEF, SHADDA,
174
+ ALEF, SHADDA, SHADDA, SHADDA, ALEF, SHADDA), word_nm):
175
+
176
+ return True
177
+ # الأوزان المقبولة هي فعل، فعّ،
178
+ # الأوزان غير المقبولة
179
+ # افعل: يجب تثبيت همزة القطع
180
+ #فّعل، فعلّ: الشدة لها موضع خاص
181
+ # فعال، فعلا: للألف موضع خاص
182
+ else: return False
183
+ elif length == 5:
184
+
185
+ if word_nm.startswith(ALEF):
186
+ if re.match(u"^ا...ّ$", word_nm):
187
+ return True
188
+ # حالة اتخذ أو اذّكر أو اطّلع
189
+ if re.match(u"^%s[%s%s%s]%s..$"%(ALEF, TEH, THAL, TAH, SHADDA), \
190
+ word_nm):
191
+ return True
192
+
193
+ # انفعل
194
+ elif re.match(u"^ان...$", word_nm):
195
+ return True
196
+ #افتعل
197
+ elif re.match(u"^(ازد|اصط|اضط)..$", word_nm):
198
+ return True
199
+ elif re.match(u"^ا[^صضطظد]ت..$", word_nm):
200
+ return True
201
+ elif re.match(u"^ا...ّ$", word_nm):
202
+ return True
203
+ # حالة اتخذ أو اذّكر أو اطّلع
204
+ elif re.match(u"^ا.ّ..$", word_nm):
205
+ return True
206
+ elif re.match(u"^ا...ى$", word_nm):
207
+ return True
208
+ else: return False
209
+ elif word_nm.startswith(TEH):
210
+ return True
211
+ else:
212
+ return False
213
+
214
+ # الأوزان المقبولة هي فعل، فعّ،
215
+ # الأوزان غير المقبولة
216
+ #للشدة موضع خاص: تفعّل، افتعّ
217
+ # للألف مواضع خاصة،
218
+ elif length == 6:
219
+ if not (word_nm.startswith(ALEF) or word_nm.startswith(TEH)):
220
+ return False
221
+ if VALID_INFINITIVE_VERB6_PATTERN.match(word_nm):
222
+ return True
223
+ # الأوزان المقبولة هي فعل، فعّ،
224
+ # الأوزان غير المقبولة
225
+ #للشدة موضع خاص: تفعّل، افتعّ
226
+ # للألف مواضع خاصة،
227
+ else: return False
228
+ return True
229
+
230
+
231
def suggest_verb(verb):
    """
    Propose valid infinitive verb forms for an invalid infinitive form.

    The given verb is first cleaned (harakat and letters that are not
    acceptable in an infinitive are removed), then a correction heuristic
    is chosen from its first letter or its length. Every candidate is kept
    only if is_valid_infinitive_verb accepts it.
    @param verb: given verb, of invalid infinitive form.
    @type verb: unicode.
    @return: a list of suggested infinitive verb forms (may be empty).
    @rtype: list of unicode.
    """
    def keep_valid(candidates):
        """Return, in order, the candidates that are valid infinitives."""
        return [form for form in candidates if is_valid_infinitive_verb(form)]

    # strip harakat, then drop TEH_MARBUTA and the tanwin marks
    stripped = araby.strip_harakat(verb)
    stripped = re.sub(
        u"[%s%s%s%s]"%( TEH_MARBUTA, DAMMATAN, KASRATAN, FATHATAN),
        '', stripped)

    # the cleaned verb may already be valid: it is the only suggestion
    if is_valid_infinitive_verb(stripped):
        return [stripped]

    # common error: ALEF_HAMZA_BELOW written instead of ALEF, like إستعمل
    if stripped.startswith(ALEF_HAMZA_BELOW):
        return keep_valid([re.sub(ALEF_HAMZA_BELOW, ALEF, stripped)])

    # common error: ALEF written instead of ALEF_HAMZA_ABOVE, like اضرب
    if stripped.startswith(ALEF):
        return keep_valid(
            [re.sub(ALEF, ALEF_HAMZA_ABOVE+FATHA, stripped, count=1)])

    size = len(stripped)

    if size == 2:
        # two letters: try to complete the verb with a third letter
        base = re.sub(ALEF, ALEF_HAMZA_ABOVE, stripped, count=1)
        return keep_valid([
            base+SHADDA,            # Shadda at the end
            base+ALEF_MAKSURA,      # Alef Maksura at the end
            base+ALEF,              # Alef at the end
            base[0]+ALEF+base[1],   # Alef in the middle
        ])

    if size >= 6:
        # too long: prefix ALEF to 5-letter windows of the verb
        # NOTE(review): range(size-6) is empty when size is exactly 6,
        # so a 6-letter verb gets no suggestion here -- confirm intent.
        return keep_valid(
            [ALEF+stripped[start:start+5] for start in range(size-6)])

    if size == 5:
        # ToDo (kept from the original): review this part.
        # NOTE(review): range(size-5) is always empty here, so this branch
        # currently never produces a suggestion.
        return keep_valid(
            [ALEF+stripped[start:start+4] for start in range(size-5)])

    if size == 4:
        candidates = []
        # misplaced alef or shadda: swap the 2nd and 3rd letters
        # فعال = > فاعل
        # فّعل = > فعّل
        if stripped[2] == ALEF or stripped[1] == SHADDA:
            candidates.append(
                stripped[0]+stripped[2]+stripped[1]+stripped[3])
        # final shadda: swap the 3rd and 4th letters
        # فعلّ = > فعّل
        if stripped.endswith(SHADDA):
            candidates.append(
                stripped[0]+stripped[1]+stripped[3]+stripped[2])
        return keep_valid(candidates)

    # any other length: suggest conjugating another verb
    return [u"كتب"]
331
+
332
+ #####################################
333
+ #{verb pretreatment functions
334
+ #####################################
335
def normalize_alef_madda(word):
    """
    Convert Alef madda into two letters.

    Only words that start with ALEF_MADDA are converted: their harakat are
    stripped, then a 3-letter word found in vconst.ALEF_MADDA_VERB_TABLE is
    replaced by the first entry of that table; in every other case each
    ALEF_MADDA is replaced by HAMZA followed by ALEF.
    A word that does not start with ALEF_MADDA is returned unchanged.
    @param word: given word.
    @type word: unicode.
    @return: converted word.
    @rtype: unicode.
    """
    if not word.startswith(ALEF_MADDA):
        # nothing to convert; return the input as is instead of reading
        # a local variable that is never assigned on this path
        return word

    word_nm = araby.strip_harakat(word)
    # `in` replaces dict.has_key(), which does not exist in Python 3
    if len(word_nm) == 3 and word_nm in vconst.ALEF_MADDA_VERB_TABLE:
        # return the first one only
        return vconst.ALEF_MADDA_VERB_TABLE[word_nm][0]
    return word_nm.replace(ALEF_MADDA, HAMZA+ALEF)
358
+
359
+
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Flask==3.1.0
2
+ PyArabic==0.6.15
templates/index.html ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="ar">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>المصرّف</title>
6
+
7
+ <style>
8
+ body {
9
+ direction: rtl;
10
+ text-align: right;
11
+ font-family: Arial;
12
+ padding: 30px;
13
+ background: #f5f5f5;
14
+ }
15
+
16
+ .container {
17
+ background: white;
18
+ padding: 20px;
19
+ border-radius: 10px;
20
+ }
21
+
22
+ input, select, button {
23
+ padding: 10px;
24
+ margin: 10px 0;
25
+ width: 100%;
26
+ font-size: 16px;
27
+ }
28
+
29
+ button {
30
+ background: #2c7be5;
31
+ color: white;
32
+ border: none;
33
+ cursor: pointer;
34
+ }
35
+
36
+ #result {
37
+ margin-top: 20px;
38
+ }
39
+ </style>
40
+ </head>
41
+
42
+ <body>
43
+
44
+ <div class="container">
45
+ <h1>المصرّف</h1>
46
+
47
+ <input id="word" placeholder="أدخل الفعل (مثال: كتب)">
48
+
49
+ <select id="tense">
50
+ <option value="past">الماضي</option>
51
+ <option value="future">المضارع</option>
52
+ <option value="jussive">المجزوم</option>
53
+ <option value="subjunctive">المنصوب</option>
54
+ <option value="confirmed">المؤكد</option>
55
+ <option value="imperative">الأمر</option>
56
+ </select>
57
+
58
+ <select id="future_type">
59
+ <option value="فتحة">فتحة</option>
60
+ <option value="ضمة">ضمة</option>
61
+ <option value="كسرة">كسرة</option>
62
+ </select>
63
+
64
+ <button onclick="generate()">توليد التصريف</button>
65
+
66
+ <div id="result"></div>
67
+ </div>
68
+
69
+ <script>
70
/**
 * Read the verb, tense and future-type fields, POST them as JSON to the
 * /conjugate endpoint and show the returned `result` in the #result box.
 * On an HTTP, network or JSON failure an error message is shown instead.
 */
async function generate() {
    const resultBox = document.getElementById("result");
    const payload = {
        word: document.getElementById("word").value,
        tense: document.getElementById("tense").value,
        future_type: document.getElementById("future_type").value
    };

    try {
        // Relative URL: the page is served by the same application that
        // exposes /conjugate, so this works on any host and port instead
        // of only on a local development server at 127.0.0.1:5000.
        const response = await fetch("/conjugate", {
            method: "POST",
            headers: {
                "Content-Type": "application/json"
            },
            body: JSON.stringify(payload)
        });

        // fetch() only rejects on network errors; HTTP error statuses
        // must be checked explicitly.
        if (!response.ok) {
            throw new Error("HTTP " + response.status);
        }

        const data = await response.json();

        // NOTE(review): result is inserted as HTML, as before; this assumes
        // the server returns trusted markup -- confirm against /conjugate.
        resultBox.innerHTML = data.result;
    } catch (err) {
        // textContent: never interpret the error text as markup
        resultBox.textContent = "تعذّر توليد التصريف: " + err.message;
    }
}
91
+ </script>
92
+
93
+ </body>
94
+ </html>