File size: 4,638 Bytes
21baa2f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
'''
Created on 02/04/2010
@Created by: Muhammad Altabba
'''
from ...Controllers.Tokenization.TokenType import TokenType;
from ..Transducers.StatesGraph import *;
from ..Transducers.State import *;
class AffixParser(object):
    """
    # PyUML: Do not remove this line! # XMI_ID:_qyiSsI35Ed-gg8GOK1TmhA

    Morphological affix parser.

    Runs state-graph transducers over the words of each sentence. Every
    eligible word is scanned either from its first character forwards
    (prefixes) or from its last character backwards (suffixes), and the
    actions a graph has collected are applied whenever an end state is
    reached.
    """

    def __init__(self):
        """Constructor; the parser keeps no state of its own."""

    def ParsePrefix(self, sentences, statesGraphs):
        """
        Match the prefix graphs against every eligible word, scanning each
        word from index 0 towards its end.

        sentences    -- sequence of sentence objects exposing ``Words``.
        statesGraphs -- sequence of graph templates exposing ``Start`` and
                        ``States``.

        Returns None; the effect is whatever the matched actions do through
        ``ApplyToWord``.
        """
        self._ParseAffixes(sentences, statesGraphs, True)

    def ParseSuffix(self, sentences, statesGraphs):
        """
        Match the suffix graphs against every eligible word, scanning each
        word from its last index down to 0.

        sentences    -- sequence of sentence objects exposing ``Words``.
        statesGraphs -- sequence of graph templates exposing ``Start`` and
                        ``States``.

        Returns None; the effect is whatever the matched actions do through
        ``ApplyToWord``.
        """
        self._ParseAffixes(sentences, statesGraphs, False)

    def _ParseAffixes(self, sentences, statesGraphs, forward):
        """
        Shared driver for ParsePrefix (forward=True) and ParseSuffix
        (forward=False).

        A word is eligible when its ``TokenType.Id`` equals
        ``TokenType.Constants.Id.ArabicText`` and its
        ``MorphologicalParsingCompleted`` flag is not set. Every graph in
        ``statesGraphs`` is run against each eligible word, in order.
        """
        for sentence in sentences:
            wordIndex = 0
            # Index-based on purpose: ``Words`` is re-read on every pass.
            # NOTE(review): presumably applied actions may alter the word
            # list while it is being walked - confirm before switching to
            # plain iteration.
            while wordIndex < len(sentence.Words):
                word = sentence.Words[wordIndex]
                if (word.TokenType.Id == TokenType.Constants.Id.ArabicText
                        and not word.MorphologicalParsingCompleted):
                    # Eligibility is checked once per word, not between
                    # graphs, so all graphs run even if an earlier one
                    # changes the word.
                    for template in statesGraphs:
                        self._RunGraphOnWord(template, sentence, wordIndex,
                                             forward)
                wordIndex += 1

    def _RunGraphOnWord(self, template, sentence, wordIndex, forward):
        """
        Walk one graph over the word at ``wordIndex`` and apply the actions
        it collects each time an end state is reached.

        The walk stops when the scan position leaves the word or when
        ``Match`` does not return a ``State``.
        """
        # Create a new, separate StatesGraph instance since there are
        # temporary variables inside it.
        statesGraph = StatesGraph(template.Start, template.States)
        state = statesGraph.Start

        # The forward scan keeps the four-argument call and so relies on
        # Match's own default for its last parameter; the backward scan
        # passes False explicitly.
        # NOTE(review): presumably that flag selects the direction - confirm.
        directionArgs = () if forward else (False,)

        if forward:
            position = 0
        else:
            position = len(sentence.Words[wordIndex].String) - 1

        while True:
            # The word's String is re-read for the forward bound on every
            # step, exactly as before the refactoring.
            if forward:
                insideWord = position < len(sentence.Words[wordIndex].String)
            else:
                insideWord = position >= 0
            if not insideWord:
                break

            nextState, numberToConsume = statesGraph.Match(
                state, sentence, wordIndex, position, *directionArgs)
            if not isinstance(nextState, State):
                # No match from the current state: give up on this graph.
                break

            if forward:
                position += numberToConsume
            else:
                position -= numberToConsume

            if nextState.IsEnd:
                for targetWordIndex, actions in statesGraph.ActionsToApply.items():
                    actions.ApplyToWord(sentence, targetWordIndex)
                # Clear is used to forbid applying the same actions more
                # than once if there are many ends.
                statesGraph.ActionsToApply.clear()
            state = nextState
|