File size: 2,883 Bytes
21baa2f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
'''
Created on ٢٨/٠٦/٢٠١٠
@Created by: Muhammad Altabba
'''
from ....Controllers.General.ArabicStringUtility import *;
from ....Controllers.Tokenization.TokenType import TokenType;
from ....Controllers.Morphology.Entities.SurfaceFormMorphemes import *;
class CompoundParsing(object):
    """
    # PyUML: Do not remove this line! # XMI_ID:_q0LRgY35Ed-gg8GOK1TmhA
    """
    # Parsing compound words: collapses an "Arabic word, whitespace, Arabic
    # word" token triple into a single word token when the pair is listed in
    # the compound-nouns dictionary.

    def __init__(self):
        """Constructor; no instance state is set up."""

    def Parsing(self, textEncapsulator, specialWords):
        """
        Merge known two-part compound nouns in place.

        For every sentence in ``textEncapsulator.Sentences`` the token list
        ``sentence.Words`` is scanned left to right. Whenever three
        consecutive tokens are (Arabic text, whitespace, Arabic text), the
        space-joined ``SecondNormalizationForm`` of the two words is looked
        up in ``specialWords.CompoundNouns``. On a hit whose ``VoweledForm``
        is accepted by ``ArabicStringUtility.IsCompatible`` for the
        space-joined ``FirstNormalizationForm``, the first token is rewritten
        to represent the compound and the other two tokens are deleted.

        Parameters:
            textEncapsulator: object exposing ``Sentences``; each sentence
                exposes a mutable list ``Words``.
            specialWords: object exposing ``CompoundNouns``, a mapping from
                the unvoweled compound string to a compound entry.

        Returns:
            None. ``sentence.Words`` lists are modified in place.
        """
        compoundNounsDict = specialWords.CompoundNouns

        # The sentence list itself is never modified here, so it is iterated
        # directly rather than through a copy.
        for sentence in textEncapsulator.Sentences:
            words = sentence.Words
            index = 0
            # A candidate needs tokens at index, index + 1 and index + 2.
            # len(words) is re-read each pass because merging shrinks it.
            while index < len(words) - 2:
                if not self._IsWordSpaceWordAt(words, index):
                    index += 1
                    continue

                unvoweledForm = ' '.join([words[index].SecondNormalizationForm,
                                          words[index + 2].SecondNormalizationForm])
                # Direct membership test on the mapping; no key list is built.
                if unvoweledForm in compoundNounsDict:
                    compound = compoundNounsDict[unvoweledForm]
                    # Only built after a dictionary hit, since it is needed
                    # solely for the compatibility check.
                    firstNormalizationForm = ' '.join(
                        [words[index].FirstNormalizationForm,
                         words[index + 2].FirstNormalizationForm])
                    # NOTE(review): the class itself is passed as the first
                    # argument, as in the original call; IsCompatible's
                    # signature is not visible here - confirm.
                    if ArabicStringUtility.IsCompatible(ArabicStringUtility,
                                                        firstNormalizationForm,
                                                        compound.VoweledForm):
                        self._MergeIntoCompound(words, index, compound)

                # Advance one token whether or not a merge happened.
                index += 1

    def _IsWordSpaceWordAt(self, words, index):
        """Return True if tokens index..index+2 are Arabic text, whitespace, Arabic text."""
        return (words[index].TokenType.Id == TokenType.Constants.Id.ArabicText and
                words[index + 1].TokenType.Id == TokenType.Constants.Id.WhiteSpace and
                words[index + 2].TokenType.Id == TokenType.Constants.Id.ArabicText)

    def _MergeIntoCompound(self, words, index, compound):
        """Rewrite words[index] as the compound and delete the two tokens after it."""
        firstPart = words[index]
        secondPart = words[index + 2]

        firstPart.OriginalString = ' '.join([firstPart.OriginalString,
                                             secondPart.OriginalString])
        firstPart.FirstNormalizationForm = compound.VoweledForm
        firstPart.SecondNormalizationForm = compound.UnvoweledForm
        firstPart.String = compound.UnvoweledForm
        firstPart.MorphologicalParsingCompleted = True
        # NOTE(review): the two empty lists are presumably the clitic slots
        # of SurfaceFormMorphemes - confirm against its constructor.
        firstPart.SurfaceFormMorphemes = [
            SurfaceFormMorphemes([], compound.ReturnAsUnderivedCliticless(), [])]

        # Delete the whitespace token and the second part by position.
        # list.remove() would search from the start of the list by equality
        # and could drop an earlier token that compares equal.
        del words[index + 1:index + 3]
|