File size: 2,883 Bytes
21baa2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

'''
Created on ٢٨‏/٠٦‏/٢٠١٠

@Created by: Muhammad Altabba
'''
from ....Controllers.General.ArabicStringUtility import *;
from ....Controllers.Tokenization.TokenType import TokenType;
from ....Controllers.Morphology.Entities.SurfaceFormMorphemes import *;

class CompoundParsing(object):
    """
     # PyUML: Do not remove this line! # XMI_ID:_q0LRgY35Ed-gg8GOK1TmhA
    """
    '''
    Parsing Compound words
    '''


    def __init__(self):
        '''
        Constructor
        '''
    pass

    def Parsing(self, textEncapsulator, specialWords):
        compoundNounsDict = specialWords.CompoundNouns;
        
        for sentence in textEncapsulator.Sentences[:]:
            index = 0;
            while index < len(sentence.Words)-2:
                if(not (sentence.Words[index].TokenType.Id == TokenType.Constants.Id.ArabicText and \
                   sentence.Words[index+1].TokenType.Id == TokenType.Constants.Id.WhiteSpace and \
                   sentence.Words[index+2].TokenType.Id == TokenType.Constants.Id.ArabicText)):
                    index += 1;
                    continue;
                
                unvoweldForm = ' '.join([sentence.Words[index].SecondNormalizationForm , sentence.Words[index+2].SecondNormalizationForm]) ;
                firstNormalizationForm = ' '.join([sentence.Words[index].FirstNormalizationForm , sentence.Words[index+2].FirstNormalizationForm]) ;
                
                if unvoweldForm in compoundNounsDict.keys():
                    compound = compoundNounsDict[unvoweldForm];
                    if(not ArabicStringUtility.IsCompatible(ArabicStringUtility, firstNormalizationForm, compound.VoweledForm)):
                        index += 1;
                        continue;
                    originalString = ' '.join([sentence.Words[index].OriginalString,  sentence.Words[index+2].OriginalString]) ;
                    
                    #Compound:
                    sentence.Words[index].OriginalString = originalString;
                    sentence.Words[index].FirstNormalizationForm = compound.VoweledForm;
                    sentence.Words[index].SecondNormalizationForm = compound.UnvoweledForm;
                    sentence.Words[index].String = compound.UnvoweledForm;
                    sentence.Words[index].MorphologicalParsingCompleted = True;
                    sentence.Words[index].SurfaceFormMorphemes = [];
                    sentence.Words[index].SurfaceFormMorphemes.append(SurfaceFormMorphemes([], compound.ReturnAsUnderivedCliticless(), []));
                
                    #remove space
                    sentence.Words.remove(sentence.Words[index+1]);
                    #remove second Part
                    sentence.Words.remove(sentence.Words[index+1]);
                    
                index += 1;
                    
        
    pass