| |
| ''' |
| Created on ١٧/٠٥/٢٠١٠ |
| |
| @Created by: Muhammad Altabba |
| |
| ''' |
|
|
| from Lexicon.RootsAndPatternsRepository import *; |
| from TextEntities.Word import *; |
| from Tokenization.Tokenizer import * |
| from Normalization.Normalizer import * |
| from Morphology.AffixParser import * |
| from Morphology.MorphologicalAnalyzer import *; |
|
|
| import codecs; |
| import io; |
|
|
# Upper bound on the number of (root, example) pairs collected per root rule.
MAX_EXAMPLES_COUNT = 15


def GetAppliedRootsWithExamples(pattern, rootRule, conditionallyApplicableRoots):
    """Return the candidate roots that a root rule reproduces, with example words.

    For each candidate whose string has the same length as ``rootRule``, a
    word is generated by walking ``pattern.String`` and substituting the
    candidate's letters at the 1-based pattern positions named by the digit
    characters of ``rootRule``.  The rule is then applied back to the
    generated word (digit -> letter of the word at that position, any other
    character -> itself); the candidate is kept only when this yields the
    candidate's own string again.

    Args:
        pattern: object exposing the pattern text as ``pattern.String``.
        rootRule: indexable sequence of single characters; digit characters
            are 1-based positions in the pattern, everything else is a
            literal root letter.
        conditionallyApplicableRoots: iterable of objects exposing their
            root text as ``.String``.

    Returns:
        ``[applicableRoots, examples]``: two parallel lists of strings, the
        accepted roots and the word generated for each.  Collection stops as
        soon as ``MAX_EXAMPLES_COUNT`` pairs have been gathered.
    """
    applicableRoots = []
    examples = []
    patternString = pattern.String
    ruleLength = len(rootRule)

    for candidate in conditionallyApplicableRoots:
        rootString = candidate.String
        if len(rootString) != ruleLength:
            continue

        # Build the example word: root letters go to the positions the rule
        # names, every other position keeps the pattern's own letter.
        letters = []
        rulePosition = 0
        for j, patternLetter in enumerate(patternString):
            # Skip literal (non-digit) rule entries; they occupy a root slot
            # but do not point at a pattern position.
            while rulePosition < ruleLength and not rootRule[rulePosition].isdigit():
                rulePosition += 1
            if rulePosition < ruleLength and j == int(rootRule[rulePosition]) - 1:
                letters.append(rootString[rulePosition])
                rulePosition += 1
            else:
                letters.append(patternLetter)
        generatedWord = ''.join(letters)

        # Apply the rule backwards to the generated word.
        newRoot = ''.join(
            generatedWord[int(symbol) - 1] if symbol.isnumeric() else symbol
            for symbol in rootRule
        )

        if newRoot == rootString:
            applicableRoots.append(newRoot)
            examples.append(generatedWord)
            # Cap on the number of examples gathered, not on the position of
            # the candidate in the input list: matches that occur late in the
            # list must still be found, and no more than the cap are returned.
            if len(examples) >= MAX_EXAMPLES_COUNT:
                break

    return [applicableRoots, examples]
|
|
|
|
|
|
def SaveAcquire(unvoweledPatterns, roots, fileName):
    """Write a semicolon-separated report of patterns, root rules and examples.

    For every pattern-length group a two-line header is written, then one row
    per pattern.  Each root rule of a pattern is written with its root string
    and the (root, example word) pairs returned by
    ``GetAppliedRootsWithExamples``; the second and later rules of a pattern
    go on continuation rows whose pattern column is left empty.

    Args:
        unvoweledPatterns: mapping ``patternLength -> {patternString: pattern}``.
            Each pattern must provide ``GetRootsStringsAndRules(None)``
            (returning root strings and root rules as parallel sequences)
            and an ``IDs`` sequence.
        roots: mapping ``key -> {rootString: rootItem}``; each root item must
            provide ``PatternsIDs`` and ``String``.
        fileName: path of the UTF-8 file to create or overwrite.

    NOTE(review): the nesting below was reconstructed from a copy of the file
    whose indentation had been stripped -- confirm that the running number is
    meant to advance once per root rule.
    """
    # Loop-invariant: the root items do not depend on the pattern, so flatten
    # the two-level mapping once instead of once per pattern.
    allRootItems = [
        rootItem
        for rootItems in roots.values()
        for rootItem in rootItems.values()
    ]

    counter = 1
    # codecs.open opens the underlying file in binary mode, so '\n' is written
    # untranslated on every platform.  The with-block guarantees the file is
    # closed even if report generation raises part-way through.
    with codecs.open(fileName, 'w', 'utf-8') as f:
        for patternLength, patterns in unvoweledPatterns.items():
            # Group title: "Patterns of length (N), count (M):"
            f.write('\n;الأوزان التي بطول (' + str(patternLength) + ') وعددها (' + str(len(patterns)) + '):')
            # Column captions: number; pattern; roots; ; origin; stems; examples
            f.write('\nالرقم;الوزن;الجذور;;الأصل;الجذوع;أمثلة على الجذور;')

            for patternString, pattern in patterns.items():
                f.write('\n' + str(counter) + ';' + patternString + ';')

                rootStrings, rootRules = pattern.GetRootsStringsAndRules(None)

                # Roots that reference at least one of this pattern's IDs,
                # without duplicates and in iteration order.
                conditionallyApplicableRoots = []
                for rootItem in allRootItems:
                    if (any(patternID in rootItem.PatternsIDs for patternID in pattern.IDs)
                            and rootItem not in conditionallyApplicableRoots):
                        conditionallyApplicableRoots.append(rootItem)

                for j, rootString in enumerate(rootStrings):
                    rootRule = rootRules[j]
                    if j != 0:
                        # Continuation row: number, then an empty pattern column.
                        f.write('\n' + str(counter) + ';;')
                    counter += 1

                    f.write(''.join(rootRule) + ';(' + rootString + ')')
                    applicableRoots, examples = GetAppliedRootsWithExamples(
                        pattern, rootRule, conditionallyApplicableRoots)
                    for appliedRoot, example in zip(applicableRoots, examples):
                        f.write(';' + appliedRoot + ';' + example)
                    f.write(';')

                # Push finished rows to disk so partial output survives a crash.
                f.flush()
|
|
|
|
|
|
# Script driver: runs at import time.  The database location and the output
# paths are hard-coded and machine-specific.
wordDB = RootsAndPatternsRepository()
wordDB.Load('D:/temp/AlKhalil_1/db/', 'roots2')

# Report for the verbal patterns and roots.
SaveAcquire(
    wordDB.UnvoweledVerbalPatterns,
    wordDB.VerbalRoots,
    '../../Data/stem_acquiring_verbal.csv',
)

# Report for the nominal patterns and roots.
SaveAcquire(
    wordDB.UnvoweledNominalPatterns,
    wordDB.NominalRoots,
    '../../Data/stem_acquiring_nominal.csv',
)
|
|
|
|