qutuf / SourceCode /Views /MorphologicalAnalyzer /run_stem_acquiring.py
Boulbaba's picture
Upload 210 files
21baa2f verified
# -*- coding: utf-8 -*-
'''
Created on ١٧‏/٠٥‏/٢٠١٠
@Created by: Muhammad Altabba
'''
from Lexicon.RootsAndPatternsRepository import *;
from TextEntities.Word import *;
from Tokenization.Tokenizer import *
from Normalization.Normalizer import *
from Morphology.AffixParser import *
from Morphology.MorphologicalAnalyzer import *;
import codecs;
import io;
# Default upper bound on the number of (root, example) pairs returned per rule.
MAX_EXAMPLES_COUNT = 15


def GetAppliedRootsWithExamples(pattern, rootRule, conditionallyApplicableRoots,
                                maxExamples=None):
    """Return the roots that satisfy ``rootRule`` for ``pattern``, with example words.

    ``rootRule`` is read one character per root letter. A decimal digit ``d``
    means "this root letter is placed at position ``d`` (1-based) of
    ``pattern.String``"; any other character is a literal that the root itself
    must contain at that place.

    For every candidate whose ``String`` is as long as ``rootRule``, a word is
    generated by substituting the root letters into the pattern. The root is
    then read back out of that word through the rule, and the candidate is
    kept only when the read-back root equals the candidate's own string.

    The rule is consumed left to right while walking the pattern, so its
    digits are assumed to appear in ascending order.

    Args:
        pattern: Object exposing the pattern text as ``String``.
        rootRule: Sequence of single characters (a string works) as described
            above.
        conditionallyApplicableRoots: Iterable of objects exposing the root
            text as ``String``.
        maxExamples: Maximum number of (root, example) pairs to return.
            ``None`` (the default) means ``MAX_EXAMPLES_COUNT``.

    Returns:
        ``[applicableRoots, examples]``: two parallel lists of strings, the
        accepted roots and the word generated from each of them.
    """
    if maxExamples is None:
        maxExamples = MAX_EXAMPLES_COUNT

    applicableRoots = []
    examples = []
    patternString = pattern.String
    ruleLength = len(rootRule)

    for rootItem in conditionallyApplicableRoots:
        # Cap on the number of examples actually collected, not on how many
        # candidates were inspected: otherwise a run of non-matching candidates
        # at the front would leave the result empty.
        if len(examples) >= maxExamples:
            break

        rootString = rootItem.String
        if len(rootString) != ruleLength:
            continue

        # Build the example word: walk the pattern and drop each root letter
        # into the position its rule digit names.
        letters = []
        ruleIndex = 0
        for position, patternLetter in enumerate(patternString):
            # Literal rule entries do not occupy a pattern position; skip them.
            # isdecimal() is used because it matches what int() accepts.
            while ruleIndex < ruleLength and not rootRule[ruleIndex].isdecimal():
                ruleIndex += 1
            if ruleIndex < ruleLength and position == int(rootRule[ruleIndex]) - 1:
                letters.append(rootString[ruleIndex])
                ruleIndex += 1
            else:
                letters.append(patternLetter)
        generatedWord = ''.join(letters)

        # Read the root back out of the generated word through the rule.
        rebuiltLetters = []
        for ruleLetter in rootRule:
            if ruleLetter.isdecimal():
                wordIndex = int(ruleLetter) - 1
                if not 0 <= wordIndex < len(generatedWord):
                    # The rule points outside the pattern: it cannot apply.
                    rebuiltLetters = None
                    break
                rebuiltLetters.append(generatedWord[wordIndex])
            else:
                rebuiltLetters.append(ruleLetter)

        if rebuiltLetters is not None and ''.join(rebuiltLetters) == rootString:
            applicableRoots.append(rootString)
            examples.append(generatedWord)

    return [applicableRoots, examples]
def _CollectConditionallyApplicableRoots(pattern, roots):
    """Return, without duplicates, the root items that list any ID of ``pattern``."""
    collected = []
    for rootItems in roots.values():
        for rootItem in rootItems.values():
            if rootItem in collected:
                continue
            if any(patternID in rootItem.PatternsIDs for patternID in pattern.IDs):
                collected.append(rootItem)
    return collected


def SaveAcquire(unvoweledPatterns, roots, fileName):
    """Write a semicolon-separated stem-acquiring report to ``fileName`` (UTF-8).

    For each pattern-length group a title line and a column-header line are
    written. Each pattern then produces one row per (rule, root string) pair
    returned by ``pattern.GetRootsStringsAndRules(None)``. The first row
    carries the running number and the pattern string; continuation rows carry
    the running number and an empty pattern column. Every row ends with the
    applicable roots and the example words produced by
    ``GetAppliedRootsWithExamples``.

    Args:
        unvoweledPatterns: Mapping of pattern length to a mapping of pattern
            string to pattern object. The pattern object must provide
            ``GetRootsStringsAndRules``, ``IDs`` and ``String``.
        roots: Two-level mapping whose inner values are root items exposing
            ``PatternsIDs`` and ``String`` (presumably keyed by first letter,
            then by root string; confirm against the repository).
        fileName: Path of the report file; it is created or overwritten.
    """
    # NOTE(review): statement nesting was reconstructed from a copy of this file
    # whose indentation was lost; confirm that the counter advances once per
    # rule row and that each rule row ends with a single ';'.
    counter = 1
    # The context manager closes the file even if a write or a repository call
    # raises part-way through the export.
    with codecs.open(fileName, 'w', 'utf-8') as f:
        for patternLength, patterns in unvoweledPatterns.items():
            f.write('\n;الأوزان التي بطول (' + str(patternLength) + ') وعددها (' + str(len(patterns)) + '):')
            f.write('\nالرقم;الوزن;الجذور;;الأصل;الجذوع;أمثلة على الجذور;')
            for patternString, pattern in patterns.items():
                f.write('\n')
                f.write(str(counter))
                f.write(';')
                f.write(patternString)
                f.write(';')
                rootStrings, rootRules = pattern.GetRootsStringsAndRules(None)
                conditionallyApplicableRoots = _CollectConditionallyApplicableRoots(pattern, roots)
                for j, rootString in enumerate(rootStrings):
                    if j != 0:
                        # Continuation row: same layout, empty pattern column.
                        f.write('\n')
                        f.write(str(counter))
                        f.write(';')
                        f.write(';')
                    counter += 1
                    f.write(''.join(rootRules[j]) + ';(' + rootString + ')')
                    applicableRoots, examples = GetAppliedRootsWithExamples(
                        pattern, rootRules[j], conditionallyApplicableRoots)
                    for applicableRoot, example in zip(applicableRoots, examples):
                        f.write(''.join([';', applicableRoot, ';', example]))
                    f.write(';')
                f.flush()
# Script body: runs at import time. Builds the repository, loads it, then
# writes one stem-acquiring report for the verbal patterns/roots and one for
# the nominal patterns/roots.
# NOTE(review): the repository directory is a hard-coded absolute path and the
# output paths are relative to the current working directory; confirm both
# before running.
wordDB = RootsAndPatternsRepository();
wordDB.Load('D:/temp/AlKhalil_1/db/', 'roots2');
SaveAcquire(wordDB.UnvoweledVerbalPatterns, wordDB.VerbalRoots, '../../Data/stem_acquiring_verbal.csv')
SaveAcquire(wordDB.UnvoweledNominalPatterns, wordDB.NominalRoots, '../../Data/stem_acquiring_nominal.csv')