"""Batch driver: run the simple-stem analysis over a directory of text files.

Created on 11/05/2010

@Created by: Muhammad Altabba

The script loads the lexicon and rule files into a ``TextEncapsulator`` and
then, for every immediate sub-directory of ``base``, walks that sub-directory
recursively.  Each ``*.txt`` file whose name contains ``Edu`` and no ``-`` is
tokenized, normalized, clitic-parsed and pattern-matched, and the rendered
result is written next to the input file as ``<name>-Qutuf.txt``.
"""
from Models.Lexicon.RootsAndPatternsRepository import *
from Models.Lexicon.SpecialWords.StandAloneParticle import *
from Models.Lexicon.SpecialWords.ProperNoun import *

from Controllers.TextEntities.TextEncapsulator import *
from Controllers.TextEntities.Word import *
from Controllers.Tokenization.Tokenizer import *
from Controllers.Normalization.Normalizer import *
from Controllers.Morphology.AffixParser import *
from Controllers.Morphology.MorphologicalAnalyzer import *

import codecs
import io
import os
from os.path import join, getsize

# NOTE(review): this points at Proclitics.xml and is never used below; it
# looks like a copy-paste leftover -- confirm the intended file before using it.
compoundNounsXmlFile = '../../Data/MorphologyTransducers/Proclitics.xml'
procliticsXmlFile = '../../Data/MorphologyTransducers/Proclitics.xml'
encliticsXmlFile = '../../Data/MorphologyTransducers/Enclitics.xml'
prematureTaggingRulesXmlFile = '../../Data/TaggingRepository/PrematureTaggingRules.xml'
overdueTaggingRulesXmlFile = '../../Data/TaggingRepository/OverdueTaggingRules.xml'

baseDirectoryOfAlKhalil = 'D:/temp/AlKhalil_1/db/'
rootsFolder = 'roots2'


def _isSourceTextFile(fileName):
    """Return True when *fileName* is an input file this script should parse.

    A file qualifies when it ends with ``.txt``, contains ``Edu`` and has no
    ``-`` in its name.  The ``-`` exclusion also keeps the ``-Qutuf.txt``
    files written by this script from being picked up as inputs.
    """
    return (fileName.endswith('.txt')
            and '-' not in fileName
            and 'Edu' in fileName)


def _analyzeFile(text, directory, fileName):
    """Analyze one input file and write its ``-Qutuf.txt`` companion.

    Parameters:
        text: the loaded ``TextEncapsulator`` that performs the analysis.
        directory: directory containing the file (as yielded by ``os.walk``).
        fileName: name of the ``.txt`` file inside *directory*.
    """
    print('\tStart parsing file: ['+fileName+']')

    # ``with`` guarantees the handle is released even if decoding fails.
    with codecs.open('/'.join([directory, fileName]), 'r', 'utf-8') as reader:
        text.String = reader.read()

    text.Tokenize()
    text.Normalize(2)
    text.ParseClitics()

    print('\tProcessing...')
    text.PatternMatchingSimpleStem()

    print('\tWriting...')
    # Render fully into memory first so that a failure while rendering does
    # not leave a truncated output file behind.
    with io.StringIO() as xmlStreamWriter:
        text.RenderTextSimpleStem(xmlStreamWriter)
        outputPath = '/'.join([directory,
                               fileName.replace('.txt', '-Qutuf.txt')])
        with codecs.open(outputPath, 'w', 'utf-8') as writer:
            writer.write(xmlStreamWriter.getvalue())

    print('\tEnd parsing file: ['+fileName+']')


text = TextEncapsulator()
text.LoadFromFiles(baseDirectoryOfAlKhalil, rootsFolder,
                   procliticsXmlFile, encliticsXmlFile,
                   prematureTaggingRulesXmlFile,
                   overdueTaggingRulesXmlFile)

base = 'D:/temp/Latifa2/'

# Only the first os.walk() entry (the listing of ``base`` itself) is needed:
# the inner walk below already recurses into each sub-directory.  Building
# paths as ``root + dir`` for deeper levels produced names without a
# separator, which either did not exist or matched an unrelated directory.
topLevelDirs = next(os.walk(base), (base, [], []))[1]

for dirName in topLevelDirs:
    print('Start parsing directory: ['+dirName+']')
    for subroot, subdirs, subfiles in os.walk(os.path.join(base, dirName)):
        for fileName in subfiles:
            if _isSourceTextFile(fileName):
                _analyzeFile(text, subroot, fileName)
    print('------------------------------------------------------')