# qutuf / SourceCode / Views / run_Qutuf.py
# Boulbaba's picture
# Upload 210 files
# 21baa2f verified
'''
Created on 11/05/2010
@author: Muhammad Altabba
'''
import codecs
import io
import os
# Project root: two directory levels above the folder that contains this
# file. The path is left un-normalised (it still contains the '/../..' part).
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))+'/../..'
from ..Controllers.TextEntities.TextEncapsulator import *
# ---------------------------------------------------------------------------
# File locations.
# Change the next few parameters as appropriate for the local installation.
# ---------------------------------------------------------------------------
baseDirectory = ROOT_DIR
baseDirectoryOfQutufDB = os.path.join(baseDirectory, 'Data')
inputTextFile = os.path.join(baseDirectoryOfQutufDB, 'test_Qutuf.txt')
# The trailing slash in 'db/' is part of the value and is kept as-is.
baseDirectoryOfAlKhalilDB = os.path.join(
    baseDirectory, 'AlKhalil_V1_Modified', 'db/')
# Optional output destinations (currently disabled):
# ouputXmlFile = os.path.join(baseDirectory,'Output','test.xml')
# ouputHtmlFile = os.path.join(baseDirectory,'Output','test.html')

# ---------------------------------------------------------------------------
# Operation settings: thresholds handed to the pattern-matching and
# overdue-tagging steps of the pipeline.
# ---------------------------------------------------------------------------
prematureTaggingPositiveThreshold = 0.0
prematureTaggingNegativeThreshold = -0.0
overdureTaggingThreshold = overdureTaggingTopReservants = None

# ---------------------------------------------------------------------------
# Fixed resource locations: the next few parameters are fixed; do not change
# them.
# ---------------------------------------------------------------------------
procliticsXmlFile, encliticsXmlFile = (
    os.path.join(baseDirectoryOfQutufDB, 'MorphologyTransducers', fileName)
    for fileName in ('Proclitics.xml', 'Enclitics.xml')
)
prematureTaggingRulesXmlFile, overdueTaggingRulesXmlFile = (
    os.path.join(baseDirectoryOfQutufDB, 'TaggingRepository', fileName)
    for fileName in ('PrematureTaggingRules.xml', 'OverdueTaggingRules.xml')
)
rootsFolder = 'roots2'
# Create the single TextEncapsulator instance that runit() reuses on every
# call, and load its data files once, when this module is imported.
text = TextEncapsulator()
text.LoadFromFiles(
    baseDirectoryOfAlKhalilDB,
    rootsFolder,
    procliticsXmlFile,
    encliticsXmlFile,
    prematureTaggingRulesXmlFile,
    overdueTaggingRulesXmlFile,
)
def runit(phrase, functionality, outputFormat):
    """Run the Qutuf processing pipeline on ``phrase`` and return the output.

    The module-level ``text`` object is reused on every call; its ``String``
    attribute is overwritten with ``phrase`` before the pipeline runs.

    Args:
        phrase: Input text, assigned to ``text.String``.
        functionality: Forwarded to the renderer. When it equals ``'lemma'``,
            ``text.exposeLemma()`` is called before rendering.
        outputFormat: ``'html'`` selects ``text.RenderHtml``; any other value
            selects ``text.RenderXml``.

    Returns:
        Everything the selected renderer wrote, as a single string.
    """
    # Feed the input text into Qutuf.
    text.String = phrase

    # Processing pipeline; the steps run in this fixed order.
    text.Tokenize()
    text.Normalize(2)
    text.CompoundParsing()
    text.PrematureTagging()
    text.ParseClitics()
    text.PatternMatching(prematureTaggingPositiveThreshold,
                         prematureTaggingNegativeThreshold)
    text.OverdueTagging(overdureTaggingThreshold,
                        overdureTaggingTopReservants)

    if functionality == 'lemma':
        text.exposeLemma()

    # Render into an in-memory buffer so the result can be returned as text.
    buffer = io.StringIO()
    render = text.RenderHtml if outputFormat == 'html' else text.RenderXml
    render(buffer, functionality)

    # Debug aids kept from the original, still disabled:
    #   text.Print()                          -- log the text to the terminal
    #   codecs.open(ouputFile, 'w', 'utf-8')  -- write the output to a file
    return buffer.getvalue()