varox34's picture
Upload 64 files
366b225 verified
# -*- coding: utf-8 -*-
from collections import namedtuple
from collections.abc import Iterable
from parser.utils.field import Field
# Record type with one slot per column of a ten-column CoNLL row.
# Every slot defaults to None, so callers may fill in only the columns
# they care about.
CoNLL = namedtuple(
    'CoNLL',
    (
        'ID', 'FORM', 'LEMMA', 'CPOS', 'POS',
        'FEATS', 'HEAD', 'DEPREL', 'PHEAD', 'PDEPREL',
    ),
    defaults=(None,) * 10,
)
class Sentence(object):
    """A single sentence stored column-wise.

    Each entry of ``values`` is one column (a sequence with one item per
    token) and is attached to the instance under the name of the field at
    the same position in ``fields``.
    """

    def __init__(self, fields, values):
        """Bind every column in ``values`` to its field name(s).

        Args:
            fields: sequence whose items are either an object with a
                ``name`` attribute or an iterable of such objects. When an
                item is an iterable, every member receives the same column.
            values: sequence of columns, aligned with ``fields``. Pairing
                stops at the shorter of the two sequences.
        """
        for field, value in zip(fields, values):
            if isinstance(field, Iterable):
                # Several fields share one column (same raw data, different
                # views), so the column is stored under each of their names.
                for sub_field in field:
                    setattr(self, sub_field.name, value)
            else:
                setattr(self, field.name, value)
        self.fields = fields

    @property
    def values(self):
        """Yield the stored columns, one per entry of ``self.fields``.

        For a grouped entry only the first member is read, since all members
        of a group were given the same column in ``__init__``.
        """
        for field in self.fields:
            if isinstance(field, Iterable):
                yield getattr(self, field[0].name)
            else:
                yield getattr(self, field.name)

    def __len__(self):
        """Return the number of tokens, i.e. the length of the first column.

        A sentence without any field has no column to measure and reports
        a length of 0 instead of leaking ``StopIteration``.
        """
        return len(next(self.values, ()))

    def __repr__(self):
        """Render one tab-separated line per token, ending with a newline."""
        return '\n'.join('\t'.join(map(str, line))
                         for line in zip(*self.values)) + '\n'
class Corpus(object):
    """An ordered collection of sentences sharing one list of fields.

    Besides the two structural attributes ``fields`` and ``sentences``,
    attribute access is forwarded to the sentences: reading ``corpus.name``
    iterates over ``sentence.name`` for every sentence, and assigning
    ``corpus.name = values`` stores ``values[i]`` on the i-th sentence.
    """

    def __init__(self, fields, sentences):
        """Store the field list and the sentence list."""
        super(Corpus, self).__init__()
        self.fields = fields
        self.sentences = sentences

    def __len__(self):
        """Return the number of sentences."""
        return len(self.sentences)

    def __repr__(self):
        """Join the string form of every sentence with a newline."""
        return '\n'.join(str(sentence) for sentence in self.sentences)

    def __getitem__(self, index):
        """Return ``self.sentences[index]``."""
        return self.sentences[index]

    def __getattr__(self, name):
        """Return a lazy iterator over ``sentence.<name>`` for all sentences.

        Only called when normal attribute lookup fails. The existence check
        runs immediately (not on first iteration), so ``hasattr`` and
        ``getattr(..., default)`` behave correctly and the presence checks
        that ``copy``/``pickle`` perform for optional hooks see "missing"
        instead of a generator.

        Raises:
            AttributeError: if the corpus has no sentences yet, or the first
                sentence does not carry an attribute called ``name``.
        """
        # Read through __dict__ so that a corpus whose ``sentences`` is not
        # set yet (e.g. while being reconstructed) cannot re-enter here.
        sentences = self.__dict__.get('sentences')
        if not sentences or not hasattr(sentences[0], name):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}")
        return (getattr(sentence, name) for sentence in sentences)

    def __setattr__(self, name, value):
        """Set a structural attribute, or distribute ``value`` to sentences.

        ``fields`` and ``sentences`` are stored on the corpus itself. Any
        other name is written to every sentence, the i-th sentence receiving
        ``value[i]``; ``value`` must therefore be indexable and hold at least
        one entry per sentence.
        """
        if name in ('fields', 'sentences'):
            self.__dict__[name] = value
        else:
            for i, sentence in enumerate(self.sentences):
                setattr(sentence, name, value[i])

    @classmethod
    def load(cls, path, fields):
        """Read a tab-separated, blank-line-delimited file into a corpus.

        Args:
            path: file to read; decoded as UTF-8.
            fields: one entry per column. A ``None`` entry is replaced by a
                ``Field`` named after the column index.

        Returns:
            A new corpus with one ``Sentence`` per non-empty block of lines.
        """
        fields = [field if field is not None else Field(str(i))
                  for i, field in enumerate(fields)]
        with open(path, 'r', encoding='utf-8') as f:
            lines = [line.strip() for line in f]

        start, sentences = 0, []
        # The extra empty string acts as a final separator, so the last
        # sentence is kept even when the file lacks a trailing blank line.
        for i, line in enumerate(lines + ['']):
            if line:
                continue
            # Consecutive separators enclose no rows; skip them rather than
            # build a sentence without any column.
            if i > start:
                rows = [row.split('\t') for row in lines[start:i]]
                # Transpose token rows into per-field columns.
                values = list(zip(*rows))
                sentences.append(Sentence(fields, values))
            start = i + 1

        return cls(fields, sentences)

    def save(self, path):
        """Write the string form of the corpus plus a newline, as UTF-8."""
        with open(path, 'w', encoding='utf-8') as f:
            f.write(f"{self}\n")