File size: 6,931 Bytes
65642c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
class Doc:
    """Parsed document: a title plus a tree of nested sections.

    The raw text is split on newlines; every non-blank line is stripped and
    wrapped in a ``Line``, and the resulting lines are folded into a root
    ``Container``.

    Attributes set by ``__init__``:
        params: parsing configuration passed on to ``Line`` and ``Container``.
        lines: body ``Line`` objects (without the title line, if one was used).
        title: the document title.
        container: root ``Container`` built from ``lines``.
        tasks: one task dict per container whose ``task`` text is non-empty.
        fulltext: the raw text the document was built from.
    """

    def __init__(self, fulltext: str = '', title: str = '', params: dict = None):
        """Parse *fulltext* according to *params*.

        Args:
            fulltext: raw document text, one logical line per text line.
            title: title to use when the text itself does not provide one.
            params: parsing configuration. ``None`` means an empty
                configuration, which only works for an empty *fulltext*
                because ``Line`` needs the ``'startswith_'`` table.
        """
        # A fresh dict per instance; a ``{}`` default would be shared by all calls.
        self.params = {} if params is None else params
        self.lines = [Line(text.strip(), self.params) for text in fulltext.split("\n") if text.strip()]
        self.title, self.lines = self._get_title(title)
        self.container = Container(lines=self.lines, title=self.title, father=self, params=self.params)
        self.tasks = [c.get_task(self.container.one_liner) for c in self.container.containers if c.task]
        self.fulltext = fulltext

    def _get_title(self, title):
        """Return ``(title, body_lines)`` for the already parsed ``self.lines``.

        For ``'input_text'`` documents the title comes from a leading line of
        type ``'title'`` (which is then removed from the body); if there is no
        such line a placeholder title is used. Any other document type keeps
        the *title* argument and all lines.
        """
        lines = self.lines
        # .get(): a configuration without a 'type' entry is not an input text.
        if self.params.get('type') == 'input_text':
            if lines and lines[0].type == 'title':
                title = lines[0].text
                lines = lines[1:]
            else:
                title = 'the title is missing'
        return title, lines

    def replace_tasks(self, resolutions: list):
        """Return ``fulltext`` with each task line replaced by a resolution.

        Task lines are consumed top to bottom: the first task line gets the
        first resolution, the second task line the second one, and so on.
        Task lines left over once *resolutions* is exhausted are kept as they
        are. A line counts as a task line when, ignoring surrounding
        whitespace, it starts with the task marker, which is the same rule
        used when the document is parsed.

        Args:
            resolutions: iterable of replacement lines, in document order.

        Returns:
            The new text; ``fulltext`` unchanged when the configuration
            defines no task marker.
        """
        starters = self.params.get('startswith_', {})
        # Reverse lookup: line type -> marker.
        markers = {kind: marker for marker, kind in starters.items()}
        task_starter = markers.get('task')
        if task_starter is None:
            return self.fulltext
        # One iterator for the whole pass so each task gets its own resolution.
        pending = iter(resolutions)
        new_lines = []
        for line in self.fulltext.split('\n'):
            if line.strip().startswith(task_starter):
                line = next(pending, line)
            new_lines.append(line)
        return "\n".join(new_lines)
class InputDoc(Doc):
    """Document written in the input markup.

    Line markers: ``!!`` title, ``++`` comment, ``??`` task, and ``#`` runs
    for section headings of level 1 to 6.
    """

    def __init__(self, fulltext='', title=''):
        """Parse *fulltext* with the input-text markers; see ``Doc``."""
        self.params = {
            'type': 'input_text',
            'startswith_': {
                '!!': 'title', '++': 'comment', '??': 'task',
                # Longest heading markers first: '####' is a prefix of
                # '#####' and '######', so a parser that stops at the first
                # matching marker must be offered the longer ones first or
                # level 5 and 6 headings are read as level 4.
                '######': '6', '#####': '5', '####': '4',
                '### ': '3', '## ': '2', '# ': '1',
            },
        }
        super().__init__(fulltext=fulltext, title=title, params=self.params)
class WikiPage(Doc):
    """Document in wiki markup, where headings look like ``== Title ==``.

    Sections whose title is listed under ``'discarded'`` are dropped from the
    section tree.
    """

    def __init__(self, fulltext='', title=''):
        """Parse *fulltext* with the wiki heading markers; see ``Doc``."""
        self.params = {
            'type': 'wiki',
            'startswith_':
                {'== ': '1', '=== ': '2', '==== ': '3', '===== ': '4', '====== ': '5', '======= ': '6'},
            # Closing markers, index-aligned with the opening markers above;
            # the level-6 closer mirrors its 7-character opener.
            'endswith_':
                [' ==', ' ===', ' ====', ' =====', ' ======', ' ======='],
            'discarded': ["See also", "Notes", "References", "Sources", "External links", "Bibliography",
                          "Cinematic adaptations", "Further reading", "Maps"]
        }
        super().__init__(fulltext=fulltext, title=title, params=self.params)

    def get_paragraphs(self, chunk=500):
        """Return the page text as a list of paragraphs.

        Delegates to the root container's ``get_paragraphs``, passing *chunk*
        through unchanged.
        """
        return self.container.get_paragraphs(chunk)
class Container:
    """A section of a document: its own lines plus nested sub-sections.

    Attributes:
        normals / comments / tasks: the section's own lines, by type.
        normal / comment / task: the texts of those lines, each prefixed with
            a space and concatenated.
        children: direct sub-sections (``Container`` objects).
        containers: this section followed by all descendants, depth first.
        level: nesting depth (0 for the root).
        title: section title.
        father: the owning object (a parent ``Container``, or whatever built
            the root).
        one_liner: ``title + ' ' + comment``.
        root_text: ``one_liner + ' ' + normal``.
        text / summary: ``root_text`` followed by the text of every child.
    """

    def __init__(self, lines=None, level=0, title='', father=None, params=None):
        """Build the section tree from *lines*.

        Args:
            lines: line objects exposing ``type``, ``text``, ``level`` and
                ``is_structure``; ``None`` means no lines.
            level: nesting depth of this section.
            title: section title.
            father: owner of this section.
            params: optional configuration; when it has a ``'discarded'``
                entry, direct children with a listed title are dropped.
                ``params`` is not forwarded to nested sections.
        """
        self.normals = []
        self.normal = ''
        self.comments = []
        self.comment = ''
        self.tasks = []
        self.task = ''
        self.children = []
        self.level = level
        self.title = title
        self.father = father
        self._expand(lines or [])
        if params and 'discarded' in params:
            discarded = params['discarded']
            self.children = [child for child in self.children if child.title not in discarded]
        self.containers = [self]
        for child in self.children:
            self.containers += child.containers
        self.one_liner = self.title + ' ' + self.comment
        self.root_text = self.one_liner + ' ' + self.normal
        self.text = ''.join([self.root_text] + [' ' + child.text for child in self.children])
        self.summary = self.text

    def _add_child(self, title, lines):
        """Append a sub-section one level below this one."""
        self.children.append(Container(lines=lines,
                                       level=self.level + 1,
                                       title=title,
                                       father=self))

    def _expand(self, lines):
        """Distribute *lines* between this section and its sub-sections.

        Lines seen before the first heading belong to this section. The first
        heading opens a child (and has its ``level`` set to this section's
        level + 1); from then on, a heading of that same level closes the
        current child and opens the next, while every other line is handed to
        the current child.
        """
        child_level = self.level + 1
        in_child = False
        child_lines = []
        child_title = ''
        for line in lines:
            if not in_child:
                if line.type == 'normal':
                    self.normals.append(line)
                    self.normal += ' ' + line.text
                elif line.type == 'comment':
                    self.comments.append(line)
                    self.comment += ' ' + line.text
                elif line.type == 'task':
                    self.tasks.append(line)
                    self.task += ' ' + line.text
                elif line.is_structure:
                    in_child = True
                    child_lines = []
                    child_title = line.text
                    # The first heading found defines the child level here,
                    # whatever its marker said.
                    line.level = child_level
            elif child_level < line.level or not line.is_structure:
                child_lines.append(line)
            elif child_level == line.level:
                self._add_child(child_title, child_lines)
                child_lines = []
                child_title = line.text
        if in_child:
            self._add_child(child_title, child_lines)

    def get_task(self, doc_one_liner):
        """Describe this section's task together with its surroundings.

        Args:
            doc_one_liner: one-line description of the whole document.

        Returns:
            A dict with the task text (``'description'``), this section's
            one-liner (``'about'``), *doc_one_liner* (``'doc_description'``),
            the father's one-liner (``'above'``) and the one-liners of the
            sibling sections listed before and after this one (``'before'``,
            ``'after'``). When the father has no ``children`` or no
            ``one_liner`` (the root section), the sibling lists are empty
            and ``'above'`` is ``''``.
        """
        # The root's father is not a Container (or is None), so look the
        # attributes up defensively instead of assuming a parent section.
        siblings = getattr(self.father, 'children', None) or []
        if self in siblings:
            index = siblings.index(self)
            before = [sibling.one_liner for sibling in siblings[:index]]
            after = [sibling.one_liner for sibling in siblings[index + 1:]]
        else:
            before, after = [], []
        return {'description': self.task,
                'about': self.one_liner,
                'doc_description': doc_one_liner,
                'above': getattr(self.father, 'one_liner', ''),
                'before': before,
                'after': after}

    def get_paragraphs(self, chunk=500):
        """Split the section text into paragraphs.

        A section whose full text is shorter than *chunk* characters is one
        paragraph. Otherwise its own ``root_text`` is one paragraph and each
        child is split recursively.
        """
        if len(self.text) < chunk:
            return [self.text]
        paragraphs = [self.root_text]
        for child in self.children:
            paragraphs += child.get_paragraphs(chunk)
        return paragraphs
class Line:
    """One line of a document, classified by its leading marker.

    Attributes:
        text: the line content with the marker (and closing marker, if any)
            removed and surrounding whitespace stripped.
        type: the value mapped to the matching marker in
            ``params['startswith_']``, or ``'normal'`` when no marker matches.
        level: ``int(type)`` when ``type`` is made of digits, else ``-1``.
        is_structure: ``True`` when ``level`` is positive, i.e. for headings.
    """

    def __init__(self, text, params):
        """Classify *text* using the marker tables in *params*.

        Args:
            text: the raw line.
            params: must contain ``'startswith_'`` (marker -> type); may
                contain ``'endswith_'``, a list of closing markers aligned by
                position with the entries of ``'startswith_'``.
        """
        self.text = text
        self.type, self.text = self._parse_text(params)
        self.level = int(self.type) if self.type.isdigit() else -1
        self.is_structure = 0 < self.level

    def _parse_text(self, params):
        """Return ``(type, text)`` for ``self.text``.

        When several markers match, the longest one wins, so a marker that is
        a prefix of another (``'####'`` vs ``'#####'``) cannot shadow it.
        """
        startswith_ = params['startswith_']
        endswith_ = params.get('endswith_', [])

        best = None  # (position in the marker table, marker)
        for position, starter in enumerate(startswith_):
            if not self.text.startswith(starter):
                continue
            if best is None or len(best[1]) < len(starter):
                best = (position, starter)
        if best is None:
            return 'normal', self.text.strip()

        position, starter = best
        # Slice the marker off by length: splitting on it would also cut the
        # line at any later occurrence of the same characters.
        content = self.text[len(starter):]
        # A missing closing marker means "nothing to remove".
        end = endswith_[position] if position < len(endswith_) else ''
        if end:
            content = content.split(end)[0]
        return startswith_[starter], content.strip()
|