Spaces:
Runtime error
Runtime error
File size: 3,325 Bytes
e489264 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
from threading import Thread
import constants
from pathlib import Path
import random
from typing import Union, Any, List
from interfaces import IProcess, IProcessor
from processes import (
RandomCharRemover,
RandomCharsInjector,
RandomCharsSwapper,
RandomNeighborReplacer,
RandomWordsCollapsor,
PunctuationRemover,
SentencePermutation,
)
class FilesProcessor(IProcessor):
    """Run a fixed pipeline of processes over a collection of files.

    Each file is passed to the first process's ``execute``; the value it
    returns is passed to the next process, and so on. ``run`` handles the
    files sequentially, ``dist_run`` spreads them over worker threads.
    """

    def __init__(
            self, processes: List[IProcess],
            n_dist: int = 32
            ) -> None:
        """Store the pipeline and the maximum number of worker threads.

        Args:
            processes: Processes applied, in order, to every file.
            n_dist: Upper bound on the number of chunks (and therefore
                threads) used by ``dist_run``.
        """
        super().__init__()
        self.processes = processes
        self.n_dist = n_dist

    def file_run(self, file: Union[str, Path]) -> Any:
        """Feed ``file`` through every process and return the final value."""
        result = file
        for process in self.processes:
            result = process.execute(result)
        return result

    def run(
            self,
            files: List[Union[str, Path]]
            ) -> Any:
        """Process ``files`` one after another.

        Returns:
            A list holding one ``file_run`` result per input file, in
            input order.
        """
        return [self.file_run(file) for file in files]

    def _divde(self, data: List[Any]) -> List[List[Any]]:
        """Split ``data`` into at most ``n_dist`` contiguous chunks.

        Chunk sizes differ by at most one item, no chunk is empty, and
        concatenating the chunks in order reproduces ``data``. An empty
        ``data`` yields an empty list.

        The (misspelled) method name is kept so existing callers keep
        working.
        """
        total = len(data)
        if total == 0:
            return []
        # Never create more chunks than items, and always at least one,
        # so short inputs and a non-positive n_dist are still processed.
        n_chunks = max(1, min(self.n_dist, total))
        base, extra = divmod(total, n_chunks)
        divs = []
        start = 0
        for i in range(n_chunks):
            # The first `extra` chunks absorb the remainder, one item each.
            end = start + base + (1 if i < extra else 0)
            divs.append(data[start:end])
            start = end
        return divs

    def dist_run(
            self,
            files: List[Union[str, Path]]
            ) -> Any:
        """Process ``files`` using one thread per chunk.

        Returns:
            A list holding one result per input file, in input order.

        Raises:
            Exception: The first exception recorded from a worker thread
                is re-raised after all threads have finished.
        """
        divs = self._divde(files)
        # One slot per chunk: each worker writes only to its own slot, so
        # the output order matches the input order regardless of which
        # thread finishes first.
        slots: List[Any] = [None] * len(divs)
        errors: List[Exception] = []

        def worker(index: int, div: List[Any]) -> None:
            try:
                slots[index] = self.run(div)
            except Exception as err:  # re-raised in the calling thread
                errors.append(err)

        threads = [
            Thread(target=worker, args=(index, div))
            for index, div in enumerate(divs)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        if errors:
            raise errors[0]
        results = []
        for chunk in slots:
            results.extend(chunk)
        return results
class TextDistorter(IProcessor):
    """Distort a line by repeatedly applying randomly chosen processes."""

    def __init__(
            self, ratio: float, processes: List[IProcess]
            ) -> None:
        """Keep the distortion ratio and the pool of candidate processes.

        Args:
            ratio: Multiplied by the line length to get the number of
                distortion rounds.
            processes: Processes to pick from at random on each round.
        """
        super().__init__()
        self.processes = processes
        self.ratio = ratio

    def run(self, line: str) -> str:
        """Return ``line`` after ``int(ratio * len(line))`` random rounds.

        The number of rounds is fixed from the length of the incoming
        line; later changes in length do not affect it.
        """
        rounds = int(self.ratio * len(line))
        distorted = line
        for _ in range(rounds):
            chosen = random.choice(self.processes)
            distorted = chosen.execute(distorted)
        return distorted

    def dist_run(self):
        """Placeholder; not implemented yet and returns ``None``."""
        # TODO
        return None
class TextProcessor(IProcessor):
    """Apply an ordered list of processes to a single sentence."""

    def __init__(self, processes: List[IProcess]) -> None:
        """Remember the processes to apply, in order."""
        super().__init__()
        self.processes = processes

    def run(self, sentence: str):
        """Chain ``sentence`` through every process and return the result."""
        current = sentence
        for step in self.processes:
            current = step.execute(current)
        return current

    def dist_run(self, sentence: str) -> str:
        """Delegate to ``run`` and return its result."""
        processed = self.run(sentence)
        return processed
def get_text_distorter(ratio, sentences: List[str]):
    """Build a ``TextDistorter`` with the default set of distortions.

    Args:
        ratio: Passed through as the distorter's ``ratio``.
        sentences: Passed to ``SentencePermutation``.

    Returns:
        A ``TextDistorter`` configured with the six processes below.
    """
    distortions = [
        SentencePermutation(sentences),
        RandomCharsInjector(constants.KURDISH_CHARS),
        RandomCharsSwapper(),
        RandomCharRemover(),
        RandomWordsCollapsor(),
        RandomNeighborReplacer(
            constants.KEYBOARD_KEYS, constants.KEYBOARD_BLANK
        ),
    ]
    return TextDistorter(ratio=ratio, processes=distortions)
|