File size: 1,003 Bytes
ae2ef1b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
import regex
"""
This class identifies whether a string contains any Bangla word.
|-------------------------------------------------------------------------------------------------|
| * This class is written for basic usage; it can be improved considerably.                       |
| * Replacing the for loop with a binary search could locate the index of a specific Bangla word. |
| * Sorting the word chunks in lexicographic order would also produce better results.             |
|-------------------------------------------------------------------------------------------------|
"""
class Identifier:
    """Detect whether a query string contains a Bangla word.

    The query is split on single spaces, and each resulting chunk is tested
    against ``pattern``. The outcome is computed once in the constructor and
    read back through the ``is_bangla()`` method.

    Attributes:
        pattern: Regular expression applied to every chunk. It uses the
            ``\\p{...}`` Unicode property syntax, which needs the
            third-party ``regex`` module (the stdlib ``re`` rejects it).
        parts: The chunks produced by ``query.split(' ')``.
    """

    # Matched from the start of a chunk only (``regex.match``): optional
    # non-letters, then one or more runs of Bengali-script characters that
    # are separated by non-letters. Anything after the match is ignored.
    pattern = r"\P{L}*\p{Bengali}+(?:\P{L}+\p{Bengali}+)*\P{L}*"

    # The result lives under a private name. Storing it as ``is_bangla``
    # would shadow the method of the same name on the instance, so that
    # ``obj.is_bangla()`` raised TypeError for Bangla input and returned a
    # (truthy) bound method for everything else.
    _is_bangla = False

    def __init__(self, query):
        """Split ``query`` into chunks and record whether any chunk matches.

        Args:
            query: The text to inspect; it must provide ``split``.
        """
        self.parts = query.split(' ')
        # any() stops at the first matching chunk, like the former
        # loop-and-break.
        self._is_bangla = any(
            regex.match(self.pattern, part) is not None
            for part in self.parts
        )

    def is_bangla(self) -> bool:
        """Return True if at least one chunk of the query matched ``pattern``."""
        return self._is_bangla