ml-server / src /utils /identifier.py
Partha11's picture
added project files from private repo
ae2ef1b
import regex
"""
This class identifies whether a string contains any Bangla word.
|-------------------------------------------------------------------------------------------------|
| * This class is written for basic usage; it can be improved considerably. |
| * Replacing the linear scan over the words with a binary search would allow finding the index of a specific Bangla word. |
| * Sorting the word chunks in lexicographic order would also produce better results. |
|-------------------------------------------------------------------------------------------------|
"""
class Identifier:
    """Detect whether a query string contains at least one Bangla word.

    The query is split on single spaces and every chunk is matched against
    ``pattern``; scanning stops at the first chunk that matches. The outcome
    is read by calling :meth:`is_bangla`.

    Attributes:
        pattern: Pattern (third-party ``regex`` module syntax) applied to each
            chunk from its first character: optional non-letters, a run of
            Bengali-script characters, optionally further such runs separated
            by non-letters, then optional trailing non-letters.
        parts: The chunks obtained by splitting the query on ``' '``.
    """

    pattern = r"\P{L}*\p{Bengali}+(?:\P{L}+\p{Bengali}+)*\P{L}*"

    # The flag is stored under a private name on purpose. Previously it shared
    # the name ``is_bangla`` with the method below: the method definition
    # replaced the class-level default, and the instance assignment in
    # ``__init__`` then hid the method, so ``obj.is_bangla()`` raised
    # ``TypeError`` for Bangla input and returned a (truthy) bound method
    # otherwise.
    _is_bangla = False

    def __init__(self, query: str) -> None:
        """Split ``query`` into chunks and record whether any chunk is Bangla.

        Args:
            query: Text to inspect; it is split on single space characters.
        """
        self.parts = query.split(' ')
        # ``regex.match`` anchors at the start of each chunk, so a chunk that
        # begins with a non-Bengali letter is not counted. ``any`` stops at
        # the first hit, like the early ``break`` of an explicit loop.
        self._is_bangla = any(
            regex.match(self.pattern, part) is not None for part in self.parts
        )

    def is_bangla(self) -> bool:
        """Return ``True`` if at least one chunk of the query matched ``pattern``."""
        return self._is_bangla