import html
import re
import unicodedata
| |
|
| | |
| |
|
def remove_words(text, words_to_remove):
    """Delete every occurrence of the given words/phrases from *text*.

    Entries are matched literally (regex metacharacters are escaped) and
    case-sensitively. Empty entries are ignored.

    An edge of an entry that is a word character must not touch another
    word character in *text*, so ``"FC"`` is not removed from inside
    ``"FCBarcelona"``. An edge that is punctuation or whitespace carries no
    such restriction: a plain word-boundary assertion there would demand a
    neighbouring word character, which means ``"-"`` surrounded by spaces
    would never be removed.

    Args:
        text: The string to clean.
        words_to_remove: Iterable of literal strings to delete.

    Returns:
        *text* with all entries removed, every run of whitespace collapsed
        to a single space, and leading/trailing whitespace stripped.
    """
    # Longest entries first, so an entry that is a prefix of another one
    # (e.g. "File" vs "File name :") cannot shadow the longer phrase.
    # dict.fromkeys drops duplicates while keeping a deterministic order.
    candidates = sorted(
        (word for word in dict.fromkeys(words_to_remove) if word),
        key=len,
        reverse=True,
    )

    alternatives = []
    for word in candidates:
        # Only guard the edges that are word characters themselves.
        leading = r'(?<!\w)' if re.match(r'\w', word) else ''
        trailing = r'(?!\w)' if re.search(r'\w\Z', word) else ''
        alternatives.append(leading + re.escape(word) + trailing)

    if alternatives:
        text = re.sub('|'.join(alternatives), '', text)

    # Removing words leaves gaps behind; normalise the spacing.
    return re.sub(r'\s+', ' ', text).strip()
| |
|
def convert_special_to_normal(text):
    """Reduce *text* to plain ASCII letters, digits and spaces.

    Steps, in order:

    1. HTML entities (``&amp;``, ``&eacute;`` ...) are decoded.
    2. The text is NFKD-normalised and encoded to ASCII with errors
       ignored, which strips accents from decomposable characters and
       drops every other non-ASCII character.
    3. Any remaining character that is not ``A-Z``, ``a-z``, ``0-9`` or a
       space is removed (nothing is inserted in its place, so removed
       punctuation between two spaces leaves both spaces behind).

    Args:
        text: The string to convert.

    Returns:
        The converted string.
    """
    text = html.unescape(text)
    # NFKD splits accented letters into base letter + combining mark; the
    # ASCII round-trip then discards the marks and anything else non-ASCII.
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
    return re.sub(r'[^A-Za-z0-9 ]+', '', text)
| |
|
def clean_string_special(input_string):
    """Strip punctuation and symbols from *input_string*.

    Every run of characters that is neither a word character (letter,
    digit or underscore) nor whitespace is deleted; nothing is inserted in
    its place and whitespace is left untouched.

    Args:
        input_string: The string to clean.

    Returns:
        The string with those characters removed.
    """
    non_word_non_space = re.compile(r'[^\w\s]+')
    return non_word_non_space.sub('', input_string)
| |
|
def clean_text(input_text):
    """Return *input_text* without newlines, ``@`` signs or emoji.

    Newline characters and ``@`` are deleted outright (no space is put in
    their place), then every run of characters that falls in one of the
    code-point ranges listed below is removed.

    Args:
        input_text: The string to clean.

    Returns:
        The cleaned string.
    """
    emoji_ranges = (
        "\U0001F600-\U0001F64F",  # emoticons
        "\U0001F300-\U0001F5FF",  # miscellaneous symbols and pictographs
        "\U0001F680-\U0001F6FF",  # transport and map symbols
        "\U0001F700-\U0001F77F",  # alchemical symbols
        "\U00002600-\U000026FF",  # miscellaneous symbols
        "\U00002700-\U000027BF",  # dingbats
        "\U0001F900-\U0001F9FF",  # supplemental symbols and pictographs
        "\U0001FA70-\U0001FAFF",  # symbols and pictographs extended-A
        "\U0001F1E0-\U0001F1FF",  # flag (regional indicator) symbols
    )
    emoji_run = re.compile("[" + "".join(emoji_ranges) + "]+", flags=re.UNICODE)

    without_newlines = input_text.replace('\n', '')
    without_at_signs = without_newlines.replace('@', '')
    return emoji_run.sub(r'', without_at_signs)
| |
|
| |
|
| |
|
def Get_Title_Year(name):
    """Extract a ``(title, year)`` pair from a release/file name.

    A fixed list of tags and channel watermarks is first removed from
    *name* with ``remove_words``. The remainder is then searched for the
    first plausible release year: four digits starting with ``19`` or
    ``20`` that are not part of a longer digit run. Everything before that
    year (minus trailing whitespace, dots and parentheses) is the title,
    which is finally passed through ``clean_string_special``.

    Restricting the year this way keeps resolution tags such as ``1080p``
    or ``2160p`` from being reported as the year.

    Args:
        name: The raw file or caption name.

    Returns:
        ``(title, year)`` with *year* as an ``int``, or ``(None, None)``
        when no year preceded by at least one character is found.
    """
    # NOTE(review): several entries below look like mis-encoded Unicode
    # (stylised letters read with the wrong code page) -- confirm the file's
    # encoding; as written they only match text garbled the same way.
    words_to_remove = ["Fษชสแด","Fษชสแด ษดแดแดแด :","FC", "HEVC","ษดแดแดแด","-","BuLMoviee" ,"๐๐ผ๐ถ๐ป ๐จ๐ ๐ข๐ป ๐ง๐ฒ๐น๐ฒ๐ด๐ฟ๐ฎ๏ฟฝ","๐๐ผ๐ถ๐ป ๐จ๐ ๐ข๐ป ๐ง๐ฒ๐น๐ฒ๐ด๐ฟ๐ฎ๐บ","SIDHUU 591","๐ฑ๐๐๐ ๐ผ๐ ๐ถษด ๐ปแดสแดษขสแดแด","Tษชแดสแด :","Bollywood","mkv","Mแดแด ษชแด", "ษขสแดแดแด" , "TGxMALLU_MOVIE"]
    name = remove_words(name, words_to_remove)

    # Lazy title, optional separators, then a 19xx/20xx year that is not
    # glued to further digits on either side.
    match = re.search(
        r'(?P<title>.+?)[\s\.\(\)]*(?<!\d)(?P<year>(?:19|20)\d{2})(?!\d)',
        name,
    )
    if match is None:
        return None, None

    title = clean_string_special(match.group('title').strip())
    return title, int(match.group('year'))
| |
|