# Script to clean and preprocess Macedonian Wikipedia text

import re

def clean_text(text: str) -> str:
    """Strip wiki markup and HTML tags from *text* and flatten it to one line.

    Removal happens in this order:

    1. Bracketed spans: double-bracket wiki links such as category links,
       and single-bracket spans such as external links or footnote markers.
       The whole span is dropped, including any link text inside it.
    2. Double-brace templates, including templates that span several lines.
    3. HTML/XML tags (the text between an opening and a closing tag is kept).

    Finally every newline is replaced by a space and leading/trailing
    whitespace is stripped. Whitespace left behind by a removed span is not
    collapsed, so removing a span from the middle of a sentence leaves two
    consecutive spaces.

    Nested constructs (a template inside a template, a link inside a link)
    are not fully handled: matching is non-greedy and stops at the first
    closing delimiter.

    Args:
        text: Raw wiki text.

    Returns:
        The cleaned, single-line text.
    """
    # Try the double-bracket form first: a lone single-bracket pattern stops
    # at the first closing bracket and leaves a stray "]" behind.
    text = re.sub(r'\[\[.*?\]\]|\[.*?\]', '', text)
    # DOTALL lets "." cross line breaks so multi-line templates (the usual
    # layout for infoboxes) are removed too.
    text = re.sub(r'\{\{.*?\}\}', '', text, flags=re.DOTALL)
    text = re.sub(r'<.*?>', '', text)  # Remove HTML tags
    return text.replace("\n", " ").strip()

# Entry point: when the file is executed directly (not imported), print a
# short banner confirming that the script loaded.
if __name__ == "__main__":
    ready_message = "Data cleaning script ready to process text!"
    print(ready_message)