""" config.py Central configuration for the claim analysis system """ import os # Base directories BASE_DIR = os.path.dirname(os.path.abspath(__file__)) DATA_DIR = os.path.join(BASE_DIR, "data") OUTPUT_DIR = os.path.join(BASE_DIR, "output") REPORTS_DIR = os.path.join(BASE_DIR, "reports") # Create directories if they don't exist for directory in [DATA_DIR, OUTPUT_DIR, REPORTS_DIR]: os.makedirs(directory, exist_ok=True) # API Keys GOOGLE_API_KEY = "AIzaSyAnXTkB_0HKXKul3eI-1A56ZQWyjTVj1cQ" # Google Custom Search API key GOOGLE_SEARCH_ENGINE_ID = "e7e6c19ee7a984f30" # Add your search engine ID here (you'll need to create this) # Serper.dev API Key (alternative search API) SERPER_API_KEY = "e0af440fd71fb125dd38644fe378831c3ed741ca" # SerpApi Google Search API Key SERPAPI_API_KEY = "007928aeb7d86d4a85af12728e3534163961837027afb63ec7b89a4624a9f4ac" # Data source settings USE_FACEBOOK = False # Disable Facebook data collection USE_TIKTOK = True # Enable TikTok data collection USE_SERPAPI = True # Enable SerpApi web search USE_SERPER = True # Enable Serper.dev web search USE_DUCKDUCKGO = False # Disable DuckDuckGo web search USE_LOWYAT = True # Enable Lowyat Forum data collection # Number of results to collect from each source FACEBOOK_MAX_RESULTS = 100 TIKTOK_MAX_RESULTS = 10 # Significantly reduced to save Apify costs WEB_SEARCH_MAX_RESULTS = 20 LOWYAT_MAX_THREADS = 20 # Maximum number of Lowyat Forum threads to collect # Lowyat Forum settings LOWYAT_SECTIONS = [ "Kopitiam", "SeriousKopitiam", "News", "Politics", "Malaysia", "Lowyat.NET", "Technology", "Computers", "Notebooks", "Smartphones", "Photography", "GamingPC", "GamingConsole", "Automotive", "Finance", "Property", "Travel", "Food", "Health", "Sports", "Entertainment", "SpecialInterestGarageSales", "JobsCorner", "DigitalMarketplace" ] # All available forum sections # Social Media API tokens APIFY_TOKEN = "apify_api_INtF6uUT4c6nOStYDYTllxuTBNSbng1IlTTB" # Main Apify API token APIFY_TOKEN_FB = APIFY_TOKEN # For Facebook actors APIFY_TOKEN_TIKTOK = APIFY_TOKEN # For TikTok actors # Actor task IDs # From danek/facebook-search-ppr POST_TASK_ID_SEARCH = "l5DitJrtfCyOfrjn6" # Facebook Search PPR (rajamohd/facebook-search-ppr-rm-bernama) # From datavoyantlab/facebook-comments-scraper COMMENT_TASK_ID = "qiAp6PQwkyYcLQiyC" # Facebook Comments Scraper (rajamohd/facebook-comments-scraper-task) # From clockworks/free-tiktok-scraper TIKTOK_VIDEO_TASK_ID = "rfk0BzRAjuLPbccaZ" # TikTok Data Extractor (devlab/tiktok-data-extractor-bernama2-video) # From clockworks/tiktok-comments-scraper TIKTOK_COMMENT_TASK_ID = "rgXeWIhnXKRD5bjGp" # TikTok Comments Scraper (devlab/tiktok-comments-scraper-bernama2) # Apify settings USE_COMMENTS = True # Whether to collect comments in addition to posts/videos # Sentiment model SENTIMENT_MODEL = "rmtariq/ft-Malay-bert" # Priority indexer settings PRIORITY_WEIGHTS = { "fact_check_value": 1.5, # Higher weight for factual importance "cause_confusion": 1.2, # Medium-high weight for confusion potential "cause_chaos": 1.8, # High weight for potential harm "affects_government": 1.3, # Medium-high for government impact "economic_impact": 1.4, # Medium-high for economic impact "law_related": 1.5, # Higher weight for legal implications "public_interest": 1.2, # Medium weight for public interest "lives_in_danger": 2.0, # Highest weight for safety concerns "viral": 1.1, # Lower weight for virality alone "urgent": 1.3 # Medium-high for urgency } PRIORITY_THRESHOLDS = { "high_priority": 7.0, "medium_priority": 5.0, "low_priority": 3.0 } # Classification settings VERDICT_CATEGORIES = { "TIDAK_BENAR": { "name": "TIDAK BENAR", "description": "Dakwaan ini tidak benar berdasarkan bukti yang ada.", "threshold": 7.0, "conditions": ["fact_check_value", "law_related"] }, "BERCAMPUR": { "name": "BERCAMPUR", "description": "Dakwaan ini mengandungi unsur-unsur benar dan tidak benar.", "threshold": 5.0, "conditions": ["cause_confusion"] }, "BENAR": { "name": "BENAR", "description": "Dakwaan ini benar berdasarkan bukti yang ada.", "threshold": 3.0, "conditions": [] }, "TIDAK_PASTI": { "name": "TIDAK PASTI", "description": "Tidak cukup bukti untuk menentukan kebenaran dakwaan ini.", "threshold": 0.0, "conditions": [] } } # Database settings DB_PATH = os.path.join(DATA_DIR, "claims.db") # Malaysian filter settings MALAYSIAN_FILTER_THRESHOLD = 0.5 # Confidence threshold for Malaysian content # Report settings REPORT_TEMPLATE = None # Path to DOCX template (optional) GOOGLE_SEARCH_ENGINE_ID = "e7e6c19ee7a984f30" # Google Search Engine ID