File size: 4,947 Bytes
090987a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
config.py
Central configuration for the claim analysis system
"""

import os

# Base directories
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
REPORTS_DIR = os.path.join(BASE_DIR, "reports")

# Create directories if they don't exist
for directory in [DATA_DIR, OUTPUT_DIR, REPORTS_DIR]:
    os.makedirs(directory, exist_ok=True)

# API Keys
#
# SECURITY NOTE(review): real credentials are committed here in plain text.
# These keys should be rotated and supplied through environment variables;
# the literal values below remain only as backward-compatible fallbacks so
# existing deployments keep working until the rotation happens.
GOOGLE_API_KEY = os.environ.get(
    "GOOGLE_API_KEY",
    "AIzaSyAnXTkB_0HKXKul3eI-1A56ZQWyjTVj1cQ",  # Google Custom Search API key
)
GOOGLE_SEARCH_ENGINE_ID = os.environ.get(
    "GOOGLE_SEARCH_ENGINE_ID",
    "e7e6c19ee7a984f30",  # Google Custom Search engine ID
)

# Serper.dev API key (alternative search API)
SERPER_API_KEY = os.environ.get(
    "SERPER_API_KEY",
    "e0af440fd71fb125dd38644fe378831c3ed741ca",
)

# SerpApi Google Search API key
SERPAPI_API_KEY = os.environ.get(
    "SERPAPI_API_KEY",
    "007928aeb7d86d4a85af12728e3534163961837027afb63ec7b89a4624a9f4ac",
)

# Data source settings: which collectors the pipeline enables.
USE_FACEBOOK = False    # Disable Facebook data collection
USE_TIKTOK = True       # Enable TikTok data collection
USE_SERPAPI = True      # Enable SerpApi web search
USE_SERPER = True       # Enable Serper.dev web search
USE_DUCKDUCKGO = False  # Disable DuckDuckGo web search
USE_LOWYAT = True       # Enable Lowyat Forum data collection

# Number of results to collect from each source
FACEBOOK_MAX_RESULTS = 100
TIKTOK_MAX_RESULTS = 10  # Significantly reduced to save Apify costs
WEB_SEARCH_MAX_RESULTS = 20
LOWYAT_MAX_THREADS = 20  # Maximum number of Lowyat Forum threads to collect

# Lowyat Forum settings: every available forum section, in crawl order.
LOWYAT_SECTIONS = [
    "Kopitiam",
    "SeriousKopitiam",
    "News",
    "Politics",
    "Malaysia",
    "Lowyat.NET",
    "Technology",
    "Computers",
    "Notebooks",
    "Smartphones",
    "Photography",
    "GamingPC",
    "GamingConsole",
    "Automotive",
    "Finance",
    "Property",
    "Travel",
    "Food",
    "Health",
    "Sports",
    "Entertainment",
    "SpecialInterestGarageSales",
    "JobsCorner",
    "DigitalMarketplace",
]

# Social Media API tokens
#
# SECURITY NOTE(review): this Apify token is committed in plain text. Rotate
# it and prefer supplying APIFY_TOKEN via the environment; the literal stays
# only as a backward-compatible fallback.
APIFY_TOKEN = os.environ.get(
    "APIFY_TOKEN",
    "apify_api_INtF6uUT4c6nOStYDYTllxuTBNSbng1IlTTB",  # Main Apify API token
)
APIFY_TOKEN_FB = APIFY_TOKEN      # For Facebook actors
APIFY_TOKEN_TIKTOK = APIFY_TOKEN  # For TikTok actors

# Apify actor task IDs
# From danek/facebook-search-ppr
POST_TASK_ID_SEARCH = "l5DitJrtfCyOfrjn6"  # Facebook Search PPR (rajamohd/facebook-search-ppr-rm-bernama)

# From datavoyantlab/facebook-comments-scraper
COMMENT_TASK_ID = "qiAp6PQwkyYcLQiyC"  # Facebook Comments Scraper (rajamohd/facebook-comments-scraper-task)

# From clockworks/free-tiktok-scraper
TIKTOK_VIDEO_TASK_ID = "rfk0BzRAjuLPbccaZ"  # TikTok Data Extractor (devlab/tiktok-data-extractor-bernama2-video)

# From clockworks/tiktok-comments-scraper
TIKTOK_COMMENT_TASK_ID = "rgXeWIhnXKRD5bjGp"  # TikTok Comments Scraper (devlab/tiktok-comments-scraper-bernama2)

# Apify settings
USE_COMMENTS = True  # Whether to collect comments in addition to posts/videos

# Sentiment model (Hugging Face model identifier)
SENTIMENT_MODEL = "rmtariq/ft-Malay-bert"

# Priority indexer settings.
# Each flag contributes its weight to a claim's priority score; a larger
# weight means that flag pushes the claim higher up the triage queue.
# (Key order is preserved from the original definition.)
PRIORITY_WEIGHTS = {
    "fact_check_value": 1.5,    # higher weight for factual importance
    "cause_confusion": 1.2,     # medium-high weight for confusion potential
    "cause_chaos": 1.8,         # high weight for potential harm
    "affects_government": 1.3,  # medium-high for government impact
    "economic_impact": 1.4,     # medium-high for economic impact
    "law_related": 1.5,         # higher weight for legal implications
    "public_interest": 1.2,     # medium weight for public interest
    "lives_in_danger": 2.0,     # highest weight for safety concerns
    "viral": 1.1,               # lower weight for virality alone
    "urgent": 1.3,              # medium-high for urgency
}

# Score cut-offs that map a weighted priority score onto a priority band.
PRIORITY_THRESHOLDS = {
    "high_priority": 7.0,
    "medium_priority": 5.0,
    "low_priority": 3.0,
}

# Classification settings


def _verdict(name, description, threshold, conditions=()):
    """Build one verdict-category record for VERDICT_CATEGORIES."""
    return {
        "name": name,
        "description": description,
        "threshold": threshold,
        "conditions": list(conditions),
    }


# Verdict categories, keyed by internal identifier. Each record carries the
# display name, a Malay-language description, a score threshold, and the
# condition flags associated with that verdict.
VERDICT_CATEGORIES = {
    "TIDAK_BENAR": _verdict(
        "TIDAK BENAR",
        "Dakwaan ini tidak benar berdasarkan bukti yang ada.",
        7.0,
        ["fact_check_value", "law_related"],
    ),
    "BERCAMPUR": _verdict(
        "BERCAMPUR",
        "Dakwaan ini mengandungi unsur-unsur benar dan tidak benar.",
        5.0,
        ["cause_confusion"],
    ),
    "BENAR": _verdict(
        "BENAR",
        "Dakwaan ini benar berdasarkan bukti yang ada.",
        3.0,
    ),
    "TIDAK_PASTI": _verdict(
        "TIDAK PASTI",
        "Tidak cukup bukti untuk menentukan kebenaran dakwaan ini.",
        0.0,
    ),
}

# Database settings
DB_PATH = os.path.join(DATA_DIR, "claims.db")  # claims database file under DATA_DIR

# Malaysian filter settings
MALAYSIAN_FILTER_THRESHOLD = 0.5  # Confidence threshold for Malaysian content

# Report settings
REPORT_TEMPLATE = None  # Path to DOCX template (optional); None means no template
# NOTE(review): a duplicate `GOOGLE_SEARCH_ENGINE_ID = "e7e6c19ee7a984f30"`
# assignment used to sit here, silently re-binding the identical value already
# defined in the API-keys section near the top of this file. The duplicate has
# been removed; the earlier definition is the single source of truth.