File size: 4,947 Bytes
090987a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
"""
config.py
Central configuration for the claim analysis system
"""
import os
# Base directories, all resolved relative to the folder that contains this file
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
REPORTS_DIR = os.path.join(BASE_DIR, "reports")
# Create the working directories at import time.
# exist_ok=True makes the call a no-op for directories that are already present.
for directory in (DATA_DIR, OUTPUT_DIR, REPORTS_DIR):
    os.makedirs(directory, exist_ok=True)
# API keys
# SECURITY: credentials are read from environment variables instead of being
# committed to source control. The literal keys that were previously stored
# in this file have been exposed and should be revoked/rotated at each provider.
# Every key falls back to "" when its variable is unset, so importing this
# module never raises; an empty value means "not configured".
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")  # Google Custom Search API key
# Custom Search engine ID used together with GOOGLE_API_KEY.
# NOTE(review): kept as a literal on the assumption that it is an identifier
# rather than a credential - confirm.
GOOGLE_SEARCH_ENGINE_ID = "e7e6c19ee7a984f30"
# Serper.dev API key (alternative search API)
SERPER_API_KEY = os.environ.get("SERPER_API_KEY", "")
# SerpApi Google Search API key
SERPAPI_API_KEY = os.environ.get("SERPAPI_API_KEY", "")
# Data source switches: True enables collection from that source, False disables it
USE_FACEBOOK: bool = False    # Facebook data collection
USE_TIKTOK: bool = True       # TikTok data collection
USE_SERPAPI: bool = True      # SerpApi web search
USE_SERPER: bool = True       # Serper.dev web search
USE_DUCKDUCKGO: bool = False  # DuckDuckGo web search
USE_LOWYAT: bool = True       # Lowyat Forum data collection

# Per-source caps on the number of items to collect
FACEBOOK_MAX_RESULTS: int = 100
TIKTOK_MAX_RESULTS: int = 10  # Kept deliberately small to save Apify costs
WEB_SEARCH_MAX_RESULTS: int = 20
LOWYAT_MAX_THREADS: int = 20  # Upper bound on Lowyat Forum threads to collect
# Lowyat Forum settings
# Section names to collect from (described by the original author as all
# available forum sections).
LOWYAT_SECTIONS = [
    "Kopitiam",
    "SeriousKopitiam",
    "News",
    "Politics",
    "Malaysia",
    "Lowyat.NET",
    "Technology",
    "Computers",
    "Notebooks",
    "Smartphones",
    "Photography",
    "GamingPC",
    "GamingConsole",
    "Automotive",
    "Finance",
    "Property",
    "Travel",
    "Food",
    "Health",
    "Sports",
    "Entertainment",
    "SpecialInterestGarageSales",
    "JobsCorner",
    "DigitalMarketplace",
]
# Social media API tokens (Apify)
# SECURITY: the token is read from the environment instead of being committed
# to source control. The literal token that was previously stored in this file
# has been exposed and should be revoked/rotated in the Apify console.
# Falls back to "" when unset, so importing this module never raises.
APIFY_TOKEN = os.environ.get("APIFY_TOKEN", "")  # Main Apify API token
# Per-platform tokens default to the main token; set the matching environment
# variable to use a separate token for one platform only.
APIFY_TOKEN_FB = os.environ.get("APIFY_TOKEN_FB", APIFY_TOKEN)  # For Facebook actors
APIFY_TOKEN_TIKTOK = os.environ.get("APIFY_TOKEN_TIKTOK", APIFY_TOKEN)  # For TikTok actors
# Actor task IDs
# From danek/facebook-search-ppr
POST_TASK_ID_SEARCH = "l5DitJrtfCyOfrjn6"  # Facebook Search PPR (rajamohd/facebook-search-ppr-rm-bernama)
# From datavoyantlab/facebook-comments-scraper
COMMENT_TASK_ID = "qiAp6PQwkyYcLQiyC"  # Facebook Comments Scraper (rajamohd/facebook-comments-scraper-task)
# From clockworks/free-tiktok-scraper
TIKTOK_VIDEO_TASK_ID = "rfk0BzRAjuLPbccaZ"  # TikTok Data Extractor (devlab/tiktok-data-extractor-bernama2-video)
# From clockworks/tiktok-comments-scraper
TIKTOK_COMMENT_TASK_ID = "rgXeWIhnXKRD5bjGp"  # TikTok Comments Scraper (devlab/tiktok-comments-scraper-bernama2)
# Apify settings
USE_COMMENTS = True  # Whether to collect comments in addition to posts/videos
# Identifier of the model used for sentiment analysis
# (presumably a Hugging Face Hub model ID - confirm against the loader).
SENTIMENT_MODEL: str = "rmtariq/ft-Malay-bert"
# Priority indexer settings
# Multiplier applied to each priority criterion; a larger value makes the
# criterion count for more.
PRIORITY_WEIGHTS = dict(
    fact_check_value=1.5,    # factual importance: higher weight
    cause_confusion=1.2,     # potential to confuse: medium-high weight
    cause_chaos=1.8,         # potential harm: high weight
    affects_government=1.3,  # government impact: medium-high weight
    economic_impact=1.4,     # economic impact: medium-high weight
    law_related=1.5,         # legal implications: higher weight
    public_interest=1.2,     # public interest: medium weight
    lives_in_danger=2.0,     # safety concerns: highest weight
    viral=1.1,               # virality on its own: lower weight
    urgent=1.3,              # urgency: medium-high weight
)
# Score cut-offs for each priority tier
PRIORITY_THRESHOLDS = dict(
    high_priority=7.0,
    medium_priority=5.0,
    low_priority=3.0,
)
# Classification settings
# Maps each verdict key to its display name, its user-facing description
# (Malay), a numeric threshold, and a list of condition names that refer to
# PRIORITY_WEIGHTS-style criteria.
VERDICT_CATEGORIES = dict(
    # "Not true"
    TIDAK_BENAR=dict(
        name="TIDAK BENAR",
        description="Dakwaan ini tidak benar berdasarkan bukti yang ada.",
        threshold=7.0,
        conditions=["fact_check_value", "law_related"],
    ),
    # "Mixed"
    BERCAMPUR=dict(
        name="BERCAMPUR",
        description="Dakwaan ini mengandungi unsur-unsur benar dan tidak benar.",
        threshold=5.0,
        conditions=["cause_confusion"],
    ),
    # "True"
    BENAR=dict(
        name="BENAR",
        description="Dakwaan ini benar berdasarkan bukti yang ada.",
        threshold=3.0,
        conditions=[],
    ),
    # "Uncertain"
    TIDAK_PASTI=dict(
        name="TIDAK PASTI",
        description="Tidak cukup bukti untuk menentukan kebenaran dakwaan ini.",
        threshold=0.0,
        conditions=[],
    ),
)
# Database settings
DB_PATH = os.path.join(DATA_DIR, "claims.db")  # Database file kept under DATA_DIR
# Malaysian filter settings
MALAYSIAN_FILTER_THRESHOLD = 0.5  # Confidence threshold for Malaysian content
# Report settings
REPORT_TEMPLATE = None  # Path to DOCX template (optional)
# NOTE: a second, identical GOOGLE_SEARCH_ENGINE_ID assignment used to follow
# here. It was removed as redundant; the constant is defined once alongside
# the other Google search settings earlier in this file.
|