"""
config.py
Central configuration for the claim analysis system
"""
import os
# Base directories
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
REPORTS_DIR = os.path.join(BASE_DIR, "reports")
# Create directories if they don't exist
for directory in [DATA_DIR, OUTPUT_DIR, REPORTS_DIR]:
os.makedirs(directory, exist_ok=True)
# --- API keys ---------------------------------------------------------------
# SECURITY NOTE(review): these credentials were committed to source control.
# They should be rotated and supplied only via environment variables; the
# literals below remain solely as a backward-compatible fallback so existing
# deployments keep working until the env vars are provisioned.
GOOGLE_API_KEY = os.environ.get(
    "GOOGLE_API_KEY", "AIzaSyAnXTkB_0HKXKul3eI-1A56ZQWyjTVj1cQ"
)  # Google Custom Search API key
GOOGLE_SEARCH_ENGINE_ID = os.environ.get(
    "GOOGLE_SEARCH_ENGINE_ID", "e7e6c19ee7a984f30"
)  # Google Programmable Search Engine ID

# Serper.dev API key (alternative search API)
SERPER_API_KEY = os.environ.get(
    "SERPER_API_KEY", "e0af440fd71fb125dd38644fe378831c3ed741ca"
)

# SerpApi Google Search API key
SERPAPI_API_KEY = os.environ.get(
    "SERPAPI_API_KEY",
    "007928aeb7d86d4a85af12728e3534163961837027afb63ec7b89a4624a9f4ac",
)
# --- Data-source toggles ----------------------------------------------------
USE_FACEBOOK = False    # Facebook collection switched off
USE_TIKTOK = True       # TikTok collection switched on
USE_SERPAPI = True      # SerpApi web search switched on
USE_SERPER = True       # Serper.dev web search switched on
USE_DUCKDUCKGO = False  # DuckDuckGo web search switched off
USE_LOWYAT = True       # Lowyat Forum collection switched on

# --- Per-source collection limits -------------------------------------------
FACEBOOK_MAX_RESULTS = 100
TIKTOK_MAX_RESULTS = 10        # kept deliberately small to limit Apify costs
WEB_SEARCH_MAX_RESULTS = 20
LOWYAT_MAX_THREADS = 20        # upper bound on Lowyat Forum threads fetched
# --- Lowyat Forum settings --------------------------------------------------
# Every forum section the collector may crawl (all available sections).
LOWYAT_SECTIONS = [
    "Kopitiam", "SeriousKopitiam", "News", "Politics", "Malaysia",
    "Lowyat.NET", "Technology", "Computers", "Notebooks", "Smartphones",
    "Photography", "GamingPC", "GamingConsole", "Automotive", "Finance",
    "Property", "Travel", "Food", "Health", "Sports", "Entertainment",
    "SpecialInterestGarageSales", "JobsCorner", "DigitalMarketplace",
]
# --- Social-media API tokens ------------------------------------------------
# SECURITY NOTE(review): this token was committed to source control. Rotate it
# and supply it via the APIFY_TOKEN environment variable; the literal remains
# only as a backward-compatible fallback.
APIFY_TOKEN = os.environ.get(
    "APIFY_TOKEN", "apify_api_INtF6uUT4c6nOStYDYTllxuTBNSbng1IlTTB"
)  # Main Apify API token
APIFY_TOKEN_FB = APIFY_TOKEN      # token used for Facebook actors
APIFY_TOKEN_TIKTOK = APIFY_TOKEN  # token used for TikTok actors

# --- Apify actor task IDs ---------------------------------------------------
# From danek/facebook-search-ppr
POST_TASK_ID_SEARCH = "l5DitJrtfCyOfrjn6"  # Facebook Search PPR (rajamohd/facebook-search-ppr-rm-bernama)
# From datavoyantlab/facebook-comments-scraper
COMMENT_TASK_ID = "qiAp6PQwkyYcLQiyC"  # Facebook Comments Scraper (rajamohd/facebook-comments-scraper-task)
# From clockworks/free-tiktok-scraper
TIKTOK_VIDEO_TASK_ID = "rfk0BzRAjuLPbccaZ"  # TikTok Data Extractor (devlab/tiktok-data-extractor-bernama2-video)
# From clockworks/tiktok-comments-scraper
TIKTOK_COMMENT_TASK_ID = "rgXeWIhnXKRD5bjGp"  # TikTok Comments Scraper (devlab/tiktok-comments-scraper-bernama2)

# --- Apify behaviour --------------------------------------------------------
USE_COMMENTS = True  # collect comments in addition to posts/videos

# --- Sentiment model --------------------------------------------------------
SENTIMENT_MODEL = "rmtariq/ft-Malay-bert"  # model identifier for sentiment analysis
# --- Priority indexer settings ----------------------------------------------
# Multiplier applied to each priority signal when scoring a claim; larger
# values make the corresponding signal count for more.
PRIORITY_WEIGHTS = {
    "fact_check_value":   1.5,  # factual importance
    "cause_confusion":    1.2,  # potential to confuse
    "cause_chaos":        1.8,  # potential harm
    "affects_government": 1.3,  # government impact
    "economic_impact":    1.4,  # economic impact
    "law_related":        1.5,  # legal implications
    "public_interest":    1.2,  # public interest
    "lives_in_danger":    2.0,  # safety concerns (highest weight)
    "viral":              1.1,  # virality alone (lowest weight)
    "urgent":             1.3,  # urgency
}

# Score cut-offs separating the priority tiers.
PRIORITY_THRESHOLDS = {
    "high_priority":   7.0,
    "medium_priority": 5.0,
    "low_priority":    3.0,
}
# --- Classification settings ------------------------------------------------
# Verdict catalogue: each entry pairs a display name with a Malay description,
# a numeric "threshold", and the signal keys listed under "conditions"
# (consumed by the downstream classifier).
VERDICT_CATEGORIES = {
    # Claim judged false on the available evidence.
    "TIDAK_BENAR": {
        "name": "TIDAK BENAR",
        "description": "Dakwaan ini tidak benar berdasarkan bukti yang ada.",
        "threshold": 7.0,
        "conditions": ["fact_check_value", "law_related"],
    },
    # Claim containing a mix of true and false elements.
    "BERCAMPUR": {
        "name": "BERCAMPUR",
        "description": "Dakwaan ini mengandungi unsur-unsur benar dan tidak benar.",
        "threshold": 5.0,
        "conditions": ["cause_confusion"],
    },
    # Claim judged true on the available evidence.
    "BENAR": {
        "name": "BENAR",
        "description": "Dakwaan ini benar berdasarkan bukti yang ada.",
        "threshold": 3.0,
        "conditions": [],
    },
    # Not enough evidence to decide either way.
    "TIDAK_PASTI": {
        "name": "TIDAK PASTI",
        "description": "Tidak cukup bukti untuk menentukan kebenaran dakwaan ini.",
        "threshold": 0.0,
        "conditions": [],
    },
}
# --- Database settings ------------------------------------------------------
DB_PATH = os.path.join(DATA_DIR, "claims.db")  # database file kept under DATA_DIR

# --- Malaysian filter settings ----------------------------------------------
MALAYSIAN_FILTER_THRESHOLD = 0.5  # confidence cut-off for Malaysian content

# --- Report settings --------------------------------------------------------
REPORT_TEMPLATE = None  # optional path to a DOCX template

# NOTE(review): a duplicate `GOOGLE_SEARCH_ENGINE_ID` assignment previously sat
# here, silently re-declaring the value already defined alongside the other API
# keys near the top of this file. It was removed so there is a single source of
# truth — editing one copy can no longer be clobbered by the other.