File size: 4,947 Bytes
090987a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
config.py
Central configuration for the claim analysis system
"""

import os

# Base directories
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
REPORTS_DIR = os.path.join(BASE_DIR, "reports")

# Create directories if they don't exist
for directory in [DATA_DIR, OUTPUT_DIR, REPORTS_DIR]:
    os.makedirs(directory, exist_ok=True)

# API Keys
#
# SECURITY NOTE(review): real credentials are committed here in plain text.
# These keys should be rotated and supplied through environment variables;
# the literal values below remain only as backward-compatible fallbacks so
# existing deployments keep working until the rotation happens.
GOOGLE_API_KEY = os.environ.get(
    "GOOGLE_API_KEY",
    "AIzaSyAnXTkB_0HKXKul3eI-1A56ZQWyjTVj1cQ",  # Google Custom Search API key
)
GOOGLE_SEARCH_ENGINE_ID = os.environ.get(
    "GOOGLE_SEARCH_ENGINE_ID",
    "e7e6c19ee7a984f30",  # Google Custom Search engine ID
)

# Serper.dev API key (alternative search API)
SERPER_API_KEY = os.environ.get(
    "SERPER_API_KEY",
    "e0af440fd71fb125dd38644fe378831c3ed741ca",
)

# SerpApi Google Search API key
SERPAPI_API_KEY = os.environ.get(
    "SERPAPI_API_KEY",
    "007928aeb7d86d4a85af12728e3534163961837027afb63ec7b89a4624a9f4ac",
)

# Data source settings: which collectors the pipeline enables.
USE_FACEBOOK = False    # Disable Facebook data collection
USE_TIKTOK = True       # Enable TikTok data collection
USE_SERPAPI = True      # Enable SerpApi web search
USE_SERPER = True       # Enable Serper.dev web search
USE_DUCKDUCKGO = False  # Disable DuckDuckGo web search
USE_LOWYAT = True       # Enable Lowyat Forum data collection

# Number of results to collect from each source
FACEBOOK_MAX_RESULTS = 100
TIKTOK_MAX_RESULTS = 10  # Significantly reduced to save Apify costs
WEB_SEARCH_MAX_RESULTS = 20
LOWYAT_MAX_THREADS = 20  # Maximum number of Lowyat Forum threads to collect

# Lowyat Forum settings: every available forum section, in crawl order.
LOWYAT_SECTIONS = [
    "Kopitiam",
    "SeriousKopitiam",
    "News",
    "Politics",
    "Malaysia",
    "Lowyat.NET",
    "Technology",
    "Computers",
    "Notebooks",
    "Smartphones",
    "Photography",
    "GamingPC",
    "GamingConsole",
    "Automotive",
    "Finance",
    "Property",
    "Travel",
    "Food",
    "Health",
    "Sports",
    "Entertainment",
    "SpecialInterestGarageSales",
    "JobsCorner",
    "DigitalMarketplace",
]

# Social Media API tokens
#
# SECURITY NOTE(review): this Apify token is committed in plain text. Rotate
# it and prefer supplying APIFY_TOKEN via the environment; the literal stays
# only as a backward-compatible fallback.
APIFY_TOKEN = os.environ.get(
    "APIFY_TOKEN",
    "apify_api_INtF6uUT4c6nOStYDYTllxuTBNSbng1IlTTB",  # Main Apify API token
)
APIFY_TOKEN_FB = APIFY_TOKEN      # For Facebook actors
APIFY_TOKEN_TIKTOK = APIFY_TOKEN  # For TikTok actors

# Apify actor task IDs
# From danek/facebook-search-ppr
POST_TASK_ID_SEARCH = "l5DitJrtfCyOfrjn6"  # Facebook Search PPR (rajamohd/facebook-search-ppr-rm-bernama)

# From datavoyantlab/facebook-comments-scraper
COMMENT_TASK_ID = "qiAp6PQwkyYcLQiyC"  # Facebook Comments Scraper (rajamohd/facebook-comments-scraper-task)

# From clockworks/free-tiktok-scraper
TIKTOK_VIDEO_TASK_ID = "rfk0BzRAjuLPbccaZ"  # TikTok Data Extractor (devlab/tiktok-data-extractor-bernama2-video)

# From clockworks/tiktok-comments-scraper
TIKTOK_COMMENT_TASK_ID = "rgXeWIhnXKRD5bjGp"  # TikTok Comments Scraper (devlab/tiktok-comments-scraper-bernama2)

# Apify settings
USE_COMMENTS = True  # Whether to collect comments in addition to posts/videos

# Sentiment model (Hugging Face model identifier)
SENTIMENT_MODEL = "rmtariq/ft-Malay-bert"

# Priority indexer settings.
# Each flag contributes its weight to a claim's priority score; a larger
# weight means that flag pushes the claim higher up the triage queue.
# (Key order is preserved from the original definition.)
PRIORITY_WEIGHTS = {
    "fact_check_value": 1.5,    # higher weight for factual importance
    "cause_confusion": 1.2,     # medium-high weight for confusion potential
    "cause_chaos": 1.8,         # high weight for potential harm
    "affects_government": 1.3,  # medium-high for government impact
    "economic_impact": 1.4,     # medium-high for economic impact
    "law_related": 1.5,         # higher weight for legal implications
    "public_interest": 1.2,     # medium weight for public interest
    "lives_in_danger": 2.0,     # highest weight for safety concerns
    "viral": 1.1,               # lower weight for virality alone
    "urgent": 1.3,              # medium-high for urgency
}

# Score cut-offs that map a weighted priority score onto a priority band.
PRIORITY_THRESHOLDS = {
    "high_priority": 7.0,
    "medium_priority": 5.0,
    "low_priority": 3.0,
}

# Classification settings


def _verdict(name, description, threshold, conditions=()):
    """Build one verdict-category record for VERDICT_CATEGORIES."""
    return {
        "name": name,
        "description": description,
        "threshold": threshold,
        "conditions": list(conditions),
    }


# Verdict categories, keyed by internal identifier. Each record carries the
# display name, a Malay-language description, a score threshold, and the
# condition flags associated with that verdict.
VERDICT_CATEGORIES = {
    "TIDAK_BENAR": _verdict(
        "TIDAK BENAR",
        "Dakwaan ini tidak benar berdasarkan bukti yang ada.",
        7.0,
        ["fact_check_value", "law_related"],
    ),
    "BERCAMPUR": _verdict(
        "BERCAMPUR",
        "Dakwaan ini mengandungi unsur-unsur benar dan tidak benar.",
        5.0,
        ["cause_confusion"],
    ),
    "BENAR": _verdict(
        "BENAR",
        "Dakwaan ini benar berdasarkan bukti yang ada.",
        3.0,
    ),
    "TIDAK_PASTI": _verdict(
        "TIDAK PASTI",
        "Tidak cukup bukti untuk menentukan kebenaran dakwaan ini.",
        0.0,
    ),
}

# Database settings
DB_PATH = os.path.join(DATA_DIR, "claims.db")  # claims database file under DATA_DIR

# Malaysian filter settings
MALAYSIAN_FILTER_THRESHOLD = 0.5  # Confidence threshold for Malaysian content

# Report settings
REPORT_TEMPLATE = None  # Path to DOCX template (optional); None means no template
# NOTE(review): a duplicate `GOOGLE_SEARCH_ENGINE_ID = "e7e6c19ee7a984f30"`
# assignment used to sit here, silently re-binding the identical value already
# defined in the API-keys section near the top of this file. The duplicate has
# been removed; the earlier definition is the single source of truth.