"""
config.py
Central configuration for the claim analysis system
"""
import os
# Base directories
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
REPORTS_DIR = os.path.join(BASE_DIR, "reports")
# Create directories if they don't exist
for directory in [DATA_DIR, OUTPUT_DIR, REPORTS_DIR]:
os.makedirs(directory, exist_ok=True)
# --- API keys ---------------------------------------------------------------
# SECURITY NOTE(review): these credentials were committed to source control.
# They should be rotated and supplied only via environment variables; the
# literals below remain solely as a backward-compatible fallback so existing
# deployments keep working until the env vars are provisioned.
GOOGLE_API_KEY = os.environ.get(
    "GOOGLE_API_KEY", "AIzaSyAnXTkB_0HKXKul3eI-1A56ZQWyjTVj1cQ"
)  # Google Custom Search API key
GOOGLE_SEARCH_ENGINE_ID = os.environ.get(
    "GOOGLE_SEARCH_ENGINE_ID", "e7e6c19ee7a984f30"
)  # Google Programmable Search Engine ID

# Serper.dev API key (alternative search API)
SERPER_API_KEY = os.environ.get(
    "SERPER_API_KEY", "e0af440fd71fb125dd38644fe378831c3ed741ca"
)

# SerpApi Google Search API key
SERPAPI_API_KEY = os.environ.get(
    "SERPAPI_API_KEY",
    "007928aeb7d86d4a85af12728e3534163961837027afb63ec7b89a4624a9f4ac",
)
# --- Data-source toggles ----------------------------------------------------
USE_FACEBOOK = False    # Facebook collection switched off
USE_TIKTOK = True       # TikTok collection switched on
USE_SERPAPI = True      # SerpApi web search switched on
USE_SERPER = True       # Serper.dev web search switched on
USE_DUCKDUCKGO = False  # DuckDuckGo web search switched off
USE_LOWYAT = True       # Lowyat Forum collection switched on

# --- Per-source collection limits -------------------------------------------
FACEBOOK_MAX_RESULTS = 100
TIKTOK_MAX_RESULTS = 10        # kept deliberately small to limit Apify costs
WEB_SEARCH_MAX_RESULTS = 20
LOWYAT_MAX_THREADS = 20        # upper bound on Lowyat Forum threads fetched
# --- Lowyat Forum settings --------------------------------------------------
# Every forum section the collector may crawl (all available sections).
LOWYAT_SECTIONS = [
    "Kopitiam", "SeriousKopitiam", "News", "Politics", "Malaysia",
    "Lowyat.NET", "Technology", "Computers", "Notebooks", "Smartphones",
    "Photography", "GamingPC", "GamingConsole", "Automotive", "Finance",
    "Property", "Travel", "Food", "Health", "Sports", "Entertainment",
    "SpecialInterestGarageSales", "JobsCorner", "DigitalMarketplace",
]
# --- Social-media API tokens ------------------------------------------------
# SECURITY NOTE(review): this token was committed to source control. Rotate it
# and supply it via the APIFY_TOKEN environment variable; the literal remains
# only as a backward-compatible fallback.
APIFY_TOKEN = os.environ.get(
    "APIFY_TOKEN", "apify_api_INtF6uUT4c6nOStYDYTllxuTBNSbng1IlTTB"
)  # Main Apify API token
APIFY_TOKEN_FB = APIFY_TOKEN      # token used for Facebook actors
APIFY_TOKEN_TIKTOK = APIFY_TOKEN  # token used for TikTok actors

# --- Apify actor task IDs ---------------------------------------------------
# From danek/facebook-search-ppr
POST_TASK_ID_SEARCH = "l5DitJrtfCyOfrjn6"  # Facebook Search PPR (rajamohd/facebook-search-ppr-rm-bernama)
# From datavoyantlab/facebook-comments-scraper
COMMENT_TASK_ID = "qiAp6PQwkyYcLQiyC"  # Facebook Comments Scraper (rajamohd/facebook-comments-scraper-task)
# From clockworks/free-tiktok-scraper
TIKTOK_VIDEO_TASK_ID = "rfk0BzRAjuLPbccaZ"  # TikTok Data Extractor (devlab/tiktok-data-extractor-bernama2-video)
# From clockworks/tiktok-comments-scraper
TIKTOK_COMMENT_TASK_ID = "rgXeWIhnXKRD5bjGp"  # TikTok Comments Scraper (devlab/tiktok-comments-scraper-bernama2)

# --- Apify behaviour --------------------------------------------------------
USE_COMMENTS = True  # collect comments in addition to posts/videos

# --- Sentiment model --------------------------------------------------------
SENTIMENT_MODEL = "rmtariq/ft-Malay-bert"  # model identifier for sentiment analysis
# --- Priority indexer settings ----------------------------------------------
# Multiplier applied to each priority signal when scoring a claim; larger
# values make the corresponding signal count for more.
PRIORITY_WEIGHTS = {
    "fact_check_value":   1.5,  # factual importance
    "cause_confusion":    1.2,  # potential to confuse
    "cause_chaos":        1.8,  # potential harm
    "affects_government": 1.3,  # government impact
    "economic_impact":    1.4,  # economic impact
    "law_related":        1.5,  # legal implications
    "public_interest":    1.2,  # public interest
    "lives_in_danger":    2.0,  # safety concerns (highest weight)
    "viral":              1.1,  # virality alone (lowest weight)
    "urgent":             1.3,  # urgency
}

# Score cut-offs separating the priority tiers.
PRIORITY_THRESHOLDS = {
    "high_priority":   7.0,
    "medium_priority": 5.0,
    "low_priority":    3.0,
}
# --- Classification settings ------------------------------------------------
# Verdict catalogue: each entry pairs a display name with a Malay description,
# a numeric "threshold", and the signal keys listed under "conditions"
# (consumed by the downstream classifier).
VERDICT_CATEGORIES = {
    # Claim judged false on the available evidence.
    "TIDAK_BENAR": {
        "name": "TIDAK BENAR",
        "description": "Dakwaan ini tidak benar berdasarkan bukti yang ada.",
        "threshold": 7.0,
        "conditions": ["fact_check_value", "law_related"],
    },
    # Claim containing a mix of true and false elements.
    "BERCAMPUR": {
        "name": "BERCAMPUR",
        "description": "Dakwaan ini mengandungi unsur-unsur benar dan tidak benar.",
        "threshold": 5.0,
        "conditions": ["cause_confusion"],
    },
    # Claim judged true on the available evidence.
    "BENAR": {
        "name": "BENAR",
        "description": "Dakwaan ini benar berdasarkan bukti yang ada.",
        "threshold": 3.0,
        "conditions": [],
    },
    # Not enough evidence to decide either way.
    "TIDAK_PASTI": {
        "name": "TIDAK PASTI",
        "description": "Tidak cukup bukti untuk menentukan kebenaran dakwaan ini.",
        "threshold": 0.0,
        "conditions": [],
    },
}
# --- Database settings ------------------------------------------------------
DB_PATH = os.path.join(DATA_DIR, "claims.db")  # database file kept under DATA_DIR

# --- Malaysian filter settings ----------------------------------------------
MALAYSIAN_FILTER_THRESHOLD = 0.5  # confidence cut-off for Malaysian content

# --- Report settings --------------------------------------------------------
REPORT_TEMPLATE = None  # optional path to a DOCX template

# NOTE(review): a duplicate `GOOGLE_SEARCH_ENGINE_ID` assignment previously sat
# here, silently re-declaring the value already defined alongside the other API
# keys near the top of this file. It was removed so there is a single source of
# truth — editing one copy can no longer be clobbered by the other.