Spaces:
Sleeping
Sleeping
Create config/settings.py
Browse files- config/settings.py +160 -0
config/settings.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# config/settings.py
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
load_dotenv()
|
| 6 |
+
|
| 7 |
+
# --------------------
|
| 8 |
+
# Reddit API Credentials
|
| 9 |
+
# --------------------
|
| 10 |
+
# Reddit API credentials, read from the process environment.
# os.getenv returns None for any variable that is not set, so each of
# these may be None at import time.
REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET")
REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT")
|
| 13 |
+
|
| 14 |
+
# --------------------
|
| 15 |
+
# MongoDB Configuration
|
| 16 |
+
# --------------------
|
| 17 |
+
# MongoDB connection settings, read from the process environment.
# MONGO_URI is None when the variable is unset; MONGO_DB_NAME falls back
# to "reddit_db".
MONGO_URI = os.getenv("MONGO_URI")
MONGO_DB_NAME = os.getenv("MONGO_DB_NAME", "reddit_db")
|
| 19 |
+
|
| 20 |
+
# --------------------
|
| 21 |
+
# Logging
|
| 22 |
+
# --------------------
|
| 23 |
+
# Log destination (relative path) and verbosity name.
LOG_FILE = "logs/reddit_scraper.log"
LOG_LEVEL = "INFO"  # or DEBUG
|
| 25 |
+
|
| 26 |
+
# --------------------
|
| 27 |
+
# Automation Flags
|
| 28 |
+
# --------------------
|
| 29 |
+
# Feature switches for the automated parts of the pipeline; all enabled.
AUTOMATED_FETCH_LIMITS = True  # Fetch max posts/comments dynamically
AUTOMATED_RISK = True  # Compute risk dynamically from data
AUTOMATED_LOCATION_EXTRACTION = True  # Extract locations from text automatically
|
| 32 |
+
|
| 33 |
+
# --------------------
|
| 34 |
+
# Fetching Settings
|
| 35 |
+
# --------------------
|
| 36 |
+
# Fetch window and limits. The three limits are left as None.
FETCH_DAYS = 90
POST_LIMIT = None  # If AUTOMATED_FETCH_LIMITS = True, fetch max allowed
COMMENT_LIMIT = None
MAX_COMMENTS_PER_POST = None
|
| 40 |
+
|
| 41 |
+
# --------------------
|
| 42 |
+
# Subreddits & Keywords (Auto-updated from data)
|
| 43 |
+
# --------------------
|
| 44 |
+
# Subreddit names to scan (no "r/" prefix; casing kept as written).
SUBREDDITS = [
    "India", "Karnataka", "Drugs", "bangalore",
    "narcotics", "DarkNetMarkets", "IndianEnts",
]
|
| 48 |
+
|
| 49 |
+
# Keywords used to flag drug-related content.
# Every entry is lowercase. The list previously carried an upper-case
# "MDMA" alongside "mdma"; that case-variant duplicate is dropped so the
# term is listed once.
# NOTE(review): assumes consumers compare case-insensitively - confirm.
DRUG_KEYWORDS = [
    # Common drugs
    "weed", "charas", "brown sugar", "cocaine", "ganja",
    "mdma", "lsd", "drug", "smuggle", "heroin", "meth", "cannabis",

    # Drug-related activities
    "dealer", "peddler", "trafficking", "bust", "raid", "seized",
    "arrested", "narcotics", "contraband", "substance abuse",

    # Street names and slang will be handled separately
]
|
| 60 |
+
|
| 61 |
+
# Place names to look for in text: Bengaluru and localities within it.
LOCATIONS = [
    "Bengaluru", "BTM", "Majestic", "Koramangala", "Indiranagar",
    "Whitefield", "Electronic City", "Marathahalli", "HSR Layout",
    "Jayanagar", "Malleshwaram", "Rajajinagar", "Yelahanka",
]
|
| 66 |
+
|
| 67 |
+
# Slang terms, grouped by the substance they refer to.
# NOTE(review): despite the name this is a list, not a dict.
# NOTE(review): single-character entries ("x", "e") and common words
# ("white", "green", "deal", ...) would match most text under substring
# matching - confirm the matcher compares whole words.
SLANG_DICT = [
    # Cannabis slang
    "chronic", "blunt", "dope", "kush", "420", "ganja", "pot", "mary jane",
    "grass", "herb", "green", "bud",

    # MDMA/Ecstasy slang
    "molly", "x", "e", "rolls",

    # Cocaine slang
    "coke", "snow", "blow", "white", "powder",

    # LSD slang
    "acid", "tabs", "doses",

    # General slang
    "stash", "score", "plug", "connect", "trap", "deal",
]
|
| 84 |
+
|
| 85 |
+
# --------------------
|
| 86 |
+
# Location Coordinates (Auto-detected from data)
|
| 87 |
+
# These are fallback coordinates if location detection fails
|
| 88 |
+
# --------------------
|
| 89 |
+
# Fixed coordinates per place name.
# NOTE(review): tuples are presumably (latitude, longitude) in decimal
# degrees - confirm against the code that plots them.
LOCATION_COORDS = {
    # Bengaluru areas
    "BTM": (12.917, 77.610),
    "Majestic": (12.976, 77.592),
    "Koramangala": (12.935, 77.622),
    "Indiranagar": (12.971, 77.641),
    "Whitefield": (12.970, 77.750),
    "Electronic City": (12.839, 77.677),
    "Marathahalli": (12.959, 77.697),
    "HSR Layout": (12.912, 77.641),
    "Jayanagar": (12.926, 77.584),
    "Malleshwaram": (13.003, 77.571),
    "Rajajinagar": (12.990, 77.552),
    "Yelahanka": (13.100, 77.594),
    "Bengaluru": (12.9716, 77.5946),

    # Karnataka cities (for district-level analysis)
    "Mysuru": (12.2958, 76.6394),
    "Hubli": (15.3647, 75.1240),
    "Mangaluru": (12.9141, 74.8560),
    "Belagavi": (15.8497, 74.4977),
    "Tumakuru": (13.3392, 77.1012),
}
|
| 112 |
+
|
| 113 |
+
# --------------------
|
| 114 |
+
# Data paths
|
| 115 |
+
# --------------------
|
| 116 |
+
# Relative directories for raw and processed data (trailing slash kept).
RAW_DATA_PATH = "data/raw/"
PROCESSED_DATA_PATH = "data/processed/"
|
| 118 |
+
|
| 119 |
+
# --------------------
|
| 120 |
+
# Risk Score Weights (for automated risk calculation)
|
| 121 |
+
# --------------------
|
| 122 |
+
# Relative weight of each signal in the risk score; the four weights
# add up to 1.0.
RISK_WEIGHTS = {
    "keyword_frequency": 0.4,
    "location_frequency": 0.3,
    "sentiment_negative": 0.2,
    "slang_usage": 0.1,
}
|
| 128 |
+
|
| 129 |
+
# --------------------
|
| 130 |
+
# Sentiment Thresholds
|
| 131 |
+
# --------------------
|
| 132 |
+
# Sentiment score cutoffs, symmetric around zero.
SENTIMENT_POSITIVE_THRESHOLD = 0.05
SENTIMENT_NEGATIVE_THRESHOLD = -0.05
|
| 134 |
+
|
| 135 |
+
# --------------------
|
| 136 |
+
# High-Risk Keywords (weighted higher in risk calculation)
|
| 137 |
+
# --------------------
|
| 138 |
+
# Keywords treated as high-risk.
HIGH_RISK_KEYWORDS = [
    "smuggle", "trafficking", "dealer", "peddler", "bust",
    "raid", "seized", "arrested", "contraband",
]
|
| 142 |
+
|
| 143 |
+
# --------------------
|
| 144 |
+
# Dashboard Configuration
|
| 145 |
+
# --------------------
|
| 146 |
+
# Dashboard display settings.
DASHBOARD_REFRESH_INTERVAL = 300  # seconds (5 minutes)
MAX_POSTS_DISPLAY = 100
MAP_DEFAULT_ZOOM = 11
|
| 149 |
+
|
| 150 |
+
# --------------------
|
| 151 |
+
# Alert Thresholds (for automated alerts)
|
| 152 |
+
# --------------------
|
| 153 |
+
# Alerting settings.
# NOTE(review): 0.75 is a fixed score cutoff. It selects the top 25% of
# risk scores only if scores are spread evenly over 0..1 - confirm how
# the consumer applies it.
HIGH_RISK_THRESHOLD = 0.75
ALERT_KEYWORDS = ["bust", "raid", "arrested", "seized"]
|
| 155 |
+
|
| 156 |
+
# --------------------
|
| 157 |
+
# Export Settings
|
| 158 |
+
# --------------------
|
| 159 |
+
# Export settings.
EXPORT_FORMAT = "csv"  # or "json", "excel"
INCLUDE_METADATA = True
|