lawlevisan committed on
Commit
7e698a8
·
verified ·
1 Parent(s): f6c54d5

Create config/settings.py

Browse files
Files changed (1) hide show
  1. config/settings.py +160 -0
config/settings.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # config/settings.py
2
+ import os
3
+ from dotenv import load_dotenv
4
+
5
# Load variables from a local .env file (if one exists) into the process
# environment, so the os.getenv() lookups in this module can see them.
load_dotenv()
6
+
7
# ------------------------------------------------------------------
# Reddit API credentials
# Each value is looked up in the process environment; a variable that
# is not set yields None.
# ------------------------------------------------------------------
REDDIT_CLIENT_ID = os.environ.get("REDDIT_CLIENT_ID")
REDDIT_CLIENT_SECRET = os.environ.get("REDDIT_CLIENT_SECRET")
REDDIT_USER_AGENT = os.environ.get("REDDIT_USER_AGENT")
13
+
14
# ------------------------------------------------------------------
# MongoDB configuration
# MONGO_URI has no fallback (None when unset); the database name falls
# back to "reddit_db" when MONGO_DB_NAME is not set.
# ------------------------------------------------------------------
MONGO_URI = os.environ.get("MONGO_URI")
MONGO_DB_NAME = os.environ.get("MONGO_DB_NAME", "reddit_db")
19
+
20
# --------------------
# Logging
# --------------------
# Log file location (a relative path).
LOG_FILE = "logs/reddit_scraper.log"

# Log level name. Defaults to "INFO"; set the LOG_LEVEL environment variable
# (for example LOG_LEVEL=DEBUG in .env) to change it without editing this
# file. The value is stripped and upper-cased, and anything that is not a
# standard logging level name falls back to "INFO".
_LOG_LEVEL_NAMES = ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
_env_log_level = os.getenv("LOG_LEVEL", "INFO").strip().upper()
LOG_LEVEL = _env_log_level if _env_log_level in _LOG_LEVEL_NAMES else "INFO"
25
+
26
# ------------------------------------------------------------------
# Automation flags
# ------------------------------------------------------------------
# Fetch the maximum number of posts/comments dynamically.
AUTOMATED_FETCH_LIMITS: bool = True

# Compute risk dynamically from the collected data.
AUTOMATED_RISK: bool = True

# Extract locations from text automatically.
AUTOMATED_LOCATION_EXTRACTION: bool = True
32
+
33
# ------------------------------------------------------------------
# Fetching settings
# ------------------------------------------------------------------
# Number of days used when fetching (presumably the look-back window --
# confirm against the fetcher).
FETCH_DAYS: int = 90

# No explicit caps are configured. With AUTOMATED_FETCH_LIMITS = True the
# maximum allowed amount is fetched instead.
POST_LIMIT = COMMENT_LIMIT = MAX_COMMENTS_PER_POST = None
40
+
41
# ------------------------------------------------------------------
# Subreddits & keywords (auto-updated from data)
# ------------------------------------------------------------------
# Subreddit names, without the "r/" prefix.
SUBREDDITS = [
    "India",
    "Karnataka",
    "Drugs",
    "bangalore",
    "narcotics",
    "DarkNetMarkets",
    "IndianEnts",
]
48
+
49
# Drug-related search keywords, built from two groups in a fixed order.
# Street names and slang are kept in a separate list.
# NOTE(review): "MDMA" and "mdma" are both listed while every other entry is
# lower-case. If matching is case-insensitive one of them is redundant (and
# may be double-counted) -- confirm against the matcher before removing.
DRUG_KEYWORDS = [
    # Common drugs
    "weed",
    "charas",
    "brown sugar",
    "cocaine",
    "MDMA",
    "ganja",
    "mdma",
    "lsd",
    "drug",
    "smuggle",
    "heroin",
    "meth",
    "cannabis",
] + [
    # Drug-related activities
    "dealer",
    "peddler",
    "trafficking",
    "bust",
    "raid",
    "seized",
    "arrested",
    "narcotics",
    "contraband",
    "substance abuse",
]
60
+
61
# Place names to look for: the city itself followed by Bengaluru localities.
LOCATIONS = [
    "Bengaluru",
    "BTM",
    "Majestic",
    "Koramangala",
    "Indiranagar",
    "Whitefield",
    "Electronic City",
    "Marathahalli",
    "HSR Layout",
    "Jayanagar",
    "Malleshwaram",
    "Rajajinagar",
    "Yelahanka",
]
66
+
67
# Slang terms, grouped by substance and concatenated in a fixed order.
# Despite the name, this is a flat list of strings, not a dict.
# NOTE(review): the single-letter entries "x" and "e" would match almost any
# text under plain substring matching -- confirm the matcher uses word
# boundaries.
SLANG_DICT = (
    # Cannabis slang
    [
        "chronic", "blunt", "dope", "kush", "420", "ganja",
        "pot", "mary jane", "grass", "herb", "green", "bud",
    ]
    # MDMA/Ecstasy slang
    + ["molly", "x", "e", "rolls"]
    # Cocaine slang
    + ["coke", "snow", "blow", "white", "powder"]
    # LSD slang
    + ["acid", "tabs", "doses"]
    # General slang
    + ["stash", "score", "plug", "connect", "trap", "deal"]
)
84
+
85
# ------------------------------------------------------------------
# Location coordinates (auto-detected from data)
# Fallback coordinates for when location detection fails. Each value is
# a two-float tuple, apparently (latitude, longitude) in decimal degrees.
# ------------------------------------------------------------------
_BENGALURU_AREA_COORDS = {
    "BTM": (12.917, 77.610),
    "Majestic": (12.976, 77.592),
    "Koramangala": (12.935, 77.622),
    "Indiranagar": (12.971, 77.641),
    "Whitefield": (12.970, 77.750),
    "Electronic City": (12.839, 77.677),
    "Marathahalli": (12.959, 77.697),
    "HSR Layout": (12.912, 77.641),
    "Jayanagar": (12.926, 77.584),
    "Malleshwaram": (13.003, 77.571),
    "Rajajinagar": (12.990, 77.552),
    "Yelahanka": (13.100, 77.594),
    "Bengaluru": (12.9716, 77.5946),
}

# Karnataka cities, for district-level analysis.
_KARNATAKA_CITY_COORDS = {
    "Mysuru": (12.2958, 76.6394),
    "Hubli": (15.3647, 75.1240),
    "Mangaluru": (12.9141, 74.8560),
    "Belagavi": (15.8497, 74.4977),
    "Tumakuru": (13.3392, 77.1012),
}

# Bengaluru areas first, then the Karnataka cities.
LOCATION_COORDS = {**_BENGALURU_AREA_COORDS, **_KARNATAKA_CITY_COORDS}
112
+
113
# ------------------------------------------------------------------
# Data paths
# Relative directory paths; both keep their trailing slash.
# ------------------------------------------------------------------
RAW_DATA_PATH: str = "data/raw/"
PROCESSED_DATA_PATH: str = "data/processed/"
118
+
119
# ------------------------------------------------------------------
# Risk score weights (for automated risk calculation)
# One weight per component; the four values add up to 1.0.
# ------------------------------------------------------------------
RISK_WEIGHTS = dict(
    keyword_frequency=0.4,
    location_frequency=0.3,
    sentiment_negative=0.2,
    slang_usage=0.1,
)
128
+
129
# ------------------------------------------------------------------
# Sentiment thresholds
# Symmetric cut-offs around zero. Presumably scores above the positive
# bound count as positive and scores below the negative bound as
# negative -- confirm against the sentiment code.
# ------------------------------------------------------------------
SENTIMENT_POSITIVE_THRESHOLD: float = 0.05
SENTIMENT_NEGATIVE_THRESHOLD: float = -0.05
134
+
135
# ------------------------------------------------------------------
# High-risk keywords (weighted higher in risk calculation)
# ------------------------------------------------------------------
HIGH_RISK_KEYWORDS = [
    "smuggle",
    "trafficking",
    "dealer",
    "peddler",
    "bust",
    "raid",
    "seized",
    "arrested",
    "contraband",
]
142
+
143
# ------------------------------------------------------------------
# Dashboard configuration
# ------------------------------------------------------------------
# Refresh interval in seconds: 5 minutes.
DASHBOARD_REFRESH_INTERVAL: int = 5 * 60

# Cap on the number of posts displayed.
MAX_POSTS_DISPLAY: int = 100

# Default zoom level for the map.
MAP_DEFAULT_ZOOM: int = 11
149
+
150
# ------------------------------------------------------------------
# Alert thresholds (for automated alerts)
# ------------------------------------------------------------------
# Intended to select the top 25% of risk scores.
# NOTE(review): that holds only if this value is applied as a quantile, or
# if scores are spread evenly over [0, 1]; as an absolute cut-off it is not
# a percentile -- confirm against the alerting code.
HIGH_RISK_THRESHOLD: float = 0.75

# Keywords associated with automated alerts.
ALERT_KEYWORDS = [
    "bust",
    "raid",
    "arrested",
    "seized",
]
155
+
156
# ------------------------------------------------------------------
# Export settings
# ------------------------------------------------------------------
# Output format; the alternatives noted by the author are "json" and "excel".
EXPORT_FORMAT: str = "csv"

# Whether exports include metadata.
INCLUDE_METADATA: bool = True