Commit
·
96fd51a
1
Parent(s):
bbd259b
update
Browse files- main.py +1 -1
- processor.py +19 -21
main.py
CHANGED
|
@@ -30,7 +30,7 @@ class RerunRequest(BaseModel):
|
|
| 30 |
intent: Literal["light", "medium", "deep"]
|
| 31 |
|
| 32 |
INTENT_LIMITS = {
|
| 33 |
-
"light": {"per_query":
|
| 34 |
"medium": {"per_query": 50, "total": 300},
|
| 35 |
"deep": {"per_query": 100, "total": 800},
|
| 36 |
}
|
|
|
|
| 30 |
intent: Literal["light", "medium", "deep"]
|
| 31 |
|
| 32 |
# Numeric limits keyed by the request's `intent` value ("light", "medium",
# "deep"). Each entry carries a "per_query" and a "total" figure.
# NOTE(review): presumably these cap results fetched per query and overall —
# confirm against the code that reads this table.
INTENT_LIMITS = {
    intent: {"per_query": per_query, "total": total}
    for intent, per_query, total in (
        ("light", 10, 25),
        ("medium", 50, 300),
        ("deep", 100, 800),
    )
}
|
processor.py
CHANGED
|
@@ -163,32 +163,30 @@ def text_matches_any(text, patterns):
|
|
| 163 |
def determine_nature(text, sentiment_label):
|
| 164 |
t = (text or "").lower()
|
| 165 |
# 1. High-priority flags (dangerous or specific categories)
|
| 166 |
-
if text_matches_any(t, SEPARATIST_RE): return "
|
| 167 |
-
if text_matches_any(t, CALL_TO_ACTION_RE): return "
|
| 168 |
-
if text_matches_any(t, COMMUNAL_RE): return "
|
| 169 |
-
if text_matches_any(t, CONSPIRACY_RE): return "
|
| 170 |
|
| 171 |
# 2. Trust the advanced model's label if available
|
| 172 |
s = str(sentiment_label)
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
if s == "Pro-
|
| 176 |
-
if s == "Anti-
|
| 177 |
-
if s == "Pro-Government": return "Pro-Government"
|
| 178 |
-
if s == "Anti-Government": return "Anti-Government"
|
| 179 |
|
| 180 |
# 3. Fallback to Regex for other cases or if model was Neutral
|
| 181 |
-
if text_matches_any(t, ANTI_INDIA_RE): return "
|
| 182 |
-
if text_matches_any(t, PRO_INDIA_RE): return "
|
| 183 |
-
if text_matches_any(t, CRITICAL_GOVT_RE): return "
|
| 184 |
-
if text_matches_any(t, SUPPORT_OPPOSITION_RE): return "
|
| 185 |
|
| 186 |
# 4. Fallback to generic POS/NEG (legacy)
|
| 187 |
s_upper = s.upper()
|
| 188 |
-
if "POS" in s_upper: return "
|
| 189 |
-
if "NEG" in s_upper: return "
|
| 190 |
|
| 191 |
-
return "
|
| 192 |
|
| 193 |
# ---------------- DANGEROUS FLAG ----------------
|
| 194 |
danger_keywords = ["kill","attack","bomb","violence","terror","terrorist","militant",
|
|
@@ -197,8 +195,8 @@ pattern = re.compile(r'\b(?:' + '|'.join(map(re.escape, danger_keywords)) + r')\
|
|
| 197 |
flags=re.IGNORECASE)
|
| 198 |
|
| 199 |
def is_dangerous(text, sentiment):
|
| 200 |
-
if pattern.search(text or ""): return True
|
| 201 |
-
return (str(sentiment).upper() == "
|
| 202 |
|
| 203 |
def generate_reports_from_csv(input_csv:str, out_dir:str) -> dict:
|
| 204 |
"""
|
|
@@ -277,9 +275,9 @@ def generate_reports_from_csv(input_csv:str, out_dir:str) -> dict:
|
|
| 277 |
|
| 278 |
# Handle error or valid result
|
| 279 |
if "error" in out:
|
| 280 |
-
preds.append(("
|
| 281 |
else:
|
| 282 |
-
label = out.get("label", "
|
| 283 |
score = float(out.get("confidence", 0.0))
|
| 284 |
preds.append((label, score))
|
| 285 |
|
|
|
|
| 163 |
def determine_nature(text, sentiment_label):
    """Classify a piece of text into a single lowercase "nature" label.

    The checks run in a fixed priority order and the first hit wins:

    1. High-priority pattern groups (separatist, call-to-action, communal,
       conspiratorial).
    2. An exact, case-sensitive match of ``sentiment_label`` against the
       model's known labels ("Pro-India", "Anti-India", "Pro-Government",
       "Anti-Government").
    3. Fallback pattern groups (anti-India, pro-India, critical of the
       government, supportive of the opposition).
    4. Legacy generic labels: any label containing "POS" or "NEG"
       (case-insensitive).

    Args:
        text: The text to classify. ``None`` or an empty value is treated as
            an empty string; the text is lower-cased before matching.
        sentiment_label: The upstream sentiment label; it is converted with
            ``str()`` before comparison.

    Returns:
        One of "separatist", "call-to-action", "communal", "conspiratorial",
        "pro-india", "anti-india", "pro-government", "anti-government",
        "critical-of-government", "supportive-of-opposition", "supportive",
        "critical" or "neutral".
    """
    lowered = (text or "").lower()

    # 1. High-priority flags (dangerous or specific categories).
    priority_rules = (
        (SEPARATIST_RE, "separatist"),
        (CALL_TO_ACTION_RE, "call-to-action"),
        (COMMUNAL_RE, "communal"),
        (CONSPIRACY_RE, "conspiratorial"),
    )
    for patterns, nature in priority_rules:
        if text_matches_any(lowered, patterns):
            return nature

    # 2. Trust the advanced model's label if it is one of the known values.
    label = str(sentiment_label)
    model_natures = {
        "Pro-India": "pro-india",
        "Anti-India": "anti-india",
        "Pro-Government": "pro-government",
        "Anti-Government": "anti-government",
    }
    if label in model_natures:
        return model_natures[label]

    # 3. Fall back to regex groups for other labels (e.g. a neutral verdict).
    fallback_rules = (
        (ANTI_INDIA_RE, "anti-india"),
        (PRO_INDIA_RE, "pro-india"),
        (CRITICAL_GOVT_RE, "critical-of-government"),
        (SUPPORT_OPPOSITION_RE, "supportive-of-opposition"),
    )
    for patterns, nature in fallback_rules:
        if text_matches_any(lowered, patterns):
            return nature

    # 4. Legacy generic POS/NEG labels.
    label_upper = label.upper()
    if "POS" in label_upper:
        return "supportive"
    if "NEG" in label_upper:
        return "critical"

    return "neutral"
|
| 190 |
|
| 191 |
# ---------------- DANGEROUS FLAG ----------------
|
| 192 |
danger_keywords = ["kill","attack","bomb","violence","terror","terrorist","militant",
|
|
|
|
| 195 |
flags=re.IGNORECASE)
|
| 196 |
|
| 197 |
def is_dangerous(text, sentiment):
    """Return True when non-blank text carries the "Anti-India" label.

    Only the sentiment label decides the outcome: the keyword-regex check
    that used to short-circuit to True is disabled, so danger keywords in
    ``text`` no longer flag a row on their own.

    Args:
        text: The text being assessed. ``None`` or an empty value is treated
            as an empty string, so a missing text is never flagged.
        sentiment: The sentiment label; it is converted with ``str()`` and
            compared case-insensitively against "ANTI-INDIA".

    Returns:
        bool: True if the label is "Anti-India" (any casing) and ``text``
        contains at least one non-whitespace character, otherwise False.
    """
    if str(sentiment).upper() != "ANTI-INDIA":
        return False
    # Guard against None: calling .strip() on a missing text would raise
    # AttributeError for exactly the rows this function is meant to flag.
    return (text or "").strip() != ""
|
| 200 |
|
| 201 |
def generate_reports_from_csv(input_csv:str, out_dir:str) -> dict:
|
| 202 |
"""
|
|
|
|
| 275 |
|
| 276 |
# Handle error or valid result
|
| 277 |
if "error" in out:
|
| 278 |
+
preds.append(("NEUTRAL", 0.0))
|
| 279 |
else:
|
| 280 |
+
label = out.get("label", "NEUTRAL")
|
| 281 |
score = float(out.get("confidence", 0.0))
|
| 282 |
preds.append((label, score))
|
| 283 |
|