# reachify-ai-service/core/community_brain.py
# amitbhatt6075 - Added Community AI Module: Toxicity Check and Smart Tagging (5ac001d)
import re
import json
import traceback
from typing import List, Dict, Any
from llama_cpp import Llama
class CommunityBrain:
    """Community helpers: keyword moderation, hashtag extraction, thread summaries.

    The tagging and summarization methods call ``self.llm`` as
    ``self.llm(prompt, max_tokens=..., temperature=..., stop=..., echo=False)``
    and read the generated text from ``response['choices'][0]['text']``.
    """

    # Blacklist used by the keyword pass. Add your Hindi/English blacklist here.
    # NOTE(review): matching relies on regex word boundaries (``\b``); confirm
    # that this behaves as intended before adding non-Latin-script entries.
    TOXIC_KEYWORDS = (
        "abuse", "kill", "suicide", "hate", "idiot", "scam",
        "hack", "betting", "casino", "stupid", "fuck",
    )

    # Each distinct keyword hit adds this much to the toxicity score.
    KEYWORD_WEIGHT = 0.4
    # Content is reported as safe only while its score stays below this value.
    UNSAFE_THRESHOLD = 0.5

    # Values returned when the LLM fails or produces nothing usable.
    FALLBACK_TAGS = ("#Community",)
    FALLBACK_SUMMARY = "Discussion is active."

    _MAX_TAGS = 5
    _MAX_TAG_CONTENT_CHARS = 300
    _MAX_SUMMARY_COMMENTS = 15
    _MAX_SUMMARY_CHARS = 1500

    # One whole-word pattern per keyword (plus a few common English suffixes)
    # so that e.g. "skill", "whatever" or "hackathon" are not flagged merely
    # because they contain "kill", "hate" or "hack" as a substring.
    _KEYWORD_PATTERNS = tuple(
        (word, re.compile(rf"\b{re.escape(word)}(?:s|d|ed|er|ers|ing)?\b"))
        for word in TOXIC_KEYWORDS
    )

    # Instruction markers of the prompt template; stripped from user text so a
    # post cannot close or reopen the instruction block.
    _PROMPT_MARKER_RE = re.compile(r"\[/?INST\]", re.IGNORECASE)
    # Anything between tags: whitespace, commas, or the '#' prefix itself.
    _TAG_SEPARATOR_RE = re.compile(r"[\s,#]+")
    _NON_TAG_CHARS_RE = re.compile(r"[^a-zA-Z0-9]")

    def __init__(self, llm_instance: "Llama"):
        """Store the LLM callable used by the tagging and summary methods.

        Args:
            llm_instance: A ``llama_cpp.Llama`` instance (annotation is a
                string so the class does not need the name at definition time).
        """
        self.llm = llm_instance
        print("--- Community Brain initialized. ---")

    @classmethod
    def _clean_for_prompt(cls, text: str, limit: int) -> str:
        """Drop prompt markers, collapse all whitespace, truncate to ``limit``."""
        without_markers = cls._PROMPT_MARKER_RE.sub(" ", text)
        # The LLM calls stop on "\n", so user text is kept on a single line.
        return " ".join(without_markers.split())[:limit]

    @classmethod
    def _normalize_tags(cls, raw: str, limit: int) -> List[str]:
        """Turn raw model output into at most ``limit`` unique ``#Tag`` strings."""
        tags: List[str] = []
        seen = set()
        for token in cls._TAG_SEPARATOR_RE.split(raw):
            body = cls._NON_TAG_CHARS_RE.sub("", token)
            if not body:
                # Token was only punctuation; would otherwise become a bare "#".
                continue
            key = body.lower()
            if key in seen:
                continue
            seen.add(key)
            tags.append(f"#{body}")
            if len(tags) == limit:
                break
        return tags

    # 🛑 HYBRID MODERATION (Fast + Cheap)
    def moderate_content(self, text: str) -> Dict[str, Any]:
        """Score ``text`` for toxicity using the static keyword blacklist.

        Only the keyword pass is implemented; no LLM call is made here
        (running the LLM for every post would add latency).

        Args:
            text: Content to check. ``None``, non-string or empty input is
                treated as clean.

        Returns:
            A dict with ``toxicity_score`` (0.4 per distinct keyword, capped
            at 1.0), ``is_safe`` (True while the score is below 0.5, so a
            single keyword hit is flagged but still reported as safe) and
            ``flags`` (one message per matched keyword, in blacklist order).
        """
        if not isinstance(text, str) or not text:
            return {"toxicity_score": 0.0, "is_safe": True, "flags": []}

        text_lower = text.lower()
        flags = [
            f"Potential inappropriate word: {word}"
            for word, pattern in self._KEYWORD_PATTERNS
            if pattern.search(text_lower)
        ]
        score = min(len(flags) * self.KEYWORD_WEIGHT, 1.0)
        return {
            "toxicity_score": score,
            "is_safe": score < self.UNSAFE_THRESHOLD,
            "flags": flags,
        }

    # 🏷️ SMART TAGGING
    def generate_smart_tags(self, content: str) -> List[str]:
        """Extract up to 5 hashtags for ``content`` using the LLM.

        Args:
            content: Post text. Only the first 300 characters (after prompt
                markers are removed and whitespace is collapsed) are sent.

        Returns:
            A list of unique ``#Tag`` strings containing only ASCII letters
            and digits. Falls back to ``["#Community"]`` when the content is
            blank, the LLM call fails, or the model yields no usable tag.
        """
        fallback = list(self.FALLBACK_TAGS)
        if not isinstance(content, str) or not content.strip():
            # Nothing to tag: skip the LLM call entirely.
            return fallback

        post = self._clean_for_prompt(content, self._MAX_TAG_CONTENT_CHARS)
        prompt = (
            "[INST] Extract 4 relevant hashtags for this post. "
            "Return ONLY hashtags separated by spaces. No explanation.\n"
            'Post: "I just bought this amazing lipstick shade, the red is so '
            'vibrant and it lasts all day! Perfect for summer looks."\n'
            "Hashtags: #Beauty #Lipstick #Makeup #SummerVibes\n"
            f'Post: "{post}"\n'
            "Hashtags: [/INST]"
        )
        try:
            response = self.llm(
                prompt,
                max_tokens=30,
                temperature=0.3,  # Low temp for precision
                stop=["[INST]", "\n"],
                echo=False,
            )
            raw_tags = response['choices'][0]['text']
        except Exception as e:
            # Best effort: tagging must never break the caller.
            print(f"Smart Tag Error: {e}")
            return fallback

        tags = self._normalize_tags(raw_tags, self._MAX_TAGS)
        return tags or fallback

    # 🧠 DISCUSSION SUMMARIZATION
    def summarize_thread(self, comments: List[str]) -> str:
        """Summarize a list of comments into a one-sentence insight.

        Args:
            comments: Comment texts. Blank and non-string entries are skipped.

        Returns:
            ``"No activity yet."`` when there is nothing to summarize, the
            model's stripped one-line summary on success, or
            ``"Discussion is active."`` when the LLM call fails or returns
            an empty string.
        """
        if not comments:
            return "No activity yet."

        # Use the first 15 usable comments, in the order given, and cap the
        # joined context at 1500 characters.
        # NOTE(review): the previous comment said "last 10 comments" while the
        # code took the first 15; behaviour is kept - confirm the ordering of
        # the list supplied by callers.
        usable: List[str] = []
        for comment in comments:
            if not isinstance(comment, str):
                continue
            cleaned = self._clean_for_prompt(comment, self._MAX_SUMMARY_CHARS)
            if cleaned:
                usable.append(cleaned)
                if len(usable) == self._MAX_SUMMARY_COMMENTS:
                    break
        if not usable:
            return "No activity yet."

        context_text = " | ".join(usable)[:self._MAX_SUMMARY_CHARS]
        prompt = (
            "[INST] Summarize the main sentiment and topic of this discussion "
            "in one sentence.\n"
            f"Discussion: {context_text}\n"
            "Summary: [/INST]"
        )
        try:
            response = self.llm(
                prompt,
                max_tokens=60,
                temperature=0.5,
                stop=["[INST]", "\n"],
                echo=False,
            )
            summary = response['choices'][0]['text'].strip()
        except Exception as e:
            # Best effort, but leave a trace instead of failing silently.
            print(f"Summarize Error: {e}")
            return self.FALLBACK_SUMMARY

        return summary or self.FALLBACK_SUMMARY