# community_brain.py — AI helpers for community features:
# content moderation, smart hashtag generation, and thread summarization.
import re
import json
import traceback
from typing import List, Dict, Any
from llama_cpp import Llama
class CommunityBrain:
    """AI helper for community features.

    Wraps a shared llama.cpp model instance and provides:
      * hybrid (keyword + LLM-ready) content moderation,
      * hashtag extraction for posts,
      * one-sentence discussion summaries.
    """

    # First line of defense: static blacklist, checked before any LLM call.
    # Extend with additional Hindi/English terms as needed.
    TOXIC_KEYWORDS = [
        "abuse", "kill", "suicide", "hate", "idiot", "scam",
        "hack", "betting", "casino", "stupid", "fuck",
    ]
    # Compiled once at class creation. Word boundaries fix the old substring
    # check's false positives (e.g. "kill" matching inside "skill").
    _TOXIC_RE = re.compile(
        r"\b(" + "|".join(re.escape(w) for w in TOXIC_KEYWORDS) + r")\b"
    )

    def __init__(self, llm_instance: "Llama"):
        # Annotation is quoted so the class still loads if llama_cpp is absent.
        self.llm = llm_instance
        print("--- Community Brain initialized. ---")

    # HYBRID MODERATION (fast + cheap)
    def moderate_content(self, text: str) -> Dict[str, Any]:
        """Check content for toxicity using a local keyword list first,
        then AI for deeper semantic checks if needed.

        Returns a dict with keys ``toxicity_score`` (0.0-1.0),
        ``is_safe`` (bool) and ``flags`` (list of human-readable reasons).
        """
        score = 0.0
        flags: List[str] = []
        # Each distinct blacklisted word found adds 0.4; flags keep the
        # stable TOXIC_KEYWORDS ordering for deterministic output.
        hits = set(self._TOXIC_RE.findall(text.lower()))
        for word in self.TOXIC_KEYWORDS:
            if word in hits:
                score += 0.4
                flags.append(f"Potential inappropriate word: {word}")
        # Two or more hits (score 0.8) fail immediately — don't waste the LLM.
        if score > 0.5:
            return {"toxicity_score": min(score, 1.0), "is_safe": False, "flags": flags}
        # Second line of defense would be contextual LLM analysis; running the
        # model on every post adds latency, so we return the keyword score.
        return {
            "toxicity_score": score,
            "is_safe": score < 0.5,
            "flags": flags,
        }

    # SMART TAGGING
    def generate_smart_tags(self, content: str) -> List[str]:
        """Extract up to 5 relevant hashtags from *content* using the LLM.

        Falls back to ``["#Community"]`` on any model/parsing error.
        """
        try:
            # One-shot prompt with a worked example to anchor the format.
            prompt = f"""[INST] Extract 4 relevant hashtags for this post. Return ONLY hashtags separated by spaces. No explanation.
Post: "I just bought this amazing lipstick shade, the red is so vibrant and it lasts all day! Perfect for summer looks."
Hashtags: #Beauty #Lipstick #Makeup #SummerVibes
Post: "{content[:300]}"
Hashtags: [/INST]"""
            response = self.llm(
                prompt,
                max_tokens=30,
                temperature=0.3,  # low temperature for precise, format-stable output
                stop=["[INST]", "\n"],
                echo=False,
            )
            raw_tags = response['choices'][0]['text'].strip()
            clean_tags: List[str] = []
            for token in raw_tags.split():
                tag = token if token.startswith('#') else f"#{token}"
                # Strip every symbol except '#'; a token of pure symbols
                # collapses to just "#", which we drop as useless.
                tag = re.sub(r'[^a-zA-Z0-9#]', '', tag)
                if len(tag) > 1:
                    clean_tags.append(tag)
            return clean_tags[:5]  # max 5 tags
        except Exception as e:
            print(f"Smart Tag Error: {e}")
            return ["#Community"]

    # DISCUSSION SUMMARIZATION
    def summarize_thread(self, comments: List[str]) -> str:
        """Summarize a list of comments into a single-sentence insight.

        Returns a canned message for empty threads or on model errors.
        """
        if not comments:
            return "No activity yet."
        # Use the LAST 10 comments (most recent activity) and cap the
        # context at 1500 chars to bound prompt size.
        context_text = " | ".join(comments[-10:])[:1500]
        prompt = f"""[INST] Summarize the main sentiment and topic of this discussion in one sentence.
Discussion: {context_text}
Summary: [/INST]"""
        try:
            response = self.llm(
                prompt,
                max_tokens=60,
                temperature=0.5,
                stop=["[INST]", "\n"],
                echo=False,
            )
            return response['choices'][0]['text'].strip()
        except Exception:
            # Best-effort feature: never let a model failure break the page.
            return "Discussion is active."