import re
import json
import traceback
from typing import List, Dict, Any

try:
    from llama_cpp import Llama
except ImportError:
    # Llama is referenced only in a type annotation; the class works with any
    # callable that mimics the llama_cpp completion interface. Falling back
    # keeps this module importable (e.g. in unit tests) when the native
    # package is not installed.
    Llama = Any  # type: ignore[misc,assignment]


class CommunityBrain:
    """Moderation, hashtag and summary helpers for community posts.

    The instance wraps an already-constructed LLM callable. The callable is
    invoked as ``llm(prompt, max_tokens=..., temperature=..., stop=...,
    echo=False)`` and must return a mapping shaped like
    ``{"choices": [{"text": "..."}]}``.
    """

    # Add your Hindi/English blacklist here (lower-case entries only, because
    # the text is lower-cased before matching).
    TOXIC_KEYWORDS = (
        "abuse", "kill", "suicide", "hate", "idiot", "scam",
        "hack", "betting", "casino", "stupid", "fuck",
    )

    # Each distinct keyword hit adds this much to the toxicity score.
    _KEYWORD_WEIGHT = 0.4
    # Content whose raw score reaches this value is reported as unsafe.
    _UNSAFE_THRESHOLD = 0.5

    # Compiled once at class creation. The leading \b anchors a keyword to the
    # start of a word, so "killing" and "hacker" still match while "skill" and
    # "whatever" (which merely contain "kill" / "hate") do not.
    _TOXIC_PATTERNS = tuple(
        (word, re.compile(r"\b" + re.escape(word)))
        for word in TOXIC_KEYWORDS
    )

    # Everything except ASCII letters and digits is removed from a hashtag.
    _TAG_JUNK = re.compile(r"[^a-zA-Z0-9]")

    _FALLBACK_TAG = "#Community"
    _FALLBACK_SUMMARY = "Discussion is active."

    def __init__(self, llm_instance: Llama):
        """Store the LLM callable used by the tagging and summary helpers."""
        self.llm = llm_instance
        print("--- Community Brain initialized. ---")

    # 🛑 HYBRID MODERATION (Fast + Cheap)
    def moderate_content(self, text: str) -> Dict[str, Any]:
        """Score ``text`` against the static keyword blacklist.

        Only the keyword pass is implemented; no LLM call is made here, so
        the check adds no model latency.

        Args:
            text: The post or comment body. ``None`` and ``""`` are treated
                as clean content.

        Returns:
            A dict with ``toxicity_score`` (0.0 to 1.0, 0.4 per distinct
            keyword hit), ``is_safe`` (True while the raw score is below
            0.5, i.e. at most one keyword hit) and ``flags`` (one message
            per matched keyword, in blacklist order).
        """
        if not text:
            return {"toxicity_score": 0.0, "is_safe": True, "flags": []}

        lowered = text.lower()
        flags = [
            f"Potential inappropriate word: {word}"
            for word, pattern in self._TOXIC_PATTERNS
            if pattern.search(lowered)
        ]

        score = self._KEYWORD_WEIGHT * len(flags)
        return {
            "toxicity_score": min(score, 1.0),
            "is_safe": score < self._UNSAFE_THRESHOLD,
            "flags": flags,
        }

    # 🏷️ SMART TAGGING
    def generate_smart_tags(self, content: str) -> List[str]:
        """Ask the LLM for hashtags describing ``content``.

        Args:
            content: The post body; only the first 300 characters are sent
                to the model.

        Returns:
            Up to five unique tags, each ``#`` followed by ASCII letters or
            digits. ``["#Community"]`` is returned when the content is
            blank, the model call fails, or no usable tag survives cleanup.
        """
        try:
            if not content or not content.strip():
                # Nothing to describe: skip the model call entirely.
                return [self._FALLBACK_TAG]

            prompt = (
                "[INST] Extract 4 relevant hashtags for this post. "
                "Return ONLY hashtags separated by spaces. No explanation.\n\n"
                'Post: "I just bought this amazing lipstick shade, the red is '
                'so vibrant and it lasts all day! Perfect for summer looks."\n'
                "Hashtags: #Beauty #Lipstick #Makeup #SummerVibes\n\n"
                f'Post: "{content[:300]}"\n'
                "Hashtags: [/INST]"
            )

            response = self.llm(
                prompt,
                max_tokens=30,
                temperature=0.3,  # Low temp for precision
                stop=["[INST]", "\n"],
                echo=False,
            )
            raw_tags = response['choices'][0]['text'].strip()

            clean_tags: List[str] = []
            seen = set()
            for token in raw_tags.split():
                # Strip every symbol (including stray '#') and re-prefix, so
                # "#Make-up!" -> "#Makeup" and "##tag" -> "#tag".
                body = self._TAG_JUNK.sub("", token)
                if not body:
                    # Token was punctuation only; a bare "#" is not a tag.
                    continue
                key = body.lower()
                if key in seen:
                    continue
                seen.add(key)
                clean_tags.append(f"#{body}")

            # Max 5 tags; fall back when the model produced nothing usable.
            return clean_tags[:5] or [self._FALLBACK_TAG]

        except Exception as e:
            # Best-effort feature: never let a tagging failure break posting.
            print(f"Smart Tag Error: {e}")
            return [self._FALLBACK_TAG]

    # 🧠 DISCUSSION SUMMARIZATION
    def summarize_thread(self, comments: List[str]) -> str:
        """Summarize a list of comments into a one-sentence insight.

        Args:
            comments: Comment bodies in the order supplied by the caller.

        Returns:
            ``"No activity yet."`` for an empty list; otherwise the model's
            summary, or ``"Discussion is active."`` when the model call
            fails or yields empty text.
        """
        if not comments:
            return "No activity yet."

        # Use the first 15 comments and cap the joined text at 1500
        # characters to keep the prompt small.
        context_text = " | ".join(comments[:15])[:1500]

        prompt = (
            "[INST] Summarize the main sentiment and topic of this discussion "
            "in one sentence.\n\n"
            f"Discussion: {context_text}\n\n"
            "Summary: [/INST]"
        )

        try:
            response = self.llm(
                prompt,
                max_tokens=60,
                temperature=0.5,
                stop=["[INST]", "\n"],
                echo=False,
            )
            summary = response['choices'][0]['text'].strip()
        except Exception as e:
            # Best-effort feature: report the failure and use the fallback.
            print(f"Summarize Error: {e}")
            return self._FALLBACK_SUMMARY

        return summary or self._FALLBACK_SUMMARY