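"""
CommunityBrain: lightweight LLM helpers for a community feed.

Bundles three utilities around a shared llama-cpp model: keyword-first
content moderation, hashtag generation, and discussion summarization.
"""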
import re
from typing import List, Dict, Any
from llama_cpp import Llama

class CommunityBrain:
    def __init__(self, llm_instance: Llama):
        self.llm = llm_instance
        print("--- Community Brain initialized. ---")

    # 🛑 HYBRID MODERATION (Fast + Cheap)
    def moderate_content(self, text: str) -> Dict[str, Any]:
        """
        Check content for toxicity using a local Keyword list first, 
        then AI for deeper semantic checks if needed.
        """
        # 1. First Line of Defense: Static Python Keyword Match (Instant)
        # Add your Hindi/English blacklist here
        TOXIC_KEYWORDS = [
            "abuse", "kill", "suicide", "hate", "idiot", "scam", 
            "hack", "betting", "casino", "stupid", "fuck"
        ]
        
        score = 0.0
        flags = []
        text_lower = text.lower()

        # Whole-word matching (a plain substring check would flag e.g. "skills" for "kill")
        for word in TOXIC_KEYWORDS:
            if re.search(rf"\b{re.escape(word)}\b", text_lower):
                score += 0.4
                flags.append(f"Potential inappropriate word: {word}")

        # If highly toxic by keywords alone, fail immediately (Don't waste LLM)
        if score > 0.5:
            return {"toxicity_score": min(score, 1.0), "is_safe": False, "flags": flags}

        # 2. Second Line of Defense: contextual analysis via the LLM.
        # Skipped here because calling the LLM on every post adds latency;
        # the keyword score alone decides posts that pass the blacklist.
        return {
            "toxicity_score": score,
            "is_safe": score < 0.5,
            "flags": flags
        }

    # 🏷️ SMART TAGGING
    def generate_smart_tags(self, content: str) -> List[str]:
        """
        Extracts 3-5 relevant hashtags from the content using LLM.
        """
        try:
            prompt = f"""[INST] Extract 4 relevant hashtags for this post. Return ONLY hashtags separated by spaces. No explanation.
            
            Post: "I just bought this amazing lipstick shade, the red is so vibrant and it lasts all day! Perfect for summer looks."
            Hashtags: #Beauty #Lipstick #Makeup #SummerVibes

            Post: "{content[:300]}"
            Hashtags: [/INST]"""

            response = self.llm(
                prompt,
                max_tokens=30,
                temperature=0.3, # Low temp for precision
                stop=["[INST]", "\n"],
                echo=False
            )
            
            raw_tags = response['choices'][0]['text'].strip()
            # Clean up hashtags (ensure they start with #)
            tags = [t.strip() for t in raw_tags.split() if t.strip()]
            
            # Format correction: ensure a leading '#', strip stray symbols,
            # and drop tags that are empty after cleaning
            clean_tags = []
            for t in tags:
                if not t.startswith('#'):
                    t = f"#{t}"
                t = re.sub(r'[^a-zA-Z0-9#]', '', t)
                if len(t) > 1:
                    clean_tags.append(t)
            
            return clean_tags[:5] # Max 5 tags

        except Exception as e:
            print(f"Smart Tag Error: {e}")
            return ["#Community"]

    # 🧠 DISCUSSION SUMMARIZATION
    def summarize_thread(self, comments: List[str]) -> str:
        """
        Summarizes a list of comments into a key insight.
        """
        if not comments:
            return "No activity yet."

        # Combine the most recent 10 comments and truncate to bound the context
        context_text = " | ".join(comments[-10:])[:1500]

        prompt = f"""[INST] Summarize the main sentiment and topic of this discussion in one sentence.
        
        Discussion: {context_text}
        
        Summary: [/INST]"""

        try:
            response = self.llm(
                prompt,
                max_tokens=60,
                temperature=0.5,
                stop=["[INST]", "\n"],
                echo=False
            )
            return response['choices'][0]['text'].strip()
        except Exception as e:
            print(f"Summarize Error: {e}")
            return "Discussion is active."