minh9972t12 committed on
Commit
295cedb
·
verified ·
1 Parent(s): cfef899

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +229 -27
app.py CHANGED
@@ -38,6 +38,42 @@ else:
38
  print("⚠ Warning: No HUGGINGFACE_TOKEN found. Set it in environment variable.")
39
 
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  # Pydantic models
42
  class ContentValidationResult(BaseModel):
43
  is_valid: bool
@@ -94,6 +130,118 @@ async def root():
94
  }
95
 
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  def build_validation_prompt(
98
  event_name: str,
99
  category: str,
@@ -153,10 +301,39 @@ async def validate_content(
153
  token: str
154
  ) -> ContentValidationResult:
155
  """
156
- Validate content using LLM
157
  """
158
 
159
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  # Build validation prompt
161
  prompt = build_validation_prompt(
162
  event_name=event_name,
@@ -168,23 +345,48 @@ async def validate_content(
168
  # Initialize client
169
  client = InferenceClient(token=token)
170
 
171
- # Use Mistral for fast validation
172
- print("🔍 Validating content with Mistral-7B-Instruct-v0.3...")
173
-
174
  messages = [{"role": "user", "content": prompt}]
175
 
176
- response = client.chat_completion(
177
- messages=messages,
178
- model="mistralai/Mistral-7B-Instruct-v0.3",
179
- max_tokens=500,
180
- temperature=0.1, # Very low temperature for consistent JSON
181
- top_p=0.9
182
- )
 
 
183
 
184
- llm_response = response.choices[0].message.content.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
  print(f"\n{'='*60}")
187
- print(f"VALIDATION RESPONSE:")
188
  print(f"{'='*60}")
189
  print(llm_response)
190
  print(f"{'='*60}\n")
@@ -216,14 +418,14 @@ async def validate_content(
216
  print(f"⚠ JSON Parse Error: {str(parse_error)}")
217
  print(f"Response was: {llm_response[:200]}")
218
 
219
- # Fallback: assume valid if can't parse
220
- data = {
221
- "is_valid": True,
222
- "confidence_score": 0.5,
223
- "reason": "Không thể parse validation response, cho phép qua",
224
- "issues": [],
225
- "suggestions": []
226
- }
227
 
228
  return ContentValidationResult(
229
  is_valid=data.get("is_valid", True),
@@ -235,13 +437,13 @@ async def validate_content(
235
 
236
  except Exception as e:
237
  print(f"⚠ Validation error: {str(e)}")
238
- # On error, allow content but with warning
239
  return ContentValidationResult(
240
- is_valid=True,
241
- confidence_score=0.5,
242
- reason=f"Lỗi validation: {str(e)}. Cho phép qua mặc định.",
243
- issues=[],
244
- suggestions=[]
245
  )
246
 
247
 
 
38
  print("⚠ Warning: No HUGGINGFACE_TOKEN found. Set it in environment variable.")
39
 
40
 
41
+ # Vietnamese profanity/offensive words list (expandable)
42
+ VIETNAMESE_PROFANITY = [
43
+ "đjt", "địt", "đm", "dm", "đéo", "đệch", "vl", "vcl", "cc", "cặc",
44
+ "lồn", "buồi", "đụ", "chó", "súc vật", "con chó", "thằng chó",
45
+ "con đĩ", "đĩ", "điếm", "cave", "gái gọi", "mẹ mày", "bố mày",
46
+ "cha mày", "cụ mày", "óc chó", "não chó", "não lợn", "ngu như chó",
47
+ "chết mẹ", "chết cha", "đồ khốn", "thằng khốn", "con khốn"
48
+ ]
49
+
50
+ # Spam patterns
51
+ SPAM_PATTERNS = [
52
+ r'(\w)\1{4,}', # Repeated characters: "aaaa", "!!!!!"
53
+ r'(\.{3,}|!{3,}|\?{3,}|\${3,}|\*{3,})', # Excessive punctuation
54
+ r'\d{9,}', # Long numbers (phone numbers)
55
+ r'(http|www)\S+', # URLs
56
+ r'(\w+\s+){0,3}(mua|bán|giảm giá|khuyến mãi|liên hệ|zalo|telegram)\s+\d', # Sales spam
57
+ ]
58
+
59
+ # Gibberish patterns
60
+ GIBBERISH_PATTERNS = [
61
+ r'^[a-z]{20,}$', # Very long random lowercase string
62
+ r'(qwerty|asdfgh|zxcvbn|123456|abcdef)', # Keyboard patterns
63
+ r'[a-z]{5,}[0-9]{5,}', # Mixed random: "asdfg12345"
64
+ ]
65
+
66
+ # Bypass attempt patterns
67
+ BYPASS_PATTERNS = [
68
+ r'ignore\s+(previous|above|all)\s+(instruction|prompt|rule)',
69
+ r'you\s+are\s+(now|a|an)\s+',
70
+ r'act\s+as\s+',
71
+ r'<script|<iframe|javascript:|onerror=',
72
+ r'(SELECT|INSERT|DELETE|DROP|UPDATE)\s+.*FROM',
73
+ r'system\s*\(|exec\s*\(|eval\s*\(',
74
+ ]
75
+
76
+
77
  # Pydantic models
78
  class ContentValidationResult(BaseModel):
79
  is_valid: bool
 
130
  }
131
 
132
 
133
def check_profanity_vietnamese(text: str, words=None) -> tuple[bool, List[str]]:
    """
    Check for Vietnamese profanity using a word list.

    Args:
        text: Free-form text to scan (any case; lowercased here).
        words: Optional override of the profanity word list; defaults to
            the module-level VIETNAMESE_PROFANITY.

    Returns:
        (has_profanity, found_words) — found_words preserves word-list order.
    """
    if words is None:
        words = VIETNAMESE_PROFANITY

    text_lower = text.lower()
    found: List[str] = []

    for word in words:
        # \b word boundaries stop e.g. "dm" matching inside "admin".
        pattern = r'\b' + re.escape(word) + r'\b'
        if re.search(pattern, text_lower):
            found.append(word)

    return len(found) > 0, found
148
+
149
+
150
def check_spam_patterns(text: str, patterns=None) -> tuple[bool, List[str]]:
    """
    Check for spam patterns.

    Args:
        text: Text to scan.
        patterns: Optional override of the regex list; defaults to the
            module-level SPAM_PATTERNS.

    Returns:
        (is_spam, issues) — one issue message per matching pattern.
    """
    if patterns is None:
        patterns = SPAM_PATTERNS

    issues: List[str] = []

    for pattern in patterns:
        # search() suffices — only the existence of a match matters.
        if re.search(pattern, text, re.IGNORECASE):
            issues.append(f"Spam pattern detected: {pattern[:30]}...")

    return len(issues) > 0, issues
163
+
164
+
165
def check_gibberish(text: str, patterns=None) -> tuple[bool, List[str]]:
    """
    Check for gibberish (nonsense) text.

    Two signals: the regex patterns, and a vowel/consonant ratio check —
    Vietnamese (and most natural text) needs a reasonable share of vowels.

    Args:
        text: Text to scan.
        patterns: Optional override of the regex list; defaults to the
            module-level GIBBERISH_PATTERNS.

    Returns:
        (is_gibberish, issues)
    """
    if patterns is None:
        patterns = GIBBERISH_PATTERNS

    issues: List[str] = []

    for pattern in patterns:
        if re.search(pattern, text, re.IGNORECASE):
            issues.append("Gibberish pattern detected")

    # Vowel-ratio heuristic: count Vietnamese vowels (with diacritics) vs
    # ASCII consonants; only applied when there is enough consonant signal,
    # which also keeps the division safe from a zero denominator.
    vowels = len(re.findall(r'[aeiouàáảãạăằắẳẵặâầấẩẫậèéẻẽẹêềếểễệìíỉĩịòóỏõọôồốổỗộơờớởỡợùúủũụưừứửữựỳýỷỹỵ]', text.lower()))
    consonants = len(re.findall(r'[bcdfghjklmnpqrstvwxyz]', text.lower()))

    if consonants > 10 and vowels / (consonants + vowels) < 0.3:
        issues.append("Low vowel ratio - possibly gibberish")

    return len(issues) > 0, issues
184
+
185
+
186
def check_bypass_attempts(text: str, patterns=None) -> tuple[bool, List[str]]:
    """
    Check for bypass/injection attempts (prompt injection, XSS, SQL, code exec).

    Args:
        text: Text to scan.
        patterns: Optional override of the regex list; defaults to the
            module-level BYPASS_PATTERNS.

    Returns:
        (is_bypass, issues) — one generic issue entry per matching pattern.
    """
    if patterns is None:
        patterns = BYPASS_PATTERNS

    issues: List[str] = []

    for pattern in patterns:
        if re.search(pattern, text, re.IGNORECASE):
            issues.append("Bypass attempt detected")

    return len(issues) > 0, issues
198
+
199
+
200
def rule_based_validation(
    event_name: str,
    category: str,
    short_desc: str,
    detailed_desc: str
) -> tuple[bool, float, str, List[str]]:
    """
    Fast rule-based screening that runs before the LLM validation step.

    All user-supplied fields are concatenated and passed through the
    profanity, spam, gibberish and bypass detectors.

    Returns:
        (is_valid, confidence, reason, issues)
    """
    combined = f"{event_name} {category} {short_desc} {detailed_desc}"

    # Run every detector over the combined text.
    has_profanity, profane_words = check_profanity_vietnamese(combined)
    is_spam, spam_issues = check_spam_patterns(combined)
    is_gibberish, gibberish_issues = check_gibberish(combined)
    is_bypass, bypass_issues = check_bypass_attempts(combined)

    # Collect issues in detector order: profanity, spam, gibberish, bypass.
    issues: List[str] = []
    if has_profanity:
        issues.append(f"Phát hiện từ ngữ tục tĩu: {', '.join(profane_words[:3])}")
    issues.extend(spam_issues)
    issues.extend(gibberish_issues)
    issues.extend(bypass_issues)

    # Severity order: hard violations (profanity / injection) first,
    # then spam, then gibberish.
    if has_profanity or is_bypass:
        return False, 0.1, "Nội dung vi phạm: chứa từ ngữ không phù hợp hoặc cố gắng bypass", issues
    if is_spam:
        return False, 0.3, "Nội dung có dấu hiệu spam", issues
    if is_gibberish:
        return False, 0.2, "Nội dung có dấu hiệu vô nghĩa (gibberish)", issues

    return True, 0.8, "Nội dung hợp lệ (rule-based check)", []
243
+
244
+
245
  def build_validation_prompt(
246
  event_name: str,
247
  category: str,
 
301
  token: str
302
  ) -> ContentValidationResult:
303
  """
304
+ Validate content using Rule-Based + LLM hybrid approach
305
  """
306
 
307
  try:
308
+ # STEP 1: Rule-based validation (fast, accurate for common cases)
309
+ print("🔍 Step 1: Rule-based validation...")
310
+ is_valid_rule, confidence_rule, reason_rule, issues_rule = rule_based_validation(
311
+ event_name=event_name,
312
+ category=category,
313
+ short_desc=short_desc,
314
+ detailed_desc=detailed_desc
315
+ )
316
+
317
+ # If rule-based catches issues, return immediately
318
+ if not is_valid_rule:
319
+ print(f"❌ Rule-based validation FAILED: {reason_rule}")
320
+ return ContentValidationResult(
321
+ is_valid=False,
322
+ confidence_score=confidence_rule,
323
+ reason=reason_rule,
324
+ issues=issues_rule,
325
+ suggestions=[
326
+ "Loại bỏ các từ ngữ không phù hợp",
327
+ "Sử dụng ngôn ngữ lịch sự và chuyên nghiệp",
328
+ "Đảm bảo nội dung liên quan đến sự kiện"
329
+ ]
330
+ )
331
+
332
+ print("✓ Rule-based validation PASSED")
333
+
334
+ # STEP 2: LLM validation (for nuanced cases)
335
+ print("🔍 Step 2: LLM validation with Qwen2.5-7B-Instruct...")
336
+
337
  # Build validation prompt
338
  prompt = build_validation_prompt(
339
  event_name=event_name,
 
345
  # Initialize client
346
  client = InferenceClient(token=token)
347
 
 
 
 
348
  messages = [{"role": "user", "content": prompt}]
349
 
350
+ # Try multiple models in order of preference
351
+ models_to_try = [
352
+ "Qwen/Qwen2.5-7B-Instruct", # Best for Vietnamese
353
+ "google/gemma-2-2b-it", # Good JSON adherence
354
+ "mistralai/Mistral-7B-Instruct-v0.3", # Fallback
355
+ ]
356
+
357
+ llm_response = None
358
+ model_used = None
359
 
360
+ for model in models_to_try:
361
+ try:
362
+ print(f" Trying {model}...")
363
+ response = client.chat_completion(
364
+ messages=messages,
365
+ model=model,
366
+ max_tokens=500,
367
+ temperature=0.1,
368
+ top_p=0.9
369
+ )
370
+ llm_response = response.choices[0].message.content.strip()
371
+ model_used = model
372
+ print(f" ✓ Success with {model}")
373
+ break
374
+ except Exception as e:
375
+ print(f" ✗ Failed with {model}: {str(e)[:100]}")
376
+ continue
377
+
378
+ if not llm_response:
379
+ print("⚠ All LLM models failed, using rule-based result")
380
+ return ContentValidationResult(
381
+ is_valid=is_valid_rule,
382
+ confidence_score=confidence_rule,
383
+ reason=reason_rule + " (LLM unavailable)",
384
+ issues=issues_rule,
385
+ suggestions=[]
386
+ )
387
 
388
  print(f"\n{'='*60}")
389
+ print(f"VALIDATION RESPONSE ({model_used}):")
390
  print(f"{'='*60}")
391
  print(llm_response)
392
  print(f"{'='*60}\n")
 
418
  print(f"⚠ JSON Parse Error: {str(parse_error)}")
419
  print(f"Response was: {llm_response[:200]}")
420
 
421
+ # Fallback to rule-based result
422
+ return ContentValidationResult(
423
+ is_valid=is_valid_rule,
424
+ confidence_score=confidence_rule,
425
+ reason=reason_rule + " (LLM parse failed)",
426
+ issues=issues_rule,
427
+ suggestions=[]
428
+ )
429
 
430
  return ContentValidationResult(
431
  is_valid=data.get("is_valid", True),
 
437
 
438
  except Exception as e:
439
  print(f"⚠ Validation error: {str(e)}")
440
+ # On error, deny content to be safe
441
  return ContentValidationResult(
442
+ is_valid=False,
443
+ confidence_score=0.3,
444
+ reason=f"Lỗi validation: {str(e)}. Từ chối để đảm bảo an toàn.",
445
+ issues=[str(e)],
446
+ suggestions=["Vui lòng thử lại hoặc liên hệ support"]
447
  )
448
 
449