Spaces:

walker11
/

RawiPostReview

Sleeping

App Files Files Community

walker11 committed on Jun 16, 2025

Commit

930b9b6

verified ·

1 Parent(s): e3c83c3

Update app.py

Browse files

Files changed (1) hide show

app.py +360 -346

app.py CHANGED Viewed

@@ -1,347 +1,361 @@
-import os
-import json
-import logging
-from typing import Dict, Any, List
-import requests
-from datetime import datetime
-import re
-from flask import Flask, request, jsonify
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-class ArabicContentModerator:
-    """
-    Arabic Story Content Moderation Model using Deepseek API
-    Checks for cultural violations and inappropriate content
-    """
-    def __init__(self, deepseek_api_key: str = None):
-        """
-        Initialize the content moderator
-        Args:
-            deepseek_api_key: Deepseek API key
-        """
-        self.api_key = deepseek_api_key or os.getenv('DEEPSEEK_API_KEY')
-        if not self.api_key:
-            raise ValueError("Deepseek API key is required")
-        self.api_url = "https://api.deepseek.com/chat/completions"
-        self.headers = {
-            "Authorization": f"Bearer {self.api_key}",
-            "Content-Type": "application/json"
-        }
-        # Cultural and content guidelines for Arabic stories
-        self.moderation_prompt = """
-You are a content reviewer specialized in Arabic literature and culture. Your task is to review short Arabic stories to ensure they comply with Arab and Islamic cultural values and do not contain inappropriate content.
-Review criteria:
-1. Cultural and religious content:
-   - No mockery of Islam or Arab traditions
-   - No disrespectful approach to topics that contradict Islamic values
-   - Respect for social and religious symbols
-2. Sexual content and violence:
-   - No explicit sexual content or overt sexual innuendos
-   - No excessive or graphic violence
-   - No profanity or obscene language
-3. Sensitive political content:
-   - Avoid sectarian or ethnic incitement
-   - No approach to controversial political topics in an offensive manner
-4. Social values:
-   - Respect for family values and Arab society
-   - No promotion of socially destructive behaviors
-Response instructions:
-- If the story complies with all criteria, answer with "true"
-- If the story violates any of the criteria, answer with "no"
-- Your answer must only be "true" or "no" without any additional text
-Story to review:
-"""
-    def _call_deepseek_api(self, story_content: str) -> Dict[str, Any]:
-        """
-        Call Deepseek API for content moderation
-        Args:
-            story_content: The Arabic story content to moderate
-        Returns:
-            API response dictionary
-        """
-        try:
-            payload = {
-                "model": "deepseek-chat",
-                "messages": [
-                    {
-                        "role": "system",
-                        "content": "You are a content reviewer specialized in Arabic literature. Your task is to review stories to ensure they comply with Arab cultural values."
-                    },
-                    {
-                        "role": "user",
-                        "content": f"{self.moderation_prompt}\n\n{story_content}"
-                    }
-                ],
-                "max_tokens": 10,
-                "temperature": 0.1,
-                "stream": False
-            }
-            response = requests.post(
-                self.api_url,
-                headers=self.headers,
-                json=payload,
-                timeout=30
-            )
-            if response.status_code == 200:
-                return response.json()
-            else:
-                logger.error(f"API Error: {response.status_code} - {response.text}")
-                return {"error": f"API Error: {response.status_code}"}
-        except Exception as e:
-            logger.error(f"Exception calling Deepseek API: {str(e)}")
-            return {"error": str(e)}
-    def _validate_story_format(self, story_content: str) -> bool:
-        """
-        Basic validation of story format and content
-        Args:
-            story_content: Story content to validate
-        Returns:
-            Boolean indicating if format is valid
-        """
-        if not story_content or not isinstance(story_content, str):
-            return False
-        # Check minimum length (at least 10 characters)
-        if len(story_content.strip()) < 10:
-            return False
-        # Check for Arabic characters
-        arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
-        if not arabic_pattern.search(story_content):
-            return False
-        return True
-    def moderate_story(self, story_content: str) -> Dict[str, Any]:
-        """
-        Main method to moderate Arabic story content
-        Args:
-            story_content: The Arabic story to moderate
-        Returns:
-            Dictionary with moderation result
-        """
-        # Validate input
-        if not self._validate_story_format(story_content):
-            return {
-                "approved": False,
-                "response": "no",
-                "reason": "Invalid story format or missing Arabic content",
-                "timestamp": datetime.now().isoformat()
-            }
-        # Clean and prepare content
-        cleaned_content = story_content.strip()
-        # Call Deepseek API
-        api_response = self._call_deepseek_api(cleaned_content)
-        if "error" in api_response:
-            logger.error(f"Moderation failed: {api_response['error']}")
-            return {
-                "approved": False,
-                "response": "no",
-                "reason": "Moderation service error",
-                "error": api_response["error"],
-                "timestamp": datetime.now().isoformat()
-            }
-        try:
-            # Extract the moderation decision
-            ai_response = api_response.get("choices", [{}])[0].get("message", {}).get("content", "").strip().lower()
-            # Determine if content is approved
-            approved = ai_response == "true"
-            response_value = "true" if approved else "no"
-            result = {
-                "approved": approved,
-                "response": response_value,
-                "ai_decision": ai_response,
-                "timestamp": datetime.now().isoformat()
-            }
-            if not approved:
-                result["reason"] = "Content violates community guidelines or cultural norms"
-            logger.info(f"Moderation completed: {response_value}")
-            return result
-        except Exception as e:
-            logger.error(f"Error processing API response: {str(e)}")
-            return {
-                "approved": False,
-                "response": "no",
-                "reason": "Error processing moderation result",
-                "error": str(e),
-                "timestamp": datetime.now().isoformat()
-            }
-# Flask application
-app = Flask(__name__)
-# Initialize the moderator (API key will be set via environment variable)
-try:
-    moderator = ArabicContentModerator()
-    logger.info("Arabic Content Moderator initialized successfully")
-except ValueError as e:
-    logger.error(f"Failed to initialize moderator: {e}")
-    moderator = None
-@app.route('/', methods=['GET'])
-def home():
-    """Home endpoint with API documentation"""
-    return jsonify({
-        "service": "Arabic Story Content Moderator",
-        "version": "1.0.0",
-        "description": "AI-powered moderation for Arabic short stories",
-        "endpoints": {
-            "/health": "Health check",
-            "/moderate": "POST - Moderate single story",
-            "/moderate/batch": "POST - Moderate multiple stories"
-        },
-        "usage": {
-            "moderate": {
-                "method": "POST",
-                "payload": {"story_content": "Arabic story text"},
-                "response": {"approved": "boolean", "response": "true/no"}
-            }
-        },
-        "status": "healthy" if moderator else "service unavailable"
-    })
-@app.route('/health', methods=['GET'])
-def health_check():
-    """Health check endpoint"""
-    return jsonify({
-        "status": "healthy" if moderator else "unhealthy",
-        "service": "Arabic Content Moderator",
-        "timestamp": datetime.now().isoformat(),
-        "api_available": moderator is not None
-    })
-@app.route('/moderate', methods=['POST'])
-def moderate_content():
-    """
-    Main moderation endpoint
-    Expected JSON payload:
-    {
-        "story_content": "Arabic story text here"
-    }
-    Returns:
-    {
-        "approved": true/false,
-        "response": "true"/"no",
-        "timestamp": "ISO timestamp"
-    }
-    """
-    if not moderator:
-        return jsonify({
-            "error": "Moderation service not available - API key not configured",
-            "approved": False,
-            "response": "no"
-        }), 500
-    try:
-        data = request.get_json()
-        if not data or 'story_content' not in data:
-            return jsonify({
-                "error": "Missing story_content in request",
-                "approved": False,
-                "response": "no"
-            }), 400
-        story_content = data['story_content']
-        result = moderator.moderate_story(story_content)
-        return jsonify(result)
-    except Exception as e:
-        logger.error(f"Error in moderate_content: {str(e)}")
-        return jsonify({
-            "error": "Internal server error",
-            "approved": False,
-            "response": "no",
-            "details": str(e)
-        }), 500
-@app.route('/moderate/batch', methods=['POST'])
-def moderate_batch():
-    """
-    Batch moderation endpoint
-    Expected JSON payload:
-    {
-        "stories": ["story1", "story2", "story3"]
-    }
-    """
-    if not moderator:
-        return jsonify({
-            "error": "Moderation service not available - API key not configured"
-        }), 500
-    try:
-        data = request.get_json()
-        if not data or 'stories' not in data:
-            return jsonify({
-                "error": "Missing stories array in request"
-            }), 400
-        stories = data['stories']
-        if not isinstance(stories, list):
-            return jsonify({
-                "error": "Stories must be an array"
-            }), 400
-        results = []
-        for i, story in enumerate(stories):
-            logger.info(f"Moderating story {i+1}/{len(stories)}")
-            result = moderator.moderate_story(story)
-            results.append(result)
-        return jsonify({
-            "results": results,
-            "total_processed": len(results),
-            "timestamp": datetime.now().isoformat()
-        })
-    except Exception as e:
-        logger.error(f"Error in moderate_batch: {str(e)}")
-        return jsonify({
-            "error": "Internal server error",
-            "details": str(e)
-        }), 500
-if __name__ == '__main__':
-    # For local testing
-    port = int(os.environ.get('PORT', 7860))
     app.run(host='0.0.0.0', port=port, debug=False)

+import os
+import json
+import logging
+from typing import Dict, Any, List
+import requests
+from datetime import datetime
+import re
+from flask import Flask, request, jsonify
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class ArabicContentModerator:
+    """
+    Arabic Story Content Moderation Model using Deepseek API
+    Checks for cultural violations and inappropriate content
+    """
+    def __init__(self, deepseek_api_key: str = None):
+        """
+        Initialize the content moderator
+        Args:
+            deepseek_api_key: Deepseek API key
+        """
+        self.api_key = deepseek_api_key or os.getenv('DEEPSEEK_API_KEY')
+        if not self.api_key:
+            raise ValueError("Deepseek API key is required")
+        self.api_url = "https://api.deepseek.com/chat/completions"
+        self.headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json"
+        }
+        # Cultural and content guidelines for Arabic stories
+        self.moderation_prompt = """
+**Strict Arabic Content Moderation Protocol**
+You are an AI cultural compliance scanner for Arabic stories. Perform sequential analysis with forensic precision:
+1. **Lexical Deep Scan**
+   - Tokenize every word/phrase comparing against:
+     a. Religious Profanity Database: [سب الدين, سب الله, سب الرسول, سب القرآن, لحس دينه, كفر بالله, ...]
+     b. Egyptian/Arabic Profanity Index: [متناك, خول, قحبة, كس أمك, طيز, زق, ...]
+     c. Adult Content Triggers: جنس صريح, عري, علاقة جنسية, إباحية
+2. **Contextual Zero-Tolerance Checks**
+   - Immediate violation if ANY religious blasphemy detected (e.g., "سب الدين" = automatic fail)
+   - Red-flag Egyptian profanity regardless of context (e.g., "يا خول" = violation even if joking)
+   - Reject any sexual descriptions beyond medical/educational needs
+3. **Cultural Alignment Verification**
+   Verify story DOES NOT:
+   - Mock Islamic pillars/prayers/Quran
+   - Normalize haram relationships (زنا, شذوذ)
+   - Promote violence/drugs without moral condemnation
+   - Degrade family values (e.g., rebellious children as heroes)
+4. **Dialect-Specific Profanity Filter**
+   Activate regional sub-scanners for:
+   - Egyptian: [يا وسخة, يا ابن الوسخة, كسختك, ...]
+   - Gulf: [يا حمار, يا كلب, خنيث, ...]
+   - Levantine: [شرميط, عرص, ...]
+**Output Requirements:**
+```json
+{
+  "compliance_status": "approved/rejected",
+  "violations": [
+    {
+      "excerpt": "exact offending phrase",
+      "violation_type": "religious/sexual/profanity/cultural",
+      "severity": "critical/high",
+      "context": "explain why it violates norms"
+    }
+  ],
+  "risk_score": 0-100%,
+  "dialect_flags": ["egyptian_profanity", "levantine_slurs", ...]
+}
+"""
+    def _call_deepseek_api(self, story_content: str) -> Dict[str, Any]:
+        """
+        Call Deepseek API for content moderation
+        Args:
+            story_content: The Arabic story content to moderate
+        Returns:
+            API response dictionary
+        """
+        try:
+            payload = {
+                "model": "deepseek-chat",
+                "messages": [
+                    {
+                        "role": "system",
+                        "content": "You are a content reviewer specialized in Arabic literature. Your task is to review stories to ensure they comply with Arab cultural values."
+                    },
+                    {
+                        "role": "user",
+                        "content": f"{self.moderation_prompt}\n\n{story_content}"
+                    }
+                ],
+                "max_tokens": 10,
+                "temperature": 0.1,
+                "stream": False
+            }
+            response = requests.post(
+                self.api_url,
+                headers=self.headers,
+                json=payload,
+                timeout=30
+            )
+            if response.status_code == 200:
+                return response.json()
+            else:
+                logger.error(f"API Error: {response.status_code} - {response.text}")
+                return {"error": f"API Error: {response.status_code}"}
+        except Exception as e:
+            logger.error(f"Exception calling Deepseek API: {str(e)}")
+            return {"error": str(e)}
+    def _validate_story_format(self, story_content: str) -> bool:
+        """
+        Basic validation of story format and content
+        Args:
+            story_content: Story content to validate
+        Returns:
+            Boolean indicating if format is valid
+        """
+        if not story_content or not isinstance(story_content, str):
+            return False
+        # Check minimum length (at least 10 characters)
+        if len(story_content.strip()) < 10:
+            return False
+        # Check for Arabic characters
+        arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
+        if not arabic_pattern.search(story_content):
+            return False
+        return True
+    def moderate_story(self, story_content: str) -> Dict[str, Any]:
+        """
+        Main method to moderate Arabic story content
+        Args:
+            story_content: The Arabic story to moderate
+        Returns:
+            Dictionary with moderation result
+        """
+        # Validate input
+        if not self._validate_story_format(story_content):
+            return {
+                "approved": False,
+                "response": "no",
+                "reason": "Invalid story format or missing Arabic content",
+                "timestamp": datetime.now().isoformat()
+            }
+        # Clean and prepare content
+        cleaned_content = story_content.strip()
+        # Call Deepseek API
+        api_response = self._call_deepseek_api(cleaned_content)
+        if "error" in api_response:
+            logger.error(f"Moderation failed: {api_response['error']}")
+            return {
+                "approved": False,
+                "response": "no",
+                "reason": "Moderation service error",
+                "error": api_response["error"],
+                "timestamp": datetime.now().isoformat()
+            }
+        try:
+            # Extract the moderation decision
+            ai_response = api_response.get("choices", [{}])[0].get("message", {}).get("content", "").strip().lower()
+            # Determine if content is approved
+            approved = ai_response == "true"
+            response_value = "true" if approved else "no"
+            result = {
+                "approved": approved,
+                "response": response_value,
+                "ai_decision": ai_response,
+                "timestamp": datetime.now().isoformat()
+            }
+            if not approved:
+                result["reason"] = "Content violates community guidelines or cultural norms"
+            logger.info(f"Moderation completed: {response_value}")
+            return result
+        except Exception as e:
+            logger.error(f"Error processing API response: {str(e)}")
+            return {
+                "approved": False,
+                "response": "no",
+                "reason": "Error processing moderation result",
+                "error": str(e),
+                "timestamp": datetime.now().isoformat()
+            }
+# Flask application
+app = Flask(__name__)
+# Initialize the moderator (API key will be set via environment variable)
+# The constructor raises ValueError when no API key is available; in that case the
+# module still loads and `moderator` stays None, which the route handlers check before use
+try:
+    moderator = ArabicContentModerator()
+    logger.info("Arabic Content Moderator initialized successfully")
+except ValueError as e:
+    logger.error(f"Failed to initialize moderator: {e}")
+    moderator = None
+@app.route('/', methods=['GET'])
+def home():
+    """Home endpoint with API documentation"""
+    return jsonify({
+        "service": "Arabic Story Content Moderator",
+        "version": "1.0.0",
+        "description": "AI-powered moderation for Arabic short stories",
+        "endpoints": {
+            "/health": "Health check",
+            "/moderate": "POST - Moderate single story",
+            "/moderate/batch": "POST - Moderate multiple stories"
+        },
+        "usage": {
+            "moderate": {
+                "method": "POST",
+                "payload": {"story_content": "Arabic story text"},
+                "response": {"approved": "boolean", "response": "true/no"}
+            }
+        },
+        "status": "healthy" if moderator else "service unavailable"
+    })
+@app.route('/health', methods=['GET'])
+def health_check():
+    """Health check endpoint"""
+    return jsonify({
+        "status": "healthy" if moderator else "unhealthy",
+        "service": "Arabic Content Moderator",
+        "timestamp": datetime.now().isoformat(),
+        "api_available": moderator is not None
+    })
+@app.route('/moderate', methods=['POST'])
+def moderate_content():
+    """
+    Main moderation endpoint
+    Expected JSON payload:
+    {
+        "story_content": "Arabic story text here"
+    }
+    Returns:
+    {
+        "approved": true/false,
+        "response": "true"/"no",
+        "timestamp": "ISO timestamp"
+    }
+    """
+    if not moderator:
+        return jsonify({
+            "error": "Moderation service not available - API key not configured",
+            "approved": False,
+            "response": "no"
+        }), 500
+    try:
+        data = request.get_json()
+        if not data or 'story_content' not in data:
+            return jsonify({
+                "error": "Missing story_content in request",
+                "approved": False,
+                "response": "no"
+            }), 400
+        story_content = data['story_content']
+        result = moderator.moderate_story(story_content)
+        return jsonify(result)
+    except Exception as e:
+        logger.error(f"Error in moderate_content: {str(e)}")
+        return jsonify({
+            "error": "Internal server error",
+            "approved": False,
+            "response": "no",
+            "details": str(e)
+        }), 500
+@app.route('/moderate/batch', methods=['POST'])
+def moderate_batch():
+    """
+    Batch moderation endpoint
+    Expected JSON payload:
+    {
+        "stories": ["story1", "story2", "story3"]
+    }
+    """
+    if not moderator:
+        return jsonify({
+            "error": "Moderation service not available - API key not configured"
+        }), 500
+    try:
+        data = request.get_json()
+        if not data or 'stories' not in data:
+            return jsonify({
+                "error": "Missing stories array in request"
+            }), 400
+        stories = data['stories']
+        if not isinstance(stories, list):
+            return jsonify({
+                "error": "Stories must be an array"
+            }), 400
+        results = []
+        for i, story in enumerate(stories):
+            logger.info(f"Moderating story {i+1}/{len(stories)}")
+            result = moderator.moderate_story(story)
+            results.append(result)
+        return jsonify({
+            "results": results,
+            "total_processed": len(results),
+            "timestamp": datetime.now().isoformat()
+        })
+    except Exception as e:
+        logger.error(f"Error in moderate_batch: {str(e)}")
+        return jsonify({
+            "error": "Internal server error",
+            "details": str(e)
+        }), 500
+if __name__ == '__main__':
+    # For local testing; the port comes from the PORT environment variable, defaulting to 7860
+    port = int(os.environ.get('PORT', 7860))
+    # Listen on all interfaces with the Flask debugger disabled
     app.run(host='0.0.0.0', port=port, debug=False)