Spaces:

walker11
/

RawiPostReview

Sleeping

App Files Files Community

walker11 committed on Jun 16, 2025

Commit

ee989b2

verified ·

1 Parent(s): aa1bf0e

Update app.py

Browse files

Files changed (1) hide show

app.py +737 -183

app.py CHANGED Viewed

@@ -13,8 +13,8 @@ logger = logging.getLogger(__name__)
 class ArabicContentModerator:
     """
-    Arabic Story Content Moderation Model using Deepseek API
-    Checks for cultural violations and inappropriate content
     """
     def __init__(self, deepseek_api_key: str = None):
@@ -34,77 +34,329 @@ class ArabicContentModerator:
             "Content-Type": "application/json"
         }
-        # Flexible Arabic Content Moderation - Strict only on Religion & Profanity
         self.moderation_prompt = """
-أنت مراجع محتوى عربي محترف. مهمتك مراجعة النصوص العربية مع التركيز الشديد على الانتهاكات الدينية والسب والشتم فقط، مع المرونة في باقي الجوانب.
-## معايير المراجعة
-### 1. التحقق من طبيعة النص (مرن):
-- قبول أي نص سردي يحتوي على عناصر القصة الأساسية
-- قبول النصوص الإبداعية حتى لو كانت قصيرة أو بسيطة
-- قبول الحوارات والمونولوجات والنصوص التجريبية
-- قبول الأساليب الأدبية المختلفة (حديثة، تقليدية، تجريبية)
-- **لا رفض** للنصوص الإبداعية إلا إذا كانت أخباراً صحفية واضحة أو مقالات أكاديمية بحتة
-### 2. الانتهاكات الدينية - فحص صارم جداً:
-**رفض فوري للمحتوى الذي يحتوي على:**
-- أي استهزاء أو تهكم على الله أو الأنبياء أو الأسماء الحسنى
-- انتقاد أو تشكيك في الآيات القرآنية أو الأحاديث النبوية
-- السخرية من الصلاة أو الصوم أو الحج أو أي شعيرة دينية
-- التطاول على الصحابة أو العلماء المعتبرين
-- التجديف أو الكفر أو الردة الصريحة
-- نسبة أقوال كاذبة للرسول أو تحريف النصوص الدينية
-- السب بالدين أو اللعن بالآيات
-### 3. السب والشتم - فحص صارم جداً:
-**رفض فوري للمحتوى الذي يحتوي على:**
-- الألفاظ الجنسية الصريحة أو الوصف الجنسي المبتذل
-- السب بالأعضاء التناسلية أو الوظائف الجنسية
-- الألفاظ الإخراجية والمصطلحات القذرة
-- إهانة الأم أو العرض أو الشرف بألفاظ نابية
-- السب العرقي أو الطائفي بألفاظ قبيحة
-- الكلمات المبتذلة الخادشة للحياء العام
-### 4. الجوانب الأخرى - مرونة عالية:
-**قبول المحتوى حتى لو احتوى على:**
-- نقد اجتماعي أو سياسي معتدل
-- مواضيع حساسة إذا تم تناولها بأدب
-- عنف درامي غير مفرط ومبرر قصصياً
-- صراعات عاطفية أو اجتماعية
-- تناول قضايا معاصرة أو تاريخية
-- اختلافات ثقافية أو اجتماعية
-- انتقاد سلوكيات أو ممارسات اجتماعية (بدون سب)
-### 5. اللغة والأسلوب - مرونة عالية:
-**قبول:**
-- اللهجات المحلية والعامية المفهومة
-- الأساليب الأدبية المختلفة
-- التجديد في الشكل الأدبي
-- المزج بين الفصحى والعامية
-- استخدام كلمات أجنبية إذا كانت مناسبة للسياق
-## إرشادات التقييم المرن:
-1. **أعط الأولوية للإبداع**: اقبل النصوص الإبداعية حتى لو كانت غير تقليدية
-2. **السياق مهم**: اعتبر السياق القصصي والغرض الأدبي
-3. **التساهل في النقد**: لا ترفض النصوص للنقد الاجتماعي المهذب
-4. **المرونة الثقافية**: اقبل تنوع الأساليب والمواضيع العربية
-5. **التركيز على الجوهر**: ركز فقط على الانتهاكات الدينية والسب الصريح
-## الانتهاكات الوحيدة المرفوضة:
-1. **الانتهاكات الدينية الصريحة** (كما هو محدد أعلاه)
-2. **السب والشتم الفاحش** (كما هو محدد أعلاه)
-3. **النصوص غير السردية تماماً** (الأخبار الصحفية البحتة أو المقالات الأكاديمية)
 ## الاستجابة المطلوبة:
-بعد المراجعة، أجب بكلمة واحدة فقط:
-- "true" - إذا كان النص مقبولاً (خال من الانتهاكات الدينية والسب الفاحش)
-- "no" - إذا احتوى على انتهاكات دينية صريحة أو سب فاحش فقط
 النص المطلوب مراجعته:
 """
     def _call_deepseek_api(self, story_content: str) -> Dict[str, Any]:
         """
         Call Deepseek API for content moderation
@@ -121,21 +373,21 @@ class ArabicContentModerator:
                 "messages": [
                     {
                         "role": "system",
-                        "content": "أنت مراجع محتوى عربي محترف. مهمتك مراجعة النصوص العربية مع التركيز الشديد على الانتهاكات الدينية والسب والشتم فقط، مع المرونة في باقي الجوانب."
                     },
                     {
-                        "role": "user",
                         "content": f"{self.moderation_prompt}\n\n{story_content}"
                     }
                 ],
                 "max_tokens": 10,
-                "temperature": 0.0,  # Changed to 0.0 for more consistent results
                 "stream": False
             }
             response = requests.post(
-                self.api_url,
-                headers=self.headers,
                 json=payload,
                 timeout=30
             )
@@ -146,105 +398,13 @@ class ArabicContentModerator:
                 logger.error(f"API Error: {response.status_code} - {response.text}")
                 return {"error": f"API Error: {response.status_code}"}
-        except Exception as e:
-            logger.error(f"Exception calling Deepseek API: {str(e)}")
-            return {"error": str(e)}
-    def _validate_story_format(self, story_content: str) -> bool:
-        """
-        Enhanced validation of story format and content
-        Args:
-            story_content: Story content to validate
-        Returns:
-            Boolean indicating if format is valid
-        """
-        if not story_content or not isinstance(story_content, str):
-            return False
-        # Check minimum length (at least 50 characters for a meaningful story)
-        if len(story_content.strip()) < 50:
-            return False
-        # Check for Arabic characters (must have substantial Arabic content)
-        arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
-        arabic_chars = len(arabic_pattern.findall(story_content))
-        # Arabic characters should be at least 30% of total characters
-        if arabic_chars < len(story_content.strip()) * 0.3:
-            return False
-        return True
-    def moderate_story(self, story_content: str) -> Dict[str, Any]:
-        """
-        Main method to moderate Arabic story content with enhanced validation
-        Args:
-            story_content: The Arabic story to moderate
-        Returns:
-            Dictionary with moderation result
-        """
-        # Enhanced validation
-        if not self._validate_story_format(story_content):
-            return {
-                "approved": False,
-                "response": "no",
-                "reason": "فشل في التحقق من صحة تنسيق القصة أو عدم وجود محتوى عربي كافٍ",
-                "timestamp": datetime.now().isoformat()
-            }
-        # Clean and prepare content
-        cleaned_content = story_content.strip()
-        # Call Deepseek API
-        api_response = self._call_deepseek_api(cleaned_content)
-        if "error" in api_response:
-            logger.error(f"Moderation failed: {api_response['error']}")
-            return {
-                "approved": False,
-                "response": "no",
-                "reason": "خطأ في خدمة المراجعة",
-                "error": api_response["error"],
-                "timestamp": datetime.now().isoformat()
-            }
-        try:
-            # Extract the moderation decision
-            ai_response = api_response.get("choices", [{}])[0].get("message", {}).get("content", "").strip().lower()
-            # Clean the response (remove any extra whitespace or characters)
-            ai_response = re.sub(r'[^\w]', '', ai_response)
-            # Determine if content is approved (be more strict)
-            approved = ai_response == "true"
-            response_value = "true" if approved else "no"
-            result = {
-                "approved": approved,
-                "response": response_value,
-                "ai_decision": ai_response,
-                "timestamp": datetime.now().isoformat(),
-                "content_length": len(cleaned_content)
-            }
-            if not approved:
-                result["reason"] = "المحتوى ينتهك القواعد المجتمعية أو الثقافية أو الدينية، أو أنه ليس قصة أدبية حقيقية"
-            else:
-                result["reason"] = "المحتوى مقبول ويلتزم بالمعايير المطلوبة"
-            logger.info(f"Moderation completed: {response_value} for content of length {len(cleaned_content)}")
-            return result
         except Exception as e:
             logger.error(f"Error processing API response: {str(e)}")
             return {
                 "approved": False,
                 "response": "no",
                 "reason": "خطأ في معالجة نتيجة المراجعة",
                 "error": str(e),
                 "timestamp": datetime.now().isoformat()
             }
@@ -256,7 +416,7 @@ app = Flask(__name__)
 # Initialize the moderator (API key will be set via environment variable)
 try:
     moderator = ArabicContentModerator()
-    logger.info("Arabic Content Moderator initialized successfully")
 except ValueError as e:
     logger.error(f"Failed to initialize moderator: {e}")
     moderator = None
@@ -265,28 +425,71 @@ except ValueError as e:
 def home():
     """Home endpoint with API documentation"""
     return jsonify({
-        "service": "مراجع المحتوى الأدبي العربي المحسن",
-        "service_en": "Enhanced Arabic Literary Content Moderator",
-        "version": "2.0.0",
-        "description": "AI-powered professional literary critic for Arabic short stories with enhanced cultural and religious compliance",
-        "description_ar": "ناقد أدبي محترف مدعوم بالذكاء الاصطناعي للقصص العربية القصيرة مع التزام معزز بالقيم الثقافية والدينية",
         "endpoints": {
             "/health": "Health check",
             "/moderate": "POST - Moderate single story",
-            "/moderate/batch": "POST - Moderate multiple stories"
         },
         "features": [
-            "Enhanced religious and cultural compliance checking",
             "Professional literary criticism standards",
-            "Strict content type validation (stories only)",
-            "Comprehensive profanity and inappropriate content detection",
-            "Arabic language purity validation"
         ],
         "usage": {
             "moderate": {
                 "method": "POST",
                 "payload": {"story_content": "Arabic story text"},
-                "response": {"approved": "boolean", "response": "true/no"}
             }
         },
         "status": "healthy" if moderator else "service unavailable"
@@ -297,15 +500,77 @@ def health_check():
     """Health check endpoint"""
     return jsonify({
         "status": "healthy" if moderator else "unhealthy",
-        "service": "Enhanced Arabic Content Moderator",
         "timestamp": datetime.now().isoformat(),
-        "api_available": moderator is not None
     })
 @app.route('/moderate', methods=['POST'])
 def moderate_content():
     """
-    Enhanced moderation endpoint
     Expected JSON payload:
     {
@@ -316,7 +581,9 @@ def moderate_content():
     {
         "approved": true/false,
         "response": "true"/"no",
-        "reason": "reason in Arabic",
         "timestamp": "ISO timestamp"
     }
     """
@@ -342,6 +609,11 @@ def moderate_content():
         story_content = data['story_content']
         result = moderator.moderate_story(story_content)
         return jsonify(result)
     except Exception as e:
@@ -351,13 +623,14 @@ def moderate_content():
             "error_en": "Internal server error",
             "approved": False,
             "response": "no",
             "details": str(e)
         }), 500
 @app.route('/moderate/batch', methods=['POST'])
 def moderate_batch():
     """
-    Enhanced batch moderation endpoint
     Expected JSON payload:
     {
@@ -388,14 +661,21 @@ def moderate_batch():
         results = []
         approved_count = 0
         for i, story in enumerate(stories):
             logger.info(f"Moderating story {i+1}/{len(stories)}")
             result = moderator.moderate_story(story)
             results.append({
                 "story_index": i,
                 "result": result
             })
             if result.get("approved", False):
                 approved_count += 1
@@ -405,7 +685,14 @@ def moderate_batch():
                 "total_processed": len(results),
                 "approved_count": approved_count,
                 "rejected_count": len(results) - approved_count,
-                "approval_rate": f"{(approved_count/len(results)*100):.1f}%" if results else "0%"
             },
             "timestamp": datetime.now().isoformat()
         })
@@ -418,7 +705,274 @@ def moderate_batch():
             "details": str(e)
         }), 500
 if __name__ == '__main__':
     # For local testing
     port = int(os.environ.get('PORT', 7860))
-    app.run(host='0.0.0.0', port=port, debug=False)

 class ArabicContentModerator:
     """
+    Enhanced Arabic Story Content Moderation Model using Deepseek API
+    Checks for cultural violations, religious content, and inappropriate content with strict enforcement
     """
     def __init__(self, deepseek_api_key: str = None):
             "Content-Type": "application/json"
         }
+        # Comprehensive Arabic profanity and offensive terms
+        self.profanity_terms = {
+            # Sexual terms (explicit)
+            'explicit_sexual': [
+                'زب', 'زبر', 'زبي', 'كس', 'كسك', 'كسها', 'كسي', 'طيز', 'طيزك', 'طيزها',
+                'نيك', 'نيكه', 'نيكها', 'ناك', 'منيوك', 'منيوكة', 'مناك', 'متناك',
+                'لحس', 'لحسه', 'لحسها', 'لاحس', 'ملحوس', 'ملحوسة', 'عرص', 'عرصه',
+                'شرموط', 'شرموطه', 'قحبه', 'قحبة', 'عاهر', 'عاهرة', 'بغي', 'بغيه',
+                'ديوث', 'ديوثه', 'قواد', 'قوادة', 'فاجر', 'فاجرة', 'فاسق', 'فاسقة'
+            ],
+            # Bathroom/excretory terms
+            'excretory': [
+                'خرا', 'خراء', 'خرة', 'زفت', 'زفته', 'بول', 'بوله', 'غائط', 'براز',
+                'تبول', 'تبرز', 'حقير', 'نجس', 'قذر', 'قذره', 'وسخ', 'وسخه'
+            ],
+            # Family honor insults
+            'family_honor': [
+                'ابن الكلب', 'ابن الكلبه', 'ابن الحرام', 'ابن الزنا', 'ابن القحبه',
+                'بنت الكلب', 'بنت الكلبه', 'بنت الحرام', 'بنت الزنا', 'بنت القحبه',
+                'يلعن ابوك', 'يلعن امك', 'يلعن اختك', 'يلعن اهلك', 'يلعن عرضك',
+                'كلب', 'كلبه', 'حيوان', 'حيوانه', 'وضيع', 'وضيعه', 'حقير', 'حقيره'
+            ],
+            # Religious blasphemy
+            'religious_blasphemy': [
+                'يلعن الله', 'لعنة الله', 'يلعن الدين', 'لعنة الدين', 'يلعن الرسول',
+                'كسم الله', 'كسم الدين', 'كسم الرسول', 'كسم المسيح', 'كسم النبي',
+                'تف على الله', 'تف على الدين', 'تف على الرسول', 'خرا على الله',
+                'زب الله', 'زب الدين', 'زب الرسول', 'طيز الله', 'طيز الدين'
+            ],
+            # Ethnic/racial slurs
+            'ethnic_slurs': [
+                'عبد', 'عبده', 'زنجي', 'زنجيه', 'اسود', 'سوده', 'خواجه', 'خواجة',
+                'يهودي نجس', 'يهوديه نجسه', 'مسيحي كافر', 'مسيحيه كافره',
+                'بدوي', 'بدويه', 'فلاح', 'فلاحه', 'صعيدي', 'صعيديه'
+            ]
+        }
+        # Religious terms that require careful handling
+        self.religious_sensitive_terms = [
+            'الله', 'محمد', 'الرسول', 'النبي', 'القرآن', 'الإسلام', 'المسيح', 'عيسى',
+            'موسى', 'إبراهيم', 'الأنبياء', 'الصحابة', 'الخلفاء', 'الإمام', 'الشيخ',
+            'المسجد', 'الكعبة', 'مكة', 'المدينة', 'الحج', 'الصلاة', 'الصوم', 'الزكاة'
+        ]
+        # Cultural taboos in Arabic society
+        self.cultural_taboos = [
+            'عري', 'عريان', 'عريانه', 'عاري', 'عاريه', 'مثلي', 'مثليه', 'شاذ', 'شاذه',
+            'خمر', 'خمرة', 'مخمور', 'مخمورة', 'سكران', 'سكرانه', 'مسكر', 'مسكرة',
+            'قمار', 'مقامر', 'مقامرة', 'ميسر', 'رهان', 'مراهنة', 'خنزير', 'خنزيرة'
+        ]
+        # Enhanced Arabic Content Moderation with Comprehensive Checking
         self.moderation_prompt = """
+أنت مراجع محتوى عربي محترف متخصص في التمييز بين القصص الأدبية والمحتوى الإخباري مع التطبيق الصارم للمعايير الثقافية والدينية العربية.
+## معايير الرفض الصارمة:
+### 1. المحتوى الإخباري والصحفي - رفض فوري:
+**يجب رفض النصوص التي تحتوي على:**
+**أ) التقارير الرياضية:**
+- "بعد المباراة خرج وقال"
+- "اللاعب تألق ومنع أهداف"
+- "فاز بجائزة رجل المباراة"
+- "المباراة انتهت بنتيجة"
+- "في الشوط الأول"
+- "المدرب صرح"
+**ب) المؤتمرات الصحفية:**
+- "في مؤتمر صحفي"
+- "صرح الوزير"
+- "أعلن المسؤول"
+- "في تصريحات خاصة"
+- "قال النائب"
+- "أكد الخبير"
+**ج) الاجتماعات والفعاليات:**
+- "في اجتماع اليوم"
+- "خلال الجلسة"
+- "في المنتدى"
+- "أثناء المؤتمر"
+- "في الورشة"
+- "خلال اللقاء"
+### 2. الانتهاكات الدينية - رفض صارم ومطلق:
+**رفض فوري ونهائي للمحتوى الذي يحتوي على:**
+**أ) التجديف والكفر:**
+- أي استهزاء أو تهكم على الله سبحانه وتعالى
+- السخرية من الأنبياء أو الرسل (محمد، عيسى، موسى، إبراهيم)
+- انتقاد أو تشكيك في القرآن الكريم أو الأحاديث النبوية
+- التطاول على الصحابة أو أمهات المؤمنين
+- السب أو اللعن بالدين أو الله أو الرسول
+**ب) الاستهزاء بالشعائر:**
+- السخرية من الصلاة، الصوم، الحج، الزكاة
+- التهكم على المساجد أو الأماكن المقدسة
+- الاستهزاء بالحجاب أو اللباس الإسلامي
+- انتقاد الأحكام الشرعية بطريقة مسيئة
+**ج) التشكيك في العقيدة:**
+- إنكار وجود الله أو صفاته
+- التشكيك في الآخرة أو يوم القيامة
+- إنكار النبوة أو الوحي
+- الترويج للإلحاد أو الكفر
+### 3. الانتهاكات الثقافية العربية - رفض صارم:
+**أ) انتهاك الحياء والعفة:**
+- الوصف الجنسي الصريح أو المبطن
+- المشاهد الإباحية أو الإيحاءات الجنسية
+- الحديث عن العلاقات غير الشرعية بتفصيل
+- وصف الأجساد بطريقة مثيرة أو فاضحة
+- الترويج للعري أو السفور
+**ب) انتهاك قيم الأسرة:**
+- تمجيد العلاقات خارج إطار الزواج
+- السخرية من الزواج أو الأسرة
+- الترويج للشذوذ الجنسي أو المثلية
+- انتهاك احترام الوالدين أو كبار السن
+- تشجيع العقوق أو قطيعة الرحم
+**ج) انتهاك الأخلاق الاجتماعية:**
+- الترويج للخمر أو المخدرات
+- تمجيد القمار أو الميسر
+- الترويج للجريمة أو العنف
+- انتهاك كرامة المرأة أو الرجل
+- الطعن في الشرف أو العرض
+### 4. السب والشتم والألفاظ النابية - رفض مطلق:
+**أ) السب الجنسي الصريح:**
+- الألفاظ المتعلقة بالأعضاء التناسلية
+- الكلمات الجنسية الفاحشة
+- السب بالعرض أو الشرف
+**ب) السب الإخراجي:**
+- الألفاظ المتعلقة بالفضلات أو النجاسة
+- السب بالقذارة أو الوسخ
+**ج) السب العائلي:**
+- إهانة الأم أو الأب
+- السب بالأخت أو الزوجة
+- انتهاك كرامة الأسرة
+**د) السب العرقي والطائفي:**
+- الألفاظ العنصرية ضد الأعراق
+- السب الطائفي أو المذهبي
+- التحقير القبلي أو الجهوي
+### 5. المحتوى المقبول - القصص الأدبية الراقية:
+**أ) القصص التربوية:**
+- تعزيز القيم الإسلامية والأخلاق
+- التركيز على الفضائل والأخلاق الحميدة
+- قصص الأنبياء والصالحين (بأدب واحترام)
+- الحكايات التراثية الراقية
+**ب) القصص الاجتماعية الهادفة:**
+- معالجة المشاكل الاجتماعية بحكمة
+- تقوية الروابط الأسرية
+- احترام الثقافة والتقاليد العربية
+- التأكيد على أهمية العلم والتعلم
+**ج) القصص الخيالية المناسبة:**
+- الحكايات الشعبية النظيفة
+- قصص الأطفال التربوية
+- الخيال العلمي المناسب ثقافياً
+- قصص المغامرات النظيفة
+### 6. معايير الأسلوب والأدب:
+**أ) اللغة الراقية:**
+- استخدام اللغة العربية الفصحى أو العامية المهذبة
+- تجنب الألفاظ الركيكة أو المبتذلة
+- الحفاظ على جمال اللغة وبلاغتها
+**ب) الأسلوب المحترم:**
+- تجنب الإثارة أو الإسفاف
+- الحفاظ على كرامة الشخصيات
+- احترام المشاعر والأحاسيس
+## أمثلة للرفض الفوري:
+**انتهاكات دينية (يجب رفضها):**
+- أي نص يحتوي على سب أو استهزاء بالله أو الرسول
+- أي نص يشكك في العقيدة الإسلامية
+- أي نص يسخر من الشعائر الدينية
+**انتهاكات ثقافية (يجب رفضها):**
+- أي نص يروج للعلاقات غير الشرعية
+- أي نص يحتوي على وصف جنسي صريح
+- أي نص يروج للخمر أو المخدرات
+**سب وشتم (يجب رفضه):**
+- أي نص يحتوي على ألفاظ نابية أو فاحشة
+- أي نص يسب الأسرة أو العرض
+- أي نص يحتوي على تحقير عرقي أو طائفي
+## أمثلة للقبول:
+**قصة أدبية راقية (يجب قبولها):**
+"كان رجل صالح يعيش في قرية صغيرة، يساعد الفقراء ويعطف على اليتامى. في يوم من الأيام، مرض ابنه الوحيد، فصبر واحتسب، ولم يفقد إيمانه بالله..."
+**حكاية تراثية (يجب قبولها):**
+"يحكى أن ملكاً عادلاً كان يتنكر في زي فقير ليتفقد أحوال رعيته. في إحدى جولاته، رأى رجلاً يطعم الطيور من قوت يومه..."
 ## الاستجابة المطلوبة:
+بعد المراجعة الدقيقة والشاملة، أجب بكلمة واحدة فقط:
+- "true" - إذا كان النص قصة أدبية إبداعية راقية خالية من جميع الانتهاكات الدينية والثقافية والأخلاقية
+- "no" - إذا كان النص إخبارياً أو يحتوي على أي انتهاك ديني أو ثقافي أو أخلاقي مهما كان بسيطاً
+**تذكر: المعايير صارمة جداً، وأي شك في المحتوى يستوجب الرفض الفوري**
 النص المطلوب مراجعته:
 """
+    def _check_profanity_and_violations(self, content: str) -> Dict[str, Any]:
+        """
+        Enhanced pre-check for profanity and cultural violations
+        Args:
+            content: Content to check
+        Returns:
+            Dictionary with violation details
+        """
+        violations = {
+            'has_violations': False,
+            'violation_types': [],
+            'severity': 'none',
+            'details': []
+        }
+        content_lower = content.lower()
+        # Check for explicit profanity
+        for category, terms in self.profanity_terms.items():
+            for term in terms:
+                if term in content_lower:
+                    violations['has_violations'] = True
+                    violations['violation_types'].append(category)
+                    violations['severity'] = 'critical'
+                    violations['details'].append(f"Found {category} term: {term}")
+        # Check for inappropriate religious content
+        religious_violations = [
+            'يلعن الله', 'لعنة الله', 'يلعن الدين', 'تف على الله', 'كسم الله',
+            'خرا على الله', 'زب الله', 'طيز الله', 'كسم الدين', 'يلعن الرسول'
+        ]
+        for violation in religious_violations:
+            if violation in content_lower:
+                violations['has_violations'] = True
+                violations['violation_types'].append('religious_blasphemy')
+                violations['severity'] = 'critical'
+                violations['details'].append(f"Religious violation detected: {violation}")
+        # Check for cultural taboos
+        for taboo in self.cultural_taboos:
+            if taboo in content_lower:
+                violations['has_violations'] = True
+                violations['violation_types'].append('cultural_taboo')
+                violations['severity'] = 'high' if violations['severity'] != 'critical' else 'critical'
+                violations['details'].append(f"Cultural taboo detected: {taboo}")
+        # Check for inappropriate sexual content patterns
+        sexual_patterns = [
+            r'نيك', r'ناك', r'منيوك', r'لحس', r'ملحوس', r'عرص', r'شرموط',
+            r'قحبة', r'عاهر', r'بغي', r'ديوث', r'قواد'
+        ]
+        for pattern in sexual_patterns:
+            if re.search(pattern, content_lower):
+                violations['has_violations'] = True
+                violations['violation_types'].append('sexual_content')
+                violations['severity'] = 'critical'
+                violations['details'].append(f"Sexual content pattern detected: {pattern}")
+        return violations
+    def _check_religious_sensitivity(self, content: str) -> Dict[str, Any]:
+        """
+        Check for inappropriate use of religious terms
+        Args:
+            content: Content to check
+        Returns:
+            Dictionary with religious sensitivity analysis
+        """
+        sensitivity = {
+            'has_issues': False,
+            'religious_terms_found': [],
+            'context_issues': [],
+            'severity': 'none'
+        }
+        content_lower = content.lower()
+        # Find religious terms
+        for term in self.religious_sensitive_terms:
+            if term.lower() in content_lower:
+                sensitivity['religious_terms_found'].append(term)
+        # Check for inappropriate contexts with religious terms
+        if sensitivity['religious_terms_found']:
+            inappropriate_contexts = [
+                'يلعن', 'تف على', 'خرا على', 'كسم', 'زب', 'طيز', 'نيك', 'لعنة'
+            ]
+            for context in inappropriate_contexts:
+                if context in content_lower:
+                    sensitivity['has_issues'] = True
+                    sensitivity['context_issues'].append(context)
+                    sensitivity['severity'] = 'critical'
+        return sensitivity
     def _call_deepseek_api(self, story_content: str) -> Dict[str, Any]:
         """
         Call Deepseek API for content moderation
                 "messages": [
                     {
                         "role": "system",
+                        "content": "أنت مراجع محتوى عربي محترف متخصص في التطبيق الصارم للمعايير الثقافية والدينية العربية. يجب عليك رفض أي محتوى إخباري أو يحتوي على انتهاكات دينية أو ثقافية أو أخلاقية بصرامة تامة."
                     },
                     {
+                        "role": "user",
                         "content": f"{self.moderation_prompt}\n\n{story_content}"
                     }
                 ],
                 "max_tokens": 10,
+                "temperature": 0.0,
                 "stream": False
             }
             response = requests.post(
+                self.api_url,
+                headers=self.headers,
                 json=payload,
                 timeout=30
             )
                 logger.error(f"API Error: {response.status_code} - {response.text}")
                 return {"error": f"API Error: {response.status_code}"}
         except Exception as e:
             logger.error(f"Error processing API response: {str(e)}")
             return {
                 "approved": False,
                 "response": "no",
                 "reason": "خطأ في معالجة نتيجة المراجعة",
+                "violation_type": "processing_error",
                 "error": str(e),
                 "timestamp": datetime.now().isoformat()
             }
 # Initialize the moderator (API key will be set via environment variable)
 try:
     moderator = ArabicContentModerator()
+    logger.info("Enhanced Arabic Content Moderator initialized successfully")
 except ValueError as e:
     logger.error(f"Failed to initialize moderator: {e}")
     moderator = None
 def home():
     """Home endpoint with API documentation"""
     return jsonify({
+        "service": "مراجع المحتوى الأدبي العربي المحسن مع التطبيق الصارم للمعايير الثقافية والدينية",
+        "service_en": "Enhanced Arabic Literary Content Moderator with Strict Cultural and Religious Standards",
+        "version": "4.0.0",
+        "description": "AI-powered professional literary critic for Arabic short stories with comprehensive cultural, religious, and profanity filtering",
+        "description_ar": "ناقد أدبي محترف مدعوم بالذكاء الاصطناعي للقصص العربية القصيرة مع فلترة شاملة للمحتوى الثقافي والديني والألفاظ النابية",
         "endpoints": {
             "/health": "Health check",
             "/moderate": "POST - Moderate single story",
+            "/moderate/batch": "POST - Moderate multiple stories",
+            "/violations/check": "POST - Check for specific violations without full moderation"
         },
         "features": [
+            "Comprehensive profanity detection with Arabic terms database",
+            "Strict religious content filtering and blasphemy detection",
+            "Cultural taboo identification and rejection",
+            "Enhanced news content detection and rejection",
+            "Family honor and respect enforcement",
+            "Ethnic and racial slur detection",
+            "Sexual content and inappropriate material filtering",
+            "Religious sensitivity analysis",
             "Professional literary criticism standards",
+            "Multi-level violation severity assessment"
         ],
+        "violation_categories": [
+            "explicit_sexual - Explicit sexual terms and content",
+            "excretory - Bathroom and excretory terms",
+            "family_honor - Family honor insults and disrespect",
+            "religious_blasphemy - Religious blasphemy and disrespect",
+            "ethnic_slurs - Ethnic and racial discrimination",
+            "cultural_taboo - Cultural taboos and inappropriate content",
+            "sexual_content - Sexual content patterns",
+            "religious_violation - Inappropriate religious content usage"
+        ],
+        "rejected_content_types": [
+            "Sports reports and match analysis",
+            "Press conferences and official statements",
+            "Meeting minutes and proceedings",
+            "Political news and announcements",
+            "Economic reports and market updates",
+            "Technical reviews and product launches",
+            "Local news and municipal updates",
+            "Content with profanity or offensive language",
+            "Religious blasphemy or disrespectful content",
+            "Culturally inappropriate material",
+            "Sexual or adult content",
+            "Family honor violations"
+        ],
+        "cultural_standards": {
+            "religious_respect": "Strict enforcement of Islamic values and respect for all religions",
+            "family_values": "Protection of family honor and traditional values",
+            "language_purity": "Rejection of profanity and offensive language",
+            "cultural_sensitivity": "Adherence to Arab cultural norms and traditions",
+            "moral_guidelines": "Enforcement of high moral and ethical standards"
+        },
         "usage": {
             "moderate": {
                 "method": "POST",
                 "payload": {"story_content": "Arabic story text"},
+                "response": {
+                    "approved": "boolean",
+                    "response": "true/no",
+                    "reason": "detailed reason in Arabic",
+                    "violation_type": "type of violation if any",
+                    "violation_details": "detailed violation analysis"
+                }
             }
         },
         "status": "healthy" if moderator else "service unavailable"
     """Health check endpoint"""
     return jsonify({
         "status": "healthy" if moderator else "unhealthy",
+        "service": "Enhanced Arabic Content Moderator with Strict Standards",
         "timestamp": datetime.now().isoformat(),
+        "api_available": moderator is not None,
+        "features_active": [
+            "profanity_detection",
+            "religious_filtering",
+            "cultural_compliance",
+            "news_detection",
+            "ai_moderation"
+        ] if moderator else []
     })
+@app.route('/violations/check', methods=['POST'])
+def check_violations():
+    """
+    Run the moderator's individual pre-checks on a text and report each result.
+    Calls the profanity/violation, religious-sensitivity and news-pattern
+    checks directly instead of going through moderate_story().
+    Expected JSON payload:
+    {
+        "content": "Text to check for violations"
+    }
+    Returns:
+        200 with a "content_analysis" object (the three check results, an
+        "overall_safe" flag, the content length and a timestamp);
+        400 if the body is not a JSON object, or "content" is missing or
+        not a string;
+        500 if the moderator is not configured or one of the checks raises.
+    """
+    if not moderator:
+        return jsonify({
+            "error": "خدمة المراجعة غير متوفرة - لم يتم تكوين مفتاح API",
+            "error_en": "Moderation service not available - API key not configured"
+        }), 500
+    try:
+        # silent=True yields None for a missing or malformed JSON body, so a bad
+        # request gets the 400 below instead of falling into the 500 handler.
+        data = request.get_json(silent=True)
+        # Only a JSON object can carry the "content" key; lists, strings and
+        # numbers are rejected as client errors.
+        if not isinstance(data, dict) or 'content' not in data:
+            return jsonify({
+                "error": "المحتوى مفقود في الطلب",
+                "error_en": "Missing content in request"
+            }), 400
+        content = data['content']
+        # The checks below and len() expect text; reject anything else early.
+        if not isinstance(content, str):
+            return jsonify({
+                "error": "يجب أن يكون المحتوى نصاً",
+                "error_en": "Content must be a string"
+            }), 400
+        # Check for violations
+        profanity_check = moderator._check_profanity_and_violations(content)
+        religious_check = moderator._check_religious_sensitivity(content)
+        news_check = moderator._pre_check_news_content(content)
+        # Safe only when none of the three checks flagged the text.
+        has_problem = (profanity_check['has_violations'] or
+                       religious_check['has_issues'] or
+                       news_check)
+        return jsonify({
+            "content_analysis": {
+                "profanity_violations": profanity_check,
+                "religious_sensitivity": religious_check,
+                "news_content_detected": news_check,
+                "overall_safe": not has_problem,
+                "content_length": len(content),
+                "timestamp": datetime.now().isoformat()
+            }
+        })
+    except Exception as e:
+        # Top-level boundary: log with traceback and answer with a generic 500.
+        logger.exception("Error in check_violations: %s", e)
+        return jsonify({
+            "error": "خطأ داخلي في الخادم",
+            "error_en": "Internal server error",
+            "details": str(e)
+        }), 500
 @app.route('/moderate', methods=['POST'])
 def moderate_content():
     """
+    Enhanced moderation endpoint with strict cultural and religious standards
     Expected JSON payload:
     {
     {
         "approved": true/false,
         "response": "true"/"no",
+        "reason": "detailed reason in Arabic",
+        "violation_type": "type of violation",
+        "violation_details": "detailed analysis",
         "timestamp": "ISO timestamp"
     }
     """
         story_content = data['story_content']
         result = moderator.moderate_story(story_content)
+        # Add additional metadata
+        result["moderation_version"] = "4.0.0"
+        result["strict_mode"] = True
+        result["cultural_compliance"] = "enforced"
         return jsonify(result)
     except Exception as e:
             "error_en": "Internal server error",
             "approved": False,
             "response": "no",
+            "violation_type": "system_error",
             "details": str(e)
         }), 500
 @app.route('/moderate/batch', methods=['POST'])
 def moderate_batch():
     """
+    Enhanced batch moderation endpoint with detailed violation tracking
     Expected JSON payload:
     {
         results = []
         approved_count = 0
+        violation_stats = {}
         for i, story in enumerate(stories):
             logger.info(f"Moderating story {i+1}/{len(stories)}")
             result = moderator.moderate_story(story)
+            # Track violation statistics
+            violation_type = result.get("violation_type", "none")
+            violation_stats[violation_type] = violation_stats.get(violation_type, 0) + 1
             results.append({
                 "story_index": i,
                 "result": result
             })
             if result.get("approved", False):
                 approved_count += 1
                 "total_processed": len(results),
                 "approved_count": approved_count,
                 "rejected_count": len(results) - approved_count,
+                "approval_rate": f"{(approved_count/len(results)*100):.1f}%" if results else "0%",
+                "violation_statistics": violation_stats
+            },
+            "moderation_info": {
+                "version": "4.0.0",
+                "strict_mode": True,
+                "cultural_compliance": "enforced",
+                "religious_filtering": "active"
             },
             "timestamp": datetime.now().isoformat()
         })
             "details": str(e)
         }), 500
+@app.route('/standards', methods=['GET'])
+def get_standards():
+    """
+    Return the moderation standards as a static JSON document.
+    The payload has three top-level sections, in this order:
+    "moderation_standards", "violation_categories" and "enforcement_policy".
+    Nothing is computed per request.
+    """
+    # Cultural rules, grouped by theme.
+    cultural_compliance = {
+        "religious_respect": {
+            "description": "Strict enforcement of respect for Islamic values and all religions",
+            "violations_include": [
+                "Blasphemy against Allah, prophets, or religious figures",
+                "Mockery of religious practices or symbols",
+                "Inappropriate use of religious terms",
+                "Disrespect towards religious texts or teachings",
+            ],
+        },
+        "family_values": {
+            "description": "Protection of family honor and traditional values",
+            "violations_include": [
+                "Insults targeting family members",
+                "Disrespect towards parents or elders",
+                "Inappropriate sexual content",
+                "Promotion of immoral relationships",
+            ],
+        },
+        "language_purity": {
+            "description": "Rejection of profanity and offensive language",
+            "categories": [
+                "Sexual explicit terms",
+                "Excretory language",
+                "Family honor insults",
+                "Ethnic and racial slurs",
+            ],
+        },
+    }
+    # What kinds of text are accepted as stories and what is turned away.
+    content_requirements = {
+        "acceptable_genres": [
+            "Educational moral stories",
+            "Traditional folk tales",
+            "Religious stories (respectful)",
+            "Social stories with positive messages",
+            "Children's educational content",
+            "Historical narratives (appropriate)",
+            "Fantasy and adventure (culturally appropriate)",
+        ],
+        "rejected_content": [
+            "News reports and journalism",
+            "Sports commentary and analysis",
+            "Political statements and speeches",
+            "Commercial product reviews",
+            "Technical documentation",
+            "Adult or sexual content",
+            "Content with profanity",
+            "Religious blasphemy",
+            "Cultural violations",
+        ],
+    }
+    # Violation identifiers bucketed by severity.
+    violation_categories = {
+        "critical": [
+            "religious_blasphemy",
+            "explicit_sexual",
+            "family_honor_severe",
+        ],
+        "high": [
+            "cultural_taboo",
+            "ethnic_slurs",
+            "sexual_content",
+        ],
+        "medium": [
+            "excretory",
+            "inappropriate_language",
+        ],
+    }
+    enforcement_policy = {
+        "zero_tolerance": [
+            "Religious blasphemy or disrespect",
+            "Explicit sexual content",
+            "Severe family honor violations",
+            "Ethnic or racial discrimination",
+        ],
+        "strict_filtering": [
+            "All forms of profanity",
+            "Cultural taboos",
+            "Inappropriate religious content usage",
+            "News and journalistic content",
+        ],
+    }
+    return jsonify({
+        "moderation_standards": {
+            "version": "4.0.0",
+            "enforcement_level": "strict",
+            "cultural_compliance": cultural_compliance,
+            "content_requirements": content_requirements,
+        },
+        "violation_categories": violation_categories,
+        "enforcement_policy": enforcement_policy,
+    })
 if __name__ == '__main__':
     # For local testing
     port = int(os.environ.get('PORT', 7860))
+    # 0.0.0.0 listens on all interfaces; the PORT environment variable overrides 7860.
+    app.run(host='0.0.0.0', port=port, debug=False)
+            logger.error(f"Exception calling Deepseek API: {str(e)}")
+            return {"error": str(e)}
+    def _pre_check_news_content(self, story_content: str) -> bool:
+        """
+        Heuristically decide whether the text reads like a news report.
+        Two signals are used: a list of Arabic news-style phrase patterns, and
+        the co-occurrence of a sports term with a reporting verb.
+        Args:
+            story_content: Content to check
+        Returns:
+            True if any phrase pattern matches, or if the text contains both a
+            sports term and a reporting verb; False otherwise
+        """
+        # Phrase patterns typical of press, political, business and tech reporting.
+        headline_regexes = (
+            r'بعد المباراة.*قال',
+            r'في مؤتمر صحفي',
+            r'صرح.*الوزير',
+            r'أعلن.*المسؤول',
+            r'فاز.*بجائزة.*رجل المباراة',
+            r'تألق.*ومنع.*أهداف',
+            r'خلال.*الاجتماع',
+            r'في.*الجلسة',
+            r'الرئيس.*التقى',
+            r'البرلمان.*ناقش',
+            r'الحكومة.*قررت',
+            r'البورصة.*ارتفعت',
+            r'أسعار.*النفط',
+            r'الشركة.*حققت',
+            r'المحافظ.*افتتح',
+            r'بلدية.*المدينة',
+            r'التطبيق.*الجديد',
+            r'الهاتف.*يتميز',
+            r'في.*محافظة',
+        )
+        # A single matching pattern is enough to classify the text as news.
+        if any(re.search(regex, story_content, re.IGNORECASE) for regex in headline_regexes):
+            return True
+        # Otherwise require both a sports term and a reporting verb (plain substring tests).
+        sports_vocabulary = ('المباراة', 'اللاعب', 'المدرب', 'الفريق', 'الهدف', 'الشوط')
+        reporting_verbs = ('صرح', 'أعلن', 'أكد', 'قال', 'فاز', 'تألق')
+        mentions_sport = any(word in story_content for word in sports_vocabulary)
+        uses_reporting_verb = any(verb in story_content for verb in reporting_verbs)
+        return mentions_sport and uses_reporting_verb
+    def _validate_story_format(self, story_content: str) -> bool:
+        """
+        Gate a submission on basic format rules before any content checks run.
+        The text must be a non-empty string, be at least 50 characters long
+        after trimming, consist of at least 30% Arabic-script characters
+        (measured against the trimmed length), and not be flagged by
+        _pre_check_news_content().
+        Args:
+            story_content: Story content to validate
+        Returns:
+            True when every rule passes, False as soon as one fails
+        """
+        if not story_content or not isinstance(story_content, str):
+            return False
+        trimmed_length = len(story_content.strip())
+        # Anything shorter than 50 characters is rejected as too short for a story.
+        if trimmed_length < 50:
+            return False
+        # Count characters that fall in the Arabic Unicode ranges listed below.
+        arabic_char_re = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
+        arabic_total = sum(1 for _ in arabic_char_re.finditer(story_content))
+        if arabic_total < trimmed_length * 0.3:
+            return False
+        # News-like text fails validation; everything else passes.
+        return not self._pre_check_news_content(story_content)
+    def moderate_story(self, story_content: str) -> Dict[str, Any]:
+        """
+        Main method to moderate Arabic story content with enhanced validation and strict enforcement
+        Args:
+            story_content: The Arabic story to moderate
+        Returns:
+            Dictionary with moderation result
+        """
+        # Enhanced validation
+        if not self._validate_story_format(story_content):
+            return {
+                "approved": False,
+                "response": "no",
+                "reason": "المحتوى يبدو أنه تقرير إخباري أو صحفي وليس قصة أدبية، أو فشل في التحقق من صحة التنسيق",
+                "violation_type": "format_violation",
+                "timestamp": datetime.now().isoformat()
+            }
+        # Clean and prepare content
+        cleaned_content = story_content.strip()
+        # Check for profanity and violations first
+        violation_check = self._check_profanity_and_violations(cleaned_content)
+        if violation_check['has_violations']:
+            return {
+                "approved": False,
+                "response": "no",
+                "reason": "المحتوى يحتوي على انتهاكات صريحة للقيم الدينية أو الثقافية أو ألفاظ نابية",
+                "violation_type": "content_violation",
+                "violation_details": violation_check,
+                "timestamp": datetime.now().isoformat()
+            }
+        # Check for religious sensitivity issues
+        religious_check = self._check_religious_sensitivity(cleaned_content)
+        if religious_check['has_issues']:
+            return {
+                "approved": False,
+                "response": "no",
+                "reason": "المحتوى يحتوي على استخدام غير مناسب للمصطلحات الدينية أو انتهاكات دينية",
+                "violation_type": "religious_violation",
+                "religious_details": religious_check,
+                "timestamp": datetime.now().isoformat()
+            }
+        # Call Deepseek API for final check
+        api_response = self._call_deepseek_api(cleaned_content)
+        if "error" in api_response:
+            logger.error(f"Moderation failed: {api_response['error']}")
+            return {
+                "approved": False,
+                "response": "no",
+                "reason": "خطأ في خدمة المراجعة",
+                "violation_type": "service_error",
+                "error": api_response["error"],
+                "timestamp": datetime.now().isoformat()
+            }
+        try:
+            # Extract the moderation decision
+            ai_response = api_response.get("choices", [{}])[0].get("message", {}).get("content", "").strip().lower()
+            # Clean the response (remove any extra whitespace or characters)
+            ai_response = re.sub(r'[^\w]', '', ai_response)
+            # Determine if content is approved (be very strict)
+            approved = ai_response == "true"
+            response_value = "true" if approved else "no"
+            result = {
+                "approved": approved,
+                "response": response_value,
+                "ai_decision": ai_response,
+                "timestamp": datetime.now().isoformat(),
+                "content_length": len(cleaned_content),
+                "violation_type": "none" if approved else "ai_detected"
+            }
+            if not approved:
+                result["reason"] = "المحتوى ينتهك المعايير الصارمة للثقافة العربية والإسلامية، أو أنه ليس قصة أدبية حقيقية بل محتوى إخباري أو غير مناسب"
+            else:
+                result["reason"] = "المحتوى مقبول ويلتزم بالمعايير الصارمة المطلوبة للثقافة العربية والإسلامية"
+            logger.info(f"Moderation completed: {response_value} for content of length {len(cleaned_content)}")
+            return result
+        except Exception as e:
+            logger.error(