walker11 commited on
Commit
930b9b6
·
verified ·
1 Parent(s): e3c83c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +360 -346
app.py CHANGED
@@ -1,347 +1,361 @@
1
- import os
2
- import json
3
- import logging
4
- from typing import Dict, Any, List
5
- import requests
6
- from datetime import datetime
7
- import re
8
- from flask import Flask, request, jsonify
9
-
10
- # Configure logging
11
- logging.basicConfig(level=logging.INFO)
12
- logger = logging.getLogger(__name__)
13
-
14
- class ArabicContentModerator:
15
- """
16
- Arabic Story Content Moderation Model using Deepseek API
17
- Checks for cultural violations and inappropriate content
18
- """
19
-
20
- def __init__(self, deepseek_api_key: str = None):
21
- """
22
- Initialize the content moderator
23
-
24
- Args:
25
- deepseek_api_key: Deepseek API key
26
- """
27
- self.api_key = deepseek_api_key or os.getenv('DEEPSEEK_API_KEY')
28
- if not self.api_key:
29
- raise ValueError("Deepseek API key is required")
30
-
31
- self.api_url = "https://api.deepseek.com/chat/completions"
32
- self.headers = {
33
- "Authorization": f"Bearer {self.api_key}",
34
- "Content-Type": "application/json"
35
- }
36
-
37
- # Cultural and content guidelines for Arabic stories
38
- self.moderation_prompt = """
39
- You are a content reviewer specialized in Arabic literature and culture. Your task is to review short Arabic stories to ensure they comply with Arab and Islamic cultural values and do not contain inappropriate content.
40
-
41
- Review criteria:
42
-
43
- 1. Cultural and religious content:
44
- - No mockery of Islam or Arab traditions
45
- - No disrespectful approach to topics that contradict Islamic values
46
- - Respect for social and religious symbols
47
-
48
- 2. Sexual content and violence:
49
- - No explicit sexual content or overt sexual innuendos
50
- - No excessive or graphic violence
51
- - No profanity or obscene language
52
-
53
- 3. Sensitive political content:
54
- - Avoid sectarian or ethnic incitement
55
- - No approach to controversial political topics in an offensive manner
56
-
57
- 4. Social values:
58
- - Respect for family values and Arab society
59
- - No promotion of socially destructive behaviors
60
-
61
- Response instructions:
62
- - If the story complies with all criteria, answer with "true"
63
- - If the story violates any of the criteria, answer with "no"
64
- - Your answer must only be "true" or "no" without any additional text
65
-
66
- Story to review:
67
- """
68
-
69
- def _call_deepseek_api(self, story_content: str) -> Dict[str, Any]:
70
- """
71
- Call Deepseek API for content moderation
72
-
73
- Args:
74
- story_content: The Arabic story content to moderate
75
-
76
- Returns:
77
- API response dictionary
78
- """
79
- try:
80
- payload = {
81
- "model": "deepseek-chat",
82
- "messages": [
83
- {
84
- "role": "system",
85
- "content": "You are a content reviewer specialized in Arabic literature. Your task is to review stories to ensure they comply with Arab cultural values."
86
- },
87
- {
88
- "role": "user",
89
- "content": f"{self.moderation_prompt}\n\n{story_content}"
90
- }
91
- ],
92
- "max_tokens": 10,
93
- "temperature": 0.1,
94
- "stream": False
95
- }
96
-
97
- response = requests.post(
98
- self.api_url,
99
- headers=self.headers,
100
- json=payload,
101
- timeout=30
102
- )
103
-
104
- if response.status_code == 200:
105
- return response.json()
106
- else:
107
- logger.error(f"API Error: {response.status_code} - {response.text}")
108
- return {"error": f"API Error: {response.status_code}"}
109
-
110
- except Exception as e:
111
- logger.error(f"Exception calling Deepseek API: {str(e)}")
112
- return {"error": str(e)}
113
-
114
- def _validate_story_format(self, story_content: str) -> bool:
115
- """
116
- Basic validation of story format and content
117
-
118
- Args:
119
- story_content: Story content to validate
120
-
121
- Returns:
122
- Boolean indicating if format is valid
123
- """
124
- if not story_content or not isinstance(story_content, str):
125
- return False
126
-
127
- # Check minimum length (at least 10 characters)
128
- if len(story_content.strip()) < 10:
129
- return False
130
-
131
- # Check for Arabic characters
132
- arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
133
- if not arabic_pattern.search(story_content):
134
- return False
135
-
136
- return True
137
-
138
- def moderate_story(self, story_content: str) -> Dict[str, Any]:
139
- """
140
- Main method to moderate Arabic story content
141
-
142
- Args:
143
- story_content: The Arabic story to moderate
144
-
145
- Returns:
146
- Dictionary with moderation result
147
- """
148
- # Validate input
149
- if not self._validate_story_format(story_content):
150
- return {
151
- "approved": False,
152
- "response": "no",
153
- "reason": "Invalid story format or missing Arabic content",
154
- "timestamp": datetime.now().isoformat()
155
- }
156
-
157
- # Clean and prepare content
158
- cleaned_content = story_content.strip()
159
-
160
- # Call Deepseek API
161
- api_response = self._call_deepseek_api(cleaned_content)
162
-
163
- if "error" in api_response:
164
- logger.error(f"Moderation failed: {api_response['error']}")
165
- return {
166
- "approved": False,
167
- "response": "no",
168
- "reason": "Moderation service error",
169
- "error": api_response["error"],
170
- "timestamp": datetime.now().isoformat()
171
- }
172
-
173
- try:
174
- # Extract the moderation decision
175
- ai_response = api_response.get("choices", [{}])[0].get("message", {}).get("content", "").strip().lower()
176
-
177
- # Determine if content is approved
178
- approved = ai_response == "true"
179
- response_value = "true" if approved else "no"
180
-
181
- result = {
182
- "approved": approved,
183
- "response": response_value,
184
- "ai_decision": ai_response,
185
- "timestamp": datetime.now().isoformat()
186
- }
187
-
188
- if not approved:
189
- result["reason"] = "Content violates community guidelines or cultural norms"
190
-
191
- logger.info(f"Moderation completed: {response_value}")
192
- return result
193
-
194
- except Exception as e:
195
- logger.error(f"Error processing API response: {str(e)}")
196
- return {
197
- "approved": False,
198
- "response": "no",
199
- "reason": "Error processing moderation result",
200
- "error": str(e),
201
- "timestamp": datetime.now().isoformat()
202
- }
203
-
204
-
205
- # Flask application
206
- app = Flask(__name__)
207
-
208
- # Initialize the moderator (API key will be set via environment variable)
209
- try:
210
- moderator = ArabicContentModerator()
211
- logger.info("Arabic Content Moderator initialized successfully")
212
- except ValueError as e:
213
- logger.error(f"Failed to initialize moderator: {e}")
214
- moderator = None
215
-
216
- @app.route('/', methods=['GET'])
217
- def home():
218
- """Home endpoint with API documentation"""
219
- return jsonify({
220
- "service": "Arabic Story Content Moderator",
221
- "version": "1.0.0",
222
- "description": "AI-powered moderation for Arabic short stories",
223
- "endpoints": {
224
- "/health": "Health check",
225
- "/moderate": "POST - Moderate single story",
226
- "/moderate/batch": "POST - Moderate multiple stories"
227
- },
228
- "usage": {
229
- "moderate": {
230
- "method": "POST",
231
- "payload": {"story_content": "Arabic story text"},
232
- "response": {"approved": "boolean", "response": "true/no"}
233
- }
234
- },
235
- "status": "healthy" if moderator else "service unavailable"
236
- })
237
-
238
- @app.route('/health', methods=['GET'])
239
- def health_check():
240
- """Health check endpoint"""
241
- return jsonify({
242
- "status": "healthy" if moderator else "unhealthy",
243
- "service": "Arabic Content Moderator",
244
- "timestamp": datetime.now().isoformat(),
245
- "api_available": moderator is not None
246
- })
247
-
248
- @app.route('/moderate', methods=['POST'])
249
- def moderate_content():
250
- """
251
- Main moderation endpoint
252
-
253
- Expected JSON payload:
254
- {
255
- "story_content": "Arabic story text here"
256
- }
257
-
258
- Returns:
259
- {
260
- "approved": true/false,
261
- "response": "true"/"no",
262
- "timestamp": "ISO timestamp"
263
- }
264
- """
265
- if not moderator:
266
- return jsonify({
267
- "error": "Moderation service not available - API key not configured",
268
- "approved": False,
269
- "response": "no"
270
- }), 500
271
-
272
- try:
273
- data = request.get_json()
274
-
275
- if not data or 'story_content' not in data:
276
- return jsonify({
277
- "error": "Missing story_content in request",
278
- "approved": False,
279
- "response": "no"
280
- }), 400
281
-
282
- story_content = data['story_content']
283
- result = moderator.moderate_story(story_content)
284
-
285
- return jsonify(result)
286
-
287
- except Exception as e:
288
- logger.error(f"Error in moderate_content: {str(e)}")
289
- return jsonify({
290
- "error": "Internal server error",
291
- "approved": False,
292
- "response": "no",
293
- "details": str(e)
294
- }), 500
295
-
296
- @app.route('/moderate/batch', methods=['POST'])
297
- def moderate_batch():
298
- """
299
- Batch moderation endpoint
300
-
301
- Expected JSON payload:
302
- {
303
- "stories": ["story1", "story2", "story3"]
304
- }
305
- """
306
- if not moderator:
307
- return jsonify({
308
- "error": "Moderation service not available - API key not configured"
309
- }), 500
310
-
311
- try:
312
- data = request.get_json()
313
-
314
- if not data or 'stories' not in data:
315
- return jsonify({
316
- "error": "Missing stories array in request"
317
- }), 400
318
-
319
- stories = data['stories']
320
- if not isinstance(stories, list):
321
- return jsonify({
322
- "error": "Stories must be an array"
323
- }), 400
324
-
325
- results = []
326
- for i, story in enumerate(stories):
327
- logger.info(f"Moderating story {i+1}/{len(stories)}")
328
- result = moderator.moderate_story(story)
329
- results.append(result)
330
-
331
- return jsonify({
332
- "results": results,
333
- "total_processed": len(results),
334
- "timestamp": datetime.now().isoformat()
335
- })
336
-
337
- except Exception as e:
338
- logger.error(f"Error in moderate_batch: {str(e)}")
339
- return jsonify({
340
- "error": "Internal server error",
341
- "details": str(e)
342
- }), 500
343
-
344
- if __name__ == '__main__':
345
- # For local testing
346
- port = int(os.environ.get('PORT', 7860))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  app.run(host='0.0.0.0', port=port, debug=False)
 
1
+ import os
2
+ import json
3
+ import logging
4
+ from typing import Dict, Any, List
5
+ import requests
6
+ from datetime import datetime
7
+ import re
8
+ from flask import Flask, request, jsonify
9
+
10
+ # Configure logging
11
+ logging.basicConfig(level=logging.INFO)
12
+ logger = logging.getLogger(__name__)
13
+
14
+ class ArabicContentModerator:
15
+ """
16
+ Arabic Story Content Moderation Model using Deepseek API
17
+ Checks for cultural violations and inappropriate content
18
+ """
19
+
20
+ def __init__(self, deepseek_api_key: str = None):
21
+ """
22
+ Initialize the content moderator
23
+
24
+ Args:
25
+ deepseek_api_key: Deepseek API key
26
+ """
27
+ self.api_key = deepseek_api_key or os.getenv('DEEPSEEK_API_KEY')
28
+ if not self.api_key:
29
+ raise ValueError("Deepseek API key is required")
30
+
31
+ self.api_url = "https://api.deepseek.com/chat/completions"
32
+ self.headers = {
33
+ "Authorization": f"Bearer {self.api_key}",
34
+ "Content-Type": "application/json"
35
+ }
36
+
37
+ # Cultural and content guidelines for Arabic stories
38
+ self.moderation_prompt = """
39
+ **Strict Arabic Content Moderation Protocol**
40
+ You are an AI cultural compliance scanner for Arabic stories. Perform sequential analysis with forensic precision:
41
+
42
+ 1. **Lexical Deep Scan**
43
+ - Tokenize every word/phrase comparing against:
44
+ a. Religious Profanity Database: [سب الدين, سب الله, سب الرسول, سب القرآن, لحس دينه, كفر بالله, ...]
45
+ b. Egyptian/Arabic Profanity Index: [متناك, خول, قحبة, كس أمك, طيز, زق, ...]
46
+ c. Adult Content Triggers: جنس صريح, عري, علاقة جنسية, إباحية
47
+
48
+ 2. **Contextual Zero-Tolerance Checks**
49
+ - Immediate violation if ANY religious blasphemy detected (e.g., "سب الدين" = automatic fail)
50
+ - Red-flag Egyptian profanity regardless of context (e.g., "يا خول" = violation even if joking)
51
+ - Reject any sexual descriptions beyond medical/educational needs
52
+
53
+ 3. **Cultural Alignment Verification**
54
+ Verify story DOES NOT:
55
+ - Mock Islamic pillars/prayers/Quran
56
+ - Normalize haram relationships (زنا, شذوذ)
57
+ - Promote violence/drugs without moral condemnation
58
+ - Degrade family values (e.g., rebellious children as heroes)
59
+
60
+ 4. **Dialect-Specific Profanity Filter**
61
+ Activate regional sub-scanners for:
62
+ - Egyptian: [يا وسخة, يا ابن الوسخة, كسختك, ...]
63
+ - Gulf: [يا حمار, يا كلب, خنيث, ...]
64
+ - Levantine: [شرميط, عرص, ...]
65
+
66
+ **Output Requirements:**
67
+ ```json
68
+ {
69
+ "compliance_status": "approved/rejected",
70
+ "violations": [
71
+ {
72
+ "excerpt": "exact offending phrase",
73
+ "violation_type": "religious/sexual/profanity/cultural",
74
+ "severity": "critical/high",
75
+ "context": "explain why it violates norms"
76
+ }
77
+ ],
78
+ "risk_score": 0-100%,
79
+ "dialect_flags": ["egyptian_profanity", "levantine_slurs", ...]
80
+ }
81
+ """
82
+
83
+ def _call_deepseek_api(self, story_content: str) -> Dict[str, Any]:
84
+ """
85
+ Call Deepseek API for content moderation
86
+
87
+ Args:
88
+ story_content: The Arabic story content to moderate
89
+
90
+ Returns:
91
+ API response dictionary
92
+ """
93
+ try:
94
+ payload = {
95
+ "model": "deepseek-chat",
96
+ "messages": [
97
+ {
98
+ "role": "system",
99
+ "content": "You are a content reviewer specialized in Arabic literature. Your task is to review stories to ensure they comply with Arab cultural values."
100
+ },
101
+ {
102
+ "role": "user",
103
+ "content": f"{self.moderation_prompt}\n\n{story_content}"
104
+ }
105
+ ],
106
+ "max_tokens": 10,
107
+ "temperature": 0.1,
108
+ "stream": False
109
+ }
110
+
111
+ response = requests.post(
112
+ self.api_url,
113
+ headers=self.headers,
114
+ json=payload,
115
+ timeout=30
116
+ )
117
+
118
+ if response.status_code == 200:
119
+ return response.json()
120
+ else:
121
+ logger.error(f"API Error: {response.status_code} - {response.text}")
122
+ return {"error": f"API Error: {response.status_code}"}
123
+
124
+ except Exception as e:
125
+ logger.error(f"Exception calling Deepseek API: {str(e)}")
126
+ return {"error": str(e)}
127
+
128
+ def _validate_story_format(self, story_content: str) -> bool:
129
+ """
130
+ Basic validation of story format and content
131
+
132
+ Args:
133
+ story_content: Story content to validate
134
+
135
+ Returns:
136
+ Boolean indicating if format is valid
137
+ """
138
+ if not story_content or not isinstance(story_content, str):
139
+ return False
140
+
141
+ # Check minimum length (at least 10 characters)
142
+ if len(story_content.strip()) < 10:
143
+ return False
144
+
145
+ # Check for Arabic characters
146
+ arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
147
+ if not arabic_pattern.search(story_content):
148
+ return False
149
+
150
+ return True
151
+
152
+ def moderate_story(self, story_content: str) -> Dict[str, Any]:
153
+ """
154
+ Main method to moderate Arabic story content
155
+
156
+ Args:
157
+ story_content: The Arabic story to moderate
158
+
159
+ Returns:
160
+ Dictionary with moderation result
161
+ """
162
+ # Validate input
163
+ if not self._validate_story_format(story_content):
164
+ return {
165
+ "approved": False,
166
+ "response": "no",
167
+ "reason": "Invalid story format or missing Arabic content",
168
+ "timestamp": datetime.now().isoformat()
169
+ }
170
+
171
+ # Clean and prepare content
172
+ cleaned_content = story_content.strip()
173
+
174
+ # Call Deepseek API
175
+ api_response = self._call_deepseek_api(cleaned_content)
176
+
177
+ if "error" in api_response:
178
+ logger.error(f"Moderation failed: {api_response['error']}")
179
+ return {
180
+ "approved": False,
181
+ "response": "no",
182
+ "reason": "Moderation service error",
183
+ "error": api_response["error"],
184
+ "timestamp": datetime.now().isoformat()
185
+ }
186
+
187
+ try:
188
+ # Extract the moderation decision
189
+ ai_response = api_response.get("choices", [{}])[0].get("message", {}).get("content", "").strip().lower()
190
+
191
+ # Determine if content is approved
192
+ approved = ai_response == "true"
193
+ response_value = "true" if approved else "no"
194
+
195
+ result = {
196
+ "approved": approved,
197
+ "response": response_value,
198
+ "ai_decision": ai_response,
199
+ "timestamp": datetime.now().isoformat()
200
+ }
201
+
202
+ if not approved:
203
+ result["reason"] = "Content violates community guidelines or cultural norms"
204
+
205
+ logger.info(f"Moderation completed: {response_value}")
206
+ return result
207
+
208
+ except Exception as e:
209
+ logger.error(f"Error processing API response: {str(e)}")
210
+ return {
211
+ "approved": False,
212
+ "response": "no",
213
+ "reason": "Error processing moderation result",
214
+ "error": str(e),
215
+ "timestamp": datetime.now().isoformat()
216
+ }
217
+
218
+
219
+ # Flask application
220
+ app = Flask(__name__)
221
+
222
+ # Initialize the moderator (API key will be set via environment variable)
223
+ try:
224
+ moderator = ArabicContentModerator()
225
+ logger.info("Arabic Content Moderator initialized successfully")
226
+ except ValueError as e:
227
+ logger.error(f"Failed to initialize moderator: {e}")
228
+ moderator = None
229
+
230
+ @app.route('/', methods=['GET'])
231
+ def home():
232
+ """Home endpoint with API documentation"""
233
+ return jsonify({
234
+ "service": "Arabic Story Content Moderator",
235
+ "version": "1.0.0",
236
+ "description": "AI-powered moderation for Arabic short stories",
237
+ "endpoints": {
238
+ "/health": "Health check",
239
+ "/moderate": "POST - Moderate single story",
240
+ "/moderate/batch": "POST - Moderate multiple stories"
241
+ },
242
+ "usage": {
243
+ "moderate": {
244
+ "method": "POST",
245
+ "payload": {"story_content": "Arabic story text"},
246
+ "response": {"approved": "boolean", "response": "true/no"}
247
+ }
248
+ },
249
+ "status": "healthy" if moderator else "service unavailable"
250
+ })
251
+
252
+ @app.route('/health', methods=['GET'])
253
+ def health_check():
254
+ """Health check endpoint"""
255
+ return jsonify({
256
+ "status": "healthy" if moderator else "unhealthy",
257
+ "service": "Arabic Content Moderator",
258
+ "timestamp": datetime.now().isoformat(),
259
+ "api_available": moderator is not None
260
+ })
261
+
262
+ @app.route('/moderate', methods=['POST'])
263
+ def moderate_content():
264
+ """
265
+ Main moderation endpoint
266
+
267
+ Expected JSON payload:
268
+ {
269
+ "story_content": "Arabic story text here"
270
+ }
271
+
272
+ Returns:
273
+ {
274
+ "approved": true/false,
275
+ "response": "true"/"no",
276
+ "timestamp": "ISO timestamp"
277
+ }
278
+ """
279
+ if not moderator:
280
+ return jsonify({
281
+ "error": "Moderation service not available - API key not configured",
282
+ "approved": False,
283
+ "response": "no"
284
+ }), 500
285
+
286
+ try:
287
+ data = request.get_json()
288
+
289
+ if not data or 'story_content' not in data:
290
+ return jsonify({
291
+ "error": "Missing story_content in request",
292
+ "approved": False,
293
+ "response": "no"
294
+ }), 400
295
+
296
+ story_content = data['story_content']
297
+ result = moderator.moderate_story(story_content)
298
+
299
+ return jsonify(result)
300
+
301
+ except Exception as e:
302
+ logger.error(f"Error in moderate_content: {str(e)}")
303
+ return jsonify({
304
+ "error": "Internal server error",
305
+ "approved": False,
306
+ "response": "no",
307
+ "details": str(e)
308
+ }), 500
309
+
310
+ @app.route('/moderate/batch', methods=['POST'])
311
+ def moderate_batch():
312
+ """
313
+ Batch moderation endpoint
314
+
315
+ Expected JSON payload:
316
+ {
317
+ "stories": ["story1", "story2", "story3"]
318
+ }
319
+ """
320
+ if not moderator:
321
+ return jsonify({
322
+ "error": "Moderation service not available - API key not configured"
323
+ }), 500
324
+
325
+ try:
326
+ data = request.get_json()
327
+
328
+ if not data or 'stories' not in data:
329
+ return jsonify({
330
+ "error": "Missing stories array in request"
331
+ }), 400
332
+
333
+ stories = data['stories']
334
+ if not isinstance(stories, list):
335
+ return jsonify({
336
+ "error": "Stories must be an array"
337
+ }), 400
338
+
339
+ results = []
340
+ for i, story in enumerate(stories):
341
+ logger.info(f"Moderating story {i+1}/{len(stories)}")
342
+ result = moderator.moderate_story(story)
343
+ results.append(result)
344
+
345
+ return jsonify({
346
+ "results": results,
347
+ "total_processed": len(results),
348
+ "timestamp": datetime.now().isoformat()
349
+ })
350
+
351
+ except Exception as e:
352
+ logger.error(f"Error in moderate_batch: {str(e)}")
353
+ return jsonify({
354
+ "error": "Internal server error",
355
+ "details": str(e)
356
+ }), 500
357
+
358
+ if __name__ == '__main__':
359
+ # For local testing
360
+ port = int(os.environ.get('PORT', 7860))
361
  app.run(host='0.0.0.0', port=port, debug=False)