Mr-Help committed on
Commit
44f14a4
·
verified ·
1 Parent(s): caeb779

Create message_understanding.py

Browse files
Files changed (1) hide show
  1. knowledge/message_understanding.py +508 -0
knowledge/message_understanding.py ADDED
@@ -0,0 +1,508 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional
2
+
3
+
4
def normalize_text(text: str) -> str:
    """Return *text* trimmed of surrounding whitespace and lower-cased.

    Any falsy input (``None``, ``""``) yields the empty string.
    """
    if not text:
        return ""
    trimmed = text.strip()
    return trimmed.lower()
6
+
7
+
8
def contains_any(text: str, keywords: list) -> bool:
    """Return True if at least one entry of *keywords* occurs inside *text*.

    Matching is plain substring containment; an empty *keywords* collection
    gives False.
    """
    for keyword in keywords:
        if keyword in text:
            return True
    return False
10
+
11
+
12
def is_yes(text: str) -> bool:
    """Return True when the normalized *text* equals one of the accepted
    affirmative replies (whole-message match, not substring)."""
    affirmatives = [
        "نعم", "اه", "أه", "ايوه", "أيوه", "yes", "y",
        "درست", "اه درست", "أيوه درست",
    ]
    normalized = normalize_text(text)
    return normalized in affirmatives
18
+
19
+
20
def is_no(text: str) -> bool:
    """Return True when the normalized *text* equals one of the accepted
    negative replies (whole-message match, not substring)."""
    negatives = [
        "لا", "لأ", "لاا", "no", "n",
        "مدرستش", "ما درستش", "لا مدرستش",
    ]
    normalized = normalize_text(text)
    return normalized in negatives
26
+
27
+
28
def is_new_student(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    new-student phrases as a substring."""
    phrases = [
        "طالب جديد", "جديد", "عميل جديد", "اول مرة", "أول مرة",
        "لسه جديد", "مشترك جديد",
    ]
    return contains_any(normalize_text(text), phrases)
34
+
35
+
36
def is_current_student(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    current-student phrases as a substring."""
    phrases = [
        "طالب حالي", "حالي", "عميل حالي", "مشترك", "مشترك حالي",
        "أنا طالب", "انا طالب عندكم", "انا مشترك",
    ]
    return contains_any(normalize_text(text), phrases)
42
+
43
+
44
def is_adults(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    adult-audience phrases as a substring."""
    phrases = ["كبار", "adult", "adults", "الكبار", "كورسات الكبار"]
    return contains_any(normalize_text(text), phrases)
49
+
50
+
51
def is_children(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    children-audience phrases as a substring."""
    phrases = [
        "اطفال", "أطفال", "طفل", "children", "kids",
        "كورسات الأطفال", "كورسات الاطفال",
    ]
    return contains_any(normalize_text(text), phrases)
57
+
58
+
59
def is_support_request(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    question / help / support phrases as a substring."""
    phrases = [
        "استفسار", "سؤال", "عندي سؤال", "مشكلة", "مش فاهم",
        "عايز اسأل", "عايزة اسأل", "محتاج مساعدة", "محتاجه مساعدة",
        "support", "خدمة العملاء",
    ]
    return contains_any(normalize_text(text), phrases)
66
+
67
+
68
def is_next_level_booking(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    booking / next-level phrases as a substring."""
    phrases = [
        "حجز", "احجز", "المستوى التالي", "مستوى تالي",
        "next level", "احجز المستوى", "حجز مستوى",
    ]
    return contains_any(normalize_text(text), phrases)
74
+
75
+
76
def is_complaint(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    complaint phrases as a substring."""
    phrases = ["شكوى", "اشتكي", "اشتك", "مشكلة كبيرة", "complaint"]
    return contains_any(normalize_text(text), phrases)
81
+
82
+
83
def wants_direct_support(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    "let me talk to someone" phrases as a substring."""
    phrases = [
        "تواصل", "اكلم", "عايز حد يكلمني", "عايزة حد يكلمني",
        "عايز اكلم خدمة العملاء", "عايزة اكلم خدمة العملاء",
    ]
    return contains_any(normalize_text(text), phrases)
89
+
90
+
91
def wants_start(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    start / help / menu phrases as a substring."""
    phrases = ["ابدأ", "ابدا", "مساعدة", "مساعده", "start", "menu", "القائمة"]
    return contains_any(normalize_text(text), phrases)
96
+
97
+
98
def wants_restart(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    restart / menu phrases as a substring."""
    phrases = ["من جديد", "ابدأ من جديد", "restart", "مينيو", "القائمة", "ابدأ"]
    return contains_any(normalize_text(text), phrases)
103
+
104
+
105
def wants_new_topic(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    "ask about something else" phrases as a substring."""
    phrases = [
        "عايز اسال عن حاجة تانية",
        "عايزة اسال عن حاجة تانية",
        "استفسار جديد",
        "موضوع تاني",
        "حاجة تانية",
    ]
    return contains_any(normalize_text(text), phrases)
114
+
115
+
116
def wants_courses_info(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    course-catalogue phrases as a substring."""
    phrases = [
        "كورسات",
        "الكورسات",
        "ايه الكورسات",
        "ما هي الكورسات",
        "الأنواع",
        "الانواع",
        "عايز اعرف الكورسات",
        "عايزة اعرف الكورسات",
        "ايه الكورسات المتاحة",
        "الكورسات المتاحة",
    ]
    return contains_any(normalize_text(text), phrases)
130
+
131
+
132
def asks_about_prior_study_case(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    "what if I studied before" / placement-test phrases as a substring."""
    phrases = [
        "لو كنت درست",
        "لو كنت دارس",
        "لو درست قبل كده",
        "طب لو درست",
        "ولو درست",
        "اذا كنت درست",
        "إذا كنت درست",
        "اختبار تحديد مستوى",
        "تحديد مستوى",
    ]
    return contains_any(normalize_text(text), phrases)
145
+
146
+
147
def asks_about_beginner_case(text: str) -> bool:
    """Return True when the normalized *text* contains any of the
    "what if I never studied / am a beginner" phrases as a substring."""
    phrases = [
        "لو مكنتش درست",
        "لو ما درستش",
        "لو مدرستش",
        "لو لسه جديد",
        "لو مبتدئ",
        "لو بادئ",
        "لو اول مرة",
        "لو أول مرة",
    ]
    return contains_any(normalize_text(text), phrases)
159
+
160
+
161
def detect_level(text: str) -> Optional[str]:
    """Map a free-text level mention to one of the known level codes.

    The text is normalized and scanned for level tokens by substring
    containment. Rules are evaluated in order and the first match wins.

    Returns:
        "1A", "2A", "1B" or "1C2/B", or None when no token is found.
    """
    t = normalize_text(text)

    # "a1.2" belongs to 2A but contains the substring "a1", which is a 1A
    # token. It must therefore be tested before the 1A group, otherwise it can
    # never be reached and "a1.2" is reported as "1A".
    rules = [
        (["a1.2"], "2A"),
        (["1a", "a1", "a1.1", "1 a"], "1A"),
        (["2a", "a2", "2 a"], "2A"),
        (["1b", "b1", "b1.1", "1 b"], "1B"),
        (["1c", "2b", "b2", "1c2/b", "1 c", "2 b"], "1C2/B"),
    ]

    for keywords, level in rules:
        if contains_any(t, keywords):
            return level

    return None
177
+
178
+
179
def detect_payment_method(text: str) -> Optional[str]:
    """Map a free-text payment mention to a payment-method identifier.

    The text is normalized and scanned for payment keywords by substring
    containment. Rules are evaluated in order and the first match wins.

    Returns:
        "vodafone_cash", "branch_or_cash", "bank_transfer", "card" or
        "installments", or None when no keyword is found.
    """
    t = normalize_text(text)

    # Vodafone Cash is tested first: "vodafone cash" / "فودافون كاش" contain
    # the generic cash keywords ("cash" / "كاش"), so testing cash first would
    # report every Vodafone Cash message as "branch_or_cash".
    rules = [
        (["فودافون", "vodafone", "vodafone cash"], "vodafone_cash"),
        (["فرع", "فروع", "كاش", "cash"], "branch_or_cash"),
        (["تحويل", "بنكي", "bank", "transfer"], "bank_transfer"),
        (["فيزا", "visa", "ماستر", "master", "credit card", "card"], "card"),
        (["تقسيط", "value", "فاليو"], "installments"),
    ]

    for keywords, method in rules:
        if contains_any(t, keywords):
            return method

    return None
198
+
199
+
200
def _classification(kind, value, confidence, entities=None):
    """Build one classification result dict in the shape classify_message returns."""
    return {
        "kind": kind,
        "value": value,
        "confidence": confidence,
        # A fresh dict per result so callers can mutate it safely.
        "entities": {} if entities is None else entities,
    }


def classify_message(state: str, text: str, flow_data: Optional[dict] = None):
    """Classify a user message in the context of the current conversation state.

    Args:
        state: Name of the current flow state (e.g. "WAITING_USER_TYPE").
        text: Raw user message; it is normalized before matching.
        flow_data: Accepted for interface compatibility; not read by this
            function.

    Returns:
        A structured classification:
        {
            "kind": "direct_answer" | "state_switch" | "topic_switch" | "unclear",
            "value": str | None,
            "confidence": float,
            "entities": dict
        }

    Evaluation order (first match wins):
        1. Global switches: restart, new topic, complaint, direct support,
           courses info.
        2. Direct answers for WAITING_USER_TYPE / WAITING_AUDIENCE.
        3. Global audience / customer-type switches.
        4. Remaining state-specific handling.
        5. Fallback "unclear".
    """
    t = normalize_text(text)

    # ===== Global topic switches =====
    if wants_restart(t):
        return _classification("topic_switch", "restart", 0.99)

    if wants_new_topic(t):
        return _classification("topic_switch", "new_topic", 0.95)

    if is_complaint(t):
        return _classification("topic_switch", "complaint", 0.98)

    if wants_direct_support(t):
        return _classification("topic_switch", "direct_support", 0.95)

    if wants_courses_info(t):
        return _classification("topic_switch", "courses_info", 0.90)

    # ===== State answers that share predicates with the switches below =====
    # These two states use exactly the same predicates as the global
    # audience / customer-type switches. They must run first; otherwise the
    # global switches always return before them and these direct answers are
    # unreachable.
    if state == "WAITING_USER_TYPE":
        if is_new_student(t):
            return _classification(
                "direct_answer", "new_student", 0.95, {"customer_type": "new"}
            )
        if is_current_student(t):
            return _classification(
                "direct_answer", "current_student", 0.95, {"customer_type": "current"}
            )

    if state == "WAITING_AUDIENCE":
        if is_adults(t):
            return _classification(
                "direct_answer", "adults", 0.95, {"audience": "adults"}
            )
        if is_children(t):
            return _classification(
                "direct_answer", "children", 0.95, {"audience": "children"}
            )

    # ===== Global audience / customer-type switches =====
    if is_children(t):
        return _classification(
            "topic_switch", "children_courses", 0.88, {"audience": "children"}
        )

    if is_adults(t):
        return _classification(
            "topic_switch", "adults_courses", 0.88, {"audience": "adults"}
        )

    if is_new_student(t):
        return _classification(
            "topic_switch", "new_student", 0.90, {"customer_type": "new"}
        )

    if is_current_student(t):
        return _classification(
            "topic_switch", "current_student", 0.90, {"customer_type": "current"}
        )

    # ===== State-specific understanding =====
    if state == "WAITING_PRIOR_STUDY":
        if is_yes(t):
            return _classification(
                "direct_answer", "prior_study_yes", 0.96, {"prior_study": True}
            )
        if is_no(t):
            return _classification(
                "direct_answer", "prior_study_no", 0.96, {"prior_study": False}
            )

    # In any of these confirmation states the user may ask a "what if"
    # question about the other study path; that is reported as a state switch.
    if state in [
        "WAITING_BEGINNER_SCHEDULE_CHOICE",
        "WAITING_PDF_102_CONFIRMATION",
        "WAITING_PLACEMENT_TEST_CONFIRMATION",
    ]:
        if asks_about_prior_study_case(t):
            return _classification(
                "state_switch", "switch_to_prior_study_true", 0.92, {"prior_study": True}
            )

        if asks_about_beginner_case(t):
            return _classification(
                "state_switch", "switch_to_prior_study_false", 0.92, {"prior_study": False}
            )

    if state == "WAITING_BEGINNER_SCHEDULE_CHOICE":
        if contains_any(t, ["تم", "اخترت", "اختارت", "جاهز", "جاهزة"]):
            return _classification("direct_answer", "confirm_schedule_reviewed", 0.92)

        if contains_any(t, ["عايز احجز", "عايزة احجز", "احجز", "حجز", "اشترك", "اشتراك"]):
            return _classification("direct_answer", "proceed_booking", 0.90)

        if is_support_request(t):
            return _classification("state_switch", "support_needed", 0.88)

    if state == "WAITING_PDF_102_CONFIRMATION":
        if contains_any(t, ["تم", "خلصت", "قريت", "اطلعت", "جاهز", "جاهزة"]):
            return _classification("direct_answer", "confirm_pdf_reviewed", 0.92)

        if is_support_request(t):
            return _classification("state_switch", "support_needed", 0.88)

    if state == "WAITING_PLACEMENT_TEST_CONFIRMATION":
        if contains_any(t, ["تم", "اخترت", "اختارت", "جاهز", "جاهزة"]):
            return _classification(
                "direct_answer", "confirm_placement_test_reviewed", 0.92
            )

        if is_support_request(t):
            return _classification("state_switch", "support_needed", 0.88)

    if state == "WAITING_CURRENT_STUDENT_ACTION":
        if is_support_request(t):
            return _classification("direct_answer", "current_student_support", 0.92)

        if is_next_level_booking(t):
            return _classification("direct_answer", "current_student_next_level", 0.92)

    if state == "WAITING_SUPPORT_QUESTION":
        # Any non-empty message is taken as the question itself; the original
        # (un-normalized) text is passed through in the entities.
        if t:
            return _classification(
                "direct_answer", "support_question_text", 0.85, {"support_question": text}
            )

    if state == "WAITING_LEVEL_SELECTION":
        level = detect_level(t)
        if level:
            return _classification(
                "direct_answer", "level_selected", 0.95, {"selected_level": level}
            )

        if is_support_request(t) or contains_any(t, ["مش عارف", "مش متأكد", "مش متاكدة"]):
            return _classification("state_switch", "support_needed", 0.85)

    if state == "WAITING_PAYMENT_METHOD":
        payment_method = detect_payment_method(t)
        if payment_method:
            return _classification(
                "direct_answer",
                "payment_method_selected",
                0.95,
                {"payment_method": payment_method},
            )

        if is_support_request(t):
            return _classification("state_switch", "support_needed", 0.85)

    if state == "WAITING_COMPLAINT_FORM":
        if contains_any(t, ["تم", "خلصت", "سجلت", "قدمت", "بعت"]):
            return _classification("direct_answer", "complaint_form_submitted", 0.90)

    if state == "HANDOFF_DONE":
        if contains_any(t, ["شكرا", "متشكر", "تسلم", "ميرسي"]):
            return _classification("direct_answer", "thanks", 0.95)

        if is_support_request(t):
            return _classification("topic_switch", "direct_support", 0.90)

    # Nothing matched for this state.
    return _classification("unclear", None, 0.30)