Gankit12 committed on
Commit
89dfc42
·
1 Parent(s): 746d0a2
Files changed (2) hide show
  1. app/agent/honeypot.py +100 -111
  2. app/models/extractor.py +33 -79
app/agent/honeypot.py CHANGED
@@ -396,34 +396,6 @@ class HoneypotAgent:
396
  )
397
  return natural_response
398
 
399
- def _pick_unique_response(self, alternatives: List[str], messages: List[Dict] = None) -> str:
400
- """
401
- Pick a response that hasn't been used recently in the conversation.
402
-
403
- Args:
404
- alternatives: List of possible responses
405
- messages: Conversation history to check for recently used responses
406
-
407
- Returns:
408
- A response that wasn't recently used (or random if all were used)
409
- """
410
- import random
411
-
412
- # Get recent agent messages (last 5) to avoid repetition
413
- recent_agent_msgs = set()
414
- if messages:
415
- agent_msgs = [m.get("message", "").lower().strip() for m in messages if m.get("sender") == "agent"]
416
- recent_agent_msgs = set(agent_msgs[-5:]) # Last 5 agent messages
417
-
418
- # Filter out alternatives that were recently used
419
- available = [alt for alt in alternatives if alt.lower().strip() not in recent_agent_msgs]
420
-
421
- # If all were used, reset and pick any
422
- if not available:
423
- available = alternatives
424
-
425
- return random.choice(available)
426
-
427
  def _filter_bot_response(
428
  self,
429
  response: str,
@@ -534,28 +506,35 @@ class HoneypotAgent:
534
 
535
  # PRIORITY 0: Handle OTP requests with VARIED responses
536
  if scammer_asking_otp:
537
- # Combine all OTP alternatives and pick one that hasn't been used
538
- all_otp_alternatives = [
539
- # Confused responses
540
- "OTP? I didn't receive any message on my phone. Can you send it again?",
541
- "I don't see any OTP on my phone. Where does it come from?",
542
- "What OTP? I checked my messages but nothing came. Please resend!",
543
- "No OTP on my phone... Maybe give me your UPI and I'll send money directly?",
544
- "OTP not received yet. Let me check again... No, nothing here!",
545
- # Offer alternatives
546
- "Still no OTP! Maybe network issue? Give me your UPI, I'll send directly!",
547
- "I keep checking but no OTP! Can you call me? What's your number?",
548
- "OTP still not coming! Let me do bank transfer instead - give me account number!",
549
- "My phone is not getting OTP! Can I send money to your UPI instead?",
550
- "Nothing received! Maybe give me your phone number and I'll call you?",
551
- # Frustrated responses
552
- "Sir I've checked 10 times, no OTP! My phone might have problem. Just give me UPI!",
553
- "I don't know why OTP is not coming! Please just tell me where to send money!",
554
- "OTP problem is frustrating me also! Let me call you - what's your number?",
555
- "Forget OTP! Give me bank account, I'll do NEFT transfer directly!",
556
- "This OTP is not working! Tell me another way to send the money!",
557
- ]
558
- return self._pick_unique_response(all_otp_alternatives, messages)
 
 
 
 
 
 
 
559
 
560
  # PRIORITY 1: Respond to scammer's question/confusion FIRST
561
  if scammer_asking_for_number:
@@ -567,7 +546,7 @@ class HoneypotAgent:
567
  "I want to save YOUR number! What is your phone number?",
568
  "Your contact number! Tell me, I'll save it and then send money!",
569
  ]
570
- return self._pick_unique_response(alternatives, messages)
571
 
572
  if scammer_confused:
573
  # Scammer is confused - clarify what we want
@@ -595,7 +574,7 @@ class HoneypotAgent:
595
  "Sorry, I don't understand technology well. Guide me step by step!",
596
  "Can you explain again? I really want to do this correctly!",
597
  ]
598
- return self._pick_unique_response(alternatives, messages)
599
 
600
  if scammer_said_already_told:
601
  # Scammer frustrated that they already gave info - acknowledge and proceed
@@ -642,7 +621,7 @@ class HoneypotAgent:
642
  "Apologies! I noted it wrong. Let me try again!",
643
  "Yes, I remember now! Processing the payment...",
644
  ]
645
- return self._pick_unique_response(alternatives, messages)
646
 
647
  # PRIORITY 2: Respond based on what scammer just provided
648
  if gave_upi_now:
@@ -671,7 +650,7 @@ class HoneypotAgent:
671
  "Noted! Last thing - confirm the beneficiary name?",
672
  "OK! What name should I put for the transfer?",
673
  ]
674
- return self._pick_unique_response(alternatives, messages)
675
 
676
  if gave_number_now:
677
  # They just gave phone number - acknowledge and ask for what we don't have
@@ -699,7 +678,7 @@ class HoneypotAgent:
699
  "Perfect! Confirm full name as per bank records?",
700
  "Got it! What name will show on my transaction?",
701
  ]
702
- return self._pick_unique_response(alternatives, messages)
703
 
704
  # PRIORITY 3: Generate response based on what we still need
705
  # CORRECT ORDER: UPI -> Phone -> Bank Account -> IFSC -> Name (once only)
@@ -756,7 +735,7 @@ class HoneypotAgent:
756
  "OK! Making the payment with these details.",
757
  ]
758
 
759
- return self._pick_unique_response(alternatives, messages)
760
 
761
  def _generate_fallback_response(
762
  self,
@@ -836,149 +815,159 @@ class HoneypotAgent:
836
 
837
  # PRIORITY 0: Handle OTP requests with varied responses
838
  if scammer_asking_otp:
839
- # Combine all OTP alternatives and pick unique one
840
- all_otp_alternatives = [
841
- "OTP? I didn't receive any message. Can you send it again?",
842
- "I don't see any OTP on my phone. Where does it come from?",
843
- "What OTP? Nothing came to my phone. Please resend!",
844
- "Still no OTP! Give me your UPI, I'll send money directly!",
845
- "OTP not coming! Can you call me? What's your number?",
846
- "OTP still not received! Let me do bank transfer - give me account!",
847
- "Sir, no OTP even now! Just give me UPI or bank account!",
848
- "Forget OTP! Tell me where to send money - UPI or bank!",
849
- "OTP problem! Let me call you - what's your number?",
850
- "My phone doesn't show any OTP. Maybe network problem?",
851
- "I checked 5 times, no OTP! Can I just send to your UPI?",
852
- "OTP not received! Give me your bank account, I'll do NEFT.",
853
- ]
854
- return self._pick_unique_response(all_otp_alternatives, all_messages)
 
 
855
 
856
  # PRIORITY 1: Handle scammer's confusion or frustration
857
  if scammer_confused:
858
  if not has_upi:
859
- alternatives = [
860
  "I want to send you money! Just tell me your UPI ID!",
861
  "Sir, where should I send the payment? Give me UPI ID!",
862
  "I'm ready to pay! Just tell me where - what's your UPI?",
863
- ]
864
  elif not has_phone:
865
- alternatives = [
866
  "I want your phone number to call if there's problem!",
867
  "Give me your number so I can confirm the payment!",
868
  "What's your phone number? I'll call you after sending!",
869
- ]
870
  else:
871
- alternatives = [
872
  "I'm trying to help you! What should I do next?",
873
  "Tell me clearly - what exactly do you need from me?",
874
  "Can you explain again? I really want to do this correctly!",
875
- ]
876
- return self._pick_unique_response(alternatives, all_messages)
877
 
878
  if scammer_said_already:
879
  # When scammer says "I already sent", check what we ACTUALLY have
880
  if has_bank and has_ifsc:
881
  # We have everything needed for bank transfer
882
- alternatives = [
883
  "Yes yes, sorry! I see all the details now. Sending the payment!",
884
  "Okay okay, I found everything! Processing the transfer now!",
885
  "Apologies! I have account and IFSC. Making the payment now!",
886
  "Yes, I see it now! Account number and IFSC noted. Transferring...",
887
- ]
888
  elif has_bank and not has_ifsc:
889
  # We have bank but need IFSC
890
- alternatives = [
891
  "Yes, I found the account! Just need IFSC code to complete the transfer.",
892
  "Sorry, I see the account now! What's the IFSC code?",
893
  "Got the account! My bank needs IFSC. Please share!",
894
- ]
895
  elif has_upi and has_phone and not has_bank:
896
  # We have UPI and phone, need bank
897
- alternatives = [
898
  "Yes, I have UPI and phone! But UPI is failing. Give me bank account?",
899
  "Sorry, I found UPI! But it's showing error. What's your account number?",
900
  "Got it! UPI not working. Can I do bank transfer? Account number?",
901
- ]
902
  elif has_upi and not has_phone:
903
- alternatives = [
904
  "Sorry sorry! Yes, I see the UPI now! What's your phone number in case it fails?",
905
  "Oh yes, I found it! Sending now. Give me your phone number also please!",
906
  "Okay got it! I'm transferring now. What's your number?",
907
- ]
908
  elif has_upi and has_phone:
909
- alternatives = [
910
  "Yes yes, sorry! I'm old and forgetful. I'm doing it now!",
911
  "Okay okay, I found it! Let me try again. Please wait!",
912
  "Apologies! I noted it wrong. Let me try again!",
913
- ]
914
  else:
915
- alternatives = [
916
  "Sorry, my memory is bad! Please send the details one more time?",
917
  "Arey, I couldn't find it! Can you please repeat?",
918
  "I missed it, please tell me one more time!",
919
- ]
920
- return self._pick_unique_response(alternatives, all_messages)
921
 
922
  # PRIORITY 2: Acknowledge what scammer just gave
923
  if gave_upi_now and not has_phone:
924
- alternatives = [
925
  "Okay, got the UPI! Let me try. What's your phone number in case it fails?",
926
  "Noted! Sending now. Also give me your number to call if there's problem.",
927
  "Got it! What's your phone number? My son wants to verify first.",
928
- ]
929
- return self._pick_unique_response(alternatives, all_messages)
930
 
931
  if gave_number_now and not has_upi:
932
- alternatives = [
933
  "Saved your number! Now tell me where to send - what's your UPI ID?",
934
  "Got your number! Now what's the UPI ID for the transfer?",
935
  "Number noted! Tell me your UPI ID so I can send the payment.",
936
- ]
937
- return self._pick_unique_response(alternatives, all_messages)
938
 
939
  # PRIORITY 3: Ask for what we still need
940
  # CORRECT ORDER: UPI -> Phone -> Bank Account -> IFSC -> Name (once only)
941
  if not has_upi:
942
- alternatives = [
943
  "Okay, I understand! Where should I send the money? What's your UPI ID?",
944
  "Yes, I'm ready! Tell me your UPI ID and I'll transfer!",
945
  "I want to pay! Just give me your UPI ID!",
946
- ]
947
  elif not has_phone:
948
- alternatives = [
949
  "I've noted the UPI! What's your phone number in case there's any issue?",
950
  "Got it! Also give me your number - I'll call to confirm.",
951
  "Okay! Sending now. What's your phone number for confirmation?",
952
- ]
953
  elif not has_bank:
954
  # Ask for bank account BEFORE IFSC
955
- alternatives = [
956
  "UPI is not going through! What's your bank account number?",
957
  "Getting error on UPI! Can you give your bank account number?",
958
  "Payment stuck! Tell me your bank account number.",
959
- ]
960
  elif not has_ifsc:
961
  # Only ask for IFSC AFTER we have bank account
962
- alternatives = [
963
  "Got the account number! What's the IFSC code?",
964
  "Need IFSC code to complete the transfer. What is it?",
965
  "Account noted! What's the IFSC code?",
966
- ]
967
  elif not already_asked_name:
968
  # Only ask for name ONCE
969
- alternatives = [
970
  "Got all details! What name should appear on the transfer?",
971
  "Almost done! What's the account holder name?",
972
  "Just need to confirm - what's the beneficiary name?",
973
- ]
974
  else:
975
  # We have everything - confirm and proceed
976
- alternatives = [
977
  "Perfect! I have all the details. Processing payment now.",
978
  "Got everything! Let me send the money.",
979
  "All noted! Making the transfer now.",
980
- ]
981
- return self._pick_unique_response(alternatives, all_messages)
 
 
 
 
 
 
 
 
 
 
 
 
982
 
983
  def _extract_intelligence(self, state: HoneypotState) -> Dict[str, Any]:
984
  """
 
396
  )
397
  return natural_response
398
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  def _filter_bot_response(
400
  self,
401
  response: str,
 
506
 
507
  # PRIORITY 0: Handle OTP requests with VARIED responses
508
  if scammer_asking_otp:
509
+ # Vary response based on how many times they've asked for OTP
510
+ if otp_ask_count <= 1:
511
+ # First time - be confused about OTP
512
+ alternatives = [
513
+ "OTP? I didn't receive any message on my phone. Can you send it again?",
514
+ "I don't see any OTP on my phone. Where does it come from?",
515
+ "What OTP? I checked my messages but nothing came. Please resend!",
516
+ "No OTP on my phone... Maybe give me your UPI and I'll send money directly?",
517
+ "OTP not received yet. Let me check again... No, nothing here!",
518
+ ]
519
+ elif otp_ask_count <= 3:
520
+ # They've asked multiple times - offer alternatives
521
+ alternatives = [
522
+ "Still no OTP! Maybe network issue? Give me your UPI, I'll send directly!",
523
+ "I keep checking but no OTP! Can you call me? What's your number?",
524
+ "OTP still not coming! Let me do bank transfer instead - give me account number!",
525
+ "My phone is not getting OTP! Can I send money to your UPI instead?",
526
+ "Nothing received! Maybe give me your phone number and I'll call you?",
527
+ ]
528
+ else:
529
+ # They've asked many times - get frustrated but still offer to pay
530
+ alternatives = [
531
+ "Sir I've checked 10 times, no OTP! My phone might have problem. Just give me UPI!",
532
+ "I don't know why OTP is not coming! Please just tell me where to send money!",
533
+ "OTP problem is frustrating me also! Let me call you - what's your number?",
534
+ "Forget OTP! Give me bank account, I'll do NEFT transfer directly!",
535
+ "This OTP is not working! Tell me another way to send the money!",
536
+ ]
537
+ return random.choice(alternatives)
538
 
539
  # PRIORITY 1: Respond to scammer's question/confusion FIRST
540
  if scammer_asking_for_number:
 
546
  "I want to save YOUR number! What is your phone number?",
547
  "Your contact number! Tell me, I'll save it and then send money!",
548
  ]
549
+ return random.choice(alternatives)
550
 
551
  if scammer_confused:
552
  # Scammer is confused - clarify what we want
 
574
  "Sorry, I don't understand technology well. Guide me step by step!",
575
  "Can you explain again? I really want to do this correctly!",
576
  ]
577
+ return random.choice(alternatives)
578
 
579
  if scammer_said_already_told:
580
  # Scammer frustrated that they already gave info - acknowledge and proceed
 
621
  "Apologies! I noted it wrong. Let me try again!",
622
  "Yes, I remember now! Processing the payment...",
623
  ]
624
+ return random.choice(alternatives)
625
 
626
  # PRIORITY 2: Respond based on what scammer just provided
627
  if gave_upi_now:
 
650
  "Noted! Last thing - confirm the beneficiary name?",
651
  "OK! What name should I put for the transfer?",
652
  ]
653
+ return random.choice(alternatives)
654
 
655
  if gave_number_now:
656
  # They just gave phone number - acknowledge and ask for what we don't have
 
678
  "Perfect! Confirm full name as per bank records?",
679
  "Got it! What name will show on my transaction?",
680
  ]
681
+ return random.choice(alternatives)
682
 
683
  # PRIORITY 3: Generate response based on what we still need
684
  # CORRECT ORDER: UPI -> Phone -> Bank Account -> IFSC -> Name (once only)
 
735
  "OK! Making the payment with these details.",
736
  ]
737
 
738
+ return random.choice(alternatives)
739
 
740
  def _generate_fallback_response(
741
  self,
 
815
 
816
  # PRIORITY 0: Handle OTP requests with varied responses
817
  if scammer_asking_otp:
818
+ if otp_ask_count <= 1:
819
+ return random.choice([
820
+ "OTP? I didn't receive any message. Can you send it again?",
821
+ "I don't see any OTP on my phone. Where does it come from?",
822
+ "What OTP? Nothing came to my phone. Please resend!",
823
+ ])
824
+ elif otp_ask_count <= 3:
825
+ return random.choice([
826
+ "Still no OTP! Give me your UPI, I'll send money directly!",
827
+ "OTP not coming! Can you call me? What's your number?",
828
+ "OTP still not received! Let me do bank transfer - give me account!",
829
+ ])
830
+ else:
831
+ return random.choice([
832
+ "Sir, no OTP even now! Just give me UPI or bank account!",
833
+ "Forget OTP! Tell me where to send money - UPI or bank!",
834
+ "OTP problem! Let me call you - what's your number?",
835
+ ])
836
 
837
  # PRIORITY 1: Handle scammer's confusion or frustration
838
  if scammer_confused:
839
  if not has_upi:
840
+ return random.choice([
841
  "I want to send you money! Just tell me your UPI ID!",
842
  "Sir, where should I send the payment? Give me UPI ID!",
843
  "I'm ready to pay! Just tell me where - what's your UPI?",
844
+ ])
845
  elif not has_phone:
846
+ return random.choice([
847
  "I want your phone number to call if there's problem!",
848
  "Give me your number so I can confirm the payment!",
849
  "What's your phone number? I'll call you after sending!",
850
+ ])
851
  else:
852
+ return random.choice([
853
  "I'm trying to help you! What should I do next?",
854
  "Tell me clearly - what exactly do you need from me?",
855
  "Can you explain again? I really want to do this correctly!",
856
+ ])
 
857
 
858
  if scammer_said_already:
859
  # When scammer says "I already sent", check what we ACTUALLY have
860
  if has_bank and has_ifsc:
861
  # We have everything needed for bank transfer
862
+ return random.choice([
863
  "Yes yes, sorry! I see all the details now. Sending the payment!",
864
  "Okay okay, I found everything! Processing the transfer now!",
865
  "Apologies! I have account and IFSC. Making the payment now!",
866
  "Yes, I see it now! Account number and IFSC noted. Transferring...",
867
+ ])
868
  elif has_bank and not has_ifsc:
869
  # We have bank but need IFSC
870
+ return random.choice([
871
  "Yes, I found the account! Just need IFSC code to complete the transfer.",
872
  "Sorry, I see the account now! What's the IFSC code?",
873
  "Got the account! My bank needs IFSC. Please share!",
874
+ ])
875
  elif has_upi and has_phone and not has_bank:
876
  # We have UPI and phone, need bank
877
+ return random.choice([
878
  "Yes, I have UPI and phone! But UPI is failing. Give me bank account?",
879
  "Sorry, I found UPI! But it's showing error. What's your account number?",
880
  "Got it! UPI not working. Can I do bank transfer? Account number?",
881
+ ])
882
  elif has_upi and not has_phone:
883
+ return random.choice([
884
  "Sorry sorry! Yes, I see the UPI now! What's your phone number in case it fails?",
885
  "Oh yes, I found it! Sending now. Give me your phone number also please!",
886
  "Okay got it! I'm transferring now. What's your number?",
887
+ ])
888
  elif has_upi and has_phone:
889
+ return random.choice([
890
  "Yes yes, sorry! I'm old and forgetful. I'm doing it now!",
891
  "Okay okay, I found it! Let me try again. Please wait!",
892
  "Apologies! I noted it wrong. Let me try again!",
893
+ ])
894
  else:
895
+ return random.choice([
896
  "Sorry, my memory is bad! Please send the details one more time?",
897
  "Arey, I couldn't find it! Can you please repeat?",
898
  "I missed it, please tell me one more time!",
899
+ ])
 
900
 
901
  # PRIORITY 2: Acknowledge what scammer just gave
902
  if gave_upi_now and not has_phone:
903
+ return random.choice([
904
  "Okay, got the UPI! Let me try. What's your phone number in case it fails?",
905
  "Noted! Sending now. Also give me your number to call if there's problem.",
906
  "Got it! What's your phone number? My son wants to verify first.",
907
+ ])
 
908
 
909
  if gave_number_now and not has_upi:
910
+ return random.choice([
911
  "Saved your number! Now tell me where to send - what's your UPI ID?",
912
  "Got your number! Now what's the UPI ID for the transfer?",
913
  "Number noted! Tell me your UPI ID so I can send the payment.",
914
+ ])
 
915
 
916
  # PRIORITY 3: Ask for what we still need
917
  # CORRECT ORDER: UPI -> Phone -> Bank Account -> IFSC -> Name (once only)
918
  if not has_upi:
919
+ return random.choice([
920
  "Okay, I understand! Where should I send the money? What's your UPI ID?",
921
  "Yes, I'm ready! Tell me your UPI ID and I'll transfer!",
922
  "I want to pay! Just give me your UPI ID!",
923
+ ])
924
  elif not has_phone:
925
+ return random.choice([
926
  "I've noted the UPI! What's your phone number in case there's any issue?",
927
  "Got it! Also give me your number - I'll call to confirm.",
928
  "Okay! Sending now. What's your phone number for confirmation?",
929
+ ])
930
  elif not has_bank:
931
  # Ask for bank account BEFORE IFSC
932
+ return random.choice([
933
  "UPI is not going through! What's your bank account number?",
934
  "Getting error on UPI! Can you give your bank account number?",
935
  "Payment stuck! Tell me your bank account number.",
936
+ ])
937
  elif not has_ifsc:
938
  # Only ask for IFSC AFTER we have bank account
939
+ return random.choice([
940
  "Got the account number! What's the IFSC code?",
941
  "Need IFSC code to complete the transfer. What is it?",
942
  "Account noted! What's the IFSC code?",
943
+ ])
944
  elif not already_asked_name:
945
  # Only ask for name ONCE
946
+ return random.choice([
947
  "Got all details! What name should appear on the transfer?",
948
  "Almost done! What's the account holder name?",
949
  "Just need to confirm - what's the beneficiary name?",
950
+ ])
951
  else:
952
  # We have everything - confirm and proceed
953
+ return random.choice([
954
  "Perfect! I have all the details. Processing payment now.",
955
  "Got everything! Let me send the money.",
956
  "All noted! Making the transfer now.",
957
+ ])
958
+
959
+ # Build context from all messages for scam type detection
960
+ context = last_message
961
+ if all_messages:
962
+ context = " ".join(m.get("message", "") for m in all_messages)
963
+
964
+ # Get context-aware response based on scam type
965
+ response = get_context_aware_response(context, turn_count, language)
966
+ if response:
967
+ return response
968
+
969
+ # Fall back to persona sample
970
+ return get_sample_response(persona, language)
971
 
972
  def _extract_intelligence(self, state: HoneypotState) -> Dict[str, Any]:
973
  """
app/models/extractor.py CHANGED
@@ -88,12 +88,8 @@ class IntelligenceExtractor:
88
  # Bank accounts: 9-18 digits (not starting with 0 typically)
89
  "bank_accounts": r"\b[1-9]\d{8,17}\b",
90
 
91
- # IFSC codes: 4-5 letters + (0 or O) + 6 alphanumeric
92
- # Handle common typos:
93
- # - O instead of 0 in 5th position
94
- # - Extra letter (e.g., SBIIN instead of SBIN)
95
- # We're lenient here because we want to CAPTURE scammer data
96
- "ifsc_codes": r"\b[A-Z]{4,5}[0O][A-Z0-9]{6}\b",
97
 
98
  # Phone numbers: Indian mobile format with optional +91
99
  "phone_numbers": r"(?:\+91[\s\-]?)?(?:0)?[6-9]\d{9}\b",
@@ -165,22 +161,6 @@ class IntelligenceExtractor:
165
  intel["phone_numbers"] = self._normalize_phone_numbers(intel["phone_numbers"])
166
  intel["phishing_links"] = self._validate_phishing_links(intel["phishing_links"])
167
 
168
- # Remove any bank accounts that are actually phone numbers
169
- # Phone numbers are normalized to +91XXXXXXXXXX format
170
- phone_digits = set()
171
- for phone in intel["phone_numbers"]:
172
- # Extract the 10-digit number from +91XXXXXXXXXX
173
- digits = phone.replace("+91", "")
174
- phone_digits.add(digits)
175
- # Also add with 91 prefix in case it was captured that way
176
- phone_digits.add("91" + digits)
177
-
178
- # Filter out bank accounts that match phone numbers
179
- intel["bank_accounts"] = [
180
- acc for acc in intel["bank_accounts"]
181
- if acc not in phone_digits and acc[-10:] not in phone_digits
182
- ]
183
-
184
  # Use spaCy NER for additional entities if available
185
  if self.nlp is not None:
186
  self._extract_with_spacy(text, intel)
@@ -266,13 +246,6 @@ class IntelligenceExtractor:
266
  """
267
  Validate bank account numbers for precision >85% (AC-3.1.2).
268
 
269
- Bank account numbers in India:
270
- - SBI: 11 digits
271
- - HDFC: 13-14 digits
272
- - ICICI: 12 digits
273
- - Axis: 15 digits
274
- - Other banks: 9-18 digits
275
-
276
  Args:
277
  accounts: List of potential account numbers
278
 
@@ -286,13 +259,13 @@ class IntelligenceExtractor:
286
  if len(account) < 9 or len(account) > 18:
287
  continue
288
 
289
- # Exclude exactly 10-digit numbers starting with 6,7,8,9 (Indian phone numbers)
290
- if len(account) == 10 and account[0] in "6789":
291
  continue
292
 
293
- # Exclude if it looks like +91 phone (starts with 91 + 6-9 and is 12 digits)
294
- if len(account) == 12 and account[:2] == "91" and account[2] in "6789":
295
- continue
296
 
297
  # Check for repeated digits (unlikely to be valid account)
298
  if len(set(account)) == 1:
@@ -307,68 +280,55 @@ class IntelligenceExtractor:
307
  return list(set(validated))
308
 
309
  def _is_sequential(self, number: str) -> bool:
310
- """
311
- Check if number is an OBVIOUS sequential pattern.
 
312
 
313
- IMPORTANT: For a honeypot, we WANT to capture ALL data scammers provide,
314
- even if it looks like a test/sequential pattern. Scammers might use
315
- obvious patterns, and we should still track them.
 
 
 
 
 
 
316
 
317
- Therefore, this function now returns False for all inputs.
318
- We only reject all-same-digit patterns (handled in _validate_bank_accounts).
319
- """
320
- # DO NOT reject sequential patterns - capture all scammer data
321
- # The honeypot's purpose is intelligence gathering, not validation
322
  return False
323
 
324
  def _validate_ifsc_codes(self, ifsc_codes: List[str]) -> List[str]:
325
  """
326
- Validate IFSC codes - lenient to capture scammer data even with typos.
327
-
328
- Standard IFSC format: 4 letters + 0 + 6 alphanumeric (11 chars)
329
- But we accept typos like:
330
- - SBIIN0010789 (12 chars with extra letter)
331
- - SBIN0O10789 (O instead of 0)
332
 
333
- For a honeypot, we want to CAPTURE the data scammers provide.
334
 
335
  Args:
336
  ifsc_codes: List of potential IFSC codes
337
 
338
  Returns:
339
- List of validated/normalized IFSC codes
340
  """
341
  validated = []
342
 
343
  for ifsc in ifsc_codes:
344
  ifsc_upper = ifsc.upper()
345
 
346
- # Accept 11-12 characters (allow for typos like SBIIN)
347
- if len(ifsc_upper) < 11 or len(ifsc_upper) > 12:
348
  continue
349
 
350
- # First 4 must be letters (bank code) - for 12 char, first 5 are letters
351
- letter_prefix_len = 4 if len(ifsc_upper) == 11 else 5
352
- if not ifsc_upper[:letter_prefix_len].isalpha():
353
  continue
354
 
355
- # Replace all O (letter) with 0 (zero) in numeric parts
356
- fixed_suffix = ""
357
- for char in ifsc_upper[letter_prefix_len:]:
358
- if char == "O":
359
- fixed_suffix += "0"
360
- else:
361
- fixed_suffix += char
362
-
363
- # Check that after the letter prefix, first char is 0
364
- if fixed_suffix and fixed_suffix[0] != "0":
365
  continue
366
 
367
- # Rest must be alphanumeric
368
- if not fixed_suffix[1:].isalnum():
369
  continue
370
 
371
- # Store the original (we want to capture what scammer actually sent)
372
  validated.append(ifsc_upper)
373
 
374
  return list(set(validated))
@@ -419,9 +379,6 @@ class IntelligenceExtractor:
419
  """
420
  Validate and filter phishing links for precision >95% (AC-3.1.5).
421
 
422
- In a scam context, we want to capture ALL links that aren't from
423
- well-known legitimate domains, as they could be phishing attempts.
424
-
425
  Args:
426
  links: List of potential phishing links
427
 
@@ -448,16 +405,13 @@ class IntelligenceExtractor:
448
  else:
449
  domain_clean = domain
450
 
451
- # Skip only well-known legitimate domains
452
  if domain_clean in LEGITIMATE_DOMAINS or domain in LEGITIMATE_DOMAINS:
453
  continue
454
 
455
- # In a scam context, ANY unknown link is suspicious
456
- # Since this is a honeypot system, we want to capture all links
457
- # that scammers share - they're likely phishing attempts
458
- is_suspicious = True
459
 
460
- # Extra flags for definitely suspicious patterns
461
  for pattern in SUSPICIOUS_DOMAIN_PATTERNS:
462
  if re.search(pattern, link, re.IGNORECASE):
463
  is_suspicious = True
 
88
  # Bank accounts: 9-18 digits (not starting with 0 typically)
89
  "bank_accounts": r"\b[1-9]\d{8,17}\b",
90
 
91
+ # IFSC codes: 4 letters + 0 + 6 alphanumeric
92
+ "ifsc_codes": r"\b[A-Z]{4}0[A-Z0-9]{6}\b",
 
 
 
 
93
 
94
  # Phone numbers: Indian mobile format with optional +91
95
  "phone_numbers": r"(?:\+91[\s\-]?)?(?:0)?[6-9]\d{9}\b",
 
161
  intel["phone_numbers"] = self._normalize_phone_numbers(intel["phone_numbers"])
162
  intel["phishing_links"] = self._validate_phishing_links(intel["phishing_links"])
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  # Use spaCy NER for additional entities if available
165
  if self.nlp is not None:
166
  self._extract_with_spacy(text, intel)
 
246
  """
247
  Validate bank account numbers for precision >85% (AC-3.1.2).
248
 
 
 
 
 
 
 
 
249
  Args:
250
  accounts: List of potential account numbers
251
 
 
259
  if len(account) < 9 or len(account) > 18:
260
  continue
261
 
262
+ # Exclude exactly 10 digits (likely phone numbers)
263
+ if len(account) == 10:
264
  continue
265
 
266
+ # Exclude common patterns that aren't accounts
267
+ # OTPs are typically 4-6 digits (already excluded by length)
268
+ # PINs are 4-6 digits (already excluded)
269
 
270
  # Check for repeated digits (unlikely to be valid account)
271
  if len(set(account)) == 1:
 
280
  return list(set(validated))
281
 
282
  def _is_sequential(self, number: str) -> bool:
283
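+ """Check if number is exactly the ascending run 0123456789... or the descending run 9876543210... of its own length (numbers shorter than 9 digits are never treated as sequential)."""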
284
+ if len(number) < 9:
285
+ return False
286
 
287
+ # Check ascending
288
+ ascending = "".join(str(i % 10) for i in range(len(number)))
289
+ if number == ascending[:len(number)]:
290
+ return True
291
+
292
+ # Check descending
293
+ descending = "".join(str(9 - (i % 10)) for i in range(len(number)))
294
+ if number == descending[:len(number)]:
295
+ return True
296
 
 
 
 
 
 
297
  return False
298
 
299
  def _validate_ifsc_codes(self, ifsc_codes: List[str]) -> List[str]:
300
  """
301
+ Validate IFSC codes for precision >95% (AC-3.1.3).
 
 
 
 
 
302
 
303
+ IFSC format: 4 letters (bank code) + 0 + 6 alphanumeric (branch code)
304
 
305
  Args:
306
  ifsc_codes: List of potential IFSC codes
307
 
308
  Returns:
309
+ List of validated IFSC codes
310
  """
311
  validated = []
312
 
313
  for ifsc in ifsc_codes:
314
  ifsc_upper = ifsc.upper()
315
 
316
+ # Must be exactly 11 characters
317
+ if len(ifsc_upper) != 11:
318
  continue
319
 
320
+ # First 4 must be letters (bank code)
321
+ if not ifsc_upper[:4].isalpha():
 
322
  continue
323
 
324
+ # 5th character must be 0
325
+ if ifsc_upper[4] != "0":
 
 
 
 
 
 
 
 
326
  continue
327
 
328
+ # Last 6 must be alphanumeric (branch code)
329
+ if not ifsc_upper[5:].isalnum():
330
  continue
331
 
 
332
  validated.append(ifsc_upper)
333
 
334
  return list(set(validated))
 
379
  """
380
  Validate and filter phishing links for precision >95% (AC-3.1.5).
381
 
 
 
 
382
  Args:
383
  links: List of potential phishing links
384
 
 
405
  else:
406
  domain_clean = domain
407
 
408
+ # Skip legitimate domains
409
  if domain_clean in LEGITIMATE_DOMAINS or domain in LEGITIMATE_DOMAINS:
410
  continue
411
 
412
+ # Flag the link as suspicious if it matches any suspicious pattern
413
+ is_suspicious = False
 
 
414
 
 
415
  for pattern in SUSPICIOUS_DOMAIN_PATTERNS:
416
  if re.search(pattern, link, re.IGNORECASE):
417
  is_suspicious = True