S-Dreamer committed on
Commit
6325f92
·
verified ·
1 Parent(s): 1f4bd8c

Upload 3 files

Browse files
Files changed (3) hide show
  1. agent/__init__.py +4 -0
  2. agent/cli.py +84 -0
  3. agent/osint_agent.py +273 -0
agent/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """OSINT Expert Agent powered by Claude API."""
2
+ from .osint_agent import OSINTAgent
3
+
4
+ __all__ = ["OSINTAgent"]
agent/cli.py CHANGED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Command-line interface for the OSINT Expert Agent."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+
7
+ from .osint_agent import OSINTAgent
8
+
9
+
10
def interactive_mode(agent: OSINTAgent) -> None:
    """Run a prompt/response loop against *agent* on the terminal.

    Each non-empty line typed by the user is sent to ``agent.stream_chat``
    and the yielded chunks are echoed as they arrive. Two inputs are handled
    locally instead of being sent: ``exit``/``quit`` (case-insensitive) ends
    the session, and ``reset`` calls ``agent.reset()``. End-of-input or
    Ctrl-C at the prompt also ends the session.
    """
    print("OSINT Expert Agent — Interactive Mode")
    print("Commands: 'exit'/'quit' to end, 'reset' to clear history.\n")

    while True:
        try:
            line = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Only the prompt itself is guarded; leave quietly on EOF/Ctrl-C.
            print("\nExiting.")
            return

        if line == "":
            continue

        command = line.lower()
        if command == "exit" or command == "quit":
            print("Session ended.")
            return
        if command == "reset":
            agent.reset()
            print("[Conversation history cleared]\n")
            continue

        # Echo the reply chunk by chunk so long answers appear progressively.
        print("Agent: ", end="", flush=True)
        for piece in agent.stream_chat(line):
            print(piece, end="", flush=True)
        print("\n")
32
+
33
+
34
def main(argv: list[str] | None = None) -> None:
    """Parse command-line options and run the matching agent action.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, so existing ``main()`` callers behave as
            before.

    Exactly one action runs, chosen in this order of precedence:
    ``--iocs`` (IOC report), ``--explain`` (technique explanation),
    ``--target`` (analysis of the given ``--type``, streamed unless
    ``--no-stream`` is set), otherwise the interactive session.
    """
    epilog = (
        "Examples:\n"
        "  python -m agent.cli                           # interactive mode\n"
        "  python -m agent.cli --target example.com      # full analysis\n"
        "  python -m agent.cli --target example.com --type passive\n"
        "  python -m agent.cli --target 1.2.3.4 --type threat --context \"seen in phishing\"\n"
        "  python -m agent.cli --iocs 1.2.3.4 bad.com abc123hash\n"
        "  python -m agent.cli --explain \"certificate transparency log mining\"\n"
    )
    parser = argparse.ArgumentParser(
        description="OSINT Expert Agent powered by Claude 3.5 Sonnet",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )
    parser.add_argument("--target", "-t", help="Target to analyze (domain, IP, username, etc.)")
    parser.add_argument(
        "--type",
        choices=["full", "passive", "threat", "footprint", "breach", "darkweb", "socmint"],
        default="full",
        help="Analysis type (default: full)",
    )
    parser.add_argument("--context", "-c", help="Additional context for the analysis")
    parser.add_argument("--iocs", nargs="+", metavar="IOC", help="IOCs for enrichment report")
    parser.add_argument("--explain", "-e", metavar="TECHNIQUE", help="Explain an OSINT technique")
    parser.add_argument("--model", default="claude-3-5-sonnet-20241022", help="Claude model to use")
    parser.add_argument("--no-stream", action="store_true", help="Disable streaming output")

    # Parsing happens before the agent is built so usage errors and --help
    # never touch the API client.
    args = parser.parse_args(argv)
    agent = OSINTAgent(model=args.model)

    if args.iocs:
        print(agent.generate_ioc_report(args.iocs))
    elif args.explain:
        print(agent.explain_technique(args.explain))
    elif args.target:
        # Use the public builder name: it is the attribute the agent class
        # actually defines.
        prompt = OSINTAgent.build_analysis_prompt(args.target, args.type, args.context)
        if args.no_stream:
            print(agent.chat(prompt))
        else:
            for chunk in agent.stream_chat(prompt):
                print(chunk, end="", flush=True)
            print()
    else:
        interactive_mode(agent)
81
+
82
+
83
+ if __name__ == "__main__":
84
+ main()
agent/osint_agent.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """OSINT Expert Agent using Claude 3.5 Sonnet with extended thinking and prompt caching."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from collections.abc import Generator
7
+ from typing import Optional
8
+
9
+ import anthropic
10
+
11
+ OSINT_SYSTEM_PROMPT = """You are a senior OSINT analyst and dark web intelligence specialist with \
12
+ over 15 years of experience in digital forensics, threat intelligence, and cyber investigations. \
13
+ You support defensive security operations, authorized penetration testing engagements, academic \
14
+ research, journalism, and law enforcement investigations. You never assist with illegal activity, \
15
+ unauthorized access, or any action that harms individuals or organizations without consent.
16
+
17
+ ## Core Competencies
18
+
19
+ ### 1. Passive Reconnaissance
20
+ - DNS enumeration: A/AAAA/MX/NS/TXT/SPF/DMARC/DKIM record analysis, zone transfer checks, \
21
+ subdomain discovery via brute-force wordlists, CT log mining (crt.sh, Censys, Facebook CT)
22
+ - WHOIS & RDAP analysis: registrar history, registrant pivots, privacy shield identification, \
23
+ domain age, creation/expiry patterns, bulk WHOIS for related domains
24
+ - Certificate Transparency: SSL/TLS certificate enumeration, SAN field expansion, wildcard \
25
+ certificate analysis, certificate issuance timeline analysis
26
+ - ASN & BGP intelligence: IP-to-ASN mapping, BGP route history, RPKI validation, IXP peering, \
27
+ prefix hijack detection (BGPMon, RIPE RIS)
28
+ - Shodan/Censys/FOFA: exposed services, default credentials, banner grabbing, industrial \
29
+ control systems (ICS/SCADA), VPN endpoints, remote access solutions
30
+ - Google dorks & advanced search operators: site:, filetype:, inurl:, intitle:, cache:, \
31
+ before:/after: operators for OSINT pivots
32
+
33
+ ### 2. Dark Web Intelligence
34
+ - .onion site analysis: Tor hidden service fingerprinting, server misconfigurations that \
35
+ expose clearnet IPs, uptime monitoring, content archiving
36
+ - Marketplace & forum monitoring: vendor profiling, product listings, feedback analysis, \
37
+ PGP key pivots, cryptocurrency address extraction
38
+ - Paste site monitoring: Pastebin, PrivateBin, Ghostbin — automated scraping for credential \
39
+ leaks, source code, PII, configuration files
40
+ - Cryptocurrency transaction tracing: Bitcoin/Monero address clustering, exchange \
41
+ identification, mixing service detection, on-chain analytics (Chainalysis-style methodology)
42
+ - Dark web search engines: Ahmia, Torch, Haystak — indexed .onion content discovery
43
+ - I2P & Freenet: alternative anonymity networks, eepsite discovery, distributed content
44
+
45
+ ### 3. Threat Intelligence
46
+ - IOC extraction & enrichment: IPs, domains, URLs, hashes, email addresses — VirusTotal, \
47
+ OTX AlienVault, ThreatFox, Shodan enrichment
48
+ - MITRE ATT&CK mapping: TTP identification, adversary group attribution, technique \
49
+ clustering, campaign correlation
50
+ - Threat actor profiling: infrastructure reuse, TTPs, victimology, geopolitical motivation, \
51
+ malware family association
52
+ - C2 infrastructure analysis: beacon intervals, JA3/JA3S fingerprints, domain fronting \
53
+ detection, fast-flux DNS, DGA identification
54
+ - Malware analysis (static): PE header analysis, import table review, string extraction, \
55
+ YARA rule development, packer identification
56
+
57
+ ### 4. Data Breach Analysis
58
+ - Credential exposure: Have I Been Pwned (HIBP) API, Dehashed, IntelX — email/domain \
59
+ queries for breach membership
60
+ - Combo list analysis: password pattern analysis, credential stuffing risk assessment, \
61
+ hash identification (MD5/SHA1/bcrypt/NTLM)
62
+ - Database leak assessment: schema identification, PII scope determination, impact \
63
+ classification per GDPR/CCPA frameworks
64
+ - Breach timeline correlation: linking breach dates to threat actor activity, campaign \
65
+ attribution, victim notification guidance
66
+
67
+ ### 5. Social Media Intelligence (SOCMINT)
68
+ - Cross-platform entity resolution: username pivots across Twitter/X, Reddit, GitHub, \
69
+ Telegram, Discord, LinkedIn, Instagram using Sherlock/Maigret methodology
70
+ - Geolocation from imagery: EXIF metadata, background landmark analysis, shadow direction, \
71
+ vegetation/architecture analysis
72
+ - Network graph analysis: follower/following relationship mapping, community detection, \
73
+ bot network identification, coordinated inauthentic behavior
74
+ - Account authenticity assessment: creation date, follower/following ratio, posting \
75
+ frequency, engagement metrics, profile image reverse search
76
+ - Telegram & Discord OSINT: channel membership scraping, message archiving, admin \
77
+ identification, invite link analysis
78
+
79
+ ### 6. Network Reconnaissance
80
+ - IP geolocation & hosting: MaxMind, ip-api, RIPE/ARIN/APNIC WHOIS, hosting provider \
81
+ identification, datacenter vs. residential classification
82
+ - CDN & reverse proxy detection: Cloudflare, Akamai, Fastly fingerprinting, origin IP \
83
+ discovery techniques (historical DNS, SSL cert SANs, favicon hash)
84
+ - Email header analysis: SPF/DKIM/DMARC validation, hop-by-hop IP tracing, relay \
85
+ identification, phishing infrastructure detection
86
+ - BGP & routing analysis: prefix announcement history, route leaks, anycast detection, \
87
+ traffic engineering inference
88
+ - SSL/TLS analysis: cipher suite enumeration, certificate chain validation, CT log \
89
+ correlation, HPKP/HSTS analysis
90
+
91
+ ### 7. Digital Footprint & Attack Surface Analysis
92
+ - External attack surface mapping: internet-exposed assets, shadow IT discovery, \
93
+ forgotten subdomains, acquisition-inherited infrastructure
94
+ - GitHub & code repository OSINT: secret scanning (API keys, credentials in commit \
95
+ history), employee identification, internal tooling discovery, dependency analysis
96
+ - Cloud storage enumeration: misconfigured S3 buckets, Azure Blob, GCP buckets — \
97
+ Grayhat Warfare, S3Scanner methodology
98
+ - Job posting intelligence: technology stack inference from job requirements, \
99
+ internal tool names, team structure
100
+ - Dark patterns & data broker exposure: Spokeo, BeenVerified, Pipl — opt-out guidance \
101
+ and data removal strategies
102
+
103
+ ## Intelligence Reporting Standards
104
+ - Follow traffic light protocol (TLP): TLP:RED, TLP:AMBER, TLP:GREEN, TLP:CLEAR
105
+ - Structure reports with: Executive Summary, Technical Findings, IOC Table, \
106
+ Attribution Confidence Level, Recommended Actions
107
+ - Cite sources and collection timestamps for every finding
108
+ - Assess confidence using structured analytic techniques (SATs): ACH, Red Team analysis
109
+ - Apply OSINT source reliability matrix (A-F reliability, 1-6 accuracy)
110
+
111
+ ## Legal & Ethical Framework
112
+ - Only perform authorized investigations with explicit scope definition
113
+ - Passive reconnaissance only unless active testing is explicitly authorized in writing
114
+ - Respect robots.txt and ToS where legally required
115
+ - Handle PII per applicable regulations (GDPR, CCPA, HIPAA)
116
+ - Never access systems without authorization — Computer Fraud and Abuse Act (CFAA) \
117
+ and equivalent laws apply globally
118
+ - Provide defensive recommendations alongside every offensive finding
119
+
120
+ When analyzing targets, always clarify the authorization status before proceeding. \
121
+ For ambiguous requests, default to the most restrictive interpretation and recommend \
122
+ obtaining proper authorization."""
123
+
124
+
125
class OSINTAgent:
    """Dark web and OSINT expert agent with multi-turn conversation, prompt caching, and thinking.

    The agent keeps the running conversation in ``conversation_history`` and
    sends it, together with the cacheable system prompt, on every request.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: str = "claude-3-5-sonnet-20241022",
        *,
        max_tokens: int = 16000,
        thinking_budget: Optional[int] = 4000,
    ) -> None:
        """Create the API client and an empty conversation.

        Args:
            api_key: Anthropic API key; falls back to the
                ``ANTHROPIC_API_KEY`` environment variable when omitted.
            model: Model identifier sent with every request.
            max_tokens: ``max_tokens`` value sent with every request.
            thinking_budget: ``budget_tokens`` for the ``thinking`` request
                option, or ``None`` to omit the ``thinking`` option entirely.
        """
        self.client = anthropic.Anthropic(
            api_key=api_key or os.environ.get("ANTHROPIC_API_KEY")
        )
        self.model = model
        self.max_tokens = max_tokens
        # NOTE(review): the `thinking` option is presumably only accepted by
        # models with extended-thinking support — confirm the default model
        # qualifies, or construct the agent with thinking_budget=None.
        self.thinking_budget = thinking_budget
        self.conversation_history: list[dict] = []

    def _build_system(self) -> list[dict]:
        """Return system prompt blocks with cache_control for prompt caching."""
        return [
            {
                "type": "text",
                "text": OSINT_SYSTEM_PROMPT,
                "cache_control": {"type": "ephemeral"},
            }
        ]

    def _request_kwargs(self) -> dict:
        """Assemble the keyword arguments shared by ``chat`` and ``stream_chat``."""
        kwargs: dict = {
            "model": self.model,
            "max_tokens": self.max_tokens,
            "system": self._build_system(),
            "messages": self.conversation_history,
        }
        if self.thinking_budget is not None:
            kwargs["thinking"] = {
                "type": "enabled",
                "budget_tokens": self.thinking_budget,
            }
        return kwargs

    def _discard_turn(self, turn: dict) -> None:
        """Remove *turn* from the history if it is still the latest entry."""
        history = self.conversation_history
        if history and history[-1] is turn:
            history.pop()

    def chat(self, user_message: str) -> str:
        """Send a message and return the full assistant response (non-streaming).

        The user turn and the assistant reply are appended to the history.
        If the request raises, the user turn is removed again so the history
        never ends with an unanswered message. The returned string is the
        concatenation of every text block in the reply.
        """
        user_turn = {"role": "user", "content": user_message}
        self.conversation_history.append(user_turn)
        try:
            response = self.client.messages.create(**self._request_kwargs())
        except Exception:
            self._discard_turn(user_turn)
            raise

        self.conversation_history.append(
            {"role": "assistant", "content": response.content}
        )
        # Join all text blocks so the result matches what stream_chat yields.
        return "".join(b.text for b in response.content if b.type == "text")

    def stream_chat(self, user_message: str) -> Generator[str, None, None]:
        """Stream a response; yields text chunks as they arrive.

        The assistant reply is appended to the history once the stream has
        been fully consumed. If the request fails, or the consumer abandons
        the generator early, the pending user turn is removed again.
        """
        user_turn = {"role": "user", "content": user_message}
        self.conversation_history.append(user_turn)
        try:
            with self.client.messages.stream(**self._request_kwargs()) as stream:
                yield from stream.text_stream
                final = stream.get_final_message()
        except BaseException:
            # BaseException also covers GeneratorExit (generator closed early)
            # and KeyboardInterrupt; the error itself is always re-raised.
            self._discard_turn(user_turn)
            raise
        self.conversation_history.append(
            {"role": "assistant", "content": final.content}
        )

    @staticmethod
    def _build_analysis_prompt(
        target: str, analysis_type: str, context: Optional[str] = None
    ) -> str:
        """Return the analysis prompt for *target*.

        Args:
            target: Value interpolated into the prompt as the subject.
            analysis_type: One of full, passive, threat, footprint, breach,
                darkweb, socmint. Any other value falls back to ``full``.
            context: Optional text appended as "Additional context".
        """
        prompts = {
            "full": (
                f"Conduct a comprehensive OSINT analysis of: **{target}**\n\n"
                "Cover all applicable domains: passive recon, dark web presence, threat intelligence, "
                "data breach exposure, social media footprint, network reconnaissance, and attack surface. "
                "Structure with clear sections, an IOC table where applicable, confidence levels, "
                "and defensive recommendations."
            ),
            "passive": (
                f"Perform passive reconnaissance on: **{target}**\n\n"
                "Cover DNS records, WHOIS/RDAP history, certificate transparency logs, ASN/BGP data, "
                "and Shodan/Censys exposure. List discovered subdomains, IPs, and exposed services. "
                "Flag misconfigurations and security concerns."
            ),
            "threat": (
                f"Conduct a threat intelligence analysis for: **{target}**\n\n"
                "Identify associated IOCs, map to MITRE ATT&CK TTPs, assess threat actor attribution, "
                "analyze C2 infrastructure patterns, and provide enrichment methodology per indicator."
            ),
            "footprint": (
                f"Map the digital footprint and external attack surface for: **{target}**\n\n"
                "Identify internet-exposed assets, shadow IT, misconfigured cloud storage, "
                "GitHub/code repo exposure, and data broker presence. Prioritize by risk level."
            ),
            "breach": (
                f"Analyze data breach and credential exposure for: **{target}**\n\n"
                "Check breach databases (HIBP methodology), assess credential stuffing risk, "
                "identify leaked internal data, and provide remediation steps."
            ),
            "darkweb": (
                f"Investigate dark web presence and mentions of: **{target}**\n\n"
                "Search for mentions on forums, marketplaces, and paste sites. Identify any data for sale, "
                "threat actor discussions, or planned attacks. Extract cryptocurrency addresses where applicable."
            ),
            "socmint": (
                f"Perform social media intelligence (SOCMINT) analysis for: **{target}**\n\n"
                "Map accounts across platforms, analyze network relationships, assess account authenticity, "
                "extract geolocation indicators, and identify key affiliations."
            ),
        }
        prompt = prompts.get(analysis_type, prompts["full"])
        if context:
            prompt += f"\n\nAdditional context: {context}"
        return prompt

    # Public name kept alongside the underscore-prefixed one: callers use
    # both spellings, and both must resolve to the same builder.
    build_analysis_prompt = _build_analysis_prompt

    def analyze_target(
        self,
        target: str,
        analysis_type: str = "full",
        context: Optional[str] = None,
    ) -> str:
        """Run a structured OSINT analysis against a target.

        analysis_type options: full, passive, threat, footprint, breach, darkweb, socmint
        """
        prompt = self._build_analysis_prompt(target, analysis_type, context)
        return self.chat(prompt)

    def generate_ioc_report(self, iocs: list[str]) -> str:
        """Generate an enriched IOC report for a list of indicators."""
        ioc_list = "\n".join(f"- {ioc}" for ioc in iocs)
        prompt = (
            f"Generate a structured IOC report for the following indicators:\n\n{ioc_list}\n\n"
            "For each IOC: classify the type (IP/domain/URL/hash/email), describe enrichment steps "
            "using VirusTotal, Shodan, WHOIS, OTX AlienVault, and ThreatFox, assess maliciousness "
            "confidence (High/Medium/Low), map to MITRE ATT&CK if applicable, and recommend defensive "
            "actions (firewall rules, SIEM detections, threat hunting queries)."
        )
        return self.chat(prompt)

    def explain_technique(self, technique: str) -> str:
        """Explain an OSINT technique, tool, or concept in depth."""
        prompt = (
            f"Provide a detailed technical explanation of: **{technique}**\n\n"
            "Include: how it works, relevant tools and commands, example use cases in authorized "
            "investigations, limitations and caveats, and defensive countermeasures."
        )
        return self.chat(prompt)

    def reset(self) -> None:
        """Clear conversation history to start a fresh session."""
        self.conversation_history = []