#!/usr/bin/env python3
"""Quick TDC Outlook harvest.

Scans the default inbox of the local Outlook profile through COM (no admin
rights needed), keeps mails from the last ``LOOKBACK_DAYS`` days whose subject
or body mentions one of ``KEYWORDS``, merges every match into Neo4j as an
``OutlookEmail`` node and finally dumps all matches to a JSON report.

Configuration is taken from the environment:

* ``NEO4J_URI``      - optional, defaults to the previously hard-coded URI
* ``NEO4J_USER``     - optional, defaults to ``neo4j``
* ``NEO4J_PASSWORD`` - required; there is deliberately no default
"""
import hashlib
import json
import logging
import os
from datetime import datetime, timedelta
from pathlib import Path

import pythoncom
import win32com.client
from neo4j import GraphDatabase

logger = logging.getLogger(__name__)

NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://054eff27.databases.neo4j.io")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
# SECURITY: the password used to be committed in this file. It must now be
# supplied through the environment; the previously committed value should be
# treated as leaked and rotated.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "")

KEYWORDS = [
    "strategi", "cyber", "NIS2", "SOC", "MDR", "cloud", "Azure", "AI",
    "Copilot", "Columbus", "ERP", "budget", "kunde", "kontrakt", "SKI",
    "produkt", "arkitektur", "roadmap", "sikkerhed", "incident",
]

OL_FOLDER_INBOX = 6        # Outlook OlDefaultFolders.olFolderInbox
OL_MAIL_ITEM_CLASS = 43    # Outlook OlObjectClass.olMail
LOOKBACK_DAYS = 180        # only mails newer than this are considered
MAX_SCANNED = 5000         # hard cap on the number of mail items inspected
PROGRESS_EVERY = 500       # print a progress line every N scanned mails
BODY_SCAN_CHARS = 1500     # how much of the body is searched for keywords
PREVIEW_CHARS = 400        # how much of the (lower-cased) body is stored
SUBJECT_CHARS = 200        # stored subject length
ENTRY_ID_CHARS = 50        # stored EntryID prefix length

OUTPUT_DIR = Path("data/outlook_local_harvest")

# Lower-case each keyword once instead of once per mail.
_KEYWORD_NEEDLES = tuple((kw, kw.lower()) for kw in KEYWORDS)

_MERGE_EMAIL_QUERY = """
MERGE (e:OutlookEmail {contentHash: $hash})
ON CREATE SET
    e.subject = $subject,
    e.sender = $sender,
    e.senderName = $senderName,
    e.received = $received,
    e.preview = $preview,
    e.keywords = $keywords,
    e.hasAttachments = $hasAtt,
    e.harvestedAt = datetime()
MERGE (ds:DataSource {name: 'TDC_Outlook_Local'})
MERGE (e)-[:HARVESTED_FROM]->(ds)
"""

# NOTE(review): several user-facing strings below look double-encoded
# (mojibake where emoji / Danish letters were probably intended). They are
# left byte-for-byte as found; confirm the intended text and re-save as UTF-8.


def _matching_keywords(subject: str, body: str) -> list:
    """Return the keywords that occur in the lower-cased subject or body.

    Matching is a plain case-insensitive substring test, exactly as before.
    NOTE(review): short keywords such as "AI" therefore also hit inside longer
    words (e.g. "mail"); confirm whether that is intended.
    """
    return [
        keyword
        for keyword, needle in _KEYWORD_NEEDLES
        if needle in subject or needle in body
    ]


def _extract_match(item, cutoff: datetime):
    """Build the record for one mail item, or return ``None`` if it is skipped.

    A mail is skipped when the midnight of its received day lies before
    ``cutoff`` or when none of the keywords match. Any COM error raised while
    reading the item propagates to the caller.
    """
    received = item.ReceivedTime
    # Compare on a naive date-only value, as the original code did.
    if datetime(received.year, received.month, received.day) < cutoff:
        return None

    raw_subject = item.Subject
    subject = str(raw_subject or "").lower()
    body = str(item.Body or "")[:BODY_SCAN_CHARS].lower()

    matched = _matching_keywords(subject, body)
    if not matched:
        return None

    return {
        # NOTE(review): only a prefix of the EntryID is kept. Leading EntryID
        # bytes presumably identify the store rather than the message, so
        # this may not be unique per mail - confirm before relying on it.
        # Kept unchanged so existing contentHash values stay stable.
        "id": item.EntryID[:ENTRY_ID_CHARS],
        "subject": raw_subject[:SUBJECT_CHARS] if raw_subject else "",
        "sender": str(item.SenderEmailAddress or ""),
        "sender_name": str(item.SenderName or ""),
        "received": received.strftime("%Y-%m-%d %H:%M"),
        "preview": body[:PREVIEW_CHARS],
        "keywords": matched,
        "has_attachments": item.Attachments.Count > 0,
    }


def _store_email(session, record: dict) -> None:
    """Merge one harvested mail into Neo4j, keyed by its content hash."""
    # MD5 is only a de-duplication key here, not a security measure; changing
    # the algorithm would orphan nodes created by earlier runs.
    content_hash = hashlib.md5(
        f"{record['subject']}:{record['id']}".encode()
    ).hexdigest()
    session.run(
        _MERGE_EMAIL_QUERY,
        hash=content_hash,
        subject=record["subject"],
        sender=record["sender"],
        senderName=record["sender_name"],
        received=record["received"],
        preview=record["preview"],
        keywords=record["keywords"],
        hasAtt=record["has_attachments"],
    )


def _scan_inbox(items, session, cutoff: datetime, total: int) -> tuple:
    """Walk the inbox items and return ``(results, stats)``.

    Non-mail items are ignored. Failures on a single item are logged and
    counted in ``stats["errors"]`` instead of aborting the run. At most
    ``MAX_SCANNED`` mail items are inspected.
    """
    results = []
    stats = {"scanned": 0, "matched": 0, "errors": 0}

    for item in items:
        try:
            is_mail = item.Class == OL_MAIL_ITEM_CLASS
        except Exception as exc:  # COM items can fail on any attribute read
            stats["errors"] += 1
            logger.warning("Kunne ikke laese item-type: %s", exc)
            continue
        if not is_mail:
            continue

        stats["scanned"] += 1
        try:
            record = _extract_match(item, cutoff)
            if record is not None:
                stats["matched"] += 1
                results.append(record)
                _store_email(session, record)
                print(f" βœ… [{', '.join(record['keywords'][:2])}] {record['subject'][:60]}")
        except Exception as exc:
            # Deliberate best-effort boundary: one broken mail or one failed
            # write must not stop the harvest, but it must not be silent.
            stats["errors"] += 1
            logger.warning("Springer email over (#%d): %s", stats["scanned"], exc)

        # These checks run for every scanned mail. Previously they were
        # skipped for mails older than the cutoff, so the cap never triggered
        # once the (descending) scan reached old mail.
        if stats["scanned"] % PROGRESS_EVERY == 0:
            print(f" ... scannet {stats['scanned']}/{total}")
        if stats["scanned"] >= MAX_SCANNED:
            print(f" (stop ved {MAX_SCANNED})")
            break

    return results, stats


def _write_report(results: list, stats: dict) -> Path:
    """Write the harvest to a timestamped JSON file and return its path."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_file = OUTPUT_DIR / f"tdc_emails_{datetime.now().strftime('%Y%m%d_%H%M')}.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump({"stats": stats, "emails": results}, f, indent=2, ensure_ascii=False)
    return output_file


def main():
    """Run the harvest: scan Outlook, store matches in Neo4j, write JSON.

    Raises:
        SystemExit: if ``NEO4J_PASSWORD`` is not set in the environment.
    """
    if not NEO4J_PASSWORD:
        raise SystemExit("NEO4J_PASSWORD is not set; export it before running the harvest.")

    print("=" * 60)
    print("πŸ”“ TDC OUTLOOK QUICK HARVEST")
    print("=" * 60)

    pythoncom.CoInitialize()
    try:
        outlook = win32com.client.Dispatch('Outlook.Application')
        ns = outlook.GetNamespace('MAPI')
        print("βœ… Forbundet til: clak@tdc.dk")

        inbox = ns.GetDefaultFolder(OL_FOLDER_INBOX)
        items = inbox.Items
        items.Sort("[ReceivedTime]", True)  # newest first

        total = items.Count
        print(f"πŸ“§ Emails i Inbox: {total}")
        print(f"πŸ” SΓΈger efter: {len(KEYWORDS)} keywords")
        print()

        cutoff = datetime.now() - timedelta(days=LOOKBACK_DAYS)

        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
        try:
            # One session for the whole run instead of one per matched mail.
            with driver.session() as session:
                results, stats = _scan_inbox(items, session, cutoff, total)
        finally:
            driver.close()
    finally:
        # Always leave the COM apartment, even if the scan failed.
        pythoncom.CoUninitialize()

    output_file = _write_report(results, stats)

    print()
    print("=" * 60)
    print("πŸ“Š RESULTAT")
    print("=" * 60)
    print(f" Scannet: {stats['scanned']}")
    print(f" Matched: {stats['matched']}")
    print(f" Gemt: {output_file}")
    print("=" * 60)


if __name__ == "__main__":
    main()