Spaces:
Paused
Paused
File size: 5,241 Bytes
34367da | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 | #!/usr/bin/env python3
"""
🔓 QUICK TDC OUTLOOK HARVEST
Direkte fra lokal Outlook - ingen admin!
"""
import hashlib
import json
import os
from datetime import datetime, timedelta
from pathlib import Path

import pythoncom
import win32com.client
from neo4j import GraphDatabase
# Neo4j connection settings. Each value can be overridden with an environment
# variable of the same name; the literals are fallbacks so that existing
# invocations keep working unchanged.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://054eff27.databases.neo4j.io")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
# NOTE(review): a database password is committed in source here. It should be
# rotated and supplied only via the NEO4J_PASSWORD environment variable, after
# which this fallback literal can be removed.
NEO4J_PASSWORD = os.environ.get(
    "NEO4J_PASSWORD", "Qrt37mkb0xBZ7_ts5tG1J70K2mVDGPMF2L7Njlm7cg8"
)

# Keywords matched case-insensitively (as substrings) against each email's
# subject and the beginning of its body. Order matters: it determines the
# order of the "keywords" list stored for every matched email.
KEYWORDS = [
    "strategi", "cyber", "NIS2", "SOC", "MDR", "cloud", "Azure", "AI",
    "Copilot", "Columbus", "ERP", "budget", "kunde", "kontrakt", "SKI",
    "produkt", "arkitektur", "roadmap", "sikkerhed", "incident",
]
# Outlook object-model constants used below.
_OL_FOLDER_INBOX = 6   # olFolderInbox
_OL_CLASS_MAIL = 43    # MailItem

# Scan tuning.
_LOOKBACK_DAYS = 180     # only emails newer than this are considered
_BODY_SCAN_CHARS = 1500  # how much of the body is searched for keywords
_MAX_SCANNED = 5000      # hard stop on the number of mail items inspected
_PROGRESS_EVERY = 500    # print a progress line every N scanned mails
_MAX_ERRORS_SHOWN = 5    # per-item failures printed before going quiet

# Upserts one email node keyed by its content hash and links it to the
# data-source node. Properties are only written when the node is created.
_UPSERT_EMAIL_QUERY = """
MERGE (e:OutlookEmail {contentHash: $hash})
ON CREATE SET
    e.subject = $subject,
    e.sender = $sender,
    e.senderName = $senderName,
    e.received = $received,
    e.preview = $preview,
    e.keywords = $keywords,
    e.hasAttachments = $hasAtt,
    e.harvestedAt = datetime()
MERGE (ds:DataSource {name: 'TDC_Outlook_Local'})
MERGE (e)-[:HARVESTED_FROM]->(ds)
"""


def _extract_match(item, cutoff, keywords):
    """Return the harvest record for a mail item, or None if it is skipped.

    An item is skipped when it was received before *cutoff* or when none of
    the keywords occurs in its subject or in the first part of its body.

    *keywords* is a sequence of ``(original, lowercased)`` pairs so that the
    lowercasing is done once rather than once per email.
    """
    received = item.ReceivedTime
    # Rebuild a naive date-only datetime from the COM timestamp before
    # comparing it with the naive cutoff (kept exactly as in the original).
    if datetime(received.year, received.month, received.day) < cutoff:
        return None

    raw_subject = item.Subject
    subject = str(raw_subject or "").lower()
    body = str(item.Body or "")[:_BODY_SCAN_CHARS].lower()

    matched = [kw for kw, needle in keywords if needle in subject or needle in body]
    if not matched:
        return None

    return {
        # NOTE(review): only the first 50 characters of the EntryID are kept.
        # If that prefix is shared by many items in the same store, the
        # content hash below effectively depends on the subject alone and
        # distinct emails with equal subjects would merge into one node.
        # Confirm before changing: altering the hash input would duplicate
        # nodes that were already harvested.
        "id": item.EntryID[:50],
        "subject": raw_subject[:200] if raw_subject else "",
        "sender": str(item.SenderEmailAddress or ""),
        "sender_name": str(item.SenderName or ""),
        "received": received.strftime("%Y-%m-%d %H:%M"),
        "preview": body[:400],
        "keywords": matched,
        "has_attachments": item.Attachments.Count > 0,
    }


def _store_email(driver, email_data):
    """Upsert one harvested email into Neo4j, keyed by its content hash."""
    digest_input = f"{email_data['subject']}:{email_data['id']}"
    content_hash = hashlib.md5(digest_input.encode()).hexdigest()
    with driver.session() as session:
        session.run(
            _UPSERT_EMAIL_QUERY,
            hash=content_hash,
            subject=email_data["subject"],
            sender=email_data["sender"],
            senderName=email_data["sender_name"],
            received=email_data["received"],
            preview=email_data["preview"],
            keywords=email_data["keywords"],
            hasAtt=email_data["has_attachments"],
        )


def _scan_inbox(ns, driver, results, stats):
    """Walk the Inbox newest-first, appending matches to *results*.

    *stats* is updated in place ("scanned", "matched", "errors"). A failure
    on a single item is reported and counted but never aborts the scan.
    """
    inbox = ns.GetDefaultFolder(_OL_FOLDER_INBOX)
    items = inbox.Items
    items.Sort("[ReceivedTime]", True)  # True = descending, newest first
    total = items.Count
    print(f"📧 Emails i Inbox: {total}")
    print(f"🔍 Søger efter: {len(KEYWORDS)} keywords")
    print()

    cutoff = datetime.now() - timedelta(days=_LOOKBACK_DAYS)
    keywords = [(kw, kw.lower()) for kw in KEYWORDS]

    for item in items:
        scanned_before = stats["scanned"]
        try:
            if item.Class != _OL_CLASS_MAIL:
                continue
            stats["scanned"] += 1

            email_data = _extract_match(item, cutoff, keywords)
            if email_data is not None:
                # Record the match before talking to Neo4j so the JSON dump
                # still contains it when the database write fails.
                stats["matched"] += 1
                results.append(email_data)
                _store_email(driver, email_data)
                label = ", ".join(email_data["keywords"][:2])
                print(f" ✅ [{label}] {email_data['subject'][:60]}")
        except Exception as exc:
            # Best effort: a broken item or a failed write must not stop the
            # harvest, but it must not disappear silently either.
            stats["errors"] += 1
            if stats["errors"] <= _MAX_ERRORS_SHOWN:
                print(f" ⚠️ Fejl, email sprunget over: {exc!r}")

        # Progress and limit apply to every counted mail item. Previously
        # they were skipped for mails filtered out by date, so the limit
        # never triggered once the scan reached older mail.
        if stats["scanned"] == scanned_before:
            continue
        if stats["scanned"] % _PROGRESS_EVERY == 0:
            print(f" ... scannet {stats['scanned']}/{total}")
        if stats["scanned"] >= _MAX_SCANNED:
            print(f" (stop ved {_MAX_SCANNED})")
            break


def main() -> None:
    """Harvest keyword-matching emails from the local Outlook Inbox.

    Connects to the running Outlook profile through COM, scans the Inbox
    newest-first, stores every email from the look-back window that matches
    one of ``KEYWORDS`` in Neo4j, and finally writes all matches plus scan
    statistics to a timestamped JSON file under ``data/outlook_local_harvest``.

    The Neo4j driver and the COM apartment are released even when the scan
    fails. The statistics additionally contain an ``errors`` counter with the
    number of items that could not be processed.
    """
    print("=" * 60)
    print("🔓 TDC OUTLOOK QUICK HARVEST")
    print("=" * 60)

    results = []
    stats = {"scanned": 0, "matched": 0, "errors": 0}

    pythoncom.CoInitialize()
    try:
        outlook = win32com.client.Dispatch('Outlook.Application')
        ns = outlook.GetNamespace('MAPI')
        print("✅ Forbundet til: clak@tdc.dk")

        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
        try:
            _scan_inbox(ns, driver, results, stats)
        finally:
            driver.close()
    finally:
        pythoncom.CoUninitialize()

    # Save JSON
    output = Path("data/outlook_local_harvest")
    output.mkdir(parents=True, exist_ok=True)
    output_file = output / f"tdc_emails_{datetime.now().strftime('%Y%m%d_%H%M')}.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump({"stats": stats, "emails": results}, f, indent=2, ensure_ascii=False)

    print()
    print("=" * 60)
    print("📊 RESULTAT")
    print("=" * 60)
    print(f" Scannet: {stats['scanned']}")
    print(f" Matched: {stats['matched']}")
    if stats["errors"]:
        print(f" Fejl: {stats['errors']}")
    print(f" Gemt: {output_file}")
    print("=" * 60)
# Script entry point: run the harvest only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|