File size: 5,241 Bytes
34367da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python3
"""
🔓 QUICK TDC OUTLOOK HARVEST
Direkte fra lokal Outlook - ingen admin!
"""
import hashlib
import json
import os
from datetime import datetime, timedelta
from pathlib import Path

import pythoncom
import win32com.client
from neo4j import GraphDatabase

# Neo4j connection settings. Each value can be overridden with an environment
# variable of the same name; the literals below are only fallbacks so that
# existing invocations keep working.
# SECURITY NOTE(review): a real-looking password is committed here in plain
# text. Rotate it and remove the fallback once NEO4J_PASSWORD is provided
# through the environment everywhere this script runs.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://054eff27.databases.neo4j.io")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get(
    "NEO4J_PASSWORD", "Qrt37mkb0xBZ7_ts5tG1J70K2mVDGPMF2L7Njlm7cg8"
)

# Search terms; a mail is harvested when any of them occurs
# (case-insensitively) in its subject or in the scanned part of its body.
KEYWORDS = [
    "strategi", "cyber", "NIS2", "SOC", "MDR", "cloud", "Azure", "AI",
    "Copilot", "Columbus", "ERP", "budget", "kunde", "kontrakt", "SKI",
    "produkt", "arkitektur", "roadmap", "sikkerhed", "incident",
]

# Outlook object-model constants used by the harvest.
_OL_FOLDER_INBOX = 6  # OlDefaultFolders.olFolderInbox
_OL_MAIL_ITEM_CLASS = 43  # OlObjectClass.olMail


def _match_keywords(subject, body):
    """Return the entries of KEYWORDS that occur in *subject* or *body*.

    Both texts must already be lower-cased. Matching is a plain substring
    test against the lower-cased keyword.
    """
    # NOTE(review): substring matching means short terms such as "AI" also hit
    # inside longer words (e.g. "email") - confirm this is the intended recall.
    hits = []
    for keyword in KEYWORDS:
        needle = keyword.lower()
        if needle in subject or needle in body:
            hits.append(keyword)
    return hits


def _extract_match(item, cutoff):
    """Build the harvest record for one Outlook mail item.

    Args:
        item: Outlook MailItem COM object.
        cutoff: naive ``datetime``; mails whose received *day* lies before it
            are ignored.

    Returns:
        A dict describing the mail, or ``None`` when the mail is too old or
        contains none of the KEYWORDS.
    """
    received = item.ReceivedTime
    # Only the calendar day of the mail is compared against the cutoff.
    if datetime(received.year, received.month, received.day) < cutoff:
        return None

    subject = str(item.Subject or "").lower()
    # Only the first 1500 characters of the body are searched.
    body = str(item.Body or "")[:1500].lower()

    matched = _match_keywords(subject, body)
    if not matched:
        return None

    return {
        # NOTE(review): only the first 50 characters of the EntryID are kept
        # and the Neo4j content hash is derived from this value - verify that
        # this prefix is still unique per mail.
        "id": item.EntryID[:50],
        "subject": item.Subject[:200] if item.Subject else "",
        "sender": str(item.SenderEmailAddress or ""),
        "sender_name": str(item.SenderName or ""),
        "received": received.strftime("%Y-%m-%d %H:%M"),
        "preview": body[:400],
        "keywords": matched,
        "has_attachments": item.Attachments.Count > 0,
    }


def _store_email(driver, email_data):
    """MERGE one harvested mail into Neo4j and link it to its DataSource node."""
    content_hash = hashlib.md5(
        f"{email_data['subject']}:{email_data['id']}".encode()
    ).hexdigest()
    with driver.session() as session:
        session.run(
            """
                        MERGE (e:OutlookEmail {contentHash: $hash})
                        ON CREATE SET
                            e.subject = $subject,
                            e.sender = $sender,
                            e.senderName = $senderName,
                            e.received = $received,
                            e.preview = $preview,
                            e.keywords = $keywords,
                            e.hasAttachments = $hasAtt,
                            e.harvestedAt = datetime()
                        MERGE (ds:DataSource {name: 'TDC_Outlook_Local'})
                        MERGE (e)-[:HARVESTED_FROM]->(ds)
                    """,
            hash=content_hash,
            subject=email_data["subject"],
            sender=email_data["sender"],
            senderName=email_data["sender_name"],
            received=email_data["received"],
            preview=email_data["preview"],
            keywords=email_data["keywords"],
            hasAtt=email_data["has_attachments"],
        )


def main(max_scanned=5000, max_age_days=180):
    """Harvest keyword-matching mails from the local Outlook inbox.

    Iterates the default inbox (sorted by ReceivedTime, descending), keeps the
    mails that are at most *max_age_days* old and mention one of KEYWORDS,
    writes each match to Neo4j and finally dumps all matches plus run
    statistics to a timestamped JSON file under ``data/outlook_local_harvest``.

    Args:
        max_scanned: stop after this many mail items have been looked at.
        max_age_days: ignore mails received more than this many days ago.

    A failure on a single item (COM error, Neo4j error) is reported and
    counted in ``stats["errors"]`` but does not abort the run. The Neo4j
    driver and the COM runtime are released even if the run fails.
    """
    print("=" * 60)
    print("🔓 TDC OUTLOOK QUICK HARVEST")
    print("=" * 60)

    results = []
    stats = {"scanned": 0, "matched": 0, "errors": 0}
    driver = None

    pythoncom.CoInitialize()
    try:
        outlook = win32com.client.Dispatch('Outlook.Application')
        ns = outlook.GetNamespace('MAPI')

        print("✅ Forbundet til: clak@tdc.dk")

        # Neo4j
        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

        # Fetch the inbox, newest mail first.
        inbox = ns.GetDefaultFolder(_OL_FOLDER_INBOX)
        items = inbox.Items
        items.Sort("[ReceivedTime]", True)

        total = items.Count
        print(f"📧 Emails i Inbox: {total}")
        print(f"🔍 Søger efter: {len(KEYWORDS)} keywords")
        print()

        cutoff = datetime.now() - timedelta(days=max_age_days)

        # Scan emails
        for item in items:
            counted = False
            try:
                if item.Class != _OL_MAIL_ITEM_CLASS:
                    continue

                stats["scanned"] += 1
                counted = True

                email_data = _extract_match(item, cutoff)
                if email_data is not None:
                    stats["matched"] += 1
                    results.append(email_data)
                    _store_email(driver, email_data)
                    print(
                        f"  ✅ [{', '.join(email_data['keywords'][:2])}] "
                        f"{email_data['subject'][:60]}"
                    )
            except Exception as exc:
                # Best effort: one unreadable item or failed write must not
                # stop the harvest, but it must not vanish silently either.
                stats["errors"] += 1
                print(f"  ⚠️ Fejl, springer over: {exc}")

            # Progress and limit are checked for every counted mail, including
            # those rejected as too old, so the cap always takes effect.
            if not counted:
                continue

            if stats["scanned"] % 500 == 0:
                print(f"  ... scannet {stats['scanned']}/{total}")

            if stats["scanned"] >= max_scanned:
                print(f"  (stop ved {max_scanned})")
                break

        # Save JSON
        output = Path("data/outlook_local_harvest")
        output.mkdir(parents=True, exist_ok=True)
        output_file = output / f"tdc_emails_{datetime.now().strftime('%Y%m%d_%H%M')}.json"

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump({"stats": stats, "emails": results}, f, indent=2, ensure_ascii=False)
    finally:
        if driver is not None:
            driver.close()
        pythoncom.CoUninitialize()

    print()
    print("=" * 60)
    print("📊 RESULTAT")
    print("=" * 60)
    print(f"   Scannet:  {stats['scanned']}")
    print(f"   Matched:  {stats['matched']}")
    print(f"   Fejl:     {stats['errors']}")
    print(f"   Gemt:     {output_file}")
    print("=" * 60)

# Run the harvest only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()