File size: 2,912 Bytes
34367da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python3
"""Quick SharePoint Link Extractor fra Outlook"""
import win32com.client
import pythoncom
import re
import json
from pathlib import Path
from datetime import datetime

# SharePoint URL: runs until the first whitespace, angle bracket or quote.
_SP_LINK_RE = re.compile(r'https://[a-zA-Z0-9.-]*sharepoint\.com[^\s<>"\']*')

# Keywords used when the caller does not supply its own list.
_DEFAULT_KEYWORDS = ('strategi', 'cyber', 'cloud', 'ai', 'budget', 'kunde',
                     'produkt', 'columbus', 'nis2')

# Folder-type constant passed to GetDefaultFolder for the default inbox.
_OL_FOLDER_INBOX = 6

# Keywords this short are matched as whole words only; see _compile_keywords.
_SHORT_KEYWORD_LEN = 3


def _compile_keywords(keywords):
    """Build one regex that matches any keyword in lower-cased text.

    Keywords longer than ``_SHORT_KEYWORD_LEN`` match anywhere in the text
    (plain substring search). Shorter ones such as ``'ai'`` must stand alone
    as a word; as a substring ``'ai'`` also hits "mail", "email", "said", ...
    which would mark almost every message as relevant.

    Returns ``None`` when *keywords* is empty, meaning "nothing is relevant".
    """
    parts = []
    for kw in keywords:
        escaped = re.escape(kw.lower())
        if len(kw) <= _SHORT_KEYWORD_LEN:
            escaped = rf"\b{escaped}\b"
        parts.append(escaped)
    if not parts:
        return None
    return re.compile("|".join(parts))


def _find_inbox(ns):
    """Return the inbox folder to scan.

    Prefers the "Indbakke" folder of the first top-level store whose name
    contains "tdc" (case-insensitive); falls back to the default inbox of
    the MAPI namespace *ns* when no such folder can be opened.
    """
    for account in ns.Folders:
        if "tdc" not in account.Name.lower():
            continue
        try:
            folder = account.Folders["Indbakke"]
        except Exception:
            # Lookup failed for this store; keep looking at the others.
            continue
        if folder is not None:
            return folder
    return ns.GetDefaultFolder(_OL_FOLDER_INBOX)


def _scan_inbox(max_emails, keyword_re):
    """Scan the newest *max_emails* inbox items.

    Returns ``(sp_links, relevant_emails, scanned, skipped)`` made of plain
    Python objects only, so no COM references are handed back to the caller.
    """
    outlook = win32com.client.Dispatch("Outlook.Application")
    ns = outlook.GetNamespace("MAPI")
    inbox = _find_inbox(ns)

    items = inbox.Items
    print(f"📧 Scanning {items.Count} emails...")
    # Descending by received time, so the cap keeps the most recent items.
    items.Sort("[ReceivedTime]", True)

    sp_links = set()
    relevant_emails = []
    scanned = 0
    skipped = 0

    for item in items:
        if scanned >= max_emails:
            break
        scanned += 1

        if scanned % 200 == 0:
            print(f"   Scanned {scanned} emails, found {len(sp_links)} SharePoint links...")

        try:
            subject = str(getattr(item, 'Subject', '') or '')
            body = str(getattr(item, 'Body', '') or '')

            links = _SP_LINK_RE.findall(body)
            sp_links.update(links)

            text = (subject + ' ' + body).lower()
            if keyword_re is not None and keyword_re.search(text):
                relevant_emails.append({
                    "subject": subject[:150],
                    "from": str(getattr(item, 'SenderName', '')),
                    "date": str(getattr(item, 'ReceivedTime', ''))[:19],
                    "sp_links": links[:5]
                })
        except Exception:
            # Best effort: an item that cannot be read is counted and
            # skipped. Ctrl+C / SystemExit are deliberately not caught.
            skipped += 1

    return sp_links, relevant_emails, scanned, skipped


def quick_extract(max_emails: int = 2000, keywords=None):
    """Harvest SharePoint links from the Outlook inbox.

    Scans the most recently received items, collects every SharePoint URL
    found in the message bodies, records the messages whose subject or body
    mentions one of *keywords*, prints a summary, and writes the result to
    ``data/outlook_harvest/sharepoint_links.json`` (relative to the current
    working directory).

    Args:
        max_emails: Maximum number of items to scan, newest first.
        keywords: Iterable of keywords marking a message as relevant.
            ``None`` selects the built-in default list.

    Returns:
        The set of unique SharePoint URLs that were found.
    """
    keyword_re = _compile_keywords(
        _DEFAULT_KEYWORDS if keywords is None else keywords
    )

    pythoncom.CoInitialize()
    try:
        sp_links, relevant_emails, count, skipped = _scan_inbox(
            max_emails, keyword_re
        )
    finally:
        # Always balance CoInitialize, even when the scan fails.
        pythoncom.CoUninitialize()

    # Sort once so console output and the JSON file are stable across runs.
    ordered_links = sorted(sp_links)

    print(f"\n✅ DONE!")
    print(f"   📧 Emails scanned: {count}")
    print(f"   🔗 Unique SharePoint links: {len(sp_links)}")
    print(f"   📋 Relevant emails: {len(relevant_emails)}")
    if skipped:
        print(f"   Skipped (unreadable): {skipped}")

    print(f"\n🔗 SHAREPOINT LINKS:")
    for link in ordered_links[:30]:
        print(f"   {link[:100]}")

    # Save results
    output = Path("data/outlook_harvest")
    output.mkdir(parents=True, exist_ok=True)

    with open(output / "sharepoint_links.json", 'w', encoding='utf-8') as f:
        json.dump({
            "timestamp": datetime.now().isoformat(),
            "links": ordered_links,
            "relevant_emails": relevant_emails[:100]
        }, f, indent=2, ensure_ascii=False, default=str)

    print(f"\n📁 Saved to data/outlook_harvest/sharepoint_links.json")

    return sp_links

if __name__ == "__main__":
    quick_extract()