#!/usr/bin/env python3
"""Quick SharePoint Link Extractor fra Outlook"""
import win32com.client
import pythoncom
import re
import json
from pathlib import Path
from datetime import datetime
# Numeric value of Outlook's OlDefaultFolders.olFolderInbox.
_OL_FOLDER_INBOX = 6

# https URLs whose host ends in "sharepoint.com"; a match stops at whitespace,
# angle brackets or quotes. Compiled once instead of once per message.
_SP_LINK_RE = re.compile(r'https://[a-zA-Z0-9.-]*sharepoint\.com[^\s<>"\']*')

# Lower-case keywords that mark a message as relevant.
_KEYWORDS = ('strategi', 'cyber', 'cloud', 'ai', 'budget', 'kunde', 'produkt', 'columbus', 'nis2')

# Very short keywords are matched as whole words only: as a plain substring,
# "ai" is found inside "mail", "email", "detail", ... and would flag almost
# every message. Longer keywords stay substring matches so that inflected and
# compound forms still hit.
_KEYWORD_RE = re.compile(
    '|'.join(
        rf'\b{re.escape(kw)}\b' if len(kw) <= 3 else re.escape(kw)
        for kw in _KEYWORDS
    )
)


def _find_inbox(ns):
    """Return "Indbakke" of the first store whose name contains "tdc", else the default inbox."""
    for account in ns.Folders:
        if "tdc" in account.Name.lower():
            try:
                return account.Folders["Indbakke"]
            except Exception:
                # Lookup failed for this store (e.g. no folder with that
                # name); keep looking at the remaining stores.
                continue
    return ns.GetDefaultFolder(_OL_FOLDER_INBOX)


def _scan_inbox(max_emails):
    """Scan the newest messages and return (scanned, skipped, sp_links, relevant_emails)."""
    outlook = win32com.client.Dispatch("Outlook.Application")
    ns = outlook.GetNamespace("MAPI")
    items = _find_inbox(ns).Items
    print(f"๐ง Scanning {items.Count} emails...")
    items.Sort("[ReceivedTime]", True)  # second argument: descending, i.e. newest first

    sp_links = set()
    relevant_emails = []
    scanned = 0
    skipped = 0
    for item in items:
        if scanned >= max_emails:
            break
        scanned += 1
        if scanned % 200 == 0:
            print(f" Scanned {scanned} emails, found {len(sp_links)} SharePoint links...")
        try:
            subject = str(getattr(item, 'Subject', '') or '')
            body = str(getattr(item, 'Body', '') or '')

            links = _SP_LINK_RE.findall(body)
            sp_links.update(links)

            text = (subject + ' ' + body).lower()
            if _KEYWORD_RE.search(text):
                relevant_emails.append({
                    "subject": subject[:150],
                    "from": str(getattr(item, 'SenderName', '')),
                    "date": str(getattr(item, 'ReceivedTime', ''))[:19],
                    "sp_links": links[:5],
                })
        except Exception:
            # Best effort: an item that cannot be read is skipped, but counted.
            # `Exception` rather than a bare `except` so Ctrl+C still aborts.
            skipped += 1
    return scanned, skipped, sp_links, relevant_emails


def quick_extract(max_emails=2000):
    """Harvest SharePoint links from the newest Outlook inbox messages.

    Connects to a running/installed Outlook through COM, walks the inbox
    newest-first, collects every ``*.sharepoint.com`` https URL found in the
    message bodies and records messages whose subject or body mentions one of
    the configured keywords. A summary is printed and the result is written
    to ``data/outlook_harvest/sharepoint_links.json``.

    Args:
        max_emails: Upper bound on the number of messages to scan.

    Returns:
        The set of unique SharePoint URLs that were found.
    """
    pythoncom.CoInitialize()
    try:
        scanned, skipped, sp_links, relevant_emails = _scan_inbox(max_emails)
    finally:
        # Always balance CoInitialize, also when Outlook access fails.
        pythoncom.CoUninitialize()

    # Sorted so the printed sample and the saved file are reproducible.
    ordered_links = sorted(sp_links)

    # NOTE(review): the non-ASCII glyphs at the start of these messages look
    # like mis-decoded emoji and are kept as found. The "DONE!" literal was
    # split across two lines by that damage; it is rejoined here with the
    # glyph its surviving bytes suggest (presumably U+2705) - confirm against
    # the original file.
    print("\n✅ DONE!")
    print(f" ๐ง Emails scanned: {scanned}")
    print(f" ๐ Unique SharePoint links: {len(sp_links)}")
    print(f" ๐ Relevant emails: {len(relevant_emails)}")
    if skipped:
        print(f" Unreadable items skipped: {skipped}")
    print("\n๐ SHAREPOINT LINKS:")
    for link in ordered_links[:30]:
        print(f" {link[:100]}")

    # Save results
    output = Path("data/outlook_harvest")
    output.mkdir(parents=True, exist_ok=True)
    with open(output / "sharepoint_links.json", 'w', encoding='utf-8') as f:
        json.dump({
            "timestamp": datetime.now().isoformat(),
            "links": ordered_links,
            "relevant_emails": relevant_emails[:100],
        }, f, indent=2, ensure_ascii=False, default=str)
    print("\n๐ Saved to data/outlook_harvest/sharepoint_links.json")
    return sp_links
def _main():
    """Script entry point: run the extraction; its return value is not used."""
    quick_extract()


if __name__ == "__main__":
    _main()