#!/usr/bin/env python3
"""Quick SharePoint link extractor for Outlook.

Scans the most recent messages of an Outlook inbox through the COM API,
collects every SharePoint URL found in the message bodies, records the
messages that mention one of a fixed set of keywords, and writes the
result to ``data/outlook_harvest/sharepoint_links.json``.
"""

import json
import re
from datetime import datetime
from pathlib import Path

import pythoncom
import win32com.client

# olFolderInbox in Outlook's OlDefaultFolders enumeration.
_OL_FOLDER_INBOX = 6

# Default number of (most recent) messages to scan.
_DEFAULT_MAX_EMAILS = 2000

# Compiled once; the scan loop applies it to every message body.
_SP_LINK_RE = re.compile(r'https://[a-zA-Z0-9.-]*sharepoint\.com[^\s<>"\']*')

# Lower-case keywords that mark a message as relevant.
_KEYWORDS = (
    'strategi', 'cyber', 'cloud', 'ai', 'budget',
    'kunde', 'produkt', 'columbus', 'nis2',
)


def _compile_keyword_pattern(keywords):
    """Build one regex that matches any of *keywords* in lower-cased text.

    Keywords of three characters or fewer (e.g. ``ai``) are matched as whole
    words only: as plain substrings they hit unrelated words such as
    "email" or "available" and would flag almost every message. Longer
    keywords stay substring matches so inflected and compound forms
    (e.g. "kunder", "cybersikkerhed") are still found.
    """
    parts = []
    for kw in keywords:
        escaped = re.escape(kw)
        parts.append(rf"\b{escaped}\b" if len(kw) <= 3 else escaped)
    return re.compile("|".join(parts))


def _find_inbox(ns):
    """Return the inbox folder to scan from the MAPI namespace *ns*.

    Prefers the "Indbakke" folder of the first store whose name contains
    "tdc" (case-insensitive); falls back to the profile's default inbox.
    """
    for account in ns.Folders:
        if "tdc" in account.Name.lower():
            try:
                return account.Folders["Indbakke"]
            except Exception:
                # This store has no "Indbakke" folder; try the next store.
                continue
    return ns.GetDefaultFolder(_OL_FOLDER_INBOX)


def quick_extract(max_emails=_DEFAULT_MAX_EMAILS):
    """Harvest SharePoint links from the newest messages in the inbox.

    Args:
        max_emails: Upper bound on the number of messages scanned, newest
            first. Defaults to 2000.

    Returns:
        The set of unique SharePoint URLs found in the scanned bodies.

    Side effects:
        Prints progress and a summary to stdout and writes
        ``data/outlook_harvest/sharepoint_links.json`` (relative to the
        current working directory), creating the directory if needed.
    """
    sp_links = set()
    relevant_emails = []
    keyword_re = _compile_keyword_pattern(_KEYWORDS)
    count = 0

    pythoncom.CoInitialize()
    try:
        outlook = win32com.client.Dispatch("Outlook.Application")
        ns = outlook.GetNamespace("MAPI")
        inbox = _find_inbox(ns)

        print(f"📧 Scanning {inbox.Items.Count} emails...")

        items = inbox.Items
        items.Sort("[ReceivedTime]", True)  # True = descending, newest first

        for item in items:
            if count >= max_emails:
                break
            count += 1

            if count % 200 == 0:
                print(f" Scanned {count} emails, found {len(sp_links)} SharePoint links...")

            try:
                subject = str(getattr(item, 'Subject', '') or '')
                body = str(getattr(item, 'Body', '') or '')

                # Find SharePoint links
                links = _SP_LINK_RE.findall(body)
                sp_links.update(links)

                # Check keywords
                text = (subject + ' ' + body).lower()
                if keyword_re.search(text):
                    relevant_emails.append({
                        "subject": subject[:150],
                        "from": str(getattr(item, 'SenderName', '')),
                        "date": str(getattr(item, 'ReceivedTime', ''))[:19],
                        "sp_links": links[:5],
                    })
            except Exception:
                # Best effort: skip items whose properties cannot be read.
                # Not a bare ``except`` so Ctrl+C still interrupts the scan.
                continue
    finally:
        # Always balance CoInitialize, even when Outlook access fails.
        pythoncom.CoUninitialize()

    # Sorted so console output and the JSON file are stable between runs.
    ordered_links = sorted(sp_links)

    print("\n✅ DONE!")
    print(f" 📧 Emails scanned: {count}")
    print(f" 🔗 Unique SharePoint links: {len(sp_links)}")
    print(f" 📋 Relevant emails: {len(relevant_emails)}")

    print("\n🔗 SHAREPOINT LINKS:")
    for link in ordered_links[:30]:
        print(f" {link[:100]}")

    # Save results
    output = Path("data/outlook_harvest")
    output.mkdir(parents=True, exist_ok=True)

    with open(output / "sharepoint_links.json", 'w', encoding='utf-8') as f:
        json.dump({
            "timestamp": datetime.now().isoformat(),
            "links": ordered_links,
            "relevant_emails": relevant_emails[:100],
        }, f, indent=2, ensure_ascii=False, default=str)

    print("\n📁 Saved to data/outlook_harvest/sharepoint_links.json")

    return sp_links


if __name__ == "__main__":
    quick_extract()