Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| """Quick SharePoint Link Extractor fra Outlook""" | |
| import win32com.client | |
| import pythoncom | |
| import re | |
| import json | |
| from pathlib import Path | |
| from datetime import datetime | |
# Matches a SharePoint URL up to the first whitespace, angle bracket or quote.
_SHAREPOINT_LINK_RE = re.compile(r'https://[a-zA-Z0-9.-]*sharepoint\.com[^\s<>"\']*')

# Substrings that mark an email as "relevant" (compared against lowercased text).
_DEFAULT_KEYWORDS = ('strategi', 'cyber', 'cloud', 'ai', 'budget', 'kunde', 'produkt', 'columbus', 'nis2')

# Value passed to Namespace.GetDefaultFolder for the fallback inbox
# (olFolderInbox in Outlook's OlDefaultFolders enumeration).
_OL_FOLDER_INBOX = 6


def _find_inbox(ns):
    """Return the "Indbakke" folder of the first "tdc" store, else the default inbox."""
    for account in ns.Folders:
        if "tdc" not in account.Name.lower():
            continue
        try:
            return account.Folders["Indbakke"]
        except Exception:
            # This store has no folder with that name; try the next store.
            continue
    return ns.GetDefaultFolder(_OL_FOLDER_INBOX)


def _extract_links(body):
    """Return the SharePoint URLs found in *body*, de-duplicated, in order of appearance."""
    # URLs quoted in running text tend to carry trailing sentence punctuation,
    # which would otherwise make the same link look like several distinct ones.
    cleaned = (match.rstrip('.,;:') for match in _SHAREPOINT_LINK_RE.findall(body))
    return list(dict.fromkeys(cleaned))


def quick_extract(max_emails=2000, keywords=_DEFAULT_KEYWORDS):
    """Scan recent Outlook inbox emails for SharePoint links and keyword hits.

    Emails are visited newest first. Every SharePoint URL found in a message
    body is collected, and messages whose subject or body contains one of
    *keywords* are recorded with their subject, sender, date and first five
    links. Results are printed and written to
    ``data/outlook_harvest/sharepoint_links.json``.

    Args:
        max_emails: Maximum number of emails to scan.
        keywords: Iterable of substrings; matching is case-insensitive.

    Returns:
        The set of unique SharePoint links found.
    """
    # NOTE(review): matching is by substring, so a short keyword such as 'ai'
    # also hits "mail"/"email" and flags most messages — confirm this is wanted.
    keywords = tuple(kw.lower() for kw in keywords)

    pythoncom.CoInitialize()
    try:
        outlook = win32com.client.Dispatch("Outlook.Application")
        ns = outlook.GetNamespace("MAPI")
        inbox = _find_inbox(ns)

        print(f"π§ Scanning {inbox.Items.Count} emails...")

        sp_links = set()
        relevant_emails = []

        items = inbox.Items
        items.Sort("[ReceivedTime]", True)  # descending: newest first

        count = 0
        skipped = 0
        for item in items:
            if count >= max_emails:  # only scan the most recent emails
                break
            count += 1
            if count % 200 == 0:
                print(f" Scanned {count} emails, found {len(sp_links)} SharePoint links...")

            try:
                subject = str(getattr(item, 'Subject', '') or '')
                body = str(getattr(item, 'Body', '') or '')

                links = _extract_links(body)
                sp_links.update(links)

                text = (subject + ' ' + body).lower()
                if any(kw in text for kw in keywords):
                    relevant_emails.append({
                        "subject": subject[:150],
                        "from": str(getattr(item, 'SenderName', '')),
                        "date": str(getattr(item, 'ReceivedTime', ''))[:19],
                        "sp_links": links[:5],
                    })
            except Exception:
                # Best effort: an unreadable item must not abort the scan, but
                # Ctrl-C / SystemExit still propagate (unlike a bare except).
                skipped += 1
                continue

        print("\nβ DONE!")
        print(f" π§ Emails scanned: {count}")
        if skipped:
            print(f" Skipped {skipped} unreadable items")
        print(f" π Unique SharePoint links: {len(sp_links)}")
        print(f" π Relevant emails: {len(relevant_emails)}")

        # Sorted so the listing and the JSON file are stable between runs.
        ordered_links = sorted(sp_links)

        print("\nπ SHAREPOINT LINKS:")
        for link in ordered_links[:30]:
            print(f" {link[:100]}")

        # Save results
        output = Path("data/outlook_harvest")
        output.mkdir(parents=True, exist_ok=True)
        with open(output / "sharepoint_links.json", 'w', encoding='utf-8') as f:
            json.dump({
                "timestamp": datetime.now().isoformat(),
                "links": ordered_links,
                "relevant_emails": relevant_emails[:100],
            }, f, indent=2, ensure_ascii=False, default=str)

        print("\nπ Saved to data/outlook_harvest/sharepoint_links.json")
        return sp_links
    finally:
        # Always balance CoInitialize, even when the scan fails part-way.
        pythoncom.CoUninitialize()
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    quick_extract()