Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
"""
Quick TDC Outlook harvest.

Reads mail directly from the locally installed Outlook profile - no admin
rights required.
"""
import hashlib
import json
import os
import re
from datetime import datetime, timedelta
from pathlib import Path

import pythoncom
import win32com.client
from neo4j import GraphDatabase
# Neo4j connection settings. Each value can be overridden through an
# environment variable of the same name.
#
# SECURITY: the password intentionally has no default. A real credential was
# previously committed in this file; it must be treated as compromised and
# rotated. Supply the new one via the NEO4J_PASSWORD environment variable.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://054eff27.databases.neo4j.io")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")

# Terms searched for (case-insensitively) in each email's subject and body.
KEYWORDS = ["strategi", "cyber", "NIS2", "SOC", "MDR", "cloud", "Azure", "AI",
            "Copilot", "Columbus", "ERP", "budget", "kunde", "kontrakt", "SKI",
            "produkt", "arkitektur", "roadmap", "sikkerhed", "incident"]
def _compile_keyword_patterns(keywords):
    """Return ``(keyword, compiled_regex)`` pairs for matching lower-cased text.

    All-uppercase keywords (acronyms such as "AI" or "SOC") must match as whole
    words; a plain substring test would make "AI" hit every "mail" and "SOC"
    hit "social". Other keywords keep substring semantics so inflected and
    compound forms ("kunden", "cybersikkerhed") still match.
    """
    compiled = []
    for keyword in keywords:
        pattern = re.escape(keyword.lower())
        if keyword.isupper():
            pattern = rf"(?<!\w){pattern}(?!\w)"
        compiled.append((keyword, re.compile(pattern)))
    return compiled


def _build_email_record(item, received, patterns):
    """Return the harvest record for a mail item, or None if no keyword matches.

    Only the subject and the first 1500 characters of the body are searched.
    """
    raw_subject = item.Subject
    subject = str(raw_subject or "").lower()
    body = str(item.Body or "")[:1500].lower()

    matched = [kw for kw, rx in patterns if rx.search(subject) or rx.search(body)]
    if not matched:
        return None

    return {
        # Use the full EntryID: its leading characters are shared by the items
        # of a store/folder, so a truncated prefix does not identify an email.
        "id": item.EntryID,
        "subject": raw_subject[:200] if raw_subject else "",
        "sender": str(item.SenderEmailAddress or ""),
        "sender_name": str(item.SenderName or ""),
        "received": received.strftime("%Y-%m-%d %H:%M"),
        "preview": body[:400],
        "keywords": matched,
        "has_attachments": item.Attachments.Count > 0,
    }


def _store_email(driver, email_data):
    """MERGE one harvested email into Neo4j, keyed by a subject+EntryID hash."""
    # NOTE: the hash now covers the full EntryID, so nodes written by earlier
    # runs (keyed on a truncated EntryID) will not be matched by this MERGE.
    content_hash = hashlib.md5(
        f"{email_data['subject']}:{email_data['id']}".encode()
    ).hexdigest()
    with driver.session() as session:
        session.run("""
            MERGE (e:OutlookEmail {contentHash: $hash})
            ON CREATE SET
            e.subject = $subject,
            e.sender = $sender,
            e.senderName = $senderName,
            e.received = $received,
            e.preview = $preview,
            e.keywords = $keywords,
            e.hasAttachments = $hasAtt,
            e.harvestedAt = datetime()
            MERGE (ds:DataSource {name: 'TDC_Outlook_Local'})
            MERGE (e)-[:HARVESTED_FROM]->(ds)
            """,
            hash=content_hash,
            subject=email_data["subject"],
            sender=email_data["sender"],
            senderName=email_data["sender_name"],
            received=email_data["received"],
            preview=email_data["preview"],
            keywords=email_data["keywords"],
            hasAtt=email_data["has_attachments"],
        )


def main(max_scan=5000, lookback_days=180):
    """Harvest keyword-matching emails from the local Outlook inbox.

    Walks the inbox newest-first, keeps mail items received within the last
    ``lookback_days`` days whose subject or body mentions one of ``KEYWORDS``,
    writes each match to Neo4j and finally dumps all matches plus statistics
    to a timestamped JSON file under ``data/outlook_local_harvest``.

    Args:
        max_scan: Maximum number of in-window mail items to examine.
        lookback_days: Emails received on an earlier date are ignored.

    Raises:
        SystemExit: If no Neo4j password is configured.
    """
    # NOTE(review): the emoji / non-ASCII characters in the printed strings
    # appear mis-encoded in this copy of the file; they are left untouched.
    print("=" * 60)
    print("๐ TDC OUTLOOK QUICK HARVEST")
    print("=" * 60)

    if not NEO4J_PASSWORD:
        raise SystemExit("NEO4J_PASSWORD is not set; export it before running the harvest.")

    results = []
    stats = {"scanned": 0, "matched": 0, "errors": 0}
    patterns = _compile_keyword_patterns(KEYWORDS)

    # Truncate the cutoff to midnight because the received timestamp is also
    # compared at day granularity below.
    cutoff = (datetime.now() - timedelta(days=lookback_days)).replace(
        hour=0, minute=0, second=0, microsecond=0
    )

    pythoncom.CoInitialize()
    driver = None
    try:
        outlook = win32com.client.Dispatch('Outlook.Application')
        ns = outlook.GetNamespace('MAPI')
        print("โ Forbundet til: clak@tdc.dk")

        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

        inbox = ns.GetDefaultFolder(6)  # 6 = olFolderInbox
        items = inbox.Items
        items.Sort("[ReceivedTime]", True)  # descending: newest first
        total = items.Count
        print(f"๐ง Emails i Inbox: {total}")
        print(f"๐ Sรธger efter: {len(KEYWORDS)} keywords")
        print()

        for item in items:
            # Checked up front so that no skip path can bypass the limit.
            if stats["scanned"] >= max_scan:
                print(f" (stop ved {max_scan})")
                break
            try:
                if item.Class != 43:  # 43 = olMail (MailItem)
                    continue

                # Rebuild a naive date from the COM timestamp's fields rather
                # than comparing it to ``cutoff`` directly.
                received = item.ReceivedTime
                if datetime(received.year, received.month, received.day) < cutoff:
                    # Items are sorted newest-first, so everything after this
                    # one is older as well.
                    break

                stats["scanned"] += 1
                if stats["scanned"] % 500 == 0:
                    print(f" ... scannet {stats['scanned']}/{total}")

                email_data = _build_email_record(item, received, patterns)
                if email_data is None:
                    continue

                stats["matched"] += 1
                results.append(email_data)
                _store_email(driver, email_data)
                print(f" โ [{', '.join(email_data['keywords'][:2])}] {email_data['subject'][:60]}")
            except Exception as exc:
                # Best effort per item: one unreadable email or failed write
                # must not abort the harvest, but it must not vanish silently.
                stats["errors"] += 1
                if stats["errors"] <= 3:
                    print(f" ! fejl: {exc!r}")

        # Save JSON
        output = Path("data/outlook_local_harvest")
        output.mkdir(parents=True, exist_ok=True)
        output_file = output / f"tdc_emails_{datetime.now().strftime('%Y%m%d_%H%M')}.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump({"stats": stats, "emails": results}, f, indent=2, ensure_ascii=False)
    finally:
        # Release the driver and the COM apartment even when the harvest fails.
        if driver is not None:
            driver.close()
        pythoncom.CoUninitialize()

    print()
    print("=" * 60)
    print("๐ RESULTAT")
    print("=" * 60)
    print(f" Scannet: {stats['scanned']}")
    print(f" Matched: {stats['matched']}")
    if stats["errors"]:
        print(f" Fejl: {stats['errors']}")
    print(f" Gemt: {output_file}")
    print("=" * 60)


if __name__ == "__main__":
    main()