widgettdc-api / apps /backend /python /tdc_outlook_quick.py
Kraft102's picture
Update backend source
34367da verified
#!/usr/bin/env python3
"""
🔓 Quick TDC Outlook harvest.
Straight from the local Outlook client - no admin rights needed.
"""
import hashlib
import json
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path

import pythoncom
import win32com.client
from neo4j import GraphDatabase
# Neo4j connection settings. Each value can be overridden with the environment
# variable of the same name; the literals below are only fallbacks that keep
# the script's previous behaviour when nothing is configured.
#
# SECURITY(review): the fallback password is a credential committed to source
# control. Rotate it and remove the literal once every deployment supplies
# NEO4J_PASSWORD through the environment.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://054eff27.databases.neo4j.io")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get(
    "NEO4J_PASSWORD", "Qrt37mkb0xBZ7_ts5tG1J70K2mVDGPMF2L7Njlm7cg8"
)

# Terms searched for (case-insensitively, as substrings) in each mail's
# subject and in the first part of its body.
KEYWORDS = [
    "strategi", "cyber", "NIS2", "SOC", "MDR", "cloud", "Azure", "AI",
    "Copilot", "Columbus", "ERP", "budget", "kunde", "kontrakt", "SKI",
    "produkt", "arkitektur", "roadmap", "sikkerhed", "incident",
]
# Outlook object-model constants passed to / compared against COM values.
_OL_FOLDER_INBOX = 6  # olFolderInbox
_OL_MAIL_ITEM = 43  # MailItem

_LOOKBACK_DAYS = 180  # mails received longer ago than this are not matched
_SCAN_LIMIT = 5000  # maximum number of mail items inspected per run
_PROGRESS_EVERY = 500  # print a progress line every N inspected mails
_MAX_REPORTED_ERRORS = 5  # per-item failures printed before going quiet

# Upserts one mail node keyed on its content hash and links it to the
# data-source node. Properties are only written when the node is created.
_MERGE_EMAIL_QUERY = """
    MERGE (e:OutlookEmail {contentHash: $hash})
    ON CREATE SET
        e.subject = $subject,
        e.sender = $sender,
        e.senderName = $senderName,
        e.received = $received,
        e.preview = $preview,
        e.keywords = $keywords,
        e.hasAttachments = $hasAtt,
        e.harvestedAt = datetime()
    MERGE (ds:DataSource {name: 'TDC_Outlook_Local'})
    MERGE (e)-[:HARVESTED_FROM]->(ds)
"""


def _match_keywords(subject, body):
    """Return the KEYWORDS entries that occur in ``subject`` or ``body``.

    Both texts are expected to be lower-cased already; each keyword is
    lower-cased before the substring test.
    """
    # NOTE(review): this is plain substring matching, so a short keyword such
    # as "AI" also hits inside longer words (e.g. "mail"). Confirm whether
    # whole-word matching is wanted before changing it.
    hits = []
    for keyword in KEYWORDS:
        needle = keyword.lower()
        if needle in subject or needle in body:
            hits.append(keyword)
    return hits


def _store_email(driver, email_data):
    """Write one harvested mail record to Neo4j using ``_MERGE_EMAIL_QUERY``."""
    # MD5 is used purely as a deduplication key here, not for security.
    content_hash = hashlib.md5(
        f"{email_data['subject']}:{email_data['id']}".encode()
    ).hexdigest()
    with driver.session() as session:
        session.run(
            _MERGE_EMAIL_QUERY,
            hash=content_hash,
            subject=email_data["subject"],
            sender=email_data["sender"],
            senderName=email_data["sender_name"],
            received=email_data["received"],
            preview=email_data["preview"],
            keywords=email_data["keywords"],
            hasAtt=email_data["has_attachments"],
        )


def _scan_item(item, cutoff_date, driver, stats, results):
    """Inspect one Inbox item and harvest it when it matches a keyword.

    Non-mail items are ignored without being counted. Mail items increment
    ``stats["scanned"]``; those received on or after ``cutoff_date`` that
    contain a keyword also increment ``stats["matched"]``, are appended to
    ``results`` and are written to Neo4j through ``driver``.

    Any exception raised by the COM item or by Neo4j propagates to the caller.
    """
    if item.Class != _OL_MAIL_ITEM:
        return
    stats["scanned"] += 1

    # Compare calendar dates on both sides. The previous code compared a
    # midnight-truncated receive date with a cutoff that still carried the
    # current time of day, which dropped mails from the boundary day.
    received = item.ReceivedTime
    received_date = datetime(received.year, received.month, received.day).date()
    if received_date < cutoff_date:
        return

    subject = str(item.Subject or "").lower()
    body = str(item.Body or "")[:1500].lower()
    matched = _match_keywords(subject, body)
    if not matched:
        return
    stats["matched"] += 1

    # NOTE(review): only the first 50 characters of EntryID are kept. If that
    # prefix is shared by messages from the same store/folder, the content
    # hash effectively depends on the subject alone and mails with equal
    # subjects collapse into one node - verify against real data.
    email_data = {
        "id": item.EntryID[:50],
        "subject": item.Subject[:200] if item.Subject else "",
        "sender": str(item.SenderEmailAddress or ""),
        "sender_name": str(item.SenderName or ""),
        "received": received.strftime("%Y-%m-%d %H:%M"),
        "preview": body[:400],
        "keywords": matched,
        "has_attachments": item.Attachments.Count > 0,
    }
    # Record locally first so the mail still lands in the JSON export even if
    # the Neo4j write below fails.
    results.append(email_data)
    _store_email(driver, email_data)
    print(f" ✅ [{', '.join(matched[:2])}] {email_data['subject'][:60]}")


def main():
    """Harvest keyword-matching mails from the local Outlook Inbox.

    Connects to the running/local Outlook via COM, walks the Inbox newest
    first, and for every mail from the last ``_LOOKBACK_DAYS`` days whose
    subject or leading body text contains one of ``KEYWORDS`` stores a record
    in Neo4j and in an in-memory list. At most ``_SCAN_LIMIT`` mails are
    inspected. Finally the collected records and counters are written to a
    timestamped JSON file under ``data/outlook_local_harvest`` and a summary
    is printed.

    A failure on an individual item does not abort the run: it is counted,
    the first few are reported on stderr, and scanning continues.
    """
    print("=" * 60)
    print("🔓 TDC OUTLOOK QUICK HARVEST")
    print("=" * 60)

    results = []
    stats = {"scanned": 0, "matched": 0}
    failures = 0
    driver = None

    pythoncom.CoInitialize()
    try:
        outlook = win32com.client.Dispatch('Outlook.Application')
        ns = outlook.GetNamespace('MAPI')
        print("✅ Forbundet til: clak@tdc.dk")

        # Neo4j
        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

        # Fetch the Inbox, newest mail first.
        items = ns.GetDefaultFolder(_OL_FOLDER_INBOX).Items
        items.Sort("[ReceivedTime]", True)
        total = items.Count
        print(f"📧 Emails i Inbox: {total}")
        print(f"🔍 Søger efter: {len(KEYWORDS)} keywords")
        print()

        cutoff_date = (datetime.now() - timedelta(days=_LOOKBACK_DAYS)).date()

        for item in items:
            scanned_before = stats["scanned"]
            try:
                _scan_item(item, cutoff_date, driver, stats, results)
            except Exception as exc:
                # Best-effort harvest: keep going, but never hide the failure.
                failures += 1
                if failures <= _MAX_REPORTED_ERRORS:
                    print(f" ! Sprunget over (fejl): {exc!r}", file=sys.stderr)
                elif failures == _MAX_REPORTED_ERRORS + 1:
                    print(" ! Flere fejl undertrykkes ...", file=sys.stderr)

            if stats["scanned"] == scanned_before:
                # Not a mail item (or its class could not be read): nothing
                # was counted, so skip the progress and limit checks.
                continue

            # These checks run for every counted mail. Previously they were
            # skipped for mails outside the date window, so the scan limit
            # never triggered once the loop had passed the cutoff.
            if stats["scanned"] % _PROGRESS_EVERY == 0:
                print(f" ... scannet {stats['scanned']}/{total}")
            if stats["scanned"] >= _SCAN_LIMIT:
                print(f" (stop ved {_SCAN_LIMIT})")
                break
    finally:
        # Release external resources even when Outlook or Neo4j setup fails.
        if driver is not None:
            driver.close()
        pythoncom.CoUninitialize()

    # Save JSON
    output = Path("data/outlook_local_harvest")
    output.mkdir(parents=True, exist_ok=True)
    output_file = output / f"tdc_emails_{datetime.now().strftime('%Y%m%d_%H%M')}.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump({"stats": stats, "emails": results}, f, indent=2, ensure_ascii=False)

    print()
    print("=" * 60)
    print("📊 RESULTAT")
    print("=" * 60)
    print(f" Scannet: {stats['scanned']}")
    print(f" Matched: {stats['matched']}")
    if failures:
        print(f" Fejl: {failures}")
    print(f" Gemt: {output_file}")
    print("=" * 60)
# Run the harvest only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()