Spaces:
Sleeping
Sleeping
File size: 6,139 Bytes
"""
SharePoint/OneDrive Connector for Medical Document Validator.
Handles Microsoft Graph API authentication and file operations.
"""
import os
import logging
import msal
import requests
from typing import List, Dict, Optional, Any
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class SharePointConnector:
    """Handles connection to Microsoft SharePoint/OneDrive via Graph API.

    Credentials are read from the ``AZURE_CLIENT_ID``, ``AZURE_TENANT_ID`` and
    ``AZURE_CLIENT_SECRET`` environment variables. If any of them is missing,
    ``self.app`` is ``None``: ``get_auth_url`` then returns ``"#"`` and
    ``acquire_token_by_code`` raises ``ValueError``.
    """

    # Base URL shared by every Graph request issued by this class.
    GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"

    # Seconds to wait on Graph before giving up. Without an explicit timeout
    # ``requests`` can block indefinitely on an unresponsive connection.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Read the Azure settings from the environment and build the MSAL client."""
        self.client_id = os.environ.get("AZURE_CLIENT_ID")
        self.tenant_id = os.environ.get("AZURE_TENANT_ID")
        self.client_secret = os.environ.get("AZURE_CLIENT_SECRET")
        self.authority = f"https://login.microsoftonline.com/{self.tenant_id}"

        # Scopes passed to MSAL both when building the login URL and when
        # redeeming the authorization code.
        self.scopes = ["Files.Read.All", "Sites.Read.All", "User.Read"]

        if self.client_id and self.tenant_id and self.client_secret:
            self.app = msal.ConfidentialClientApplication(
                self.client_id,
                authority=self.authority,
                client_credential=self.client_secret,
            )
        else:
            self.app = None
            logger.warning("Azure credentials not fully configured. SharePoint integration disabled.")

    @staticmethod
    def _auth_headers(access_token: str) -> Dict[str, str]:
        """Return the bearer-token header sent with every Graph request."""
        return {"Authorization": f"Bearer {access_token}"}

    def _get(self, url: str, access_token: str) -> "requests.Response":
        """Issue an authenticated GET with the class-wide timeout applied."""
        return requests.get(
            url,
            headers=self._auth_headers(access_token),
            timeout=self.REQUEST_TIMEOUT,
        )

    @staticmethod
    def _collect_pages(fetch_page, url: str) -> List[Dict[str, Any]]:
        """Gather the ``value`` entries of a paged Graph collection.

        Args:
            fetch_page: Callable taking a URL and returning the decoded JSON
                payload (a dict) of that page.
            url: URL of the first page.

        Returns:
            The concatenated ``value`` lists of every page, in order. Paging
            stops at the first payload without an ``@odata.nextLink``.
        """
        collected: List[Dict[str, Any]] = []
        next_url: Optional[str] = url
        while next_url:
            payload = fetch_page(next_url)
            collected.extend(payload.get("value", []))
            next_url = payload.get("@odata.nextLink")
        return collected

    @staticmethod
    def _format_item(item: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce a Graph drive item to the fields exposed by ``list_items``.

        Folders gain ``type``/``childCount`` and files gain ``type``/``mimeType``.
        An item that carries neither a ``folder`` nor a ``file`` facet is still
        returned, just without a ``type`` key.
        """
        entry: Dict[str, Any] = {
            "id": item.get("id"),
            "name": item.get("name"),
            "webUrl": item.get("webUrl"),
            "lastModified": item.get("lastModifiedDateTime"),
            "size": item.get("size"),
        }
        if "folder" in item:
            entry["type"] = "folder"
            entry["childCount"] = item["folder"].get("childCount")
        elif "file" in item:
            entry["type"] = "file"
            entry["mimeType"] = item["file"].get("mimeType")
        return entry

    def get_auth_url(self, redirect_uri: str, state: Optional[str] = None) -> str:
        """Generate the login URL for the user.

        Args:
            redirect_uri: URI the identity provider should redirect back to.
            state: Optional opaque value forwarded to MSAL unchanged.

        Returns:
            The authorization URL, or ``"#"`` when the connector is not
            configured.
        """
        if not self.app:
            return "#"
        return self.app.get_authorization_request_url(
            self.scopes,
            redirect_uri=redirect_uri,
            state=state,
        )

    def acquire_token_by_code(self, code: str, redirect_uri: str) -> Dict[str, Any]:
        """Exchange an authorization code for an access token.

        Args:
            code: Authorization code received on the redirect.
            redirect_uri: The redirect URI used when the code was requested.

        Returns:
            The result dictionary produced by MSAL.

        Raises:
            ValueError: If the connector is not configured.
            RuntimeError: If MSAL reports an error for the exchange.
        """
        if not self.app:
            raise ValueError("SharePoint connector not configured")

        result = self.app.acquire_token_by_authorization_code(
            code,
            scopes=self.scopes,
            redirect_uri=redirect_uri,
        )
        if "error" in result:
            # Fall back to the error code so the raised exception never
            # carries ``None`` as its message.
            message = result.get("error_description") or result["error"]
            logger.error("Failed to acquire token: %s", message)
            raise RuntimeError(message)
        return result

    def get_drives(self, access_token: str) -> List[Dict[str, Any]]:
        """List available drives (the user's OneDrive plus site document libraries).

        This is a best-effort listing: failures are logged and skipped, so the
        result may be partial or empty but the call itself does not raise for
        request errors.

        Args:
            access_token: Bearer token for Microsoft Graph.

        Returns:
            Dictionaries with ``id``, ``name`` and ``type`` (``"personal"`` or
            ``"sharepoint"``).
        """
        drives: List[Dict[str, Any]] = []

        # The user's personal OneDrive.
        try:
            resp = self._get(f"{self.GRAPH_BASE_URL}/me/drive", access_token)
            if resp.status_code == 200:
                drives.append({
                    "id": resp.json().get("id"),
                    "name": "My OneDrive",
                    "type": "personal",
                })
            else:
                logger.warning("OneDrive lookup returned HTTP %s", resp.status_code)
        except Exception as e:  # deliberate best-effort: keep going without OneDrive
            logger.error(f"Error fetching OneDrive: {e}")

        # SharePoint sites visible to the user.
        # NOTE: only the first page of the site search is used, which bounds
        # the number of per-site requests made below.
        sites: List[Dict[str, Any]] = []
        try:
            resp = self._get(f"{self.GRAPH_BASE_URL}/sites?search=*", access_token)
            if resp.status_code == 200:
                sites = resp.json().get("value", [])
            else:
                logger.warning("SharePoint site search returned HTTP %s", resp.status_code)
        except Exception as e:  # deliberate best-effort
            logger.error(f"Error fetching SharePoint sites: {e}")

        for site in sites:
            # Each site is handled independently so that one failing site
            # does not hide the document libraries of the remaining ones.
            try:
                site_id = site.get("id")
                d_resp = self._get(
                    f"{self.GRAPH_BASE_URL}/sites/{site_id}/drives", access_token
                )
                if d_resp.status_code != 200:
                    logger.warning(
                        "Drive listing for site %s returned HTTP %s",
                        site_id,
                        d_resp.status_code,
                    )
                    continue
                for drive in d_resp.json().get("value", []):
                    drives.append({
                        "id": drive.get("id"),
                        "name": f"{site.get('name', 'Site')} - {drive.get('name', 'Documents')}",
                        "type": "sharepoint",
                    })
            except Exception as e:  # deliberate best-effort
                logger.error(f"Error fetching SharePoint sites: {e}")

        return drives

    def list_items(
        self,
        access_token: str,
        drive_id: str,
        folder_id: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """List items in a drive folder.

        All result pages are followed, so folders with more children than fit
        in a single Graph response are returned completely.

        Args:
            access_token: Bearer token for Microsoft Graph.
            drive_id: Identifier of the drive to browse.
            folder_id: Identifier of the folder; the drive root when omitted
                or empty.

        Returns:
            One dictionary per child with ``id``, ``name``, ``webUrl``,
            ``lastModified`` and ``size``, plus ``type``/``childCount`` for
            folders and ``type``/``mimeType`` for files.

        Raises:
            RuntimeError: If Graph answers any page with a non-200 status.
        """
        if folder_id:
            url = f"{self.GRAPH_BASE_URL}/drives/{drive_id}/items/{folder_id}/children"
        else:
            url = f"{self.GRAPH_BASE_URL}/drives/{drive_id}/root/children"

        def fetch_page(page_url: str) -> Dict[str, Any]:
            resp = self._get(page_url, access_token)
            if resp.status_code != 200:
                logger.error(f"Failed to list items: {resp.text}")
                raise RuntimeError("Failed to list folder contents")
            return resp.json()

        return [self._format_item(item) for item in self._collect_pages(fetch_page, url)]

    def download_file(self, access_token: str, drive_id: str, file_id: str) -> bytes:
        """Download the content of a file.

        Args:
            access_token: Bearer token for Microsoft Graph.
            drive_id: Identifier of the drive holding the file.
            file_id: Identifier of the file item.

        Returns:
            The raw bytes of the response body.

        Raises:
            RuntimeError: If the final response status is not 200.
        """
        url = f"{self.GRAPH_BASE_URL}/drives/{drive_id}/items/{file_id}/content"
        resp = self._get(url, access_token)
        if resp.status_code != 200:
            logger.error("Failed to download file: HTTP %s", resp.status_code)
            raise RuntimeError("Failed to download file")
        return resp.content
|