# Medical-Validator
#
# Sleeping
#
# File size: 6,139 Bytes
#
# 5c56bc9

"""
SharePoint/OneDrive Connector for Medical Document Validator.
Handles Microsoft Graph API authentication and file operations.
"""

import logging
import os
from typing import Any, Dict, Iterator, List, Optional

import msal
import requests

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

class SharePointConnector:
    """Client for Microsoft SharePoint/OneDrive built on the Microsoft Graph API.

    Credentials are read from the ``AZURE_CLIENT_ID``, ``AZURE_TENANT_ID`` and
    ``AZURE_CLIENT_SECRET`` environment variables. When any of them is missing
    the connector is created in a disabled state (``self.app`` is ``None``).

    Every Graph request carries a timeout, and collection endpoints are read
    across all ``@odata.nextLink`` pages rather than only the first one.
    """

    # Root of the Graph v1.0 REST API used by every request in this class.
    GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"

    # (connect, read) timeout in seconds. Without a timeout ``requests`` waits
    # forever on a stalled connection, which would block the calling worker.
    REQUEST_TIMEOUT = (10, 60)

    def __init__(self):
        """Read the Azure credentials and build the MSAL client if possible."""
        self.client_id = os.environ.get("AZURE_CLIENT_ID")
        self.tenant_id = os.environ.get("AZURE_TENANT_ID")
        self.client_secret = os.environ.get("AZURE_CLIENT_SECRET")
        self.authority = f"https://login.microsoftonline.com/{self.tenant_id}"

        # Scopes required for the app
        self.scopes = ["Files.Read.All", "Sites.Read.All", "User.Read"]

        # Initialize MSAL Client Application
        if self.client_id and self.tenant_id and self.client_secret:
            self.app = msal.ConfidentialClientApplication(
                self.client_id,
                authority=self.authority,
                client_credential=self.client_secret,
            )
        else:
            self.app = None
            logger.warning("Azure credentials not fully configured. SharePoint integration disabled.")

    def get_auth_url(self, redirect_uri: str, state: Optional[str] = None) -> str:
        """Generate the login URL for the user.

        Args:
            redirect_uri: URI the identity provider redirects to after login.
            state: Optional opaque value passed through to the authorization
                request.

        Returns:
            The authorization URL, or ``"#"`` when the connector is not
            configured.
        """
        if not self.app:
            return "#"

        return self.app.get_authorization_request_url(
            self.scopes,
            redirect_uri=redirect_uri,
            state=state,
        )

    def acquire_token_by_code(self, code: str, redirect_uri: str) -> Dict[str, Any]:
        """Exchange an authorization code for an access token.

        Args:
            code: Authorization code received on the redirect URI.
            redirect_uri: The redirect URI used for the authorization request.

        Returns:
            The token response dictionary returned by MSAL.

        Raises:
            ValueError: If the connector is not configured.
            RuntimeError: If the token response contains an ``error`` key.
        """
        if not self.app:
            raise ValueError("SharePoint connector not configured")

        result = self.app.acquire_token_by_authorization_code(
            code,
            scopes=self.scopes,
            redirect_uri=redirect_uri,
        )

        if "error" in result:
            # The description may be absent; fall back to the error code so
            # the raised exception never carries an empty (None) message.
            reason = result.get("error_description") or result.get("error")
            logger.error("Failed to acquire token: %s", reason)
            raise RuntimeError(reason)

        return result

    @staticmethod
    def _auth_headers(access_token: str) -> Dict[str, str]:
        """Build the bearer-token header sent with every Graph request."""
        return {"Authorization": f"Bearer {access_token}"}

    @staticmethod
    def _describe_item(item: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce a Graph drive item to the fields this connector exposes.

        Folders gain ``type``/``childCount`` and files gain ``type``/``mimeType``.
        Items that carry neither a ``folder`` nor a ``file`` facet are returned
        without a ``type`` key.
        """
        entry = {
            "id": item.get("id"),
            "name": item.get("name"),
            "webUrl": item.get("webUrl"),
            "lastModified": item.get("lastModifiedDateTime"),
            "size": item.get("size"),
        }

        if "folder" in item:
            entry["type"] = "folder"
            entry["childCount"] = item["folder"].get("childCount")
        elif "file" in item:
            entry["type"] = "file"
            entry["mimeType"] = item["file"].get("mimeType")

        return entry

    def _iter_collection(self, url: str, headers: Dict[str, str]) -> Iterator[Dict[str, Any]]:
        """Yield every entry of a paged Graph collection.

        Entries are yielded page by page, following ``@odata.nextLink`` until
        the response no longer contains one. Because this is a generator,
        entries from pages fetched before a failure have already been yielded.

        Raises:
            requests.HTTPError: If any page answers with a status other than
                200. The failing response is attached as ``.response``.
        """
        next_url: Optional[str] = url
        while next_url:
            resp = requests.get(next_url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            if resp.status_code != 200:
                raise requests.HTTPError(
                    f"Graph request failed with HTTP {resp.status_code}",
                    response=resp,
                )
            payload = resp.json()
            yield from payload.get("value", [])
            next_url = payload.get("@odata.nextLink")

    def get_drives(self, access_token: str) -> List[Dict[str, Any]]:
        """List available drives (the user's OneDrive and site document libraries).

        This is a best-effort listing: failures are logged and whatever was
        collected up to that point is still returned.

        Args:
            access_token: Bearer token for the Graph API.

        Returns:
            A list of dictionaries with ``id``, ``name`` and ``type``
            (``"personal"`` or ``"sharepoint"``).
        """
        headers = self._auth_headers(access_token)
        drives: List[Dict[str, Any]] = []

        # Personal OneDrive. Broad catch on purpose: a failure here must not
        # prevent the SharePoint libraries below from being listed.
        try:
            resp = requests.get(
                f"{self.GRAPH_BASE_URL}/me/drive",
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            if resp.status_code == 200:
                drives.append({
                    "id": resp.json().get("id"),
                    "name": "My OneDrive",
                    "type": "personal",
                })
            else:
                logger.warning("OneDrive lookup returned HTTP %s", resp.status_code)
        except Exception as e:
            logger.error("Error fetching OneDrive: %s", e)

        # Shared libraries (SharePoint sites). Drives are appended as they are
        # discovered so an error part-way through keeps the earlier results.
        try:
            for site in self._iter_collection(f"{self.GRAPH_BASE_URL}/sites?search=*", headers):
                site_id = site.get("id")
                if not site_id:
                    # Nothing to query without an id.
                    continue

                drives_url = f"{self.GRAPH_BASE_URL}/sites/{site_id}/drives"
                try:
                    for drive in self._iter_collection(drives_url, headers):
                        drives.append({
                            "id": drive.get("id"),
                            "name": f"{site.get('name', 'Site')} - {drive.get('name', 'Documents')}",
                            "type": "sharepoint",
                        })
                except requests.HTTPError as e:
                    # A single site that rejects the request is skipped so the
                    # remaining sites are still processed.
                    logger.warning("Skipping drives of site %s: %s", site_id, e)
        except Exception as e:
            logger.error("Error fetching SharePoint sites: %s", e)

        return drives

    def list_items(self, access_token: str, drive_id: str, folder_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """List the items in a drive folder.

        Args:
            access_token: Bearer token for the Graph API.
            drive_id: Identifier of the drive to read.
            folder_id: Identifier of the folder to list; the drive root is
                listed when omitted or empty.

        Returns:
            One dictionary per child item with ``id``, ``name``, ``webUrl``,
            ``lastModified`` and ``size``, plus ``type``/``childCount`` for
            folders and ``type``/``mimeType`` for files. All result pages are
            included.

        Raises:
            RuntimeError: If any page of the listing answers with a status
                other than 200.
        """
        headers = self._auth_headers(access_token)

        if folder_id:
            url = f"{self.GRAPH_BASE_URL}/drives/{drive_id}/items/{folder_id}/children"
        else:
            url = f"{self.GRAPH_BASE_URL}/drives/{drive_id}/root/children"

        try:
            # Materialise all pages first: a partial listing would be
            # indistinguishable from a complete one for the caller.
            items = list(self._iter_collection(url, headers))
        except requests.HTTPError as exc:
            logger.error("Failed to list items: %s", exc.response.text)
            raise RuntimeError("Failed to list folder contents") from exc

        return [self._describe_item(item) for item in items]

    def download_file(self, access_token: str, drive_id: str, file_id: str) -> bytes:
        """Download the content of a file.

        Args:
            access_token: Bearer token for the Graph API.
            drive_id: Identifier of the drive holding the file.
            file_id: Identifier of the file item.

        Returns:
            The raw response body.

        Raises:
            RuntimeError: If the response status is not 200.
        """
        headers = self._auth_headers(access_token)
        url = f"{self.GRAPH_BASE_URL}/drives/{drive_id}/items/{file_id}/content"

        resp = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        if resp.status_code != 200:
            logger.error("Failed to download file %s: HTTP %s", file_id, resp.status_code)
            raise RuntimeError("Failed to download file")

        return resp.content