Spaces:
Sleeping
Sleeping
"""
SharePoint/OneDrive Connector for Medical Document Validator.
Handles Microsoft Graph API authentication and file operations.
"""
| import os | |
| import logging | |
| import msal | |
| import requests | |
| from typing import List, Dict, Optional, Any | |
logger = logging.getLogger(__name__)


class SharePointConnector:
    """Handles connection to Microsoft SharePoint/OneDrive via Graph API.

    Azure credentials are read from the ``AZURE_CLIENT_ID``,
    ``AZURE_TENANT_ID`` and ``AZURE_CLIENT_SECRET`` environment variables.
    When any of them is missing, ``self.app`` is ``None`` and the
    authentication helpers degrade as documented on each method.
    """

    GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"
    DEFAULT_TIMEOUT = 30.0

    # Class-level fallbacks so the HTTP helpers keep working on instances
    # whose __init__ was bypassed or overridden without calling super().
    _session: Optional[Any] = None
    timeout: Optional[float] = DEFAULT_TIMEOUT

    def __init__(self, session: Optional[Any] = None,
                 timeout: Optional[float] = DEFAULT_TIMEOUT) -> None:
        """Read Azure settings from the environment and build the MSAL app.

        Args:
            session: Optional object exposing a ``requests``-compatible
                ``get(url, headers=..., timeout=...)`` method (for example a
                ``requests.Session``). When omitted, the ``requests`` module
                itself is used.
            timeout: Timeout in seconds passed to every Graph HTTP call;
                ``None`` disables the timeout.
        """
        self.client_id = os.environ.get("AZURE_CLIENT_ID")
        self.tenant_id = os.environ.get("AZURE_TENANT_ID")
        self.client_secret = os.environ.get("AZURE_CLIENT_SECRET")
        self.authority = f"https://login.microsoftonline.com/{self.tenant_id}"
        # Scopes required for the app
        self.scopes = ["Files.Read.All", "Sites.Read.All", "User.Read"]
        self._session = session
        self.timeout = timeout

        # Initialize MSAL Client Application only when fully configured.
        if self.client_id and self.tenant_id and self.client_secret:
            self.app = msal.ConfidentialClientApplication(
                self.client_id,
                authority=self.authority,
                client_credential=self.client_secret,
            )
        else:
            self.app = None
            logger.warning("Azure credentials not fully configured. SharePoint integration disabled.")

    def get_auth_url(self, redirect_uri: str, state: Optional[str] = None) -> str:
        """Generate the login URL for the user.

        Returns:
            The authorization request URL, or ``"#"`` when the connector is
            not configured.
        """
        if not self.app:
            return "#"
        return self.app.get_authorization_request_url(
            self.scopes,
            redirect_uri=redirect_uri,
            state=state,
        )

    def acquire_token_by_code(self, code: str, redirect_uri: str) -> Dict[str, Any]:
        """Exchange auth code for access token.

        Returns:
            The result dictionary returned by MSAL.

        Raises:
            ValueError: If the connector is not configured.
            Exception: If the MSAL result contains an ``"error"`` key; the
                message is the result's ``error_description``.
        """
        if not self.app:
            raise ValueError("SharePoint connector not configured")
        result = self.app.acquire_token_by_authorization_code(
            code,
            scopes=self.scopes,
            redirect_uri=redirect_uri,
        )
        if "error" in result:
            logger.error("Failed to acquire token: %s", result.get("error_description"))
            raise Exception(result.get("error_description"))
        return result

    def _get(self, url: str, access_token: str) -> Any:
        """Issue a bearer-authenticated GET with the configured timeout."""
        http = self._session if self._session is not None else requests
        return http.get(
            url,
            headers={"Authorization": f"Bearer {access_token}"},
            timeout=self.timeout,
        )

    def _fetch_collection(self, url: str, access_token: str) -> tuple:
        """Collect every item of a Graph collection, following paging links.

        Returns:
            ``(items, failed_response)``. ``failed_response`` is ``None``
            when every page returned HTTP 200; otherwise it is the first
            non-200 response and ``items`` holds what was gathered before it.
        """
        items: List[Dict[str, Any]] = []
        next_url: Optional[str] = url
        while next_url:
            resp = self._get(next_url, access_token)
            if resp.status_code != 200:
                return items, resp
            payload = resp.json()
            items.extend(payload.get("value", []))
            # Graph signals further pages through "@odata.nextLink".
            next_url = payload.get("@odata.nextLink")
        return items, None

    def get_drives(self, access_token: str) -> List[Dict[str, Any]]:
        """List available drives (document libraries).

        Best effort: failures are logged and skipped, never raised.

        Returns:
            A list of ``{"id", "name", "type"}`` dictionaries, where ``type``
            is ``"personal"`` for the user's OneDrive and ``"sharepoint"``
            for site document libraries.
        """
        drives: List[Dict[str, Any]] = []

        # The user's own OneDrive.
        try:
            resp = self._get(f"{self.GRAPH_BASE_URL}/me/drive", access_token)
            if resp.status_code == 200:
                drives.append({
                    "id": resp.json().get("id"),
                    "name": "My OneDrive",
                    "type": "personal",
                })
            else:
                logger.warning("OneDrive lookup returned HTTP %s", resp.status_code)
        except Exception as e:
            logger.error("Error fetching OneDrive: %s", e)

        # Shared libraries (SharePoint sites).
        sites: List[Dict[str, Any]] = []
        try:
            sites, failed = self._fetch_collection(
                f"{self.GRAPH_BASE_URL}/sites?search=*", access_token
            )
            if failed is not None:
                logger.warning("SharePoint site search returned HTTP %s", failed.status_code)
        except Exception as e:
            logger.error("Error fetching SharePoint sites: %s", e)

        for site in sites:
            # Each site is isolated so one failure does not hide the others.
            try:
                site_id = site.get("id")
                if not site_id:
                    continue
                site_drives, failed = self._fetch_collection(
                    f"{self.GRAPH_BASE_URL}/sites/{site_id}/drives", access_token
                )
                if failed is not None:
                    logger.warning(
                        "Drive listing for site %s returned HTTP %s",
                        site_id, failed.status_code,
                    )
                site_name = site.get("name", "Site")
                drives.extend(
                    {
                        "id": drive.get("id"),
                        "name": f"{site_name} - {drive.get('name', 'Documents')}",
                        "type": "sharepoint",
                    }
                    for drive in site_drives
                )
            except Exception as e:
                logger.error("Error fetching SharePoint sites: %s", e)

        return drives

    def list_items(self, access_token: str, drive_id: str,
                   folder_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """List items in a drive folder.

        Args:
            access_token: Bearer token for Microsoft Graph.
            drive_id: Identifier of the drive to browse.
            folder_id: Identifier of the folder; the drive root when omitted.

        Returns:
            One dictionary per item with ``id``, ``name``, ``webUrl``,
            ``lastModified`` and ``size``. Folders add ``type="folder"`` and
            ``childCount``; files add ``type="file"`` and ``mimeType``.
            Items that are neither are returned without a ``type`` key.

        Raises:
            Exception: If any page of the listing returns a non-200 status.
        """
        if folder_id:
            url = f"{self.GRAPH_BASE_URL}/drives/{drive_id}/items/{folder_id}/children"
        else:
            url = f"{self.GRAPH_BASE_URL}/drives/{drive_id}/root/children"

        items, failed = self._fetch_collection(url, access_token)
        if failed is not None:
            logger.error("Failed to list items: %s", failed.text)
            raise Exception("Failed to list folder contents")

        result = []
        for item in items:
            entry = {
                "id": item.get("id"),
                "name": item.get("name"),
                "webUrl": item.get("webUrl"),
                "lastModified": item.get("lastModifiedDateTime"),
                "size": item.get("size"),
            }
            # Tag folders and files; nothing is filtered out.
            if "folder" in item:
                entry["type"] = "folder"
                entry["childCount"] = item["folder"].get("childCount")
            elif "file" in item:
                entry["type"] = "file"
                entry["mimeType"] = item["file"].get("mimeType")
            result.append(entry)
        return result

    def download_file(self, access_token: str, drive_id: str, file_id: str) -> bytes:
        """Download a file content.

        Returns:
            The raw bytes of the response body.

        Raises:
            Exception: If the response status is not 200.
        """
        url = f"{self.GRAPH_BASE_URL}/drives/{drive_id}/items/{file_id}/content"
        resp = self._get(url, access_token)
        if resp.status_code != 200:
            logger.error("Failed to download file %s: HTTP %s", file_id, resp.status_code)
            raise Exception("Failed to download file")
        return resp.content