File size: 6,139 Bytes
5c56bc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
"""
SharePoint/OneDrive Connector for Medical Document Validator.
Handles Microsoft Graph API authentication and file operations.
"""

import os
import logging
import msal
import requests
from typing import List, Dict, Optional, Any

# Module-level logger named after this module, so the application can
# configure handlers and levels for the connector independently.
logger = logging.getLogger(__name__)

class SharePointError(Exception):
    """Raised when a Microsoft identity platform or Graph API call fails."""


class SharePointConnector:
    """Handles connection to Microsoft SharePoint/OneDrive via Graph API.

    Credentials are read from the ``AZURE_CLIENT_ID``, ``AZURE_TENANT_ID`` and
    ``AZURE_CLIENT_SECRET`` environment variables. When any of them is
    missing, ``self.app`` is ``None``: ``get_auth_url`` returns ``"#"`` and
    ``acquire_token_by_code`` raises ``ValueError`` instead of contacting
    Azure.
    """

    GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"

    # Seconds to wait on each Graph request. ``requests`` applies no timeout
    # by default, so without this a stalled connection blocks the caller
    # forever.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.client_id = os.environ.get("AZURE_CLIENT_ID")
        self.tenant_id = os.environ.get("AZURE_TENANT_ID")
        self.client_secret = os.environ.get("AZURE_CLIENT_SECRET")
        self.authority = f"https://login.microsoftonline.com/{self.tenant_id}"

        # Scopes required for the app
        self.scopes = ["Files.Read.All", "Sites.Read.All", "User.Read"]

        # Initialize MSAL Client Application
        if self.client_id and self.tenant_id and self.client_secret:
            self.app = msal.ConfidentialClientApplication(
                self.client_id,
                authority=self.authority,
                client_credential=self.client_secret
            )
        else:
            self.app = None
            logger.warning("Azure credentials not fully configured. SharePoint integration disabled.")

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _get(self, url: str, access_token: str):
        """Send a bearer-authenticated GET request and return the response.

        Every Graph call in this class goes through here so that the
        Authorization header and the request timeout are applied uniformly.
        """
        return requests.get(
            url,
            headers={'Authorization': f'Bearer {access_token}'},
            timeout=self.REQUEST_TIMEOUT,
        )

    def _iter_collection(self, url: str, access_token: str):
        """Yield every entry of a paged Graph collection.

        Entries are read from each page's ``value`` list and further pages
        are fetched by following ``@odata.nextLink`` until it is absent.
        Entries are yielded lazily, so a caller that iterates keeps what it
        already received if a later page fails.

        Raises:
            SharePointError: if any page answers with a status other than 200.
        """
        while url:
            resp = self._get(url, access_token)
            if resp.status_code != 200:
                raise SharePointError(
                    f"Graph request failed ({resp.status_code}): {resp.text}"
                )
            payload = resp.json()
            yield from payload.get('value', [])
            url = payload.get('@odata.nextLink')

    @staticmethod
    def _to_entry(item: Dict[str, Any]) -> Dict[str, Any]:
        """Convert one Graph drive item into the flat dict used by callers.

        ``type`` (and ``childCount`` / ``mimeType``) are only set when the
        item carries a ``folder`` or ``file`` facet; other items keep just
        the common fields.
        """
        entry = {
            "id": item.get("id"),
            "name": item.get("name"),
            "webUrl": item.get("webUrl"),
            "lastModified": item.get("lastModifiedDateTime"),
            "size": item.get("size")
        }

        if "folder" in item:
            entry["type"] = "folder"
            entry["childCount"] = item["folder"].get("childCount")
        elif "file" in item:
            entry["type"] = "file"
            entry["mimeType"] = item["file"].get("mimeType")

        return entry

    # ------------------------------------------------------------------
    # OAuth flow
    # ------------------------------------------------------------------

    def get_auth_url(self, redirect_uri: str, state: Optional[str] = None) -> str:
        """Generate the login URL for the user.

        Args:
            redirect_uri: URI the identity provider redirects back to.
            state: Optional opaque value passed through to the MSAL request.

        Returns:
            The authorization request URL, or ``"#"`` when the connector is
            not configured.
        """
        if not self.app:
            return "#"

        return self.app.get_authorization_request_url(
            self.scopes,
            redirect_uri=redirect_uri,
            state=state
        )

    def acquire_token_by_code(self, code: str, redirect_uri: str) -> Dict[str, Any]:
        """Exchange auth code for access token.

        Args:
            code: Authorization code received on the redirect.
            redirect_uri: The redirect URI used when requesting the code.

        Returns:
            The result dict returned by MSAL.

        Raises:
            ValueError: if the connector is not configured.
            SharePointError: if the result contains an ``error`` key.
        """
        if not self.app:
            raise ValueError("SharePoint connector not configured")

        result = self.app.acquire_token_by_authorization_code(
            code,
            scopes=self.scopes,
            redirect_uri=redirect_uri
        )

        if "error" in result:
            # The description may be absent; fall back to the error code so
            # the raised exception never carries an empty message.
            reason = result.get("error_description") or result["error"]
            logger.error("Failed to acquire token: %s", reason)
            raise SharePointError(reason)

        return result

    # ------------------------------------------------------------------
    # Drive and file operations
    # ------------------------------------------------------------------

    def get_drives(self, access_token: str) -> List[Dict[str, Any]]:
        """List available drives (document libraries).

        Best-effort: failures are logged and whatever could be collected is
        still returned; this method does not raise for request errors.

        Args:
            access_token: Bearer token for Microsoft Graph.

        Returns:
            A list of dicts with ``id``, ``name`` and ``type`` (``"personal"``
            for the user's OneDrive, ``"sharepoint"`` for site libraries).
        """
        drives = []

        # Get user's OneDrive
        onedrive_url = f"{self.GRAPH_BASE_URL}/me/drive"
        try:
            resp = self._get(onedrive_url, access_token)
            if resp.status_code == 200:
                data = resp.json()
                drives.append({
                    "id": data.get("id"),
                    "name": "My OneDrive",
                    "type": "personal"
                })
            else:
                logger.warning("OneDrive request returned status %s", resp.status_code)
        except Exception as e:  # deliberate best-effort boundary
            logger.error("Error fetching OneDrive: %s", e)

        # Get shared libraries (SharePoint sites)
        sites_url = f"{self.GRAPH_BASE_URL}/sites?search=*"
        try:
            for site in self._iter_collection(sites_url, access_token):
                site_id = site.get('id')
                if not site_id:
                    continue

                drives_url = f"{self.GRAPH_BASE_URL}/sites/{site_id}/drives"
                # Guard each site separately so one inaccessible site does
                # not hide the libraries of all the sites after it.
                try:
                    for drive in self._iter_collection(drives_url, access_token):
                        drives.append({
                            "id": drive.get("id"),
                            "name": f"{site.get('name', 'Site')} - {drive.get('name', 'Documents')}",
                            "type": "sharepoint"
                        })
                except Exception as e:  # deliberate best-effort boundary
                    logger.error("Error fetching drives for site %s: %s", site_id, e)
        except Exception as e:  # deliberate best-effort boundary
            logger.error("Error fetching SharePoint sites: %s", e)

        return drives

    def list_items(self, access_token: str, drive_id: str, folder_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """List items in a drive folder.

        All result pages are retrieved, not only the first one.

        Args:
            access_token: Bearer token for Microsoft Graph.
            drive_id: Identifier of the drive to browse.
            folder_id: Identifier of the folder; the drive root when omitted.

        Returns:
            One dict per child with ``id``, ``name``, ``webUrl``,
            ``lastModified`` and ``size``; folders add ``type="folder"`` and
            ``childCount``, files add ``type="file"`` and ``mimeType``.

        Raises:
            SharePointError: if Graph answers with a non-200 status.
        """
        if folder_id:
            url = f"{self.GRAPH_BASE_URL}/drives/{drive_id}/items/{folder_id}/children"
        else:
            url = f"{self.GRAPH_BASE_URL}/drives/{drive_id}/root/children"

        try:
            items = list(self._iter_collection(url, access_token))
        except SharePointError as exc:
            logger.error("Failed to list items: %s", exc)
            raise SharePointError("Failed to list folder contents") from exc

        return [self._to_entry(item) for item in items]

    def download_file(self, access_token: str, drive_id: str, file_id: str) -> bytes:
        """Download a file content.

        Args:
            access_token: Bearer token for Microsoft Graph.
            drive_id: Identifier of the drive holding the file.
            file_id: Identifier of the file item.

        Returns:
            The raw bytes of the response body.

        Raises:
            SharePointError: if the response status is not 200.
        """
        url = f"{self.GRAPH_BASE_URL}/drives/{drive_id}/items/{file_id}/content"

        resp = self._get(url, access_token)
        if resp.status_code != 200:
            logger.error("Failed to download file %s: status %s", file_id, resp.status_code)
            raise SharePointError("Failed to download file")

        return resp.content