class SharePointGraphClient
SharePoint client using Microsoft Graph API. This bypasses SharePoint REST API app-only token issues.
File: /tf/active/vicechatdev/SPFCsync/sharepoint_graph_client.py
Lines: 8 - 526
Complexity: moderate
Purpose
SharePoint client using Microsoft Graph API. This bypasses SharePoint REST API app-only token issues.
Source Code
class SharePointGraphClient:
"""
SharePoint client using Microsoft Graph API.
This bypasses SharePoint REST API app-only token issues.
"""
def __init__(self, site_url: str, client_id: str, client_secret: str):
"""
Initialize SharePoint Graph client.
Args:
site_url: SharePoint site URL
client_id: Azure AD app client ID
client_secret: Azure AD app client secret
"""
self.site_url = site_url.rstrip('/')
self.client_id = client_id
self.client_secret = client_secret
# Extract tenant and site information
if '.sharepoint.com' in site_url:
self.tenant = site_url.split('.sharepoint.com')[0].split('https://')[-1]
if '/sites/' in site_url:
self.site_name = site_url.split('/sites/')[-1]
else:
raise ValueError("Site URL must contain /sites/")
else:
raise ValueError("Invalid SharePoint URL format")
# Setup logging
self.logger = logging.getLogger(__name__)
# Get access token and site ID
self.access_token = self._get_access_token()
if self.access_token:
self.site_id = self._get_site_id()
self.drive_id = self._get_drive_id()
if self.site_id and self.drive_id:
self.logger.info(f"Successfully connected to SharePoint site via Graph API")
else:
raise ConnectionError("Failed to get site or drive information")
else:
raise ConnectionError("Failed to authenticate with Microsoft Graph")
def _make_request_with_retry(self, method: str, url: str, headers: Dict = None, params: Dict = None,
max_retries: int = 5, base_delay: float = 1.0) -> Optional[requests.Response]:
"""
Make HTTP request with intelligent retry logic for rate limiting.
Args:
method: HTTP method ('GET', 'POST', etc.)
url: Request URL
headers: Request headers
params: Request parameters
max_retries: Maximum number of retry attempts
base_delay: Base delay for exponential backoff
Returns:
Response object or None if all retries failed
"""
if headers is None:
headers = {}
for attempt in range(max_retries + 1):
try:
# Make the request
if method.upper() == 'GET':
response = requests.get(url, headers=headers, params=params)
elif method.upper() == 'POST':
response = requests.post(url, headers=headers, params=params)
else:
raise ValueError(f"Unsupported HTTP method: {method}")
# Check for success
if response.status_code == 200:
return response
# Handle rate limiting (429 Too Many Requests)
elif response.status_code == 429:
retry_after = self._get_retry_delay(response, attempt, base_delay)
if attempt < max_retries:
self.logger.warning(f"Rate limited (429). Retrying after {retry_after} seconds (attempt {attempt + 1}/{max_retries})")
time.sleep(retry_after)
continue
else:
self.logger.error(f"Rate limiting exceeded max retries. Final status: {response.status_code}")
return response
# Handle other 4xx/5xx errors that might be temporary
elif response.status_code in [500, 502, 503, 504]:
retry_after = self._get_retry_delay(response, attempt, base_delay)
if attempt < max_retries:
self.logger.warning(f"Server error ({response.status_code}). Retrying after {retry_after} seconds (attempt {attempt + 1}/{max_retries})")
time.sleep(retry_after)
continue
else:
self.logger.error(f"Server errors exceeded max retries. Final status: {response.status_code}")
return response
# Handle authentication errors
elif response.status_code == 401:
self.logger.warning(f"Authentication error (401). Refreshing token and retrying (attempt {attempt + 1}/{max_retries})")
# Refresh access token
self.access_token = self._get_access_token()
if self.access_token and 'Authorization' in headers:
headers['Authorization'] = f'Bearer {self.access_token}'
if attempt < max_retries:
time.sleep(base_delay * (2 ** attempt)) # Exponential backoff
continue
else:
self.logger.error("Authentication refresh exceeded max retries")
return response
# For other status codes, return immediately
else:
self.logger.warning(f"Request failed with status {response.status_code}")
return response
except requests.exceptions.RequestException as e:
if attempt < max_retries:
retry_delay = base_delay * (2 ** attempt)
self.logger.warning(f"Request exception: {e}. Retrying after {retry_delay} seconds (attempt {attempt + 1}/{max_retries})")
time.sleep(retry_delay)
continue
else:
self.logger.error(f"Request exception exceeded max retries: {e}")
raise
return None
def _get_retry_delay(self, response: requests.Response, attempt: int, base_delay: float) -> float:
"""
Calculate retry delay based on response headers and attempt number.
Args:
response: HTTP response object
attempt: Current attempt number (0-based)
base_delay: Base delay for exponential backoff
Returns:
Delay in seconds
"""
# Check for Retry-After header
retry_after = response.headers.get('Retry-After')
if retry_after:
try:
# Retry-After can be in seconds or HTTP date format
if retry_after.isdigit():
delay = int(retry_after)
self.logger.info(f"Using Retry-After header: {delay} seconds")
return delay
else:
# Parse HTTP date format (less common)
from email.utils import parsedate_to_datetime
retry_time = parsedate_to_datetime(retry_after)
                    # Match the parsed date's tz-awareness to avoid naive/aware subtraction errors
                    delay = (retry_time - datetime.now(retry_time.tzinfo)).total_seconds()
if delay > 0:
self.logger.info(f"Using Retry-After date: {delay} seconds")
return delay
except (ValueError, TypeError) as e:
self.logger.warning(f"Failed to parse Retry-After header '{retry_after}': {e}")
# Fall back to exponential backoff
delay = base_delay * (2 ** attempt)
# Cap maximum delay at 60 seconds
delay = min(delay, 60.0)
self.logger.info(f"Using exponential backoff: {delay} seconds")
return delay
def _get_access_token(self) -> Optional[str]:
"""Get access token for Microsoft Graph."""
token_url = f"https://login.microsoftonline.com/{self.tenant}.onmicrosoft.com/oauth2/v2.0/token"
data = {
'client_id': self.client_id,
'client_secret': self.client_secret,
'scope': 'https://graph.microsoft.com/.default',
'grant_type': 'client_credentials'
}
try:
response = requests.post(token_url, data=data)
if response.status_code == 200:
token_data = response.json()
return token_data.get('access_token')
else:
error_data = response.json() if response.content else {}
self.logger.error(f"Token request failed: {error_data}")
return None
except Exception as e:
self.logger.error(f"Exception getting token: {e}")
return None
def _get_site_id(self) -> Optional[str]:
"""Get the SharePoint site ID using Graph API."""
headers = {
'Authorization': f'Bearer {self.access_token}',
'Accept': 'application/json'
}
try:
# Use direct URL approach to get the main site (not Teams channels)
site_path = f"{self.tenant}.sharepoint.com:/sites/{self.site_name}"
direct_url = f"https://graph.microsoft.com/v1.0/sites/{site_path}"
response = requests.get(direct_url, headers=headers)
if response.status_code == 200:
site_data = response.json()
site_id = site_data.get('id')
site_display_name = site_data.get('displayName', 'Unknown')
site_web_url = site_data.get('webUrl', 'Unknown')
# Verify this is the main site (not a Teams channel)
if '/sites/' in site_web_url and not any(keyword in site_web_url for keyword in ['-', 'channel', 'team']):
self.logger.info(f"Found main site: {site_display_name} (ID: {site_id})")
self.logger.info(f"Site URL: {site_web_url}")
return site_id
else:
self.logger.warning(f"Found site but it appears to be a Teams channel: {site_web_url}")
# Continue with fallback search
# Fallback: Search for sites and find the main one
self.logger.info("Using fallback search to find main site")
search_url = f"https://graph.microsoft.com/v1.0/sites?search={self.site_name}"
response = requests.get(search_url, headers=headers)
if response.status_code == 200:
sites_data = response.json()
# Look for the main site (exact match, no Teams channel indicators)
for site in sites_data.get('value', []):
site_url = site.get('webUrl', '')
site_name = site.get('displayName', '')
# Check if this is the main site (not a Teams channel)
if (self.site_name.lower() == site_name.lower() and
'/sites/' + self.site_name == site_url.split('.sharepoint.com')[-1] and
not any(keyword in site_url for keyword in ['-', 'team'])):
site_id = site.get('id')
self.logger.info(f"Found main site via search: {site_name} (ID: {site_id})")
self.logger.info(f"Site URL: {site_url}")
return site_id
self.logger.error("Main site not found in search results")
return None
else:
self.logger.error(f"Failed to search sites: {response.status_code}")
return None
except Exception as e:
self.logger.error(f"Exception getting site ID: {e}")
return None
def _get_drive_id(self) -> Optional[str]:
"""Get the main Documents library drive ID for the site."""
headers = {
'Authorization': f'Bearer {self.access_token}',
'Accept': 'application/json'
}
try:
drives_url = f"https://graph.microsoft.com/v1.0/sites/{self.site_id}/drives"
response = requests.get(drives_url, headers=headers)
if response.status_code == 200:
drives_data = response.json()
drives = drives_data.get('value', [])
if drives:
# Look for the main Documents library
main_drive = None
for drive in drives:
drive_name = drive.get('name', '')
drive_url = drive.get('webUrl', '')
drive_type = drive.get('driveType', '')
# Prefer the main Documents library (not Teams channel libraries)
if (drive_name == 'Documents' and
drive_type == 'documentLibrary' and
not any(keyword in drive_url for keyword in ['channel', 'team', '-'])):
main_drive = drive
break
# If no main drive found, use the first Documents library
if not main_drive:
for drive in drives:
if drive.get('name') == 'Documents':
main_drive = drive
break
# If still no drive, use the first one
if not main_drive:
main_drive = drives[0]
if main_drive:
drive_id = main_drive.get('id')
drive_name = main_drive.get('name', 'Unknown')
drive_url = main_drive.get('webUrl', 'Unknown')
self.logger.info(f"Using drive: {drive_name} (ID: {drive_id})")
self.logger.info(f"Drive URL: {drive_url}")
return drive_id
else:
self.logger.error("No suitable drive found")
return None
else:
self.logger.error("No drives found for the site")
return None
else:
self.logger.error(f"Failed to get drives: {response.status_code}")
return None
except Exception as e:
self.logger.error(f"Exception getting drive ID: {e}")
return None
def get_all_documents(self, folder_path: str = "/") -> List[Dict]:
"""
Get all documents from SharePoint recursively using Graph API.
Args:
folder_path: Path within the drive (default: root)
Returns:
List of document information dictionaries
"""
documents = []
try:
print(f"🚀 Starting document retrieval from SharePoint...")
print(f"📂 Target folder: {folder_path}")
self._get_documents_recursive(folder_path, documents)
print(f"✅ Completed! Retrieved {len(documents)} documents total")
self.logger.info(f"Retrieved {len(documents)} documents from SharePoint via Graph API")
return documents
except Exception as e:
self.logger.error(f"Error getting documents from {folder_path}: {e}")
raise
def _get_documents_recursive(self, folder_path: str, documents: List[Dict], current_path: str = "", max_files: Optional[int] = None):
"""
Recursively get all documents from a folder using Graph API.
Args:
folder_path: Path within the drive
documents: List to append document information to
current_path: Current relative path for building full paths
max_files: Maximum number of files to collect (stops when reached)
"""
# Check if we've reached the file limit
if max_files is not None and len(documents) >= max_files:
print(f" 🛑 Reached maximum file limit ({max_files}), stopping discovery")
return
headers = {
'Authorization': f'Bearer {self.access_token}',
'Accept': 'application/json'
}
# Progress reporting
if current_path:
print(f"📁 Processing folder: {current_path}")
else:
print(f"📁 Processing root folder...")
try:
# Construct the API URL for the folder
if folder_path == "/" or folder_path == "":
items_url = f"https://graph.microsoft.com/v1.0/sites/{self.site_id}/drives/{self.drive_id}/root/children"
else:
# Remove leading slash and encode the path
clean_path = folder_path.lstrip('/')
items_url = f"https://graph.microsoft.com/v1.0/sites/{self.site_id}/drives/{self.drive_id}/root:/{clean_path}:/children"
# Use retry mechanism for API requests
            response = self._make_request_with_retry('GET', items_url, headers=headers)
            if response is None:
                # The retry helper is documented to return None when all retries fail; skip this folder
                self.logger.warning(f"No response received for folder {folder_path}")
                return
            if response.status_code == 200:
items_data = response.json()
items = items_data.get('value', [])
# Progress reporting for this folder
folders_count = sum(1 for item in items if 'folder' in item)
files_count = sum(1 for item in items if 'file' in item)
if current_path:
print(f" 📊 Found {folders_count} folders, {files_count} files in '{current_path}'")
else:
print(f" 📊 Found {folders_count} folders, {files_count} files in root")
for item in items:
# Check file limit before processing each item
if max_files is not None and len(documents) >= max_files:
print(f" 🛑 Reached maximum file limit ({max_files}), stopping in folder {current_path or 'root'}")
return
item_name = item.get('name', '')
# Skip system files and hidden files
if item_name.startswith('.') or item_name.startswith('~'):
continue
if 'file' in item:
# It's a file
relative_path = f"{current_path}/{item_name}".lstrip('/')
# Convert Graph API response to our format
doc_info = {
'name': item_name,
'server_relative_url': f"/sites/{self.site_name}/Shared Documents/{relative_path}",
'size': item.get('size', 0),
'modified': item.get('lastModifiedDateTime', ''),
'created': item.get('createdDateTime', ''),
'author': item.get('createdBy', {}).get('user', {}).get('displayName', 'Unknown'),
'file_type': item_name.split('.')[-1] if '.' in item_name else '',
'relative_path': relative_path,
'folder_path': current_path if current_path else "/",
'download_url': item.get('@microsoft.graph.downloadUrl', ''),
'graph_id': item.get('id', '')
}
documents.append(doc_info)
print(f" 📄 Added file: {item_name} ({len(documents)} files total)")
elif 'folder' in item:
# It's a folder - recurse into it
subfolder_path = f"{current_path}/{item_name}".lstrip('/')
folder_api_path = f"{folder_path.rstrip('/')}/{item_name}" if folder_path != "/" else item_name
print(f" 🔄 Entering subfolder: {item_name}")
self._get_documents_recursive(folder_api_path, documents, subfolder_path, max_files)
elif response.status_code == 404:
# Folder not found, skip silently
pass
else:
self.logger.warning(f"Failed to get items from {folder_path}: {response.status_code}")
except Exception as e:
self.logger.error(f"Error processing folder {folder_path}: {e}")
def download_file_content(self, server_relative_url: str) -> Optional[bytes]:
"""
Download file content using Graph API.
Args:
server_relative_url: SharePoint server relative URL (for compatibility)
Returns:
File content as bytes, or None if failed
"""
        # Re-list all documents to locate this one and get its download URL (no local cache is kept)
documents = self.get_all_documents()
target_doc = None
for doc in documents:
if doc['server_relative_url'] == server_relative_url:
target_doc = doc
break
if not target_doc:
self.logger.error(f"Document not found: {server_relative_url}")
return None
try:
# Use the direct download URL from Graph API
download_url = target_doc.get('download_url')
if download_url:
response = requests.get(download_url)
if response.status_code == 200:
return response.content
else:
self.logger.error(f"Failed to download via direct URL: {response.status_code}")
# Fallback: Use Graph API content endpoint
headers = {
'Authorization': f'Bearer {self.access_token}',
}
graph_id = target_doc.get('graph_id')
if graph_id:
content_url = f"https://graph.microsoft.com/v1.0/sites/{self.site_id}/drives/{self.drive_id}/items/{graph_id}/content"
response = requests.get(content_url, headers=headers)
if response.status_code == 200:
return response.content
else:
self.logger.error(f"Failed to download via Graph API: {response.status_code}")
return None
except Exception as e:
self.logger.error(f"Error downloading file content {server_relative_url}: {e}")
return None
def get_file_info(self, server_relative_url: str) -> Optional[Dict]:
"""
Get detailed information about a file using Graph API.
Args:
server_relative_url: SharePoint server relative URL
Returns:
File information dictionary if successful, None otherwise
"""
# Find the document in our documents list
documents = self.get_all_documents()
for doc in documents:
if doc['server_relative_url'] == server_relative_url:
return {
'name': doc['name'],
'server_relative_url': doc['server_relative_url'],
'size': doc['size'],
'modified': doc['modified'],
'created': doc['created'],
'version': '1.0', # Graph API doesn't provide version easily
'author': doc['author'],
'file_type': doc['file_type']
}
self.logger.error(f"File not found: {server_relative_url}")
return None
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| site_url | str | - | positional |
| client_id | str | - | positional |
| client_secret | str | - | positional |
Parameter Details
site_url: Full SharePoint site URL (must contain .sharepoint.com and /sites/), e.g. https://<tenant>.sharepoint.com/sites/<SiteName>
client_id: Azure AD app client ID used for the client-credentials flow
client_secret: Azure AD app client secret
Return Value
The constructor returns a connected client instance; it raises ValueError for malformed site URLs and ConnectionError when authentication or site/drive resolution fails.
Class Interface
Methods
__init__(self, site_url, client_id, client_secret)
Purpose: Initialize the SharePoint Graph client: authenticate with Microsoft Graph, then resolve the site ID and the Documents drive ID.
Parameters:
site_url (str): SharePoint site URL
client_id (str): Azure AD app client ID
client_secret (str): Azure AD app client secret
Returns: None
_make_request_with_retry(self, method, url, headers, params, max_retries, base_delay) -> Optional[requests.Response]
Purpose: Make an HTTP request with retry logic for rate limiting (429), transient server errors (500/502/503/504), and expired tokens (401).
Parameters:
method (str): HTTP method ('GET', 'POST', etc.)
url (str): Request URL
headers (Dict): Request headers
params (Dict): Request parameters
max_retries (int): Maximum number of retry attempts (default 5)
base_delay (float): Base delay for exponential backoff (default 1.0)
Returns: Optional[requests.Response] - the final response, or None if all retries failed
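Illustrative sketch (not part of the module): how the retry helper might be driven directly for a one-off Graph call, assuming `client` is an already-constructed SharePointGraphClient.
url = f"https://graph.microsoft.com/v1.0/sites/{client.site_id}/drives/{client.drive_id}/root/children"
headers = {'Authorization': f'Bearer {client.access_token}', 'Accept': 'application/json'}
response = client._make_request_with_retry('GET', url, headers=headers, max_retries=3, base_delay=0.5)
if response is not None and response.status_code == 200:
    for item in response.json().get('value', []):
        print(item.get('name'))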
_get_retry_delay(self, response, attempt, base_delay) -> float
Purpose: Calculate the retry delay, preferring the Retry-After response header (seconds or HTTP-date) and falling back to exponential backoff capped at 60 seconds.
Parameters:
response (requests.Response): HTTP response object
attempt (int): Current attempt number (0-based)
base_delay (float): Base delay for exponential backoff
Returns: float - delay in seconds
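When no usable Retry-After header is present, the fallback is plain exponential doubling capped at 60 seconds; a quick sketch of the resulting schedule:
# Delays produced by the fallback path for base_delay = 1.0
base_delay = 1.0
delays = [min(base_delay * (2 ** attempt), 60.0) for attempt in range(6)]
print(delays)  # [1.0, 2.0, 4.0, 8.0, 16.0, 32.0]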
_get_access_token(self) -> Optional[str]
Purpose: Get access token for Microsoft Graph.
Returns: Optional[str] - access token, or None if the token request failed
_get_site_id(self) -> Optional[str]
Purpose: Get the SharePoint site ID using Graph API.
Returns: Optional[str] - site ID, or None if the site could not be resolved
_get_drive_id(self) -> Optional[str]
Purpose: Get the main Documents library drive ID for the site.
Returns: Optional[str] - drive ID, or None if no suitable drive was found
get_all_documents(self, folder_path) -> List[Dict]
Purpose: Get all documents from SharePoint recursively via the Graph API, starting at folder_path.
Parameters:
folder_path (str): Path within the drive (default: "/")
Returns: List[Dict] - one information dictionary per document
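A minimal sketch of consuming the returned dictionaries; the field names come from _get_documents_recursive, while `client` and the PDF filter are assumptions for illustration:
documents = client.get_all_documents("/")
pdfs = [d for d in documents if d['file_type'].lower() == 'pdf']
for doc in sorted(pdfs, key=lambda d: d['size'], reverse=True):
    print(f"{doc['relative_path']}  {doc['size']} bytes  modified {doc['modified']}")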
_get_documents_recursive(self, folder_path, documents, current_path, max_files)
Purpose: Recursively collect documents from a folder via the Graph API, appending them to the shared list and stopping once max_files is reached.
Parameters:
folder_path (str): Path within the drive
documents (List[Dict]): List to append document information to
current_path (str): Current relative path used to build full paths
max_files (Optional[int]): Maximum number of files to collect (stops when reached)
Returns: None
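get_all_documents does not expose the max_files cap, so limiting discovery means calling this private helper directly; a hedged sketch, assuming that direct use is acceptable:
docs = []
client._get_documents_recursive("/", docs, max_files=50)  # stop after 50 files are collected
print(f"Collected {len(docs)} documents")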
download_file_content(self, server_relative_url) -> Optional[bytes]
Purpose: Download file content via the Graph API, first through the item's direct download URL and, if that fails, through the items/{id}/content endpoint.
Parameters:
server_relative_url (str): SharePoint server-relative URL (kept for compatibility with the REST client)
Returns: Optional[bytes] - file content, or None if the download failed
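A sketch of saving one discovered file to disk; the local path handling and the `client` instance are assumptions:
from pathlib import Path

documents = client.get_all_documents()
target = documents[0]  # pick any entry from the listing
content = client.download_file_content(target['server_relative_url'])
if content is not None:
    Path(target['name']).write_bytes(content)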
get_file_info(self, server_relative_url) -> Optional[Dict]
Purpose: Get detailed information about a file via the Graph API.
Parameters:
server_relative_url (str): SharePoint server-relative URL
Returns: Optional[Dict] - file information dictionary, or None if the file was not found
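A small sketch of looking up metadata for a known path; the server-relative URL here is a placeholder in the format built by _get_documents_recursive:
info = client.get_file_info("/sites/<SiteName>/Shared Documents/reports/summary.pdf")
if info:
    print(info['name'], info['size'], info['modified'])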
Required Imports
import json
import logging
import time
import requests
from datetime import datetime
from typing import Dict, List, Optional
Usage Example
# Example usage:
# client = SharePointGraphClient(
#     site_url="https://<tenant>.sharepoint.com/sites/<SiteName>",
#     client_id="<azure-ad-app-client-id>",
#     client_secret="<azure-ad-app-client-secret>",
# )
# documents = client.get_all_documents()
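Because the constructor authenticates and resolves the site and drive eagerly, callers may want to handle its failure modes explicitly; a hedged sketch with placeholder credentials:
import logging

logging.basicConfig(level=logging.INFO)

try:
    client = SharePointGraphClient(
        site_url="https://<tenant>.sharepoint.com/sites/<SiteName>",
        client_id="<azure-ad-app-client-id>",
        client_secret="<azure-ad-app-client-secret>",
    )
except (ValueError, ConnectionError) as exc:
    logging.error("SharePoint connection failed: %s", exc)
else:
    print(f"Connected: site_id={client.site_id}, drive_id={client.drive_id}")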
Similar Components
AI-powered semantic similarity - components with related functionality:
- class GraphClient (71.3% similar)
- class SharePointRestClient (70.3% similar)
- function test_graph_client (68.2% similar)
- class SharePointClient (64.8% similar)
- class ClientContext (60.5% similar)