class SharePointRestClient
A SharePoint REST API client that provides app-only authentication and methods for retrieving and downloading documents from SharePoint sites.
/tf/active/vicechatdev/SPFCsync/sharepoint_rest_client.py
7 - 236
moderate
Purpose
This class implements a SharePoint client using direct REST API calls with OAuth 2.0 client credentials flow for app-only authentication. It provides functionality to connect to SharePoint sites, retrieve document listings recursively from folders, download file content, and get detailed file information. The client bypasses potential issues with the office365 library by using direct HTTP requests to SharePoint REST endpoints.
Source Code
class SharePointRestClient:
    """
    SharePoint client using direct REST API calls with app-only authentication.

    This bypasses potential issues with the office365 library. The access
    token is requested once, in the constructor, and is not refreshed
    afterwards; long-running callers should create a new client when the
    token expires.
    """

    # Seconds to wait for each HTTP request; without a timeout `requests`
    # blocks indefinitely on a stalled connection. Overridden per instance
    # by the ``timeout`` constructor argument.
    timeout: float = 30.0

    # Folder names that are skipped while walking a library.
    _SYSTEM_FOLDERS = ('Forms', 'Item')

    def __init__(self, site_url: str, client_id: str, client_secret: str,
                 timeout: float = 30.0):
        """
        Initialize SharePoint REST client.

        Args:
            site_url: SharePoint site URL
            client_id: Azure AD app client ID
            client_secret: Azure AD app client secret
            timeout: Seconds to wait for each HTTP request

        Raises:
            ValueError: If site_url does not contain '.sharepoint.com'.
            ConnectionError: If no access token could be obtained.
        """
        self.site_url = site_url.rstrip('/')
        self.client_id = client_id
        self.client_secret = client_secret
        self.timeout = timeout

        # Extract tenant information
        self.tenant = self._extract_tenant(site_url)
        self.sharepoint_resource = f"https://{self.tenant}.sharepoint.com"

        # Setup logging
        self.logger = logging.getLogger(__name__)

        # Get access token
        self.access_token = self._get_access_token()
        if not self.access_token:
            raise ConnectionError("Failed to authenticate with SharePoint")
        self.logger.info("Successfully authenticated with SharePoint")
        # The outcome of the connection test is only logged; a failed test
        # does not abort construction.
        self._test_connection()

    @staticmethod
    def _extract_tenant(site_url: str) -> str:
        """
        Return the host prefix that precedes '.sharepoint.com' in site_url.

        Any URL scheme (not only 'https://') is stripped.

        Raises:
            ValueError: If site_url does not contain '.sharepoint.com'.
        """
        if '.sharepoint.com' not in site_url:
            raise ValueError("Invalid SharePoint URL format")
        host_prefix = site_url.split('.sharepoint.com')[0]
        return host_prefix.split('://')[-1]

    @staticmethod
    def _escape_odata_literal(value: str) -> str:
        """Double single quotes so value can sit inside an OData '...' literal."""
        return value.replace("'", "''")

    @staticmethod
    def _file_extension(name: str) -> str:
        """Return the text after the last '.' in name, or '' if there is none."""
        return name.split('.')[-1] if '.' in name else ''

    @staticmethod
    def _build_document_info(file_info: Dict) -> Dict:
        """Convert one entry of a Files listing into a document dictionary."""
        name = file_info['Name']
        server_relative_url = file_info['ServerRelativeUrl']

        # Path relative to the library root when the file lives under
        # 'Shared Documents'; otherwise fall back to the bare file name.
        if '/Shared Documents' in server_relative_url:
            relative_path = server_relative_url.split('/Shared Documents/')[-1]
        else:
            relative_path = name

        author = file_info.get('Author')
        author_title = author.get('Title', 'Unknown') if isinstance(author, dict) else 'Unknown'

        return {
            'name': name,
            'server_relative_url': server_relative_url,
            'size': file_info['Length'],
            'modified': file_info['TimeLastModified'],
            'created': file_info['TimeCreated'],
            'author': author_title,
            'file_type': SharePointRestClient._file_extension(name),
            'relative_path': relative_path
        }

    def _auth_headers(self, json_body: bool = True) -> Dict[str, str]:
        """
        Build request headers carrying the bearer token.

        Args:
            json_body: When True, also add JSON Accept/Content-Type headers.
        """
        headers = {'Authorization': f'Bearer {self.access_token}'}
        if json_body:
            headers['Accept'] = 'application/json'
            headers['Content-Type'] = 'application/json'
        return headers

    def _get_access_token(self) -> Optional[str]:
        """
        Get access token for SharePoint.

        Returns:
            The access token string, or None if the request failed.
        """
        token_url = f"https://login.microsoftonline.com/{self.tenant}.onmicrosoft.com/oauth2/v2.0/token"
        data = {
            'client_id': self.client_id,
            'client_secret': self.client_secret,
            'scope': f'{self.sharepoint_resource}/.default',
            'grant_type': 'client_credentials'
        }
        try:
            response = requests.post(token_url, data=data, timeout=self.timeout)
            if response.status_code == 200:
                return response.json().get('access_token')
            # The error body is not guaranteed to be JSON; fall back to the
            # raw text so the real failure is still logged.
            try:
                error_data = response.json() if response.content else {}
            except ValueError:
                error_data = response.text
            self.logger.error(f"Token request failed: {error_data}")
            return None
        except Exception as e:
            self.logger.error(f"Exception getting token: {e}")
            return None

    def _test_connection(self) -> bool:
        """
        Test the SharePoint connection.

        Returns:
            True if the site endpoint answered with HTTP 200, False otherwise.
        """
        try:
            response = requests.get(
                f"{self.site_url}/_api/web",
                headers=self._auth_headers(),
                timeout=self.timeout,
            )
            if response.status_code == 200:
                site_title = response.json().get('Title', 'Unknown')
                self.logger.info(f"Successfully connected to SharePoint site: {site_title}")
                return True
            self.logger.error(f"Connection test failed: {response.status_code} - {response.text}")
            return False
        except Exception as e:
            self.logger.error(f"Connection test exception: {e}")
            return False

    def get_all_documents(self, folder_path: str = "/Shared Documents") -> List[Dict]:
        """
        Get all documents from SharePoint recursively using REST API.

        Args:
            folder_path: Server relative path to folder

        Returns:
            List of document information dictionaries
        """
        documents: List[Dict] = []
        try:
            self._get_documents_recursive(folder_path, documents)
            self.logger.info(f"Retrieved {len(documents)} documents from SharePoint")
            return documents
        except Exception as e:
            self.logger.error(f"Error getting documents from {folder_path}: {e}")
            raise

    def _get_documents_recursive(self, folder_path: str, documents: List[Dict]):
        """
        Recursively get all documents from a folder using REST API.

        Args:
            folder_path: Server relative path to folder
            documents: List to append document information to
        """
        headers = self._auth_headers()
        folder_literal = self._escape_odata_literal(folder_path)
        folder_endpoint = f"{self.site_url}/_api/web/GetFolderByServerRelativeUrl('{folder_literal}')"
        try:
            # Get files in current folder
            files_response = requests.get(
                f"{folder_endpoint}/Files", headers=headers, timeout=self.timeout
            )
            if files_response.status_code == 200:
                for file_info in files_response.json().get('value', []):
                    # Skip system files and hidden files
                    if file_info['Name'].startswith(('.', '~')):
                        continue
                    documents.append(self._build_document_info(file_info))
            else:
                self.logger.warning(
                    f"Could not list files in {folder_path}: {files_response.status_code}"
                )

            # Get subfolders
            folders_response = requests.get(
                f"{folder_endpoint}/Folders", headers=headers, timeout=self.timeout
            )
            if folders_response.status_code == 200:
                for folder_info in folders_response.json().get('value', []):
                    folder_name = folder_info['Name']
                    # Skip system folders
                    if folder_name.startswith('.') or folder_name in self._SYSTEM_FOLDERS:
                        continue
                    self._get_documents_recursive(folder_info['ServerRelativeUrl'], documents)
            else:
                self.logger.warning(
                    f"Could not list subfolders of {folder_path}: {folders_response.status_code}"
                )
        except Exception as e:
            self.logger.error(f"Error processing folder {folder_path}: {e}")
            # Continue processing other folders even if one fails

    def download_file_content(self, server_relative_url: str) -> Optional[bytes]:
        """
        Download file content from SharePoint using REST API.

        Args:
            server_relative_url: SharePoint server relative URL of file

        Returns:
            File content as bytes, or None if failed
        """
        try:
            file_literal = self._escape_odata_literal(server_relative_url)
            # Get file content using REST API
            content_url = f"{self.site_url}/_api/web/GetFileByServerRelativeUrl('{file_literal}')/$value"
            response = requests.get(
                content_url,
                headers=self._auth_headers(json_body=False),
                timeout=self.timeout,
            )
            if response.status_code == 200:
                return response.content
            self.logger.error(f"Failed to download file {server_relative_url}: {response.status_code}")
            return None
        except Exception as e:
            self.logger.error(f"Error downloading file content {server_relative_url}: {e}")
            return None

    def get_file_info(self, server_relative_url: str) -> Optional[Dict]:
        """
        Get detailed information about a file using REST API.

        Args:
            server_relative_url: SharePoint server relative URL of file

        Returns:
            File information dictionary if successful, None otherwise
        """
        try:
            file_literal = self._escape_odata_literal(server_relative_url)
            file_url = f"{self.site_url}/_api/web/GetFileByServerRelativeUrl('{file_literal}')"
            response = requests.get(
                file_url, headers=self._auth_headers(), timeout=self.timeout
            )
            if response.status_code != 200:
                self.logger.error(f"Failed to get file info {server_relative_url}: {response.status_code}")
                return None
            file_data = response.json()
            return {
                'name': file_data['Name'],
                'server_relative_url': file_data['ServerRelativeUrl'],
                'size': file_data['Length'],
                'modified': file_data['TimeLastModified'],
                'created': file_data['TimeCreated'],
                'version': file_data.get('UIVersionLabel', '1.0'),
                'author': 'Unknown',  # Would need additional API call
                'file_type': self._file_extension(file_data['Name'])
            }
        except Exception as e:
            self.logger.error(f"Error getting file info for {server_relative_url}: {e}")
            return None
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - |
Parameter Details
site_url: The full URL of the SharePoint site (e.g., 'https://tenant.sharepoint.com/sites/sitename'). Must contain '.sharepoint.com' in the URL. Trailing slashes are automatically removed.
client_id: Azure AD application (app registration) client ID used for authentication. This is the Application (client) ID from Azure AD app registration.
client_secret: Azure AD application client secret used for authentication. This is the secret value (not the secret ID) created in the app registration's Certificates & secrets section.
Return Value
Instantiation returns a SharePointRestClient object with an active access token and verified connection to SharePoint. The constructor raises ConnectionError if authentication fails or ValueError if the site_url format is invalid. Methods return: get_all_documents() returns List[Dict] of document metadata; download_file_content() returns Optional[bytes] of file content; get_file_info() returns Optional[Dict] with file metadata.
Class Interface
Methods
__init__(self, site_url: str, client_id: str, client_secret: str)
Purpose: Initialize the SharePoint REST client, authenticate with Azure AD, and verify the connection
Parameters:
site_url: SharePoint site URL (must contain '.sharepoint.com'); client_id: Azure AD app client ID; client_secret: Azure AD app client secret
Returns: None (raises ConnectionError if authentication fails, ValueError if URL format is invalid)
_get_access_token(self) -> Optional[str]
Purpose: Obtain OAuth 2.0 access token from Azure AD using client credentials flow
Returns: Access token string if successful, None if authentication fails
_test_connection(self) -> bool
Purpose: Test the SharePoint connection by making a simple API call to retrieve site information
Returns: True if connection successful, False otherwise
get_all_documents(self, folder_path: str = '/Shared Documents') -> List[Dict]
Purpose: Recursively retrieve all documents from a SharePoint folder and its subfolders
Parameters:
folder_path: Server relative path to the folder (default: '/Shared Documents')
Returns: List of dictionaries containing document metadata (name, server_relative_url, size, modified, created, author, file_type, relative_path)
_get_documents_recursive(self, folder_path: str, documents: List[Dict])
Purpose: Internal method to recursively traverse folders and collect document information
Parameters:
folder_path: Server relative path to the folder to process; documents: List to append document information dictionaries to (modified in place)
Returns: None (modifies the documents list in place)
download_file_content(self, server_relative_url: str) -> Optional[bytes]
Purpose: Download the binary content of a file from SharePoint
Parameters:
server_relative_url: SharePoint server relative URL of the file to download
Returns: File content as bytes if successful, None if download fails
get_file_info(self, server_relative_url: str) -> Optional[Dict]
Purpose: Retrieve detailed metadata information about a specific file
Parameters:
server_relative_url: SharePoint server relative URL of the file
Returns: Dictionary with file metadata (name, server_relative_url, size, modified, created, version, author, file_type) if successful, None otherwise
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| site_url | str | SharePoint site URL with trailing slash removed | instance |
| client_id | str | Azure AD application client ID | instance |
| client_secret | str | Azure AD application client secret | instance |
| tenant | str | SharePoint tenant name extracted from site URL | instance |
| sharepoint_resource | str | SharePoint resource URL used for OAuth scope (e.g., 'https://tenant.sharepoint.com') | instance |
| logger | logging.Logger | Logger instance for logging operations and errors | instance |
| access_token | Optional[str] | OAuth 2.0 access token used for authenticating SharePoint API requests | instance |
Dependencies
requests, json, typing, datetime, logging
Required Imports
import requests
import json
from typing import List, Dict, Optional
from datetime import datetime
import logging
Usage Example
import logging
from typing import List, Dict, Optional
import requests

# Emit INFO-level log records so the client's messages are visible
logging.basicConfig(level=logging.INFO)

# Build the client; the constructor requests an access token right away
client = SharePointRestClient(
    site_url='https://contoso.sharepoint.com/sites/mysite',
    client_id='your-client-id-guid',
    client_secret='your-client-secret'
)

# List everything under the default Shared Documents folder
documents = client.get_all_documents()
for entry in documents:
    print(f"File: {entry['name']}, Size: {entry['size']}, Modified: {entry['modified']}")

# List the contents of one specific folder
folder_docs = client.get_all_documents('/Shared Documents/MyFolder')

# Fetch a single file and store it locally; None signals a failed download
file_content = client.download_file_content('/sites/mysite/Shared Documents/file.pdf')
if file_content:
    with open('downloaded_file.pdf', 'wb') as out_file:
        out_file.write(file_content)

# Look up metadata for a single file; None signals a failed lookup
file_info = client.get_file_info('/sites/mysite/Shared Documents/file.pdf')
if file_info:
    print(f"Version: {file_info['version']}, Created: {file_info['created']}")
Best Practices
- Always handle ConnectionError during instantiation as authentication may fail
- The access token is obtained during initialization and stored for subsequent requests; consider token expiration for long-running applications
- Use try-except blocks when calling methods as they may raise exceptions for network or permission issues
- The client automatically tests the connection during initialization; check logs for connection status
- System files (starting with '.' or '~') and system folders ('Forms', 'Item') are automatically filtered out
- File paths should use server-relative URLs (e.g., '/sites/sitename/Shared Documents/file.pdf')
- The get_all_documents method recursively traverses all subfolders; be cautious with large document libraries
- Download operations return None on failure; always check for None before processing content
- The client continues processing other folders even if one folder fails during recursive traversal
- Ensure the Azure AD app has sufficient SharePoint permissions before instantiation
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
class SharePointClient 79.8% similar
-
function test_rest_client 75.4% similar
-
class SharePointGraphClient 70.3% similar
-
class NativeClient 67.7% similar
-
function test_sharepoint_api_call 67.1% similar