class SharePointClient
A SharePoint client class that provides methods for connecting to SharePoint sites, retrieving documents recursively, downloading file content, and managing document metadata using app-only authentication.
/tf/active/vicechatdev/SPFCsync/sharepoint_client.py
11 - 173
moderate
Purpose
This class serves as a comprehensive interface for interacting with SharePoint document libraries. It handles authentication using Azure AD app credentials, provides recursive document discovery across folder hierarchies, downloads file content, and retrieves detailed file metadata. Designed for syncing SharePoint documents with FileCloud or other document management systems, it includes connection testing, error handling, and logging capabilities.
Source Code
class SharePointClient:
    """
    SharePoint client for syncing documents with FileCloud.

    Holds a ``ClientContext`` authenticated with app-only credentials and
    offers helpers to list documents recursively, download file content
    and read metadata of a single file.
    """

    def __init__(self, site_url: str, client_id: str, client_secret: str):
        """
        Initialize SharePoint client with app-only authentication.

        The connection is tested immediately, so construction fails when
        the site cannot be reached.

        Args:
            site_url: SharePoint site URL
            client_id: Azure AD app client ID
            client_secret: Azure AD app client secret

        Raises:
            ConnectionError: If the connection test fails.
        """
        self.site_url = site_url
        self.client_id = client_id
        self.client_secret = client_secret

        # Initialize context with app-only authentication
        self.ctx = ClientContext(site_url).with_credentials(
            ClientCredential(client_id, client_secret)
        )

        # Setup logging
        self.logger = logging.getLogger(__name__)

        # Test connection
        self._test_connection()

    def _test_connection(self) -> bool:
        """
        Test the SharePoint connection by loading the web object.

        Returns:
            True when the web object was loaded successfully.

        Raises:
            ConnectionError: If loading fails for any reason; the original
                exception is attached as the cause.
        """
        try:
            web = self.ctx.web
            self.ctx.load(web)
            self.ctx.execute_query()
            self.logger.info(
                "Successfully connected to SharePoint site: %s",
                web.properties['Title'],
            )
            return True
        except Exception as e:
            self.logger.error("Failed to connect to SharePoint: %s", e)
            # Chain the cause so the underlying failure stays visible.
            raise ConnectionError(f"SharePoint connection failed: {e}") from e

    @staticmethod
    def _author_title(props: Dict) -> str:
        """
        Return the author's title from a file property mapping.

        Falls back to 'Unknown' when the 'Author' entry is missing, empty
        or not a dict, so one odd entry cannot abort a folder listing.
        """
        author = props.get('Author') or {}
        if isinstance(author, dict):
            return author.get('Title', 'Unknown')
        return 'Unknown'

    @staticmethod
    def _file_extension(name: str) -> str:
        """Return the text after the last '.' in *name*, or '' if there is no '.'."""
        return name.rsplit('.', 1)[-1] if '.' in name else ''

    @staticmethod
    def _relative_to_library(server_relative_url: str, web_root: str,
                             library: str = 'Shared Documents') -> str:
        """
        Return *server_relative_url* relative to the document library.

        Args:
            server_relative_url: Server relative URL of a file
            web_root: Server relative URL of the web (site)
            library: Name of the library folder below the web root

        Returns:
            The path below the library without a leading '/'. When the URL
            does not lie inside the library, the URL itself without its
            leading '/'.
        """
        # A root site reports '/' as its URL; trimming the trailing slash
        # avoids building a '//Shared Documents' prefix that never matches.
        prefix = web_root.rstrip('/') + '/' + library
        # Only strip at a path boundary so that e.g. 'Shared Documents2'
        # is not mistaken for the library.
        inside_library = (
            server_relative_url == prefix
            or server_relative_url.startswith(prefix + '/')
        )
        if inside_library:
            return server_relative_url[len(prefix):].lstrip('/')
        return server_relative_url.lstrip('/')

    def get_all_documents(self, folder_path: str = "/Shared Documents") -> List[Dict]:
        """
        Get all documents from SharePoint recursively.

        Args:
            folder_path: Server relative path to folder

        Returns:
            List of document information dictionaries

        Raises:
            Exception: Any error escaping the recursive walk is logged and
                re-raised unchanged.
        """
        documents = []
        try:
            self._get_documents_recursive(folder_path, documents)
            self.logger.info("Retrieved %s documents from SharePoint", len(documents))
            return documents
        except Exception as e:
            self.logger.error("Error getting documents from %s: %s", folder_path, e)
            raise

    def _get_documents_recursive(self, folder_path: str, documents: List[Dict]):
        """
        Recursively get all documents from a folder and its subfolders.

        Errors are logged and swallowed per folder, so a failing folder
        does not stop the traversal of its siblings.

        Args:
            folder_path: Server relative path to folder
            documents: List to append document information to
        """
        try:
            folder = self.ctx.web.get_folder_by_server_relative_url(folder_path)

            # Get files in current folder
            files = folder.files
            self.ctx.load(files)
            self.ctx.execute_query()

            for file in files:
                props = file.properties
                name = props['Name']
                # Skip system files and hidden files
                if name.startswith(('.', '~')):
                    continue
                url = props['ServerRelativeUrl']
                documents.append({
                    'name': name,
                    'server_relative_url': url,
                    'size': props['Length'],
                    'modified': props['TimeLastModified'],
                    'created': props['TimeCreated'],
                    'author': self._author_title(props),
                    'file_type': self._file_extension(name),
                    'relative_path': self._relative_to_library(
                        url, self.ctx.web.properties['ServerRelativeUrl']
                    ),
                })

            # Get subfolders and process them recursively
            folders = folder.folders
            self.ctx.load(folders)
            self.ctx.execute_query()

            for subfolder in folders:
                subfolder_name = subfolder.properties['Name']
                # Skip system folders
                if subfolder_name.startswith('.') or subfolder_name in ('Forms', 'Item'):
                    continue
                self._get_documents_recursive(
                    subfolder.properties['ServerRelativeUrl'], documents
                )
        except Exception as e:
            self.logger.error("Error processing folder %s: %s", folder_path, e)
            # Continue processing other folders even if one fails

    def download_file_content(self, server_relative_url: str) -> Optional[bytes]:
        """
        Download file content from SharePoint.

        Args:
            server_relative_url: SharePoint server relative URL of file

        Returns:
            File content as bytes, or None if failed
        """
        try:
            file = self.ctx.web.get_file_by_server_relative_url(server_relative_url)
            # Download file content to memory
            content = file.get_content()
            self.ctx.execute_query()
            return content.value
        except Exception as e:
            self.logger.error(
                "Error downloading file content %s: %s", server_relative_url, e
            )
            return None

    def get_file_info(self, server_relative_url: str) -> Optional[Dict]:
        """
        Get detailed information about a file.

        Args:
            server_relative_url: SharePoint server relative URL of file

        Returns:
            File information dictionary if successful, None otherwise
        """
        try:
            file = self.ctx.web.get_file_by_server_relative_url(server_relative_url)
            self.ctx.load(file)
            self.ctx.execute_query()

            props = file.properties
            name = props['Name']
            return {
                'name': name,
                'server_relative_url': props['ServerRelativeUrl'],
                'size': props['Length'],
                'modified': props['TimeLastModified'],
                'created': props['TimeCreated'],
                'version': props.get('UIVersionLabel', '1.0'),
                'author': self._author_title(props),
                'file_type': self._file_extension(name),
            }
        except Exception as e:
            self.logger.error(
                "Error getting file info for %s: %s", server_relative_url, e
            )
            return None
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - |
Parameter Details
site_url: The full URL of the SharePoint site to connect to (e.g., 'https://contoso.sharepoint.com/sites/mysite'). This is the base URL for all SharePoint operations.
client_id: The Azure AD application (app registration) client ID used for app-only authentication. This ID is obtained when registering an application in Azure AD.
client_secret: The Azure AD application client secret (password) used for app-only authentication. This secret is generated in the Azure AD app registration and should be kept secure.
Return Value
Instantiation returns a SharePointClient object with an authenticated connection to the specified SharePoint site. The object maintains a ClientContext (self.ctx) for executing SharePoint operations. Methods return: get_all_documents() returns a List[Dict] of document metadata; download_file_content() returns Optional[bytes] of file content; get_file_info() returns Optional[Dict] with detailed file information; _test_connection() returns bool indicating connection success.
Class Interface
Methods
__init__(self, site_url: str, client_id: str, client_secret: str)
Purpose: Initialize the SharePoint client with app-only authentication and test the connection
Parameters:
site_url: SharePoint site URL to connect to
client_id: Azure AD app client ID for authentication
client_secret: Azure AD app client secret for authentication
Returns: None (constructor)
_test_connection(self) -> bool
Purpose: Test the SharePoint connection by loading the web properties and verify authentication works
Returns: True if connection successful, raises ConnectionError if failed
get_all_documents(self, folder_path: str = '/Shared Documents') -> List[Dict]
Purpose: Retrieve all documents from a SharePoint folder and its subfolders recursively
Parameters:
folder_path: Server relative path to the folder to start from (default: '/Shared Documents')
Returns: List of dictionaries containing document metadata including name, server_relative_url, size, modified, created, author, file_type, and relative_path
_get_documents_recursive(self, folder_path: str, documents: List[Dict])
Purpose: Internal recursive helper method to traverse folder hierarchy and collect document information
Parameters:
folder_path: Server relative path to the current folder being processed
documents: List to append document information dictionaries to (modified in place)
Returns: None (modifies documents list in place)
download_file_content(self, server_relative_url: str) -> Optional[bytes]
Purpose: Download the binary content of a file from SharePoint into memory
Parameters:
server_relative_url: SharePoint server relative URL of the file to download
Returns: File content as bytes if successful, None if download fails
get_file_info(self, server_relative_url: str) -> Optional[Dict]
Purpose: Retrieve detailed metadata information about a specific file
Parameters:
server_relative_url: SharePoint server relative URL of the file
Returns: Dictionary with file metadata including name, server_relative_url, size, modified, created, version, author, and file_type if successful, None if failed
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| site_url | str | The SharePoint site URL provided during initialization | instance |
| client_id | str | The Azure AD app client ID used for authentication | instance |
| client_secret | str | The Azure AD app client secret used for authentication | instance |
| ctx | ClientContext | The authenticated SharePoint ClientContext object used for all SharePoint operations | instance |
| logger | logging.Logger | Logger instance for recording connection status, errors, and operation results | instance |
Dependencies
- office365-rest-python-client
- logging
Required Imports
from office365.sharepoint.client_context import ClientContext
from office365.runtime.auth.client_credential import ClientCredential
from office365.sharepoint.files.file import File
from office365.sharepoint.folders.folder import Folder
import logging
from typing import List, Dict, Optional
Usage Example
import logging

from sharepoint_client import SharePointClient

# Show INFO-level log output from the client
logging.basicConfig(level=logging.INFO)

# Create the client; the constructor tests the connection right away
client = SharePointClient(
    site_url='https://contoso.sharepoint.com/sites/mysite',
    client_id='your-client-id',
    client_secret='your-client-secret',
)

# List everything below the default Shared Documents library
documents = client.get_all_documents()
for entry in documents:
    print(f"File: {entry['name']}, Size: {entry['size']} bytes")

# List only the documents below one specific folder
folder_docs = client.get_all_documents('/Shared Documents/Projects')

# Fetch one file into memory and store it locally when the download worked
file_content = client.download_file_content(
    '/sites/mysite/Shared Documents/report.pdf'
)
if file_content:
    with open('local_report.pdf', 'wb') as target:
        target.write(file_content)

# Look up the metadata of the same file
file_info = client.get_file_info('/sites/mysite/Shared Documents/report.pdf')
if file_info:
    print(f"Version: {file_info['version']}, Modified: {file_info['modified']}")
Best Practices
- Always handle the ConnectionError that may be raised during instantiation if SharePoint connection fails
- The client tests the connection immediately upon instantiation, so wrap initialization in try-except blocks
- Store client_secret securely using environment variables or secure configuration management, never hardcode
- The class maintains a persistent ClientContext (self.ctx) throughout its lifetime, reusing the authenticated session
- Methods like get_all_documents() can be resource-intensive for large document libraries; consider implementing pagination or filtering for production use
- The _get_documents_recursive() method continues processing even if individual folders fail, logging errors without stopping the entire operation
- System files (starting with '.' or '~') and system folders ('Forms', 'Item') are automatically filtered out
- File content is loaded into memory by download_file_content(), so be cautious with large files
- The relative_path in document metadata is calculated relative to the Shared Documents library
- All methods use the logger for error tracking; configure logging appropriately for your application
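- Each operation first queues its request on the context (e.g. ctx.load() or get_content()) and then calls ctx.execute_query() to send it to SharePoint; the methods shown issue one execute_query() per request rather than batching several requests together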
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- class SharePointRestClient 79.8% similar
- class SharePointFileCloudSync 75.2% similar
- function test_rest_client 68.2% similar
- class SyncDiagnostics 66.6% similar
- class DocumentsSharedWithMe 65.9% similar