function convert_document_to_pdf
Converts a controlled document version to PDF format with audit trail, signatures, watermarking, and PDF/A compliance capabilities, then uploads the result to FileCloud storage.
/tf/active/vicechatdev/CDocs single class/controllers/document_controller.py
1374 - 1569
complex
Purpose
This function provides comprehensive document-to-PDF conversion for controlled document management systems. It retrieves a document version from FileCloud, processes it through a document processor that adds audit data, optional signatures, watermarks, and PDF/A compliance, then uploads the resulting PDF back to FileCloud. It's designed for regulated environments requiring document traceability and archival compliance.
Source Code
def convert_document_to_pdf(
    user: DocUser,
    document_uid: str,
    version_uid: Optional[str] = None,
    include_signatures: bool = True,
    convert_to_pdfa: bool = True,
    add_watermark: bool = False
) -> Dict[str, Any]:
    """
    Convert a document version to PDF using full document processor capabilities.

    The editable file of the selected version is downloaded from FileCloud
    into a temporary directory and handed to ``DocumentProcessor`` together
    with a JSON file of audit data. The resulting PDF is uploaded to the same
    FileCloud directory as the editable file (same base name, ``.pdf``
    extension), the version's ``pdf_file_path`` is set, and a
    ``DOCUMENT_CONVERTED_TO_PDF`` lifecycle event is logged. The temporary
    directory is removed whether or not the conversion succeeds.

    Parameters
    ----------
    user : DocUser
        User performing the conversion
    document_uid : str
        ID of the document
    version_uid : str, optional
        ID of a specific version (default is current version)
    include_signatures : bool
        Whether to include signature images in the audit page
    convert_to_pdfa : bool
        Whether to convert to PDF/A format for archiving
    add_watermark : bool
        Whether to add a watermark to the document. The watermark image is
        ``settings.LOGO_PATH``; if that file is not available the document
        is converted without a watermark.

    Returns
    -------
    Dict[str, Any]
        Dictionary with conversion results: ``success``, ``message``,
        ``document_uid``, ``version_uid``, ``version_number`` and
        ``pdf_path``. If the version already has a PDF, the existing path is
        returned and nothing is converted.

    Raises
    ------
    ResourceNotFoundError
        If the document or the requested version cannot be found, or the
        version does not belong to the document.
    BusinessRuleError
        If the version has no editable file, or if any step of the
        download / conversion / upload fails.
    """
    try:
        # Get document instance
        document = ControlledDocument(uid=document_uid)
        if not document.uid:
            raise ResourceNotFoundError(f"Document not found: {document_uid}")

        # Resolve the version: an explicit one must belong to this document,
        # otherwise fall back to the document's current version.
        if version_uid:
            version = DocumentVersion(uid=version_uid)
            if not version or version.document_uid != document_uid:
                raise ResourceNotFoundError(f"Version not found: {version_uid}")
        else:
            version = document.current_version
            if not version:
                raise ResourceNotFoundError(f"No versions found for document: {document_uid}")

        # Check if the version has an editable file
        if not version.word_file_path:
            raise BusinessRuleError("Version has no editable document to convert")

        # Check if PDF already exists. The result carries the same keys as a
        # fresh conversion so callers can read 'version_number' either way.
        if version.pdf_file_path:
            return {
                'success': True,
                'message': 'PDF version already exists',
                'document_uid': document_uid,
                'version_uid': version.uid,
                'version_number': version.version_number,
                'pdf_path': version.pdf_file_path
            }

        # Create a temporary directory for processing
        temp_dir = tempfile.mkdtemp()
        try:
            editable_file_path = version.word_file_path

            # Download the editable file from FileCloud
            filecloud_client = get_filecloud_client()
            file_content = filecloud_client.download_file(editable_file_path)
            # Anything other than raw bytes is treated as a failed download
            if not isinstance(file_content, bytes):
                raise BusinessRuleError("Failed to download editable document")

            # Save to temp file, keeping the original extension
            file_ext = os.path.splitext(editable_file_path)[1]
            temp_file_path = os.path.join(temp_dir, f"document{file_ext}")
            with open(temp_file_path, 'wb') as f:
                f.write(file_content)

            # Create JSON file with audit data
            audit_data = prepare_audit_data_for_document_processor(document, version, user)
            json_file_path = os.path.join(temp_dir, "audit_data.json")
            with open(json_file_path, 'w', encoding='utf-8') as f:
                # default=str stringifies values json cannot encode natively
                json.dump(audit_data, f, default=str)

            # Set up output PDF path
            output_pdf_path = os.path.join(temp_dir, "document.pdf")

            # Imported here so that a missing document processor surfaces as
            # a conversion failure rather than breaking module import.
            from document_auditor.src.document_processor import DocumentProcessor
            processor = DocumentProcessor()

            # Use the system logo as watermark if it is configured and exists.
            # A missing/unset LOGO_PATH must not abort the conversion.
            watermark_path = None
            if add_watermark:
                logo_path = getattr(settings, 'LOGO_PATH', None)
                if logo_path and os.path.exists(logo_path):
                    watermark_path = logo_path
                else:
                    logger.warning(
                        f"Watermark requested but logo file is not available: {logo_path}"
                    )

            # Process the document with all features
            processor.process_document(
                original_doc_path=temp_file_path,
                json_path=json_file_path,
                output_path=output_pdf_path,
                watermark_image=watermark_path,
                include_signatures=include_signatures,
                convert_to_pdfa=convert_to_pdfa,
                compliance_level='2b',
                finalize=True
            )

            # The PDF is stored next to the editable file, same base name
            editable_dir = os.path.dirname(editable_file_path)
            pdf_filename = f"{os.path.splitext(os.path.basename(editable_file_path))[0]}.pdf"
            pdf_file_path = os.path.join(editable_dir, pdf_filename)

            # Upload PDF to FileCloud (no metadata is attached)
            with open(output_pdf_path, 'rb') as pdf_file:
                upload_result = upload_document_to_filecloud(
                    user=user,
                    document=document_uid,
                    file_content=pdf_file.read(),
                    file_path=pdf_file_path,
                    metadata=None
                )
            if not upload_result.get('success', False):
                raise BusinessRuleError(f"Failed to upload PDF to FileCloud: {upload_result.get('message', 'Unknown error')}")

            # Update document version with PDF path
            version.pdf_file_path = pdf_file_path

            # Log conversion event
            audit_trail.log_document_lifecycle_event(
                event_type="DOCUMENT_CONVERTED_TO_PDF",
                user=user,
                document_uid=document_uid,
                details={
                    'version_uid': version.uid,
                    'version_number': version.version_number,
                    'pdf_path': pdf_file_path,
                    'includes_signatures': include_signatures,
                    'is_pdfa': convert_to_pdfa,
                    # Record whether a watermark was actually applied, not
                    # merely requested (the logo file may be unavailable).
                    'has_watermark': watermark_path is not None
                }
            )

            return {
                'success': True,
                'message': 'Document successfully converted to PDF with full audit trail and security features',
                'document_uid': document_uid,
                'version_uid': version.uid,
                'version_number': version.version_number,
                'pdf_path': pdf_file_path
            }
        except Exception as e:
            # Every failure inside the processing step is reported as a
            # BusinessRuleError; the original exception is kept as __cause__.
            logger.error(f"Error in document conversion process: {str(e)}")
            raise BusinessRuleError(f"Failed to convert document to PDF: {str(e)}") from e
        finally:
            # Best-effort cleanup: a failure here is logged, never raised,
            # so it cannot mask the conversion result or error.
            try:
                if os.path.exists(temp_dir):
                    shutil.rmtree(temp_dir)
            except OSError:
                logger.warning(f"Failed to remove temporary directory: {temp_dir}")
    except (ResourceNotFoundError, ValidationError, PermissionError, BusinessRuleError):
        # Re-raise known errors
        raise
    except Exception as e:
        logger.error(f"Error converting document to PDF: {str(e)}")
        raise BusinessRuleError(f"Failed to convert document to PDF: {str(e)}") from e
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| user | DocUser | - | positional_or_keyword |
| document_uid | str | - | positional_or_keyword |
| version_uid | Optional[str] | None | positional_or_keyword |
| include_signatures | bool | True | positional_or_keyword |
| convert_to_pdfa | bool | True | positional_or_keyword |
| add_watermark | bool | False | positional_or_keyword |
Parameter Details
user: DocUser object representing the authenticated user performing the conversion. Used for permissions, audit logging, and FileCloud operations.
document_uid: Unique identifier (string) of the controlled document to convert. Must exist in the system or ResourceNotFoundError is raised.
version_uid: Optional unique identifier (string) of a specific document version to convert. If None, the current/latest version of the document is used. Must belong to the specified document.
include_signatures: Boolean flag (default True) indicating whether to include signature images in the generated PDF's audit page. Useful for compliance documentation.
convert_to_pdfa: Boolean flag (default True) to convert the output to PDF/A format (compliance level 2b) for long-term archival and regulatory compliance.
add_watermark: Boolean flag (default False) to add a watermark image to the document. Uses the system logo from settings.LOGO_PATH if available.
Return Value
Type: Dict[str, Any]
Returns a dictionary with conversion results. On success: {'success': True, 'message': str, 'document_uid': str, 'version_uid': str, 'version_number': str, 'pdf_path': str}. The 'pdf_path' contains the FileCloud path to the generated PDF. If PDF already exists, returns early with existing path. On failure, raises BusinessRuleError, ResourceNotFoundError, ValidationError, or PermissionError.
Dependencies
logging, uuid, os, tempfile, typing, datetime, io, panel, shutil, traceback, json, re, CDocs, document_auditor
Required Imports
import logging
import os
import tempfile
import shutil
import json
from typing import Dict, Any, Optional
from datetime import datetime
from CDocs.models.document import ControlledDocument, DocumentVersion
from CDocs.models.user_extensions import DocUser
from CDocs.utils import audit_trail
from CDocs.config import settings
from CDocs.controllers import require_permission, log_controller_action
from CDocs.controllers import ResourceNotFoundError, ValidationError, PermissionError, BusinessRuleError
from CDocs.controllers.filecloud_controller import upload_document_to_filecloud, get_filecloud_client
Conditional/Optional Imports
These imports are only needed under specific conditions:
from document_auditor.src.document_processor import DocumentProcessor
Condition: Required during PDF conversion process, imported inside the function after file preparation
Required (conditional)
Usage Example
from CDocs.models.user_extensions import DocUser
from CDocs.controllers import ResourceNotFoundError, BusinessRuleError
from CDocs.controllers.document_controller import convert_document_to_pdf

# Get authenticated user
user = DocUser(uid='user123')

# Convert current version to PDF with all features.
# Failures are signalled by exceptions: every dictionary the function
# returns has 'success': True, so errors must be caught, not tested for.
try:
    result = convert_document_to_pdf(
        user=user,
        document_uid='doc-12345',
        version_uid=None,
        include_signatures=True,
        convert_to_pdfa=True,
        add_watermark=True
    )
except (ResourceNotFoundError, BusinessRuleError) as exc:
    print(f"Conversion failed: {exc}")
else:
    print(f"PDF created at: {result['pdf_path']}")
    print(f"Version: {result.get('version_number', 'unknown')}")

# Convert specific version without watermark
result = convert_document_to_pdf(
    user=user,
    document_uid='doc-12345',
    version_uid='version-67890',
    include_signatures=True,
    convert_to_pdfa=True,
    add_watermark=False
)
Best Practices
- Ensure user has CONVERT_DOCUMENT permission before calling (handled by decorator but good to verify)
- Handle ResourceNotFoundError for invalid document_uid or version_uid
- Handle BusinessRuleError for conversion failures or missing editable files
- The function automatically cleans up temporary files even on failure
- If PDF already exists for a version, the function returns early without re-conversion
- Use convert_to_pdfa=True for regulatory compliance and long-term archival
- Watermark feature requires settings.LOGO_PATH to be configured and file to exist
- The function logs all conversion events to audit trail automatically
- Temporary directory creation and cleanup is handled internally - no manual cleanup needed
- FileCloud upload failures will raise BusinessRuleError with details
- The function updates the DocumentVersion object with pdf_file_path on success
- Ensure prepare_audit_data_for_document_processor function is available in scope
- Document processor uses compliance_level='2b' for PDF/A-2b standard
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- function convert_document_to_pdf_v1 97.9% similar
- function convert_document_to_pdf_v1 91.9% similar
- function download_document_version 71.3% similar
- function download_document_version_v1 70.2% similar
- function upload_document_to_filecloud 70.1% similar