function prepare_audit_data_for_document_processor
Prepares comprehensive audit data for a controlled document version, including revision history, reviews, approvals, and event history, formatted for DocumentProcessor consumption.
/tf/active/vicechatdev/CDocs single class/controllers/document_controller.py
1572 - 1765
complex
Purpose
This function aggregates and formats audit trail information from multiple sources (document versions, review cycles, approvals, and audit events) into a standardized dictionary structure. It's designed to support PDF conversion and document archival by providing complete metadata about a document's lifecycle, including who created/reviewed/approved it, when changes occurred, and what modifications were made. The function handles various date formats and safely extracts data from the database to create a comprehensive audit record.
Source Code
def _format_audit_date(value: Any, default: str = "") -> str:
    """Return *value* as a ``YYYY-MM-DD`` string, or *default* if it cannot be read.

    Accepts ``datetime`` objects and date-like strings. Strings are parsed as
    ISO 8601 first (a trailing ``Z`` is treated as UTC); if that fails, a
    leading ``YYYY-MM-DD`` prefix is used. Any other input yields *default*.
    """
    if not value:
        return default
    if isinstance(value, datetime):
        return value.strftime("%Y-%m-%d")
    if isinstance(value, str):
        try:
            parsed = datetime.fromisoformat(value.replace('Z', '+00:00'))
        except ValueError:
            # Not strictly ISO (e.g. extra precision or trailing text): keep
            # only the date prefix so a time part can never leak into a
            # date-only field, whatever separator the string uses.
            prefix = re.match(r'\d{4}-\d{2}-\d{2}', value)
            return prefix.group(0) if prefix else default
        return parsed.strftime("%Y-%m-%d")
    return default


def _format_event_timestamp(value: Any) -> str:
    """Return the display string for an audit event timestamp.

    ``datetime`` values keep their time (``YYYY-MM-DD HH:MM:SS``). Strings are
    reduced to the first ``YYYY-MM-DD`` found in them, or returned unchanged
    when they contain no such date. Anything else yields an empty string.
    """
    if not value:
        return ""
    if isinstance(value, datetime):
        return value.strftime("%Y-%m-%d %H:%M:%S")
    if isinstance(value, str):
        found = re.search(r'\d{4}-\d{2}-\d{2}', value)
        return found.group(0) if found else value
    return ""


def _format_event_details(details: Any) -> str:
    """Flatten an event's details (dict or arbitrary value) into one string."""
    if not details:
        return ""
    if isinstance(details, dict):
        return "; ".join(f"{k}: {v}" for k, v in details.items())
    return str(details)


def _value_or_default(record: Dict[str, Any], key: str, default: Any = "") -> Any:
    """Return ``record[key]``, or *default* when it is missing, ``None`` or empty.

    ``dict.get(key, default)`` only falls back when the key is absent. Query
    rows carry every returned column, with ``None`` for values that did not
    match, so the fallback has to be applied to ``None`` explicitly.
    """
    value = record.get(key)
    if value is None or value == "":
        return default
    return value


def prepare_audit_data_for_document_processor(document: ControlledDocument, version: DocumentVersion, user: DocUser) -> Dict[str, Any]:
    """
    Prepare audit data in the format expected by the DocumentProcessor.

    Collects the document's audit events, the revision history of all its
    versions, and the reviews and approvals attached to ``version``, and
    returns them together with document metadata and conversion info.

    Parameters
    ----------
    document : ControlledDocument
        The document being converted
    version : DocumentVersion
        The document version being converted
    user : DocUser
        User performing the conversion

    Returns
    -------
    Dict[str, Any]
        Audit data in the format expected by DocumentProcessor. Date fields
        are ``YYYY-MM-DD`` strings; event dates built from ``datetime``
        timestamps and ``conversion_date`` use ``YYYY-MM-DD HH:MM:SS``.
        Creation and revision dates fall back to today's date when missing
        or unparseable; all other dates fall back to an empty string.
    """
    # Get document audit trail using the correct function.
    # A None result is treated as "no events" rather than crashing the loop.
    audit_events = audit_trail.get_document_history(document.uid) or []

    # Computed once so every fallback date in this record is the same day.
    today_str = datetime.now().strftime("%Y-%m-%d")

    created_date_str = _format_audit_date(version.created_date, today_str)
    effective_date_str = _format_audit_date(version.effective_date, "")

    # Get all version history for revision history
    revision_history = []
    for ver in document.get_all_versions() or []:
        revision_history.append({
            "version": ver.version_number,
            "date": _format_audit_date(ver.created_date, today_str),
            "author": ver.author.name if ver.author else "Unknown",
            "changes": ver.change_summary or f"Version {ver.version_number} created",
        })

    # Get reviews and approvals for the document
    reviews = db.run_query(
        """
        MATCH (v:DocumentVersion {UID: $version_uid})-[:FOR_REVIEW]->(r:ReviewCycle)
        OPTIONAL MATCH (r)-[:REVIEWED_BY]->(reviewer:User)
        OPTIONAL MATCH (r)-[:COMMENTED_ON]->(c:ReviewComment)
        RETURN r.UID as review_id, r.status as status,
        r.startDate as start_date, r.endDate as end_date,
        reviewer.name as reviewer_name, reviewer.username as reviewer_username,
        reviewer.role as reviewer_role,
        COLLECT(c.text) as comments
        """,
        {"version_uid": version.uid}
    ) or []
    approvals = db.run_query(
        """
        MATCH (v:DocumentVersion {UID: $version_uid})-[:FOR_APPROVAL]->(a:Approval)
        OPTIONAL MATCH (a)-[:APPROVED_BY]->(approver:User)
        RETURN a.UID as approval_id, a.status as status,
        a.date as approval_date, a.level as approval_level,
        approver.name as approver_name, approver.username as approver_username,
        approver.role as approver_role, a.comment as approval_comment
        """,
        {"version_uid": version.uid}
    ) or []

    # Extract review information
    formatted_reviews = []
    for review in reviews:
        # Combine all non-empty comments into one string
        comments_text = "; ".join(str(c) for c in (review.get("comments") or []) if c)
        formatted_reviews.append({
            "reviewer_name": _value_or_default(review, "reviewer_name"),
            "reviewer_role": _value_or_default(review, "reviewer_role", "Reviewer"),
            "reviewer_username": _value_or_default(review, "reviewer_username"),
            "review_date": _format_audit_date(review.get("end_date"), ""),
            "status": _value_or_default(review, "status"),
            "comments": comments_text,
        })

    # Extract approval information
    formatted_approvals = []
    for approval in approvals:
        level = _value_or_default(approval, "approval_level", "1")
        formatted_approvals.append({
            "approver_name": _value_or_default(approval, "approver_name"),
            # Approvers without an explicit role are labelled by approval level.
            "approver_role": _value_or_default(approval, "approver_role", f"Level {level} Approver"),
            "approval_date": _format_audit_date(approval.get("approval_date"), ""),
            "status": _value_or_default(approval, "status"),
            "comments": _value_or_default(approval, "approval_comment"),
        })

    # Format audit events for event history
    event_history = []
    for event in audit_events:
        event_history.append({
            "date": _format_event_timestamp(event.get("timestamp")),
            "user": _value_or_default(event, "userName", "System"),
            "action": _value_or_default(event, "eventType"),
            "description": _value_or_default(event, "description"),
            "details": _format_event_details(event.get("details")),
        })

    # Build the final JSON structure according to expected format
    audit_data = {
        "document_title": document.title,
        "document_id": document.doc_number,
        "version": version.version_number,
        "author": version.author.name if version.author else user.name,
        "department": document.get_department_name(),
        "creation_date": created_date_str,
        "effective_date": effective_date_str,
        "status": document.get_status_name(),
        "doc_type": document.doc_type_name,
        "reviews": formatted_reviews,
        "approvals": formatted_approvals,
        "revision_history": revision_history,
        "event_history": event_history,
        "conversion_info": {
            "converted_by": user.name,
            "conversion_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "conversion_reason": "PDF conversion requested for document archival and distribution"
        }
    }
    return audit_data
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| document | ControlledDocument | - | positional_or_keyword |
| version | DocumentVersion | - | positional_or_keyword |
| user | DocUser | - | positional_or_keyword |
Parameter Details
document: A ControlledDocument instance representing the document being processed. Must have properties like uid, title, doc_number, doc_type_name, and methods like get_all_versions(), get_department_name(), and get_status_name().
version: A DocumentVersion instance representing the specific version being converted. Must have properties like uid, version_number, created_date, effective_date, author, and change_summary. The created_date and effective_date can be datetime objects or ISO format strings.
user: A DocUser instance representing the user performing the conversion operation. Must have a name property that will be recorded in the conversion_info section of the audit data.
Return Value
Type: Dict[str, Any]
Returns a dictionary containing structured audit data with keys: document_title, document_id, version, author, department, creation_date, effective_date, status, doc_type, reviews (list of reviewer info with names, roles, dates, status, comments), approvals (list of approver info with names, roles, dates, status, comments), revision_history (list of all versions with version numbers, dates, authors, changes), event_history (list of audit events with dates, users, actions, descriptions, details), and conversion_info (metadata about the current conversion including converted_by, conversion_date, conversion_reason). Dates are formatted as 'YYYY-MM-DD' strings, with two exceptions that use 'YYYY-MM-DD HH:MM:SS': event_history dates built from datetime timestamps, and conversion_date.
Dependencies
datetime, re, typing, CDocs.db, CDocs.utils.audit_trail
Required Imports
from datetime import datetime
import re
from typing import Dict, Any
from CDocs import db
from CDocs.utils import audit_trail
from CDocs.models.document import ControlledDocument, DocumentVersion
from CDocs.models.user_extensions import DocUser
Usage Example
from datetime import datetime
from typing import Dict, Any
from CDocs.models.document import ControlledDocument, DocumentVersion
from CDocs.models.user_extensions import DocUser
from CDocs.utils import audit_trail
from CDocs import db
# NOTE(review): prepare_audit_data_for_document_processor itself is not imported
# above; presumably it lives in the document_controller module - confirm the
# import path before running this example.
# Load the document, the version to convert, and the user performing the conversion
document = ControlledDocument.get_by_uid('doc-123')
version = document.get_version('1.0')
user = DocUser.get_by_username('john.doe')
# Prepare audit data for PDF conversion
audit_data = prepare_audit_data_for_document_processor(document, version, user)
# The returned dictionary contains all audit information; print a short summary
print(f"Document: {audit_data['document_title']}")
print(f"Version: {audit_data['version']}")
print(f"Status: {audit_data['status']}")
print(f"Number of reviews: {len(audit_data['reviews'])}")
print(f"Number of approvals: {len(audit_data['approvals'])}")
print(f"Revision history entries: {len(audit_data['revision_history'])}")
# Pass to DocumentProcessor for PDF generation
from document_auditor.src.document_processor import DocumentProcessor
processor = DocumentProcessor()
processor.process_document(audit_data)
Best Practices
- Ensure the document, version, and user objects are fully loaded with all required properties before calling this function
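- The function handles multiple date formats (datetime objects, ISO strings, YYYY-MM-DD strings). When a date is missing or cannot be parsed, the creation date and revision-history dates default to the current date, while effective, review, and approval dates default to an empty string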
- Database queries for reviews and approvals may return empty lists if no relationships exist - this is handled gracefully
- The function makes multiple database queries (audit events, reviews, approvals) which could be performance-intensive for documents with extensive history
- Consider caching the result if the same audit data will be used multiple times
- The conversion_info section always uses the current timestamp, so the returned data is time-sensitive
- Ensure proper database transaction handling when calling this function as part of a larger operation
- The function assumes the audit_trail.get_document_history() function exists and returns a list of event dictionaries
- Review and approval data structure depends on specific Neo4j schema - ensure database schema matches expected node labels and relationships
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
function prepare_audit_data_for_document_processor_v1 97.4% similar
-
function convert_document_to_pdf_v1 66.6% similar
-
function convert_document_to_pdf 66.1% similar
-
function generate_audit_report 64.6% similar
-
function get_document_audit_trail 62.6% similar