function create_word_report
Generates a formatted Microsoft Word document report containing warranty disclosures with a table of contents, metadata, and structured sections for each warranty.
/tf/active/vicechatdev/convert_disclosures_to_table.py
233 - 371
complex
Purpose
This function creates a comprehensive Word document report for Project Victoria warranty disclosures. It processes a list of warranty dictionaries, formats them with proper heading hierarchy, includes document metadata, generates a table of contents, and intelligently parses disclosure content to preserve heading structures. The function handles various markdown and text formatting patterns to create a well-structured document with appropriate Word styles.
Source Code
# Enumerated heading markers recognised in disclosure text.
_ENUMERATED_HEADING_PATTERNS = (
    re.compile(r'^[IVX]+\.\s'),   # Roman numerals: "IV. "
    re.compile(r'^[A-Z]\.\s'),    # Capital letters: "A. "
    re.compile(r'^\d+\.\s'),      # Numbers: "3. "
    re.compile(r'^[a-z]\)\s'),    # Lower case with parenthesis: "a) "
)
# Title-case phrase of two or more words that ends with a colon.
_TITLE_CASE_LABEL_RE = re.compile(r'^[A-Z][a-z]+ [A-Z][a-z]+.*:$')


def _detect_heading(line):
    """Classify one stripped, non-empty disclosure line.

    Returns a ``(heading_level, text)`` tuple. ``heading_level`` is ``None``
    when the line is ordinary body text, in which case ``text`` is the line
    unchanged.
    """
    # Markdown-style headings (#, ##, ### ...).
    if line.startswith('#'):
        # Count only the leading run of '#': a '#' later in the line
        # (e.g. "# Item #5") must not deepen the heading.
        hashes = len(line) - len(line.lstrip('#'))
        # +2 because the enclosing "Disclosure" heading is level 2.
        return min(hashes + 2, 6), line.lstrip('#').strip()

    # Whole-line bold text (**text**).
    if line.startswith('**') and line.endswith('**') and len(line) > 4:
        return 3, line[2:-2].strip()

    # Numbered/lettered headings; the marker is kept because the numbering
    # is part of the content.
    if any(pattern.match(line) for pattern in _ENUMERATED_HEADING_PATTERNS):
        return 3, line

    # Short label ending with a colon (at most four words, not a bullet).
    if line.endswith(':') and len(line.split()) <= 4 and not line.startswith('-'):
        return 4, line[:-1].strip()

    # Other emphasised patterns that look like headings: short all-caps
    # lines, "- **Label**:" bullets, and title-case phrases ending in ':'.
    if ((line.isupper() and len(line.split()) <= 5)
            or (line.startswith('- **') and line.endswith('**:'))
            or _TITLE_CASE_LABEL_RE.match(line)):
        text = line.replace('**', '').replace('- ', '').rstrip(':').strip()
        return 4, text

    return None, line


def _add_disclosure_body(doc, disclosure_content):
    """Append disclosure text to *doc*, turning heading-like lines into headings.

    Consecutive body lines are joined with single spaces into one paragraph;
    a blank line or a heading ends the current paragraph.
    """
    pending = []

    def flush():
        # Emit the buffered body lines as a single paragraph.
        if pending:
            doc.add_paragraph(' '.join(pending))
            pending.clear()

    for raw_line in disclosure_content.split('\n'):
        line = raw_line.strip()
        if not line:
            flush()
            continue

        level, text = _detect_heading(line)
        if level:
            flush()
            doc.add_heading(text, level=level)
        else:
            pending.append(line)

    flush()


def create_word_report(warranties, output_file):
    """Create Word document report with proper heading styles.

    The document contains a centred title, generation metadata, a table of
    contents listing every warranty, and one section per warranty (section
    name, source-document count, warranty text, and the parsed disclosure).
    Warranties are written in ascending ``Warranty_ID`` order.

    Args:
        warranties: Collection of dicts. Each dict is read for the keys
            'Warranty_ID', 'Warranty_Title', 'Section_Name',
            'Source_Documents_Count', 'Warranty_Text' and 'Full_Disclosure'.
        output_file: Destination passed to ``Document.save``.

    Returns:
        True when the document was saved; False when any exception was
        raised while building or saving it (the failure is logged).
    """
    logger.info(f"Creating Word report: {output_file}")

    try:
        doc = Document()

        # Document title
        title = doc.add_heading('Project Victoria - Warranty Disclosures', 0)
        title.alignment = 1  # Center alignment

        # Document metadata
        doc.add_paragraph(f"Generated on: {datetime.now().strftime('%B %d, %Y at %H:%M:%S')}")
        doc.add_paragraph(f"Total Warranties Processed: {len(warranties)}")
        # NOTE(review): this reports the same count as the line above, even
        # for warranties whose disclosure is empty - confirm that is intended.
        doc.add_paragraph(f"Total Disclosures Generated: {len(warranties)}")

        doc.add_page_break()

        # Sort once; the table of contents and the sections share the order.
        ordered = sorted(warranties, key=lambda x: x['Warranty_ID'])

        # Table of contents
        doc.add_heading('Table of Contents', level=1)
        for warranty in ordered:
            # Sanitise exactly as the section headings below do, so the two
            # listings match and the same cleaning applies to both.
            toc_id = clean_text_for_xml(warranty['Warranty_ID'])
            toc_title = clean_text_for_xml(warranty['Warranty_Title'])
            doc.add_paragraph(f"{toc_id} - {toc_title}", style='List Number')

        # Page break before warranties
        doc.add_page_break()

        # One section per warranty
        for warranty in ordered:
            warranty_id = clean_text_for_xml(warranty['Warranty_ID'])
            warranty_title = clean_text_for_xml(warranty['Warranty_Title'])
            section_name = clean_text_for_xml(warranty['Section_Name'])
            source_docs_count = clean_text_for_xml(warranty['Source_Documents_Count'])
            warranty_text = clean_text_for_xml(warranty['Warranty_Text'])
            disclosure_content = clean_text_for_xml(warranty['Full_Disclosure'])

            # Main warranty heading (Level 1)
            doc.add_heading(f"{warranty_id} - {warranty_title}", level=1)

            # Section information
            doc.add_paragraph(f"Section: {section_name}", style='Heading 2')
            doc.add_paragraph(f"Source Documents Found: {source_docs_count}")

            # Warranty Text subsection
            doc.add_heading('Warranty Text', level=2)
            warranty_para = doc.add_paragraph(warranty_text)
            warranty_para.style = 'Quote'

            # Disclosure subsection
            doc.add_heading('Disclosure', level=2)
            if disclosure_content:
                _add_disclosure_body(doc, disclosure_content)
            else:
                doc.add_paragraph("No disclosure content available.")

            # Separator between warranties
            doc.add_paragraph("_" * 80)
            doc.add_paragraph()  # Empty line

        doc.save(output_file)
        logger.info(f"Created Word report: {output_file}")
        return True

    except ImportError:
        logger.warning("python-docx not available, skipping Word export")
        return False
    except Exception as e:
        # Best-effort export: report the failure (with traceback) and let the
        # caller continue.
        logger.exception(f"Error creating Word document: {e}")
        return False
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| warranties | - | - | positional_or_keyword |
| output_file | - | - | positional_or_keyword |
Parameter Details
warranties: A list of dictionaries where each dictionary represents a warranty. Each warranty dictionary must contain the keys: 'Warranty_ID' (unique identifier), 'Warranty_Title' (title of the warranty), 'Section_Name' (section classification), 'Source_Documents_Count' (number of source documents), 'Warranty_Text' (the actual warranty text), and 'Full_Disclosure' (the disclosure content with potential markdown formatting). The list will be sorted by Warranty_ID during processing.
output_file: String or Path object specifying the file path where the Word document (.docx) should be saved. Should include the .docx extension. The target directory must exist and be writable.
Return Value
Returns a boolean value: True if the Word document was successfully created and saved, False if an error occurred (such as missing python-docx library or file writing errors). The function logs appropriate messages for success and failure cases.
Dependencies
python-docx, datetime, logging, re
Required Imports
from datetime import datetime
from docx import Document
import logging
import re
Conditional/Optional Imports
These imports are only needed under specific conditions:
from docx import Document
Condition: Required for Word document creation; function returns False with warning if not available
Required (conditional)
from docx.shared import Inches
Condition: Listed in source file imports but not used in this function
Optional
from docx.enum.style import WD_STYLE_TYPE
Condition: Listed in source file imports but not used in this function
Optional
Usage Example
import logging
from datetime import datetime
from docx import Document
import re
# Setup logger
# create_word_report logs through a module-level name `logger`, so it must
# exist before the function is called.
logger = logging.getLogger(__name__)
# Show INFO-level messages (the function's progress messages are logged at INFO).
logging.basicConfig(level=logging.INFO)
# Define clean_text_for_xml helper function
# Control characters that XML 1.0 does not allow (everything below U+0020
# except tab, line feed and carriage return), mapped to None so that
# str.translate deletes them.
_XML_ILLEGAL_CONTROLS = {
    codepoint: None
    for codepoint in range(0x20)
    if codepoint not in (0x09, 0x0A, 0x0D)
}


def clean_text_for_xml(text):
    """Return *text* as a string that is safe to place in an XML document.

    ``None`` becomes the empty string and any other value is converted with
    ``str()``. Control characters that XML 1.0 forbids are removed. Markup
    characters such as ``&``, ``<`` and ``>`` are left untouched: escaping
    them is the job of the XML serialiser, and doing it here as well would
    put literal entity text into the output.
    """
    if text is None:
        return ''
    return str(text).translate(_XML_ILLEGAL_CONTROLS)
# Two sample warranties: the first exercises the markdown-aware disclosure
# parsing, the second is a plain single-sentence disclosure.
quality_warranty = {
    'Warranty_ID': 'W001',
    'Warranty_Title': 'Product Quality Warranty',
    'Section_Name': 'Quality Assurance',
    'Source_Documents_Count': '5',
    'Warranty_Text': 'All products meet quality standards.',
    'Full_Disclosure': '## Overview\n\nThis warranty covers defects.\n\n**Coverage Period**: 12 months\n\n- Item 1\n- Item 2',
}
service_warranty = {
    'Warranty_ID': 'W002',
    'Warranty_Title': 'Service Warranty',
    'Section_Name': 'Services',
    'Source_Documents_Count': '3',
    'Warranty_Text': 'Services performed professionally.',
    'Full_Disclosure': 'Standard service warranty applies.',
}
warranties = [quality_warranty, service_warranty]

# Build the report and tell the user whether it was written.
success = create_word_report(warranties, 'warranty_report.docx')
print('Report created successfully' if success else 'Failed to create report')
Best Practices
- Ensure the 'clean_text_for_xml' function is defined in the same module to sanitize text content before adding to the document
- Configure a logger instance before calling this function to capture informational and error messages
- Validate that all warranty dictionaries contain the required keys before passing to this function
- Install python-docx library using 'pip install python-docx' before using this function
- Ensure the output directory exists and has write permissions
- The function handles various markdown patterns (##, **, numbered lists, etc.) in disclosure content - format your disclosure text accordingly for best results
- Warranty data is automatically sorted by Warranty_ID, so input order does not matter
- The function gracefully handles missing python-docx by returning False and logging a warning rather than raising an exception
- For large warranty lists, consider memory usage as the entire document is built in memory before saving
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- function create_word_report_improved 96.6% similar
- function create_enhanced_word_document 85.1% similar
- function create_enhanced_word_document_v1 83.8% similar
- function main_v1 73.4% similar
- function main_v5 73.4% similar