function create_word_report_improved
Generates a formatted Microsoft Word document report containing warranty disclosures with table of contents, structured sections, and references.
/tf/active/vicechatdev/improved_convert_disclosures_to_table.py
291 - 419
complex
Purpose
This function creates a comprehensive Word document report for Project Victoria warranty disclosures. It processes warranty data to generate a professionally formatted document with a title page, metadata, table of contents, individual warranty sections with disclosure content, and a references section. The document includes proper heading hierarchy, styled paragraphs, and preserves inline references from markdown-style content.
Source Code
# Marker that introduces one entry of the references section, e.g. **[12]**.
_REFERENCE_MARKER_PATTERN = r'\*\*\[(\d+)\]\*\*'

# One reference entry: marker, source text and an optional
# "*Content preview*: ..." line. Used with re.DOTALL.
_REFERENCE_ENTRY_PATTERN = r'\*\*\[(\d+)\]\*\*\s*(.+?)(?:\n\s*\*Content preview\*:\s*(.+?))?(?=\n\n|\*\*\[|\Z)'


def _add_disclosure_block(doc, block):
    """Append one blank-line-delimited chunk of disclosure markdown to *doc*.

    *block* must already be stripped and non-empty.
    """
    first_line, _, remainder = block.partition('\n')
    heading = re.match(r'(#{1,6}) (.*)', first_line)
    if heading and heading.group(2).strip():
        hashes, heading_text = heading.groups()
        # Word heading levels 1-2 are used by the report's own structure, so
        # '#' maps to level 3, '##' to level 4, and so on.
        doc.add_heading(clean_text_for_xml(heading_text.rstrip()), level=len(hashes) + 2)
        # Text that follows the heading after a single newline is body text,
        # not part of the heading.
        remainder = remainder.strip()
        if remainder:
            doc.add_paragraph(clean_text_for_xml(remainder))
        return

    inner = block[2:-2]
    is_short_bold = (
        len(block) > 4  # rejects '**', '***' and '****', which have no content
        and block.startswith('**')
        and block.endswith('**')
        and '**' not in inner  # several bold spans are a sentence, not a title
        and len(block.split()) <= 6
    )
    if is_short_bold:
        # Short, fully bold text is treated as a heading.
        doc.add_heading(clean_text_for_xml(inner), level=5)
    else:
        # Regular paragraph; inline reference markers are kept verbatim.
        doc.add_paragraph(clean_text_for_xml(block))


def _add_warranty_section(doc, warranty):
    """Append the heading, metadata, warranty text and disclosure of one warranty."""
    # All keys are read up front so a malformed record fails before anything
    # is written for it.
    warranty_id = warranty['Warranty_ID']
    warranty_title = warranty['Warranty_Title']
    section_name = warranty['Section_Name']
    source_docs_count = warranty['Source_Documents_Count']
    warranty_text = warranty['Warranty_Text']
    disclosure_content = warranty['Full_Disclosure']

    doc.add_heading(clean_text_for_xml(f"{warranty_id} - {warranty_title}"), level=1)

    if section_name:
        doc.add_paragraph(clean_text_for_xml(f"Section: {section_name}"), style='Heading 2')
    doc.add_paragraph(f"Source Documents Found: {source_docs_count}")

    doc.add_heading('Warranty Text', level=2)
    quoted = doc.add_paragraph(clean_text_for_xml(warranty_text))
    quoted.style = 'Quote'

    doc.add_heading('Disclosure', level=2)
    if disclosure_content:
        # Split on blank lines; lines holding only whitespace (or a stray
        # carriage return) also count as blank.
        for block in re.split(r'\n\s*\n', disclosure_content):
            block = block.strip()
            if block:
                _add_disclosure_block(doc, block)
    else:
        doc.add_paragraph("No disclosure content available.")

    # Visual separator between warranties.
    doc.add_paragraph("_" * 80)
    doc.add_paragraph()


def _add_reference_entries(doc, references_section):
    """Append one paragraph (plus optional preview paragraph) per parsed reference."""
    entries = re.findall(_REFERENCE_ENTRY_PATTERN, references_section, re.DOTALL)
    # findall yields (number, source, preview); preview is '' when absent.
    for ref_num, source, preview in entries:
        entry_para = doc.add_paragraph()
        entry_para.add_run(f"[{ref_num}] ").bold = True
        entry_para.add_run(clean_text_for_xml(source.strip()))

        preview = preview.strip()
        if preview:
            preview_para = doc.add_paragraph()
            preview_para.add_run("Content preview: ").italic = True
            preview_para.add_run(clean_text_for_xml(preview))

        doc.add_paragraph()  # Empty line between references


def create_word_report_improved(warranties, references_section, output_file):
    """Create Word document report with proper heading styles and references.

    The document consists of a centred title, summary counts, a numbered
    table of contents, one section per warranty (ordered by 'Warranty_ID')
    and, when ``references_section`` is non-empty, a references section.

    Args:
        warranties: Iterable of dicts. Each dict must provide the keys
            'Warranty_ID', 'Warranty_Title', 'Section_Name',
            'Source_Documents_Count', 'Warranty_Text' and 'Full_Disclosure'.
        references_section: Markdown-style references text whose entries
            start with ``**[n]**``; may be None or empty.
        output_file: Destination passed to ``Document.save``.

    Returns:
        True when the document was saved. False on any failure; the error
        is logged and not re-raised.

    Note:
        Text is passed through the module-level ``clean_text_for_xml``
        helper, which is defined outside this block and is assumed to
        return a string that python-docx accepts.
    """
    logger.info(f"Creating Word report: {output_file}")
    try:
        doc = Document()

        title = doc.add_heading('Project Victoria - Warranty Disclosures', 0)
        title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

        # Sort once; the table of contents and the sections share the order.
        ordered = sorted(warranties, key=lambda item: item['Warranty_ID'])
        # Only warranties that actually carry disclosure text are counted;
        # the others get the "No disclosure content available." placeholder.
        disclosure_count = sum(1 for item in ordered if item['Full_Disclosure'])

        # Document metadata
        doc.add_paragraph(f"Generated on: {datetime.now().strftime('%B %d, %Y at %H:%M:%S')}")
        doc.add_paragraph(f"Total Warranties Processed: {len(ordered)}")
        doc.add_paragraph(f"Total Disclosures Generated: {disclosure_count}")
        if references_section:
            ref_count = len(re.findall(_REFERENCE_MARKER_PATTERN, references_section))
            doc.add_paragraph(f"Total References: {ref_count}")

        doc.add_page_break()

        # Table of contents: a plain numbered list, not a Word TOC field.
        doc.add_heading('Table of Contents', level=1)
        for warranty in ordered:
            toc_entry = f"{warranty['Warranty_ID']} - {warranty['Warranty_Title']}"
            doc.add_paragraph(clean_text_for_xml(toc_entry), style='List Number')

        doc.add_page_break()

        for warranty in ordered:
            _add_warranty_section(doc, warranty)

        if references_section:
            doc.add_page_break()
            doc.add_heading('References', level=1)
            _add_reference_entries(doc, references_section)

        doc.save(output_file)
        logger.info(f"Created Word report: {output_file}")
        return True
    except ImportError:
        logger.warning("python-docx not available, skipping Word export")
        return False
    except Exception as e:
        # Report generation is best-effort: log with traceback, signal failure.
        logger.exception(f"Error creating Word document: {e}")
        return False
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| warranties | - | - | positional_or_keyword |
| references_section | - | - | positional_or_keyword |
| output_file | - | - | positional_or_keyword |
Parameter Details
warranties: List of dictionaries containing warranty information. Each dictionary must have keys: 'Warranty_ID' (identifier), 'Warranty_Title' (title text), 'Section_Name' (section classification), 'Source_Documents_Count' (number of source documents), 'Warranty_Text' (original warranty text), and 'Full_Disclosure' (generated disclosure content with markdown formatting).
references_section: String containing formatted references in markdown style with pattern **[number]** followed by source information and optional content preview. Can be None or empty string if no references are available.
output_file: String or Path object specifying the file path where the Word document (.docx) should be saved. Should include the .docx extension.
Return Value
Returns a boolean value: True if the Word document was successfully created and saved, False if an error occurred (such as missing python-docx library or file writing errors).
Dependencies
python-docx, re, logging, datetime
Required Imports
import re
import logging
from datetime import datetime
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
Conditional/Optional Imports
These imports are only needed under specific conditions:
from docx import Document
Condition: Required for Word document creation; function will return False if not available
Required (conditional)
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
Condition: Required for text alignment in Word document
Required (conditional)
Usage Example
import logging
from datetime import datetime
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
import re
# Setup logger
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
# Define helper function
def clean_text_for_xml(text):
    """Return *text* with characters that XML 1.0 forbids removed.

    Stand-in for the project's helper of the same name. '&', '<' and '>'
    are deliberately left untouched: python-docx serialises text through
    lxml, which escapes them itself, so escaping here would show literal
    entities in the document. Control characters, on the other hand, make
    python-docx raise ValueError and therefore have to be stripped.

    Args:
        text: Value to sanitise; None is treated as empty and any other
            non-string value is converted with ``str``.

    Returns:
        The sanitised string. Tab, newline and carriage return are kept.
    """
    if text is None:
        return ''
    # C0 controls other than \t, \n, \r, plus the non-characters U+FFFE/U+FFFF.
    return re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\ufffe\uffff]', '', str(text))
# A single warranty record carrying every key the report generator reads.
sample_warranty = {
    'Warranty_ID': 'W001',
    'Warranty_Title': 'Product Quality Warranty',
    'Section_Name': 'Quality Assurance',
    'Source_Documents_Count': 5,
    'Warranty_Text': 'All products meet quality standards.',
    'Full_Disclosure': '# Overview\n\nThis warranty covers product quality.\n\n**[1]** Reference to quality standards.',
}
warranties = [sample_warranty]

# Matching references text: one numbered entry with a content preview line.
references = '**[1]** Quality Standards Document v2.0\n*Content preview*: Standards for product quality...'

# Build the report and tell the user whether it was written.
success = create_word_report_improved(
    warranties=warranties,
    references_section=references,
    output_file='warranty_report.docx',
)
print('Report created successfully' if success else 'Failed to create report')
Best Practices
- Ensure the 'clean_text_for_xml' function is defined before calling this function to properly sanitize text for Word XML format
- Verify that python-docx library is installed before calling this function, or handle the False return value appropriately
- Provide complete warranty dictionaries with all required keys to avoid KeyError exceptions
- Use consistent markdown formatting in disclosure content for proper heading detection (# for level 3, ## for level 4, etc.)
- Ensure the output directory exists and has write permissions before calling the function
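- The function sorts warranties by Warranty_ID using Python's default ordering, so all IDs must be mutually comparable (all strings or all numbers); string IDs sort lexicographically, so 'W10' comes before 'W2' unless the IDs are zero-padded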
- References section should follow the pattern **[number]** for proper parsing
- Consider the file size when processing large numbers of warranties as Word documents can become large
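- The function reports progress and failures only through a module-level `logger` (INFO at start and on success, WARNING when python-docx is unavailable, ERROR on any other failure) and returns False instead of raising, so ensure logging is configured to capture these messages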
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- function create_word_report 96.6% similar
- function create_enhanced_word_document_v1 86.4% similar
- function create_enhanced_word_document 84.9% similar
- function main_v2 73.3% similar
- function main_v1 73.1% similar