class InvoiceExtractionLogger
A comprehensive logging configuration class for invoice extraction systems that provides console and file logging with optional JSON formatting, request tracking via correlation IDs, and configurable log levels.
/tf/active/vicechatdev/invoice_extraction/utils/logging_utils.py
8 - 208
moderate
Purpose
InvoiceExtractionLogger manages the complete logging infrastructure for invoice extraction applications. It centralizes logging configuration, supports multiple output formats (plain text and JSON), enables request correlation tracking, handles file and console logging simultaneously, and provides global exception handling. The class is designed to be instantiated once at application startup and used throughout the application lifecycle to maintain consistent logging behavior.
Source Code
class InvoiceExtractionLogger:
    """
    Configures and sets up logging for the invoice extraction system.

    Features:
    - Console and file logging
    - JSON formatting option for machine readability
    - Configurable log levels
    - Request tracking with correlation IDs
    - Performance metrics logging (``execution_time_ms`` supplied via ``extra``)

    Creating an instance reconfigures the *root* logger and replaces
    ``sys.excepthook``, so the effect is process-wide.
    """

    DEFAULT_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s - %(message)s"
    DEFAULT_JSON_FIELDS = [
        "timestamp", "level", "logger", "message", "module", "function",
        "path", "correlation_id", "execution_time_ms"
    ]

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize logger with configuration.

        Args:
            config: Dictionary containing logging configuration. Recognised
                keys: ``log_level``, ``log_to_file``, ``log_dir``,
                ``log_file``, ``json_logs``, ``json_fields``.
        """
        self.config = config or {}

        # Set default log level
        self.log_level = self._get_log_level()

        # Log file configuration
        self.log_to_file = self.config.get("log_to_file", False)
        self.log_dir = self.config.get("log_dir", "logs")
        self.log_file = self.config.get("log_file")

        # JSON logging configuration. Copy the list so that mutating the
        # instance attribute can never alter the shared class-level default.
        fields = self.config.get("json_fields", self.DEFAULT_JSON_FIELDS)
        self.json_logs = self.config.get("json_logs", False)
        self.json_fields = list(fields) if fields else []

        # Request tracking
        self.request_id = None
        self._request_filter = None

        # Handlers created by this instance; only these are closed on re-setup.
        self._owned_handlers = []

        # Initialize on creation
        self.setup_logging()

    def _get_log_level(self) -> int:
        """
        Get log level from config or environment variables.

        The ``INVOICE_EXTRACTION_LOG_LEVEL`` environment variable takes
        precedence over ``config["log_level"]``. Level names are matched
        case-insensitively; integers and numeric strings are accepted as-is.

        Returns:
            A numeric logging level; ``logging.INFO`` when the configured
            value is not a recognised level.
        """
        configured = self.config.get("log_level", "INFO")
        env_log_level = os.environ.get("INVOICE_EXTRACTION_LOG_LEVEL")
        if env_log_level:
            configured = env_log_level

        # Numeric levels may be passed straight through (bool is excluded
        # because it is an int subclass but never a meaningful level).
        if isinstance(configured, int) and not isinstance(configured, bool):
            return configured

        level_name = str(configured).strip().upper()
        if level_name.isdigit():
            return int(level_name)

        # getattr alone is not enough: names such as "BASIC_FORMAT" resolve to
        # non-integer attributes of the logging module.
        level = getattr(logging, level_name, None)
        if isinstance(level, int) and not isinstance(level, bool):
            return level
        return logging.INFO

    def setup_logging(self) -> None:
        """
        Configure logging with handlers based on configuration.

        Replaces every handler on the root logger, installs
        ``exception_handler`` as ``sys.excepthook`` and re-attaches the
        request-ID filter if a request ID has already been set.
        """
        root_logger = logging.getLogger()
        root_logger.setLevel(self.log_level)

        # Clear existing handlers to avoid duplicates when called multiple times
        for handler in root_logger.handlers[:]:
            root_logger.removeHandler(handler)

        # Close only the handlers this instance created (releases log file
        # descriptors); foreign handlers may still be in use by their owners.
        for handler in getattr(self, "_owned_handlers", []):
            handler.close()
        self._owned_handlers = []

        # Configure console handler
        console_handler = self._setup_console_handler()
        root_logger.addHandler(console_handler)
        self._owned_handlers.append(console_handler)

        # Configure file handler if enabled
        file_handler = None
        if self.log_to_file:
            file_handler = self._setup_file_handler()
            if file_handler is not None:
                root_logger.addHandler(file_handler)
                self._owned_handlers.append(file_handler)

        # Fresh handlers carry no filters; restore correlation-ID injection.
        request_filter = getattr(self, "_request_filter", None)
        if request_filter is not None:
            for handler in self._owned_handlers:
                handler.addFilter(request_filter)

        # Set exception handling
        sys.excepthook = self.exception_handler

        # Log initial message
        logging.info(
            "Logging initialized at level: %s",
            logging.getLevelName(self.log_level),
        )
        # Only announce the file when the handler was actually created.
        if file_handler is not None:
            logging.info("Log file: %s", self.log_file_path)

    def _build_formatter(self) -> logging.Formatter:
        """Return the JSON or plain-text formatter selected by ``json_logs``."""
        if self.json_logs:
            return self.JsonFormatter(self.json_fields)
        return logging.Formatter(self.DEFAULT_LOG_FORMAT)

    def _setup_console_handler(self) -> logging.Handler:
        """Create and configure console logging handler (writes to stdout)."""
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setLevel(self.log_level)
        console_handler.setFormatter(self._build_formatter())
        return console_handler

    def _setup_file_handler(self) -> Optional[logging.Handler]:
        """
        Create and configure file logging handler.

        Creates ``log_dir`` when needed and generates a timestamped file name
        when ``log_file`` is not configured. ``log_file_path`` is set only
        when the handler was created successfully.

        Returns:
            The configured handler, or ``None`` if it could not be created
            (the failure is logged and file logging is skipped).
        """
        try:
            # exist_ok avoids the check-then-create race of os.path.exists().
            if self.log_dir:
                os.makedirs(self.log_dir, exist_ok=True)

            # Set default log file name if not provided
            if not self.log_file:
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                self.log_file = f"invoice_extraction_{timestamp}.log"

            # Full path to log file
            log_file_path = os.path.join(self.log_dir or "", self.log_file)

            # Explicit encoding keeps output independent of the platform locale.
            file_handler = logging.FileHandler(log_file_path, encoding="utf-8")
        except Exception as e:
            # Deliberately best-effort: file logging must never prevent the
            # application from starting with console logging only.
            logging.error(f"Failed to set up file logging: {str(e)}")
            return None

        self.log_file_path = log_file_path
        file_handler.setLevel(self.log_level)
        file_handler.setFormatter(self._build_formatter())
        return file_handler

    class JsonFormatter(logging.Formatter):
        """Custom formatter for JSON-formatted logs."""

        # (JSON key, LogRecord attribute) pairs copied when present on the record.
        _RECORD_ATTRIBUTES = (
            ("module", "module"),
            ("function", "funcName"),
            ("path", "pathname"),
            ("correlation_id", "correlation_id"),
            ("execution_time_ms", "execution_time_ms"),
        )

        def __init__(self, fields=None):
            """
            Args:
                fields: Keys to keep in each JSON record; empty/None keeps all.
            """
            super().__init__()
            self.fields = fields or []

        def format(self, record):
            """
            Serialize ``record`` to a single-line JSON string.

            ``correlation_id`` and ``execution_time_ms`` are taken from the
            record when present (set by a filter or via ``extra``). Entries of
            ``record.extra_fields`` are merged in. The ``fields`` whitelist is
            then applied, except that exception text is always kept so that
            tracebacks are never silently dropped.
            """
            log_record = {
                "timestamp": self.formatTime(record),
                "level": record.levelname,
                "logger": record.name,
                "message": record.getMessage(),
            }

            for key, attribute in self._RECORD_ATTRIBUTES:
                if hasattr(record, attribute):
                    log_record[key] = getattr(record, attribute)

            # Include custom fields
            log_record.update(getattr(record, "extra_fields", None) or {})

            # Filter fields if specified
            if self.fields:
                log_record = {
                    k: v for k, v in log_record.items() if k in self.fields
                }

            # Include exception info if available (added after filtering on
            # purpose: see docstring).
            if record.exc_info:
                log_record["exception"] = self.formatException(record.exc_info)

            # default=str is deliberate: a log line with a stringified value
            # is better than losing the whole record to a TypeError.
            return json.dumps(log_record, default=str)

    class _RequestIdFilter(logging.Filter):
        """Filter that stamps every record with the current correlation ID."""

        def __init__(self, request_id):
            super().__init__()
            self.request_id = request_id

        def filter(self, record):
            record.correlation_id = self.request_id
            return True

    def exception_handler(self, exc_type, exc_value, exc_traceback) -> None:
        """
        Global exception handler to log unhandled exceptions.

        ``KeyboardInterrupt`` is delegated to the default hook so that Ctrl-C
        is not reported as an application error.
        """
        if issubclass(exc_type, KeyboardInterrupt):
            sys.__excepthook__(exc_type, exc_value, exc_traceback)
            return

        logging.error(
            "Unhandled exception",
            exc_info=(exc_type, exc_value, exc_traceback)
        )

    def set_request_id(self, request_id: str) -> None:
        """
        Set a request ID for correlation in logs.

        A single filter per instance is reused and updated in place, so
        repeated calls do not accumulate filters on the handlers.

        Args:
            request_id: Unique identifier for request tracking
        """
        self.request_id = request_id

        request_filter = getattr(self, "_request_filter", None)
        if request_filter is None:
            request_filter = self._RequestIdFilter(request_id)
            self._request_filter = request_filter
        else:
            request_filter.request_id = request_id

        # Handler.addFilter ignores a filter that is already attached.
        for handler in logging.getLogger().handlers:
            handler.addFilter(request_filter)
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
bases |
- | - |
Parameter Details
config: Optional dictionary containing logging configuration. Supported keys: 'log_level' (str: DEBUG/INFO/WARNING/ERROR/CRITICAL), 'log_to_file' (bool: enable file logging), 'log_dir' (str: directory path for log files), 'log_file' (str: specific log filename), 'json_logs' (bool: enable JSON formatting), 'json_fields' (list: fields to include in JSON logs). If None or empty dict, uses default configuration with INFO level and console-only logging.
Return Value
Instantiation returns an InvoiceExtractionLogger object with fully configured logging handlers. The setup_logging() method returns None but configures the root logger as a side effect. The _get_log_level() method returns an integer representing the logging level (e.g., 10 for DEBUG, 20 for INFO). The _setup_console_handler() method returns a logging.Handler object for console output. The _setup_file_handler() method returns Optional[logging.Handler] (None if file handler creation fails). The set_request_id() method returns None but adds correlation ID filtering to all handlers.
Class Interface
Methods
__init__(self, config: Optional[Dict[str, Any]] = None)
Purpose: Initialize the logger with configuration and automatically set up logging handlers
Parameters:
config: Optional dictionary with logging configuration keys: log_level, log_to_file, log_dir, log_file, json_logs, json_fields
Returns: None (constructor)
_get_log_level(self) -> int
Purpose: Determine the log level from configuration or environment variables, with environment taking precedence
Returns: Integer representing logging level (e.g., 10 for DEBUG, 20 for INFO, 30 for WARNING, 40 for ERROR, 50 for CRITICAL)
setup_logging(self) -> None
Purpose: Configure the root logger with console and optional file handlers based on instance configuration
Returns: None, but modifies the root logger and sys.excepthook as side effects
_setup_console_handler(self) -> logging.Handler
Purpose: Create and configure a console handler with appropriate formatter (plain text or JSON)
Returns: logging.Handler configured for console output to stdout
_setup_file_handler(self) -> Optional[logging.Handler]
Purpose: Create and configure a file handler, creating log directory if needed and generating timestamped filename if not specified
Returns: logging.Handler for file output if successful, None if file handler creation fails
exception_handler(self, exc_type, exc_value, exc_traceback) -> None
Purpose: Global exception handler that logs unhandled exceptions with full traceback information
Parameters:
exc_type: Type of the exception; exc_value: Exception instance; exc_traceback: Traceback object
Returns: None, logs the exception to the configured handlers
set_request_id(self, request_id: str) -> None
Purpose: Set a correlation ID for request tracking and add it to all subsequent log records via a filter
Parameters:
request_id: Unique identifier string for correlating log entries across a request or transaction
Returns: None, but adds a filter to all root logger handlers that injects correlation_id into log records
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
DEFAULT_LOG_FORMAT |
str | Class variable defining the default log format string for plain text logging | class |
DEFAULT_JSON_FIELDS |
List[str] | Class variable listing default fields to include in JSON-formatted logs | class |
config |
Dict[str, Any] | Instance configuration dictionary containing all logging settings | instance |
log_level |
int | Integer representing the configured logging level (e.g., logging.INFO, logging.DEBUG) | instance |
log_to_file |
bool | Flag indicating whether file logging is enabled | instance |
log_dir |
str | Directory path where log files will be stored | instance |
log_file |
Optional[str] | Specific log filename, or None to auto-generate timestamped filename | instance |
json_logs |
bool | Flag indicating whether to use JSON formatting for log output | instance |
json_fields |
List[str] | List of field names to include in JSON-formatted logs | instance |
request_id |
Optional[str] | Current request/correlation ID for tracking related log entries, None if not set | instance |
log_file_path |
str | Full path to the log file, set dynamically when file handler is created | instance |
JsonFormatter |
class | Nested class that extends logging.Formatter to provide JSON-formatted log output with configurable fields | class |
Dependencies
logging, os, sys, json, datetime, typing
Required Imports
import logging
import os
import sys
import json
from datetime import datetime
from typing import Dict, Any, Optional, Union
Usage Example
# Basic usage with default configuration
# (the constructor calls setup_logging(), so the root logger is configured immediately)
logger_config = InvoiceExtractionLogger()
# Advanced usage with custom configuration
# ('json_fields' is a whitelist: only the listed keys are kept in each JSON record)
config = {
'log_level': 'DEBUG',
'log_to_file': True,
'log_dir': './app_logs',
'log_file': 'invoice_app.log',
'json_logs': True,
'json_fields': ['timestamp', 'level', 'message', 'correlation_id']
}
logger_config = InvoiceExtractionLogger(config)
# Set request ID for correlation tracking
logger_config.set_request_id('req-12345-abcde')
# Use standard Python logging after setup
import logging
logger = logging.getLogger(__name__)
logger.info('Processing invoice')
# NOTE: 'invoice_id' is not listed in 'json_fields' above, so it is filtered out
# of the JSON output; add 'invoice_id' to 'json_fields' to have it emitted.
logger.error('Failed to extract data', extra={'extra_fields': {'invoice_id': '123'}})
# Reconfigure logging if needed
# (this removes every handler on the root logger and installs fresh ones)
logger_config.setup_logging()
Best Practices
- Instantiate InvoiceExtractionLogger once at application startup, typically in the main entry point or initialization module
- Call set_request_id() at the beginning of each request/transaction to enable correlation tracking across log entries
- Use the standard Python logging module (logging.getLogger()) after InvoiceExtractionLogger is instantiated - do not call methods on the logger instance for actual logging
- Avoid calling setup_logging() multiple times unless reconfiguration is needed, as it clears and recreates all handlers
- When using JSON logging, pass custom fields via the 'extra_fields' key in the extra parameter of logging calls
- Ensure the log directory has write permissions before enabling log_to_file
- Use environment variable INVOICE_EXTRACTION_LOG_LEVEL for deployment-specific log level overrides without code changes
- The class modifies sys.excepthook globally, so instantiating multiple instances may cause unexpected behavior
- Log files are created with timestamps in the filename if log_file is not specified, preventing overwrites
- The request_id attribute is instance-specific but filters are applied to the root logger, affecting all loggers in the application
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
class TestLoggingUtils 73.7% similar
-
class BaseExtractor 60.2% similar
-
function get_logger 59.7% similar
-
class InvoiceProcessor 56.4% similar
-
class Config_v5 55.2% similar