class TestBEExtractor
Unit test class for testing the BEExtractor class, which extracts structured data from Belgian invoices using LLM-based extraction.
/tf/active/vicechatdev/invoice_extraction/tests/test_extractors.py
305 - 474
moderate
Purpose
This test class validates the functionality of the BEExtractor class, ensuring it correctly extracts invoice metadata, vendor information, amounts, tax data, payment details, and line items from Belgian invoices. It tests Belgian-specific formatting requirements such as VAT number formatting (BE0.XXX.XXX.XXX), comma-based decimal separators, and proper handling of Belgian invoice structures. The tests use mocked LLM responses to verify extraction logic without requiring actual LLM API calls.
Source Code
class TestBEExtractor(unittest.TestCase):
    """Tests for BEExtractor driven by canned (mocked) LLM replies."""

    def setUp(self):
        """Prepare the extractor config, the default mock LLM and a sample document."""
        self.config = {
            'confidence_threshold': 0.7,
            'llm': {'provider': 'test', 'model': 'test-model'},
        }

        # Canned replies for the default mock. Each payload is serialized to a
        # JSON string below; how MockLLMClient selects a reply from these keys
        # is defined by MockLLMClient itself (not visible here).
        invoice_reply = {
            "number": "FACT-2023-156",
            "issue_date": "15/01/2023",
            "due_date": "15/02/2023",
            "reference": "BON-7890",
        }
        vendor_reply = {
            "name": "Belgian Test SPRL",
            "vat_number": "BE0123.456.789",
            "address": "123 Rue de Bruxelles, 1000 Bruxelles, Belgique",
            "contact": "contact@betestvendor.be",
        }
        amounts_reply = {
            "subtotal": 500.00,
            "total": 605.00,
            "currency": "EUR",
        }
        tax_reply = {
            "vat": 105.00,
            "vat_rate": 21,
            "vat_regime": "standard",
        }
        payment_reply = {
            "bank_name": "KBC Bank",
            "iban": "BE68 5390 0754 7034",
            "bic": "KREDBEBB",
            "payment_terms": "30 jours",
            "communication": "+++123/4567/89012+++",
        }
        line_items_reply = [
            {
                "description": "Produit de Test 1",
                "quantity": 2,
                "unit_price": 100.00,
                "vat_rate": 21,
                "amount": 200.00,
            },
            {
                "description": "Produit de Test 2",
                "quantity": 3,
                "unit_price": 100.00,
                "vat_rate": 21,
                "amount": 300.00,
            },
        ]
        self.mock_llm = MockLLMClient({
            'invoice metadata': json.dumps(invoice_reply),
            'vendor data': json.dumps(vendor_reply),
            'amounts': json.dumps(amounts_reply),
            'tax data': json.dumps(tax_reply),
            'payment data': json.dumps(payment_reply),
            'line items': json.dumps(line_items_reply),
        })

        # Sample Belgian invoice: two pages whose texts, joined by a newline,
        # form the document-level text.
        header_text = (
            'Facture #FACT-2023-156\nDate: 15/01/2023\nBelgian Test SPRL\n'
            'TVA: BE0123.456.789\n123 Rue de Bruxelles, 1000 Bruxelles, Belgique'
        )
        totals_text = 'Sous-total: 500,00 €\nTVA (21%): 105,00 €\nTotal: 605,00 €'
        self.be_doc = {
            'text': '\n'.join((header_text, totals_text)),
            'pages': [
                {'text': page_text, 'width': 800, 'height': 1000, 'tables': []}
                for page_text in (header_text, totals_text)
            ],
        }

    def _extract_with(self, mock_llm_client, llm):
        """Make the patched LLMClient return ``llm``, then extract ``self.be_doc``.

        Args:
            mock_llm_client: The patched ``LLMClient`` class supplied by ``@patch``.
            llm: The object the patched class should hand back when called.

        Returns:
            Whatever ``BEExtractor.extract`` returns for the sample document
            with the language argument ``'fr'``.
        """
        mock_llm_client.return_value = llm
        return BEExtractor(self.config).extract(self.be_doc, 'fr')

    @patch('extractors.be_extractor.LLMClient')
    def test_be_extract(self, mock_llm_client):
        """The default mock yields all expected sections, fields and a float confidence."""
        result = self._extract_with(mock_llm_client, self.mock_llm)

        # Every top-level section must be present.
        for section in ('invoice', 'vendor', 'amounts', 'payment', 'line_items'):
            self.assertIn(section, result)

        # Spot-check one or two fields per section.
        expected_fields = (
            ('invoice', 'number', 'FACT-2023-156'),
            ('vendor', 'vat_number', 'BE0123.456.789'),
            ('amounts', 'total', 605.00),
            ('amounts', 'vat', 105.00),
            ('payment', 'iban', 'BE68 5390 0754 7034'),
        )
        for section, field, expected in expected_fields:
            self.assertEqual(result[section][field], expected)
        self.assertEqual(len(result['line_items']), 2)

        # A confidence score must be reported as a float.
        self.assertIn('confidence', result)
        self.assertIsInstance(result['confidence'], float)

    @patch('extractors.be_extractor.LLMClient')
    def test_be_vat_number_formatting(self, mock_llm_client):
        """A raw 'be0123456789' VAT number comes back as 'BE0.123.456.789'."""
        canned_replies = {
            'vendor data': json.dumps({
                "name": "Belgian Test SPRL",
                # Deliberately unformatted: lowercase prefix, no separators.
                "vat_number": "be0123456789",
                "address": "123 Rue de Bruxelles, 1000 Bruxelles, Belgique",
            }),
            # Minimal replies for the remaining fields.
            'invoice metadata': json.dumps({"number": "12345", "issue_date": "15/01/2023"}),
            'amounts': json.dumps({"subtotal": 100, "total": 121, "vat": 21, "currency": "EUR"}),
            'tax data': json.dumps({"vat": 21, "vat_rate": 21}),
            'line items': json.dumps([]),
        }
        result = self._extract_with(mock_llm_client, MockLLMClient(canned_replies))

        self.assertEqual(result['vendor']['vat_number'], 'BE0.123.456.789')

    @patch('extractors.be_extractor.LLMClient')
    def test_be_amount_formatting(self, mock_llm_client):
        """Comma-decimal amount strings such as '500,00' come back as floats."""
        canned_replies = {
            'amounts': json.dumps({
                # Belgian notation: comma as the decimal separator.
                "subtotal": "500,00",
                "total": "605,00",
                "currency": "EUR",
            }),
            # Minimal replies for the remaining fields.
            'invoice metadata': json.dumps({"number": "12345", "issue_date": "15/01/2023"}),
            'vendor data': json.dumps({"name": "Belgian Test", "vat_number": "BE0123456789"}),
            'tax data': json.dumps({"vat": "105,00", "vat_rate": 21}),
            'line items': json.dumps([]),
        }
        result = self._extract_with(mock_llm_client, MockLLMClient(canned_replies))

        for field, expected in (('subtotal', 500.00), ('total', 605.00), ('vat', 105.00)):
            self.assertEqual(result['amounts'][field], expected)
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | unittest.TestCase | - | |
Parameter Details
bases: Inherits from unittest.TestCase to provide testing framework functionality including assertions, test discovery, and test execution capabilities
Return Value
As a test class, it does not return values directly. Test methods use assertions to validate behavior and raise AssertionError on failure. The unittest framework collects and reports test results.
Class Interface
Methods
setUp(self) -> None
Purpose: Initializes test environment before each test method execution, creating mock LLM client and sample Belgian invoice data
Returns: None - sets up instance attributes for use in test methods
test_be_extract(self, mock_llm_client) -> None
Purpose: Tests complete extraction of all data sections from a Belgian invoice including invoice metadata, vendor info, amounts, payment details, and line items
Parameters:
mock_llm_client: Mocked LLMClient class injected by @patch decorator
Returns: None - uses assertions to validate extraction results
test_be_vat_number_formatting(self, mock_llm_client) -> None
Purpose: Tests that Belgian VAT numbers are properly formatted to BE0.XXX.XXX.XXX format even when provided in malformed format
Parameters:
mock_llm_client: Mocked LLMClient class injected by @patch decorator
Returns: None - asserts correct VAT number formatting
test_be_amount_formatting(self, mock_llm_client) -> None
Purpose: Tests that Belgian amounts using comma as decimal separator (e.g., '500,00') are correctly parsed to float values
Parameters:
mock_llm_client: Mocked LLMClient class injected by @patch decorator
Returns: None - asserts correct amount parsing from Belgian format
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| config | dict | Configuration dictionary containing confidence_threshold and LLM provider settings for BEExtractor initialization | instance |
| mock_llm | MockLLMClient | Mock LLM client instance that returns predefined responses for different extraction prompts without making actual API calls | instance |
| be_doc | dict | Sample Belgian invoice document structure containing text content and page information for testing extraction | instance |
Dependencies
unittest, unittest.mock, json, logging, os, pathlib, datetime, extractors.base_extractor, extractors.be_extractor
Required Imports
import unittest
from unittest.mock import patch, MagicMock
import json
import logging
import os
from pathlib import Path
import datetime
from extractors.base_extractor import BaseExtractor
from extractors.be_extractor import BEExtractor
Usage Example
import unittest
from unittest.mock import patch
import json
from extractors.be_extractor import BEExtractor
# Run a single test.
# The test methods are already decorated with @patch, which creates the
# LLMClient mock and passes it in as the extra argument. Calling the method by
# hand with another mock would supply one positional argument too many, so let
# the unittest runner invoke it instead (the runner also calls setUp()).
single_test = TestBEExtractor('test_be_extract')
unittest.TextTestRunner(verbosity=2).run(single_test)

# Run all tests in the class
if __name__ == '__main__':
    suite = unittest.TestLoader().loadTestsFromTestCase(TestBEExtractor)
    unittest.TextTestRunner(verbosity=2).run(suite)
Best Practices
- Each test method is independent and uses setUp() to initialize fresh test data
- Mock objects are used to isolate BEExtractor from actual LLM API calls
- Tests verify both successful extraction and Belgian-specific formatting rules
- The @patch decorator is used to replace LLMClient with mock implementations
- Test data includes realistic Belgian invoice formats with proper VAT numbers, IBAN, and structured communication
- The unittest runner calls setUp() automatically before each test; call setUp() yourself only when invoking a test method by hand outside the runner
- Tests should be run through unittest framework for proper test discovery and reporting
- Mock responses are structured as JSON strings matching expected LLM output format
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- class TestBEValidator 84.6% similar
- class BEExtractor 83.9% similar
- class TestUKExtractor 76.4% similar
- class TestAUExtractor 74.2% similar
- class BEValidator 73.6% similar