TestBEExtractor - Code Extractor

class TestBEExtractor

Maturity: 48

Unit test class for testing the BEExtractor class, which extracts structured data from Belgian invoices using LLM-based extraction.

File:
/tf/active/vicechatdev/invoice_extraction/tests/test_extractors.py

Lines:
305 - 474

Complexity:
moderate

Purpose

This test class validates the functionality of the BEExtractor class, ensuring it correctly extracts invoice metadata, vendor information, amounts, tax data, payment details, and line items from Belgian invoices. It tests Belgian-specific formatting requirements such as VAT number formatting (BE0.XXX.XXX.XXX), comma-based decimal separators, and proper handling of Belgian invoice structures. The tests use mocked LLM responses to verify extraction logic without requiring actual LLM API calls.

Source Code

class TestBEExtractor(unittest.TestCase):
    """Exercise BEExtractor against canned LLM responses for a Belgian invoice.

    Each test replaces ``extractors.be_extractor.LLMClient`` with a
    ``MockLLMClient`` stub, so extraction is driven by predefined JSON
    payloads instead of a live model.
    """

    def setUp(self):
        """Prepare the extractor config, the default LLM stub and a sample invoice."""
        llm_settings = {
            'provider': 'test',
            'model': 'test-model'
        }
        self.config = {
            'confidence_threshold': 0.7,
            'llm': llm_settings
        }

        # Canned payloads, one per extraction prompt the stub is keyed on.
        invoice_meta = {
            "number": "FACT-2023-156",
            "issue_date": "15/01/2023",
            "due_date": "15/02/2023",
            "reference": "BON-7890"
        }
        vendor = {
            "name": "Belgian Test SPRL",
            "vat_number": "BE0123.456.789",
            "address": "123 Rue de Bruxelles, 1000 Bruxelles, Belgique",
            "contact": "contact@betestvendor.be"
        }
        amounts = {
            "subtotal": 500.00,
            "total": 605.00,
            "currency": "EUR"
        }
        tax = {
            "vat": 105.00,
            "vat_rate": 21,
            "vat_regime": "standard"
        }
        payment = {
            "bank_name": "KBC Bank",
            "iban": "BE68 5390 0754 7034",
            "bic": "KREDBEBB",
            "payment_terms": "30 jours",
            "communication": "+++123/4567/89012+++"
        }
        line_items = [
            {
                "description": "Produit de Test 1",
                "quantity": 2,
                "unit_price": 100.00,
                "vat_rate": 21,
                "amount": 200.00
            },
            {
                "description": "Produit de Test 2",
                "quantity": 3,
                "unit_price": 100.00,
                "vat_rate": 21,
                "amount": 300.00
            }
        ]
        self.mock_llm = MockLLMClient({
            'invoice metadata': json.dumps(invoice_meta),
            'vendor data': json.dumps(vendor),
            'amounts': json.dumps(amounts),
            'tax data': json.dumps(tax),
            'payment data': json.dumps(payment),
            'line items': json.dumps(line_items)
        })

        # Two-page French-language invoice: header/vendor on page one,
        # totals on page two; the top-level text is both pages joined.
        header_page = {
            'text': 'Facture #FACT-2023-156\n'
                    'Date: 15/01/2023\n'
                    'Belgian Test SPRL\n'
                    'TVA: BE0123.456.789\n'
                    '123 Rue de Bruxelles, 1000 Bruxelles, Belgique',
            'width': 800,
            'height': 1000,
            'tables': []
        }
        totals_page = {
            'text': 'Sous-total: 500,00 €\n'
                    'TVA (21%): 105,00 €\n'
                    'Total: 605,00 €',
            'width': 800,
            'height': 1000,
            'tables': []
        }
        self.be_doc = {
            'text': 'Facture #FACT-2023-156\n'
                    'Date: 15/01/2023\n'
                    'Belgian Test SPRL\n'
                    'TVA: BE0123.456.789\n'
                    '123 Rue de Bruxelles, 1000 Bruxelles, Belgique\n'
                    'Sous-total: 500,00 €\n'
                    'TVA (21%): 105,00 €\n'
                    'Total: 605,00 €',
            'pages': [header_page, totals_page]
        }

    def _run_extraction(self, llm_client_cls, llm_stub):
        """Install ``llm_stub`` on the patched client class and extract the sample invoice.

        Args:
            llm_client_cls: The patched ``LLMClient`` class supplied by ``@patch``.
            llm_stub: Object returned whenever the patched class is instantiated.

        Returns:
            Whatever ``BEExtractor.extract`` returns for ``self.be_doc`` with
            language code ``'fr'``.
        """
        llm_client_cls.return_value = llm_stub
        extractor = BEExtractor(self.config)
        return extractor.extract(self.be_doc, 'fr')

    @patch('extractors.be_extractor.LLMClient')
    def test_be_extract(self, mock_llm_client):
        """A full extraction yields every section plus a float confidence."""
        extracted = self._run_extraction(mock_llm_client, self.mock_llm)

        # Top-level sections that must be present.
        for section in ('invoice', 'vendor', 'amounts', 'payment', 'line_items'):
            self.assertIn(section, extracted)

        # Spot-check one or two fields per section.
        expected_fields = (
            ('invoice', 'number', 'FACT-2023-156'),
            ('vendor', 'vat_number', 'BE0123.456.789'),
            ('amounts', 'total', 605.00),
            ('amounts', 'vat', 105.00),
            ('payment', 'iban', 'BE68 5390 0754 7034'),
        )
        for section, field, expected in expected_fields:
            self.assertEqual(extracted[section][field], expected)
        self.assertEqual(len(extracted['line_items']), 2)

        # A confidence score must be reported as a float.
        self.assertIn('confidence', extracted)
        self.assertIsInstance(extracted['confidence'], float)

    @patch('extractors.be_extractor.LLMClient')
    def test_be_vat_number_formatting(self, mock_llm_client):
        """A lowercase, separator-free VAT number comes back as BE0.123.456.789."""
        responses = {
            'vendor data': json.dumps({
                "name": "Belgian Test SPRL",
                "vat_number": "be0123456789",  # malformed on purpose
                "address": "123 Rue de Bruxelles, 1000 Bruxelles, Belgique"
            }),
            # Minimal payloads for the remaining prompts.
            'invoice metadata': json.dumps({"number": "12345", "issue_date": "15/01/2023"}),
            'amounts': json.dumps({"subtotal": 100, "total": 121, "vat": 21, "currency": "EUR"}),
            'tax data': json.dumps({"vat": 21, "vat_rate": 21}),
            'line items': json.dumps([])
        }
        llm_stub = MockLLMClient(responses)

        extracted = self._run_extraction(mock_llm_client, llm_stub)

        self.assertEqual(extracted['vendor']['vat_number'], 'BE0.123.456.789')

    @patch('extractors.be_extractor.LLMClient')
    def test_be_amount_formatting(self, mock_llm_client):
        """Comma-decimal amount strings such as '500,00' are parsed into numbers."""
        responses = {
            'amounts': json.dumps({
                "subtotal": "500,00",  # Belgian notation: comma as decimal separator
                "total": "605,00",
                "currency": "EUR"
            }),
            # Minimal payloads for the remaining prompts.
            'invoice metadata': json.dumps({"number": "12345", "issue_date": "15/01/2023"}),
            'vendor data': json.dumps({"name": "Belgian Test", "vat_number": "BE0123456789"}),
            'tax data': json.dumps({"vat": "105,00", "vat_rate": 21}),
            'line items': json.dumps([])
        }
        llm_stub = MockLLMClient(responses)

        extracted = self._run_extraction(mock_llm_client, llm_stub)

        for field, expected in (('subtotal', 500.00), ('total', 605.00), ('vat', 105.00)):
            self.assertEqual(extracted['amounts'][field], expected)

Parameters

Name	Type	Default	Kind
`bases`	unittest.TestCase	-

Parameter Details

bases: Inherits from unittest.TestCase to provide testing framework functionality including assertions, test discovery, and test execution capabilities

Return Value

As a test class, it does not return values directly. Test methods use assertions to validate behavior and raise AssertionError on failure. The unittest framework collects and reports test results.

Class Interface

Methods

`setUp(self) -> None`

Purpose: Initializes test environment before each test method execution, creating mock LLM client and sample Belgian invoice data

Returns: None - sets up instance attributes for use in test methods

`test_be_extract(self, mock_llm_client) -> None`

Purpose: Tests complete extraction of all data sections from a Belgian invoice including invoice metadata, vendor info, amounts, payment details, and line items

Parameters:

mock_llm_client: Mocked LLMClient class injected by @patch decorator

Returns: None - uses assertions to validate extraction results

`test_be_vat_number_formatting(self, mock_llm_client) -> None`

Purpose: Tests that Belgian VAT numbers are normalized to the BE0.XXX.XXX.XXX format even when the LLM returns them malformed (e.g., lowercase 'be0123456789' with no separators)

Parameters:

mock_llm_client: Mocked LLMClient class injected by @patch decorator

Returns: None - asserts correct VAT number formatting

`test_be_amount_formatting(self, mock_llm_client) -> None`

Purpose: Tests that Belgian amounts using comma as decimal separator (e.g., '500,00') are correctly parsed to float values

Parameters:

mock_llm_client: Mocked LLMClient class injected by @patch decorator

Returns: None - asserts correct amount parsing from Belgian format

Attributes

Name	Type	Description	Scope
`config`	dict	Configuration dictionary containing confidence_threshold and LLM provider settings for BEExtractor initialization	instance
`mock_llm`	MockLLMClient	Mock LLM client instance that returns predefined responses for different extraction prompts without making actual API calls	instance
`be_doc`	dict	Sample Belgian invoice document structure containing text content and page information for testing extraction	instance

Dependencies

unittest
unittest.mock
json
logging
os
pathlib
datetime
extractors.base_extractor
extractors.be_extractor

Required Imports

import unittest
from unittest.mock import patch, MagicMock
import json
import logging
import os
from pathlib import Path
import datetime
from extractors.base_extractor import BaseExtractor
from extractors.be_extractor import BEExtractor

Usage Example

import unittest

# NOTE: this import path assumes the project root (the directory that
# contains `extractors/` and `tests/`) is on sys.path — adjust to your layout.
from tests.test_extractors import TestBEExtractor

# Run a single test.
# The test methods are decorated with @patch, which creates the LLMClient
# mock and passes it to the method as an extra positional argument. Calling
# test.test_be_extract(mock) by hand would therefore supply that argument
# twice and raise TypeError. Instantiate the case by method name instead and
# let a runner drive setUp(), the patched test and the result reporting.
single = unittest.TestSuite([TestBEExtractor('test_be_extract')])
unittest.TextTestRunner(verbosity=2).run(single)

# Run all tests in the class
if __name__ == '__main__':
    suite = unittest.TestLoader().loadTestsFromTestCase(TestBEExtractor)
    unittest.TextTestRunner(verbosity=2).run(suite)

Best Practices

Each test method is independent and uses setUp() to initialize fresh test data
Mock objects are used to isolate BEExtractor from actual LLM API calls
Tests verify both successful extraction and Belgian-specific formatting rules
The @patch decorator is used to replace LLMClient with mock implementations
Test data includes realistic Belgian invoice formats with proper VAT numbers, IBAN, and structured communication
setUp() runs automatically before each test when the class is executed through unittest; call it manually only if you invoke a test method directly, outside the framework
Tests should be run through unittest framework for proper test discovery and reporting
Mock responses are structured as JSON strings matching expected LLM output format

Similar Components

AI-powered semantic similarity - components with related functionality:

class TestBEValidator 84.6% similar

Unit test class for validating the BEValidator class, which validates Belgian invoice extraction results including VAT numbers, addresses, IBAN, currency, and legal requirements.
From: /tf/active/vicechatdev/invoice_extraction/tests/test_validators.py
class BEExtractor 83.9% similar

Belgium-specific invoice data extractor that uses LLM (Large Language Model) to extract structured invoice data from Belgian invoices in multiple languages (English, French, Dutch).
From: /tf/active/vicechatdev/invoice_extraction/extractors/be_extractor.py
class TestUKExtractor 76.4% similar

Unit test class for testing the UKExtractor class, which extracts structured data from UK invoices including VAT numbers, dates, amounts, and line items.
From: /tf/active/vicechatdev/invoice_extraction/tests/test_extractors.py
class TestAUExtractor 74.2% similar

Unit test class for testing the AUExtractor class, which extracts data from Australian invoices including ABN, GST, and payment details.
From: /tf/active/vicechatdev/invoice_extraction/tests/test_extractors.py
class BEValidator 73.6% similar

Belgium-specific invoice data validator that extends BaseValidator to implement Belgian invoice validation rules including VAT number format, address verification, IBAN validation, and legal requirements.
From: /tf/active/vicechatdev/invoice_extraction/validators/be_validator.py

← Back to Browse

Assistant

Hi! I can help improve this code. Tell me what you'd like to enhance (e.g., "add error handling", "optimize performance", "improve readability", "add type hints").

Code Comparison

Original Code

                            class TestBEExtractor(unittest.TestCase):
    """Test cases for the BEExtractor class.

    Each test patches ``extractors.be_extractor.LLMClient`` so that
    instantiating it returns a ``MockLLMClient`` preloaded with JSON
    responses, then runs ``BEExtractor.extract`` on a sample invoice.
    """
    
    def setUp(self):
        """Set up test environment before each test.

        Creates three instance attributes:
        ``config`` (extractor settings), ``mock_llm`` (default canned LLM
        responses) and ``be_doc`` (a two-page sample invoice document).
        """
        self.config = {
            'confidence_threshold': 0.7,
            'llm': {
                'provider': 'test',
                'model': 'test-model'
            }
        }
        
        # Create a mock LLM client.
        # Keys name the kind of data requested; values are the JSON strings
        # the stub hands back. (How MockLLMClient matches prompts to keys is
        # not visible here — TODO confirm against its definition.)
        self.mock_llm = MockLLMClient({
            'invoice metadata': json.dumps({
                "number": "FACT-2023-156",
                "issue_date": "15/01/2023",
                "due_date": "15/02/2023",
                "reference": "BON-7890"
            }),
            'vendor data': json.dumps({
                "name": "Belgian Test SPRL",
                "vat_number": "BE0123.456.789",
                "address": "123 Rue de Bruxelles, 1000 Bruxelles, Belgique",
                "contact": "contact@betestvendor.be"
            }),
            'amounts': json.dumps({
                "subtotal": 500.00,
                "total": 605.00,
                "currency": "EUR"
            }),
            'tax data': json.dumps({
                "vat": 105.00,
                "vat_rate": 21,
                "vat_regime": "standard"
            }),
            'payment data': json.dumps({
                "bank_name": "KBC Bank",
                "iban": "BE68 5390 0754 7034",
                "bic": "KREDBEBB",
                "payment_terms": "30 jours",
                "communication": "+++123/4567/89012+++"
            }),
            'line items': json.dumps([
                {
                    "description": "Produit de Test 1",
                    "quantity": 2,
                    "unit_price": 100.00,
                    "vat_rate": 21,
                    "amount": 200.00
                },
                {
                    "description": "Produit de Test 2",
                    "quantity": 3,
                    "unit_price": 100.00,
                    "vat_rate": 21,
                    "amount": 300.00
                }
            ])
        })
        
        # Sample Belgian invoice (French): the top-level 'text' is the two
        # page texts joined by a newline; page one holds the header and vendor
        # lines, page two the subtotal, VAT and total lines.
        self.be_doc = {
            'text': 'Facture #FACT-2023-156\nDate: 15/01/2023\nBelgian Test SPRL\n'
                   'TVA: BE0123.456.789\n123 Rue de Bruxelles, 1000 Bruxelles, Belgique\n'
                   'Sous-total: 500,00 €\nTVA (21%): 105,00 €\nTotal: 605,00 €',
            'pages': [
                {
                    'text': 'Facture #FACT-2023-156\nDate: 15/01/2023\nBelgian Test SPRL\n'
                           'TVA: BE0123.456.789\n123 Rue de Bruxelles, 1000 Bruxelles, Belgique',
                    'width': 800,
                    'height': 1000,
                    'tables': []
                },
                {
                    'text': 'Sous-total: 500,00 €\nTVA (21%): 105,00 €\nTotal: 605,00 €',
                    'width': 800,
                    'height': 1000,
                    'tables': []
                }
            ]
        }
    
    @patch('extractors.be_extractor.LLMClient')
    def test_be_extract(self, mock_llm_client):
        """Test extraction of data from Belgian invoice.

        Args:
            mock_llm_client: Patched ``LLMClient`` class injected by ``@patch``.
        """
        # Setup the mock: instantiating the patched class yields the stub
        # built in setUp().
        mock_llm_client.return_value = self.mock_llm
        
        # Create extractor
        be_extractor = BEExtractor(self.config)
        
        # Extract data ('fr' is passed as the second argument, presumably the
        # document language — confirm against BEExtractor.extract).
        result = be_extractor.extract(self.be_doc, 'fr')
        
        # Check that we have the expected sections
        self.assertIn('invoice', result)
        self.assertIn('vendor', result)
        self.assertIn('amounts', result)
        self.assertIn('payment', result)
        self.assertIn('line_items', result)
        
        # Check specific fields
        self.assertEqual(result['invoice']['number'], 'FACT-2023-156')
        # NOTE(review): this expects the VAT number unchanged, while
        # test_be_vat_number_formatting expects 'BE0.123.456.789' for the same
        # digits — confirm which form BEExtractor produces for this input.
        self.assertEqual(result['vendor']['vat_number'], 'BE0123.456.789')
        self.assertEqual(result['amounts']['total'], 605.00)
        # 'vat' is supplied by the 'tax data' response, not 'amounts';
        # presumably the extractor merges it into result['amounts'] — verify.
        self.assertEqual(result['amounts']['vat'], 105.00)
        self.assertEqual(result['payment']['iban'], 'BE68 5390 0754 7034')
        self.assertEqual(len(result['line_items']), 2)
        
        # Check confidence score is calculated
        self.assertIn('confidence', result)
        self.assertIsInstance(result['confidence'], float)
    
    @patch('extractors.be_extractor.LLMClient')
    def test_be_vat_number_formatting(self, mock_llm_client):
        """Test that Belgian VAT number is properly formatted.

        Feeds a lowercase VAT number without separators and expects
        ``'BE0.123.456.789'`` in the result.

        Args:
            mock_llm_client: Patched ``LLMClient`` class injected by ``@patch``.
        """
        # Setup the mock with a malformed VAT number
        mock_llm = MockLLMClient({
            'vendor data': json.dumps({
                "name": "Belgian Test SPRL",
                "vat_number": "be0123456789",  # Malformed: lowercase prefix, no dots
                "address": "123 Rue de Bruxelles, 1000 Bruxelles, Belgique"
            }),
            # Add minimum required responses for other fields
            # (no 'payment data' entry is provided in this test).
            'invoice metadata': json.dumps({"number": "12345", "issue_date": "15/01/2023"}),
            'amounts': json.dumps({"subtotal": 100, "total": 121, "vat": 21, "currency": "EUR"}),
            'tax data': json.dumps({"vat": 21, "vat_rate": 21}),
            'line items': json.dumps([])
        })
        mock_llm_client.return_value = mock_llm
        
        # Create extractor
        be_extractor = BEExtractor(self.config)
        
        # Extract data
        result = be_extractor.extract(self.be_doc, 'fr')
        
        # Check that VAT number was formatted correctly
        self.assertEqual(result['vendor']['vat_number'], 'BE0.123.456.789')
    
    @patch('extractors.be_extractor.LLMClient')
    def test_be_amount_formatting(self, mock_llm_client):
        """Test that Belgian amounts with comma decimal separator are properly parsed.

        Supplies amounts as strings such as ``"500,00"`` and expects numeric
        values equal to 500.00, 605.00 and 105.00 in the result.

        Args:
            mock_llm_client: Patched ``LLMClient`` class injected by ``@patch``.
        """
        # Setup the mock with Belgian number format
        mock_llm = MockLLMClient({
            'amounts': json.dumps({
                "subtotal": "500,00",  # Belgian format with comma
                "total": "605,00",
                "currency": "EUR"
            }),
            # Add minimum required responses for other fields
            # (no 'payment data' entry is provided in this test).
            'invoice metadata': json.dumps({"number": "12345", "issue_date": "15/01/2023"}),
            'vendor data': json.dumps({"name": "Belgian Test", "vat_number": "BE0123456789"}),
            'tax data': json.dumps({"vat": "105,00", "vat_rate": 21}),
            'line items': json.dumps([])
        })
        mock_llm_client.return_value = mock_llm
        
        # Create extractor
        be_extractor = BEExtractor(self.config)
        
        # Extract data
        result = be_extractor.extract(self.be_doc, 'fr')
        
        # Check that amounts were parsed into numbers; assertEqual compares by
        # value, so this does not by itself verify the float type.
        self.assertEqual(result['amounts']['subtotal'], 500.00)
        self.assertEqual(result['amounts']['total'], 605.00)
        self.assertEqual(result['amounts']['vat'], 105.00)
                        

Improved Code

🔍 Code Extractor

class TestBEExtractor

Purpose

Source Code

Parameters

Parameter Details

Return Value

Class Interface

Methods

`setUp(self) -> None`

`test_be_extract(self, mock_llm_client) -> None`

`test_be_vat_number_formatting(self, mock_llm_client) -> None`

`test_be_amount_formatting(self, mock_llm_client) -> None`

Attributes

Dependencies

Required Imports

Usage Example

Best Practices

Tags

Similar Components

class TestBEValidator 84.6% similar

class BEExtractor 83.9% similar

class TestUKExtractor 76.4% similar

class TestAUExtractor 74.2% similar

class BEValidator 73.6% similar

class TestBEExtractor

Purpose

Source Code

Parameters

Parameter Details

Return Value

Class Interface

Methods

setUp(self) -> None

test_be_extract(self, mock_llm_client) -> None

test_be_vat_number_formatting(self, mock_llm_client) -> None

test_be_amount_formatting(self, mock_llm_client) -> None

Attributes

Dependencies

Required Imports

Usage Example

Best Practices

Tags

Similar Components

class TestBEValidator 84.6% similar

class BEExtractor 83.9% similar

class TestUKExtractor 76.4% similar

class TestAUExtractor 74.2% similar

class BEValidator 73.6% similar

✨ Improve Code: TestBEExtractor

Code Comparison

`setUp(self) -> None`

`test_be_extract(self, mock_llm_client) -> None`

`test_be_vat_number_formatting(self, mock_llm_client) -> None`

`test_be_amount_formatting(self, mock_llm_client) -> None`