class ReferenceManager
Manages document references for inline citation and bibliography generation in a RAG (Retrieval-Augmented Generation) system.
/tf/active/vicechatdev/fixed_project_victoria_generator.py
49 - 217
moderate
Purpose
The ReferenceManager class provides a comprehensive system for tracking, citing, and formatting document references. It assigns unique reference numbers to documents, prevents duplicate references, generates inline citations, and creates formatted bibliographies with metadata. This is essential for maintaining academic-style citations in AI-generated content that draws from multiple source documents.
Source Code
class ReferenceManager:
    """
    Manages document references for inline citation and bibliography generation.

    Every distinct ``doc_id`` passed to :meth:`add_document` receives a
    sequential reference number starting at 1. Adding the same ``doc_id``
    again returns the number that was assigned the first time.
    """

    def __init__(self):
        self.references = {}  # Dictionary mapping reference number to reference info
        self.reference_counter = 1  # Next reference number to hand out
        self.used_documents = set()  # Track which documents have been referenced

    @staticmethod
    def _first_present(metadata, keys, default):
        """Return the first value stored under *keys* that is neither None nor ''."""
        for key in keys:
            value = metadata.get(key)
            # Only None and '' count as "missing"; 0 and False are kept.
            if value is not None and value != '':
                return value
        return default

    def _find_ref_num(self, doc_id):
        """Return the reference number registered for *doc_id*, or None."""
        for ref_num, ref_info in self.references.items():
            if ref_info['doc_id'] == doc_id:
                return ref_num
        return None

    def add_document(self, doc_id: str, content: str, metadata: Dict[str, Any]) -> int:
        """
        Add a document to the reference system and return its reference number.

        Args:
            doc_id: Unique document identifier
            content: Document content; None is treated as empty content
            metadata: Document metadata; None is treated as an empty dict

        Returns:
            Reference number for inline citation. If ``doc_id`` was already
            added, the existing number is returned and nothing is modified.
        """
        existing = self._find_ref_num(doc_id)
        if existing is not None:
            return existing

        if metadata is None:
            metadata = {}

        # str.split() with no arguments splits on every whitespace run
        # (newlines, carriage returns, tabs, repeated spaces) and drops
        # leading/trailing whitespace, so one split/join yields the
        # single-line preview text.
        clean_content = ' '.join((content or '').split())
        preview = clean_content[:250] + ("..." if len(clean_content) > 250 else "")

        ref_num = self.reference_counter
        self.references[ref_num] = {
            'doc_id': doc_id,
            'source': self.format_source(metadata),
            'preview': preview,
            'metadata': metadata
        }
        self.reference_counter += 1
        self.used_documents.add(doc_id)
        return ref_num

    def format_source(self, metadata: Dict[str, Any]) -> str:
        """
        Format source information for display.

        Each field is read from a primary key with one alternate key
        ('type'/'document_type', 'id'/'document_id', 'title'/'filename',
        'author'/'creator', 'date'/'created_date'). A key whose value is
        None or '' is treated as missing, so the alternate key or the
        default is used instead of rendering the text "None".

        Args:
            metadata: Document metadata; None is treated as an empty dict

        Returns:
            Space-joined string of the available parts, in the order
            title, author, document type, date, document ID.
        """
        if metadata is None:
            metadata = {}

        doc_type = self._first_present(metadata, ('type', 'document_type'), 'Document')
        doc_id = self._first_present(metadata, ('id', 'document_id'), 'unknown')
        title = self._first_present(metadata, ('title', 'filename'), '')
        author = self._first_present(metadata, ('author', 'creator'), '')
        date = self._first_present(metadata, ('date', 'created_date'), '')

        source_parts = []
        # Title, author and date are optional; the literal 'unknown' is
        # treated the same as a missing value.
        if title and title != 'unknown':
            source_parts.append(f'"{title}"')
        if author and author != 'unknown':
            source_parts.append(f"by {author}")
        # Document type and ID are always emitted (with their defaults).
        source_parts.append(f"({doc_type})")
        if date and date != 'unknown':
            source_parts.append(f"dated {date}")
        source_parts.append(f"[ID: {doc_id}]")
        return " ".join(source_parts)

    def get_citation(self, doc_id: str) -> str:
        """
        Get inline citation for a document.

        Args:
            doc_id: Document identifier

        Returns:
            Inline citation string like [1], or "[?]" when ``doc_id`` has
            not been registered through add_document().
        """
        ref_num = self._find_ref_num(doc_id)
        if ref_num is None:
            return "[?]"  # Should not happen if add_document was called first
        return f"[{ref_num}]"

    def generate_bibliography(self) -> str:
        """
        Generate a formatted bibliography section.

        Entries are listed in ascending reference-number order. Each entry
        shows the formatted source, the content preview (when non-blank),
        and any of the optional metadata fields that are present.

        Returns:
            Formatted bibliography in markdown. The introductory sentence
            is fixed text that mentions "warranty disclosures".
        """
        if not self.references:
            return "\n## References\n\nNo references available.\n"

        bibliography = ["\n## References\n"]
        bibliography.append("*The following documents were referenced in generating the warranty disclosures:*\n")

        for ref_num in sorted(self.references):
            ref_info = self.references[ref_num]
            preview = ref_info['preview']
            # Entries may carry metadata=None if inserted directly into
            # self.references; treat that as "no metadata".
            metadata = ref_info['metadata'] or {}

            # Main reference entry
            bibliography.append(f"**[{ref_num}]** {ref_info['source']}")

            # Content preview
            if preview and preview.strip():
                clean_preview = preview.replace('\n', ' ').replace('\r', ' ')
                # Previews built by add_document() are already capped; this
                # second cap covers entries placed in self.references by hand.
                if len(clean_preview) > 300:
                    clean_preview = clean_preview[:297] + "..."
                bibliography.append(f" *Content preview:* {clean_preview}")

            # Optional metadata lines
            metadata_items = []
            if metadata.get('document_type') and metadata['document_type'] != 'Document':
                metadata_items.append(f"*Document type:* {metadata['document_type']}")
            if metadata.get('date') and metadata['date'] != 'unknown':
                metadata_items.append(f"*Date:* {metadata['date']}")
            if metadata.get('source') and metadata['source'] != 'unknown':
                metadata_items.append(f"*Source location:* {metadata['source']}")
            if metadata.get('author') and metadata['author'] != 'unknown':
                metadata_items.append(f"*Author:* {metadata['author']}")
            if metadata.get('category'):
                metadata_items.append(f"*Category:* {metadata['category']}")

            raw_score = metadata.get('relevance_score')
            # Falsy scores (None, 0, '') are skipped.
            if raw_score:
                try:
                    metadata_items.append(f"*Relevance:* {float(raw_score):.2f}")
                except (TypeError, ValueError):
                    # A non-numeric score must not abort the whole
                    # bibliography; show the raw value instead.
                    metadata_items.append(f"*Relevance:* {raw_score}")

            if metadata.get('summary') and len(metadata['summary']) > 10:
                summary = metadata['summary'][:200] + ("..." if len(metadata['summary']) > 200 else "")
                metadata_items.append(f"*Summary:* {summary}")

            bibliography.extend(f" {item}" for item in metadata_items)

            # Blank line between references
            bibliography.append("")

        # Footer note
        bibliography.append("---")
        bibliography.append(f"*Total references: {len(self.references)}*")
        bibliography.append("")
        return "\n".join(bibliography)

    def clear(self):
        """Clear all references and restart numbering at 1."""
        self.references = {}
        self.reference_counter = 1
        self.used_documents = set()
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
bases |
- | - |
Parameter Details
__init__: No parameters required. Initializes an empty reference management system with a counter starting at 1 and empty tracking structures.
Return Value
Instantiation returns a ReferenceManager object. Key method returns: add_document() returns an integer reference number for inline citation; get_citation() returns a formatted citation string like '[1]'; generate_bibliography() returns a markdown-formatted string containing all references with metadata; clear() returns None.
Class Interface
Methods
__init__(self)
Purpose: Initialize a new ReferenceManager instance with empty reference tracking structures
Returns: None (constructor)
add_document(self, doc_id: str, content: str, metadata: Dict[str, Any]) -> int
Purpose: Add a document to the reference system and return its reference number. Prevents duplicates by checking if doc_id already exists.
Parameters:
doc_id: Unique document identifier string used to track and prevent duplicate references
content: Full document content string, which will be cleaned and truncated to create a preview
metadata: Dictionary containing document metadata such as type, title, author, date, source, category, relevance_score, and summary
Returns: Integer reference number (starting from 1) that can be used for inline citations. Returns existing reference number if document was already added.
format_source(self, metadata: Dict[str, Any]) -> str
Purpose: Format source information from metadata into a human-readable citation string
Parameters:
metadata: Dictionary containing document metadata fields like type, id, title, author, date, filename, creator, created_date
Returns: Formatted string combining title, author, document type, date, and ID in a readable citation format
get_citation(self, doc_id: str) -> str
Purpose: Get inline citation string for a previously added document
Parameters:
doc_id: Document identifier string that was used when calling add_document()
Returns: Formatted citation string like '[1]' or '[?]' if document was not found (should not happen if add_document was called first)
generate_bibliography(self) -> str
Purpose: Generate a complete formatted bibliography section with all referenced documents and their metadata
Returns: Markdown-formatted string containing a References section with numbered entries, source information, content previews, metadata details, and a footer with total reference count
clear(self)
Purpose: Clear all references and reset the reference counter to start fresh
Returns: None
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| references | Dict[int, Dict[str, Any]] | Dictionary mapping reference numbers (int) to reference information dictionaries containing 'doc_id', 'source', 'preview', and 'metadata' keys | instance |
| reference_counter | int | Counter tracking the next reference number to assign, starts at 1 and increments for each new unique document | instance |
| used_documents | set | Set of document IDs (strings) that have been referenced, used to track which documents have been added to the reference system | instance |
Dependencies
typing
Required Imports
from typing import Dict, Any
Usage Example
from typing import Dict, Any
# One manager instance holds the references for a whole generation session
ref_manager = ReferenceManager()

# Register the first source document; the returned number is its citation index
metadata1 = {
    'type': 'Technical Manual',
    'id': 'doc_001',
    'title': 'Product Warranty Guide',
    'author': 'Engineering Team',
    'date': '2024-01-15',
}
ref_num1 = ref_manager.add_document(
    doc_id='doc_001',
    content='This product comes with a 2-year warranty covering manufacturing defects...',
    metadata=metadata1,
)

# Register a second document (no author supplied)
metadata2 = {
    'type': 'Policy Document',
    'id': 'doc_002',
    'title': 'Warranty Terms',
    'date': '2024-02-01',
}
ref_num2 = ref_manager.add_document(
    doc_id='doc_002',
    content='Extended warranty options are available for purchase...',
    metadata=metadata2,
)

# Look up the inline citation markers by document ID
citation1 = ref_manager.get_citation('doc_001')  # Returns '[1]'
citation2 = ref_manager.get_citation('doc_002')  # Returns '[2]'

# Render the markdown bibliography for everything registered so far
bibliography = ref_manager.generate_bibliography()
print(bibliography)

# Reset the manager so the next session starts numbering at 1 again
ref_manager.clear()
Best Practices
- Always call add_document() before get_citation() to ensure the document is registered in the reference system
- Use the same doc_id consistently for the same document to prevent duplicate references
- add_document() returns the reference number, which can be used immediately for inline citations
- The class automatically prevents duplicate references by checking existing doc_ids
- Call clear() when starting a new document or session to reset the reference counter
- The reference counter starts at 1 and increments for each new unique document
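- Duplicate tracking uses the doc_id argument passed to add_document(), not the metadata; include 'id' or 'document_id' in the metadata so the bibliography entry shows the document ID instead of 'unknown'. Other fields like 'title', 'author', 'date' enhance bibliography formatting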
- Content previews are automatically truncated to 250 characters with ellipsis
- The generate_bibliography() method returns markdown-formatted text suitable for appending to generated documents
- State is maintained across multiple add_document() calls, so instantiate once per document generation session
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- class ReferenceManager_v1 (94.6% similar)
- class ReferenceManager_v2 (74.7% similar)
- class ReferenceManager_v3 (74.6% similar)
- class ReferenceManager_v4 (74.4% similar)
- function parse_references_section (52.2% similar)