class ReferenceManager_v3
Manages extraction and formatting of references for LLM chat responses. Handles both file references and BibTeX citations, formatting them according to various academic citation styles.
/tf/active/vicechatdev/OneCo_hybrid_RAG_old.py
75 - 632
moderate
Purpose
Manages extraction and formatting of references for LLM chat responses. Handles both file references and BibTeX citations, formatting them according to various academic citation styles.
Source Code
class ReferenceManager:
    """
    Manages extraction and formatting of references for LLM chat responses.

    Text produced by the LLM cites its sources as ``[Block N]`` markers. This
    class finds those markers, looks the blocks up in a caller-supplied
    dictionary, renumbers the citations as ``[1]``, ``[2]``, ... and renders a
    Markdown reference section (file links, web pages, BibTeX entries or
    plain text) in one of several citation styles.
    """

    # One bracketed citation group: "[Block 1]", "[block 1, Block 2]",
    # "[Block 1, 2, 3]" or a mix of the full and abbreviated forms.
    # Kept as a plain string (not compiled) so the class body itself does not
    # need ``re`` at definition time.
    _CITATION_GROUP_PATTERN = r'\[[Bb]lock\s+\d+(?:,\s*(?:[Bb]lock\s+)?\d+)*\]'

    def __init__(self, default_style="apa"):
        """
        Initialize the reference manager.

        Args:
            default_style: Citation style used when a call does not name one.
        """
        self.default_style = default_style
        # Heading of the generated reference section, per citation style.
        self.style_titles = {
            "apa": "References",
            "mla": "Works Cited",
            "chicago": "Bibliography",
            "ieee": "References",
            "harvard1": "References",
            "vancouver": "References",
            "nature": "References"
        }

    def extract_references(self, text):
        """
        Extract block references from text in various formats including:
        - Single blocks: [block X] or [Block X]
        - Multiple blocks: [Block X, Block Y, Block Z] or [Block X, Y, Z]

        Args:
            text: Text to extract references from

        Returns:
            list: Unique block numbers (ints) in order of first appearance
            in the text, so citations end up numbered in reading order.
        """
        if not text:
            return []

        unique_refs = []
        seen = set()
        # A single left-to-right scan keeps document order; scanning per
        # pattern kind would number all single citations before any list.
        for group in re.finditer(self._CITATION_GROUP_PATTERN, text):
            for digits in re.findall(r'\d+', group.group(0)):
                block_num = int(digits)
                if block_num not in seen:
                    seen.add(block_num)
                    unique_refs.append(block_num)
        return unique_refs

    @staticmethod
    def _build_reference(block_data, ref_id):
        """Translate one block dictionary into the reference object rendered later."""
        block_type = block_data.get("type", "unknown")

        if block_type == "document":
            # Document/file reference
            return {
                "id": ref_id,
                "type": "file",
                "path": block_data.get("path", ""),
                "description": block_data.get("description", None)
            }
        if block_type == "literature":
            # BibTeX reference
            return {
                "id": ref_id,
                "type": "bibtex",
                "content": block_data.get("bibtex", "")
            }
        if block_type == "web":
            # Web reference
            return {
                "id": ref_id,
                "type": "web",
                "url": block_data.get("url", ""),
                "title": block_data.get("title", "Web Page"),
                "snippet": block_data.get("snippet", ""),
                "date": block_data.get("date", "")
            }
        # Generic reference (fallback)
        return {
            "id": ref_id,
            "type": "generic",
            "content": block_data.get("content", "")
        }

    def process_references(self, text, blocks_dict, style=None):
        """
        Process references in text, lookup block information, and generate reference section.

        Args:
            text: Text containing references
            blocks_dict: Dictionary with block data, looked up by the integer
                block number found in the text
            style: Citation style to use (defaults to self.default_style)

        Returns:
            tuple: (updated_text, references_section). Both the original text
            and an empty section are returned when nothing could be resolved.
        """
        if style is None:
            style = self.default_style

        block_numbers = self.extract_references(text)
        if not block_numbers:
            return text, ""  # No references found

        references = []
        citation_map = {}
        for block_num in block_numbers:
            if block_num not in blocks_dict:
                continue  # Skip if block not found
            # Number by resolved references only, so a missing block does not
            # leave a gap in the sequence.
            ref_id = str(len(references) + 1)
            references.append(self._build_reference(blocks_dict[block_num], ref_id))
            citation_map[block_num] = ref_id

        if not references:
            # Nothing resolvable: do not emit a heading-only section or touch the text.
            return text, ""

        references_section, _ = self.generate_references_section(references, style)

        def _renumber(match):
            """Rewrite one citation group using citation_map, token by token."""
            tokens = []
            resolved_any = False
            for digits in re.findall(r'\d+', match.group(0)):
                ref_id = citation_map.get(int(digits))
                if ref_id is None:
                    # Keep unresolved blocks labelled so they cannot be
                    # mistaken for a citation number.
                    tokens.append(f"Block {digits}")
                else:
                    tokens.append(ref_id)
                    resolved_any = True
            if not resolved_any:
                return match.group(0)
            return "[" + ", ".join(tokens) + "]"

        # Each number is translated exactly once from the ORIGINAL group text.
        # Chained str.replace calls would re-translate already replaced ids
        # (e.g. 4 -> 2 followed by 2 -> 3) and match "Block 1" inside "Block 12".
        updated_text = re.sub(self._CITATION_GROUP_PATTERN, _renumber, text)
        return updated_text, references_section

    def format_bibtex_reference(self, bibtex_string, ref_id, style_name="apa"):
        """
        Format a BibTeX entry.

        Only the first entry of the parsed string is used.

        Args:
            bibtex_string: BibTeX entry as a string
            ref_id: Reference ID for citation
            style_name: Style name (apa, ieee, etc.)

        Returns:
            tuple: (citation_marker, reference_entry)
        """
        citation_marker = f"[{ref_id}]"

        # Parse BibTeX string
        parser = bibtex.Parser()
        bib_data = parser.parse_string(bibtex_string)

        entry_keys = list(bib_data.entries)
        if not entry_keys:
            # No entry parsed: fall back to the raw text rather than raising
            # IndexError and aborting the whole reference section.
            fallback = bibtex_string.strip() or "Reference"
            return citation_marker, f"[{ref_id}]: {fallback}"
        entry = bib_data.entries[entry_keys[0]]

        style = style_name.lower()

        # Extract common fields
        authors = self.format_authors(entry, style_name)
        fields = entry.fields
        title = fields.get('title', '')
        journal = fields.get('journal', '')
        year = fields.get('year', '')
        volume = fields.get('volume', '')
        number = fields.get('number', '')
        pages = fields.get('pages', '')
        month = fields.get('month', '')
        doi = fields.get('doi', '')
        publisher = fields.get('publisher', '')
        address = fields.get('address', '')

        # Format according to style and entry type
        if entry.type == 'article':
            if style == "apa":
                # APA: Author. (Year). Title. Journal, Volume(Number), Pages.
                reference = f"{authors} ({year}). {title}. *{journal}*, {volume}"
                if number:
                    reference += f"({number})"
                reference += f", {pages}."
            elif style == "ieee":
                # IEEE: Author, "Title," Journal, vol. Volume, no. Number, pp. Pages, Month Year.
                reference = f"{authors}, \"{title},\" *{journal}*"
                if volume:
                    reference += f", vol. {volume}"
                if number:
                    reference += f", no. {number}"
                reference += f", pp. {pages}"
                # The date is always comma-separated from the pages, with or
                # without a month.
                if month:
                    reference += f", {month} {year}."
                else:
                    reference += f", {year}."
            elif style == "chicago":
                # Chicago: Author. "Title." Journal Volume, no. Number (Year): Pages.
                reference = f"{authors}. \"{title}.\" *{journal}* {volume}"
                if number:
                    reference += f", no. {number}"
                reference += f" ({year}): {pages}."
            elif style == "nature":
                # Nature: Author. Title. Journal Volume, Pages (Year).
                reference = f"{authors}. {title}. *{journal}* {volume}, {pages} ({year})."
            elif style == "harvard1":
                # Harvard: Author (Year) 'Title', Journal, Volume(Number), pp. Pages.
                reference = f"{authors} ({year}) '{title}', *{journal}*, {volume}"
                if number:
                    reference += f"({number})"
                reference += f", pp. {pages}."
            elif style == "vancouver":
                # Vancouver: Author. Title. Journal. Year;Volume(Number):Pages.
                reference = f"{authors}. {title}. {journal}. {year};{volume}"
                if number:
                    reference += f"({number})"
                reference += f":{pages}."
            else:
                # Plain: Author. Title. Journal, Volume(Number), Pages, Year.
                reference = f"{authors}. {title}. *{journal}*, {volume}"
                if number:
                    reference += f"({number})"
                reference += f", {pages}, {year}."
        elif entry.type in ['book', 'incollection', 'inbook']:
            if style == "apa":
                # APA: Author. (Year). Title. Publisher, Address.
                reference = f"{authors} ({year}). *{title}*. "
                imprint = [part for part in (publisher, address) if part]
                if imprint:
                    reference += ", ".join(imprint) + "."
            elif style == "ieee":
                # IEEE: Author, Title. Address: Publisher, Year.
                reference = f"{authors}, *{title}*"
                if address or publisher:
                    reference += ". "
                    if address:
                        reference += f"{address}"
                        if publisher:
                            reference += ": "
                    if publisher:
                        reference += f"{publisher}"
                reference += f", {year}."
            elif style == "chicago":
                # Chicago: Author. Title. Address: Publisher, Year.
                reference = f"{authors}. *{title}*. "
                if address:
                    reference += f"{address}: "
                if publisher:
                    reference += f"{publisher}, "
                reference += f"{year}."
            else:
                # Default/Plain: Author. Title. Publisher, Address, Year.
                reference = f"{authors}. *{title}*. "
                imprint = [part for part in (publisher, address, year) if part]
                if imprint:
                    reference += ", ".join(imprint) + "."
        else:
            # Generic format for misc entry types
            reference = f"{authors} ({year}). {title}."

        # Add DOI with proper Markdown link formatting if available
        lowered = reference.lower()
        if doi and "doi:" not in lowered and "doi.org" not in lowered:
            reference += f" [doi:{doi}](https://doi.org/{doi})"

        # Format as a numbered reference
        markdown_reference = f"[{ref_id}]: {reference}"
        return citation_marker, markdown_reference

    @staticmethod
    def _join_names(names, conjunction="and"):
        """Join formatted names with ', ', prefixing the last one with the conjunction."""
        if len(names) == 1:
            return names[0]
        return ", ".join(names[:-1] + [f"{conjunction} {names[-1]}"])

    def format_authors(self, entry, style_name):
        """
        Format authors according to the style.

        Editors are used when the entry has no authors.

        Args:
            entry: Parsed bibliography entry exposing a ``persons`` mapping
            style_name: Style name (apa, ieee, etc.)

        Returns:
            str: Formatted author list, or "" when nobody is listed.
        """
        persons = entry.persons
        if 'author' in persons:
            authors = persons['author']
        elif 'editor' in persons:
            authors = persons['editor']
        else:
            return ""
        if not authors:
            return ""

        style = style_name.lower()

        # IEEE, Vancouver: Use last name first with initials
        if style in ["ieee", "vancouver"]:
            if len(authors) > 7:
                # More than 7 authors: show first 6 + et al.
                names = [self.format_last_first(author, True) for author in authors[:6]]
                names.append("et al.")
                return ", ".join(names)
            return self._join_names([self.format_last_first(author, True) for author in authors])

        # Harvard, APA: Normal order with full first names, et al. for >3
        if style in ["harvard1", "apa"]:
            if len(authors) > 7:
                # More than 7 authors: First 6 + ... + last author
                names = [self.format_name(author) for author in authors[:6]]
                names.append("...")
                names.append(self.format_name(authors[-1]))
                return ", ".join(names)
            if len(authors) > 3:
                return f"{self.format_name(authors[0])} et al."
            return self._join_names([self.format_name(author) for author in authors])

        # Nature joins the last author with "&"; Chicago and every other
        # style list all authors in normal order joined with "and".
        conjunction = "&" if style == "nature" else "and"
        return self._join_names([self.format_name(author) for author in authors], conjunction)

    def format_name(self, person):
        """
        Format a person's name in normal order: First Last.

        Only ``first_names`` and ``last_names`` are read. An empty part is
        dropped so the result never starts or ends with a stray space.
        """
        first = " ".join(person.first_names)
        last = " ".join(person.last_names)
        return " ".join(part for part in (first, last) if part)

    def format_last_first(self, person, initials=False):
        """
        Format a person's name as Last, First or Last, F.

        Args:
            person: Object exposing ``first_names`` and ``last_names`` lists
            initials: Abbreviate each first name to its initial when True

        Returns:
            str: "Last, First", or just "Last" when there are no first names.
        """
        if initials:
            # Skip empty name parts: indexing [0] on "" would raise IndexError.
            first = " ".join(name[0] + "." for name in person.first_names if name)
        else:
            first = " ".join(person.first_names)
        last = " ".join(person.last_names)
        if first:
            return f"{last}, {first}"
        return last

    def process_web_reference(self, web_data, ref_id):
        """
        Convert a web reference to a formatted citation.

        The layout is fixed ("Title. (Date). Retrieved from URL") and does
        not depend on the selected citation style.

        Args:
            web_data: Dictionary with web reference data (title, url, etc.)
            ref_id: Reference ID for citation

        Returns:
            tuple: (citation_marker, reference_entry)
        """
        citation_marker = f"[{ref_id}]"

        # Extract web data
        title = web_data.get("title", "Web Page")
        url = web_data.get("url", "")
        date = web_data.get("date", "")

        # If no date is provided, use current date
        if not date:
            from datetime import datetime
            date = datetime.now().strftime("%B %d, %Y")

        # "[N]: " prefix matches the file, BibTeX and generic entries.
        markdown_reference = (
            f"[{ref_id}]: {title}. ({date}). Retrieved from [{url}]({url})"
        )
        return citation_marker, markdown_reference

    def process_file_reference(self, filepath, ref_id, description=None):
        """
        Convert a filepath to a markdown reference with clickable link.

        Args:
            filepath: Path to the file
            ref_id: Reference ID for citation
            description: Optional description for the file

        Returns:
            tuple: (citation_marker, reference_entry)
        """
        citation_marker = f"[{ref_id}]"

        # File name shown to the reader and used as the search filter
        filename = os.path.basename(filepath)
        # Spaces in the filter part of the URL are written as '+'
        encoded_filename = filename.replace(' ', '+')

        # Directory part (without the file name), always ending in '/'
        directory_path = os.path.dirname(filepath)
        if directory_path and not directory_path.endswith('/'):
            directory_path += '/'

        # Spaces in the fragment part of the URL are written as '%20'
        encoded_path = f"/SHARED/vicebio_shares/{directory_path}".replace(' ', '%20')

        # Construct the full URL
        file_url = f"https://filecloud.vicebio.com/ui/core/index.html?filter={encoded_filename}#expl-tabl.{encoded_path}"

        # Create the reference with description if provided
        if description:
            markdown_reference = f"[{ref_id}]: **{description}**: [{filename}]({file_url})"
        else:
            markdown_reference = f"[{ref_id}]: [{filename}]({file_url})"
        return citation_marker, markdown_reference

    def generate_references_section(self, references, style="apa"):
        """
        Generate a complete references section in Markdown from a list of reference objects.

        Args:
            references: List of reference objects (dictionaries with at least
                "id" and "type", plus the fields their type requires)
            style: Citation style to use (e.g., 'apa', 'ieee', 'nature', etc.);
                falls back to self.default_style when None or empty

        Returns:
            tuple: (markdown_output, citation_map) where citation_map maps
            each reference id to its "[id]" citation marker
        """
        style = style or self.default_style
        title = self.style_titles.get(style.lower(), "References")

        entries = []
        citation_map = {}

        # Format each reference
        for ref in references:
            ref_id = ref["id"]
            ref_type = ref["type"]

            if ref_type == "bibtex":
                citation, reference = self.format_bibtex_reference(ref["content"], ref_id, style)
            elif ref_type in ("file", "document"):
                citation, reference = self.process_file_reference(
                    ref["path"], ref_id, ref.get("description", None)
                )
            elif ref_type == "web":
                citation, reference = self.process_web_reference(ref, ref_id)
            else:
                # Simple text reference
                citation = f"[{ref_id}]"
                reference = f"[{ref_id}]: {ref.get('content', 'Reference')}"

            entries.append(reference + "\n\n")
            citation_map[ref_id] = citation

        markdown_output = f"## {title}\n\n" + "".join(entries)
        return markdown_output.strip(), citation_map

    def list_available_styles(self):
        """
        List all available citation styles

        Returns:
            list: Names of available styles
        """
        return ["apa", "ieee", "chicago", "harvard1", "nature", "vancouver", "plain"]
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| default_style | str | "apa" | positional or keyword |
Parameter Details
default_style: Citation style used when a call does not specify one (defaults to "apa").
Return Value
Instantiating the class returns a ReferenceManager instance.
Class Interface
Methods
__init__(self, default_style)
Purpose: Initialize the reference manager. Args: default_style: Default citation style to use
Parameters:
default_style: Parameter
Returns: None
extract_references(self, text)
Purpose: Extract block references from text in various formats including: - Single blocks: [block X] or [Block X] - Multiple blocks: [Block X, Block Y, Block Z] or [Block X, Y, Z] Args: text: Text to extract references from Returns: list: List of block numbers referenced in the text
Parameters:
text: Parameter
Returns: See docstring for return details
process_references(self, text, blocks_dict, style)
Purpose: Process references in text, lookup block information, and generate reference section. Args: text: Text containing references blocks_dict: Dictionary with block data indexed by block number style: Citation style to use (defaults to self.default_style) Returns: tuple: (updated_text, references_section)
Parameters:
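text: Text containing references. blocks_dict: Dictionary with block data indexed by block number. style: Citation style to use (defaults to self.default_style).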
Returns: See docstring for return details
format_bibtex_reference(self, bibtex_string, ref_id, style_name)
Purpose: Format a BibTeX entry. Args: bibtex_string: BibTeX entry as a string ref_id: Reference ID for citation style_name: Style name (apa, ieee, etc.) Returns: tuple: (citation_marker, reference_entry)
Parameters:
bibtex_string: BibTeX entry as a string. ref_id: Reference ID for citation. style_name: Style name (apa, ieee, etc.).
Returns: See docstring for return details
format_authors(self, entry, style_name)
Purpose: Format authors according to the style.
Parameters:
entry: Parsed bibliography entry whose `persons` mapping is read. style_name: Style name (apa, ieee, etc.).
Returns: The formatted author list as a string (an empty string when the entry has neither authors nor editors).
format_name(self, person)
Purpose: Format a person's name in normal order: First Middle Last
Parameters:
person: Parameter
Returns: The person's name as a string, first names followed by last names.
format_last_first(self, person, initials)
Purpose: Format a person's name as Last, First or Last, F.
Parameters:
person: Object providing `first_names` and `last_names`. initials: When True, first names are abbreviated to initials.
Returns: The name as a string in "Last, First" form, or just the last name when there are no first names.
process_web_reference(self, web_data, ref_id)
Purpose: Convert a web reference to a formatted citation according to style. Args: web_data: Dictionary with web reference data (title, url, etc.) ref_id: Reference ID for citation Returns: tuple: (citation_marker, reference_entry)
Parameters:
web_data: Dictionary with web reference data (title, url, etc.). ref_id: Reference ID for citation.
Returns: See docstring for return details
process_file_reference(self, filepath, ref_id, description)
Purpose: Convert a filepath to a markdown reference with clickable link. Args: filepath: Path to the file ref_id: Reference ID for citation description: Optional description for the file Returns: tuple: (citation_marker, reference_entry)
Parameters:
filepath: Path to the file. ref_id: Reference ID for citation. description: Optional description for the file.
Returns: See docstring for return details
generate_references_section(self, references, style)
Purpose: Generate a complete references section in Markdown from a list of reference objects. Args: references: List of reference objects (dictionaries with required fields) style: Citation style to use (e.g., 'apa', 'ieee', 'nature', etc.) Returns: tuple: (markdown_output, citation_map)
Parameters:
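references: List of reference objects (dictionaries with required fields). style: Citation style to use (e.g., 'apa', 'ieee', 'nature', etc.).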
Returns: See docstring for return details
list_available_styles(self)
Purpose: List all available citation styles Returns: list: Names of available styles
Returns: See docstring for return details
Required Imports
from typing import List
from typing import Any
from typing import Dict
import os
import re
import panel as pn
Usage Example
# Example usage:
# result = ReferenceManager(default_style="apa")
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
class ReferenceManager_v2 98.7% similar
-
class ReferenceManager_v4 98.5% similar
-
class ReferenceManager 74.6% similar
-
class ReferenceManager_v1 73.8% similar
-
function parse_references_section 52.0% similar