class TextRewriter
AI-powered text rewriting engine that processes documents based on detected annotations (strikethrough, highlight, markup) and generates improved versions using structured LLM prompts.
File: /tf/active/vicechatdev/e-ink-llm/text_rewriter.py
Lines: 12-283
Complexity: moderate
Purpose
TextRewriter is responsible for analyzing annotated documents and generating rewritten content based on detected annotations. It uses different prompt templates for various annotation types (strikethrough, highlight, markup, mixed) to guide an LLM in producing improved document versions. The class also provides functionality to generate improvement suggestions and create editing summaries that describe the changes made during the rewriting process.
Source Code
class TextRewriter:
    """
    AI-powered text rewriting engine that processes documents based on detected annotations
    Generates improved versions of text content using structured prompts
    """

    def __init__(self, llm_handler: LLMHandler):
        self.llm_handler = llm_handler

        # Structured prompts for different annotation types
        self.rewriting_prompts = {
            'strikethrough': """
You are analyzing a document where text has been marked for deletion with strikethrough annotations.
Your task is to rewrite the content by removing the marked sections and ensuring smooth flow.

Guidelines:
- Remove content that appears to be struck through
- Ensure transitions remain natural
- Maintain the document's tone and style
- Fix any grammatical issues caused by deletions

Original content: {content}

Annotation context: {annotation_info}

Please provide a clean, revised version:
""",
            'highlight': """
You are analyzing a document where text has been highlighted for emphasis or attention.
Your task is to enhance the highlighted sections while maintaining the overall message.

Guidelines:
- Pay special attention to highlighted sections
- Enhance clarity and impact of highlighted content
- Ensure highlighted points are well-integrated
- Maintain document structure and flow

Original content: {content}

Highlighted sections: {annotation_info}

Please provide an enhanced version that better emphasizes the key points:
""",
            'markup': """
You are analyzing a document with handwritten markup and corrections.
Your task is to incorporate the suggested changes and improvements.

Guidelines:
- Interpret markup as correction suggestions
- Improve clarity and readability
- Fix any errors indicated by markup
- Enhance overall quality while preserving meaning

Original content: {content}

Markup annotations: {annotation_info}

Please provide a corrected and improved version:
""",
            'mixed': """
You are analyzing a document with multiple types of annotations including strikethroughs, highlights, and markup.
Your task is to create an improved version that incorporates all the suggested changes.

Guidelines:
- Remove struck-through content
- Enhance highlighted sections
- Incorporate markup corrections
- Ensure coherent flow and readability
- Maintain the document's original purpose and tone

Original content: {content}

Annotation summary: {annotation_info}

Please provide a comprehensive revision:
"""
        }

    async def rewrite_document_from_annotations(
        self,
        original_content: str,
        annotations: List[Dict[str, Any]]
    ) -> Optional[str]:
        """
        Rewrite document content based on detected annotations

        Args:
            original_content: The original document content/analysis
            annotations: List of annotation information

        Returns:
            Rewritten content or None if failed
        """
        try:
            if not annotations:
                logger.info("No annotations provided for rewriting")
                return None

            # Analyze annotation types
            annotation_types = set()
            annotation_summary = []

            for ann in annotations:
                ann_type = ann.get('annotation_type', 'unknown')
                annotation_types.add(ann_type)

                # Create annotation summary
                confidence = ann.get('confidence', 0)
                area = ann.get('area', 0)
                annotation_summary.append(f"{ann_type} (confidence: {confidence:.2f}, area: {area}px)")

            # Choose appropriate prompt based on annotation types
            prompt_key = self._select_prompt_key(annotation_types)
            prompt_template = self.rewriting_prompts.get(prompt_key, self.rewriting_prompts['mixed'])

            # Format annotation information
            annotation_info = "; ".join(annotation_summary)

            # Create the prompt
            prompt = prompt_template.format(
                content=original_content[:3000],  # Limit content length for API
                annotation_info=annotation_info
            )

            logger.info(f"Rewriting content using prompt type: {prompt_key}")

            # Generate rewritten content
            rewritten_content = await self.llm_handler.generate_text(
                prompt,
                max_tokens=2000,
                temperature=0.3  # Lower temperature for more consistent rewriting
            )

            if rewritten_content:
                logger.info(f"Successfully generated {len(rewritten_content)} characters of rewritten content")
                return rewritten_content.strip()
            else:
                logger.warning("Failed to generate rewritten content")
                return None

        except Exception as e:
            logger.error(f"Error in document rewriting: {e}")
            return None

    def _select_prompt_key(self, annotation_types: set) -> str:
        """Select the most appropriate prompt based on annotation types"""
        if len(annotation_types) == 1:
            # Single annotation type
            single_type = next(iter(annotation_types))
            if single_type in self.rewriting_prompts:
                return single_type

        # Multiple types or unsupported single type
        return 'mixed'

    async def generate_improvement_suggestions(
        self,
        original_content: str,
        annotations: List[Dict[str, Any]]
    ) -> List[str]:
        """
        Generate specific improvement suggestions based on annotations

        Args:
            original_content: The original document content
            annotations: List of annotation information

        Returns:
            List of specific improvement suggestions
        """
        try:
            if not annotations:
                return ["No annotations detected - document appears to be in final form"]

            # Analyze annotations to generate targeted suggestions
            suggestions = []

            # Count annotation types
            type_counts = {}
            for ann in annotations:
                ann_type = ann.get('annotation_type', 'unknown')
                type_counts[ann_type] = type_counts.get(ann_type, 0) + 1

            # Generate type-specific suggestions
            if 'strikethrough' in type_counts:
                count = type_counts['strikethrough']
                suggestions.append(f"Consider removing {count} section(s) marked for deletion")

            if 'highlight' in type_counts:
                count = type_counts['highlight']
                suggestions.append(f"Enhance {count} highlighted section(s) for greater impact")

            if 'markup' in type_counts:
                count = type_counts['markup']
                suggestions.append(f"Incorporate {count} handwritten correction(s) or addition(s)")

            if 'underline' in type_counts:
                count = type_counts['underline']
                suggestions.append(f"Review {count} underlined section(s) for emphasis or correction")

            # Add general suggestions based on annotation density
            total_annotations = len(annotations)
            if total_annotations >= 5:
                suggestions.append("Heavy editing detected - consider comprehensive revision")
            elif total_annotations >= 3:
                suggestions.append("Moderate editing detected - focus on marked areas")
            else:
                suggestions.append("Light editing detected - minor refinements needed")

            # Calculate average confidence
            avg_confidence = sum(ann.get('confidence', 0) for ann in annotations) / len(annotations)
            if avg_confidence < 0.5:
                suggestions.append("Some annotations have low confidence - manual review recommended")

            return suggestions

        except Exception as e:
            logger.error(f"Error generating improvement suggestions: {e}")
            return ["Error generating suggestions - manual review recommended"]

    async def create_editing_summary(
        self,
        original_content: str,
        rewritten_content: str,
        annotations: List[Dict[str, Any]]
    ) -> str:
        """
        Create a summary of the editing process and changes made

        Args:
            original_content: Original document content
            rewritten_content: Rewritten document content
            annotations: List of annotations that guided the rewriting

        Returns:
            Summary of editing process and changes
        """
        try:
            # Create basic statistics
            original_length = len(original_content)
            rewritten_length = len(rewritten_content)
            length_change = rewritten_length - original_length

            # Count annotation types
            type_counts = {}
            for ann in annotations:
                ann_type = ann.get('annotation_type', 'unknown')
                type_counts[ann_type] = type_counts.get(ann_type, 0) + 1

            # Generate summary
            summary_parts = [
                "Editing Summary:",
                f"• Processed {len(annotations)} annotations across {len(type_counts)} types",
                f"• Content length: {original_length:,} → {rewritten_length:,} characters ({length_change:+,})",
            ]

            # Add annotation breakdown
            if type_counts:
                type_breakdown = ", ".join([f"{count} {ann_type}" for ann_type, count in type_counts.items()])
                summary_parts.append(f"• Annotation types: {type_breakdown}")

            # Add change assessment
            if abs(length_change) > original_length * 0.1:  # More than 10% change
                change_type = "substantial revision" if length_change > 0 else "significant condensation"
                summary_parts.append(f"• Change assessment: {change_type}")
            else:
                summary_parts.append("• Change assessment: focused improvements")

            return "\n".join(summary_parts)

        except Exception as e:
            logger.error(f"Error creating editing summary: {e}")
            return "Error creating editing summary"
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| llm_handler | LLMHandler | - | positional or keyword |
Parameter Details
llm_handler: An instance of LLMHandler that provides the interface to the language model for generating rewritten text. This handler must implement a generate_text method that accepts prompts and returns generated content asynchronously.
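The excerpt only shows how generate_text is called: it is awaited with a prompt plus max_tokens and temperature keyword arguments and is expected to return a string (or None). Any object exposing that surface will work at runtime, since nothing in the class enforces the LLMHandler type. A minimal stand-in for local testing might look like the following; the EchoLLMHandler name and its behavior are illustrative assumptions, not part of the module:

from typing import Optional

class EchoLLMHandler:
    """Illustrative stand-in exposing the interface TextRewriter relies on."""

    async def generate_text(self, prompt: str, max_tokens: int = 2000,
                            temperature: float = 0.3) -> Optional[str]:
        # A real handler would call an LLM API here; this stub just returns
        # a slice of the prompt so the surrounding pipeline can be exercised.
        return prompt[:max_tokens]

# Duck typing is enough at runtime: TextRewriter only awaits generate_text().
# rewriter = TextRewriter(EchoLLMHandler())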
Return Value
Instantiation returns a TextRewriter object. Among its methods, rewrite_document_from_annotations returns Optional[str] (the rewritten content, or None on failure), generate_improvement_suggestions returns List[str] (a list of actionable suggestions), and create_editing_summary returns str (a formatted summary of the changes made).
Class Interface
Methods
__init__(self, llm_handler: LLMHandler)
Purpose: Initialize the TextRewriter with an LLM handler and set up structured prompts for different annotation types
Parameters:
llm_handler: LLMHandler instance that provides text generation capabilities
Returns: None (constructor)
async rewrite_document_from_annotations(self, original_content: str, annotations: List[Dict[str, Any]]) -> Optional[str]
Purpose: Rewrite document content based on detected annotations by selecting appropriate prompt template and generating improved version via LLM
Parameters:
original_content: The original document text to be rewritten (only the first 3000 characters are sent to the LLM)
annotations: List of annotation dictionaries containing 'annotation_type', 'confidence', and 'area' keys
Returns: Rewritten content as string if successful, None if no annotations provided or generation fails
_select_prompt_key(self, annotation_types: set) -> str
Purpose: Select the most appropriate prompt template key based on the types of annotations present
Parameters:
annotation_types: Set of annotation type strings found in the document
Returns: String key for prompt template ('strikethrough', 'highlight', 'markup', or 'mixed')
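Going by the source above, the selection rule is simple: exactly one recognized annotation type returns that type's key, and anything else falls back to 'mixed'. For illustration (assuming rewriter is an already constructed TextRewriter instance):

rewriter._select_prompt_key({'highlight'})                 # -> 'highlight'
rewriter._select_prompt_key({'strikethrough', 'markup'})   # -> 'mixed' (multiple types)
rewriter._select_prompt_key({'underline'})                 # -> 'mixed' (no dedicated template)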
async generate_improvement_suggestions(self, original_content: str, annotations: List[Dict[str, Any]]) -> List[str]
Purpose: Generate specific, actionable improvement suggestions based on annotation types, counts, and confidence levels
Parameters:
original_content: The original document content (used for context)
annotations: List of annotation dictionaries to analyze
Returns: List of string suggestions describing recommended improvements, or error message list on failure
async create_editing_summary(self, original_content: str, rewritten_content: str, annotations: List[Dict[str, Any]]) -> str
Purpose: Create a formatted summary of the editing process including statistics, annotation breakdown, and change assessment
Parameters:
original_content: Original document text before rewriting
rewritten_content: Rewritten document text after processing
annotations: List of annotations that guided the rewriting process
Returns: Multi-line formatted string containing editing statistics and summary, or error message on failure
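For reference, the summary assembled by this method is a short bulleted block along the following lines (the numbers here are made up for illustration):

Editing Summary:
• Processed 3 annotations across 2 types
• Content length: 1,250 → 1,020 characters (-230)
• Annotation types: 2 strikethrough, 1 highlight
• Change assessment: significant condensation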
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| llm_handler | LLMHandler | Handler instance for interacting with the language model to generate rewritten text | instance |
| rewriting_prompts | Dict[str, str] | Dictionary mapping annotation types ('strikethrough', 'highlight', 'markup', 'mixed') to their corresponding prompt templates with placeholders for content and annotation_info | instance |
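Because rewriting_prompts is an ordinary instance dictionary, individual templates can be replaced after construction, as long as the replacement keeps the {content} and {annotation_info} placeholders the class fills in. A hedged example of overriding the highlight template:

rewriter.rewriting_prompts['highlight'] = """
Rewrite the text so that the highlighted passages become explicit key takeaways.

Original content: {content}
Highlighted sections: {annotation_info}

Revised version:
"""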
Dependencies
typing, logging, llm_handler
Required Imports
from typing import List, Dict, Any, Optional
import logging
from llm_handler import LLMHandler
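The methods also log through a module-level logger that sits outside the class body and is not shown in this excerpt; a conventional setup, offered here as an assumption, would be:

import logging

logger = logging.getLogger(__name__)  # assumed module-level logger used by TextRewriter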
Usage Example
import asyncio
from typing import List, Dict, Any
from llm_handler import LLMHandler
from text_rewriter import TextRewriter
# Initialize the LLM handler
llm_handler = LLMHandler(api_key='your-api-key')
# Create TextRewriter instance
rewriter = TextRewriter(llm_handler)
# Define annotations detected from document
annotations = [
{'annotation_type': 'strikethrough', 'confidence': 0.85, 'area': 1200},
{'annotation_type': 'highlight', 'confidence': 0.92, 'area': 800}
]
original_text = "This is the original document content with some sections marked for deletion and others highlighted for emphasis."
# Rewrite document based on annotations
async def main():
    rewritten = await rewriter.rewrite_document_from_annotations(original_text, annotations)
    if rewritten:
        print(f"Rewritten: {rewritten}")

    # Get improvement suggestions
    suggestions = await rewriter.generate_improvement_suggestions(original_text, annotations)
    print(f"Suggestions: {suggestions}")

    # Create editing summary
    if rewritten:
        summary = await rewriter.create_editing_summary(original_text, rewritten, annotations)
        print(f"Summary: {summary}")

asyncio.run(main())
Best Practices
- Always instantiate with a properly configured LLMHandler that has valid API credentials
- All rewriting methods are async and must be awaited in an async context
- The class sends only the first 3000 characters of content to the LLM to stay within token limits - chunk longer documents yourself (see the sketch after this list)
- Methods return None or fallback error messages on failure - always check return values before using them
- Annotation dictionaries should contain 'annotation_type', 'confidence', and 'area' keys for best results
- The class uses temperature=0.3 for consistent rewriting - this is intentionally low to reduce variability
- Multiple annotation types automatically trigger the 'mixed' prompt template for comprehensive revision
- Error handling is built-in but logs errors - ensure logging is configured to capture issues
- The rewriting_prompts dictionary can be modified after instantiation to customize prompt templates
- For production use, implement retry logic around the async methods, as LLM calls may fail intermittently (see the sketch after this list)
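The chunking and retry points above can be combined in a thin wrapper. Everything below, including the chunk size, attempt count, and back-off values, is an illustrative assumption rather than part of the module:

import asyncio
from typing import Any, Dict, List, Optional

from text_rewriter import TextRewriter

async def rewrite_with_retries(
    rewriter: TextRewriter,
    content: str,
    annotations: List[Dict[str, Any]],
    chunk_size: int = 3000,
    attempts: int = 3,
) -> Optional[str]:
    """Split long content into chunks and retry transient LLM failures."""
    chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
    rewritten_chunks = []
    for chunk in chunks:
        result = None
        for attempt in range(attempts):
            result = await rewriter.rewrite_document_from_annotations(chunk, annotations)
            if result:
                break
            await asyncio.sleep(2 ** attempt)  # simple exponential back-off between attempts
        rewritten_chunks.append(result if result else chunk)  # fall back to the original chunk
    return "\n\n".join(rewritten_chunks) if rewritten_chunks else None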
Similar Components
AI-powered semantic similarity - components with related functionality:
- class EditingWorkflowHandler (63.3% similar)
- class LLMHandler (55.4% similar)
- function chat_with_text_section (53.4% similar)
- function main_v68 (53.1% similar)
- class RemarkableEInkProcessor (52.5% similar)