class EditingWorkflowHandler
Orchestrates a complete document editing workflow that detects annotations in PDFs, analyzes their types and intent, generates AI-powered text improvements, and provides editing recommendations.
/tf/active/vicechatdev/e-ink-llm/editing_workflow.py
28 - 185
complex
Purpose
This class serves as the main coordinator for processing documents with editing annotations. It integrates annotation detection, analysis, and AI-powered rewriting to help users understand and apply edits marked in PDF documents. The workflow includes: (1) detecting visual annotations like strikethroughs, highlights, and markups, (2) analyzing annotation types to understand editing intent, (3) generating rewritten content based on detected edits, and (4) providing actionable recommendations. It's designed for document processing pipelines where PDFs contain manual editing marks that need to be interpreted and applied programmatically.
Source Code
class EditingWorkflowHandler:
    """Coordinate the complete document editing workflow.

    Pipeline:
        1. Detect annotations in the document (strikethrough, highlight, ...).
        2. Analyze annotation types and intent.
        3. Generate AI-powered text improvements.
        4. Provide editing recommendations.
    """

    def __init__(self, llm_handler: LLMHandler):
        """Wire the detector and rewriter around the given LLM handler.

        Args:
            llm_handler: Initialized LLMHandler providing AI text generation;
                forwarded to the TextRewriter component.
        """
        self.llm_handler = llm_handler
        self.annotation_detector = AnnotationDetector()
        self.text_rewriter = TextRewriter(llm_handler)

    async def process_document_for_editing(
        self,
        file_path: Path,
        image_b64: str,
        original_analysis: str
    ) -> Optional[EditingWorkflowResult]:
        """Process a document through the complete editing workflow.

        Args:
            file_path: Path to the original document (a PDF is expected by
                the annotation detector).
            image_b64: Base64 encoded image of the document. NOTE(review):
                currently unused here; kept for interface compatibility.
            original_analysis: The original AI analysis of the document,
                used as the base text for rewriting.

        Returns:
            EditingWorkflowResult with workflow results, or None on failure.
            A result with 0 annotations is returned when no edits are found.
        """
        try:
            logger.info(f"Starting editing workflow for {file_path.name}")

            # Step 1: Detect annotations
            print(" 🔍 Detecting annotations...")
            annotation_result = await self.annotation_detector.detect_annotations_in_pdf(str(file_path))

            if not annotation_result or annotation_result.total_annotations == 0:
                logger.info("No annotations detected, skipping editing workflow")
                return EditingWorkflowResult(
                    annotations_detected=0,
                    confidence_score=0.0,
                    recommendations=["No annotations detected - document appears to be final"],
                    workflow_summary="No editing annotations found in document"
                )

            print(f" ✅ Found {annotation_result.total_annotations} annotations")

            # Step 2: Analyze annotation types and generate recommendations
            recommendations = self._generate_recommendations(annotation_result)

            # Step 3: Generate rewritten content if significant annotations found
            rewritten_content = None
            if annotation_result.total_annotations >= 2:  # Only rewrite if substantial markup
                print(" ✏️ Generating rewritten content...")
                rewritten_content = await self.text_rewriter.rewrite_document_from_annotations(
                    original_analysis, annotation_result.annotations
                )
                if rewritten_content:
                    print(f" ✅ Generated {len(rewritten_content):,} characters of rewritten content")
                else:
                    print(" ⚠️ Failed to generate rewritten content")

            # Step 4: Calculate confidence score
            confidence_score = self._calculate_confidence_score(annotation_result)

            # Step 5: Generate workflow summary
            workflow_summary = self._generate_workflow_summary(
                annotation_result, len(rewritten_content) if rewritten_content else 0
            )

            return EditingWorkflowResult(
                annotations_detected=annotation_result.total_annotations,
                confidence_score=confidence_score,
                recommendations=recommendations,
                rewritten_content=rewritten_content,
                annotation_details=[{
                    'type': ann.annotation_type,
                    'confidence': ann.confidence,
                    'area': ann.area,
                    'text': ann.text_content or 'No text detected'
                } for ann in annotation_result.annotations],
                workflow_summary=workflow_summary
            )

        except Exception as e:
            # Broad catch is deliberate: the workflow is best-effort and
            # callers handle a None result. Use exception() to keep the
            # traceback in the log.
            logger.exception(f"Error in editing workflow: {e}")
            return None

    @staticmethod
    def _count_annotation_types(annotation_result) -> Dict[str, int]:
        """Tally detected annotations by their annotation_type string."""
        counts: Dict[str, int] = {}
        for ann in annotation_result.annotations:
            counts[ann.annotation_type] = counts.get(ann.annotation_type, 0) + 1
        return counts

    def _generate_recommendations(self, annotation_result) -> List[str]:
        """Generate editing recommendations based on detected annotations.

        Args:
            annotation_result: Detection result exposing `annotations` (each
                with an `annotation_type`) and `total_annotations`.

        Returns:
            Human-readable recommendation strings: one per known annotation
            type present, plus one overall workload assessment.
        """
        recommendations = []
        annotation_types = self._count_annotation_types(annotation_result)

        # Type-specific recommendations; keys must match the detector's
        # annotation_type values.
        if 'strikethrough' in annotation_types:
            recommendations.append(f"Document contains {annotation_types['strikethrough']} deletion(s) - content removal suggested")
        if 'highlight' in annotation_types:
            recommendations.append(f"Document contains {annotation_types['highlight']} highlight(s) - important sections marked")
        if 'markup' in annotation_types:
            recommendations.append(f"Document contains {annotation_types['markup']} markup(s) - corrections or additions suggested")
        if 'underline' in annotation_types:
            recommendations.append(f"Document contains {annotation_types['underline']} underline(s) - emphasis or corrections indicated")

        # Overall workload assessment based on total annotation volume.
        if annotation_result.total_annotations >= 5:
            recommendations.append("Heavy editing detected - consider major revision")
        elif annotation_result.total_annotations >= 2:
            recommendations.append("Moderate editing detected - focused improvements needed")
        else:
            recommendations.append("Light editing detected - minor adjustments suggested")

        return recommendations

    def _calculate_confidence_score(self, annotation_result) -> float:
        """Calculate a confidence score (0.0-1.0) for the editing workflow.

        Base confidence grows with annotation count (0.2 each, capped at
        0.8) and is boosted or penalized by how far the mean per-annotation
        confidence sits from 0.5.

        Returns:
            0.0 when there are no annotations; otherwise a value clamped to
            [0.1, 1.0].
        """
        if not annotation_result or annotation_result.total_annotations == 0:
            return 0.0

        # Base confidence on number of annotations, capped at 0.8.
        base_confidence = min(annotation_result.total_annotations * 0.2, 0.8)

        # Boost (or reduce) by average per-annotation confidence vs. 0.5.
        avg_annotation_confidence = sum(ann.confidence for ann in annotation_result.annotations) / len(annotation_result.annotations)
        confidence_boost = (avg_annotation_confidence - 0.5) * 0.4

        # Final confidence capped at 1.0, floored at 0.1 when any annotation
        # was found.
        final_confidence = min(base_confidence + confidence_boost, 1.0)
        return max(final_confidence, 0.1)

    def _generate_workflow_summary(self, annotation_result, rewritten_length: int) -> str:
        """Generate a sentence summary of the workflow process.

        Args:
            annotation_result: Detection result with `annotations` and
                `total_annotations`.
            rewritten_length: Character count of the rewritten content
                (0 if none was generated).

        Returns:
            Period-joined summary, e.g. "Editing workflow processed 5
            annotations. Annotation breakdown: 2 strikethrough, 3 highlight."
        """
        summary_parts = [
            f"Editing workflow processed {annotation_result.total_annotations} annotations"
        ]

        if rewritten_length > 0:
            summary_parts.append(f"Generated {rewritten_length:,} characters of improved content")

        # Add annotation type breakdown (insertion order of first occurrence).
        annotation_types = self._count_annotation_types(annotation_result)
        if annotation_types:
            type_summary = ", ".join(f"{count} {ann_type}" for ann_type, count in annotation_types.items())
            summary_parts.append(f"Annotation breakdown: {type_summary}")

        return ". ".join(summary_parts) + "."
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - | - |
Parameter Details
llm_handler: An instance of LLMHandler that provides AI/LLM capabilities for text rewriting and analysis. This handler is passed to the TextRewriter component and is essential for generating improved content based on detected annotations. Must be a properly initialized LLMHandler with valid API credentials.
Return Value
The constructor returns an EditingWorkflowHandler instance. The main method 'process_document_for_editing' returns an Optional[EditingWorkflowResult] - either an EditingWorkflowResult dataclass containing annotations_detected (int), confidence_score (float), recommendations (List[str]), rewritten_content (Optional[str]), annotation_details (List[Dict]), and workflow_summary (str), or None if the workflow fails. Returns a result with 0 annotations if no edits are detected.
Class Interface
Methods
__init__(self, llm_handler: LLMHandler)
Purpose: Initializes the EditingWorkflowHandler with required dependencies for annotation detection and text rewriting
Parameters:
llm_handler: An LLMHandler instance that provides AI capabilities for text generation and rewriting
Returns: None - constructor initializes the instance
async process_document_for_editing(self, file_path: Path, image_b64: str, original_analysis: str) -> Optional[EditingWorkflowResult]
Purpose: Main workflow method that processes a document through the complete editing pipeline: detects annotations, analyzes types, generates rewritten content, and provides recommendations
Parameters:
file_path: Path object pointing to the PDF document to be processed
image_b64: Base64 encoded image representation of the document (currently unused but may be for future features)
original_analysis: The original text content/analysis of the document that will be used as the base for rewriting
Returns: EditingWorkflowResult containing annotations_detected, confidence_score, recommendations, rewritten_content, annotation_details, and workflow_summary. Returns None if the workflow encounters an error. Returns a result with 0 annotations if no edits are found.
_generate_recommendations(self, annotation_result) -> List[str]
Purpose: Generates human-readable editing recommendations based on the types and quantities of detected annotations
Parameters:
annotation_result: An annotation result object containing detected annotations with their types and properties
Returns: List of string recommendations describing the editing work needed (e.g., 'Heavy editing detected - consider major revision')
_calculate_confidence_score(self, annotation_result) -> float
Purpose: Calculates a confidence score (0.0-1.0) for the editing workflow based on the number and quality of detected annotations
Parameters:
annotation_result: An annotation result object containing detected annotations with confidence values
Returns: Float between 0.0 and 1.0 representing confidence in the annotation detection and workflow results. Returns 0.0 if no annotations found, minimum 0.1 if any annotations detected.
_generate_workflow_summary(self, annotation_result, rewritten_length: int) -> str
Purpose: Generates a comprehensive text summary of the workflow execution including annotation counts, types, and rewritten content length
Parameters:
annotation_result: An annotation result object containing all detected annotations
rewritten_length: Integer representing the character count of generated rewritten content (0 if none generated)
Returns: String summary describing the workflow results, e.g., 'Editing workflow processed 5 annotations. Generated 1,234 characters of improved content. Annotation breakdown: 2 strikethrough, 3 highlight.'
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| llm_handler | LLMHandler | Instance of LLMHandler used for AI-powered text generation and rewriting operations | instance |
| annotation_detector | AnnotationDetector | Instance of AnnotationDetector responsible for detecting and analyzing annotations in PDF documents | instance |
| text_rewriter | TextRewriter | Instance of TextRewriter that uses the LLM to generate improved text based on detected annotations | instance |
Dependencies
asyncio, typing, pathlib, dataclasses, logging, annotation_detector, text_rewriter, llm_handler
Required Imports
import asyncio
from typing import Optional, Dict, Any, List
from pathlib import Path
from dataclasses import dataclass
import logging
from annotation_detector import AnnotationDetector
from text_rewriter import TextRewriter
from llm_handler import LLMHandler
Usage Example
from pathlib import Path
import asyncio
from llm_handler import LLMHandler
from editing_workflow_handler import EditingWorkflowHandler

# Set up the LLM backend and the workflow coordinator.
llm_handler = LLMHandler(api_key='your-api-key')
workflow = EditingWorkflowHandler(llm_handler)


async def process_doc():
    # Inputs: the annotated PDF, a base64 snapshot of it, and its original text.
    file_path = Path('document_with_edits.pdf')
    image_b64 = 'base64_encoded_image_string'
    original_analysis = 'Original document text content'

    result = await workflow.process_document_for_editing(
        file_path=file_path,
        image_b64=image_b64,
        original_analysis=original_analysis
    )

    # A None result means the workflow hit an error; otherwise inspect fields.
    if result:
        print(f'Annotations found: {result.annotations_detected}')
        print(f'Confidence: {result.confidence_score}')
        print(f'Recommendations: {result.recommendations}')
        if result.rewritten_content:
            print(f'Rewritten content: {result.rewritten_content[:200]}...')
    else:
        print('Workflow failed')


# Run the async workflow
asyncio.run(process_doc())
Best Practices
- Always use async/await when calling process_document_for_editing as it performs asynchronous operations
- Ensure the LLMHandler is properly initialized with valid credentials before instantiating EditingWorkflowHandler
- The workflow only generates rewritten content if 2 or more annotations are detected (threshold for 'substantial markup')
- Handle None return values from process_document_for_editing to gracefully manage workflow failures
- The image_b64 parameter is currently passed but not used in the workflow - it may be for future enhancements
- Check the confidence_score in the result to assess the reliability of detected annotations (ranges 0.0-1.0)
- The workflow is stateless - each call to process_document_for_editing is independent
- Log messages are printed to console and logged via the logger - ensure logging is configured appropriately
- Annotation detection requires the PDF file to exist at the specified file_path
- The original_analysis parameter should contain the full text content of the document for best rewriting results
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
- class TextRewriter — 63.3% similar
- class AnnotationDetector — 59.1% similar
- class HybridResponseHandler — 57.7% similar
- class DocumentProcessor_v3 — 56.7% similar
- class EditingWorkflowResult — 56.3% similar