class RootDocSchemaRepair
A repair tool for fixing corrupted root.docSchema entries in reMarkable cloud storage by recalculating document sizes and rebuilding the schema.
File: /tf/active/vicechatdev/e-ink-llm/cloudtest/fix_root_docschema.py
Lines: 24-402
Complexity: complex
Purpose
This class provides a comprehensive solution for repairing corrupted root.docSchema entries in the reMarkable cloud sync system. It authenticates with reMarkable's servers, backs up the current state, analyzes document entries to identify broken ones, recalculates correct document sizes by summing component sizes, rebuilds the root.docSchema with corrected entries, and uploads the fixed schema back to the server. Entries are categorized as folders, working PDFs, broken documents, or unknown types; working entries are preserved unchanged while corrupted ones are fixed. A dry-run mode allows safe testing before any changes are applied.
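For orientation, the tool treats each non-header line of root.docSchema as a colon-separated record and reads fields 0, 2, 3, and 4, exactly as analyze_current_entries does. A minimal parsing sketch with a hypothetical sample line (not real data):

# Hypothetical sample entry; real lines come from the sync/v3 files endpoint.
sample = "3a7bd3e2360a8f2e:0:d4e5f6a7-1b2c-4d3e-8f90-112233445566:4:52431"
parts = sample.split(':')
entry = {
    'hash': parts[0],  # content hash of the document's own docSchema
    'uuid': parts[2],  # document UUID
    'type': parts[3],  # '2' = folder, '4'/'5' = PDF document or notebook
    'size': parts[4],  # declared size; this is the field the tool repairs
}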
Source Code
class RootDocSchemaRepair:
    """Repairs corrupted root.docSchema entries"""

    def __init__(self):
        # Load auth session
        from auth import RemarkableAuth
        auth = RemarkableAuth()
        self.session = auth.get_authenticated_session()
        if not self.session:
            raise RuntimeError("Failed to authenticate with reMarkable")
        self.base_dir = Path(__file__).parent
        self.backup_dir = self.base_dir / "docschema_repair"
        self.backup_dir.mkdir(exist_ok=True)
        print("🔧 reMarkable Root DocSchema Repair Tool Initialized")

    def backup_current_state(self) -> Dict[str, Any]:
        """Backup current root.docSchema and related data"""
        print("\n💾 Step 1: Backing up current state...")
        try:
            # Get current root info
            root_response = self.session.get("https://eu.tectonic.remarkable.com/sync/v4/root")
            root_response.raise_for_status()
            root_data = root_response.json()

            # Get current root.docSchema content
            root_content_response = self.session.get(f"https://eu.tectonic.remarkable.com/sync/v3/files/{root_data['hash']}")
            root_content_response.raise_for_status()
            root_content = root_content_response.text

            # Save backup
            timestamp = int(time.time())
            backup_data = {
                'timestamp': timestamp,
                'root_info': root_data,
                'root_content': root_content,
                'backup_reason': 'Pre-repair backup'
            }
            backup_file = self.backup_dir / f"root_backup_{timestamp}.json"
            with open(backup_file, 'w') as f:
                json.dump(backup_data, f, indent=2)

            print(f"✅ Current state backed up to: {backup_file}")
            print(f"   Root hash: {root_data['hash']}")
            print(f"   Generation: {root_data.get('generation')}")
            print(f"   Content size: {len(root_content)} bytes")
            return backup_data
        except Exception as e:
            print(f"❌ Backup failed: {e}")
            raise

    def analyze_current_entries(self, root_content: str) -> Dict[str, List[Dict]]:
        """Analyze current root.docSchema entries and categorize them"""
        print("\n🔍 Step 2: Analyzing current root.docSchema entries...")
        lines = root_content.strip().split('\n')
        version = lines[0]
        entries = lines[1:]
        print(f"📋 DocSchema version: {version}")
        print(f"📊 Total entries: {len(entries)}")

        categorized = {
            'folders': [],
            'working_pdfs': [],      # The two invoice PDFs that still work
            'broken_documents': [],  # Documents that need fixing
            'unknown': []
        }

        # Known working documents (the two invoices that still work)
        working_pdf_names = ['invoice vicebio', 'invoice poulpharm']

        for i, line in enumerate(entries):
            if ':' not in line:
                continue
            parts = line.split(':')
            if len(parts) < 5:
                continue
            entry = {
                'line_number': i + 1,
                'hash': parts[0],
                'uuid': parts[2],
                'type': parts[3],
                'size': parts[4],
                'full_line': line
            }

            # Categorize by type
            if entry['type'] == '2':  # Folders
                categorized['folders'].append(entry)
                print(f"📁 Folder: {entry['uuid'][:8]}... (size: {entry['size']})")
            elif entry['type'] in ['4', '5']:  # PDF documents or notebooks
                # Try to identify if this is one of the working invoices
                try:
                    # Fetch the document's metadata to get its name
                    doc_response = self.session.get(f"https://eu.tectonic.remarkable.com/sync/v3/files/{entry['hash']}")
                    if doc_response.status_code == 200:
                        doc_content = doc_response.text
                        identified = False
                        # Look for the metadata component
                        for doc_line in doc_content.split('\n')[1:]:
                            if '.metadata' in doc_line and ':' in doc_line:
                                metadata_hash = doc_line.split(':')[0]
                                metadata_response = self.session.get(f"https://eu.tectonic.remarkable.com/sync/v3/files/{metadata_hash}")
                                if metadata_response.status_code == 200:
                                    metadata = json.loads(metadata_response.text)
                                    doc_name = metadata.get('visibleName', '').lower()
                                    if any(working_name in doc_name for working_name in working_pdf_names):
                                        categorized['working_pdfs'].append(entry)
                                        print(f"✅ Working PDF: '{metadata.get('visibleName')}' {entry['uuid'][:8]}... (size: {entry['size']})")
                                        identified = True
                                break
                        if not identified:
                            # Couldn't identify as a working invoice
                            categorized['broken_documents'].append(entry)
                            print(f"🔧 Document to fix: {entry['uuid'][:8]}... type {entry['type']} (size: {entry['size']})")
                    else:
                        categorized['broken_documents'].append(entry)
                        print(f"❌ Inaccessible document: {entry['uuid'][:8]}... type {entry['type']} (size: {entry['size']})")
                except Exception as e:
                    categorized['broken_documents'].append(entry)
                    print(f"❌ Error analyzing document {entry['uuid'][:8]}...: {e}")
            else:
                categorized['unknown'].append(entry)
                print(f"❓ Unknown type {entry['type']}: {entry['uuid'][:8]}... (size: {entry['size']})")

        print(f"\n📊 Categorization Summary:")
        print(f"   📁 Folders (keep unchanged): {len(categorized['folders'])}")
        print(f"   ✅ Working PDFs (keep unchanged): {len(categorized['working_pdfs'])}")
        print(f"   🔧 Documents to fix: {len(categorized['broken_documents'])}")
        print(f"   ❓ Unknown entries: {len(categorized['unknown'])}")
        return categorized

    def calculate_correct_document_size(self, doc_hash: str, doc_uuid: str) -> Tuple[int, Dict]:
        """Calculate the correct size for a document by summing its components"""
        print(f"\n🧮 Calculating correct size for document {doc_uuid[:8]}...")
        try:
            # Fetch document schema
            doc_response = self.session.get(f"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_hash}")
            doc_response.raise_for_status()
            doc_content = doc_response.text
            print(f"   📄 DocSchema size: {len(doc_content)} bytes")

            # Parse components and sum their sizes
            lines = doc_content.strip().split('\n')
            if len(lines) < 2:
                print(f"   ❌ Invalid docSchema format")
                return len(doc_content), {'error': 'Invalid format', 'components': []}

            component_sizes = []
            component_details = []
            total_component_size = 0
            for line in lines[1:]:  # Skip version header
                if ':' in line:
                    parts = line.split(':')
                    if len(parts) >= 5:
                        comp_hash = parts[0]
                        comp_name = parts[2]
                        comp_size = int(parts[4])
                        component_sizes.append(comp_size)
                        component_details.append({
                            'name': comp_name,
                            'hash': comp_hash,
                            'size': comp_size
                        })
                        total_component_size += comp_size
                        print(f"   📦 Component {comp_name}: {comp_size} bytes")

            # The correct size should be the sum of all component sizes
            # (this is what we discovered from analyzing the real Pylontech document)
            correct_size = total_component_size
            print(f"   📊 Total component sizes: {total_component_size} bytes")
            print(f"   📄 DocSchema itself: {len(doc_content)} bytes")
            print(f"   ✅ Correct root.docSchema size: {correct_size} bytes")
            return correct_size, {
                'docschema_size': len(doc_content),
                'component_count': len(component_sizes),
                'component_total': total_component_size,
                'components': component_details
            }
        except Exception as e:
            print(f"   ❌ Error calculating size: {e}")
            return len(doc_content) if 'doc_content' in locals() else 0, {'error': str(e)}

    def fix_document_entries(self, broken_documents: List[Dict]) -> List[str]:
        """Fix the broken document entries with correct sizes"""
        print(f"\n🔧 Step 3: Fixing {len(broken_documents)} broken document entries...")
        fixed_lines = []
        for i, entry in enumerate(broken_documents, 1):
            print(f"\n🔧 Fixing document {i}/{len(broken_documents)}: {entry['uuid'][:8]}...")

            # Calculate correct size
            correct_size, details = self.calculate_correct_document_size(entry['hash'], entry['uuid'])
            if 'error' not in details:
                # Reconstruct the line with the correct size
                parts = entry['full_line'].split(':')
                parts[4] = str(correct_size)  # Replace size
                fixed_line = ':'.join(parts)
                fixed_lines.append(fixed_line)
                print(f"   ✅ Fixed: {entry['uuid'][:8]}... size {entry['size']} → {correct_size}")
                print(f"   📝 Old line: {entry['full_line']}")
                print(f"   📝 New line: {fixed_line}")
            else:
                # Keep the original line if we can't fix it
                fixed_lines.append(entry['full_line'])
                print(f"   ⚠️ Keeping original: {entry['uuid'][:8]}... (couldn't fix: {details.get('error')})")
        return fixed_lines

    def rebuild_root_docschema(self, categorized: Dict, fixed_document_lines: List[str]) -> str:
        """Rebuild the complete root.docSchema with all entries"""
        print(f"\n🏗️ Step 4: Rebuilding complete root.docSchema...")

        # Start with the version header
        new_lines = ['43']  # Standard version
        unchanged_count = 0

        # Add folders (unchanged)
        for folder in categorized['folders']:
            new_lines.append(folder['full_line'])
            unchanged_count += 1

        # Add working PDFs (unchanged)
        for pdf in categorized['working_pdfs']:
            new_lines.append(pdf['full_line'])
            unchanged_count += 1

        # Add unknown entries (unchanged)
        for unknown in categorized['unknown']:
            new_lines.append(unknown['full_line'])
            unchanged_count += 1

        # Add fixed document entries
        for fixed_line in fixed_document_lines:
            new_lines.append(fixed_line)

        new_content = '\n'.join(new_lines)
        print(f"✅ Root.docSchema rebuilt:")
        print(f"   📊 Total entries: {len(new_lines) - 1}")  # -1 for version header
        print(f"   📋 Unchanged entries: {unchanged_count}")
        print(f"   🔧 Fixed entries: {len(fixed_document_lines)}")
        print(f"   📊 Total content size: {len(new_content)} bytes")
        return new_content

    def upload_fixed_root_docschema(self, new_content: str) -> bool:
        """Upload the fixed root.docSchema to the server"""
        print(f"\n⬆️ Step 5: Uploading fixed root.docSchema...")
        try:
            # Calculate the new content hash
            new_hash = hashlib.sha256(new_content.encode()).hexdigest()
            print(f"   🔐 New content hash: {new_hash}")

            # Upload the new content
            upload_response = self.session.put(
                f"https://eu.tectonic.remarkable.com/sync/v3/files/{new_hash}",
                data=new_content.encode(),
                headers={'Content-Type': 'text/plain'}
            )
            if upload_response.status_code not in [200, 202]:
                print(f"   ❌ Content upload failed: {upload_response.status_code}")
                print(f"   📄 Response: {upload_response.text}")
                return False
            print(f"   ✅ Content uploaded successfully ({upload_response.status_code})")

            # Update the root hash
            root_update_response = self.session.put(
                "https://eu.tectonic.remarkable.com/sync/v4/root",
                json={'hash': new_hash}
            )
            if root_update_response.status_code not in [200, 202]:
                print(f"   ❌ Root hash update failed: {root_update_response.status_code}")
                print(f"   📄 Response: {root_update_response.text}")
                return False
            print(f"   ✅ Root hash updated successfully ({root_update_response.status_code})")

            # Verify the update
            verify_response = self.session.get("https://eu.tectonic.remarkable.com/sync/v4/root")
            if verify_response.status_code != 200:
                print(f"   ⚠️ Cannot verify root hash update")
                return True  # Assume success
            verify_data = verify_response.json()
            if verify_data['hash'] == new_hash:
                print(f"   ✅ Root hash verified: {new_hash}")
                print(f"   📊 New generation: {verify_data.get('generation')}")
                return True
            print(f"   ❌ Root hash verification failed: {verify_data['hash']} != {new_hash}")
            return False
        except Exception as e:
            print(f"   ❌ Upload failed: {e}")
            return False

    def run_repair(self, dry_run: bool = False) -> bool:
        """Run the complete repair process"""
        print(f"\n🚀 Starting Root DocSchema Repair Process")
        print(f"📋 Mode: {'DRY RUN (no changes)' if dry_run else 'LIVE REPAIR (will make changes)'}")
        print("=" * 60)
        try:
            # Step 1: Backup current state
            backup_data = self.backup_current_state()
            root_content = backup_data['root_content']

            # Step 2: Analyze entries
            categorized = self.analyze_current_entries(root_content)

            # Step 3: Fix broken documents
            if categorized['broken_documents']:
                fixed_lines = self.fix_document_entries(categorized['broken_documents'])
            else:
                fixed_lines = []
                print("✅ No broken documents found to fix")

            # Step 4: Rebuild root.docSchema
            new_content = self.rebuild_root_docschema(categorized, fixed_lines)

            # Save the rebuilt content for inspection
            rebuilt_file = self.backup_dir / f"rebuilt_root_{int(time.time())}.txt"
            with open(rebuilt_file, 'w') as f:
                f.write(new_content)
            print(f"📄 Rebuilt root.docSchema saved to: {rebuilt_file}")

            if dry_run:
                print(f"\n🔍 DRY RUN COMPLETE - No changes made to server")
                print(f"✅ Repair plan ready - run with dry_run=False to apply changes")
                return True

            # Step 5: Upload fixed root.docSchema
            success = self.upload_fixed_root_docschema(new_content)
            if success:
                print(f"\n🎉 ROOT DOCSCHEMA REPAIR COMPLETED SUCCESSFULLY!")
                print(f"✅ The following should now be visible in your reMarkable app:")
                print(f"   📁 All folders (unchanged)")
                print(f"   📄 invoice vicebio (unchanged)")
                print(f"   📄 invoice poulpharm (unchanged)")
                print(f"   📄 All other documents (with corrected sizes)")
                print(f"\n💡 Check your reMarkable device to verify the repair worked")
            else:
                print(f"\n❌ ROOT DOCSCHEMA REPAIR FAILED")
                print(f"💡 Your data is safe - the backup is available in {self.backup_dir}")
            return success
        except Exception as e:
            print(f"\n❌ Repair process failed: {e}")
            print(f"💡 Your data is safe - check {self.backup_dir} for backups")
            return False
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - | - |
Parameter Details
No constructor parameters: The __init__ method takes no parameters. It automatically initializes authentication, creates backup directories, and prepares the repair environment.
Return Value
Instantiation returns a RootDocSchemaRepair object. The individual methods return:
- run_repair(): bool indicating success (True) or failure (False) of the repair process
- backup_current_state(): Dict[str, Any] with the backup data
- analyze_current_entries(): Dict[str, List[Dict]] with categorized entries
- calculate_correct_document_size(): Tuple[int, Dict] with the size and calculation details
- fix_document_entries(): List[str] of fixed schema lines
- rebuild_root_docschema(): str with the new schema content
- upload_fixed_root_docschema(): bool for upload success
Class Interface
Methods
__init__(self)
Purpose: Initializes the repair tool by authenticating with reMarkable, setting up session, and creating backup directory
Returns: None - raises RuntimeError if authentication fails
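Because authentication runs inside the constructor, failures surface as a RuntimeError at instantiation time; a minimal guard looks like:

try:
    repair_tool = RootDocSchemaRepair()
except RuntimeError as e:
    print(f"Could not start repair tool: {e}")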
backup_current_state(self) -> Dict[str, Any]
Purpose: Backs up the current root.docSchema and related data to a timestamped JSON file
Returns: Dictionary containing timestamp, root_info (hash and generation), root_content (schema text), and backup_reason
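Backups are written as plain JSON, so earlier states can be inspected without the tool. A small sketch, assuming it is run from the script's own directory (where docschema_repair/ is created):

import json
from pathlib import Path

for backup_file in sorted(Path("docschema_repair").glob("root_backup_*.json")):
    data = json.loads(backup_file.read_text())
    print(backup_file.name, data['root_info']['hash'], f"{len(data['root_content'])} bytes")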
analyze_current_entries(self, root_content: str) -> Dict[str, List[Dict]]
Purpose: Parses and categorizes root.docSchema entries into folders, working PDFs, broken documents, and unknown types
Parameters:
root_content: The raw text content of root.docSchema to analyze
Returns: Dictionary with keys 'folders', 'working_pdfs', 'broken_documents', 'unknown', each containing list of entry dictionaries with hash, uuid, type, size, and full_line
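Each entry dictionary carries the parsed fields plus the untouched original line, so callers can inspect fields or reuse full_line verbatim:

categorized = repair_tool.analyze_current_entries(backup_data['root_content'])
for entry in categorized['broken_documents']:
    # entry looks like: {'line_number': 3, 'hash': '...', 'uuid': '...',
    #                    'type': '4', 'size': '12345', 'full_line': 'hash:...:uuid:4:12345'}
    print(entry['uuid'], entry['size'])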
calculate_correct_document_size(self, doc_hash: str, doc_uuid: str) -> Tuple[int, Dict]
Purpose: Calculates the correct size for a document by fetching its schema and summing all component sizes
Parameters:
doc_hash: The hash identifier of the document's schema
doc_uuid: The UUID of the document, used for logging
Returns: Tuple of (correct_size as int, details dict with docschema_size, component_count, component_total, and components list)
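Callers should check the details dict for an 'error' key before trusting the size, mirroring how fix_document_entries consumes the result. Given an entry dict from the analysis step:

correct_size, details = repair_tool.calculate_correct_document_size(entry['hash'], entry['uuid'])
if 'error' in details:
    print(f"Size could not be computed: {details['error']}")
else:
    print(f"{details['component_count']} components totalling {correct_size} bytes")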
fix_document_entries(self, broken_documents: List[Dict]) -> List[str]
Purpose: Fixes broken document entries by recalculating their sizes and reconstructing their schema lines
Parameters:
broken_documents: List of document entry dictionaries identified as broken during analysis
Returns: List of fixed schema lines (strings) with corrected sizes, or original lines if fixing failed
rebuild_root_docschema(self, categorized: Dict, fixed_document_lines: List[str]) -> str
Purpose: Rebuilds the complete root.docSchema by combining unchanged entries with fixed document lines
Parameters:
categorized: Dictionary of categorized entries from analyze_current_entries()
fixed_document_lines: List of fixed document lines from fix_document_entries()
Returns: Complete root.docSchema content as a newline-separated string with version header
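The returned string is one version header ('43', as the source hardcodes) followed by one entry per line, which makes a quick sanity check straightforward before uploading:

new_content = repair_tool.rebuild_root_docschema(categorized, fixed_lines)
lines = new_content.split('\n')
assert lines[0] == '43'  # version header written by rebuild_root_docschema
assert all(line.count(':') >= 4 for line in lines[1:])  # every entry keeps its fields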
upload_fixed_root_docschema(self, new_content: str) -> bool
Purpose: Uploads the fixed root.docSchema content to reMarkable servers and updates the root hash
Parameters:
new_content: The complete rebuilt root.docSchema content to upload
Returns: True if upload and root hash update succeeded and were verified, False otherwise
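Storage is content-addressed: the upload URL's final path component is the SHA-256 hex digest of the exact bytes uploaded, so the target hash can be computed and logged before calling the method:

import hashlib

new_hash = hashlib.sha256(new_content.encode()).hexdigest()
print(f"Will upload as .../sync/v3/files/{new_hash}")
success = repair_tool.upload_fixed_root_docschema(new_content)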
run_repair(self, dry_run: bool = False) -> bool
Purpose: Executes the complete repair process: backup, analyze, fix, rebuild, and optionally upload
Parameters:
dry_run: If True, performs all steps except upload to preview changes; if False, applies changes to server
Returns: True if repair process completed successfully (or dry run completed), False if any step failed
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
| session | requests.Session | Authenticated HTTP session for making API requests to reMarkable servers | instance |
| base_dir | Path | Base directory path where the script is located | instance |
| backup_dir | Path | Directory path for storing backup files (base_dir/docschema_repair), created if it doesn't exist | instance |
Dependencies
json, time, hashlib, pathlib, typing, requests, auth
Required Imports
import json
import time
import hashlib
from pathlib import Path
from typing import Dict, List, Tuple, Any
import requests
Conditional/Optional Imports
These imports are only needed under specific conditions:
from auth import RemarkableAuth
Condition: imported lazily inside __init__ method when class is instantiated
Required (conditional)
Usage Example
# Basic usage: always dry-run first
repair_tool = RootDocSchemaRepair()

# First, run in dry-run mode to see what would be changed
success = repair_tool.run_repair(dry_run=True)
if success:
    # If the dry run looks good, run the actual repair
    success = repair_tool.run_repair(dry_run=False)
    if success:
        print("Repair completed successfully!")
    else:
        print("Repair failed, check backups")

# Advanced usage: manual step-by-step repair
repair_tool = RootDocSchemaRepair()

# Step 1: Backup
backup_data = repair_tool.backup_current_state()

# Step 2: Analyze
categorized = repair_tool.analyze_current_entries(backup_data['root_content'])

# Step 3: Fix broken documents (fall back to an empty list so Step 4 always has input)
fixed_lines = []
if categorized['broken_documents']:
    fixed_lines = repair_tool.fix_document_entries(categorized['broken_documents'])

# Step 4: Rebuild
new_content = repair_tool.rebuild_root_docschema(categorized, fixed_lines)

# Step 5: Upload
success = repair_tool.upload_fixed_root_docschema(new_content)
Best Practices
- Always run with dry_run=True first to preview changes before applying them
- The class automatically creates backups in a 'docschema_repair' subdirectory - never delete these backups until repair is verified
- Authentication happens automatically during instantiation - ensure RemarkableAuth is properly configured before creating an instance (for offline experimentation, see the stub sketch after this list)
- The repair process is stateless - each run_repair() call performs a complete backup-analyze-fix-upload cycle
- Check the backup_dir after each run to review backup files and rebuilt schemas
- The class makes network requests to reMarkable servers - ensure stable internet connection
- If upload_fixed_root_docschema() fails, the original data remains intact on the server
- The tool preserves working entries (folders and working PDFs) unchanged while fixing broken documents
- Document size calculation is based on summing component sizes from the document's schema
- The class uses SHA256 hashing to generate content hashes for upload
- All operations are logged to console with emoji indicators for easy monitoring
- The repair process can be interrupted safely - no changes are made until upload_fixed_root_docschema() succeeds
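For the offline experimentation mentioned above, a hypothetical stub module can be injected before instantiation. FakeRemarkableAuth is not part of the project; it only mirrors the single get_authenticated_session() call the constructor makes, and a plain requests.Session cannot perform real sync operations.

import sys
import types
import requests

class FakeRemarkableAuth:
    def get_authenticated_session(self):
        return requests.Session()  # swap in a mock adapter or recorded responses for tests

# Inject a fake 'auth' module so the lazy "from auth import RemarkableAuth"
# inside __init__ resolves to the stub.
fake_auth = types.ModuleType("auth")
fake_auth.RemarkableAuth = FakeRemarkableAuth
sys.modules["auth"] = fake_auth

tool = RootDocSchemaRepair()  # instantiation succeeds without real credentials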
Similar Components
AI-powered semantic similarity - components with related functionality:
- class CorrectedRootDocSchemaRepair (93.1% similar)
- function main_v64 (80.1% similar)
- function repair_system (77.7% similar)
- class RootCleaner (74.1% similar)
- function show_current_root (70.0% similar)