class RemarkableReplicaSync
A class that synchronizes reMarkable cloud documents to a local replica directory, downloading and organizing folders and documents in a hierarchical structure.
/tf/active/vicechatdev/e-ink-llm/cloudtest/sync_replica_new.py
59 - 445
complex
Purpose
RemarkableReplicaSync provides a complete solution for creating and maintaining a local replica of reMarkable cloud storage. It authenticates with the reMarkable cloud service, discovers all documents and folders, builds a proper folder hierarchy, and extracts content (PDFs and notebooks) to local storage. The class follows a proven 3-phase approach: Discovery (fetch all nodes), Hierarchy (build folder structure), and Extraction (download content). It maintains state about nodes, tracks statistics, and handles both PDF documents and notebook conversions.
Source Code
class RemarkableReplicaSync:
    """Standalone replica synchronization using proven local_replica_v2 approach.

    Mirrors reMarkable cloud storage into a local directory tree using a
    3-phase process: (1) discovery of all nodes, (2) folder-hierarchy
    construction, (3) content extraction (PDF download / notebook conversion).
    """

    # Seconds allowed per HTTP request. requests has NO default timeout, so
    # without this a stalled connection would hang the sync indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, workspace_dir: Optional[str] = None):
        """Initialize directories, logging, and cloud authentication.

        Args:
            workspace_dir: Workspace root that must contain
                'remarkable_device_token.txt'. Defaults to this script's
                directory. The replica is built in a 'remarkable_replica_v2'
                subdirectory of the workspace.

        Raises:
            RuntimeError: If authentication with the reMarkable cloud fails.
        """
        self.workspace_dir = Path(workspace_dir) if workspace_dir else Path(__file__).parent
        self.replica_dir = self.workspace_dir / "remarkable_replica_v2"
        self.content_dir = self.replica_dir / "content"

        # Create directories
        for directory in [self.replica_dir, self.content_dir]:
            directory.mkdir(parents=True, exist_ok=True)

        # Setup logging
        self.log_file = self.replica_dir / "build.log"
        self.setup_logging()

        # Initialize authentication (fail fast: nothing else works without it)
        self.session = self._authenticate()
        if not self.session:
            raise RuntimeError("Failed to authenticate with reMarkable")

        # State matching local_replica_v2.py
        self.nodes: Dict[str, RemarkableNode] = {}        # uuid -> node
        self.all_hashes: Set[str] = set()                 # every content hash seen
        self.failed_downloads: Set[str] = set()           # uuids that failed extraction

        # Statistics accumulated across the sync phases
        self.stats = {
            'total_nodes': 0,
            'folders': 0,
            'documents': 0,
            'pdfs_extracted': 0,
            'rm_files_extracted': 0,
            'rm_pdfs_converted': 0,
            'nodes_added': 0
        }

        self.logger.info("🔄 reMarkable Replica Sync Initialized")

    def setup_logging(self):
        """Setup logging to both the build.log file and the console.

        NOTE(review): basicConfig configures the ROOT logger, so this affects
        logging for the whole process, not just this class.
        """
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(self.log_file, mode='w'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger(__name__)

    def _authenticate(self) -> Optional[requests.Session]:
        """Authenticate with the reMarkable cloud service using device token approach.

        Exchanges the on-disk device token for a short-lived user token and
        returns a session pre-configured with the Authorization header.

        Returns:
            Authenticated requests.Session, or None on any failure.
        """
        try:
            print("🔑 Starting reMarkable authentication...")

            # Load device token (created by the initial device-registration setup)
            device_token_path = self.workspace_dir / "remarkable_device_token.txt"
            if not device_token_path.exists():
                raise FileNotFoundError("Device token not found. Please run initial setup.")
            with open(device_token_path, 'r') as f:
                device_token = f.read().strip()
            print(f"✅ Loaded device token ({len(device_token)} chars)")

            # Exchange device token for a user token
            session = requests.Session()
            user_token_response = session.post(
                "https://webapp-prod.cloud.remarkable.engineering/token/json/2/user/new",
                headers={"Authorization": f"Bearer {device_token}"},
                timeout=self.REQUEST_TIMEOUT
            )
            user_token_response.raise_for_status()
            user_token = user_token_response.text.strip()
            print(f"✅ User token obtained ({len(user_token)} chars)")

            # Set up authenticated session reused for all subsequent API calls
            session.headers.update({
                "Authorization": f"Bearer {user_token}",
                "User-Agent": "remarkable-replica-sync/1.0"
            })
            print("✅ Authentication complete")
            return session
        except Exception as e:
            print(f"❌ Authentication failed: {e}")
            return None

    def sync_replica(self) -> bool:
        """
        Perform replica synchronization using the proven 3-step process:
        1. Discovery - Get all nodes from cloud
        2. Hierarchy - Build proper folder structure
        3. Extraction - Download content to correct locations

        Returns:
            True if all three phases completed, False otherwise.
        """
        try:
            self.logger.info("🚀 Starting reMarkable replica sync")

            # Phase 1: Discovery
            if not self._discover_all_nodes():
                self.logger.error("❌ Discovery phase failed")
                return False

            # Phase 2: Build hierarchy
            if not self._build_folder_hierarchy():
                self.logger.error("❌ Hierarchy phase failed")
                return False

            # Phase 3: Extract content
            if not self._extract_content():
                self.logger.error("❌ Content extraction phase failed")
                return False

            # Generate summary
            self._generate_summary()
            self.logger.info("✅ Replica sync completed successfully")
            return True
        except Exception as e:
            self.logger.error(f"❌ Sync failed: {e}")
            return False

    def _discover_all_nodes(self) -> bool:
        """Phase 1: Discover all nodes from reMarkable cloud.

        Populates self.nodes (uuid -> RemarkableNode), self.all_hashes, and
        the total/folder/document counters in self.stats.
        """
        try:
            self.logger.info("📡 Phase 1: Discovering all nodes...")

            # Get root document schema using working approach
            docs_url = "https://document-storage-production-dot-remarkable-production.appspot.com/document-storage/json/2/docs"
            response = self.session.get(docs_url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            root_data = response.json()
            self.logger.info(f"📋 Retrieved root schema with {len(root_data)} items")

            # Process each document/folder
            for item in root_data:
                node = self._process_document_item(item)
                if node:
                    self.nodes[node.uuid] = node
                    self.all_hashes.add(node.hash)
                    # Add component hashes (content/metadata/pdf/pagedata + rm pages)
                    for comp_hash in [node.content_hash, node.metadata_hash, node.pdf_hash, node.pagedata_hash]:
                        if comp_hash:
                            self.all_hashes.add(comp_hash)
                    for rm_hash in node.rm_hashes:
                        self.all_hashes.add(rm_hash)

            # Update statistics
            self.stats['total_nodes'] = len(self.nodes)
            self.stats['folders'] = sum(1 for node in self.nodes.values() if node.node_type == 'folder')
            self.stats['documents'] = sum(1 for node in self.nodes.values() if node.node_type == 'document')
            self.logger.info(f"✅ Discovery complete: {self.stats['total_nodes']} nodes ({self.stats['folders']} folders, {self.stats['documents']} documents)")
            return True
        except Exception as e:
            self.logger.error(f"❌ Discovery failed: {e}")
            return False

    def _process_document_item(self, item: Dict) -> Optional[RemarkableNode]:
        """Process a single document/folder item from the cloud API response.

        Args:
            item: Raw item dict from the /docs endpoint.

        Returns:
            RemarkableNode, or None if the item lacks ID/Hash or parsing fails.
        """
        try:
            uuid = item.get('ID', '')
            hash_val = item.get('Hash', '')
            # 'VissibleName' (sic) is checked first — presumably the API's
            # historical misspelling; fall back to the corrected key. TODO confirm.
            name = item.get('VissibleName', item.get('VisibleName', 'Unnamed'))
            node_type = item.get('Type', 'unknown')
            parent_uuid = item.get('Parent', '')

            if not uuid or not hash_val:
                return None

            # Create node; 'CollectionType' marks folders, everything else is a document
            node = RemarkableNode(
                uuid=uuid,
                hash=hash_val,
                name=name,
                node_type='folder' if node_type == 'CollectionType' else 'document',
                parent_uuid=parent_uuid if parent_uuid else None,
                metadata=item
            )

            # For documents, extract component hashes
            if node.node_type == 'document':
                # Get document hashes from metadata
                if 'fileType' in item:
                    # Determine available components based on file type
                    if item['fileType'] == 'pdf':
                        node.pdf_hash = hash_val
                    elif item['fileType'] == 'notebook':
                        node.content_hash = hash_val
                        node.metadata_hash = hash_val
                        # rm files use same hash pattern
                        node.rm_hashes = [hash_val]
            return node
        except Exception as e:
            self.logger.warning(f"⚠️ Failed to process item {item.get('ID', 'unknown')}: {e}")
            return None

    def _build_folder_hierarchy(self) -> bool:
        """Phase 2: Build proper folder hierarchy.

        Wipes and recreates replica_dir/documents, computes local_path for
        every node, and pre-creates every folder (and document parent dir).
        """
        try:
            self.logger.info("📁 Phase 2: Building folder hierarchy...")

            # Clean existing documents directory so each sync starts fresh
            documents_dir = self.replica_dir / "documents"
            if documents_dir.exists():
                shutil.rmtree(documents_dir)
            documents_dir.mkdir(parents=True, exist_ok=True)

            # Build paths for all nodes
            for node in self.nodes.values():
                node.local_path = self._get_node_path(node)

            # Create all folder paths
            folders_created = 0
            for node in self.nodes.values():
                if node.node_type == 'folder':
                    folder_path = Path(node.local_path)
                    folder_path.mkdir(parents=True, exist_ok=True)
                    folders_created += 1
                elif node.node_type == 'document':
                    # Ensure parent directory exists for documents
                    doc_path = Path(node.local_path)
                    doc_path.parent.mkdir(parents=True, exist_ok=True)

            self.logger.info(f"✅ Hierarchy built: {folders_created} folders created")
            return True
        except Exception as e:
            self.logger.error(f"❌ Hierarchy building failed: {e}")
            return False

    @staticmethod
    def _sanitize_name(name: str) -> str:
        """Replace path separators in a cloud node name.

        Cloud names are arbitrary text; a '/' or '\\' in a name would otherwise
        create unintended nesting or escape the replica tree.
        """
        return name.replace('/', '_').replace('\\', '_')

    def _get_node_path(self, node: RemarkableNode) -> str:
        """Get the full local path for a node by walking up its parent chain.

        Args:
            node: Node to compute the path for.

        Returns:
            Absolute path string rooted at replica_dir/documents. Documents
            always get a '.pdf' extension (notebooks are converted to PDF).
        """
        path_parts = []
        current_node = node

        # Build path by walking up the parent chain
        while current_node:
            if current_node.node_type == 'folder':
                path_parts.append(self._sanitize_name(current_node.name))
            elif current_node.node_type == 'document':
                # For documents, add the name with extension; everything is
                # stored as PDF regardless of source fileType
                path_parts.append(f"{self._sanitize_name(current_node.name)}.pdf")
            # Move to parent (stop at root or unknown/orphaned parents)
            if current_node.parent_uuid and current_node.parent_uuid in self.nodes:
                current_node = self.nodes[current_node.parent_uuid]
            else:
                break

        # Reverse to get correct order (root to leaf)
        path_parts.reverse()

        # Build full path
        full_path = self.replica_dir / "documents"
        for part in path_parts:
            full_path = full_path / part
        return str(full_path)

    def _extract_content(self) -> bool:
        """Phase 3: Extract content to proper locations.

        Attempts extraction for every document node; individual failures are
        logged and tracked but do not abort the phase.
        """
        try:
            self.logger.info("📥 Phase 3: Extracting content...")
            documents_processed = 0
            for node in self.nodes.values():
                if node.node_type == 'document':
                    if self._extract_document_content(node):
                        documents_processed += 1
            self.logger.info(f"✅ Content extraction complete: {documents_processed} documents processed")
            return True
        except Exception as e:
            self.logger.error(f"❌ Content extraction failed: {e}")
            return False

    def _extract_document_content(self, node: RemarkableNode) -> bool:
        """Extract content for a single document (PDF download or notebook conversion).

        Args:
            node: Document node whose local_path has already been computed.

        Returns:
            True on success (or if the file already exists); False on failure,
            in which case the node's uuid is recorded in self.failed_downloads.
        """
        try:
            target_path = Path(node.local_path)

            # Skip if already exists
            if target_path.exists():
                return True

            # Try to download PDF first (preferred)
            if node.pdf_hash:
                base_url = "https://document-storage-production-dot-remarkable-production.appspot.com/document-storage/json/2"
                pdf_url = f'{base_url}/upload/request'
                pdf_data = {'http_method': 'GET', 'relative_path': node.pdf_hash}
                response = self.session.put(pdf_url, json=pdf_data, timeout=self.REQUEST_TIMEOUT)
                if response.status_code == 200:
                    # Endpoint returns a JSON-quoted signed download URL
                    download_url = response.text.strip('"')
                    pdf_response = self.session.get(download_url, timeout=self.REQUEST_TIMEOUT)
                    if pdf_response.status_code == 200:
                        with open(target_path, 'wb') as f:
                            f.write(pdf_response.content)
                        self.stats['pdfs_extracted'] += 1
                        self.logger.info(f" 📄 Extracted PDF: {node.name}")
                        return True

            # For notebook files, try to convert to PDF
            if node.content_hash and node.metadata.get('fileType') == 'notebook':
                if self._convert_notebook_to_pdf(node, target_path):
                    self.stats['rm_pdfs_converted'] += 1
                    return True

            # Nothing worked — record the failure so callers can inspect it
            self.failed_downloads.add(node.uuid)
            return False
        except Exception as e:
            self.logger.warning(f"⚠️ Failed to extract {node.name}: {e}")
            self.failed_downloads.add(node.uuid)
            return False

    def _convert_notebook_to_pdf(self, node: RemarkableNode, target_path: Path) -> bool:
        """Convert a reMarkable notebook to PDF.

        Args:
            node: Notebook node (must have content_hash set).
            target_path: Destination path for the generated PDF.

        Returns:
            True if a PDF was written, False otherwise.
        """
        try:
            # This is a simplified conversion - creates a placeholder PDF
            # In practice you'd need proper rm2pdf conversion
            base_url = "https://document-storage-production-dot-remarkable-production.appspot.com/document-storage/json/2"
            content_url = f'{base_url}/upload/request'
            content_data = {'http_method': 'GET', 'relative_path': node.content_hash}
            response = self.session.put(content_url, json=content_data, timeout=self.REQUEST_TIMEOUT)
            if response.status_code == 200:
                download_url = response.text.strip('"')
                content_response = self.session.get(download_url, timeout=self.REQUEST_TIMEOUT)
                if content_response.status_code == 200:
                    # Save as placeholder PDF (would need proper conversion in real implementation)
                    with open(target_path, 'wb') as f:
                        f.write(b"%PDF-1.4\n1 0 obj\n<</Type/Catalog/Pages 2 0 R>>\nendobj\n2 0 obj\n<</Type/Pages/Kids[3 0 R]/Count 1>>\nendobj\n3 0 obj\n<</Type/Page/Parent 2 0 R/MediaBox[0 0 612 792]>>\nendobj\nxref\n0 4\n0000000000 65535 f \n0000000009 00000 n \n0000000058 00000 n \n0000000115 00000 n \ntrailer\n<</Size 4/Root 1 0 R>>\nstartxref\n174\n%%EOF")
                    self.logger.info(f" 📝 Converted notebook: {node.name}")
                    return True
            return False
        except Exception as e:
            self.logger.warning(f"⚠️ Notebook conversion failed for {node.name}: {e}")
            return False

    def _generate_summary(self):
        """Generate sync summary, write it to sync_summary.txt and print it."""
        try:
            summary_lines = [
                "reMarkable Replica Sync Summary",
                "=" * 40,
                f"Sync completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
                "",
                "Statistics:",
                f" Total nodes: {self.stats['total_nodes']}",
                f" Folders: {self.stats['folders']}",
                f" Documents: {self.stats['documents']}",
                f" PDFs extracted: {self.stats['pdfs_extracted']}",
                f" Notebooks converted: {self.stats['rm_pdfs_converted']}",
                "",
                f"Local replica location: {self.replica_dir / 'documents'}",
                f"Content cache: {self.content_dir}",
                ""
            ]
            with open(self.replica_dir / "sync_summary.txt", 'w') as f:
                f.write('\n'.join(summary_lines))
            # Print summary to console
            print("\n".join(summary_lines))
        except Exception as e:
            self.logger.warning(f"⚠️ Summary generation failed: {e}")
Parameters
| Name | Type | Default | Kind |
|---|---|---|---|
| bases | - | - | - |
Parameter Details
workspace_dir: Optional path to the workspace directory where the replica will be created. If None, defaults to the directory containing the script. This directory should contain 'remarkable_device_token.txt' for authentication. The replica will be created in a 'remarkable_replica_v2' subdirectory within this workspace.
Return Value
The constructor returns an initialized RemarkableReplicaSync instance. The main sync_replica() method returns a boolean indicating success (True) or failure (False) of the synchronization process. Individual helper methods return booleans for success/failure or Optional types for data retrieval.
Class Interface
Methods
__init__(self, workspace_dir: str = None)
Purpose: Initialize the sync instance, set up directories, configure logging, and authenticate with reMarkable cloud
Parameters:
workspace_dir: Optional path to workspace directory; defaults to script's parent directory
Returns: None (raises RuntimeError if authentication fails)
setup_logging(self)
Purpose: Configure logging to write to both file and console with INFO level
Returns: None
_authenticate(self) -> Optional[requests.Session]
Purpose: Authenticate with reMarkable cloud using device token and obtain user token
Returns: Authenticated requests.Session object or None if authentication fails
sync_replica(self) -> bool
Purpose: Main entry point that performs complete 3-phase synchronization: discovery, hierarchy building, and content extraction
Returns: True if sync completed successfully, False otherwise
_discover_all_nodes(self) -> bool
Purpose: Phase 1: Fetch all documents and folders from reMarkable cloud and populate self.nodes
Returns: True if discovery succeeded, False otherwise
_process_document_item(self, item: Dict) -> Optional[RemarkableNode]
Purpose: Process a single document/folder item from cloud API response into a RemarkableNode object
Parameters:
item: Dictionary containing document metadata from reMarkable API
Returns: RemarkableNode object or None if processing fails
_build_folder_hierarchy(self) -> bool
Purpose: Phase 2: Create local folder structure matching cloud hierarchy and set local_path for all nodes
Returns: True if hierarchy building succeeded, False otherwise
_get_node_path(self, node: RemarkableNode) -> str
Purpose: Calculate the full local filesystem path for a node by walking up parent chain
Parameters:
node: RemarkableNode object to calculate path for
Returns: String containing full local path including replica_dir/documents prefix
_extract_content(self) -> bool
Purpose: Phase 3: Download and extract content for all document nodes to their local paths
Returns: True if content extraction succeeded, False otherwise
_extract_document_content(self, node: RemarkableNode) -> bool
Purpose: Download and save content for a single document node (PDF or notebook)
Parameters:
node: RemarkableNode representing the document to extract
Returns: True if extraction succeeded, False otherwise
_convert_notebook_to_pdf(self, node: RemarkableNode, target_path: Path) -> bool
Purpose: Convert a reMarkable notebook to PDF format (currently creates placeholder PDF)
Parameters:
node: RemarkableNode representing the notebook
target_path: Path object where PDF should be saved
Returns: True if conversion succeeded, False otherwise
_generate_summary(self)
Purpose: Generate and save a text summary of the sync operation with statistics
Returns: None
Attributes
| Name | Type | Description | Scope |
|---|---|---|---|
workspace_dir |
Path | Root workspace directory containing device token and where replica will be created | instance |
replica_dir |
Path | Directory where replica is stored (workspace_dir/remarkable_replica_v2) | instance |
content_dir |
Path | Directory for content cache (replica_dir/content) | instance |
log_file |
Path | Path to build.log file for detailed logging | instance |
logger |
logging.Logger | Logger instance for recording sync operations | instance |
session |
requests.Session | Authenticated HTTP session for reMarkable API calls | instance |
nodes |
Dict[str, RemarkableNode] | Dictionary mapping UUID to RemarkableNode objects for all discovered documents and folders | instance |
all_hashes |
Set[str] | Set of all content hashes encountered during discovery | instance |
failed_downloads |
Set[str] | Set of UUIDs for documents that failed to download | instance |
stats |
Dict[str, int] | Dictionary tracking synchronization statistics including total_nodes, folders, documents, pdfs_extracted, rm_files_extracted, rm_pdfs_converted, and nodes_added | instance |
Dependencies
requests, pathlib, logging, json, shutil, datetime, typing
Required Imports
import requests
import logging
import json
import shutil
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, Optional, List, Set
Usage Example
# Basic usage
from remarkable_replica_sync import RemarkableReplicaSync
# Initialize with default workspace (defaults to the script's directory)
sync = RemarkableReplicaSync()
# Or specify a custom workspace
sync = RemarkableReplicaSync(workspace_dir='/path/to/workspace')
# Perform full synchronization
success = sync.sync_replica()
if success:
print(f"Synced {sync.stats['total_nodes']} nodes")
print(f"Documents location: {sync.replica_dir / 'documents'}")
else:
print("Sync failed, check logs")
# Access statistics
print(f"Folders: {sync.stats['folders']}")
print(f"Documents: {sync.stats['documents']}")
print(f"PDFs extracted: {sync.stats['pdfs_extracted']}")
# Check log file for details
with open(sync.log_file) as f:
print(f.read())
Best Practices
- Always ensure remarkable_device_token.txt exists in workspace_dir before instantiation, as authentication happens in __init__
- The class raises RuntimeError if authentication fails during initialization, so wrap instantiation in try-except
- Call sync_replica() as the main entry point - it orchestrates all phases in the correct order
- Do not call internal methods (_discover_all_nodes, _build_folder_hierarchy, _extract_content) directly unless you understand the dependencies
- Check the return value of sync_replica() to determine if synchronization succeeded
- Monitor the log file (replica_dir/build.log) for detailed progress and error information
- The class maintains state in self.nodes, self.all_hashes, and self.failed_downloads - these are populated during sync_replica()
- Statistics are accumulated in self.stats dictionary throughout the sync process
- The replica is created in workspace_dir/remarkable_replica_v2/documents with full folder hierarchy
- Existing documents directory is cleaned on each sync to ensure fresh state
- The class uses a requests.Session with authentication headers - this session is reused for all API calls
- Notebook conversion creates placeholder PDFs - implement proper rm2pdf conversion for production use
- Failed downloads are tracked in self.failed_downloads set but not automatically retried
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
class RemarkableLocalReplica 78.0% similar
-
class ReplicaNode 74.6% similar
-
function test_complete_replica_build 73.7% similar
-
class RemarkableReplicaSync_v1 71.1% similar
-
class RemarkableAPIClient 69.2% similar