function analyze_pylontech_document
Performs deep forensic analysis of a specific Pylontech document stored in reMarkable Cloud, examining all document components (content, metadata, pagedata, PDF) to identify patterns and differences between app-uploaded and API-uploaded documents.
/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_pylontech_details.py
10 - 241
complex
Purpose
This function is a diagnostic tool designed to reverse-engineer the reMarkable Cloud document structure by analyzing a known good document (Pylontech datasheet). It retrieves and examines each component file (content.json, metadata.json, pagedata, PDF) using their content hashes, validates their structure, and compares them against API-uploaded documents to identify critical differences like the 'source' field that distinguishes app uploads from API uploads. Results are saved to JSON for further analysis.
Source Code
# NOTE(review): the original source was a whitespace-mangled paste — all
# indentation was lost and mojibake-corrupted emoji split several string
# literals across lines, making the code syntactically invalid as shown.
# Indentation has been reconstructed and the unrecoverable emoji status
# markers replaced with readable ASCII tags ([OK], [FAIL], [WARN], ...).
def analyze_pylontech_document():
    """Deep forensic analysis of the 'Pylontech force H3 datasheet' document.

    Downloads every component of a known app-uploaded reMarkable Cloud
    document by content hash (content.json, metadata.json, pagedata, PDF),
    validates each one, prints a comparison against API-uploaded documents,
    and saves a timestamped JSON report.

    Returns:
        None. All findings go to stdout and to a JSON file under
        ``test_results/component_analysis``.
    """
    # Authenticate against the reMarkable Cloud; bail out early on failure.
    auth = RemarkableAuth()
    session = auth.get_authenticated_session()
    if not session:
        print("[FAIL] Failed to authenticate")
        return

    print("[SCAN] Deep Analysis of 'Pylontech force H3 datasheet'")
    print("=" * 60)

    # Document UUID and component hashes were captured from a previous
    # listing of the account's docSchema; they are hardcoded on purpose —
    # this is a one-off diagnostic tool targeting one known-good document.
    doc_uuid = "943ff956-5979-4cab-83df-ef2f9f5909b5"
    components = {
        'content': {
            'hash': '072438f8e5e188eeb7114d4e3b5da48e21e42d8fd447a3fc580554e58ab3f1a7',
            'expected_size': 831,
        },
        'metadata': {
            'hash': '0f8c69e78126ff7da67ed315d1d1036256d2ad69bb8d60424004581a6845f366',
            'expected_size': 298,
        },
        'pagedata': {
            'hash': '75a11da44c802486bc6f65640aa48a730f0f684c5c07a42ba3cd1735eb3fb070',
            'expected_size': 2,
        },
        'pdf': {
            'hash': '058a5e0c6aa040168914800447b6e0160591f668c9c86ba2134cb2a3b10827e6',
            'expected_size': 370778,
        },
    }

    print(f"[DOC] Document UUID: {doc_uuid}")
    print(f"[SCAN] Analyzing {len(components)} components...")

    analysis_results = {}
    for comp_name, comp_info in components.items():
        print(f"\n[SCAN] Deep Analysis: {comp_name}")
        print("-" * 40)
        try:
            # EU region endpoint — adjust the host when targeting another
            # reMarkable Cloud region.
            response = session.get(
                f"https://eu.tectonic.remarkable.com/sync/v3/files/{comp_info['hash']}"
            )
            response.raise_for_status()

            actual_size = len(response.content)
            print("[OK] Retrieved successfully")
            print(f"[SIZE] Expected size: {comp_info['expected_size']} bytes")
            print(f"[SIZE] Actual size: {actual_size} bytes")
            size_tag = "[OK]" if actual_size == comp_info['expected_size'] else "[FAIL]"
            print(f"[SIZE] Size match: {size_tag}")

            # Raw bytes are kept here and converted to hex at save time.
            result = {
                'hash': comp_info['hash'],
                'expected_size': comp_info['expected_size'],
                'actual_size': actual_size,
                'raw_content': response.content,
                'text_content': None,
                'json_content': None,
            }
            analysis_results[comp_name] = result

            # Component-specific analysis, one helper per component kind.
            if comp_name == 'content':
                _analyze_content(response, result)
            elif comp_name == 'metadata':
                _analyze_metadata(response, result)
            elif comp_name == 'pagedata':
                _analyze_pagedata(response, actual_size, result)
            elif comp_name == 'pdf':
                _analyze_pdf(response, actual_size)
        except Exception as e:
            # Diagnostic tool: record the failure and keep analyzing the
            # remaining components rather than aborting the whole run.
            print(f"[FAIL] Failed to analyze {comp_name}: {e}")
            analysis_results[comp_name] = {
                'error': str(e),
                'hash': comp_info['hash'],
                'expected_size': comp_info['expected_size'],
            }

    _print_comparison_summary()
    _save_results(analysis_results)


def _analyze_content(response, result):
    """Validate the .content JSON and flag keys not seen in app uploads."""
    print("[CONTENT] CONTENT Analysis:")
    try:
        content_json = json.loads(response.text)
    except json.JSONDecodeError as e:
        print(f"[FAIL] Invalid JSON content: {e}")
        print(f"[RAW] Raw content preview: {response.text[:200]}...")
        return
    result['json_content'] = content_json
    print("[OK] Valid JSON content file")
    print("[KEYS] Content file keys:")
    for key, value in content_json.items():
        print(f"  - {key}: {value}")
    # Keys previously observed in genuine app-uploaded content files;
    # anything outside this set is flagged as unusual.
    expected_keys = {
        'coverPageNumber', 'customZoomCenterX', 'customZoomCenterY',
        'customZoomScale', 'documentMetadata', 'dummyDocument',
        'extraMetadata', 'fileType', 'fontName', 'fontSize',
        'lastOpenedPage', 'lineHeight', 'margins', 'orientation',
        'pageCount', 'pageTags', 'redirectionPageMap', 'sizeInBytes',
        'tags', 'textScale', 'transform',
    }
    for key in content_json:
        if key not in expected_keys:
            print(f"[WARN] Unusual content key: {key}")


def _analyze_metadata(response, result):
    """Validate the .metadata JSON and report the key app-vs-API findings."""
    print("[META] METADATA Analysis:")
    try:
        metadata_json = json.loads(response.text)
    except json.JSONDecodeError as e:
        print(f"[FAIL] Invalid JSON metadata: {e}")
        print(f"[RAW] Raw metadata preview: {response.text[:200]}...")
        return
    result['json_content'] = metadata_json
    print("[OK] Valid JSON metadata file")
    print("[KEYS] Metadata fields:")
    for key, value in metadata_json.items():
        print(f"  - {key}: {value}")

    print("\n[KEY] KEY FINDINGS:")
    # The 'source' field is the main discriminator between app uploads
    # (e.g. 'com.remarkable.macos') and API/web uploads (empty string).
    source = metadata_json.get('source', '')
    if source:
        print(f"[OK] SOURCE field present: '{source}'")
        if source == 'com.remarkable.macos':
            print("   [APP] Uploaded from macOS reMarkable app")
        elif source == 'com.remarkable.ios':
            print("   [APP] Uploaded from iOS reMarkable app")
        elif source == 'com.remarkable.android':
            print("   [APP] Uploaded from Android reMarkable app")
        else:
            print(f"   [?] Unknown source: {source}")
    else:
        print("[FAIL] SOURCE field missing")

    parent = metadata_json.get('parent', '')
    if parent:
        print(f"[DIR] Parent folder: {parent}")
    else:
        print("[DIR] Located in root folder")

    last_opened = metadata_json.get('lastOpened', '0')
    if last_opened == '0':
        print("[INFO] Document never opened")
    else:
        print(f"[INFO] Last opened: {last_opened}")


def _analyze_pagedata(response, actual_size, result):
    """Inspect the pagedata component (often template info for PDFs)."""
    print("[PAGE] PAGEDATA Analysis:")
    print(f"[SIZE] Size: {actual_size} bytes")
    if actual_size <= 0:
        print("[INFO] Empty pagedata file")
        return
    try:
        pagedata_text = response.text
        result['text_content'] = pagedata_text
        print(f"[INFO] Content: '{pagedata_text}'")
        if pagedata_text.strip():
            print("[INFO] Non-empty pagedata (may contain template info)")
        else:
            print("[INFO] Empty pagedata")
    except Exception as e:
        print(f"[FAIL] Cannot decode pagedata: {e}")
        print(f"[RAW] Raw bytes: {response.content}")


def _analyze_pdf(response, actual_size):
    """Check the PDF header and look for common metadata markers."""
    print("[PDF] PDF Analysis:")
    print(f"[SIZE] Size: {actual_size} bytes ({actual_size / 1024:.1f} KB)")
    if response.content.startswith(b'%PDF'):
        # The version string lives in the first line of the file.
        version_line = response.content[:20].decode('utf-8', errors='ignore')
        print("[OK] Valid PDF file")
        print(f"[PDF] PDF version: {version_line.strip()}")
    else:
        print("[FAIL] Invalid PDF header")
        print(f"[RAW] File starts with: {response.content[:20]}")
    for marker, label in ((b'/Title', 'title'),
                          (b'/Author', 'author'),
                          (b'/Creator', 'creator')):
        if marker in response.content:
            print(f"[PDF] PDF contains {label} metadata")


def _print_comparison_summary():
    """Print the app-upload vs API-upload comparison conclusions."""
    print("\n[CMP] COMPARISON WITH OUR UPLOADS")
    print("=" * 60)
    print("[OK] Real App Document Pattern (Pylontech):")
    print("   - Source: 'com.remarkable.macos'")
    print("   - Parent: '' (root folder)")
    print("   - LastOpened: '0' (never opened)")
    print("   - New: false")
    print("   - Pinned: false")
    print("\n[FAIL] Our Upload Pattern:")
    print("   - Source: '' (empty)")
    print("   - Parent: '' (root folder)")
    print("   - LastOpened: likely set to creation time")
    print("   - New: true (possibly)")
    print("   - Pinned: false")
    print("\n[KEY] KEY DIFFERENCE IDENTIFIED:")
    print("   The 'source' field appears to be critical!")
    print("   Real app: source = 'com.remarkable.macos'")
    print("   Our uploads: source = '' (empty)")


def _save_results(analysis_results):
    """Serialize the analysis to a timestamped JSON file (bytes -> hex)."""
    import os  # local import kept so this module stays self-contained

    results_dir = "/tf/active/e-ink-llm/cloudtest/test_results/component_analysis"
    os.makedirs(results_dir, exist_ok=True)
    results_file = f"{results_dir}/pylontech_analysis_{int(time.time())}.json"

    # Raw bytes are not JSON-serializable: always drop them, and keep a
    # hex transcript only when the component actually had content.
    for comp_data in analysis_results.values():
        raw = comp_data.pop('raw_content', None)
        if raw:
            comp_data['raw_content_hex'] = raw.hex()

    with open(results_file, 'w') as f:
        json.dump(analysis_results, f, indent=2, default=str)
    print(f"\n[SAVE] Detailed analysis saved to: {results_file}")
    print("[DONE] Analysis complete!")
Return Value
Returns None. The function outputs analysis results to console and saves detailed component analysis to a timestamped JSON file in '/tf/active/e-ink-llm/cloudtest/test_results/component_analysis/' directory.
Dependencies
jsontimeosrequests
Required Imports
import json
import time
import os
from auth import RemarkableAuth
Usage Example
from auth import RemarkableAuth
import json
import time
import os
# Simply call the function - it's self-contained
analyze_pylontech_document()
# The function will:
# 1. Authenticate with reMarkable Cloud
# 2. Download and analyze all components of the Pylontech document
# 3. Print detailed analysis to console
# 4. Save results to JSON file in test_results/component_analysis/
# Check the output file:
# results_dir = '/tf/active/e-ink-llm/cloudtest/test_results/component_analysis'
# # Look for files matching pattern: pylontech_analysis_<timestamp>.json
Best Practices
- This function is hardcoded to analyze a specific document UUID (943ff956-5979-4cab-83df-ef2f9f5909b5) and should be adapted if analyzing different documents
- Ensure the output directory exists and has write permissions before running
- The function makes multiple HTTP requests to reMarkable Cloud - handle rate limiting if running repeatedly
- Binary content (raw_content) is converted to hex strings for JSON serialization - be aware of memory usage for large PDFs
- The component hashes are hardcoded from a previous analysis - verify they match your target document
- This is a diagnostic/research tool, not production code - it includes extensive console output for debugging
- The 'source' field discovery (com.remarkable.macos vs empty string) is a key finding for API upload compatibility
- Consider error handling if the document or components are no longer available in the cloud
- The function assumes EU region endpoint - modify URL if using different reMarkable Cloud regions
Tags
Similar Components
AI-powered semantic similarity - components with related functionality:
-
function main_v113 79.6% similar
-
class RealAppUploadAnalyzer 74.3% similar
-
function main_v62 67.6% similar
-
function main_v15 65.1% similar
-
function verify_document_status 64.9% similar